From a94cf1c2956ea0b3d54ab4537c2700a33b336653 Mon Sep 17 00:00:00 2001 From: Ronald Judin Date: Fri, 9 Jan 2026 12:07:52 +0200 Subject: [PATCH 01/12] OCaml C-stub analysis --- src/analyses/base.ml | 23 +++ src/analyses/ocaml.ml | 203 +++++++++++++++++++++++++ src/config/options.schema.json | 6 +- src/util/library/libraryDesc.ml | 1 + src/util/library/libraryFunctions.ml | 8 + tests/regression/90-ocaml/01-bagnall.c | 50 ++++++ 6 files changed, 289 insertions(+), 2 deletions(-) create mode 100644 src/analyses/ocaml.ml create mode 100644 tests/regression/90-ocaml/01-bagnall.c diff --git a/src/analyses/base.ml b/src/analyses/base.ml index c68a414b52..6a355417bc 100644 --- a/src/analyses/base.ml +++ b/src/analyses/base.ml @@ -2736,6 +2736,29 @@ struct set_many ~man st ((eval_lv ~man st lv, (Cilfacade.typeOfLval lv), VD.Address addr) :: blob_set) | _ -> st end + | OCamlAlloc wosize, _ -> + (*begin match lv with + | Some lv -> + let heap_var = AD.unknown_ptr (*AD.of_var (heap_var false ctx)*) in + let sizeval = eval_int ~ctx st wosize in + set_many ~ctx st [ + (heap_var, TVoid [], Blob (VD.bot (), sizeval, true)); + (eval_lv ~ctx st lv, (Cilfacade.typeOfLval lv), Address heap_var) + ] + | _ -> st + end*) + let open Queries.AllocationLocation in + begin + (* The behavior for alloc(0) is implementation defined. Here, we rely on the options specified for the malloc also applying to alloca. *) + match lv with + | Some lv -> + let loc = Heap in + let (heap_var, addr) = alloc loc wosize in + (* ignore @@ printf "alloca will allocate %a bytes\n" ID.pretty (eval_int ~man size); *) + let blob_set = Option.map_default (fun heap_var -> [(heap_var, TVoid [], VD.Blob (VD.bot (), eval_int ~man st wosize, ZeroInit.malloc))]) [] heap_var in + set_many ~man st ((eval_lv ~man st lv, (Cilfacade.typeOfLval lv), VD.Address addr) :: blob_set) + | _ -> st + end | Calloc { count = n; size }, _ -> begin match lv with | Some lv -> (* array length is set to one, as num*size is done when turning into `Calloc *) diff --git a/src/analyses/ocaml.ml b/src/analyses/ocaml.ml new file mode 100644 index 0000000000..e38b569371 --- /dev/null +++ b/src/analyses/ocaml.ml @@ -0,0 +1,203 @@ +(** Simple interprocedural analysis of OCaml C-stubs ([ocaml]). *) + +(* Goblint documentation: https://goblint.readthedocs.io/en/latest/ *) +(* Helpful link on CIL: https://goblint.github.io/cil/ *) +(* TODO: Write tests and test them with `ruby scripts/update_suite.rb group ocaml` *) +(* after removing the `SKIP` from the beginning of the tests in tests/regression/90-ocaml/{01-bagnall.c,04-o_inter.c} *) + +open GoblintCil +open Analyses + +module VarinfoSet = SetDomain.Make(CilType.Varinfo) + +(* Use to check if a specific function is a sink / source *) +(* Sources take value-typed arguments *) +(* Sinks may trigger garbage collection *) +let is_sink varinfo = Cil.hasAttribute "ocaml_sink" varinfo.vattr +let is_source varinfo = Cil.hasAttribute "ocaml_source" varinfo.vattr + + +(** "Fake" variable to handle returning from a function *) +let return_varinfo = dummyFunDec.svar + +module Spec : Analyses.MCPSpec = +struct + include Analyses.DefaultSpec + + let name () = "ocaml" + module D = + struct + (* The first set contains variables of type value that are definitely in order. The second contains definitely registered variables. *) + module P = Lattice.Prod (VarinfoSet) (VarinfoSet) + include P + + let empty () = (VarinfoSet.empty (), VarinfoSet.empty ()) + + (* After garbage collection, the second set is written to the first set *) + let after_gc (accounted, registered) = (registered, registered) + + let mem_a v (accounted, registered) = + VarinfoSet.mem v accounted + + let mem_r v (accounted, registered) = + VarinfoSet.mem v registered + + let add_a v (accounted, registered) = + (VarinfoSet.add v accounted, registered) + + let add_r v (accounted, registered) = + (accounted, VarinfoSet.add v registered) + + let remove_a v (accounted, registered) = + (VarinfoSet.remove v accounted, registered) + + let remove_r v (accounted, registered) = + (accounted, VarinfoSet.remove v registered) + end + module C = Printable.Unit + + (* We are context insensitive in this analysis *) + let context ctx _ _ = () + let startcontext () = () + + + (** Determines whether an expression [e] is healthy, given a [state]. *) + let rec exp_accounted_for (state:D.t) (e:Cil.exp) = match e with + (* Recurse over the structure in the expression, returning true if all varinfo appearing in the expression is accounted for *) + | AddrOf v + | StartOf v + | Lval v -> lval_accounted_for state v + | BinOp (_,e1,e2,_) -> exp_accounted_for state e1 && exp_accounted_for state e2 + | Real e + | Imag e + | SizeOfE e + | AlignOfE e + | CastE (_,e) + | UnOp (_,e,_) -> exp_accounted_for state e + | SizeOf _ | SizeOfStr _ | Const _ | AlignOf _ | AddrOfLabel _ -> false + | Question (b, t, f, _) -> exp_accounted_for state b && exp_accounted_for state t && exp_accounted_for state f + and lval_accounted_for state = function + | (Var v, _) -> + (* Checks whether variable v is accounted for *) (*false*) + if D.mem_a v state then true else let _ = M.warn "Value %a might be garbage collected" CilType.Varinfo.pretty v in false + | _ -> + (* The Gemara asks: is using an offset safe for the expression? The Gemara answers: by default, no. We assume our language has no pointers *) + false + + (* transfer functions *) + + (** Handles assignment of [rval] to [lval]. *) + let assign ctx (lval:lval) (rval:exp) : D.t = + let state = ctx.local in + match lval with + | Var v,_ -> + (* If lval is of type value, checks whether rval is accounted for, handles assignment to v accordingly *) (* state *) + if exp_accounted_for state rval then D.add_a v state + else D.remove_a v state + | _ -> state + + (** Handles conditional branching yielding truth value [tv]. *) + let branch ctx (exp:exp) (tv:bool) : D.t = + (* Nothing needs to be done *) + ctx.local + + (** Handles going from start node of function [f] into the function body of [f]. + Meant to handle e.g. initializiation of local variables. *) + let body ctx (f:fundec) : D.t = + (* The (non-formals) locals are initially accounted for *) + let state = ctx.local in + List.fold_left (fun st v -> D.add_a v st) state f.sformals + + (** Handles the [return] statement, i.e. "return exp" or "return", in function [f]. *) + let return ctx (exp:exp option) (f:fundec) : D.t = + let state = ctx.local in + match exp with + | Some e -> + (* Checks that value returned is accounted for. *) + (* Return_varinfo is used in place of a "real" variable. *) + (* state *) + if exp_accounted_for state e then D.add_a return_varinfo state + else let _ = M.warn "Value returned might be garbage collected" in D.remove_a return_varinfo state + | None -> state + + (** For a function call "lval = f(args)" or "f(args)", + [enter] returns a caller state, and the initial state of the callee. + In [enter], the caller state can usually be returned unchanged, as [combine_env] and [combine_assign] (below) + will compute the caller state after the function call, given the return state of the callee. *) + let enter ctx (lval: lval option) (f:fundec) (args:exp list) : (D.t * D.t) list = + let caller_state = ctx.local in + (* Create list of (formal, actual_exp)*) + (* + let zipped = List.combine f.sformals args in + (* TODO: For the initial callee_state, collect formal parameters where the actual is healthy. *) + let callee_state = List.fold_left (fun ts (f,a) -> + if exp_accounted_for caller_state a + then D.add f ts (* TODO: Change accumulator ts here? *) + else D.remove f ts) + (D.bot ()) + zipped in + *) + (* TODO: Should this be checked with locals or formals, and how exactly? Likely with locals. *) + let callee_state = caller_state in + (* first component is state of caller, second component is state of callee *) + [caller_state, callee_state] + + (** For a function call "lval = f(args)" or "f(args)", + computes the global environment state of the caller after the call. + Argument [callee_local] is the state of [f] at its return node. *) + let combine_env ctx (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t = + (* Nothing needs to be done *) + ctx.local + + (** For a function call "lval = f(args)" or "f(args)", + computes the state of the caller after assigning the return value from the call. + Argument [callee_local] is the state of [f] at its return node. *) + let combine_assign ctx (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t = + let caller_state = ctx.local in + (* Records whether lval was accounted for. *) (* caller_state *) + match lval with (* The variable returned is played by return_varinfo *) + | Some (Var v, _) -> if D.mem_a return_varinfo callee_local then D.add_a v caller_state + else let _ = M.warn "Returned value may be garbage-collected" in D.remove_a v caller_state + | _ -> caller_state + + (** For a call to a _special_ function f "lval = f(args)" or "f(args)", + computes the caller state after the function call. + For this analysis, source and sink functions will be considered _special_ and have to be treated here. *) + let special ctx (lval: lval option) (f:varinfo) (arglist:exp list) : D.t = + let caller_state = ctx.local in + (* TODO: Check if f is a sink / source and handle it appropriately *) + (* To warn about a potential issue in the code, use M.warn. *) + (* caller_state *) + let desc = LibraryFunctions.find f in + match desc.special arglist with + (* TODO: Add a source function that registers variables and add the non-buggy Bagnall code to the test. *) + | OCamlAlloc size_exp -> + (* Garbage collection may trigger here and overwrite unregistered variables *) + let _ = M.info "Garbage collection triggers" in (match lval with + | Some (Var v, _) -> D.add_a v (D.after_gc caller_state) + | _ -> + (* if not (List.for_all (exp_accounted_for caller_state) arglist) then let _ = M.warn "GC might delete value" in D.empty () else *) D.empty () + ) + | _ -> + List.iter (fun e -> ignore (exp_accounted_for caller_state e)) arglist; (* Just to trigger warnings for arguments passed to sinks/sources *) + if is_source f then match lval with (* Assigning the source's result makes the variable accounted for. *) + | Some (Var v, _) -> D.add_a v caller_state + | _ -> caller_state + else + if is_sink f then (* Warns if unaccounted variables reach the function. Empties the state of unregistered variables. *) + let _ = M.info "Garbage collection triggers" in match lval with + | Some (Var v, _) -> D.add_a v (D.after_gc caller_state) + | _ -> + (* if not (List.for_all (exp_accounted_for caller_state) arglist) then let _ = M.warn "GC might delete value" in D.empty () else *) + D.after_gc caller_state + else caller_state + + (* You may leave these alone *) + let startstate v = D.bot () + let threadenter ctx ~multiple lval f args = [D.top ()] + let threadspawn ctx ~multiple lval f args fctx = ctx.local + let exitstate v = D.top () +end + +let _ = + MCP.register_analysis (module Spec : MCPSpec) diff --git a/src/config/options.schema.json b/src/config/options.schema.json index 134ce7eb95..edb271afe4 100644 --- a/src/config/options.schema.json +++ b/src/config/options.schema.json @@ -1501,7 +1501,8 @@ "pcre", "zlib", "liblzma", - "legacy" + "legacy", + "ocaml" ] }, "default": [ @@ -1513,7 +1514,8 @@ "linux-userspace", "goblint", "ncurses", - "legacy" + "legacy", + "ocaml" ] } }, diff --git a/src/util/library/libraryDesc.ml b/src/util/library/libraryDesc.ml index 862205aad1..7fe6c73f87 100644 --- a/src/util/library/libraryDesc.ml +++ b/src/util/library/libraryDesc.ml @@ -47,6 +47,7 @@ type math = type special = | Alloca of Cil.exp | Malloc of Cil.exp + | OCamlAlloc of Cil.exp | Calloc of { count: Cil.exp; size: Cil.exp; } | Realloc of { ptr: Cil.exp; size: Cil.exp; } | Free of Cil.exp diff --git a/src/util/library/libraryFunctions.ml b/src/util/library/libraryFunctions.ml index ee663d4152..b3c160b261 100644 --- a/src/util/library/libraryFunctions.ml +++ b/src/util/library/libraryFunctions.ml @@ -1242,6 +1242,13 @@ let legacy_libs_misc_list: (string * LibraryDesc.t) list = LibraryDsl.[ ] [@@coverage off] +let ocaml_descs_list: (string * LibraryDesc.t) list = LibraryDsl.[ + ("caml_copy_double", unknown [drop "nptr" []]); + (* Eeskuju: ("malloc", special [__ "size" []] @@ fun size -> Malloc size); *) + ("caml_alloc_small", special [__ "wosize" []; __ "tag" []] @@ fun wosize tag -> OCamlAlloc wosize); + ] +[@@coverage off] + let libraries = Hashtbl.of_list [ ("c", c_descs_list @ math_descs_list); ("posix", posix_descs_list); @@ -1259,6 +1266,7 @@ let libraries = Hashtbl.of_list [ ("zlib", zlib_descs_list); ("liblzma", liblzma_descs_list); ("legacy", legacy_libs_misc_list); + ("ocaml", ocaml_descs_list) ] let libraries = diff --git a/tests/regression/90-ocaml/01-bagnall.c b/tests/regression/90-ocaml/01-bagnall.c new file mode 100644 index 0000000000..0c479a5cd6 --- /dev/null +++ b/tests/regression/90-ocaml/01-bagnall.c @@ -0,0 +1,50 @@ +// PARAM: --set "ana.activated[+]" ocaml --set "mainfun[+]" "pringo_LXM_copy" --set "mainfun[+]" "pringo_LXM_copy_correct" --set "mainfun[+]" "pringo_LXM_init_unboxed" --disable warn.imprecise --set "exp.extraspecials[+]" printInt + +// Buggy code from https://github.com/xavierleroy/pringo/issues/6 where value v is not registered. + +// putchar from the standard library is marked as a sink + +#include +#include +#include +#include +#include + +/* A small definition of the LXM state so sizeof works - from AI */ +struct LXM_state { uint64_t a; uint64_t x[2]; uint64_t s; }; + +/* Minimal macros to mimic expected behaviour */ +#define Wsizeof(ty) ((sizeof(ty) + sizeof(value) - 1) / sizeof(value)) +#define LXM_val(v) ((struct LXM_state *) Data_abstract_val(v)) + +#define CAMLparam1(x) __goblint_caml_param1(&x) +#define CAMLreturn(x) return (x) // From AI - CAMLreturn needs some variable named caml__frame, which is not available in our mock CAMLparam1, so we mock the return as well. + +/*CAMLextern value caml_alloc_small (mlsize_t wosize, tag_t) __attribute__((__ocaml_sink__));*/ + +CAMLprim value pringo_LXM_copy(value v) +{ + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); + return res; +} + +CAMLprim value pringo_LXM_copy_correct(value v) +{ + CAMLparam1(v); + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); + CAMLreturn(res); +} + +CAMLprim value pringo_LXM_init_unboxed(uint64_t i1, uint64_t i2, + uint64_t i3, uint64_t i4) +{ + value v = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + struct LXM_state * st = LXM_val(v); + st->a = i1 | 1; /* must be odd */ + st->x[0] = i2 != 0 ? i2 : 1; /* must be nonzero */ + st->x[1] = i3 != 0 ? i3 : 2; /* must be nonzero */ + st->s = i4; + return v; +} \ No newline at end of file From d663309dc3bc8e5b11222f65733546510daf1aef Mon Sep 17 00:00:00 2001 From: Ronald Judin Date: Fri, 16 Jan 2026 13:53:41 +0200 Subject: [PATCH 02/12] Added registering variables to analysis and new tests --- src/analyses/base.ml | 28 ++++ src/analyses/ocaml.ml | 53 +++++++- src/util/library/libraryDesc.ml | 2 + src/util/library/libraryFunctions.ml | 8 +- tests/regression/90-ocaml/01-bagnall.c | 8 +- tests/regression/90-ocaml/02-floatops.c | 162 +++++++++++++++++++++++ tests/regression/90-ocaml/03-demimarie.c | 51 +++++++ 7 files changed, 303 insertions(+), 9 deletions(-) create mode 100644 tests/regression/90-ocaml/02-floatops.c create mode 100644 tests/regression/90-ocaml/03-demimarie.c diff --git a/src/analyses/base.ml b/src/analyses/base.ml index 6a355417bc..3a6e21f585 100644 --- a/src/analyses/base.ml +++ b/src/analyses/base.ml @@ -2759,6 +2759,34 @@ struct set_many ~man st ((eval_lv ~man st lv, (Cilfacade.typeOfLval lv), VD.Address addr) :: blob_set) | _ -> st end + (* TODO: Rethink OCamlParam's placement *) + | OCamlParam {param1; param2; param3; param4; param5}, _ -> + begin match param1 with + | Some p1 -> + let _ = eval_rv ~man st p1 in + begin match param2 with + | Some p2 -> + let _ = eval_rv ~man st p2 in + begin match param3 with + | Some p3 -> + let _ = eval_rv ~man st p3 in + begin match param4 with + | Some p4 -> + let _ = eval_rv ~man st p4 in + begin match param5 with + | Some p5 -> + let _ = eval_rv ~man st p5 in + st + | _ -> st + end + | _ -> st + end + | _ -> st + end + | _ -> st + end + | _ -> st + end | Calloc { count = n; size }, _ -> begin match lv with | Some lv -> (* array length is set to one, as num*size is done when turning into `Calloc *) diff --git a/src/analyses/ocaml.ml b/src/analyses/ocaml.ml index e38b569371..2a6fa1f328 100644 --- a/src/analyses/ocaml.ml +++ b/src/analyses/ocaml.ml @@ -170,10 +170,57 @@ struct (* caller_state *) let desc = LibraryFunctions.find f in match desc.special arglist with - (* TODO: Add a source function that registers variables and add the non-buggy Bagnall code to the test. *) + | OCamlParam params -> + (* Variables are registered with a Param macro. *) + (* TODO: Which pattern below do the params in fact match? Does it vary? *) + (* The function extract_varinfo is from AI - the code can be shortened when we know what the params are. *) + let rec extract_varinfo (e: Cil.exp): varinfo option = match e with + (* Direct variable lvalue *) + | Lval (Var v, _) -> Some v + (* Dereferenced pointer - try to extract from inner expression *) + | Lval (Mem inner, _) -> extract_varinfo inner + (* Address of variable *) + | AddrOf (Var v, _) -> Some v (* TODO: Maybe this is the only thing a param can be. *) + (* Address of dereferenced - try to extract from inner *) + | AddrOf (Mem inner, _) -> extract_varinfo inner + (* Array start of variable *) + | StartOf (Var v, _) -> Some v + (* Array start of dereferenced *) + | StartOf (Mem inner, _) -> extract_varinfo inner + (* Type cast - extract from inner expression *) + | CastE (_, inner) -> extract_varinfo inner + (* Unary operations - extract from operand *) + | UnOp (_, inner, _) -> extract_varinfo inner + (* Real/imaginary parts *) + | Real inner | Imag inner -> extract_varinfo inner + (* Binary operations - try both operands *) + | BinOp (_, e1, e2, _) -> + (match extract_varinfo e1 with + | Some v -> Some v + | None -> extract_varinfo e2) + (* Size and alignment - may contain var info in the type *) + | SizeOfE inner | AlignOfE inner -> extract_varinfo inner + (* Constants, SizeOf, AlignOf, SizeOfStr, AddrOfLabel - no varinfo *) + | Const _ | SizeOf _ | AlignOf _ | SizeOfStr _ | AddrOfLabel _ -> None + (* Question conditional *) + | Question (cond, e1, e2, _) -> + (match extract_varinfo cond with + | Some v -> Some v + | None -> + (match extract_varinfo e1 with + | Some v -> Some v + | None -> extract_varinfo e2)) + in + List.fold_left (fun state param -> + match param with + | Some e -> + (match extract_varinfo e with + | Some v -> D.add_r v state + | None -> state) + | None -> state) caller_state [params.param1; params.param2; params.param3; params.param4; params.param5] | OCamlAlloc size_exp -> (* Garbage collection may trigger here and overwrite unregistered variables *) - let _ = M.info "Garbage collection triggers" in (match lval with + let _ = M.debug "Garbage collection triggers" in (match lval with | Some (Var v, _) -> D.add_a v (D.after_gc caller_state) | _ -> (* if not (List.for_all (exp_accounted_for caller_state) arglist) then let _ = M.warn "GC might delete value" in D.empty () else *) D.empty () @@ -185,7 +232,7 @@ struct | _ -> caller_state else if is_sink f then (* Warns if unaccounted variables reach the function. Empties the state of unregistered variables. *) - let _ = M.info "Garbage collection triggers" in match lval with + let _ = M.debug "Garbage collection triggers" in match lval with | Some (Var v, _) -> D.add_a v (D.after_gc caller_state) | _ -> (* if not (List.for_all (exp_accounted_for caller_state) arglist) then let _ = M.warn "GC might delete value" in D.empty () else *) diff --git a/src/util/library/libraryDesc.ml b/src/util/library/libraryDesc.ml index 7fe6c73f87..7539afd4f2 100644 --- a/src/util/library/libraryDesc.ml +++ b/src/util/library/libraryDesc.ml @@ -48,6 +48,8 @@ type special = | Alloca of Cil.exp | Malloc of Cil.exp | OCamlAlloc of Cil.exp + (* TODO: Rethink OCamlParam's placement and make the params a list. *) + | OCamlParam of { param1: Cil.exp option; param2: Cil.exp option; param3: Cil.exp option; param4: Cil.exp option; param5: Cil.exp option; } | Calloc of { count: Cil.exp; size: Cil.exp; } | Realloc of { ptr: Cil.exp; size: Cil.exp; } | Free of Cil.exp diff --git a/src/util/library/libraryFunctions.ml b/src/util/library/libraryFunctions.ml index b3c160b261..f28110362d 100644 --- a/src/util/library/libraryFunctions.ml +++ b/src/util/library/libraryFunctions.ml @@ -1243,9 +1243,15 @@ let legacy_libs_misc_list: (string * LibraryDesc.t) list = LibraryDsl.[ [@@coverage off] let ocaml_descs_list: (string * LibraryDesc.t) list = LibraryDsl.[ - ("caml_copy_double", unknown [drop "nptr" []]); + ("caml_copy_double", special [drop "nptr" []] (OCamlAlloc (GoblintCil.integer 1))); (* Eeskuju: ("malloc", special [__ "size" []] @@ fun size -> Malloc size); *) ("caml_alloc_small", special [__ "wosize" []; __ "tag" []] @@ fun wosize tag -> OCamlAlloc wosize); + ("__goblint_caml_param0", special [] @@ OCamlParam {param1 = None; param2 = None; param3 = None; param4 = None; param5 = None}); + ("__goblint_caml_param1", special [__ "param" []] @@ fun param -> OCamlParam {param1 = Some param; param2 = None; param3 = None; param4 = None; param5 = None}); + ("__goblint_caml_param2", special [__ "param1" []; __ "param2" []] @@ fun param1 param2 -> OCamlParam {param1 = Some param1; param2 = Some param2; param3 = None; param4 = None; param5 = None}); + ("__goblint_caml_param3", special [__ "param1" []; __ "param2" []; __ "param3" []] @@ fun param1 param2 param3 -> OCamlParam {param1 = Some param1; param2 = Some param2; param3 = Some param3; param4 = None; param5 = None}); + ("__goblint_caml_param4", special [__ "param1" []; __ "param2" []; __ "param3" []; __ "param4" []] @@ fun param1 param2 param3 param4 -> OCamlParam {param1 = Some param1; param2 = Some param2; param3 = Some param3; param4 = Some param4; param5 = None}); + ("__goblint_caml_param5", special [__ "param1" []; __ "param2" []; __ "param3" []; __ "param4" []; __ "param5" []] @@ fun param1 param2 param3 param4 param5 -> OCamlParam {param1 = Some param1; param2 = Some param2; param3 = Some param3; param4 = Some param4; param5 = Some param5}); ] [@@coverage off] diff --git a/tests/regression/90-ocaml/01-bagnall.c b/tests/regression/90-ocaml/01-bagnall.c index 0c479a5cd6..8cf6eb78d3 100644 --- a/tests/regression/90-ocaml/01-bagnall.c +++ b/tests/regression/90-ocaml/01-bagnall.c @@ -2,8 +2,6 @@ // Buggy code from https://github.com/xavierleroy/pringo/issues/6 where value v is not registered. -// putchar from the standard library is marked as a sink - #include #include #include @@ -25,7 +23,7 @@ struct LXM_state { uint64_t a; uint64_t x[2]; uint64_t s; }; CAMLprim value pringo_LXM_copy(value v) { value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); - memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); + memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // WARN return res; } @@ -33,14 +31,14 @@ CAMLprim value pringo_LXM_copy_correct(value v) { CAMLparam1(v); value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); - memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); + memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // NOWARN CAMLreturn(res); } CAMLprim value pringo_LXM_init_unboxed(uint64_t i1, uint64_t i2, uint64_t i3, uint64_t i4) { - value v = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + value v = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); // NOWARN struct LXM_state * st = LXM_val(v); st->a = i1 | 1; /* must be odd */ st->x[0] = i2 != 0 ? i2 : 1; /* must be nonzero */ diff --git a/tests/regression/90-ocaml/02-floatops.c b/tests/regression/90-ocaml/02-floatops.c new file mode 100644 index 0000000000..4baedaf26b --- /dev/null +++ b/tests/regression/90-ocaml/02-floatops.c @@ -0,0 +1,162 @@ +// PARAM: --set "ana.activated[+]" ocaml --set "mainfun[+]" "UNARY_OP" --set "mainfun[+]" "BINARY_OP" --set "mainfun[+]" "atof_double" --set "mainfun[+]" "atof_float" --set "mainfun[+]" "max_float" --set "mainfun[+]" "smallest_float" --set "mainfun[+]" "pi_float" --disable warn.imprecise --set "exp.extraspecials[+]" printInt + +// Code from stubs.c in src/common/cdomains/floatOps that should be correct despite missing CAMLparam, as discussed in https://github.com/goblint/analyzer/issues/1371. + +/* A small definition of the LXM state so sizeof works - from AI */ +//struct LXM_state { uint64_t a; uint64_t x[2]; uint64_t s; }; + +/* Minimal macros to mimic expected behaviour */ +#define Wsizeof(ty) ((sizeof(ty) + sizeof(value) - 1) / sizeof(value)) +#define LXM_val(v) ((struct LXM_state *) Data_abstract_val(v)) + +#define CAMLparam1(x) __goblint_caml_param1(&x) +#define CAMLreturn(x) return (x) // From AI - CAMLreturn needs some variable named caml__frame, which is not available in our mock CAMLparam1, so we mock the return as well. + + +#define _GNU_SOURCE // necessary for M_PI to be defined +#include +#include +#include +#include +#include +#include +#include + +// Order must match with round_mode in floatOps.ml +enum round_mode +{ + Nearest, + ToZero, + Up, + Down +}; + +static void change_round_mode(int mode) +{ + switch (mode) + { + case Nearest: + fesetround(FE_TONEAREST); + break; + case ToZero: + fesetround(FE_TOWARDZERO); + break; + case Up: + fesetround(FE_UPWARD); + break; + case Down: + fesetround(FE_DOWNWARD); + break; + default: + // Assert ignored to focus on the OCaml stubs. + assert(0); // FAIL + break; + } +} + +#define UNARY_OP(name, type, op) \ + CAMLprim value name##_##type(value mode, value x) \ + { \ + /* No need to use CAMLparam to keep mode and x as GC roots, + because next GC poll point is at allocation in caml_copy_double. + We have already read their values by then. */ \ + int old_roundingmode = fegetround(); \ + change_round_mode(Int_val(mode)); \ + volatile type r, x1 = Double_val(x); \ + r = op(x1); \ + fesetround(old_roundingmode); \ + return caml_copy_double(r); /* NOWARN */ \ + /* No need to use CAMLreturn because we don't use CAMLparam. */ \ + } + +UNARY_OP(sqrt, double, sqrt); +UNARY_OP(sqrt, float, sqrtf); +UNARY_OP(acos, double, acos); +UNARY_OP(acos, float, acosf); +UNARY_OP(asin, double, asin); +UNARY_OP(asin, float, asinf); +UNARY_OP(atan, double, atan); +UNARY_OP(atan, float, atanf); +UNARY_OP(cos, double, cos); +UNARY_OP(cos, float, cosf); +UNARY_OP(sin, double, sin); +UNARY_OP(sin, float, sinf); +UNARY_OP(tan, double, tan); +UNARY_OP(tan, float, tanf); + +#define BINARY_OP(name, type, op) \ + CAMLprim value name##_##type(value mode, value x, value y) \ + { \ + /* No need to use CAMLparam to keep mode, x and y as GC roots, + because next GC poll point is at allocation in caml_copy_double. + We have already read their values by then. */ \ + int old_roundingmode = fegetround(); \ + change_round_mode(Int_val(mode)); \ + volatile type r, x1 = Double_val(x), y1 = Double_val(y); \ + r = x1 op y1; \ + fesetround(old_roundingmode); \ + return caml_copy_double(r); /* NOWARN */ \ + /* No need to use CAMLreturn because we don't use CAMLparam. */ \ + } + +BINARY_OP(add, double, +); +BINARY_OP(add, float, +); +BINARY_OP(sub, double, -); +BINARY_OP(sub, float, -); +BINARY_OP(mul, double, *); +BINARY_OP(mul, float, *); +BINARY_OP(div, double, /); +BINARY_OP(div, float, /); + +CAMLprim value atof_double(value mode, value str) +{ + // No need to use CAMLparam to keep mode and str as GC roots, + // because next GC poll point is at allocation in caml_copy_double. + // We have already read their values by then. + const char *s = String_val(str); + volatile double r; + int old_roundingmode = fegetround(); + change_round_mode(Int_val(mode)); + r = atof(s); + fesetround(old_roundingmode); + return caml_copy_double(r); // NOWARN + // No need to use CAMLreturn because we don't use CAMLparam. +} + +CAMLprim value atof_float(value mode, value str) +{ + // No need to use CAMLparam to keep mode and str as GC roots, + // because next GC poll point is at allocation in caml_copy_double. + // We have already read their values by then. + const char *s = String_val(str); // Despite not being a value, this is a pointer into the OCaml heap and needs to be checked against GC. Doubles are also pointers. + volatile float r; + int old_roundingmode = fegetround(); // NOWARN + change_round_mode(Int_val(mode)); // This makes a copy of the mode and no problems will arise. + r = (float)atof(s); + fesetround(old_roundingmode); + return caml_copy_double(r); // NOWARN + // No need to use CAMLreturn because we don't use CAMLparam. +} + +// These are only given for floats as these operations involve no rounding and their OCaml implementation (Float module) can be used + +CAMLprim value max_float(value unit) +{ + // No need to use CAMLparam to keep unit as GC root, + // because we don't use it. + return caml_copy_double(FLT_MAX); // NOWARN + // No need to use CAMLreturn because we don't use CAMLparam. +} + +CAMLprim value smallest_float(value unit) +{ + // No need to use CAMLparam to keep unit as GC root, + // because we don't use it. + return caml_copy_double(FLT_MIN); // NOWARN + // No need to use CAMLreturn because we don't use CAMLparam. +} + +CAMLprim value pi_float(value unit) +{ + return caml_copy_double(M_PI); // NOWARN +} diff --git a/tests/regression/90-ocaml/03-demimarie.c b/tests/regression/90-ocaml/03-demimarie.c new file mode 100644 index 0000000000..a3d430aee5 --- /dev/null +++ b/tests/regression/90-ocaml/03-demimarie.c @@ -0,0 +1,51 @@ +// PARAM: --set "ana.activated[+]" ocaml --set "mainfun[+]" "caml_gc_counters" --set "mainfun[+]" "caml_gc_counters_correct" --disable warn.imprecise --set "exp.extraspecials[+]" printInt + +// OCaml code fixed by Demi Marie Obenour in https://github.com/ocaml/ocaml/pull/13370, before and after the fix. + +/* Minimal macros to mimic expected behaviour */ +#define Wsizeof(ty) ((sizeof(ty) + sizeof(value) - 1) / sizeof(value)) +#define LXM_val(v) ((struct LXM_state *) Data_abstract_val(v)) + +#define CAMLparam0() __goblint_caml_param0() +#define CAMLparam1(x) __goblint_caml_param1(&x) +#define CAMLreturn(x) return (x) // From AI - CAMLreturn needs some variable named caml__frame, which is not available in our mock CAMLparam1, so we mock the return as well. + + +CAMLprim value caml_gc_counters(value v) +{ + CAMLparam0 (); /* v is ignored */ + CAMLlocal1 (res); + + /* get a copy of these before allocating anything... */ + double minwords = caml_gc_minor_words_unboxed(); + double prowords = (double)Caml_state->stat_promoted_words; + double majwords = Caml_state->stat_major_words + + (double) Caml_state->allocated_words; + + res = caml_alloc_3(0, + caml_copy_double (minwords), + caml_copy_double (prowords), + caml_copy_double (majwords)); // WARN + CAMLreturn (res); +} + +CAMLprim value caml_gc_counters_correct(value v) +{ + CAMLparam0 (); /* v is ignored */ + CAMLlocal3 (minwords_, prowords_, majwords_); + + /* get a copy of these before allocating anything... */ + double minwords = caml_gc_minor_words_unboxed(); + double prowords = (double)Caml_state->stat_promoted_words; + double majwords = Caml_state->stat_major_words + + (double) Caml_state->allocated_words; + + minwords_ = caml_copy_double(minwords); + prowords_ = caml_copy_double(prowords); + majwords_ = caml_copy_double(majwords); + v = caml_alloc_small(3, 0); + Field(v, 0) = minwords_; + Field(v, 1) = prowords_; + Field(v, 2) = majwords_; + CAMLreturn(v); +} \ No newline at end of file From ac87d3831c393aec57dd105884fcddd0acc7ab33 Mon Sep 17 00:00:00 2001 From: Ronald Judin Date: Fri, 23 Jan 2026 03:04:13 +0200 Subject: [PATCH 03/12] Analysis has tracking set, params are a list --- src/analyses/base.ml | 29 +---- src/analyses/ocaml.ml | 128 +++++++++++------------ src/util/library/libraryDesc.ml | 4 +- src/util/library/libraryFunctions.ml | 13 +-- tests/regression/90-ocaml/02-floatops.c | 48 ++++----- tests/regression/90-ocaml/03-demimarie.c | 21 ++-- 6 files changed, 110 insertions(+), 133 deletions(-) diff --git a/src/analyses/base.ml b/src/analyses/base.ml index 3a6e21f585..b3504cded6 100644 --- a/src/analyses/base.ml +++ b/src/analyses/base.ml @@ -2760,33 +2760,8 @@ struct | _ -> st end (* TODO: Rethink OCamlParam's placement *) - | OCamlParam {param1; param2; param3; param4; param5}, _ -> - begin match param1 with - | Some p1 -> - let _ = eval_rv ~man st p1 in - begin match param2 with - | Some p2 -> - let _ = eval_rv ~man st p2 in - begin match param3 with - | Some p3 -> - let _ = eval_rv ~man st p3 in - begin match param4 with - | Some p4 -> - let _ = eval_rv ~man st p4 in - begin match param5 with - | Some p5 -> - let _ = eval_rv ~man st p5 in - st - | _ -> st - end - | _ -> st - end - | _ -> st - end - | _ -> st - end - | _ -> st - end + | OCamlParam params, _ -> + List.fold_left (fun acc param -> let _ = eval_rv ~man acc param in acc) st params | Calloc { count = n; size }, _ -> begin match lv with | Some lv -> (* array length is set to one, as num*size is done when turning into `Calloc *) diff --git a/src/analyses/ocaml.ml b/src/analyses/ocaml.ml index 2a6fa1f328..b8647d47de 100644 --- a/src/analyses/ocaml.ml +++ b/src/analyses/ocaml.ml @@ -27,32 +27,42 @@ struct let name () = "ocaml" module D = struct - (* The first set contains variables of type value that are definitely in order. The second contains definitely registered variables. *) - module P = Lattice.Prod (VarinfoSet) (VarinfoSet) + (* The first set contains variables of type value that are definitely in order. The second contains definitely registered variables. The third contains variables the analysis tracks. *) + module P = Lattice.Prod3 (VarinfoSet) (VarinfoSet) (VarinfoSet) include P - let empty () = (VarinfoSet.empty (), VarinfoSet.empty ()) + let empty () = (VarinfoSet.empty (), VarinfoSet.empty (), VarinfoSet.empty ()) (* After garbage collection, the second set is written to the first set *) - let after_gc (accounted, registered) = (registered, registered) + let after_gc (accounted, registered, tracked) = (registered, registered, tracked) - let mem_a v (accounted, registered) = + (* Untracked variables are always fine. *) + let mem_a v (accounted, registered, tracked) = VarinfoSet.mem v accounted - let mem_r v (accounted, registered) = + let mem_r v (accounted, registered, tracked) = VarinfoSet.mem v registered - let add_a v (accounted, registered) = - (VarinfoSet.add v accounted, registered) + let mem_t v (accounted, registered, tracked) = + VarinfoSet.mem v tracked - let add_r v (accounted, registered) = - (accounted, VarinfoSet.add v registered) + let add_a v (accounted, registered, tracked) = + (VarinfoSet.add v accounted, registered, tracked) - let remove_a v (accounted, registered) = - (VarinfoSet.remove v accounted, registered) + let add_r v (accounted, registered, tracked) = + (accounted, VarinfoSet.add v registered, tracked) - let remove_r v (accounted, registered) = - (accounted, VarinfoSet.remove v registered) + let add_t v (accounted, registered, tracked) = + (accounted, registered, VarinfoSet.add v tracked) + + let remove_a v (accounted, registered, tracked) = + (VarinfoSet.remove v accounted, registered, tracked) + + let remove_r v (accounted, registered, tracked) = + (accounted, VarinfoSet.remove v registered, tracked) + + let remove_t v (accounted, registered, tracked) = + (accounted, registered, VarinfoSet.remove v tracked) end module C = Printable.Unit @@ -79,11 +89,33 @@ struct and lval_accounted_for state = function | (Var v, _) -> (* Checks whether variable v is accounted for *) (*false*) - if D.mem_a v state then true else let _ = M.warn "Value %a might be garbage collected" CilType.Varinfo.pretty v in false + if D.mem_a v state || not (D.mem_t v state) then true else let _ = M.warn "Value %a might be garbage collected" CilType.Varinfo.pretty v in false | _ -> (* The Gemara asks: is using an offset safe for the expression? The Gemara answers: by default, no. We assume our language has no pointers *) false + (** Determines whether an expression [e] has parts in the OCaml heap, given a [state]. *) + let rec exp_tracked (state:D.t) (e:Cil.exp) = match e with + (* Recurse over the structure in the expression, returning true if some varinfo appearing in the expression is tracked *) + | AddrOf v + | StartOf v + | Lval v -> lval_tracked state v + | BinOp (_,e1,e2,_) -> exp_tracked state e1 || exp_tracked state e2 + | Real e + | Imag e + | SizeOfE e + | AlignOfE e + | CastE (_,e) + | UnOp (_,e,_) -> exp_tracked state e + | SizeOf _ | SizeOfStr _ | Const _ | AlignOf _ | AddrOfLabel _ -> false + | Question (b, t, f, _) -> exp_tracked state b || exp_tracked state t || exp_tracked state f + and lval_tracked state = function + | (Var v, _) -> + (* Checks whether variable v is tracked *) + D.mem_t v state + | _ -> + false + (* transfer functions *) (** Handles assignment of [rval] to [lval]. *) @@ -92,8 +124,10 @@ struct match lval with | Var v,_ -> (* If lval is of type value, checks whether rval is accounted for, handles assignment to v accordingly *) (* state *) - if exp_accounted_for state rval then D.add_a v state - else D.remove_a v state + if exp_accounted_for state rval then + if exp_tracked state rval then D.add_a v (D.add_t v state) + else D.add_a v (D.remove_t v state) + else let _ = M.info "The above is being assigned" in D.remove_a v (D.add_t v state) | _ -> state (** Handles conditional branching yielding truth value [tv]. *) @@ -104,9 +138,9 @@ struct (** Handles going from start node of function [f] into the function body of [f]. Meant to handle e.g. initializiation of local variables. *) let body ctx (f:fundec) : D.t = - (* The (non-formals) locals are initially accounted for *) + (* The (non-formals) locals are tracked and initially accounted for *) let state = ctx.local in - List.fold_left (fun st v -> D.add_a v st) state f.sformals + List.fold_left (fun st v -> D.add_a v (D.add_t v st)) state f.sformals (** Handles the [return] statement, i.e. "return exp" or "return", in function [f]. *) let return ctx (exp:exp option) (f:fundec) : D.t = @@ -115,6 +149,7 @@ struct | Some e -> (* Checks that value returned is accounted for. *) (* Return_varinfo is used in place of a "real" variable. *) + (* TODO: Consider how the return_varinfo needs to be tracked. *) (* state *) if exp_accounted_for state e then D.add_a return_varinfo state else let _ = M.warn "Value returned might be garbage collected" in D.remove_a return_varinfo state @@ -155,6 +190,7 @@ struct let combine_assign ctx (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t = let caller_state = ctx.local in (* Records whether lval was accounted for. *) (* caller_state *) + (* TODO: Consider how the return_varinfo needs to be tracked. *) match lval with (* The variable returned is played by return_varinfo *) | Some (Var v, _) -> if D.mem_a return_varinfo callee_local then D.add_a v caller_state else let _ = M.warn "Returned value may be garbage-collected" in D.remove_a v caller_state @@ -170,54 +206,14 @@ struct (* caller_state *) let desc = LibraryFunctions.find f in match desc.special arglist with + (* Variables are registered with a Param macro. *) | OCamlParam params -> - (* Variables are registered with a Param macro. *) - (* TODO: Which pattern below do the params in fact match? Does it vary? *) - (* The function extract_varinfo is from AI - the code can be shortened when we know what the params are. *) - let rec extract_varinfo (e: Cil.exp): varinfo option = match e with - (* Direct variable lvalue *) - | Lval (Var v, _) -> Some v - (* Dereferenced pointer - try to extract from inner expression *) - | Lval (Mem inner, _) -> extract_varinfo inner - (* Address of variable *) - | AddrOf (Var v, _) -> Some v (* TODO: Maybe this is the only thing a param can be. *) - (* Address of dereferenced - try to extract from inner *) - | AddrOf (Mem inner, _) -> extract_varinfo inner - (* Array start of variable *) - | StartOf (Var v, _) -> Some v - (* Array start of dereferenced *) - | StartOf (Mem inner, _) -> extract_varinfo inner - (* Type cast - extract from inner expression *) - | CastE (_, inner) -> extract_varinfo inner - (* Unary operations - extract from operand *) - | UnOp (_, inner, _) -> extract_varinfo inner - (* Real/imaginary parts *) - | Real inner | Imag inner -> extract_varinfo inner - (* Binary operations - try both operands *) - | BinOp (_, e1, e2, _) -> - (match extract_varinfo e1 with - | Some v -> Some v - | None -> extract_varinfo e2) - (* Size and alignment - may contain var info in the type *) - | SizeOfE inner | AlignOfE inner -> extract_varinfo inner - (* Constants, SizeOf, AlignOf, SizeOfStr, AddrOfLabel - no varinfo *) - | Const _ | SizeOf _ | AlignOf _ | SizeOfStr _ | AddrOfLabel _ -> None - (* Question conditional *) - | Question (cond, e1, e2, _) -> - (match extract_varinfo cond with - | Some v -> Some v - | None -> - (match extract_varinfo e1 with - | Some v -> Some v - | None -> extract_varinfo e2)) - in - List.fold_left (fun state param -> - match param with - | Some e -> - (match extract_varinfo e with - | Some v -> D.add_r v state - | None -> state) - | None -> state) caller_state [params.param1; params.param2; params.param3; params.param4; params.param5] + List.fold_left (fun state param -> let _ = M.info "We reach here" in + match param with + | AddrOf (Var v, _) -> let _ = M.info "Address of" in + D.add_r v state + | _ -> state + ) caller_state params | OCamlAlloc size_exp -> (* Garbage collection may trigger here and overwrite unregistered variables *) let _ = M.debug "Garbage collection triggers" in (match lval with diff --git a/src/util/library/libraryDesc.ml b/src/util/library/libraryDesc.ml index 7539afd4f2..35b2f4ad41 100644 --- a/src/util/library/libraryDesc.ml +++ b/src/util/library/libraryDesc.ml @@ -48,8 +48,8 @@ type special = | Alloca of Cil.exp | Malloc of Cil.exp | OCamlAlloc of Cil.exp - (* TODO: Rethink OCamlParam's placement and make the params a list. *) - | OCamlParam of { param1: Cil.exp option; param2: Cil.exp option; param3: Cil.exp option; param4: Cil.exp option; param5: Cil.exp option; } + (* TODO: Rethink OCamlParam's placement. *) + | OCamlParam of Cil.exp list | Calloc of { count: Cil.exp; size: Cil.exp; } | Realloc of { ptr: Cil.exp; size: Cil.exp; } | Free of Cil.exp diff --git a/src/util/library/libraryFunctions.ml b/src/util/library/libraryFunctions.ml index f28110362d..ab39c11541 100644 --- a/src/util/library/libraryFunctions.ml +++ b/src/util/library/libraryFunctions.ml @@ -162,6 +162,7 @@ let c_descs_list: (string * LibraryDesc.t) list = LibraryDsl.[ ("longjmp", special [__ "env" [r]; __ "value" []] @@ fun env value -> Longjmp { env; value }); ("atexit", unknown [drop "function" [if_ (fun () -> not (get_bool "sem.atexit.ignore")) s]]); ("atoi", unknown [drop "nptr" [r]]); + ("atof", unknown [drop "nptr" [r]]); (* TODO: Rethink atof's position in this list. *) ("atol", unknown [drop "nptr" [r]]); ("atoll", unknown [drop "nptr" [r]]); ("setlocale", unknown [drop "category" []; drop "locale" [r]]); @@ -1246,12 +1247,12 @@ let ocaml_descs_list: (string * LibraryDesc.t) list = LibraryDsl.[ ("caml_copy_double", special [drop "nptr" []] (OCamlAlloc (GoblintCil.integer 1))); (* Eeskuju: ("malloc", special [__ "size" []] @@ fun size -> Malloc size); *) ("caml_alloc_small", special [__ "wosize" []; __ "tag" []] @@ fun wosize tag -> OCamlAlloc wosize); - ("__goblint_caml_param0", special [] @@ OCamlParam {param1 = None; param2 = None; param3 = None; param4 = None; param5 = None}); - ("__goblint_caml_param1", special [__ "param" []] @@ fun param -> OCamlParam {param1 = Some param; param2 = None; param3 = None; param4 = None; param5 = None}); - ("__goblint_caml_param2", special [__ "param1" []; __ "param2" []] @@ fun param1 param2 -> OCamlParam {param1 = Some param1; param2 = Some param2; param3 = None; param4 = None; param5 = None}); - ("__goblint_caml_param3", special [__ "param1" []; __ "param2" []; __ "param3" []] @@ fun param1 param2 param3 -> OCamlParam {param1 = Some param1; param2 = Some param2; param3 = Some param3; param4 = None; param5 = None}); - ("__goblint_caml_param4", special [__ "param1" []; __ "param2" []; __ "param3" []; __ "param4" []] @@ fun param1 param2 param3 param4 -> OCamlParam {param1 = Some param1; param2 = Some param2; param3 = Some param3; param4 = Some param4; param5 = None}); - ("__goblint_caml_param5", special [__ "param1" []; __ "param2" []; __ "param3" []; __ "param4" []; __ "param5" []] @@ fun param1 param2 param3 param4 param5 -> OCamlParam {param1 = Some param1; param2 = Some param2; param3 = Some param3; param4 = Some param4; param5 = Some param5}); + ("__goblint_caml_param0", special [] @@ OCamlParam []); + ("__goblint_caml_param1", special [__ "param" []] @@ fun param -> OCamlParam [param]); + ("__goblint_caml_param2", special [__ "param1" []; __ "param2" []] @@ fun param1 param2 -> OCamlParam [param1; param2]); + ("__goblint_caml_param3", special [__ "param1" []; __ "param2" []; __ "param3" []] @@ fun param1 param2 param3 -> OCamlParam [param1; param2; param3]); + ("__goblint_caml_param4", special [__ "param1" []; __ "param2" []; __ "param3" []; __ "param4" []] @@ fun param1 param2 param3 param4 -> OCamlParam [param1; param2; param3; param4]); + ("__goblint_caml_param5", special [__ "param1" []; __ "param2" []; __ "param3" []; __ "param4" []; __ "param5" []] @@ fun param1 param2 param3 param4 param5 -> OCamlParam [param1; param2; param3; param4; param5]); ] [@@coverage off] diff --git a/tests/regression/90-ocaml/02-floatops.c b/tests/regression/90-ocaml/02-floatops.c index 4baedaf26b..cbe5495f96 100644 --- a/tests/regression/90-ocaml/02-floatops.c +++ b/tests/regression/90-ocaml/02-floatops.c @@ -1,4 +1,4 @@ -// PARAM: --set "ana.activated[+]" ocaml --set "mainfun[+]" "UNARY_OP" --set "mainfun[+]" "BINARY_OP" --set "mainfun[+]" "atof_double" --set "mainfun[+]" "atof_float" --set "mainfun[+]" "max_float" --set "mainfun[+]" "smallest_float" --set "mainfun[+]" "pi_float" --disable warn.imprecise --set "exp.extraspecials[+]" printInt +// PARAM: --set "ana.activated[+]" ocaml --set "mainfun[+]" "sqrt_double" --set "mainfun[+]" "sqrt_float" --set "mainfun[+]" "acos_double" --set "mainfun[+]" "acos_float" --set "mainfun[+]" "asin_double" --set "mainfun[+]" "asin_float" --set "mainfun[+]" "atan_double" --set "mainfun[+]" "atan_float" --set "mainfun[+]" "cos_double" --set "mainfun[+]" "cos_float" --set "mainfun[+]" "sin_double" --set "mainfun[+]" "sin_float" --set "mainfun[+]" "tan_double" --set "mainfun[+]" "tan_float" --set "mainfun[+]" "add_double" --set "mainfun[+]" "add_float" --set "mainfun[+]" "sub_double" --set "mainfun[+]" "sub_float" --set "mainfun[+]" "mul_double" --set "mainfun[+]" "mul_float" --set "mainfun[+]" "div_double" --set "mainfun[+]" "div_float" --set "mainfun[+]" "atof_double" --set "mainfun[+]" "atof_float" --set "mainfun[+]" "max_float" --set "mainfun[+]" "smallest_float" --set "mainfun[+]" "pi_float" --disable warn.imprecise --set "exp.extraspecials[+]" printInt // Code from stubs.c in src/common/cdomains/floatOps that should be correct despite missing CAMLparam, as discussed in https://github.com/goblint/analyzer/issues/1371. @@ -69,20 +69,20 @@ static void change_round_mode(int mode) /* No need to use CAMLreturn because we don't use CAMLparam. */ \ } -UNARY_OP(sqrt, double, sqrt); -UNARY_OP(sqrt, float, sqrtf); -UNARY_OP(acos, double, acos); -UNARY_OP(acos, float, acosf); -UNARY_OP(asin, double, asin); -UNARY_OP(asin, float, asinf); -UNARY_OP(atan, double, atan); -UNARY_OP(atan, float, atanf); -UNARY_OP(cos, double, cos); -UNARY_OP(cos, float, cosf); -UNARY_OP(sin, double, sin); -UNARY_OP(sin, float, sinf); -UNARY_OP(tan, double, tan); -UNARY_OP(tan, float, tanf); +UNARY_OP(sqrt, double, sqrt); // NOWARN +UNARY_OP(sqrt, float, sqrtf); // NOWARN +UNARY_OP(acos, double, acos); // NOWARN +UNARY_OP(acos, float, acosf); // NOWARN +UNARY_OP(asin, double, asin); // NOWARN +UNARY_OP(asin, float, asinf); // NOWARN +UNARY_OP(atan, double, atan); // NOWARN +UNARY_OP(atan, float, atanf); // NOWARN +UNARY_OP(cos, double, cos); // NOWARN +UNARY_OP(cos, float, cosf); // NOWARN +UNARY_OP(sin, double, sin); // NOWARN +UNARY_OP(sin, float, sinf); // NOWARN +UNARY_OP(tan, double, tan); // NOWARN +UNARY_OP(tan, float, tanf); // NOWARN #define BINARY_OP(name, type, op) \ CAMLprim value name##_##type(value mode, value x, value y) \ @@ -99,14 +99,14 @@ UNARY_OP(tan, float, tanf); /* No need to use CAMLreturn because we don't use CAMLparam. */ \ } -BINARY_OP(add, double, +); -BINARY_OP(add, float, +); -BINARY_OP(sub, double, -); -BINARY_OP(sub, float, -); -BINARY_OP(mul, double, *); -BINARY_OP(mul, float, *); -BINARY_OP(div, double, /); -BINARY_OP(div, float, /); +BINARY_OP(add, double, +); // NOWARN +BINARY_OP(add, float, +); // NOWARN +BINARY_OP(sub, double, -); // NOWARN +BINARY_OP(sub, float, -); // NOWARN +BINARY_OP(mul, double, *); // NOWARN +BINARY_OP(mul, float, *); // NOWARN +BINARY_OP(div, double, /); // NOWARN +BINARY_OP(div, float, /); // NOWARN CAMLprim value atof_double(value mode, value str) { @@ -115,7 +115,7 @@ CAMLprim value atof_double(value mode, value str) // We have already read their values by then. const char *s = String_val(str); volatile double r; - int old_roundingmode = fegetround(); + int old_roundingmode = fegetround(); // NOWARN change_round_mode(Int_val(mode)); r = atof(s); fesetround(old_roundingmode); diff --git a/tests/regression/90-ocaml/03-demimarie.c b/tests/regression/90-ocaml/03-demimarie.c index a3d430aee5..95537e0a1c 100644 --- a/tests/regression/90-ocaml/03-demimarie.c +++ b/tests/regression/90-ocaml/03-demimarie.c @@ -2,19 +2,24 @@ // OCaml code fixed by Demi Marie Obenour in https://github.com/ocaml/ocaml/pull/13370, before and after the fix. +#include +#include + /* Minimal macros to mimic expected behaviour */ #define Wsizeof(ty) ((sizeof(ty) + sizeof(value) - 1) / sizeof(value)) #define LXM_val(v) ((struct LXM_state *) Data_abstract_val(v)) #define CAMLparam0() __goblint_caml_param0() #define CAMLparam1(x) __goblint_caml_param1(&x) +#define CAMLlocal1(x) __goblint_caml_param1(&x) // The local and param functions behave the same for our purposes, registering variables. +#define CAMLlocal3(x, y, z) __goblint_caml_param3(&x, &y, &z) #define CAMLreturn(x) return (x) // From AI - CAMLreturn needs some variable named caml__frame, which is not available in our mock CAMLparam1, so we mock the return as well. CAMLprim value caml_gc_counters(value v) { - CAMLparam0 (); /* v is ignored */ - CAMLlocal1 (res); + CAMLparam0(); /* v is ignored */ + CAMLlocal1(res); /* get a copy of these before allocating anything... */ double minwords = caml_gc_minor_words_unboxed(); @@ -23,16 +28,16 @@ CAMLprim value caml_gc_counters(value v) (double) Caml_state->allocated_words; res = caml_alloc_3(0, - caml_copy_double (minwords), - caml_copy_double (prowords), - caml_copy_double (majwords)); // WARN - CAMLreturn (res); + caml_copy_double(minwords), + caml_copy_double(prowords), + caml_copy_double(majwords)); // WARN + CAMLreturn(res); } CAMLprim value caml_gc_counters_correct(value v) { - CAMLparam0 (); /* v is ignored */ - CAMLlocal3 (minwords_, prowords_, majwords_); + CAMLparam0(); /* v is ignored */ + CAMLlocal3(minwords_, prowords_, majwords_); /* get a copy of these before allocating anything... */ double minwords = caml_gc_minor_words_unboxed(); From 8f28a38abf5c42512b9a2f652f98fc532af30259 Mon Sep 17 00:00:00 2001 From: Ronald Judin Date: Fri, 30 Jan 2026 03:05:56 +0200 Subject: [PATCH 04/12] Deleted is_sink and is_source, added checking for pointers, added tracking and warning to special, experimented with SplitBranch --- src/analyses/ocaml.ml | 64 ++++++++----------- src/util/library/libraryFunctions.ml | 2 + .../90-ocaml/{03-demimarie.c => 03-obenour.c} | 21 ++++-- 3 files changed, 42 insertions(+), 45 deletions(-) rename tests/regression/90-ocaml/{03-demimarie.c => 03-obenour.c} (58%) diff --git a/src/analyses/ocaml.ml b/src/analyses/ocaml.ml index b8647d47de..0f9a5e3d7a 100644 --- a/src/analyses/ocaml.ml +++ b/src/analyses/ocaml.ml @@ -10,13 +10,6 @@ open Analyses module VarinfoSet = SetDomain.Make(CilType.Varinfo) -(* Use to check if a specific function is a sink / source *) -(* Sources take value-typed arguments *) -(* Sinks may trigger garbage collection *) -let is_sink varinfo = Cil.hasAttribute "ocaml_sink" varinfo.vattr -let is_source varinfo = Cil.hasAttribute "ocaml_source" varinfo.vattr - - (** "Fake" variable to handle returning from a function *) let return_varinfo = dummyFunDec.svar @@ -84,12 +77,12 @@ struct | AlignOfE e | CastE (_,e) | UnOp (_,e,_) -> exp_accounted_for state e - | SizeOf _ | SizeOfStr _ | Const _ | AlignOf _ | AddrOfLabel _ -> false + | SizeOf _ | SizeOfStr _ | Const _ | AlignOf _ | AddrOfLabel _ -> true | Question (b, t, f, _) -> exp_accounted_for state b && exp_accounted_for state t && exp_accounted_for state f and lval_accounted_for state = function | (Var v, _) -> (* Checks whether variable v is accounted for *) (*false*) - if D.mem_a v state || not (D.mem_t v state) then true else let _ = M.warn "Value %a might be garbage collected" CilType.Varinfo.pretty v in false + if D.mem_a v state || not (D.mem_t v state) then true else (M.warn "Value %a might be garbage collected" CilType.Varinfo.pretty v; false) | _ -> (* The Gemara asks: is using an offset safe for the expression? The Gemara answers: by default, no. We assume our language has no pointers *) false @@ -123,11 +116,15 @@ struct let state = ctx.local in match lval with | Var v,_ -> - (* If lval is of type value, checks whether rval is accounted for, handles assignment to v accordingly *) (* state *) - if exp_accounted_for state rval then - if exp_tracked state rval then D.add_a v (D.add_t v state) - else D.add_a v (D.remove_t v state) - else let _ = M.info "The above is being assigned" in D.remove_a v (D.add_t v state) + (* If rval is a pointer, checks whether rval is accounted for, handles assignment to v accordingly *) (* state *) + (* Emits an event for the variable v not being zero. *) + ctx.emit (Events.SplitBranch (Cil.BinOp (Cil.Eq, Cil.Lval lval, Cil.zero, Cil.intType), false)); + if Cil.isPointerType (Cil.typeOf rval) then + if exp_accounted_for state rval then + if exp_tracked state rval then D.add_a v (D.add_t v state) + else D.add_a v (D.remove_t v state) (* TODO: Is add_a necessary for untracked variables? *) + else (M.info "The above is being assigned"; D.remove_a v (D.add_t v state)) + else D.remove_t v state | _ -> state (** Handles conditional branching yielding truth value [tv]. *) @@ -152,7 +149,7 @@ struct (* TODO: Consider how the return_varinfo needs to be tracked. *) (* state *) if exp_accounted_for state e then D.add_a return_varinfo state - else let _ = M.warn "Value returned might be garbage collected" in D.remove_a return_varinfo state + else (M.warn "Value returned might be garbage collected"; D.remove_a return_varinfo state) | None -> state (** For a function call "lval = f(args)" or "f(args)", @@ -193,7 +190,7 @@ struct (* TODO: Consider how the return_varinfo needs to be tracked. *) match lval with (* The variable returned is played by return_varinfo *) | Some (Var v, _) -> if D.mem_a return_varinfo callee_local then D.add_a v caller_state - else let _ = M.warn "Returned value may be garbage-collected" in D.remove_a v caller_state + else (M.warn "Returned value may be garbage-collected"; D.remove_a v caller_state) | _ -> caller_state (** For a call to a _special_ function f "lval = f(args)" or "f(args)", @@ -206,34 +203,25 @@ struct (* caller_state *) let desc = LibraryFunctions.find f in match desc.special arglist with - (* Variables are registered with a Param macro. *) | OCamlParam params -> - List.fold_left (fun state param -> let _ = M.info "We reach here" in + (* Variables are registered with a Param macro. Such variables are also tracked. *) + List.fold_left (fun state param -> M.debug "We reach here"; match param with - | AddrOf (Var v, _) -> let _ = M.info "Address of" in - D.add_r v state + | AddrOf (Var v, _) -> M.debug "Address of"; + D.add_r v (D.add_t v state) | _ -> state ) caller_state params | OCamlAlloc size_exp -> - (* Garbage collection may trigger here and overwrite unregistered variables *) - let _ = M.debug "Garbage collection triggers" in (match lval with - | Some (Var v, _) -> D.add_a v (D.after_gc caller_state) - | _ -> - (* if not (List.for_all (exp_accounted_for caller_state) arglist) then let _ = M.warn "GC might delete value" in D.empty () else *) D.empty () - ) + (* Garbage collection may trigger here and overwrite unregistered variables. *) + M.debug "Garbage collection triggers"; + List.iter (fun e -> ignore (exp_accounted_for caller_state e)) arglist; (* Just to trigger warnings *) + (match lval with + | Some (Var v, _) -> D.add_a v (D.add_t v (D.after_gc caller_state)) + | _ -> D.after_gc caller_state + ) | _ -> - List.iter (fun e -> ignore (exp_accounted_for caller_state e)) arglist; (* Just to trigger warnings for arguments passed to sinks/sources *) - if is_source f then match lval with (* Assigning the source's result makes the variable accounted for. *) - | Some (Var v, _) -> D.add_a v caller_state - | _ -> caller_state - else - if is_sink f then (* Warns if unaccounted variables reach the function. Empties the state of unregistered variables. *) - let _ = M.debug "Garbage collection triggers" in match lval with - | Some (Var v, _) -> D.add_a v (D.after_gc caller_state) - | _ -> - (* if not (List.for_all (exp_accounted_for caller_state) arglist) then let _ = M.warn "GC might delete value" in D.empty () else *) - D.after_gc caller_state - else caller_state + List.iter (fun e -> ignore (exp_accounted_for caller_state e)) arglist; (* Just to trigger warnings for arguments passed to special functions *) + caller_state (* You may leave these alone *) let startstate v = D.bot () diff --git a/src/util/library/libraryFunctions.ml b/src/util/library/libraryFunctions.ml index ab39c11541..4367acb42f 100644 --- a/src/util/library/libraryFunctions.ml +++ b/src/util/library/libraryFunctions.ml @@ -1247,6 +1247,8 @@ let ocaml_descs_list: (string * LibraryDesc.t) list = LibraryDsl.[ ("caml_copy_double", special [drop "nptr" []] (OCamlAlloc (GoblintCil.integer 1))); (* Eeskuju: ("malloc", special [__ "size" []] @@ fun size -> Malloc size); *) ("caml_alloc_small", special [__ "wosize" []; __ "tag" []] @@ fun wosize tag -> OCamlAlloc wosize); + (* TODO: This specification is not correct because it doesn't warn where it should. Also rethink the argument names. *) + ("caml_alloc_3", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]; __ "arg3" [r]] @@ fun tag _arg1 _arg2 _arg3 -> OCamlAlloc (GoblintCil.integer 3)); ("__goblint_caml_param0", special [] @@ OCamlParam []); ("__goblint_caml_param1", special [__ "param" []] @@ fun param -> OCamlParam [param]); ("__goblint_caml_param2", special [__ "param1" []; __ "param2" []] @@ fun param1 param2 -> OCamlParam [param1; param2]); diff --git a/tests/regression/90-ocaml/03-demimarie.c b/tests/regression/90-ocaml/03-obenour.c similarity index 58% rename from tests/regression/90-ocaml/03-demimarie.c rename to tests/regression/90-ocaml/03-obenour.c index 95537e0a1c..3ad03bb81c 100644 --- a/tests/regression/90-ocaml/03-demimarie.c +++ b/tests/regression/90-ocaml/03-obenour.c @@ -1,6 +1,6 @@ // PARAM: --set "ana.activated[+]" ocaml --set "mainfun[+]" "caml_gc_counters" --set "mainfun[+]" "caml_gc_counters_correct" --disable warn.imprecise --set "exp.extraspecials[+]" printInt -// OCaml code fixed by Demi Marie Obenour in https://github.com/ocaml/ocaml/pull/13370, before and after the fix. +// Buggy code from https://github.com/ocaml/ocaml/pull/13370 where some local variables are not registered. #include #include @@ -11,10 +11,12 @@ #define CAMLparam0() __goblint_caml_param0() #define CAMLparam1(x) __goblint_caml_param1(&x) -#define CAMLlocal1(x) __goblint_caml_param1(&x) // The local and param functions behave the same for our purposes, registering variables. -#define CAMLlocal3(x, y, z) __goblint_caml_param3(&x, &y, &z) +#define CAMLlocal1(x) value x = Val_unit; __goblint_caml_param1(&x) // The local and param functions behave the same for our purposes, registering variables. +#define CAMLlocal3(x, y, z) value x = Val_unit; value y = Val_unit; value z = Val_unit; __goblint_caml_param3(&x, &y, &z) #define CAMLreturn(x) return (x) // From AI - CAMLreturn needs some variable named caml__frame, which is not available in our mock CAMLparam1, so we mock the return as well. +// A reference to caml_gc_minor_words_unboxed can be found in _opam/lib/ocaml/ml/gc.ml. +#define caml_gc_minor_words_unboxed() (0.0) // TODO: Put a value here that's in the OCaml heap. CAMLprim value caml_gc_counters(value v) { @@ -23,10 +25,13 @@ CAMLprim value caml_gc_counters(value v) /* get a copy of these before allocating anything... */ double minwords = caml_gc_minor_words_unboxed(); - double prowords = (double)Caml_state->stat_promoted_words; + /*double prowords = (double)Caml_state->stat_promoted_words; double majwords = Caml_state->stat_major_words + - (double) Caml_state->allocated_words; + (double) Caml_state->allocated_words;*/ + double prowords = v; // It does not find the Caml_state so dummy values are used for the test. The argument v, which is otherwise unused, is used as the dummy value because it allows for simulating the variables' registration. + double majwords = v; + // Evaluating one argument could erase the next variables. res = caml_alloc_3(0, caml_copy_double(minwords), caml_copy_double(prowords), @@ -41,9 +46,11 @@ CAMLprim value caml_gc_counters_correct(value v) /* get a copy of these before allocating anything... */ double minwords = caml_gc_minor_words_unboxed(); - double prowords = (double)Caml_state->stat_promoted_words; + /*double prowords = (double)Caml_state->stat_promoted_words; double majwords = Caml_state->stat_major_words + - (double) Caml_state->allocated_words; + (double) Caml_state->allocated_words;*/ + double prowords = 0; + double majwords = 0; minwords_ = caml_copy_double(minwords); prowords_ = caml_copy_double(prowords); From 9917a29a4d1e1b4132f9968bf89d6b7f60194f9c Mon Sep 17 00:00:00 2001 From: Ronald Judin Date: Thu, 5 Feb 2026 23:50:52 +0200 Subject: [PATCH 05/12] Macros redefined in goblint_caml, SplitBranch checks type --- src/analyses/ocaml.ml | 21 +++++++++++------- src/util/library/libraryFunctions.ml | 6 ++++-- tests/regression/90-ocaml/01-bagnall.c | 14 +----------- tests/regression/90-ocaml/02-floatops.c | 12 +---------- tests/regression/90-ocaml/03-obenour.c | 21 ++++-------------- tests/regression/90-ocaml/goblint_caml.h | 27 ++++++++++++++++++++++++ 6 files changed, 50 insertions(+), 51 deletions(-) create mode 100644 tests/regression/90-ocaml/goblint_caml.h diff --git a/src/analyses/ocaml.ml b/src/analyses/ocaml.ml index 0f9a5e3d7a..4ead5dc26f 100644 --- a/src/analyses/ocaml.ml +++ b/src/analyses/ocaml.ml @@ -118,7 +118,7 @@ struct | Var v,_ -> (* If rval is a pointer, checks whether rval is accounted for, handles assignment to v accordingly *) (* state *) (* Emits an event for the variable v not being zero. *) - ctx.emit (Events.SplitBranch (Cil.BinOp (Cil.Eq, Cil.Lval lval, Cil.zero, Cil.intType), false)); + if Cil.isPointerType (Cil.typeOf rval) then if exp_accounted_for state rval then if exp_tracked state rval then D.add_a v (D.add_t v state) @@ -137,7 +137,14 @@ struct let body ctx (f:fundec) : D.t = (* The (non-formals) locals are tracked and initially accounted for *) let state = ctx.local in - List.fold_left (fun st v -> D.add_a v (D.add_t v st)) state f.sformals + (* It is assumed that value-typed arguments are never nptrs. *) + let is_value_type (t:typ): bool = match t with + | TNamed (info, attr) -> info.tname = "value" + | _ -> false in + List.fold_left (fun st v -> if is_value_type v.vtype then + (ctx.emit (Events.SplitBranch (Cil.Lval (Cil.var v), true)); D.add_a v (D.add_t v st)) + else D.add_a v (D.add_t v st)) + state f.sformals (** Handles the [return] statement, i.e. "return exp" or "return", in function [f]. *) let return ctx (exp:exp option) (f:fundec) : D.t = @@ -205,12 +212,10 @@ struct match desc.special arglist with | OCamlParam params -> (* Variables are registered with a Param macro. Such variables are also tracked. *) - List.fold_left (fun state param -> M.debug "We reach here"; - match param with - | AddrOf (Var v, _) -> M.debug "Address of"; - D.add_r v (D.add_t v state) - | _ -> state - ) caller_state params + List.fold_left (fun state param -> match param with + | AddrOf (Var v, _) -> D.add_r v (D.add_t v state) + | _ -> state + ) caller_state params | OCamlAlloc size_exp -> (* Garbage collection may trigger here and overwrite unregistered variables. *) M.debug "Garbage collection triggers"; diff --git a/src/util/library/libraryFunctions.ml b/src/util/library/libraryFunctions.ml index 4367acb42f..cc0fe5e012 100644 --- a/src/util/library/libraryFunctions.ml +++ b/src/util/library/libraryFunctions.ml @@ -1245,10 +1245,12 @@ let legacy_libs_misc_list: (string * LibraryDesc.t) list = LibraryDsl.[ let ocaml_descs_list: (string * LibraryDesc.t) list = LibraryDsl.[ ("caml_copy_double", special [drop "nptr" []] (OCamlAlloc (GoblintCil.integer 1))); - (* Eeskuju: ("malloc", special [__ "size" []] @@ fun size -> Malloc size); *) + (* Example: ("malloc", special [__ "size" []] @@ fun size -> Malloc size); *) ("caml_alloc_small", special [__ "wosize" []; __ "tag" []] @@ fun wosize tag -> OCamlAlloc wosize); - (* TODO: This specification is not correct because it doesn't warn where it should. Also rethink the argument names. *) + ("caml_alloc_2", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]] @@ fun tag _arg1 _arg2 -> OCamlAlloc (GoblintCil.integer 2)); ("caml_alloc_3", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]; __ "arg3" [r]] @@ fun tag _arg1 _arg2 _arg3 -> OCamlAlloc (GoblintCil.integer 3)); + ("caml_alloc_4", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]; __ "arg3" [r]; __ "arg4" [r]] @@ fun tag _arg1 _arg2 _arg3 _arg4 -> OCamlAlloc (GoblintCil.integer 4)); + ("caml_alloc_5", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]; __ "arg3" [r]; __ "arg4" [r]; __ "arg5" [r]] @@ fun tag _arg1 _arg2 _arg3 _arg4 _arg5 -> OCamlAlloc (GoblintCil.integer 5)); ("__goblint_caml_param0", special [] @@ OCamlParam []); ("__goblint_caml_param1", special [__ "param" []] @@ fun param -> OCamlParam [param]); ("__goblint_caml_param2", special [__ "param1" []; __ "param2" []] @@ fun param1 param2 -> OCamlParam [param1; param2]); diff --git a/tests/regression/90-ocaml/01-bagnall.c b/tests/regression/90-ocaml/01-bagnall.c index 8cf6eb78d3..4c58da0fdd 100644 --- a/tests/regression/90-ocaml/01-bagnall.c +++ b/tests/regression/90-ocaml/01-bagnall.c @@ -6,19 +6,7 @@ #include #include #include -#include - -/* A small definition of the LXM state so sizeof works - from AI */ -struct LXM_state { uint64_t a; uint64_t x[2]; uint64_t s; }; - -/* Minimal macros to mimic expected behaviour */ -#define Wsizeof(ty) ((sizeof(ty) + sizeof(value) - 1) / sizeof(value)) -#define LXM_val(v) ((struct LXM_state *) Data_abstract_val(v)) - -#define CAMLparam1(x) __goblint_caml_param1(&x) -#define CAMLreturn(x) return (x) // From AI - CAMLreturn needs some variable named caml__frame, which is not available in our mock CAMLparam1, so we mock the return as well. - -/*CAMLextern value caml_alloc_small (mlsize_t wosize, tag_t) __attribute__((__ocaml_sink__));*/ +#include "goblint_caml.h" CAMLprim value pringo_LXM_copy(value v) { diff --git a/tests/regression/90-ocaml/02-floatops.c b/tests/regression/90-ocaml/02-floatops.c index cbe5495f96..029cbbf46e 100644 --- a/tests/regression/90-ocaml/02-floatops.c +++ b/tests/regression/90-ocaml/02-floatops.c @@ -2,17 +2,6 @@ // Code from stubs.c in src/common/cdomains/floatOps that should be correct despite missing CAMLparam, as discussed in https://github.com/goblint/analyzer/issues/1371. -/* A small definition of the LXM state so sizeof works - from AI */ -//struct LXM_state { uint64_t a; uint64_t x[2]; uint64_t s; }; - -/* Minimal macros to mimic expected behaviour */ -#define Wsizeof(ty) ((sizeof(ty) + sizeof(value) - 1) / sizeof(value)) -#define LXM_val(v) ((struct LXM_state *) Data_abstract_val(v)) - -#define CAMLparam1(x) __goblint_caml_param1(&x) -#define CAMLreturn(x) return (x) // From AI - CAMLreturn needs some variable named caml__frame, which is not available in our mock CAMLparam1, so we mock the return as well. - - #define _GNU_SOURCE // necessary for M_PI to be defined #include #include @@ -21,6 +10,7 @@ #include #include #include +#include "goblint_caml.h" // Order must match with round_mode in floatOps.ml enum round_mode diff --git a/tests/regression/90-ocaml/03-obenour.c b/tests/regression/90-ocaml/03-obenour.c index 3ad03bb81c..c69b5b42cd 100644 --- a/tests/regression/90-ocaml/03-obenour.c +++ b/tests/regression/90-ocaml/03-obenour.c @@ -1,22 +1,10 @@ // PARAM: --set "ana.activated[+]" ocaml --set "mainfun[+]" "caml_gc_counters" --set "mainfun[+]" "caml_gc_counters_correct" --disable warn.imprecise --set "exp.extraspecials[+]" printInt -// Buggy code from https://github.com/ocaml/ocaml/pull/13370 where some local variables are not registered. +// Buggy code from https://github.com/ocaml/ocaml/pull/13370 where unregistered temporary variables may be garbage-collected. #include #include - -/* Minimal macros to mimic expected behaviour */ -#define Wsizeof(ty) ((sizeof(ty) + sizeof(value) - 1) / sizeof(value)) -#define LXM_val(v) ((struct LXM_state *) Data_abstract_val(v)) - -#define CAMLparam0() __goblint_caml_param0() -#define CAMLparam1(x) __goblint_caml_param1(&x) -#define CAMLlocal1(x) value x = Val_unit; __goblint_caml_param1(&x) // The local and param functions behave the same for our purposes, registering variables. -#define CAMLlocal3(x, y, z) value x = Val_unit; value y = Val_unit; value z = Val_unit; __goblint_caml_param3(&x, &y, &z) -#define CAMLreturn(x) return (x) // From AI - CAMLreturn needs some variable named caml__frame, which is not available in our mock CAMLparam1, so we mock the return as well. - -// A reference to caml_gc_minor_words_unboxed can be found in _opam/lib/ocaml/ml/gc.ml. -#define caml_gc_minor_words_unboxed() (0.0) // TODO: Put a value here that's in the OCaml heap. +#include "goblint_caml.h" CAMLprim value caml_gc_counters(value v) { @@ -28,10 +16,9 @@ CAMLprim value caml_gc_counters(value v) /*double prowords = (double)Caml_state->stat_promoted_words; double majwords = Caml_state->stat_major_words + (double) Caml_state->allocated_words;*/ - double prowords = v; // It does not find the Caml_state so dummy values are used for the test. The argument v, which is otherwise unused, is used as the dummy value because it allows for simulating the variables' registration. - double majwords = v; + double prowords = 0; // It does not find the Caml_state so dummy values are used for the test. + double majwords = 0; - // Evaluating one argument could erase the next variables. res = caml_alloc_3(0, caml_copy_double(minwords), caml_copy_double(prowords), diff --git a/tests/regression/90-ocaml/goblint_caml.h b/tests/regression/90-ocaml/goblint_caml.h new file mode 100644 index 0000000000..35d48b045c --- /dev/null +++ b/tests/regression/90-ocaml/goblint_caml.h @@ -0,0 +1,27 @@ +/* Redefinitions of caml macros for the C-stub analysis. This file replaces caml/memory.h in tests' imports. */ + +/* A small definition of the LXM state so sizeof works - from AI */ +struct LXM_state { uint64_t a; uint64_t x[2]; uint64_t s; }; + +/* Minimal macros to mimic expected behaviour */ +#define Wsizeof(ty) ((sizeof(ty) + sizeof(value) - 1) / sizeof(value)) +#define LXM_val(v) ((struct LXM_state *) Data_abstract_val(v)) + +// Param and local macros redefined to register variables as GC roots, and CAMLreturn mocked to work with them. +#define CAMLparam0() __goblint_caml_param0() +#define CAMLparam1(x) __goblint_caml_param1(&x) +#define CAMLparam2(x, y) __goblint_caml_param2(&x, &y) +#define CAMLparam3(x, y, z) __goblint_caml_param3(&x, &y, &z) +#define CAMLparam4(x, y, z, w) __goblint_caml_param4(&x, &y, &z, &w) +#define CAMLparam5(x, y, z, w, v) __goblint_caml_param5(&x, &y, &z, &w, &v) + +#define CAMLlocal1(x) value x = Val_unit; __goblint_caml_param1(&x) // The local and param functions behave the same for our purposes, registering variables. +#define CAMLlocal2(x, y) value x = Val_unit; value y = Val_unit; __goblint_caml_param2(&x, &y) +#define CAMLlocal3(x, y, z) value x = Val_unit; value y = Val_unit; value z = Val_unit; __goblint_caml_param3(&x, &y, &z) +#define CAMLlocal4(x, y, z, w) value x = Val_unit; value y = Val_unit; value z = Val_unit; value w = Val_unit; __goblint_caml_param4(&x, &y, &z, &w) +#define CAMLlocal5(x, y, z, w, v) value x = Val_unit; value y = Val_unit; value z = Val_unit; value w = Val_unit; value v = Val_unit; __goblint_caml_param5(&x, &y, &z, &w, &v) + +#define CAMLreturn(x) return (x) // The real CAMLreturn needs some variable named caml__frame, which is not available in our redefinitions above. + +// A reference to caml_gc_minor_words_unboxed can be found in _opam/lib/ocaml/ml/gc.ml. +#define caml_gc_minor_words_unboxed() (0.0) From ec33e2710a1e1cac7847e3333b749a740da6ca0a Mon Sep 17 00:00:00 2001 From: Ronald Judin Date: Fri, 20 Feb 2026 03:07:57 +0200 Subject: [PATCH 06/12] Undefs and working analysis with two varinfosets --- src/analyses/ocaml.ml | 77 +++++++++++------------- tests/regression/90-ocaml/01-bagnall.c | 1 + tests/regression/90-ocaml/goblint_caml.h | 13 ++++ 3 files changed, 48 insertions(+), 43 deletions(-) diff --git a/src/analyses/ocaml.ml b/src/analyses/ocaml.ml index 4ead5dc26f..9d8b5244ad 100644 --- a/src/analyses/ocaml.ml +++ b/src/analyses/ocaml.ml @@ -21,41 +21,32 @@ struct module D = struct (* The first set contains variables of type value that are definitely in order. The second contains definitely registered variables. The third contains variables the analysis tracks. *) - module P = Lattice.Prod3 (VarinfoSet) (VarinfoSet) (VarinfoSet) + module P = Lattice.Prod (VarinfoSet) (VarinfoSet) include P - let empty () = (VarinfoSet.empty (), VarinfoSet.empty (), VarinfoSet.empty ()) + let empty () = (VarinfoSet.empty (), VarinfoSet.empty ()) (* After garbage collection, the second set is written to the first set *) - let after_gc (accounted, registered, tracked) = (registered, registered, tracked) + let after_gc (accounted, registered) = (registered, registered) (* Untracked variables are always fine. *) - let mem_a v (accounted, registered, tracked) = + let mem_a v (accounted, registered) = VarinfoSet.mem v accounted - let mem_r v (accounted, registered, tracked) = + let mem_r v (accounted, registered) = VarinfoSet.mem v registered - let mem_t v (accounted, registered, tracked) = - VarinfoSet.mem v tracked + let add_a v (accounted, registered) = + (VarinfoSet.add v accounted, registered) - let add_a v (accounted, registered, tracked) = - (VarinfoSet.add v accounted, registered, tracked) + let add_r v (accounted, registered) = + (accounted, VarinfoSet.add v registered) - let add_r v (accounted, registered, tracked) = - (accounted, VarinfoSet.add v registered, tracked) + let remove_a v (accounted, registered) = + (VarinfoSet.remove v accounted, registered) - let add_t v (accounted, registered, tracked) = - (accounted, registered, VarinfoSet.add v tracked) - - let remove_a v (accounted, registered, tracked) = - (VarinfoSet.remove v accounted, registered, tracked) - - let remove_r v (accounted, registered, tracked) = - (accounted, VarinfoSet.remove v registered, tracked) - - let remove_t v (accounted, registered, tracked) = - (accounted, registered, VarinfoSet.remove v tracked) + let remove_r v (accounted, registered) = + (accounted, VarinfoSet.remove v registered) end module C = Printable.Unit @@ -82,30 +73,30 @@ struct and lval_accounted_for state = function | (Var v, _) -> (* Checks whether variable v is accounted for *) (*false*) - if D.mem_a v state || not (D.mem_t v state) then true else (M.warn "Value %a might be garbage collected" CilType.Varinfo.pretty v; false) + if D.mem_a v state then true else (M.warn "Value %a might be garbage collected" CilType.Varinfo.pretty v; false) | _ -> (* The Gemara asks: is using an offset safe for the expression? The Gemara answers: by default, no. We assume our language has no pointers *) false - (** Determines whether an expression [e] has parts in the OCaml heap, given a [state]. *) - let rec exp_tracked (state:D.t) (e:Cil.exp) = match e with - (* Recurse over the structure in the expression, returning true if some varinfo appearing in the expression is tracked *) + (** Determines whether an expression [e] is registered, given a [state]. *) + let rec exp_registered (state:D.t) (e:Cil.exp) = match e with + (* Recurse over the structure in the expression, returning true if all varinfo appearing in the expression is registered *) | AddrOf v | StartOf v - | Lval v -> lval_tracked state v - | BinOp (_,e1,e2,_) -> exp_tracked state e1 || exp_tracked state e2 + | Lval v -> lval_registered_for state v + | BinOp (_,e1,e2,_) -> exp_registered state e1 && exp_registered state e2 | Real e | Imag e | SizeOfE e | AlignOfE e | CastE (_,e) - | UnOp (_,e,_) -> exp_tracked state e - | SizeOf _ | SizeOfStr _ | Const _ | AlignOf _ | AddrOfLabel _ -> false - | Question (b, t, f, _) -> exp_tracked state b || exp_tracked state t || exp_tracked state f - and lval_tracked state = function + | UnOp (_,e,_) -> exp_registered state e + | SizeOf _ | SizeOfStr _ | Const _ | AlignOf _ | AddrOfLabel _ -> true + | Question (b, t, f, _) -> exp_registered state b && exp_registered state t && exp_registered state f + and lval_registered_for state = function | (Var v, _) -> - (* Checks whether variable v is tracked *) - D.mem_t v state + (* Checks whether variable v is registered *) (*false*) + D.mem_r v state | _ -> false @@ -121,10 +112,10 @@ struct if Cil.isPointerType (Cil.typeOf rval) then if exp_accounted_for state rval then - if exp_tracked state rval then D.add_a v (D.add_t v state) - else D.add_a v (D.remove_t v state) (* TODO: Is add_a necessary for untracked variables? *) - else (M.info "The above is being assigned"; D.remove_a v (D.add_t v state)) - else D.remove_t v state + if exp_registered state rval then D.add_a v (D.add_r v state) + else D.add_a v (D.remove_r v state) + else (M.info "The above is being assigned"; D.remove_a v state) + else D.add_a v (D.add_r v state) | _ -> state (** Handles conditional branching yielding truth value [tv]. *) @@ -142,8 +133,8 @@ struct | TNamed (info, attr) -> info.tname = "value" | _ -> false in List.fold_left (fun st v -> if is_value_type v.vtype then - (ctx.emit (Events.SplitBranch (Cil.Lval (Cil.var v), true)); D.add_a v (D.add_t v st)) - else D.add_a v (D.add_t v st)) + (ctx.emit (Events.SplitBranch (Cil.Lval (Cil.var v), true)); D.add_a v st) + else D.add_a v (D.add_r v st)) state f.sformals (** Handles the [return] statement, i.e. "return exp" or "return", in function [f]. *) @@ -211,9 +202,9 @@ struct let desc = LibraryFunctions.find f in match desc.special arglist with | OCamlParam params -> - (* Variables are registered with a Param macro. Such variables are also tracked. *) + (* Variables are registered with a Param macro. *) List.fold_left (fun state param -> match param with - | AddrOf (Var v, _) -> D.add_r v (D.add_t v state) + | AddrOf (Var v, _) -> D.add_r v state | _ -> state ) caller_state params | OCamlAlloc size_exp -> @@ -221,7 +212,7 @@ struct M.debug "Garbage collection triggers"; List.iter (fun e -> ignore (exp_accounted_for caller_state e)) arglist; (* Just to trigger warnings *) (match lval with - | Some (Var v, _) -> D.add_a v (D.add_t v (D.after_gc caller_state)) + | Some (Var v, _) -> D.add_a v (D.after_gc caller_state) | _ -> D.after_gc caller_state ) | _ -> diff --git a/tests/regression/90-ocaml/01-bagnall.c b/tests/regression/90-ocaml/01-bagnall.c index 4c58da0fdd..5a06ba4e6d 100644 --- a/tests/regression/90-ocaml/01-bagnall.c +++ b/tests/regression/90-ocaml/01-bagnall.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "goblint_caml.h" CAMLprim value pringo_LXM_copy(value v) diff --git a/tests/regression/90-ocaml/goblint_caml.h b/tests/regression/90-ocaml/goblint_caml.h index 35d48b045c..d90e4030ef 100644 --- a/tests/regression/90-ocaml/goblint_caml.h +++ b/tests/regression/90-ocaml/goblint_caml.h @@ -1,4 +1,5 @@ /* Redefinitions of caml macros for the C-stub analysis. This file replaces caml/memory.h in tests' imports. */ +// TODO: add undefs /* A small definition of the LXM state so sizeof works - from AI */ struct LXM_state { uint64_t a; uint64_t x[2]; uint64_t s; }; @@ -8,6 +9,12 @@ struct LXM_state { uint64_t a; uint64_t x[2]; uint64_t s; }; #define LXM_val(v) ((struct LXM_state *) Data_abstract_val(v)) // Param and local macros redefined to register variables as GC roots, and CAMLreturn mocked to work with them. +#undef CAMLparam0 +#undef CAMLparam1 +#undef CAMLparam2 +#undef CAMLparam3 +#undef CAMLparam4 +#undef CAMLparam5 #define CAMLparam0() __goblint_caml_param0() #define CAMLparam1(x) __goblint_caml_param1(&x) #define CAMLparam2(x, y) __goblint_caml_param2(&x, &y) @@ -15,12 +22,18 @@ struct LXM_state { uint64_t a; uint64_t x[2]; uint64_t s; }; #define CAMLparam4(x, y, z, w) __goblint_caml_param4(&x, &y, &z, &w) #define CAMLparam5(x, y, z, w, v) __goblint_caml_param5(&x, &y, &z, &w, &v) +#undef CAMLlocal1 +#undef CAMLlocal2 +#undef CAMLlocal3 +#undef CAMLlocal4 +#undef CAMLlocal5 #define CAMLlocal1(x) value x = Val_unit; __goblint_caml_param1(&x) // The local and param functions behave the same for our purposes, registering variables. #define CAMLlocal2(x, y) value x = Val_unit; value y = Val_unit; __goblint_caml_param2(&x, &y) #define CAMLlocal3(x, y, z) value x = Val_unit; value y = Val_unit; value z = Val_unit; __goblint_caml_param3(&x, &y, &z) #define CAMLlocal4(x, y, z, w) value x = Val_unit; value y = Val_unit; value z = Val_unit; value w = Val_unit; __goblint_caml_param4(&x, &y, &z, &w) #define CAMLlocal5(x, y, z, w, v) value x = Val_unit; value y = Val_unit; value z = Val_unit; value w = Val_unit; value v = Val_unit; __goblint_caml_param5(&x, &y, &z, &w, &v) +#undef CAMLreturn #define CAMLreturn(x) return (x) // The real CAMLreturn needs some variable named caml__frame, which is not available in our redefinitions above. // A reference to caml_gc_minor_words_unboxed can be found in _opam/lib/ocaml/ml/gc.ml. From b903c4652401abbf22ff48a748cce68dc0147779 Mon Sep 17 00:00:00 2001 From: Ronald Judin Date: Fri, 6 Mar 2026 02:50:41 +0200 Subject: [PATCH 07/12] Reversed domain, added new tests, tried to automatically collect mainfuns --- src/analyses/ocaml.ml | 16 +++++++-------- src/cdomain/value/cdomains/valueDomain.ml | 3 +-- src/common/util/cilfacade.ml | 1 + tests/regression/90-ocaml/01-bagnall.c | 2 +- tests/regression/90-ocaml/03-obenour.c | 24 +++++++++++++++++++++-- tests/regression/90-ocaml/04-branching.c | 22 +++++++++++++++++++++ tests/regression/90-ocaml/goblint_caml.h | 5 ++++- 7 files changed, 58 insertions(+), 15 deletions(-) create mode 100644 tests/regression/90-ocaml/04-branching.c diff --git a/src/analyses/ocaml.ml b/src/analyses/ocaml.ml index 9d8b5244ad..fcf0ebdbff 100644 --- a/src/analyses/ocaml.ml +++ b/src/analyses/ocaml.ml @@ -20,14 +20,14 @@ struct let name () = "ocaml" module D = struct - (* The first set contains variables of type value that are definitely in order. The second contains definitely registered variables. The third contains variables the analysis tracks. *) - module P = Lattice.Prod (VarinfoSet) (VarinfoSet) + (* The first set contains variables of type value that are definitely accounted. The second contains definitely registered variables. *) + module P = Lattice.Reverse (Lattice.Prod (VarinfoSet) (VarinfoSet)) include P let empty () = (VarinfoSet.empty (), VarinfoSet.empty ()) - (* After garbage collection, the second set is written to the first set *) - let after_gc (accounted, registered) = (registered, registered) + (* After garbage collection, the first set loses variables not in the second set. *) + let after_gc (accounted, registered) = (VarinfoSet.inter accounted registered, registered) (* Untracked variables are always fine. *) let mem_a v (accounted, registered) = @@ -200,6 +200,7 @@ struct (* To warn about a potential issue in the code, use M.warn. *) (* caller_state *) let desc = LibraryFunctions.find f in + List.iter (fun e -> ignore (exp_accounted_for caller_state e)) arglist; (* Just to trigger warnings for arguments passed to special functions *) match desc.special arglist with | OCamlParam params -> (* Variables are registered with a Param macro. *) @@ -210,17 +211,14 @@ struct | OCamlAlloc size_exp -> (* Garbage collection may trigger here and overwrite unregistered variables. *) M.debug "Garbage collection triggers"; - List.iter (fun e -> ignore (exp_accounted_for caller_state e)) arglist; (* Just to trigger warnings *) (match lval with | Some (Var v, _) -> D.add_a v (D.after_gc caller_state) | _ -> D.after_gc caller_state ) - | _ -> - List.iter (fun e -> ignore (exp_accounted_for caller_state e)) arglist; (* Just to trigger warnings for arguments passed to special functions *) - caller_state + | _ -> caller_state (* You may leave these alone *) - let startstate v = D.bot () + let startstate v = D.empty () let threadenter ctx ~multiple lval f args = [D.top ()] let threadspawn ctx ~multiple lval f args fctx = ctx.local let exitstate v = D.top () diff --git a/src/cdomain/value/cdomains/valueDomain.ml b/src/cdomain/value/cdomains/valueDomain.ml index 6f234cce50..60417597d9 100644 --- a/src/cdomain/value/cdomains/valueDomain.ml +++ b/src/cdomain/value/cdomains/valueDomain.ml @@ -520,8 +520,7 @@ struct (* cast to voidPtr are ignored TODO what happens if our value does not fit? *) | TPtr (t,_) -> Address (match v with - | Int x when ID.to_int x = Some Z.zero -> AD.null_ptr - | Int x -> AD.top_ptr + | Int x -> AD.of_int x (* we ignore casts to void* (above)! TODO report UB! *) | Address x -> cast_addr t x (*| Address x -> x*) diff --git a/src/common/util/cilfacade.ml b/src/common/util/cilfacade.ml index 452d0297a5..1120c5dd48 100644 --- a/src/common/util/cilfacade.ml +++ b/src/common/util/cilfacade.ml @@ -232,6 +232,7 @@ let getFuns fileAST : startfuns = Logs.debug "Cleanup function: %s" mn; set_string "exitfun[+]" mn; add_exit def acc | GFun ({svar={vstorage=NoStorage; vattr; _}; _} as def, _) when get_bool "nonstatic" && not (Cil.hasAttribute "goblint_stub" vattr) -> add_other def acc | GFun ({svar={vattr; _}; _} as def, _) when get_bool "allfuns" && not (Cil.hasAttribute "goblint_stub" vattr) -> add_other def acc + | GFun({svar={vname=mn; vattr=attr; vtype=t; _}; _} as def, _) when Cil.hasAttribute "c_stub" attr -> add_main def acc | _ -> acc in foldGlobals fileAST f ([],[],[]) diff --git a/tests/regression/90-ocaml/01-bagnall.c b/tests/regression/90-ocaml/01-bagnall.c index 5a06ba4e6d..a0f47f61f6 100644 --- a/tests/regression/90-ocaml/01-bagnall.c +++ b/tests/regression/90-ocaml/01-bagnall.c @@ -1,4 +1,4 @@ -// PARAM: --set "ana.activated[+]" ocaml --set "mainfun[+]" "pringo_LXM_copy" --set "mainfun[+]" "pringo_LXM_copy_correct" --set "mainfun[+]" "pringo_LXM_init_unboxed" --disable warn.imprecise --set "exp.extraspecials[+]" printInt +// PARAM: --set "ana.activated[+]" ocaml --set "mainfun[+]" "pringo_LXM_copy_correct" --set "mainfun[+]" "pringo_LXM_init_unboxed" --disable warn.imprecise --set "exp.extraspecials[+]" printInt // Buggy code from https://github.com/xavierleroy/pringo/issues/6 where value v is not registered. diff --git a/tests/regression/90-ocaml/03-obenour.c b/tests/regression/90-ocaml/03-obenour.c index c69b5b42cd..74a9f21bdf 100644 --- a/tests/regression/90-ocaml/03-obenour.c +++ b/tests/regression/90-ocaml/03-obenour.c @@ -1,4 +1,4 @@ -// PARAM: --set "ana.activated[+]" ocaml --set "mainfun[+]" "caml_gc_counters" --set "mainfun[+]" "caml_gc_counters_correct" --disable warn.imprecise --set "exp.extraspecials[+]" printInt +// PARAM: --set "ana.activated[+]" ocaml --set "mainfun[+]" "caml_gc_counters" --set "mainfun[+]" "caml_gc_counters_correct_1" --set "mainfun[+]" "caml_gc_counters_correct_2" --disable warn.imprecise --set "exp.extraspecials[+]" printInt // Buggy code from https://github.com/ocaml/ocaml/pull/13370 where unregistered temporary variables may be garbage-collected. @@ -26,7 +26,7 @@ CAMLprim value caml_gc_counters(value v) CAMLreturn(res); } -CAMLprim value caml_gc_counters_correct(value v) +CAMLprim value caml_gc_counters_correct_1(value v) { CAMLparam0(); /* v is ignored */ CAMLlocal3(minwords_, prowords_, majwords_); @@ -47,4 +47,24 @@ CAMLprim value caml_gc_counters_correct(value v) Field(v, 1) = prowords_; Field(v, 2) = majwords_; CAMLreturn(v); +} + +CAMLprim value caml_gc_counters_correct_2(value v) +{ + CAMLparam0(); /* v is ignored */ + CAMLlocal4(minwords_, prowords_, majwords_, res); + + /* get a copy of these before allocating anything... */ + double minwords = caml_gc_minor_words_unboxed(); + /*double prowords = (double)Caml_state->stat_promoted_words; + double majwords = Caml_state->stat_major_words + + (double) Caml_state->allocated_words;*/ + double prowords = 0; + double majwords = 0; + + minwords_ = caml_copy_double(minwords); + prowords_ = caml_copy_double(prowords); + majwords_ = caml_copy_double(majwords); + res = caml_alloc_3(0, minwords_, prowords_, majwords_); + CAMLreturn(res); } \ No newline at end of file diff --git a/tests/regression/90-ocaml/04-branching.c b/tests/regression/90-ocaml/04-branching.c new file mode 100644 index 0000000000..94bd0255dc --- /dev/null +++ b/tests/regression/90-ocaml/04-branching.c @@ -0,0 +1,22 @@ +// PARAM: --set "ana.activated[+]" ocaml --set "mainfun[+]" "branching_test" --disable warn.imprecise --set "exp.extraspecials[+]" printInt + +// Artificial test where one branch registers the argument v and the other does not, but both branches use v. + +#include +#include +#include +#include +#include +#include +#include "goblint_caml.h" + +CAMLprim value branching_test(value v, bool b) +{ + if (b) + { + CAMLparam1(v); + } + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // WARN + CAMLreturn(res); +} \ No newline at end of file diff --git a/tests/regression/90-ocaml/goblint_caml.h b/tests/regression/90-ocaml/goblint_caml.h index d90e4030ef..3daf57044a 100644 --- a/tests/regression/90-ocaml/goblint_caml.h +++ b/tests/regression/90-ocaml/goblint_caml.h @@ -1,5 +1,4 @@ /* Redefinitions of caml macros for the C-stub analysis. This file replaces caml/memory.h in tests' imports. */ -// TODO: add undefs /* A small definition of the LXM state so sizeof works - from AI */ struct LXM_state { uint64_t a; uint64_t x[2]; uint64_t s; }; @@ -8,6 +7,10 @@ struct LXM_state { uint64_t a; uint64_t x[2]; uint64_t s; }; #define Wsizeof(ty) ((sizeof(ty) + sizeof(value) - 1) / sizeof(value)) #define LXM_val(v) ((struct LXM_state *) Data_abstract_val(v)) +// Redefine CAMLprim to have the annotate attribute marking it as c_stub. +#undef CAMLprim +#define CAMLprim extern __attribute__((c_stub)) + // Param and local macros redefined to register variables as GC roots, and CAMLreturn mocked to work with them. #undef CAMLparam0 #undef CAMLparam1 From 02777ba2400e7da04b933accdb740cb6b68b2130 Mon Sep 17 00:00:00 2001 From: Ronald Judin Date: Fri, 20 Mar 2026 09:50:22 +0200 Subject: [PATCH 08/12] Automatic collection of c-stubs and analysis support for function calls --- src/analyses/ocaml.ml | 33 ++++++++------------ src/common/util/cilfacade.ml | 2 +- tests/regression/90-ocaml/01-bagnall.c | 2 +- tests/regression/90-ocaml/02-floatops.c | 2 +- tests/regression/90-ocaml/03-obenour.c | 2 +- tests/regression/90-ocaml/04-branching.c | 2 +- tests/regression/90-ocaml/05-lateparam.c | 18 +++++++++++ tests/regression/90-ocaml/06-nested.c | 38 ++++++++++++++++++++++++ tests/regression/90-ocaml/goblint_caml.h | 2 +- 9 files changed, 75 insertions(+), 26 deletions(-) create mode 100644 tests/regression/90-ocaml/05-lateparam.c create mode 100644 tests/regression/90-ocaml/06-nested.c diff --git a/src/analyses/ocaml.ml b/src/analyses/ocaml.ml index fcf0ebdbff..10d85888f7 100644 --- a/src/analyses/ocaml.ml +++ b/src/analyses/ocaml.ml @@ -100,6 +100,10 @@ struct | _ -> false + let is_value_type (t:typ): bool = match t with + | TNamed (info, attr) -> info.tname = "value" + | _ -> false + (* transfer functions *) (** Handles assignment of [rval] to [lval]. *) @@ -120,7 +124,8 @@ struct (** Handles conditional branching yielding truth value [tv]. *) let branch ctx (exp:exp) (tv:bool) : D.t = - (* Nothing needs to be done *) + (* The expression checked must be accounted for *) + ignore (exp_accounted_for ctx.local exp); ctx.local (** Handles going from start node of function [f] into the function body of [f]. @@ -129,9 +134,6 @@ struct (* The (non-formals) locals are tracked and initially accounted for *) let state = ctx.local in (* It is assumed that value-typed arguments are never nptrs. *) - let is_value_type (t:typ): bool = match t with - | TNamed (info, attr) -> info.tname = "value" - | _ -> false in List.fold_left (fun st v -> if is_value_type v.vtype then (ctx.emit (Events.SplitBranch (Cil.Lval (Cil.var v), true)); D.add_a v st) else D.add_a v (D.add_r v st)) @@ -156,18 +158,8 @@ struct will compute the caller state after the function call, given the return state of the callee. *) let enter ctx (lval: lval option) (f:fundec) (args:exp list) : (D.t * D.t) list = let caller_state = ctx.local in - (* Create list of (formal, actual_exp)*) - (* - let zipped = List.combine f.sformals args in - (* TODO: For the initial callee_state, collect formal parameters where the actual is healthy. *) - let callee_state = List.fold_left (fun ts (f,a) -> - if exp_accounted_for caller_state a - then D.add f ts (* TODO: Change accumulator ts here? *) - else D.remove f ts) - (D.bot ()) - zipped in - *) - (* TODO: Should this be checked with locals or formals, and how exactly? Likely with locals. *) + List.iter (fun e -> ignore (exp_accounted_for caller_state e)) args; + (* Entering a function doesn't change the caller state *) let callee_state = caller_state in (* first component is state of caller, second component is state of callee *) [caller_state, callee_state] @@ -176,16 +168,17 @@ struct computes the global environment state of the caller after the call. Argument [callee_local] is the state of [f] at its return node. *) let combine_env ctx (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t = - (* Nothing needs to be done *) - ctx.local + (* If GC could have triggered during the call, the caller state loses variables not registered in the callee. *) + (* Since the callee state is copied from the caller, the caller state changes the same way through the callee's GCs. *) + callee_local (** For a function call "lval = f(args)" or "f(args)", computes the state of the caller after assigning the return value from the call. Argument [callee_local] is the state of [f] at its return node. *) let combine_assign ctx (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t = let caller_state = ctx.local in - (* Records whether lval was accounted for. *) (* caller_state *) - (* TODO: Consider how the return_varinfo needs to be tracked. *) + (* Records whether lval was accounted for. Registration for v must already be handled. *) + (* TODO: What happens if a pointer to a value is returned? *) match lval with (* The variable returned is played by return_varinfo *) | Some (Var v, _) -> if D.mem_a return_varinfo callee_local then D.add_a v caller_state else (M.warn "Returned value may be garbage-collected"; D.remove_a v caller_state) diff --git a/src/common/util/cilfacade.ml b/src/common/util/cilfacade.ml index 1120c5dd48..5eaa5c272d 100644 --- a/src/common/util/cilfacade.ml +++ b/src/common/util/cilfacade.ml @@ -232,7 +232,7 @@ let getFuns fileAST : startfuns = Logs.debug "Cleanup function: %s" mn; set_string "exitfun[+]" mn; add_exit def acc | GFun ({svar={vstorage=NoStorage; vattr; _}; _} as def, _) when get_bool "nonstatic" && not (Cil.hasAttribute "goblint_stub" vattr) -> add_other def acc | GFun ({svar={vattr; _}; _} as def, _) when get_bool "allfuns" && not (Cil.hasAttribute "goblint_stub" vattr) -> add_other def acc - | GFun({svar={vname=mn; vattr=attr; vtype=t; _}; _} as def, _) when Cil.hasAttribute "c_stub" attr -> add_main def acc + | GFun({svar={vname=mn; vattr=attr; vtype= TFun (t, _, _, _); _}; _} as def, _) when Cil.hasAttribute "c_stub" attr || Cil.hasAttribute "c_stub" (Cil.typeAttrs(t)) -> add_main def acc | _ -> acc in foldGlobals fileAST f ([],[],[]) diff --git a/tests/regression/90-ocaml/01-bagnall.c b/tests/regression/90-ocaml/01-bagnall.c index a0f47f61f6..dfeca785e9 100644 --- a/tests/regression/90-ocaml/01-bagnall.c +++ b/tests/regression/90-ocaml/01-bagnall.c @@ -1,4 +1,4 @@ -// PARAM: --set "ana.activated[+]" ocaml --set "mainfun[+]" "pringo_LXM_copy_correct" --set "mainfun[+]" "pringo_LXM_init_unboxed" --disable warn.imprecise --set "exp.extraspecials[+]" printInt +// PARAM: --set "ana.activated[+]" ocaml --disable warn.imprecise --set "exp.extraspecials[+]" printInt // Buggy code from https://github.com/xavierleroy/pringo/issues/6 where value v is not registered. diff --git a/tests/regression/90-ocaml/02-floatops.c b/tests/regression/90-ocaml/02-floatops.c index 029cbbf46e..9d35150ca5 100644 --- a/tests/regression/90-ocaml/02-floatops.c +++ b/tests/regression/90-ocaml/02-floatops.c @@ -1,4 +1,4 @@ -// PARAM: --set "ana.activated[+]" ocaml --set "mainfun[+]" "sqrt_double" --set "mainfun[+]" "sqrt_float" --set "mainfun[+]" "acos_double" --set "mainfun[+]" "acos_float" --set "mainfun[+]" "asin_double" --set "mainfun[+]" "asin_float" --set "mainfun[+]" "atan_double" --set "mainfun[+]" "atan_float" --set "mainfun[+]" "cos_double" --set "mainfun[+]" "cos_float" --set "mainfun[+]" "sin_double" --set "mainfun[+]" "sin_float" --set "mainfun[+]" "tan_double" --set "mainfun[+]" "tan_float" --set "mainfun[+]" "add_double" --set "mainfun[+]" "add_float" --set "mainfun[+]" "sub_double" --set "mainfun[+]" "sub_float" --set "mainfun[+]" "mul_double" --set "mainfun[+]" "mul_float" --set "mainfun[+]" "div_double" --set "mainfun[+]" "div_float" --set "mainfun[+]" "atof_double" --set "mainfun[+]" "atof_float" --set "mainfun[+]" "max_float" --set "mainfun[+]" "smallest_float" --set "mainfun[+]" "pi_float" --disable warn.imprecise --set "exp.extraspecials[+]" printInt +// PARAM: --set "ana.activated[+]" ocaml --disable warn.imprecise --set "exp.extraspecials[+]" printInt // Code from stubs.c in src/common/cdomains/floatOps that should be correct despite missing CAMLparam, as discussed in https://github.com/goblint/analyzer/issues/1371. diff --git a/tests/regression/90-ocaml/03-obenour.c b/tests/regression/90-ocaml/03-obenour.c index 74a9f21bdf..0e341f6764 100644 --- a/tests/regression/90-ocaml/03-obenour.c +++ b/tests/regression/90-ocaml/03-obenour.c @@ -1,4 +1,4 @@ -// PARAM: --set "ana.activated[+]" ocaml --set "mainfun[+]" "caml_gc_counters" --set "mainfun[+]" "caml_gc_counters_correct_1" --set "mainfun[+]" "caml_gc_counters_correct_2" --disable warn.imprecise --set "exp.extraspecials[+]" printInt +// PARAM: --set "ana.activated[+]" ocaml --disable warn.imprecise --set "exp.extraspecials[+]" printInt // Buggy code from https://github.com/ocaml/ocaml/pull/13370 where unregistered temporary variables may be garbage-collected. diff --git a/tests/regression/90-ocaml/04-branching.c b/tests/regression/90-ocaml/04-branching.c index 94bd0255dc..11460fff38 100644 --- a/tests/regression/90-ocaml/04-branching.c +++ b/tests/regression/90-ocaml/04-branching.c @@ -1,4 +1,4 @@ -// PARAM: --set "ana.activated[+]" ocaml --set "mainfun[+]" "branching_test" --disable warn.imprecise --set "exp.extraspecials[+]" printInt +// PARAM: --set "ana.activated[+]" ocaml --disable warn.imprecise --set "exp.extraspecials[+]" printInt // Artificial test where one branch registers the argument v and the other does not, but both branches use v. diff --git a/tests/regression/90-ocaml/05-lateparam.c b/tests/regression/90-ocaml/05-lateparam.c new file mode 100644 index 0000000000..faefaddc74 --- /dev/null +++ b/tests/regression/90-ocaml/05-lateparam.c @@ -0,0 +1,18 @@ +// PARAM: --set "ana.activated[+]" ocaml --disable warn.imprecise --set "exp.extraspecials[+]" printInt + +// Artificial test where the argument v is registered after GC could delete it. + +#include +#include +#include +#include +#include +#include "goblint_caml.h" + +CAMLprim value late_test(value v) +{ + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + CAMLparam1(v); // WARN + memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // WARN + CAMLreturn(res); +} \ No newline at end of file diff --git a/tests/regression/90-ocaml/06-nested.c b/tests/regression/90-ocaml/06-nested.c new file mode 100644 index 0000000000..fc7538fc86 --- /dev/null +++ b/tests/regression/90-ocaml/06-nested.c @@ -0,0 +1,38 @@ +// PARAM: --set "ana.activated[+]" ocaml --disable warn.imprecise --set "exp.extraspecials[+]" printInt + +// Artificial tests with one C-stub calling another inside it. + +#include +#include +#include +#include +#include +#include "goblint_caml.h" + +CAMLprim value pringo_LXM_copy(value v) +{ + value res1 = pringo_LXM_copy_correct(v); // NOWARN + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // WARN + return res1; +} + +CAMLprim value pringo_LXM_copy_correct(value v) +{ + CAMLparam1(v); + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // NOWARN + CAMLreturn(res); +} + +CAMLprim value pringo_LXM_init_unboxed(uint64_t i1, uint64_t i2, + uint64_t i3, uint64_t i4) +{ + value v = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); // NOWARN + struct LXM_state * st = LXM_val(v); + st->a = i1 | 1; /* must be odd */ + st->x[0] = i2 != 0 ? i2 : 1; /* must be nonzero */ + st->x[1] = i3 != 0 ? i3 : 2; /* must be nonzero */ + st->s = i4; + return v; +} \ No newline at end of file diff --git a/tests/regression/90-ocaml/goblint_caml.h b/tests/regression/90-ocaml/goblint_caml.h index 3daf57044a..eb418ca74b 100644 --- a/tests/regression/90-ocaml/goblint_caml.h +++ b/tests/regression/90-ocaml/goblint_caml.h @@ -9,7 +9,7 @@ struct LXM_state { uint64_t a; uint64_t x[2]; uint64_t s; }; // Redefine CAMLprim to have the annotate attribute marking it as c_stub. #undef CAMLprim -#define CAMLprim extern __attribute__((c_stub)) +#define CAMLprim __attribute__((c_stub)) // Param and local macros redefined to register variables as GC roots, and CAMLreturn mocked to work with them. #undef CAMLparam0 From 9bf8a6cb92255f39a879306389232466eea05a6e Mon Sep 17 00:00:00 2001 From: Ronald Judin Date: Fri, 27 Mar 2026 10:53:18 +0200 Subject: [PATCH 09/12] Support for entering and returning from functions --- src/analyses/ocaml.ml | 54 +++++++++++++++++++++------ tests/regression/90-ocaml/06-nested.c | 16 ++++++++ 2 files changed, 58 insertions(+), 12 deletions(-) diff --git a/src/analyses/ocaml.ml b/src/analyses/ocaml.ml index 10d85888f7..2e729ef381 100644 --- a/src/analyses/ocaml.ml +++ b/src/analyses/ocaml.ml @@ -12,6 +12,8 @@ module VarinfoSet = SetDomain.Make(CilType.Varinfo) (** "Fake" variable to handle returning from a function *) let return_varinfo = dummyFunDec.svar +(** Flag for first function entered *) +let first_function = true module Spec : Analyses.MCPSpec = struct @@ -114,7 +116,7 @@ struct (* If rval is a pointer, checks whether rval is accounted for, handles assignment to v accordingly *) (* state *) (* Emits an event for the variable v not being zero. *) - if Cil.isPointerType (Cil.typeOf rval) then + if Cil.isPointerType (Cil.typeOf rval) || is_value_type (Cil.typeOf rval) then if exp_accounted_for state rval then if exp_registered state rval then D.add_a v (D.add_r v state) else D.add_a v (D.remove_r v state) @@ -131,13 +133,16 @@ struct (** Handles going from start node of function [f] into the function body of [f]. Meant to handle e.g. initializiation of local variables. *) let body ctx (f:fundec) : D.t = - (* The (non-formals) locals are tracked and initially accounted for *) + (* The (non-formals) locals are initially accounted for *) let state = ctx.local in - (* It is assumed that value-typed arguments are never nptrs. *) - List.fold_left (fun st v -> if is_value_type v.vtype then + (* It is assumed that the startstate's values are not nptrs. *) + let state = List.fold_left (fun st v -> if first_function && is_value_type v.vtype then (ctx.emit (Events.SplitBranch (Cil.Lval (Cil.var v), true)); D.add_a v st) else D.add_a v (D.add_r v st)) - state f.sformals + state f.sformals in + (* TODO: Is there a way without a flag to only emit at the start? *) + let first_function = false in + state (** Handles the [return] statement, i.e. "return exp" or "return", in function [f]. *) let return ctx (exp:exp option) (f:fundec) : D.t = @@ -147,9 +152,16 @@ struct (* Checks that value returned is accounted for. *) (* Return_varinfo is used in place of a "real" variable. *) (* TODO: Consider how the return_varinfo needs to be tracked. *) - (* state *) - if exp_accounted_for state e then D.add_a return_varinfo state - else (M.warn "Value returned might be garbage collected"; D.remove_a return_varinfo state) + let return_state = + if Cil.isPointerType (Cil.typeOf e) || is_value_type (Cil.typeOf e) then + if exp_accounted_for state e then + if exp_registered state e then D.add_a return_varinfo (D.add_r return_varinfo state) + else D.add_a return_varinfo (D.remove_r return_varinfo state) + else (M.warn "Value returned might be garbage collected"; D.remove_a return_varinfo state) + else D.add_a return_varinfo (D.add_r return_varinfo state) + in + (* Remove this function's formals and locals *) + List.fold_left (fun st v -> D.remove_a v (D.remove_r v st)) return_state (f.sformals @ f.slocals) | None -> state (** For a function call "lval = f(args)" or "f(args)", @@ -160,7 +172,17 @@ struct let caller_state = ctx.local in List.iter (fun e -> ignore (exp_accounted_for caller_state e)) args; (* Entering a function doesn't change the caller state *) - let callee_state = caller_state in + let callee_state = List.fold_left2 (fun st v rval -> + (* At the start, arguments are accounted for and not registered. *) + if rval == MyCFG.unknown_exp then D.add_a v (D.remove_r v st) else + (* Arguments of inner functions inherit the caller's state. *) + if Cil.isPointerType (Cil.typeOf rval) || is_value_type (Cil.typeOf rval) then + if exp_accounted_for st rval then + if exp_registered st rval then D.add_a v (D.add_r v st) + else D.add_a v (D.remove_r v st) + else (M.info "The above is being assigned"; D.remove_a v st) + else D.add_a v (D.add_r v st)) + caller_state f.sformals args in (* first component is state of caller, second component is state of callee *) [caller_state, callee_state] @@ -169,7 +191,7 @@ struct Argument [callee_local] is the state of [f] at its return node. *) let combine_env ctx (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t = (* If GC could have triggered during the call, the caller state loses variables not registered in the callee. *) - (* Since the callee state is copied from the caller, the caller state changes the same way through the callee's GCs. *) + (* Since the callee state is basically copied from the caller, the caller state changes the same way through the callee's GCs. *) callee_local (** For a function call "lval = f(args)" or "f(args)", @@ -179,9 +201,17 @@ struct let caller_state = ctx.local in (* Records whether lval was accounted for. Registration for v must already be handled. *) (* TODO: What happens if a pointer to a value is returned? *) + (* TODO: Rethink type checking with return_varinfo. *) match lval with (* The variable returned is played by return_varinfo *) - | Some (Var v, _) -> if D.mem_a return_varinfo callee_local then D.add_a v caller_state - else (M.warn "Returned value may be garbage-collected"; D.remove_a v caller_state) + | Some (Var v, _) -> + let state = + if Cil.isPointerType return_varinfo.vtype || is_value_type return_varinfo.vtype then + if D.mem_a return_varinfo callee_local then + if D.mem_r return_varinfo callee_local then D.add_a v (D.add_r v caller_state) + else D.add_a v (D.remove_r v caller_state) + else (M.warn "Returned value may be garbage-collected"; D.remove_a v caller_state) + else D.add_a v (D.add_r v caller_state) in + D.remove_a return_varinfo (D.remove_r return_varinfo state) | _ -> caller_state (** For a call to a _special_ function f "lval = f(args)" or "f(args)", diff --git a/tests/regression/90-ocaml/06-nested.c b/tests/regression/90-ocaml/06-nested.c index fc7538fc86..18b1f6f159 100644 --- a/tests/regression/90-ocaml/06-nested.c +++ b/tests/regression/90-ocaml/06-nested.c @@ -25,6 +25,22 @@ CAMLprim value pringo_LXM_copy_correct(value v) CAMLreturn(res); } +CAMLprim value pringo_LXM_copy_1(value v) +{ + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // WARN + return v; +} + +CAMLprim value pringo_LXM_copy_2(value v) +{ + CAMLparam1(v); + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + v = pringo_LXM_copy_1(v); // WARN + memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); + CAMLreturn(res); +} + CAMLprim value pringo_LXM_init_unboxed(uint64_t i1, uint64_t i2, uint64_t i3, uint64_t i4) { From 48c25df07fe175c6c7332f8064a7c74a96448510 Mon Sep 17 00:00:00 2001 From: Ronald Judin Date: Mon, 6 Apr 2026 02:07:11 +0300 Subject: [PATCH 10/12] Added separate function for everything assignment-like, distinguished between regular and correct returns, added apron test --- src/analyses/ocaml.ml | 86 +++++++++++------------- src/util/library/libraryDesc.ml | 1 + src/util/library/libraryFunctions.ml | 2 + tests/regression/90-ocaml/05-lateparam.c | 1 + tests/regression/90-ocaml/06-nested.c | 10 ++- tests/regression/90-ocaml/07-apron.c | 54 +++++++++++++++ tests/regression/90-ocaml/goblint_caml.h | 8 ++- 7 files changed, 114 insertions(+), 48 deletions(-) create mode 100644 tests/regression/90-ocaml/07-apron.c diff --git a/src/analyses/ocaml.ml b/src/analyses/ocaml.ml index 2e729ef381..94a28d15cc 100644 --- a/src/analyses/ocaml.ml +++ b/src/analyses/ocaml.ml @@ -13,7 +13,9 @@ module VarinfoSet = SetDomain.Make(CilType.Varinfo) (** "Fake" variable to handle returning from a function *) let return_varinfo = dummyFunDec.svar (** Flag for first function entered *) -let first_function = true +let first_function = (emptyFunction "@first").svar +(** Flag for deregistering at return *) +let to_deregister = (emptyFunction "@dereg").svar module Spec : Analyses.MCPSpec = struct @@ -106,22 +108,22 @@ struct | TNamed (info, attr) -> info.tname = "value" | _ -> false + let assignment (v:varinfo) (rval:exp) (state:D.t) (warning:string): D.t = + (* If rval is a pointer, checks whether rval is accounted for, handles assignment to v accordingly *) + if Cil.isPointerType (Cil.typeOf rval) || is_value_type (Cil.typeOf rval) then + if exp_accounted_for state rval then + if exp_registered state rval then D.add_a v (D.add_r v state) + else D.add_a v (D.remove_r v state) + else (M.info "%s" warning; D.remove_a v state) + else D.add_a v (D.add_r v state) + (* transfer functions *) (** Handles assignment of [rval] to [lval]. *) let assign ctx (lval:lval) (rval:exp) : D.t = let state = ctx.local in match lval with - | Var v,_ -> - (* If rval is a pointer, checks whether rval is accounted for, handles assignment to v accordingly *) (* state *) - (* Emits an event for the variable v not being zero. *) - - if Cil.isPointerType (Cil.typeOf rval) || is_value_type (Cil.typeOf rval) then - if exp_accounted_for state rval then - if exp_registered state rval then D.add_a v (D.add_r v state) - else D.add_a v (D.remove_r v state) - else (M.info "The above is being assigned"; D.remove_a v state) - else D.add_a v (D.add_r v state) + | Var v,_ -> assignment v rval state "The above is being assigned" | _ -> state (** Handles conditional branching yielding truth value [tv]. *) @@ -133,16 +135,14 @@ struct (** Handles going from start node of function [f] into the function body of [f]. Meant to handle e.g. initializiation of local variables. *) let body ctx (f:fundec) : D.t = - (* The (non-formals) locals are initially accounted for *) let state = ctx.local in - (* It is assumed that the startstate's values are not nptrs. *) - let state = List.fold_left (fun st v -> if first_function && is_value_type v.vtype then - (ctx.emit (Events.SplitBranch (Cil.Lval (Cil.var v), true)); D.add_a v st) - else D.add_a v (D.add_r v st)) - state f.sformals in + (* It is assumed that the startstate's values are not nptrs. This avoids warnings from other analyses. *) + if D.mem_r first_function state then + List.iter (fun v -> (if is_value_type v.vtype then + (ctx.emit (Events.SplitBranch (Cil.Lval (Cil.var v), true))))) + f.sformals; (* TODO: Is there a way without a flag to only emit at the start? *) - let first_function = false in - state + D.remove_r first_function state (** Handles the [return] statement, i.e. "return exp" or "return", in function [f]. *) let return ctx (exp:exp option) (f:fundec) : D.t = @@ -151,17 +151,11 @@ struct | Some e -> (* Checks that value returned is accounted for. *) (* Return_varinfo is used in place of a "real" variable. *) - (* TODO: Consider how the return_varinfo needs to be tracked. *) - let return_state = - if Cil.isPointerType (Cil.typeOf e) || is_value_type (Cil.typeOf e) then - if exp_accounted_for state e then - if exp_registered state e then D.add_a return_varinfo (D.add_r return_varinfo state) - else D.add_a return_varinfo (D.remove_r return_varinfo state) - else (M.warn "Value returned might be garbage collected"; D.remove_a return_varinfo state) - else D.add_a return_varinfo (D.add_r return_varinfo state) - in - (* Remove this function's formals and locals *) - List.fold_left (fun st v -> D.remove_a v (D.remove_r v st)) return_state (f.sformals @ f.slocals) + let return_state = assignment return_varinfo e state "The above is being returned" in + (* Remove this function's formals and locals if correctly returned *) + D.remove_r to_deregister (if D.mem_r to_deregister return_state then + List.fold_left (fun st v -> D.remove_a v (D.remove_r v st)) return_state (f.sformals @ f.slocals) + else return_state) | None -> state (** For a function call "lval = f(args)" or "f(args)", @@ -173,16 +167,13 @@ struct List.iter (fun e -> ignore (exp_accounted_for caller_state e)) args; (* Entering a function doesn't change the caller state *) let callee_state = List.fold_left2 (fun st v rval -> - (* At the start, arguments are accounted for and not registered. *) - if rval == MyCFG.unknown_exp then D.add_a v (D.remove_r v st) else - (* Arguments of inner functions inherit the caller's state. *) - if Cil.isPointerType (Cil.typeOf rval) || is_value_type (Cil.typeOf rval) then - if exp_accounted_for st rval then - if exp_registered st rval then D.add_a v (D.add_r v st) - else D.add_a v (D.remove_r v st) - else (M.info "The above is being assigned"; D.remove_a v st) - else D.add_a v (D.add_r v st)) - caller_state f.sformals args in + (* At the start, arguments are accounted for and not registered. The first_function flag is added.*) + if rval == MyCFG.unknown_exp then + if is_value_type v.vtype then D.add_r first_function (D.add_a v (D.remove_r v st)) + else D.add_a v (D.add_r v st) + (* Arguments of inner functions inherit the caller's state. *) + else assignment v rval st "Entering function with possibly deleted argument") + caller_state f.sformals args in (* first component is state of caller, second component is state of callee *) [caller_state, callee_state] @@ -201,15 +192,15 @@ struct let caller_state = ctx.local in (* Records whether lval was accounted for. Registration for v must already be handled. *) (* TODO: What happens if a pointer to a value is returned? *) - (* TODO: Rethink type checking with return_varinfo. *) match lval with (* The variable returned is played by return_varinfo *) | Some (Var v, _) -> let state = - if Cil.isPointerType return_varinfo.vtype || is_value_type return_varinfo.vtype then - if D.mem_a return_varinfo callee_local then - if D.mem_r return_varinfo callee_local then D.add_a v (D.add_r v caller_state) + (* TODO: It never warns about being combined. Is there a problem with the svar type? *) + if Cil.isPointerType f.svar.vtype || is_value_type f.svar.vtype then + if D.mem_a return_varinfo caller_state then + if D.mem_r return_varinfo caller_state then D.add_a v (D.add_r v caller_state) else D.add_a v (D.remove_r v caller_state) - else (M.warn "Returned value may be garbage-collected"; D.remove_a v caller_state) + else (M.info "The above is being combined"; D.remove_a v caller_state) else D.add_a v (D.add_r v caller_state) in D.remove_a return_varinfo (D.remove_r return_varinfo state) | _ -> caller_state @@ -219,7 +210,6 @@ struct For this analysis, source and sink functions will be considered _special_ and have to be treated here. *) let special ctx (lval: lval option) (f:varinfo) (arglist:exp list) : D.t = let caller_state = ctx.local in - (* TODO: Check if f is a sink / source and handle it appropriately *) (* To warn about a potential issue in the code, use M.warn. *) (* caller_state *) let desc = LibraryFunctions.find f in @@ -238,6 +228,10 @@ struct | Some (Var v, _) -> D.add_a v (D.after_gc caller_state) | _ -> D.after_gc caller_state ) + (* TODO: Does not deregister, but queues it. This is unsound if deregistration is done before returning instead of at the same time. *) + | OCamlReturn -> + (* Marks a function as deregistering at return *) + D.add_r to_deregister caller_state | _ -> caller_state (* You may leave these alone *) diff --git a/src/util/library/libraryDesc.ml b/src/util/library/libraryDesc.ml index 35b2f4ad41..ed1d3fc3c2 100644 --- a/src/util/library/libraryDesc.ml +++ b/src/util/library/libraryDesc.ml @@ -50,6 +50,7 @@ type special = | OCamlAlloc of Cil.exp (* TODO: Rethink OCamlParam's placement. *) | OCamlParam of Cil.exp list + | OCamlReturn | Calloc of { count: Cil.exp; size: Cil.exp; } | Realloc of { ptr: Cil.exp; size: Cil.exp; } | Free of Cil.exp diff --git a/src/util/library/libraryFunctions.ml b/src/util/library/libraryFunctions.ml index cc0fe5e012..1c3bc4c0b9 100644 --- a/src/util/library/libraryFunctions.ml +++ b/src/util/library/libraryFunctions.ml @@ -1247,6 +1247,7 @@ let ocaml_descs_list: (string * LibraryDesc.t) list = LibraryDsl.[ ("caml_copy_double", special [drop "nptr" []] (OCamlAlloc (GoblintCil.integer 1))); (* Example: ("malloc", special [__ "size" []] @@ fun size -> Malloc size); *) ("caml_alloc_small", special [__ "wosize" []; __ "tag" []] @@ fun wosize tag -> OCamlAlloc wosize); + ("camlidl_apron_policy_ptr_c2ml", special [drop "nptr" []] (OCamlAlloc (GoblintCil.integer 1))); ("caml_alloc_2", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]] @@ fun tag _arg1 _arg2 -> OCamlAlloc (GoblintCil.integer 2)); ("caml_alloc_3", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]; __ "arg3" [r]] @@ fun tag _arg1 _arg2 _arg3 -> OCamlAlloc (GoblintCil.integer 3)); ("caml_alloc_4", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]; __ "arg3" [r]; __ "arg4" [r]] @@ fun tag _arg1 _arg2 _arg3 _arg4 -> OCamlAlloc (GoblintCil.integer 4)); @@ -1257,6 +1258,7 @@ let ocaml_descs_list: (string * LibraryDesc.t) list = LibraryDsl.[ ("__goblint_caml_param3", special [__ "param1" []; __ "param2" []; __ "param3" []] @@ fun param1 param2 param3 -> OCamlParam [param1; param2; param3]); ("__goblint_caml_param4", special [__ "param1" []; __ "param2" []; __ "param3" []; __ "param4" []] @@ fun param1 param2 param3 param4 -> OCamlParam [param1; param2; param3; param4]); ("__goblint_caml_param5", special [__ "param1" []; __ "param2" []; __ "param3" []; __ "param4" []; __ "param5" []] @@ fun param1 param2 param3 param4 param5 -> OCamlParam [param1; param2; param3; param4; param5]); + ("__goblint_caml_return", special [] @@ OCamlReturn); ] [@@coverage off] diff --git a/tests/regression/90-ocaml/05-lateparam.c b/tests/regression/90-ocaml/05-lateparam.c index faefaddc74..f212b41c06 100644 --- a/tests/regression/90-ocaml/05-lateparam.c +++ b/tests/regression/90-ocaml/05-lateparam.c @@ -1,6 +1,7 @@ // PARAM: --set "ana.activated[+]" ocaml --disable warn.imprecise --set "exp.extraspecials[+]" printInt // Artificial test where the argument v is registered after GC could delete it. +// TODO: Add late local as well. #include #include diff --git a/tests/regression/90-ocaml/06-nested.c b/tests/regression/90-ocaml/06-nested.c index 18b1f6f159..a7e72497e6 100644 --- a/tests/regression/90-ocaml/06-nested.c +++ b/tests/regression/90-ocaml/06-nested.c @@ -29,7 +29,7 @@ CAMLprim value pringo_LXM_copy_1(value v) { value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // WARN - return v; + return v; // WARN } CAMLprim value pringo_LXM_copy_2(value v) @@ -41,6 +41,14 @@ CAMLprim value pringo_LXM_copy_2(value v) CAMLreturn(res); } +CAMLprim value pringo_LXM_copy_3(value v) +{ + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + value r = pringo_LXM_copy_1(v); // WARN + memcpy(LXM_val(res), LXM_val(r), sizeof(struct LXM_state)); + CAMLreturn(res); +} + CAMLprim value pringo_LXM_init_unboxed(uint64_t i1, uint64_t i2, uint64_t i3, uint64_t i4) { diff --git a/tests/regression/90-ocaml/07-apron.c b/tests/regression/90-ocaml/07-apron.c new file mode 100644 index 0000000000..4ad5e37e0e --- /dev/null +++ b/tests/regression/90-ocaml/07-apron.c @@ -0,0 +1,54 @@ +// PARAM: --set "ana.activated[+]" ocaml --disable warn.imprecise --set "exp.extraspecials[+]" printInt + +// Buggy code from https://github.com/antoinemine/apron/pull/112 where v and v2 are initialised as 0. +// TODO: It should warn when a value initialised as 0 is used without being registered after a garbage collection. + +#include +#include +#include +#include +#include +#include "goblint_caml.h" + +// In place of value, it was ap_policy_optr*. +CAMLprim value camlidl_apron_policy_optr_c2ml(value p) +{ + if (p==NULL){ + return Val_int(0); + } else { + value v,v2=0; + Begin_roots1(v2); + v2 = camlidl_apron_policy_ptr_c2ml(p); + v = caml_alloc_small(1,0); + Field(v,0) = v2; + End_roots(); + return v; + } +} + +CAMLprim value camlidl_apron_policy_optr_c2ml_correct(value p) +{ + if (p==NULL){ + return Val_int(0); + } else { + value v,v2 = Val_unit; + Begin_roots1(v2); + v2 = camlidl_apron_policy_ptr_c2ml(p); + v = caml_alloc_small(1,0); + Field(v,0) = v2; + End_roots(); + return v; + } +} + + +// This function basically allocates memory with caml_alloc_custom and is simulated with OCamlParam in libraryfunctions.ml. +/*CAMLprim value camlidl_apron_policy_ptr_c2ml(ap_policy_ptr* p) +{ + value v; + assert((*p)->pman!=NULL); + v = caml_alloc_custom(&camlidl_apron_custom_policy_ptr, sizeof(ap_policy_ptr), + 0,1); + *((ap_policy_ptr *) Data_custom_val(v)) = *p; + return v; +}*/ \ No newline at end of file diff --git a/tests/regression/90-ocaml/goblint_caml.h b/tests/regression/90-ocaml/goblint_caml.h index eb418ca74b..8f4c3e308b 100644 --- a/tests/regression/90-ocaml/goblint_caml.h +++ b/tests/regression/90-ocaml/goblint_caml.h @@ -37,7 +37,13 @@ struct LXM_state { uint64_t a; uint64_t x[2]; uint64_t s; }; #define CAMLlocal5(x, y, z, w, v) value x = Val_unit; value y = Val_unit; value z = Val_unit; value w = Val_unit; value v = Val_unit; __goblint_caml_param5(&x, &y, &z, &w, &v) #undef CAMLreturn -#define CAMLreturn(x) return (x) // The real CAMLreturn needs some variable named caml__frame, which is not available in our redefinitions above. +#define CAMLreturn(x) __goblint_caml_return(); return (x) // The real CAMLreturn needs some variable named caml__frame, which is not available in our redefinitions above. + +// Marking roots is like registering and deregistering them. +#undef Begin_roots1 +#undef End_roots +#define Begin_roots1(x) __goblint_caml_param1(&x) +#define End_roots() __goblint_caml_return() // A reference to caml_gc_minor_words_unboxed can be found in _opam/lib/ocaml/ml/gc.ml. #define caml_gc_minor_words_unboxed() (0.0) From b4ff96785e89c4f22dddaca1be4cab9c5da0492f Mon Sep 17 00:00:00 2001 From: Ronald Judin Date: Mon, 20 Apr 2026 01:47:43 +0300 Subject: [PATCH 11/12] Changed assignment logic and deregistering at return, added warnings for memory leaks and registered null pointers, added tests --- src/analyses/ocaml.ml | 70 +++++++++++--------- tests/regression/90-ocaml/06-nested.c | 12 ++-- tests/regression/90-ocaml/07-apron.c | 4 +- tests/regression/90-ocaml/08-registrations.c | 67 +++++++++++++++++++ 4 files changed, 114 insertions(+), 39 deletions(-) create mode 100644 tests/regression/90-ocaml/08-registrations.c diff --git a/src/analyses/ocaml.ml b/src/analyses/ocaml.ml index 94a28d15cc..0ecaab1311 100644 --- a/src/analyses/ocaml.ml +++ b/src/analyses/ocaml.ml @@ -87,7 +87,7 @@ struct (* Recurse over the structure in the expression, returning true if all varinfo appearing in the expression is registered *) | AddrOf v | StartOf v - | Lval v -> lval_registered_for state v + | Lval v -> lval_registered state v | BinOp (_,e1,e2,_) -> exp_registered state e1 && exp_registered state e2 | Real e | Imag e @@ -97,7 +97,7 @@ struct | UnOp (_,e,_) -> exp_registered state e | SizeOf _ | SizeOfStr _ | Const _ | AlignOf _ | AddrOfLabel _ -> true | Question (b, t, f, _) -> exp_registered state b && exp_registered state t && exp_registered state f - and lval_registered_for state = function + and lval_registered state = function | (Var v, _) -> (* Checks whether variable v is registered *) (*false*) D.mem_r v state @@ -108,12 +108,12 @@ struct | TNamed (info, attr) -> info.tname = "value" | _ -> false - let assignment (v:varinfo) (rval:exp) (state:D.t) (warning:string): D.t = + let assignment (v:varinfo) (rval:exp) (rval_type:typ) (state:D.t) (warning:string): D.t = (* If rval is a pointer, checks whether rval is accounted for, handles assignment to v accordingly *) - if Cil.isPointerType (Cil.typeOf rval) || is_value_type (Cil.typeOf rval) then + if Cil.isPointerType rval_type || is_value_type rval_type then if exp_accounted_for state rval then - if exp_registered state rval then D.add_a v (D.add_r v state) - else D.add_a v (D.remove_r v state) + if exp_registered state rval then D.add_a v state + else D.add_a v state else (M.info "%s" warning; D.remove_a v state) else D.add_a v (D.add_r v state) @@ -123,7 +123,7 @@ struct let assign ctx (lval:lval) (rval:exp) : D.t = let state = ctx.local in match lval with - | Var v,_ -> assignment v rval state "The above is being assigned" + | Var v,_ -> assignment v rval (Cil.typeOf rval) state "The above is being assigned" | _ -> state (** Handles conditional branching yielding truth value [tv]. *) @@ -147,15 +147,20 @@ struct (** Handles the [return] statement, i.e. "return exp" or "return", in function [f]. *) let return ctx (exp:exp option) (f:fundec) : D.t = let state = ctx.local in + (* Warn if variables are still registered *) + List.iter (fun v -> if D.mem_r v state then M.warn "Value %a registered at return" CilType.Varinfo.pretty v) (f.sformals @ f.slocals); match exp with - | Some e -> - (* Checks that value returned is accounted for. *) - (* Return_varinfo is used in place of a "real" variable. *) - let return_state = assignment return_varinfo e state "The above is being returned" in - (* Remove this function's formals and locals if correctly returned *) - D.remove_r to_deregister (if D.mem_r to_deregister return_state then - List.fold_left (fun st v -> D.remove_a v (D.remove_r v st)) return_state (f.sformals @ f.slocals) - else return_state) + (* Checks that value returned is accounted for. *) + (* Return_varinfo is used in place of a "real" variable. *) + (* TODO: Warn if registered local variables remain. *) + | Some e -> assignment return_varinfo e (Cil.typeOf e) state "The above is being returned" + (* Checks that value returned is accounted for. *) + (* Return_varinfo is used in place of a "real" variable. *) + (* let return_state = assignment return_varinfo e (Cil.typeOf e) state "The above is being returned" in + (* Remove this function's formals and locals if correctly returned *) + D.remove_r to_deregister (if D.mem_r to_deregister return_state then + List.fold_left (fun st v -> D.remove_a v (D.remove_r v st)) return_state (f.sformals @ f.slocals) + else return_state) *) | None -> state (** For a function call "lval = f(args)" or "f(args)", @@ -172,7 +177,8 @@ struct if is_value_type v.vtype then D.add_r first_function (D.add_a v (D.remove_r v st)) else D.add_a v (D.add_r v st) (* Arguments of inner functions inherit the caller's state. *) - else assignment v rval st "Entering function with possibly deleted argument") + (* TODO: Assignment was changed and no longer copies registration. It must be passed using other means. *) + else assignment v rval (Cil.typeOf rval) st "Entering function with possibly deleted argument") caller_state f.sformals args in (* first component is state of caller, second component is state of callee *) [caller_state, callee_state] @@ -193,21 +199,17 @@ struct (* Records whether lval was accounted for. Registration for v must already be handled. *) (* TODO: What happens if a pointer to a value is returned? *) match lval with (* The variable returned is played by return_varinfo *) - | Some (Var v, _) -> - let state = - (* TODO: It never warns about being combined. Is there a problem with the svar type? *) - if Cil.isPointerType f.svar.vtype || is_value_type f.svar.vtype then - if D.mem_a return_varinfo caller_state then - if D.mem_r return_varinfo caller_state then D.add_a v (D.add_r v caller_state) - else D.add_a v (D.remove_r v caller_state) - else (M.info "The above is being combined"; D.remove_a v caller_state) - else D.add_a v (D.add_r v caller_state) in + | Some (Var v, _) -> let state = + (* Unlike other assignment-like functions, the type here is the return type of the function, not of return_varinfo. *) + (match f.svar.vtype with + | TFun (t, _, _, _) -> + assignment v (Cil.Lval (Cil.var return_varinfo)) t caller_state "The above is being combined" + | _ -> caller_state) in D.remove_a return_varinfo (D.remove_r return_varinfo state) | _ -> caller_state (** For a call to a _special_ function f "lval = f(args)" or "f(args)", - computes the caller state after the function call. - For this analysis, source and sink functions will be considered _special_ and have to be treated here. *) + computes the caller state after the function call. *) let special ctx (lval: lval option) (f:varinfo) (arglist:exp list) : D.t = let caller_state = ctx.local in (* To warn about a potential issue in the code, use M.warn. *) @@ -218,7 +220,12 @@ struct | OCamlParam params -> (* Variables are registered with a Param macro. *) List.fold_left (fun state param -> match param with - | AddrOf (Var v, _) -> D.add_r v state + | AddrOf (Var v, _) -> (match ctx.ask (Queries.EvalInt (Cil.Lval (Cil.var v))) with + | `Bot -> () + | `Lifted x -> if IntDomain.IntDomTuple.equal_to Z.zero x <> `Neq then M.warn "Null pointer registered" + | `Top -> M.warn "Null pointer possibly registered" + ); + D.add_r v state | _ -> state ) caller_state params | OCamlAlloc size_exp -> @@ -228,10 +235,11 @@ struct | Some (Var v, _) -> D.add_a v (D.after_gc caller_state) | _ -> D.after_gc caller_state ) - (* TODO: Does not deregister, but queues it. This is unsound if deregistration is done before returning instead of at the same time. *) | OCamlReturn -> - (* Marks a function as deregistering at return *) - D.add_r to_deregister caller_state + (* Deregisters all formal and local variables. *) + (* TODO: Write tests to check not too much is deregistered. *) + let caller_fun = Node.find_fundec ctx.node in + List.fold_left (fun st v -> D.remove_r v st) caller_state (caller_fun.sformals @ caller_fun.slocals) | _ -> caller_state (* You may leave these alone *) diff --git a/tests/regression/90-ocaml/06-nested.c b/tests/regression/90-ocaml/06-nested.c index a7e72497e6..e147acab1b 100644 --- a/tests/regression/90-ocaml/06-nested.c +++ b/tests/regression/90-ocaml/06-nested.c @@ -14,7 +14,7 @@ CAMLprim value pringo_LXM_copy(value v) value res1 = pringo_LXM_copy_correct(v); // NOWARN value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // WARN - return res1; + return res1; // WARN } CAMLprim value pringo_LXM_copy_correct(value v) @@ -22,7 +22,7 @@ CAMLprim value pringo_LXM_copy_correct(value v) CAMLparam1(v); value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // NOWARN - CAMLreturn(res); + CAMLreturn(res); // NOWARN } CAMLprim value pringo_LXM_copy_1(value v) @@ -37,16 +37,16 @@ CAMLprim value pringo_LXM_copy_2(value v) CAMLparam1(v); value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); v = pringo_LXM_copy_1(v); // WARN - memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); - CAMLreturn(res); + memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // WARN + CAMLreturn(res); // WARN } CAMLprim value pringo_LXM_copy_3(value v) { value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); value r = pringo_LXM_copy_1(v); // WARN - memcpy(LXM_val(res), LXM_val(r), sizeof(struct LXM_state)); - CAMLreturn(res); + memcpy(LXM_val(res), LXM_val(r), sizeof(struct LXM_state)); // WARN + CAMLreturn(res); // WARN } CAMLprim value pringo_LXM_init_unboxed(uint64_t i1, uint64_t i2, diff --git a/tests/regression/90-ocaml/07-apron.c b/tests/regression/90-ocaml/07-apron.c index 4ad5e37e0e..b4eab14f90 100644 --- a/tests/regression/90-ocaml/07-apron.c +++ b/tests/regression/90-ocaml/07-apron.c @@ -17,7 +17,7 @@ CAMLprim value camlidl_apron_policy_optr_c2ml(value p) return Val_int(0); } else { value v,v2=0; - Begin_roots1(v2); + Begin_roots1(v2); // WARN v2 = camlidl_apron_policy_ptr_c2ml(p); v = caml_alloc_small(1,0); Field(v,0) = v2; @@ -32,7 +32,7 @@ CAMLprim value camlidl_apron_policy_optr_c2ml_correct(value p) return Val_int(0); } else { value v,v2 = Val_unit; - Begin_roots1(v2); + Begin_roots1(v2); // NOWARN v2 = camlidl_apron_policy_ptr_c2ml(p); v = caml_alloc_small(1,0); Field(v,0) = v2; diff --git a/tests/regression/90-ocaml/08-registrations.c b/tests/regression/90-ocaml/08-registrations.c new file mode 100644 index 0000000000..9814a54f68 --- /dev/null +++ b/tests/regression/90-ocaml/08-registrations.c @@ -0,0 +1,67 @@ +// PARAM: --set "ana.activated[+]" ocaml --disable warn.imprecise --set "exp.extraspecials[+]" printInt + +// Artificial tests for assignment to values with different states of registration. + +#include +#include +#include +#include +#include +#include "goblint_caml.h" + +CAMLprim value registration_test_1(value v) +{ + CAMLparam1(v); + CAMLlocal1(res); + res = v; + caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + CAMLreturn(res); // NOWARN +} + +// Though v is not directly registered, it is pointed to by a registered value, so it should not be garbage collected. +// If res is then deregistered, the status of v should follow. Changing the analysis to account for this case would be very hard. +CAMLprim value registration_test_2(value v) +{ + CAMLlocal1(res); + res = v; + caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + CAMLreturn(res); // NOWARN +} + +// While v will survive the garbage collection, res may no longer point to it. +CAMLprim value registration_test_3(value v) +{ + CAMLparam1(v); + value res = v; + caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + CAMLreturn(res); // WARN +} + +CAMLprim value registration_test_4(value v) +{ + value res = v; + caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + return res; // WARN +} + +// Though v is indirectly registered via res, the analysis does not see this. +// This causes res2 to be in R, but not in A: a problem. +// TODO: Fix or count as limitation. +CAMLprim value registration_test_5(value v) +{ + CAMLlocal2(res, res2); + res = v; + caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + res2 = v; + CAMLreturn(res2); // NOWARN +} + +// The memory leak test. +CAMLprim value registration_test_6(value v) +{ + CAMLparam1(v); + CAMLlocal1(res); + res = v; + caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + return res; // WARN +} \ No newline at end of file From 088d41f24476856ed27293b7fcaf63723232b02c Mon Sep 17 00:00:00 2001 From: Ronald Judin Date: Wed, 29 Apr 2026 18:32:32 +0300 Subject: [PATCH 12/12] Added tests, renamed ctx to man, changed enter assignment and check at return, added new domain with a stack (some bugs remain) Co-authored-by: Copilot --- src/analyses/ocaml.ml | 62 ++-- src/analyses/ocaml2.ml | 293 +++++++++++++++++++ src/util/library/libraryDesc.ml | 4 +- src/util/library/libraryFunctions.ml | 5 +- tests/regression/90-ocaml/06-nested.c | 15 + tests/regression/90-ocaml/08-registrations.c | 17 +- tests/regression/90-ocaml/goblint_caml.h | 24 +- 7 files changed, 378 insertions(+), 42 deletions(-) create mode 100644 src/analyses/ocaml2.ml diff --git a/src/analyses/ocaml.ml b/src/analyses/ocaml.ml index 0ecaab1311..d132ac0f58 100644 --- a/src/analyses/ocaml.ml +++ b/src/analyses/ocaml.ml @@ -55,7 +55,7 @@ struct module C = Printable.Unit (* We are context insensitive in this analysis *) - let context ctx _ _ = () + let context man _ _ = () let startcontext () = () @@ -120,39 +120,41 @@ struct (* transfer functions *) (** Handles assignment of [rval] to [lval]. *) - let assign ctx (lval:lval) (rval:exp) : D.t = - let state = ctx.local in + let assign man (lval:lval) (rval:exp) : D.t = + let state = man.local in match lval with | Var v,_ -> assignment v rval (Cil.typeOf rval) state "The above is being assigned" | _ -> state (** Handles conditional branching yielding truth value [tv]. *) - let branch ctx (exp:exp) (tv:bool) : D.t = + let branch man (exp:exp) (tv:bool) : D.t = (* The expression checked must be accounted for *) - ignore (exp_accounted_for ctx.local exp); - ctx.local + ignore (exp_accounted_for man.local exp); + man.local (** Handles going from start node of function [f] into the function body of [f]. Meant to handle e.g. initializiation of local variables. *) - let body ctx (f:fundec) : D.t = - let state = ctx.local in + let body man (f:fundec) : D.t = + let state = man.local in (* It is assumed that the startstate's values are not nptrs. This avoids warnings from other analyses. *) if D.mem_r first_function state then List.iter (fun v -> (if is_value_type v.vtype then - (ctx.emit (Events.SplitBranch (Cil.Lval (Cil.var v), true))))) + (man.emit (Events.SplitBranch (Cil.Lval (Cil.var v), true))))) f.sformals; (* TODO: Is there a way without a flag to only emit at the start? *) D.remove_r first_function state (** Handles the [return] statement, i.e. "return exp" or "return", in function [f]. *) - let return ctx (exp:exp option) (f:fundec) : D.t = - let state = ctx.local in - (* Warn if variables are still registered *) - List.iter (fun v -> if D.mem_r v state then M.warn "Value %a registered at return" CilType.Varinfo.pretty v) (f.sformals @ f.slocals); + let return man (exp:exp option) (f:fundec) : D.t = + let state = man.local in + (* Warns if values or pointers are still registered. *) + List.iter (fun v -> if D.mem_r v state && + (Cil.isPointerType (Cil.typeOf (Cil.Lval (Cil.var v))) || + is_value_type (Cil.typeOf (Cil.Lval (Cil.var v)))) + then M.warn "Value %a registered at return" CilType.Varinfo.pretty v) (f.sformals @ f.slocals); match exp with (* Checks that value returned is accounted for. *) (* Return_varinfo is used in place of a "real" variable. *) - (* TODO: Warn if registered local variables remain. *) | Some e -> assignment return_varinfo e (Cil.typeOf e) state "The above is being returned" (* Checks that value returned is accounted for. *) (* Return_varinfo is used in place of a "real" variable. *) @@ -167,8 +169,8 @@ struct [enter] returns a caller state, and the initial state of the callee. In [enter], the caller state can usually be returned unchanged, as [combine_env] and [combine_assign] (below) will compute the caller state after the function call, given the return state of the callee. *) - let enter ctx (lval: lval option) (f:fundec) (args:exp list) : (D.t * D.t) list = - let caller_state = ctx.local in + let enter man (lval: lval option) (f:fundec) (args:exp list) : (D.t * D.t) list = + let caller_state = man.local in List.iter (fun e -> ignore (exp_accounted_for caller_state e)) args; (* Entering a function doesn't change the caller state *) let callee_state = List.fold_left2 (fun st v rval -> @@ -178,7 +180,13 @@ struct else D.add_a v (D.add_r v st) (* Arguments of inner functions inherit the caller's state. *) (* TODO: Assignment was changed and no longer copies registration. It must be passed using other means. *) - else assignment v rval (Cil.typeOf rval) st "Entering function with possibly deleted argument") + else (*assignment v rval (Cil.typeOf rval) st "Entering function with possibly deleted argument")*) + if Cil.isPointerType (Cil.typeOf rval) || is_value_type (Cil.typeOf rval) then + if exp_accounted_for st rval then + if exp_registered st rval then D.add_a v (D.add_r v st) + else D.add_a v (D.remove_r v st) + else (M.info "Entering function with possibly deleted argument"; D.remove_a v st) + else D.add_a v (D.add_r v st)) caller_state f.sformals args in (* first component is state of caller, second component is state of callee *) [caller_state, callee_state] @@ -186,7 +194,7 @@ struct (** For a function call "lval = f(args)" or "f(args)", computes the global environment state of the caller after the call. Argument [callee_local] is the state of [f] at its return node. *) - let combine_env ctx (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t = + let combine_env man (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t = (* If GC could have triggered during the call, the caller state loses variables not registered in the callee. *) (* Since the callee state is basically copied from the caller, the caller state changes the same way through the callee's GCs. *) callee_local @@ -194,8 +202,8 @@ struct (** For a function call "lval = f(args)" or "f(args)", computes the state of the caller after assigning the return value from the call. Argument [callee_local] is the state of [f] at its return node. *) - let combine_assign ctx (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t = - let caller_state = ctx.local in + let combine_assign man (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t = + let caller_state = man.local in (* Records whether lval was accounted for. Registration for v must already be handled. *) (* TODO: What happens if a pointer to a value is returned? *) match lval with (* The variable returned is played by return_varinfo *) @@ -210,8 +218,8 @@ struct (** For a call to a _special_ function f "lval = f(args)" or "f(args)", computes the caller state after the function call. *) - let special ctx (lval: lval option) (f:varinfo) (arglist:exp list) : D.t = - let caller_state = ctx.local in + let special man (lval: lval option) (f:varinfo) (arglist:exp list) : D.t = + let caller_state = man.local in (* To warn about a potential issue in the code, use M.warn. *) (* caller_state *) let desc = LibraryFunctions.find f in @@ -220,7 +228,7 @@ struct | OCamlParam params -> (* Variables are registered with a Param macro. *) List.fold_left (fun state param -> match param with - | AddrOf (Var v, _) -> (match ctx.ask (Queries.EvalInt (Cil.Lval (Cil.var v))) with + | AddrOf (Var v, _) -> (match man.ask (Queries.EvalInt (Cil.Lval (Cil.var v))) with | `Bot -> () | `Lifted x -> if IntDomain.IntDomTuple.equal_to Z.zero x <> `Neq then M.warn "Null pointer registered" | `Top -> M.warn "Null pointer possibly registered" @@ -235,17 +243,17 @@ struct | Some (Var v, _) -> D.add_a v (D.after_gc caller_state) | _ -> D.after_gc caller_state ) - | OCamlReturn -> + | OCamlDrop | OCamlEndRoots -> (* Deregisters all formal and local variables. *) (* TODO: Write tests to check not too much is deregistered. *) - let caller_fun = Node.find_fundec ctx.node in + let caller_fun = Node.find_fundec man.node in List.fold_left (fun st v -> D.remove_r v st) caller_state (caller_fun.sformals @ caller_fun.slocals) | _ -> caller_state (* You may leave these alone *) let startstate v = D.empty () - let threadenter ctx ~multiple lval f args = [D.top ()] - let threadspawn ctx ~multiple lval f args fctx = ctx.local + let threadenter man ~multiple lval f args = [D.top ()] + let threadspawn man ~multiple lval f args fman = man.local let exitstate v = D.top () end diff --git a/src/analyses/ocaml2.ml b/src/analyses/ocaml2.ml new file mode 100644 index 0000000000..e7d2a97195 --- /dev/null +++ b/src/analyses/ocaml2.ml @@ -0,0 +1,293 @@ +(** Simple interprocedural analysis of OCaml C-stubs ([ocaml2]). *) + +(* Goblint documentation: https://goblint.readthedocs.io/en/latest/ *) +(* Helpful link on CIL: https://goblint.github.io/cil/ *) +(* TODO: Write tests and test them with `ruby scripts/update_suite.rb group ocaml2` *) +(* after removing the `SKIP` from the beginning of the tests in tests/regression/90-ocaml/{01-bagnall.c,04-o_inter.c} *) + +open GoblintCil +open Analyses + +module VarinfoSet = SetDomain.Make(CilType.Varinfo) + +(** "Fake" variable to handle returning from a function *) +let return_varinfo = dummyFunDec.svar +(** Flag for first function entered *) +let first_function = (emptyFunction "@first").svar +(** Flag for deregistering at return *) +let to_deregister = (emptyFunction "@dereg").svar + +module Spec : Analyses.MCPSpec = +struct + include Analyses.DefaultSpec + + let name () = "ocaml2" + module D = + struct + (* The first set contains variables of type value that are definitely accounted. The second contains definitely registered variables. *) + module P = Lattice.Prod (Lattice.Reverse (VarinfoSet)) (Lattice.Liszt (Lattice.Reverse (VarinfoSet))) + include P + + let empty () = (VarinfoSet.empty (), []) + + (* Puts all registered variables into a single set. *) + let flatten_r (accounted, registered) = + List.fold_left (fun acc r -> VarinfoSet.union acc r) (VarinfoSet.empty ()) registered + + (* After garbage collection, the first set loses variables not in the registered stack. *) + let after_gc (accounted, registered) = + (VarinfoSet.inter accounted (flatten_r (accounted, registered)), registered) + + let mem_a v (accounted, registered) = + VarinfoSet.mem v accounted + + let mem_r v (accounted, registered) = + VarinfoSet.mem v (flatten_r (accounted, registered)) + + let add_a v (accounted, registered) = + (VarinfoSet.add v accounted, registered) + + (* Opens a new block. *) + let push_r (accounted, registered) = + (accounted, VarinfoSet.empty () :: registered) + + (* Registers a variable in the current block. *) + let add_r v (accounted, registered) = + match registered with + | [] -> M.warn "Variable %a registered without CAMLparam0" CilType.Varinfo.pretty v; + (accounted, [VarinfoSet.singleton v]) + | r::rs -> (accounted, (VarinfoSet.add v r)::rs) + + let remove_a v (accounted, registered) = + (VarinfoSet.remove v accounted, registered) + + (* Simulates End_roots by removing one block. *) + (* TODO: End_roots actually removes blocks until it has removed one named caml_roots_block. *) + let pop_r (accounted, registered) = + match registered with + | [] -> (accounted, []) + | _::rs -> (accounted, rs) + + (* Removes all blocks created in the current scope, like CAMLreturn. *) + (* vs: current function's formals and locals *) + let rec after_drop vs (accounted, registered) = + match registered with + | [] -> (accounted, []) + | r::rs -> + (* Checks whether any of the variables in vs is in the current block. *) + (* TODO: If CAMLparam0 is not used, this will also delete the previous block. Could this be improved? *) + if List.exists (fun v -> VarinfoSet.mem v r) vs then + after_drop vs (accounted, rs) + else (accounted, registered) + end + module C = Printable.Unit + + (* We are context insensitive in this analysis *) + let context man _ _ = () + let startcontext () = () + + + (** Determines whether an expression [e] is healthy, given a [state]. *) + let rec exp_accounted_for (state:D.t) (e:Cil.exp) = match e with + (* Recurse over the structure in the expression, returning true if all varinfo appearing in the expression is accounted for *) + | AddrOf v + | StartOf v + | Lval v -> lval_accounted_for state v + | BinOp (_,e1,e2,_) -> exp_accounted_for state e1 && exp_accounted_for state e2 + | Real e + | Imag e + | SizeOfE e + | AlignOfE e + | CastE (_,e) + | UnOp (_,e,_) -> exp_accounted_for state e + | SizeOf _ | SizeOfStr _ | Const _ | AlignOf _ | AddrOfLabel _ -> true + | Question (b, t, f, _) -> exp_accounted_for state b && exp_accounted_for state t && exp_accounted_for state f + and lval_accounted_for state = function + | (Var v, _) -> + (* Checks whether variable v is accounted for *) (*false*) + if D.mem_a v state then true else (M.warn "Value %a might be garbage collected" CilType.Varinfo.pretty v; false) + | _ -> + (* The Gemara asks: is using an offset safe for the expression? The Gemara answers: by default, no. We assume our language has no pointers *) + false + + (** Determines whether an expression [e] is registered, given a [state]. *) + let rec exp_registered (state:D.t) (e:Cil.exp) = match e with + (* Recurse over the structure in the expression, returning true if all varinfo appearing in the expression is registered *) + | AddrOf v + | StartOf v + | Lval v -> lval_registered state v + | BinOp (_,e1,e2,_) -> exp_registered state e1 && exp_registered state e2 + | Real e + | Imag e + | SizeOfE e + | AlignOfE e + | CastE (_,e) + | UnOp (_,e,_) -> exp_registered state e + | SizeOf _ | SizeOfStr _ | Const _ | AlignOf _ | AddrOfLabel _ -> true + | Question (b, t, f, _) -> exp_registered state b && exp_registered state t && exp_registered state f + and lval_registered state = function + | (Var v, _) -> + (* Checks whether variable v is registered *) (*false*) + D.mem_r v state + | _ -> + false + + let is_value_type (t:typ): bool = match t with + | TNamed (info, attr) -> info.tname = "value" + | _ -> false + + let assignment (v:varinfo) (rval:exp) (rval_type:typ) (state:D.t) (warning:string): D.t = + (* If rval is a pointer, checks whether rval is accounted for, handles assignment to v accordingly *) + if Cil.isPointerType rval_type || is_value_type rval_type then + if exp_accounted_for state rval then + if exp_registered state rval then D.add_a v state + else D.add_a v state + else (M.info "%s" warning; D.remove_a v state) + (* TODO: End_roots should not remove untracked variables like tracked ones. *) + else D.add_a v (D.add_r v state) + + (* transfer functions *) + + (** Handles assignment of [rval] to [lval]. *) + let assign man (lval:lval) (rval:exp) : D.t = + let state = man.local in + match lval with + | Var v,_ -> assignment v rval (Cil.typeOf rval) state "The above is being assigned" + | _ -> state + + (** Handles conditional branching yielding truth value [tv]. *) + let branch man (exp:exp) (tv:bool) : D.t = + (* The expression checked must be accounted for *) + ignore (exp_accounted_for man.local exp); + man.local + + (** Handles going from start node of function [f] into the function body of [f]. + Meant to handle e.g. initializiation of local variables. *) + let body man (f:fundec) : D.t = + let state = man.local in + (* It is assumed that the startstate's values are not nptrs. This avoids warnings from other analyses. *) + if D.mem_a first_function state then + List.iter (fun v -> (if is_value_type v.vtype then + (man.emit (Events.SplitBranch (Cil.Lval (Cil.var v), true))))) + f.sformals; + (* TODO: Is there a way without a flag to only emit at the start? *) + D.remove_a first_function state + + (** Handles the [return] statement, i.e. "return exp" or "return", in function [f]. *) + let return man (exp:exp option) (f:fundec) : D.t = + let state = man.local in + (* Warns if values or pointers are still registered. *) + List.iter (fun v -> if D.mem_r v state && + (Cil.isPointerType (Cil.typeOf (Cil.Lval (Cil.var v))) || + is_value_type (Cil.typeOf (Cil.Lval (Cil.var v)))) + then M.warn "Value %a registered at return" CilType.Varinfo.pretty v) (f.sformals @ f.slocals); + (* After the warning, they need to be counted as deregistered to not upset the stack structure of the outer function. *) + let state = D.after_drop (f.sformals @ f.slocals) state in + match exp with + (* Checks that value returned is accounted for. *) + (* Return_varinfo is used in place of a "real" variable. *) + | Some e -> assignment return_varinfo e (Cil.typeOf e) state "The above is being returned" + (* Checks that value returned is accounted for. *) + (* Return_varinfo is used in place of a "real" variable. *) + (* let return_state = assignment return_varinfo e (Cil.typeOf e) state "The above is being returned" in + (* Remove this function's formals and locals if correctly returned *) + D.remove_r to_deregister (if D.mem_r to_deregister return_state then + List.fold_left (fun st v -> D.remove_a v (D.remove_r v st)) return_state (f.sformals @ f.slocals) + else return_state) *) + | None -> state + + (** For a function call "lval = f(args)" or "f(args)", + [enter] returns a caller state, and the initial state of the callee. + In [enter], the caller state can usually be returned unchanged, as [combine_env] and [combine_assign] (below) + will compute the caller state after the function call, given the return state of the callee. *) + let enter man (lval: lval option) (f:fundec) (args:exp list) : (D.t * D.t) list = + let caller_state = man.local in + List.iter (fun e -> ignore (exp_accounted_for caller_state e)) args; + (* Entering a function doesn't change the caller state *) + let callee_state = List.fold_left2 (fun st v rval -> + (* At the start, arguments are accounted for and not registered. The first_function flag is added.*) + if rval == MyCFG.unknown_exp then + if is_value_type v.vtype then D.add_a first_function (D.add_a v st) + else D.add_a v (D.add_r v (D.push_r st)) + (* Arguments of inner functions inherit the caller's state. *) + (* Every registration copied becomes its own set to avoid them going to a previous bigger set. *) + else (*assignment v rval (Cil.typeOf rval) st "Entering function with possibly deleted argument")*) + if Cil.isPointerType (Cil.typeOf rval) || is_value_type (Cil.typeOf rval) then + if exp_accounted_for st rval then + if exp_registered st rval then D.add_a v (D.add_r v (D.push_r st)) + else D.add_a v st + else (M.info "Entering function with possibly deleted argument"; D.remove_a v st) + else D.add_a v (D.add_r v (D.push_r st))) + caller_state f.sformals args in + (* first component is state of caller, second component is state of callee *) + [caller_state, callee_state] + + (** For a function call "lval = f(args)" or "f(args)", + computes the global environment state of the caller after the call. + Argument [callee_local] is the state of [f] at its return node. *) + let combine_env man (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t = + (* If GC could have triggered during the call, the caller state loses variables not registered in the callee. *) + (* Since the callee state is basically copied from the caller, the caller state changes the same way through the callee's GCs. *) + callee_local + + (** For a function call "lval = f(args)" or "f(args)", + computes the state of the caller after assigning the return value from the call. + Argument [callee_local] is the state of [f] at its return node. *) + let combine_assign man (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t = + let caller_state = man.local in + (* Records whether lval was accounted for. Registration for v must already be handled. *) + (* TODO: What happens if a pointer to a value is returned? *) + match lval with (* The variable returned is played by return_varinfo *) + | Some (Var v, _) -> let state = + (* Unlike other assignment-like functions, the type here is the return type of the function, not of return_varinfo. *) + (match f.svar.vtype with + | TFun (t, _, _, _) -> + assignment v (Cil.Lval (Cil.var return_varinfo)) t caller_state "The above is being combined" + | _ -> caller_state) in + D.remove_a return_varinfo state + | _ -> caller_state + + (** For a call to a _special_ function f "lval = f(args)" or "f(args)", + computes the caller state after the function call. *) + let special man (lval: lval option) (f:varinfo) (arglist:exp list) : D.t = + let caller_state = man.local in + (* To warn about a potential issue in the code, use M.warn. *) + (* caller_state *) + let desc = LibraryFunctions.find f in + List.iter (fun e -> ignore (exp_accounted_for caller_state e)) arglist; (* Just to trigger warnings for arguments passed to special functions *) + match desc.special arglist with + | OCamlParam0 -> D.push_r caller_state + | OCamlParam params -> + (* Variables are registered with a Param macro. *) + List.fold_left (fun state param -> match param with + | AddrOf (Var v, _) -> (match man.ask (Queries.EvalInt (Cil.Lval (Cil.var v))) with + | `Bot -> () + | `Lifted x -> if IntDomain.IntDomTuple.equal_to Z.zero x <> `Neq then M.warn "Null pointer registered" + | `Top -> M.warn "Null pointer possibly registered" + ); + D.add_r v state + | _ -> state + ) caller_state params + | OCamlAlloc size_exp -> + (* Garbage collection may trigger here and overwrite unregistered variables. *) + M.debug "Garbage collection triggers"; + (match lval with + | Some (Var v, _) -> D.add_a v (D.after_gc caller_state) + | _ -> D.after_gc caller_state + ) + | OCamlDrop -> + (* Deregisters all formal and local variables. *) + let caller_fun = Node.find_fundec man.node in + D.after_drop (caller_fun.sformals @ caller_fun.slocals) caller_state + | OCamlEndRoots -> D.pop_r caller_state + | _ -> caller_state + + (* You may leave these alone *) + let startstate v = D.empty () + let threadenter man ~multiple lval f args = [D.top ()] + let threadspawn man ~multiple lval f args fman = man.local + let exitstate v = D.top () +end + +let _ = + MCP.register_analysis (module Spec : MCPSpec) diff --git a/src/util/library/libraryDesc.ml b/src/util/library/libraryDesc.ml index ed1d3fc3c2..ad2a95e2b8 100644 --- a/src/util/library/libraryDesc.ml +++ b/src/util/library/libraryDesc.ml @@ -49,8 +49,10 @@ type special = | Malloc of Cil.exp | OCamlAlloc of Cil.exp (* TODO: Rethink OCamlParam's placement. *) + | OCamlParam0 | OCamlParam of Cil.exp list - | OCamlReturn + | OCamlDrop + | OCamlEndRoots | Calloc of { count: Cil.exp; size: Cil.exp; } | Realloc of { ptr: Cil.exp; size: Cil.exp; } | Free of Cil.exp diff --git a/src/util/library/libraryFunctions.ml b/src/util/library/libraryFunctions.ml index 1c3bc4c0b9..2bd295544f 100644 --- a/src/util/library/libraryFunctions.ml +++ b/src/util/library/libraryFunctions.ml @@ -1252,13 +1252,14 @@ let ocaml_descs_list: (string * LibraryDesc.t) list = LibraryDsl.[ ("caml_alloc_3", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]; __ "arg3" [r]] @@ fun tag _arg1 _arg2 _arg3 -> OCamlAlloc (GoblintCil.integer 3)); ("caml_alloc_4", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]; __ "arg3" [r]; __ "arg4" [r]] @@ fun tag _arg1 _arg2 _arg3 _arg4 -> OCamlAlloc (GoblintCil.integer 4)); ("caml_alloc_5", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]; __ "arg3" [r]; __ "arg4" [r]; __ "arg5" [r]] @@ fun tag _arg1 _arg2 _arg3 _arg4 _arg5 -> OCamlAlloc (GoblintCil.integer 5)); - ("__goblint_caml_param0", special [] @@ OCamlParam []); + ("__goblint_caml_param0", special [] @@ OCamlParam0); ("__goblint_caml_param1", special [__ "param" []] @@ fun param -> OCamlParam [param]); ("__goblint_caml_param2", special [__ "param1" []; __ "param2" []] @@ fun param1 param2 -> OCamlParam [param1; param2]); ("__goblint_caml_param3", special [__ "param1" []; __ "param2" []; __ "param3" []] @@ fun param1 param2 param3 -> OCamlParam [param1; param2; param3]); ("__goblint_caml_param4", special [__ "param1" []; __ "param2" []; __ "param3" []; __ "param4" []] @@ fun param1 param2 param3 param4 -> OCamlParam [param1; param2; param3; param4]); ("__goblint_caml_param5", special [__ "param1" []; __ "param2" []; __ "param3" []; __ "param4" []; __ "param5" []] @@ fun param1 param2 param3 param4 param5 -> OCamlParam [param1; param2; param3; param4; param5]); - ("__goblint_caml_return", special [] @@ OCamlReturn); + ("__goblint_caml_drop", special [] @@ OCamlDrop); + ("__goblint_caml_end_roots", special [] @@ OCamlEndRoots); ] [@@coverage off] diff --git a/tests/regression/90-ocaml/06-nested.c b/tests/regression/90-ocaml/06-nested.c index e147acab1b..7820109b6b 100644 --- a/tests/regression/90-ocaml/06-nested.c +++ b/tests/regression/90-ocaml/06-nested.c @@ -59,4 +59,19 @@ CAMLprim value pringo_LXM_init_unboxed(uint64_t i1, uint64_t i2, st->x[1] = i3 != 0 ? i3 : 2; /* must be nonzero */ st->s = i4; return v; +} + +// If entering does not copy registration status, this function will give false positives. +CAMLprim value enter_test_1(value v) +{ + CAMLparam1(v); + value res = enter_test_2(v); // NOWARN + CAMLreturn(res); // NOWARN +} + +CAMLprim value enter_test_2(value v) +{ + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + // TODO: Inner function warns of memory leak. Are inner and outer registration the same or different? Investigate. + return v; // WARN } \ No newline at end of file diff --git a/tests/regression/90-ocaml/08-registrations.c b/tests/regression/90-ocaml/08-registrations.c index 9814a54f68..7269d9a235 100644 --- a/tests/regression/90-ocaml/08-registrations.c +++ b/tests/regression/90-ocaml/08-registrations.c @@ -22,6 +22,7 @@ CAMLprim value registration_test_1(value v) // If res is then deregistered, the status of v should follow. Changing the analysis to account for this case would be very hard. CAMLprim value registration_test_2(value v) { + CAMLparam0(); CAMLlocal1(res); res = v; caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); @@ -34,7 +35,7 @@ CAMLprim value registration_test_3(value v) CAMLparam1(v); value res = v; caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); - CAMLreturn(res); // WARN + CAMLreturn(res); // TODO NOWARN } CAMLprim value registration_test_4(value v) @@ -49,11 +50,12 @@ CAMLprim value registration_test_4(value v) // TODO: Fix or count as limitation. CAMLprim value registration_test_5(value v) { + CAMLparam0(); CAMLlocal2(res, res2); res = v; caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); res2 = v; - CAMLreturn(res2); // NOWARN + CAMLreturn(res2); // TODO NOWARN } // The memory leak test. @@ -64,4 +66,15 @@ CAMLprim value registration_test_6(value v) res = v; caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); return res; // WARN +} + +// With two begin_roots and one end_roots, something stays, but the analysis does not warn. +CAMLprim value registration_test_7(value v) +{ + value res = Val_unit; + Begin_roots1(v); + Begin_roots1(res); + res = v; + End_roots(); + return res; // TODO WARN } \ No newline at end of file diff --git a/tests/regression/90-ocaml/goblint_caml.h b/tests/regression/90-ocaml/goblint_caml.h index 8f4c3e308b..e3e9b5dd4b 100644 --- a/tests/regression/90-ocaml/goblint_caml.h +++ b/tests/regression/90-ocaml/goblint_caml.h @@ -11,6 +11,10 @@ struct LXM_state { uint64_t a; uint64_t x[2]; uint64_t s; }; #undef CAMLprim #define CAMLprim __attribute__((c_stub)) +// Redefine Val_unit to explicitly be of type value. +#undef Val_unit +#define Val_unit ((value) 1) + // Param and local macros redefined to register variables as GC roots, and CAMLreturn mocked to work with them. #undef CAMLparam0 #undef CAMLparam1 @@ -19,11 +23,11 @@ struct LXM_state { uint64_t a; uint64_t x[2]; uint64_t s; }; #undef CAMLparam4 #undef CAMLparam5 #define CAMLparam0() __goblint_caml_param0() -#define CAMLparam1(x) __goblint_caml_param1(&x) -#define CAMLparam2(x, y) __goblint_caml_param2(&x, &y) -#define CAMLparam3(x, y, z) __goblint_caml_param3(&x, &y, &z) -#define CAMLparam4(x, y, z, w) __goblint_caml_param4(&x, &y, &z, &w) -#define CAMLparam5(x, y, z, w, v) __goblint_caml_param5(&x, &y, &z, &w, &v) +#define CAMLparam1(x) __goblint_caml_param0(); __goblint_caml_param1(&x) +#define CAMLparam2(x, y) __goblint_caml_param0(); __goblint_caml_param2(&x, &y) +#define CAMLparam3(x, y, z) __goblint_caml_param0(); __goblint_caml_param3(&x, &y, &z) +#define CAMLparam4(x, y, z, t) __goblint_caml_param0(); __goblint_caml_param4(&x, &y, &z, &t) +#define CAMLparam5(x, y, z, t, u) __goblint_caml_param0(); __goblint_caml_param5(&x, &y, &z, &t, &u) #undef CAMLlocal1 #undef CAMLlocal2 @@ -33,17 +37,17 @@ struct LXM_state { uint64_t a; uint64_t x[2]; uint64_t s; }; #define CAMLlocal1(x) value x = Val_unit; __goblint_caml_param1(&x) // The local and param functions behave the same for our purposes, registering variables. #define CAMLlocal2(x, y) value x = Val_unit; value y = Val_unit; __goblint_caml_param2(&x, &y) #define CAMLlocal3(x, y, z) value x = Val_unit; value y = Val_unit; value z = Val_unit; __goblint_caml_param3(&x, &y, &z) -#define CAMLlocal4(x, y, z, w) value x = Val_unit; value y = Val_unit; value z = Val_unit; value w = Val_unit; __goblint_caml_param4(&x, &y, &z, &w) -#define CAMLlocal5(x, y, z, w, v) value x = Val_unit; value y = Val_unit; value z = Val_unit; value w = Val_unit; value v = Val_unit; __goblint_caml_param5(&x, &y, &z, &w, &v) +#define CAMLlocal4(x, y, z, t) value x = Val_unit; value y = Val_unit; value z = Val_unit; value t = Val_unit; __goblint_caml_param4(&x, &y, &z, &t) +#define CAMLlocal5(x, y, z, t, u) value x = Val_unit; value y = Val_unit; value z = Val_unit; value t = Val_unit; value u = Val_unit; __goblint_caml_param5(&x, &y, &z, &t, &u) #undef CAMLreturn -#define CAMLreturn(x) __goblint_caml_return(); return (x) // The real CAMLreturn needs some variable named caml__frame, which is not available in our redefinitions above. +#define CAMLreturn(x) __goblint_caml_drop(); return (x) // The real CAMLreturn needs some variable named caml__frame, which is not available in our redefinitions above. // Marking roots is like registering and deregistering them. #undef Begin_roots1 #undef End_roots -#define Begin_roots1(x) __goblint_caml_param1(&x) -#define End_roots() __goblint_caml_return() +#define Begin_roots1(x) __goblint_caml_param0(); __goblint_caml_param1(&x) +#define End_roots() __goblint_caml_end_roots() // A reference to caml_gc_minor_words_unboxed can be found in _opam/lib/ocaml/ml/gc.ml. #define caml_gc_minor_words_unboxed() (0.0)