Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions src/analyses/base.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2736,6 +2736,32 @@ struct
set_many ~man st ((eval_lv ~man st lv, (Cilfacade.typeOfLval lv), VD.Address addr) :: blob_set)
| _ -> st
end
| OCamlAlloc wosize, _ ->
(*begin match lv with
| Some lv ->
let heap_var = AD.unknown_ptr (*AD.of_var (heap_var false ctx)*) in
let sizeval = eval_int ~ctx st wosize in
set_many ~ctx st [
(heap_var, TVoid [], Blob (VD.bot (), sizeval, true));
(eval_lv ~ctx st lv, (Cilfacade.typeOfLval lv), Address heap_var)
]
| _ -> st
end*)
let open Queries.AllocationLocation in
begin
(* The behavior for alloc(0) is implementation defined. Here, we rely on the options specified for the malloc also applying to alloca. *)
match lv with
| Some lv ->
let loc = Heap in
let (heap_var, addr) = alloc loc wosize in
(* ignore @@ printf "alloca will allocate %a bytes\n" ID.pretty (eval_int ~man size); *)
let blob_set = Option.map_default (fun heap_var -> [(heap_var, TVoid [], VD.Blob (VD.bot (), eval_int ~man st wosize, ZeroInit.malloc))]) [] heap_var in
Comment on lines +2756 to +2758
Copy link

Copilot AI Feb 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OCamlAlloc wosize is modeled using alloc loc wosize, but wosize for OCaml allocation APIs is in words (not bytes). Treating it as bytes under-allocates blobs and can cause incorrect memory modeling (e.g., memcpy into an OCaml block may look out-of-bounds). Convert wosize to bytes (e.g., multiply by word size / sizeof(value)) before allocating/setting blob sizes.

Suggested change
let (heap_var, addr) = alloc loc wosize in
(* ignore @@ printf "alloca will allocate %a bytes\n" ID.pretty (eval_int ~man size); *)
let blob_set = Option.map_default (fun heap_var -> [(heap_var, TVoid [], VD.Blob (VD.bot (), eval_int ~man st wosize, ZeroInit.malloc))]) [] heap_var in
(* OCaml [wosize] is in words; convert to bytes using the machine word size (size of a pointer). *)
let word_size_bytes = Cil.bitsSizeOf (TPtr (TVoid [], [])) / 8 in
let word_size_expr = Cil.integer word_size_bytes in
let size_bytes = BinOp (Mult, wosize, word_size_expr, Cil.intType) in
let (heap_var, addr) = alloc loc size_bytes in
(* ignore @@ printf "alloca will allocate %a bytes\n" ID.pretty (eval_int ~man size); *)
let blob_set =
Option.map_default
(fun heap_var ->
[(heap_var, TVoid [], VD.Blob (VD.bot (), eval_int ~man st size_bytes, ZeroInit.malloc))])
[] heap_var
in

Copilot uses AI. Check for mistakes.
set_many ~man st ((eval_lv ~man st lv, (Cilfacade.typeOfLval lv), VD.Address addr) :: blob_set)
| _ -> st
end
(* TODO: Rethink OCamlParam's placement *)
| OCamlParam params, _ ->
List.fold_left (fun acc param -> let _ = eval_rv ~man acc param in acc) st params
| Calloc { count = n; size }, _ ->
begin match lv with
| Some lv -> (* array length is set to one, as num*size is done when turning into `Calloc *)
Expand Down
253 changes: 253 additions & 0 deletions src/analyses/ocaml.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
(** Simple interprocedural analysis of OCaml C-stubs ([ocaml]). *)

(* Goblint documentation: https://goblint.readthedocs.io/en/latest/ *)
(* Helpful link on CIL: https://goblint.github.io/cil/ *)
(* TODO: Write tests and test them with `ruby scripts/update_suite.rb group ocaml` *)
(* after removing the `SKIP` from the beginning of the tests in tests/regression/90-ocaml/{01-bagnall.c,04-o_inter.c} *)

open GoblintCil
open Analyses

module VarinfoSet = SetDomain.Make(CilType.Varinfo)

(** "Fake" variable to handle returning from a function *)
let return_varinfo = dummyFunDec.svar
(** Flag for first function entered *)
let first_function = (emptyFunction "@first").svar
(** Flag for deregistering at return *)
let to_deregister = (emptyFunction "@dereg").svar

module Spec : Analyses.MCPSpec =
struct
include Analyses.DefaultSpec

let name () = "ocaml"
module D =
struct
(* The first set contains variables of type value that are definitely accounted. The second contains definitely registered variables. *)
module P = Lattice.Reverse (Lattice.Prod (VarinfoSet) (VarinfoSet))
include P

let empty () = (VarinfoSet.empty (), VarinfoSet.empty ())

(* After garbage collection, the first set loses variables not in the second set. *)
let after_gc (accounted, registered) = (VarinfoSet.inter accounted registered, registered)

(* Untracked variables are always fine. *)
let mem_a v (accounted, registered) =
VarinfoSet.mem v accounted

let mem_r v (accounted, registered) =
VarinfoSet.mem v registered

let add_a v (accounted, registered) =
(VarinfoSet.add v accounted, registered)

let add_r v (accounted, registered) =
(accounted, VarinfoSet.add v registered)

let remove_a v (accounted, registered) =
(VarinfoSet.remove v accounted, registered)

let remove_r v (accounted, registered) =
(accounted, VarinfoSet.remove v registered)
end
module C = Printable.Unit

(* We are context insensitive in this analysis *)
let context ctx _ _ = ()
let startcontext () = ()


(** Determines whether an expression [e] is healthy, given a [state]. *)
let rec exp_accounted_for (state:D.t) (e:Cil.exp) = match e with
(* Recurse over the structure in the expression, returning true if all varinfo appearing in the expression is accounted for *)
| AddrOf v
| StartOf v
| Lval v -> lval_accounted_for state v
| BinOp (_,e1,e2,_) -> exp_accounted_for state e1 && exp_accounted_for state e2
| Real e
| Imag e
| SizeOfE e
| AlignOfE e
| CastE (_,e)
| UnOp (_,e,_) -> exp_accounted_for state e
| SizeOf _ | SizeOfStr _ | Const _ | AlignOf _ | AddrOfLabel _ -> true
| Question (b, t, f, _) -> exp_accounted_for state b && exp_accounted_for state t && exp_accounted_for state f
and lval_accounted_for state = function
| (Var v, _) ->
(* Checks whether variable v is accounted for *) (*false*)
if D.mem_a v state then true else (M.warn "Value %a might be garbage collected" CilType.Varinfo.pretty v; false)
| _ ->
(* The Gemara asks: is using an offset safe for the expression? The Gemara answers: by default, no. We assume our language has no pointers *)
false

(** Determines whether an expression [e] is registered, given a [state]. *)
let rec exp_registered (state:D.t) (e:Cil.exp) = match e with
(* Recurse over the structure in the expression, returning true if all varinfo appearing in the expression is registered *)
| AddrOf v
| StartOf v
| Lval v -> lval_registered state v
| BinOp (_,e1,e2,_) -> exp_registered state e1 && exp_registered state e2
| Real e
| Imag e
| SizeOfE e
| AlignOfE e
| CastE (_,e)
| UnOp (_,e,_) -> exp_registered state e
| SizeOf _ | SizeOfStr _ | Const _ | AlignOf _ | AddrOfLabel _ -> true
| Question (b, t, f, _) -> exp_registered state b && exp_registered state t && exp_registered state f
and lval_registered state = function
| (Var v, _) ->
(* Checks whether variable v is registered *) (*false*)
D.mem_r v state
| _ ->
false

let is_value_type (t:typ): bool = match t with
| TNamed (info, attr) -> info.tname = "value"
| _ -> false

let assignment (v:varinfo) (rval:exp) (rval_type:typ) (state:D.t) (warning:string): D.t =
(* If rval is a pointer, checks whether rval is accounted for, handles assignment to v accordingly *)
if Cil.isPointerType rval_type || is_value_type rval_type then
if exp_accounted_for state rval then
if exp_registered state rval then D.add_a v state
else D.add_a v state
else (M.info "%s" warning; D.remove_a v state)
else D.add_a v (D.add_r v state)

(* transfer functions *)

(** Handles assignment of [rval] to [lval]. *)
let assign ctx (lval:lval) (rval:exp) : D.t =
let state = ctx.local in
match lval with
| Var v,_ -> assignment v rval (Cil.typeOf rval) state "The above is being assigned"

Check warning

Code scanning / Semgrep OSS

Semgrep Finding: semgrep.cilfacade Warning

use Cilfacade instead
| _ -> state

(** Handles conditional branching yielding truth value [tv]. *)
let branch ctx (exp:exp) (tv:bool) : D.t =
(* The expression checked must be accounted for *)
ignore (exp_accounted_for ctx.local exp);
ctx.local

(** Handles going from start node of function [f] into the function body of [f].
Meant to handle e.g. initializiation of local variables. *)
let body ctx (f:fundec) : D.t =
let state = ctx.local in
(* It is assumed that the startstate's values are not nptrs. This avoids warnings from other analyses. *)
if D.mem_r first_function state then
List.iter (fun v -> (if is_value_type v.vtype then
(ctx.emit (Events.SplitBranch (Cil.Lval (Cil.var v), true)))))
f.sformals;
(* TODO: Is there a way without a flag to only emit at the start? *)
D.remove_r first_function state

(** Handles the [return] statement, i.e. "return exp" or "return", in function [f]. *)
let return ctx (exp:exp option) (f:fundec) : D.t =
let state = ctx.local in
(* Warn if variables are still registered *)
List.iter (fun v -> if D.mem_r v state then M.warn "Value %a registered at return" CilType.Varinfo.pretty v) (f.sformals @ f.slocals);
match exp with
(* Checks that value returned is accounted for. *)
(* Return_varinfo is used in place of a "real" variable. *)
(* TODO: Warn if registered local variables remain. *)
| Some e -> assignment return_varinfo e (Cil.typeOf e) state "The above is being returned"

Check warning

Code scanning / Semgrep OSS

Semgrep Finding: semgrep.cilfacade Warning

use Cilfacade instead
(* Checks that value returned is accounted for. *)
(* Return_varinfo is used in place of a "real" variable. *)
(* let return_state = assignment return_varinfo e (Cil.typeOf e) state "The above is being returned" in
(* Remove this function's formals and locals if correctly returned *)
D.remove_r to_deregister (if D.mem_r to_deregister return_state then
List.fold_left (fun st v -> D.remove_a v (D.remove_r v st)) return_state (f.sformals @ f.slocals)
else return_state) *)
| None -> state

(** For a function call "lval = f(args)" or "f(args)",
[enter] returns a caller state, and the initial state of the callee.
In [enter], the caller state can usually be returned unchanged, as [combine_env] and [combine_assign] (below)
will compute the caller state after the function call, given the return state of the callee. *)
let enter ctx (lval: lval option) (f:fundec) (args:exp list) : (D.t * D.t) list =
let caller_state = ctx.local in
List.iter (fun e -> ignore (exp_accounted_for caller_state e)) args;
(* Entering a function doesn't change the caller state *)
let callee_state = List.fold_left2 (fun st v rval ->
(* At the start, arguments are accounted for and not registered. The first_function flag is added.*)
if rval == MyCFG.unknown_exp then
if is_value_type v.vtype then D.add_r first_function (D.add_a v (D.remove_r v st))
else D.add_a v (D.add_r v st)
(* Arguments of inner functions inherit the caller's state. *)
(* TODO: Assignment was changed and no longer copies registration. It must be passed using other means. *)
else assignment v rval (Cil.typeOf rval) st "Entering function with possibly deleted argument")

Check warning

Code scanning / Semgrep OSS

Semgrep Finding: semgrep.cilfacade Warning

use Cilfacade instead
caller_state f.sformals args in
(* first component is state of caller, second component is state of callee *)
[caller_state, callee_state]

(** For a function call "lval = f(args)" or "f(args)",
computes the global environment state of the caller after the call.
Argument [callee_local] is the state of [f] at its return node. *)
let combine_env ctx (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t =
(* If GC could have triggered during the call, the caller state loses variables not registered in the callee. *)
(* Since the callee state is basically copied from the caller, the caller state changes the same way through the callee's GCs. *)
callee_local

(** For a function call "lval = f(args)" or "f(args)",
computes the state of the caller after assigning the return value from the call.
Argument [callee_local] is the state of [f] at its return node. *)
let combine_assign ctx (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t =
let caller_state = ctx.local in
(* Records whether lval was accounted for. Registration for v must already be handled. *)
(* TODO: What happens if a pointer to a value is returned? *)
match lval with (* The variable returned is played by return_varinfo *)
| Some (Var v, _) -> let state =
(* Unlike other assignment-like functions, the type here is the return type of the function, not of return_varinfo. *)
(match f.svar.vtype with
| TFun (t, _, _, _) ->
assignment v (Cil.Lval (Cil.var return_varinfo)) t caller_state "The above is being combined"
| _ -> caller_state) in
D.remove_a return_varinfo (D.remove_r return_varinfo state)
| _ -> caller_state

(** For a call to a _special_ function f "lval = f(args)" or "f(args)",
computes the caller state after the function call. *)
let special ctx (lval: lval option) (f:varinfo) (arglist:exp list) : D.t =
let caller_state = ctx.local in
(* To warn about a potential issue in the code, use M.warn. *)
(* caller_state *)
let desc = LibraryFunctions.find f in
List.iter (fun e -> ignore (exp_accounted_for caller_state e)) arglist; (* Just to trigger warnings for arguments passed to special functions *)
match desc.special arglist with
| OCamlParam params ->
(* Variables are registered with a Param macro. *)
List.fold_left (fun state param -> match param with
| AddrOf (Var v, _) -> (match ctx.ask (Queries.EvalInt (Cil.Lval (Cil.var v))) with
| `Bot -> ()
| `Lifted x -> if IntDomain.IntDomTuple.equal_to Z.zero x <> `Neq then M.warn "Null pointer registered"
| `Top -> M.warn "Null pointer possibly registered"
);
D.add_r v state
| _ -> state
) caller_state params
| OCamlAlloc size_exp ->
(* Garbage collection may trigger here and overwrite unregistered variables. *)
M.debug "Garbage collection triggers";
(match lval with
| Some (Var v, _) -> D.add_a v (D.after_gc caller_state)
| _ -> D.after_gc caller_state
)
| OCamlReturn ->
(* Deregisters all formal and local variables. *)
(* TODO: Write tests to check not too much is deregistered. *)
let caller_fun = Node.find_fundec ctx.node in
List.fold_left (fun st v -> D.remove_r v st) caller_state (caller_fun.sformals @ caller_fun.slocals)
| _ -> caller_state

(* You may leave these alone *)
let startstate v = D.empty ()
let threadenter ctx ~multiple lval f args = [D.top ()]
let threadspawn ctx ~multiple lval f args fctx = ctx.local
let exitstate v = D.top ()
end

let _ =
MCP.register_analysis (module Spec : MCPSpec)
3 changes: 1 addition & 2 deletions src/cdomain/value/cdomains/valueDomain.ml
Original file line number Diff line number Diff line change
Expand Up @@ -520,8 +520,7 @@ struct
(* cast to voidPtr are ignored TODO what happens if our value does not fit? *)
| TPtr (t,_) ->
Address (match v with
| Int x when ID.to_int x = Some Z.zero -> AD.null_ptr
| Int x -> AD.top_ptr
| Int x -> AD.of_int x
(* we ignore casts to void* (above)! TODO report UB! *)
| Address x -> cast_addr t x
(*| Address x -> x*)
Expand Down
1 change: 1 addition & 0 deletions src/common/util/cilfacade.ml
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ let getFuns fileAST : startfuns =
Logs.debug "Cleanup function: %s" mn; set_string "exitfun[+]" mn; add_exit def acc
| GFun ({svar={vstorage=NoStorage; vattr; _}; _} as def, _) when get_bool "nonstatic" && not (Cil.hasAttribute "goblint_stub" vattr) -> add_other def acc
| GFun ({svar={vattr; _}; _} as def, _) when get_bool "allfuns" && not (Cil.hasAttribute "goblint_stub" vattr) -> add_other def acc
| GFun({svar={vname=mn; vattr=attr; vtype= TFun (t, _, _, _); _}; _} as def, _) when Cil.hasAttribute "c_stub" attr || Cil.hasAttribute "c_stub" (Cil.typeAttrs(t)) -> add_main def acc
| _ -> acc
in
foldGlobals fileAST f ([],[],[])
Expand Down
6 changes: 4 additions & 2 deletions src/config/options.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1501,7 +1501,8 @@
"pcre",
"zlib",
"liblzma",
"legacy"
"legacy",
"ocaml"
]
},
"default": [
Expand All @@ -1513,7 +1514,8 @@
"linux-userspace",
"goblint",
"ncurses",
"legacy"
"legacy",
"ocaml"
Comment on lines +1517 to +1518
Copy link

Copilot AI Feb 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lib.activated now includes "ocaml" in the default list. This changes analyzer behavior globally by treating caml_* functions as special even when the OCaml analysis isn't enabled, which can be a surprising breaking change. Consider keeping ocaml available in the enum but not enabling it by default (require explicit opt-in).

Suggested change
"legacy",
"ocaml"
"legacy"

Copilot uses AI. Check for mistakes.
]
}
},
Expand Down
4 changes: 4 additions & 0 deletions src/util/library/libraryDesc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ type math =
type special =
| Alloca of Cil.exp
| Malloc of Cil.exp
| OCamlAlloc of Cil.exp
(* TODO: Rethink OCamlParam's placement. *)
| OCamlParam of Cil.exp list
| OCamlReturn
| Calloc of { count: Cil.exp; size: Cil.exp; }
| Realloc of { ptr: Cil.exp; size: Cil.exp; }
| Free of Cil.exp
Expand Down
21 changes: 21 additions & 0 deletions src/util/library/libraryFunctions.ml
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ let c_descs_list: (string * LibraryDesc.t) list = LibraryDsl.[
("longjmp", special [__ "env" [r]; __ "value" []] @@ fun env value -> Longjmp { env; value });
("atexit", unknown [drop "function" [if_ (fun () -> not (get_bool "sem.atexit.ignore")) s]]);
("atoi", unknown [drop "nptr" [r]]);
("atof", unknown [drop "nptr" [r]]); (* TODO: Rethink atof's position in this list. *)
("atol", unknown [drop "nptr" [r]]);
("atoll", unknown [drop "nptr" [r]]);
("setlocale", unknown [drop "category" []; drop "locale" [r]]);
Expand Down Expand Up @@ -1242,6 +1243,25 @@ let legacy_libs_misc_list: (string * LibraryDesc.t) list = LibraryDsl.[
]
[@@coverage off]

let ocaml_descs_list: (string * LibraryDesc.t) list = LibraryDsl.[
("caml_copy_double", special [drop "nptr" []] (OCamlAlloc (GoblintCil.integer 1)));
(* Example: ("malloc", special [__ "size" []] @@ fun size -> Malloc size); *)
("caml_alloc_small", special [__ "wosize" []; __ "tag" []] @@ fun wosize tag -> OCamlAlloc wosize);
("camlidl_apron_policy_ptr_c2ml", special [drop "nptr" []] (OCamlAlloc (GoblintCil.integer 1)));
("caml_alloc_2", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]] @@ fun tag _arg1 _arg2 -> OCamlAlloc (GoblintCil.integer 2));
("caml_alloc_3", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]; __ "arg3" [r]] @@ fun tag _arg1 _arg2 _arg3 -> OCamlAlloc (GoblintCil.integer 3));
("caml_alloc_4", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]; __ "arg3" [r]; __ "arg4" [r]] @@ fun tag _arg1 _arg2 _arg3 _arg4 -> OCamlAlloc (GoblintCil.integer 4));
("caml_alloc_5", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]; __ "arg3" [r]; __ "arg4" [r]; __ "arg5" [r]] @@ fun tag _arg1 _arg2 _arg3 _arg4 _arg5 -> OCamlAlloc (GoblintCil.integer 5));
("__goblint_caml_param0", special [] @@ OCamlParam []);
("__goblint_caml_param1", special [__ "param" []] @@ fun param -> OCamlParam [param]);
("__goblint_caml_param2", special [__ "param1" []; __ "param2" []] @@ fun param1 param2 -> OCamlParam [param1; param2]);
("__goblint_caml_param3", special [__ "param1" []; __ "param2" []; __ "param3" []] @@ fun param1 param2 param3 -> OCamlParam [param1; param2; param3]);
("__goblint_caml_param4", special [__ "param1" []; __ "param2" []; __ "param3" []; __ "param4" []] @@ fun param1 param2 param3 param4 -> OCamlParam [param1; param2; param3; param4]);
("__goblint_caml_param5", special [__ "param1" []; __ "param2" []; __ "param3" []; __ "param4" []; __ "param5" []] @@ fun param1 param2 param3 param4 param5 -> OCamlParam [param1; param2; param3; param4; param5]);
("__goblint_caml_return", special [] @@ OCamlReturn);
]
[@@coverage off]

let libraries = Hashtbl.of_list [
("c", c_descs_list @ math_descs_list);
("posix", posix_descs_list);
Expand All @@ -1259,6 +1279,7 @@ let libraries = Hashtbl.of_list [
("zlib", zlib_descs_list);
("liblzma", liblzma_descs_list);
("legacy", legacy_libs_misc_list);
("ocaml", ocaml_descs_list)
]

let libraries =
Expand Down
Loading
Loading