diff --git a/src/analyses/base.ml b/src/analyses/base.ml index c68a414b52..b3504cded6 100644 --- a/src/analyses/base.ml +++ b/src/analyses/base.ml @@ -2736,6 +2736,32 @@ struct set_many ~man st ((eval_lv ~man st lv, (Cilfacade.typeOfLval lv), VD.Address addr) :: blob_set) | _ -> st end + | OCamlAlloc wosize, _ -> + (*begin match lv with + | Some lv -> + let heap_var = AD.unknown_ptr (*AD.of_var (heap_var false ctx)*) in + let sizeval = eval_int ~ctx st wosize in + set_many ~ctx st [ + (heap_var, TVoid [], Blob (VD.bot (), sizeval, true)); + (eval_lv ~ctx st lv, (Cilfacade.typeOfLval lv), Address heap_var) + ] + | _ -> st + end*) + let open Queries.AllocationLocation in + begin + (* The behavior for alloc(0) is implementation defined. Here, we rely on the options specified for the malloc also applying to alloca. *) + match lv with + | Some lv -> + let loc = Heap in + let (heap_var, addr) = alloc loc wosize in + (* ignore @@ printf "alloca will allocate %a bytes\n" ID.pretty (eval_int ~man size); *) + let blob_set = Option.map_default (fun heap_var -> [(heap_var, TVoid [], VD.Blob (VD.bot (), eval_int ~man st wosize, ZeroInit.malloc))]) [] heap_var in + set_many ~man st ((eval_lv ~man st lv, (Cilfacade.typeOfLval lv), VD.Address addr) :: blob_set) + | _ -> st + end + (* TODO: Rethink OCamlParam's placement *) + | OCamlParam params, _ -> + List.fold_left (fun acc param -> let _ = eval_rv ~man acc param in acc) st params | Calloc { count = n; size }, _ -> begin match lv with | Some lv -> (* array length is set to one, as num*size is done when turning into `Calloc *) diff --git a/src/analyses/ocaml.ml b/src/analyses/ocaml.ml new file mode 100644 index 0000000000..d132ac0f58 --- /dev/null +++ b/src/analyses/ocaml.ml @@ -0,0 +1,261 @@ +(** Simple interprocedural analysis of OCaml C-stubs ([ocaml]). *) + +(* Goblint documentation: https://goblint.readthedocs.io/en/latest/ *) +(* Helpful link on CIL: https://goblint.github.io/cil/ *) +(* TODO: Write tests and test them with `ruby scripts/update_suite.rb group ocaml` *) +(* after removing the `SKIP` from the beginning of the tests in tests/regression/90-ocaml/{01-bagnall.c,04-o_inter.c} *) + +open GoblintCil +open Analyses + +module VarinfoSet = SetDomain.Make(CilType.Varinfo) + +(** "Fake" variable to handle returning from a function *) +let return_varinfo = dummyFunDec.svar +(** Flag for first function entered *) +let first_function = (emptyFunction "@first").svar +(** Flag for deregistering at return *) +let to_deregister = (emptyFunction "@dereg").svar + +module Spec : Analyses.MCPSpec = +struct + include Analyses.DefaultSpec + + let name () = "ocaml" + module D = + struct + (* The first set contains variables of type value that are definitely accounted. The second contains definitely registered variables. *) + module P = Lattice.Reverse (Lattice.Prod (VarinfoSet) (VarinfoSet)) + include P + + let empty () = (VarinfoSet.empty (), VarinfoSet.empty ()) + + (* After garbage collection, the first set loses variables not in the second set. *) + let after_gc (accounted, registered) = (VarinfoSet.inter accounted registered, registered) + + (* Untracked variables are always fine. *) + let mem_a v (accounted, registered) = + VarinfoSet.mem v accounted + + let mem_r v (accounted, registered) = + VarinfoSet.mem v registered + + let add_a v (accounted, registered) = + (VarinfoSet.add v accounted, registered) + + let add_r v (accounted, registered) = + (accounted, VarinfoSet.add v registered) + + let remove_a v (accounted, registered) = + (VarinfoSet.remove v accounted, registered) + + let remove_r v (accounted, registered) = + (accounted, VarinfoSet.remove v registered) + end + module C = Printable.Unit + + (* We are context insensitive in this analysis *) + let context man _ _ = () + let startcontext () = () + + + (** Determines whether an expression [e] is healthy, given a [state]. *) + let rec exp_accounted_for (state:D.t) (e:Cil.exp) = match e with + (* Recurse over the structure in the expression, returning true if all varinfo appearing in the expression is accounted for *) + | AddrOf v + | StartOf v + | Lval v -> lval_accounted_for state v + | BinOp (_,e1,e2,_) -> exp_accounted_for state e1 && exp_accounted_for state e2 + | Real e + | Imag e + | SizeOfE e + | AlignOfE e + | CastE (_,e) + | UnOp (_,e,_) -> exp_accounted_for state e + | SizeOf _ | SizeOfStr _ | Const _ | AlignOf _ | AddrOfLabel _ -> true + | Question (b, t, f, _) -> exp_accounted_for state b && exp_accounted_for state t && exp_accounted_for state f + and lval_accounted_for state = function + | (Var v, _) -> + (* Checks whether variable v is accounted for *) (*false*) + if D.mem_a v state then true else (M.warn "Value %a might be garbage collected" CilType.Varinfo.pretty v; false) + | _ -> + (* The Gemara asks: is using an offset safe for the expression? The Gemara answers: by default, no. We assume our language has no pointers *) + false + + (** Determines whether an expression [e] is registered, given a [state]. *) + let rec exp_registered (state:D.t) (e:Cil.exp) = match e with + (* Recurse over the structure in the expression, returning true if all varinfo appearing in the expression is registered *) + | AddrOf v + | StartOf v + | Lval v -> lval_registered state v + | BinOp (_,e1,e2,_) -> exp_registered state e1 && exp_registered state e2 + | Real e + | Imag e + | SizeOfE e + | AlignOfE e + | CastE (_,e) + | UnOp (_,e,_) -> exp_registered state e + | SizeOf _ | SizeOfStr _ | Const _ | AlignOf _ | AddrOfLabel _ -> true + | Question (b, t, f, _) -> exp_registered state b && exp_registered state t && exp_registered state f + and lval_registered state = function + | (Var v, _) -> + (* Checks whether variable v is registered *) (*false*) + D.mem_r v state + | _ -> + false + + let is_value_type (t:typ): bool = match t with + | TNamed (info, attr) -> info.tname = "value" + | _ -> false + + let assignment (v:varinfo) (rval:exp) (rval_type:typ) (state:D.t) (warning:string): D.t = + (* If rval is a pointer, checks whether rval is accounted for, handles assignment to v accordingly *) + if Cil.isPointerType rval_type || is_value_type rval_type then + if exp_accounted_for state rval then + if exp_registered state rval then D.add_a v state + else D.add_a v state + else (M.info "%s" warning; D.remove_a v state) + else D.add_a v (D.add_r v state) + + (* transfer functions *) + + (** Handles assignment of [rval] to [lval]. *) + let assign man (lval:lval) (rval:exp) : D.t = + let state = man.local in + match lval with + | Var v,_ -> assignment v rval (Cil.typeOf rval) state "The above is being assigned" + | _ -> state + + (** Handles conditional branching yielding truth value [tv]. *) + let branch man (exp:exp) (tv:bool) : D.t = + (* The expression checked must be accounted for *) + ignore (exp_accounted_for man.local exp); + man.local + + (** Handles going from start node of function [f] into the function body of [f]. + Meant to handle e.g. initializiation of local variables. *) + let body man (f:fundec) : D.t = + let state = man.local in + (* It is assumed that the startstate's values are not nptrs. This avoids warnings from other analyses. *) + if D.mem_r first_function state then + List.iter (fun v -> (if is_value_type v.vtype then + (man.emit (Events.SplitBranch (Cil.Lval (Cil.var v), true))))) + f.sformals; + (* TODO: Is there a way without a flag to only emit at the start? *) + D.remove_r first_function state + + (** Handles the [return] statement, i.e. "return exp" or "return", in function [f]. *) + let return man (exp:exp option) (f:fundec) : D.t = + let state = man.local in + (* Warns if values or pointers are still registered. *) + List.iter (fun v -> if D.mem_r v state && + (Cil.isPointerType (Cil.typeOf (Cil.Lval (Cil.var v))) || + is_value_type (Cil.typeOf (Cil.Lval (Cil.var v)))) + then M.warn "Value %a registered at return" CilType.Varinfo.pretty v) (f.sformals @ f.slocals); + match exp with + (* Checks that value returned is accounted for. *) + (* Return_varinfo is used in place of a "real" variable. *) + | Some e -> assignment return_varinfo e (Cil.typeOf e) state "The above is being returned" + (* Checks that value returned is accounted for. *) + (* Return_varinfo is used in place of a "real" variable. *) + (* let return_state = assignment return_varinfo e (Cil.typeOf e) state "The above is being returned" in + (* Remove this function's formals and locals if correctly returned *) + D.remove_r to_deregister (if D.mem_r to_deregister return_state then + List.fold_left (fun st v -> D.remove_a v (D.remove_r v st)) return_state (f.sformals @ f.slocals) + else return_state) *) + | None -> state + + (** For a function call "lval = f(args)" or "f(args)", + [enter] returns a caller state, and the initial state of the callee. + In [enter], the caller state can usually be returned unchanged, as [combine_env] and [combine_assign] (below) + will compute the caller state after the function call, given the return state of the callee. *) + let enter man (lval: lval option) (f:fundec) (args:exp list) : (D.t * D.t) list = + let caller_state = man.local in + List.iter (fun e -> ignore (exp_accounted_for caller_state e)) args; + (* Entering a function doesn't change the caller state *) + let callee_state = List.fold_left2 (fun st v rval -> + (* At the start, arguments are accounted for and not registered. The first_function flag is added.*) + if rval == MyCFG.unknown_exp then + if is_value_type v.vtype then D.add_r first_function (D.add_a v (D.remove_r v st)) + else D.add_a v (D.add_r v st) + (* Arguments of inner functions inherit the caller's state. *) + (* TODO: Assignment was changed and no longer copies registration. It must be passed using other means. *) + else (*assignment v rval (Cil.typeOf rval) st "Entering function with possibly deleted argument")*) + if Cil.isPointerType (Cil.typeOf rval) || is_value_type (Cil.typeOf rval) then + if exp_accounted_for st rval then + if exp_registered st rval then D.add_a v (D.add_r v st) + else D.add_a v (D.remove_r v st) + else (M.info "Entering function with possibly deleted argument"; D.remove_a v st) + else D.add_a v (D.add_r v st)) + caller_state f.sformals args in + (* first component is state of caller, second component is state of callee *) + [caller_state, callee_state] + + (** For a function call "lval = f(args)" or "f(args)", + computes the global environment state of the caller after the call. + Argument [callee_local] is the state of [f] at its return node. *) + let combine_env man (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t = + (* If GC could have triggered during the call, the caller state loses variables not registered in the callee. *) + (* Since the callee state is basically copied from the caller, the caller state changes the same way through the callee's GCs. *) + callee_local + + (** For a function call "lval = f(args)" or "f(args)", + computes the state of the caller after assigning the return value from the call. + Argument [callee_local] is the state of [f] at its return node. *) + let combine_assign man (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t = + let caller_state = man.local in + (* Records whether lval was accounted for. Registration for v must already be handled. *) + (* TODO: What happens if a pointer to a value is returned? *) + match lval with (* The variable returned is played by return_varinfo *) + | Some (Var v, _) -> let state = + (* Unlike other assignment-like functions, the type here is the return type of the function, not of return_varinfo. *) + (match f.svar.vtype with + | TFun (t, _, _, _) -> + assignment v (Cil.Lval (Cil.var return_varinfo)) t caller_state "The above is being combined" + | _ -> caller_state) in + D.remove_a return_varinfo (D.remove_r return_varinfo state) + | _ -> caller_state + + (** For a call to a _special_ function f "lval = f(args)" or "f(args)", + computes the caller state after the function call. *) + let special man (lval: lval option) (f:varinfo) (arglist:exp list) : D.t = + let caller_state = man.local in + (* To warn about a potential issue in the code, use M.warn. *) + (* caller_state *) + let desc = LibraryFunctions.find f in + List.iter (fun e -> ignore (exp_accounted_for caller_state e)) arglist; (* Just to trigger warnings for arguments passed to special functions *) + match desc.special arglist with + | OCamlParam params -> + (* Variables are registered with a Param macro. *) + List.fold_left (fun state param -> match param with + | AddrOf (Var v, _) -> (match man.ask (Queries.EvalInt (Cil.Lval (Cil.var v))) with + | `Bot -> () + | `Lifted x -> if IntDomain.IntDomTuple.equal_to Z.zero x <> `Neq then M.warn "Null pointer registered" + | `Top -> M.warn "Null pointer possibly registered" + ); + D.add_r v state + | _ -> state + ) caller_state params + | OCamlAlloc size_exp -> + (* Garbage collection may trigger here and overwrite unregistered variables. *) + M.debug "Garbage collection triggers"; + (match lval with + | Some (Var v, _) -> D.add_a v (D.after_gc caller_state) + | _ -> D.after_gc caller_state + ) + | OCamlDrop | OCamlEndRoots -> + (* Deregisters all formal and local variables. *) + (* TODO: Write tests to check not too much is deregistered. *) + let caller_fun = Node.find_fundec man.node in + List.fold_left (fun st v -> D.remove_r v st) caller_state (caller_fun.sformals @ caller_fun.slocals) + | _ -> caller_state + + (* You may leave these alone *) + let startstate v = D.empty () + let threadenter man ~multiple lval f args = [D.top ()] + let threadspawn man ~multiple lval f args fman = man.local + let exitstate v = D.top () +end + +let _ = + MCP.register_analysis (module Spec : MCPSpec) diff --git a/src/analyses/ocaml2.ml b/src/analyses/ocaml2.ml new file mode 100644 index 0000000000..e7d2a97195 --- /dev/null +++ b/src/analyses/ocaml2.ml @@ -0,0 +1,293 @@ +(** Simple interprocedural analysis of OCaml C-stubs ([ocaml2]). *) + +(* Goblint documentation: https://goblint.readthedocs.io/en/latest/ *) +(* Helpful link on CIL: https://goblint.github.io/cil/ *) +(* TODO: Write tests and test them with `ruby scripts/update_suite.rb group ocaml2` *) +(* after removing the `SKIP` from the beginning of the tests in tests/regression/90-ocaml/{01-bagnall.c,04-o_inter.c} *) + +open GoblintCil +open Analyses + +module VarinfoSet = SetDomain.Make(CilType.Varinfo) + +(** "Fake" variable to handle returning from a function *) +let return_varinfo = dummyFunDec.svar +(** Flag for first function entered *) +let first_function = (emptyFunction "@first").svar +(** Flag for deregistering at return *) +let to_deregister = (emptyFunction "@dereg").svar + +module Spec : Analyses.MCPSpec = +struct + include Analyses.DefaultSpec + + let name () = "ocaml2" + module D = + struct + (* The first set contains variables of type value that are definitely accounted. The second contains definitely registered variables. *) + module P = Lattice.Prod (Lattice.Reverse (VarinfoSet)) (Lattice.Liszt (Lattice.Reverse (VarinfoSet))) + include P + + let empty () = (VarinfoSet.empty (), []) + + (* Puts all registered variables into a single set. *) + let flatten_r (accounted, registered) = + List.fold_left (fun acc r -> VarinfoSet.union acc r) (VarinfoSet.empty ()) registered + + (* After garbage collection, the first set loses variables not in the registered stack. *) + let after_gc (accounted, registered) = + (VarinfoSet.inter accounted (flatten_r (accounted, registered)), registered) + + let mem_a v (accounted, registered) = + VarinfoSet.mem v accounted + + let mem_r v (accounted, registered) = + VarinfoSet.mem v (flatten_r (accounted, registered)) + + let add_a v (accounted, registered) = + (VarinfoSet.add v accounted, registered) + + (* Opens a new block. *) + let push_r (accounted, registered) = + (accounted, VarinfoSet.empty () :: registered) + + (* Registers a variable in the current block. *) + let add_r v (accounted, registered) = + match registered with + | [] -> M.warn "Variable %a registered without CAMLparam0" CilType.Varinfo.pretty v; + (accounted, [VarinfoSet.singleton v]) + | r::rs -> (accounted, (VarinfoSet.add v r)::rs) + + let remove_a v (accounted, registered) = + (VarinfoSet.remove v accounted, registered) + + (* Simulates End_roots by removing one block. *) + (* TODO: End_roots actually removes blocks until it has removed one named caml_roots_block. *) + let pop_r (accounted, registered) = + match registered with + | [] -> (accounted, []) + | _::rs -> (accounted, rs) + + (* Removes all blocks created in the current scope, like CAMLreturn. *) + (* vs: current function's formals and locals *) + let rec after_drop vs (accounted, registered) = + match registered with + | [] -> (accounted, []) + | r::rs -> + (* Checks whether any of the variables in vs is in the current block. *) + (* TODO: If CAMLparam0 is not used, this will also delete the previous block. Could this be improved? *) + if List.exists (fun v -> VarinfoSet.mem v r) vs then + after_drop vs (accounted, rs) + else (accounted, registered) + end + module C = Printable.Unit + + (* We are context insensitive in this analysis *) + let context man _ _ = () + let startcontext () = () + + + (** Determines whether an expression [e] is healthy, given a [state]. *) + let rec exp_accounted_for (state:D.t) (e:Cil.exp) = match e with + (* Recurse over the structure in the expression, returning true if all varinfo appearing in the expression is accounted for *) + | AddrOf v + | StartOf v + | Lval v -> lval_accounted_for state v + | BinOp (_,e1,e2,_) -> exp_accounted_for state e1 && exp_accounted_for state e2 + | Real e + | Imag e + | SizeOfE e + | AlignOfE e + | CastE (_,e) + | UnOp (_,e,_) -> exp_accounted_for state e + | SizeOf _ | SizeOfStr _ | Const _ | AlignOf _ | AddrOfLabel _ -> true + | Question (b, t, f, _) -> exp_accounted_for state b && exp_accounted_for state t && exp_accounted_for state f + and lval_accounted_for state = function + | (Var v, _) -> + (* Checks whether variable v is accounted for *) (*false*) + if D.mem_a v state then true else (M.warn "Value %a might be garbage collected" CilType.Varinfo.pretty v; false) + | _ -> + (* The Gemara asks: is using an offset safe for the expression? The Gemara answers: by default, no. We assume our language has no pointers *) + false + + (** Determines whether an expression [e] is registered, given a [state]. *) + let rec exp_registered (state:D.t) (e:Cil.exp) = match e with + (* Recurse over the structure in the expression, returning true if all varinfo appearing in the expression is registered *) + | AddrOf v + | StartOf v + | Lval v -> lval_registered state v + | BinOp (_,e1,e2,_) -> exp_registered state e1 && exp_registered state e2 + | Real e + | Imag e + | SizeOfE e + | AlignOfE e + | CastE (_,e) + | UnOp (_,e,_) -> exp_registered state e + | SizeOf _ | SizeOfStr _ | Const _ | AlignOf _ | AddrOfLabel _ -> true + | Question (b, t, f, _) -> exp_registered state b && exp_registered state t && exp_registered state f + and lval_registered state = function + | (Var v, _) -> + (* Checks whether variable v is registered *) (*false*) + D.mem_r v state + | _ -> + false + + let is_value_type (t:typ): bool = match t with + | TNamed (info, attr) -> info.tname = "value" + | _ -> false + + let assignment (v:varinfo) (rval:exp) (rval_type:typ) (state:D.t) (warning:string): D.t = + (* If rval is a pointer, checks whether rval is accounted for, handles assignment to v accordingly *) + if Cil.isPointerType rval_type || is_value_type rval_type then + if exp_accounted_for state rval then + if exp_registered state rval then D.add_a v state + else D.add_a v state + else (M.info "%s" warning; D.remove_a v state) + (* TODO: End_roots should not remove untracked variables like tracked ones. *) + else D.add_a v (D.add_r v state) + + (* transfer functions *) + + (** Handles assignment of [rval] to [lval]. *) + let assign man (lval:lval) (rval:exp) : D.t = + let state = man.local in + match lval with + | Var v,_ -> assignment v rval (Cil.typeOf rval) state "The above is being assigned" + | _ -> state + + (** Handles conditional branching yielding truth value [tv]. *) + let branch man (exp:exp) (tv:bool) : D.t = + (* The expression checked must be accounted for *) + ignore (exp_accounted_for man.local exp); + man.local + + (** Handles going from start node of function [f] into the function body of [f]. + Meant to handle e.g. initializiation of local variables. *) + let body man (f:fundec) : D.t = + let state = man.local in + (* It is assumed that the startstate's values are not nptrs. This avoids warnings from other analyses. *) + if D.mem_a first_function state then + List.iter (fun v -> (if is_value_type v.vtype then + (man.emit (Events.SplitBranch (Cil.Lval (Cil.var v), true))))) + f.sformals; + (* TODO: Is there a way without a flag to only emit at the start? *) + D.remove_a first_function state + + (** Handles the [return] statement, i.e. "return exp" or "return", in function [f]. *) + let return man (exp:exp option) (f:fundec) : D.t = + let state = man.local in + (* Warns if values or pointers are still registered. *) + List.iter (fun v -> if D.mem_r v state && + (Cil.isPointerType (Cil.typeOf (Cil.Lval (Cil.var v))) || + is_value_type (Cil.typeOf (Cil.Lval (Cil.var v)))) + then M.warn "Value %a registered at return" CilType.Varinfo.pretty v) (f.sformals @ f.slocals); + (* After the warning, they need to be counted as deregistered to not upset the stack structure of the outer function. *) + let state = D.after_drop (f.sformals @ f.slocals) state in + match exp with + (* Checks that value returned is accounted for. *) + (* Return_varinfo is used in place of a "real" variable. *) + | Some e -> assignment return_varinfo e (Cil.typeOf e) state "The above is being returned" + (* Checks that value returned is accounted for. *) + (* Return_varinfo is used in place of a "real" variable. *) + (* let return_state = assignment return_varinfo e (Cil.typeOf e) state "The above is being returned" in + (* Remove this function's formals and locals if correctly returned *) + D.remove_r to_deregister (if D.mem_r to_deregister return_state then + List.fold_left (fun st v -> D.remove_a v (D.remove_r v st)) return_state (f.sformals @ f.slocals) + else return_state) *) + | None -> state + + (** For a function call "lval = f(args)" or "f(args)", + [enter] returns a caller state, and the initial state of the callee. + In [enter], the caller state can usually be returned unchanged, as [combine_env] and [combine_assign] (below) + will compute the caller state after the function call, given the return state of the callee. *) + let enter man (lval: lval option) (f:fundec) (args:exp list) : (D.t * D.t) list = + let caller_state = man.local in + List.iter (fun e -> ignore (exp_accounted_for caller_state e)) args; + (* Entering a function doesn't change the caller state *) + let callee_state = List.fold_left2 (fun st v rval -> + (* At the start, arguments are accounted for and not registered. The first_function flag is added.*) + if rval == MyCFG.unknown_exp then + if is_value_type v.vtype then D.add_a first_function (D.add_a v st) + else D.add_a v (D.add_r v (D.push_r st)) + (* Arguments of inner functions inherit the caller's state. *) + (* Every registration copied becomes its own set to avoid them going to a previous bigger set. *) + else (*assignment v rval (Cil.typeOf rval) st "Entering function with possibly deleted argument")*) + if Cil.isPointerType (Cil.typeOf rval) || is_value_type (Cil.typeOf rval) then + if exp_accounted_for st rval then + if exp_registered st rval then D.add_a v (D.add_r v (D.push_r st)) + else D.add_a v st + else (M.info "Entering function with possibly deleted argument"; D.remove_a v st) + else D.add_a v (D.add_r v (D.push_r st))) + caller_state f.sformals args in + (* first component is state of caller, second component is state of callee *) + [caller_state, callee_state] + + (** For a function call "lval = f(args)" or "f(args)", + computes the global environment state of the caller after the call. + Argument [callee_local] is the state of [f] at its return node. *) + let combine_env man (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t = + (* If GC could have triggered during the call, the caller state loses variables not registered in the callee. *) + (* Since the callee state is basically copied from the caller, the caller state changes the same way through the callee's GCs. *) + callee_local + + (** For a function call "lval = f(args)" or "f(args)", + computes the state of the caller after assigning the return value from the call. + Argument [callee_local] is the state of [f] at its return node. *) + let combine_assign man (lval:lval option) fexp (f:fundec) (args:exp list) fc (callee_local:D.t) (f_ask: Queries.ask): D.t = + let caller_state = man.local in + (* Records whether lval was accounted for. Registration for v must already be handled. *) + (* TODO: What happens if a pointer to a value is returned? *) + match lval with (* The variable returned is played by return_varinfo *) + | Some (Var v, _) -> let state = + (* Unlike other assignment-like functions, the type here is the return type of the function, not of return_varinfo. *) + (match f.svar.vtype with + | TFun (t, _, _, _) -> + assignment v (Cil.Lval (Cil.var return_varinfo)) t caller_state "The above is being combined" + | _ -> caller_state) in + D.remove_a return_varinfo state + | _ -> caller_state + + (** For a call to a _special_ function f "lval = f(args)" or "f(args)", + computes the caller state after the function call. *) + let special man (lval: lval option) (f:varinfo) (arglist:exp list) : D.t = + let caller_state = man.local in + (* To warn about a potential issue in the code, use M.warn. *) + (* caller_state *) + let desc = LibraryFunctions.find f in + List.iter (fun e -> ignore (exp_accounted_for caller_state e)) arglist; (* Just to trigger warnings for arguments passed to special functions *) + match desc.special arglist with + | OCamlParam0 -> D.push_r caller_state + | OCamlParam params -> + (* Variables are registered with a Param macro. *) + List.fold_left (fun state param -> match param with + | AddrOf (Var v, _) -> (match man.ask (Queries.EvalInt (Cil.Lval (Cil.var v))) with + | `Bot -> () + | `Lifted x -> if IntDomain.IntDomTuple.equal_to Z.zero x <> `Neq then M.warn "Null pointer registered" + | `Top -> M.warn "Null pointer possibly registered" + ); + D.add_r v state + | _ -> state + ) caller_state params + | OCamlAlloc size_exp -> + (* Garbage collection may trigger here and overwrite unregistered variables. *) + M.debug "Garbage collection triggers"; + (match lval with + | Some (Var v, _) -> D.add_a v (D.after_gc caller_state) + | _ -> D.after_gc caller_state + ) + | OCamlDrop -> + (* Deregisters all formal and local variables. *) + let caller_fun = Node.find_fundec man.node in + D.after_drop (caller_fun.sformals @ caller_fun.slocals) caller_state + | OCamlEndRoots -> D.pop_r caller_state + | _ -> caller_state + + (* You may leave these alone *) + let startstate v = D.empty () + let threadenter man ~multiple lval f args = [D.top ()] + let threadspawn man ~multiple lval f args fman = man.local + let exitstate v = D.top () +end + +let _ = + MCP.register_analysis (module Spec : MCPSpec) diff --git a/src/cdomain/value/cdomains/valueDomain.ml b/src/cdomain/value/cdomains/valueDomain.ml index 6f234cce50..60417597d9 100644 --- a/src/cdomain/value/cdomains/valueDomain.ml +++ b/src/cdomain/value/cdomains/valueDomain.ml @@ -520,8 +520,7 @@ struct (* cast to voidPtr are ignored TODO what happens if our value does not fit? *) | TPtr (t,_) -> Address (match v with - | Int x when ID.to_int x = Some Z.zero -> AD.null_ptr - | Int x -> AD.top_ptr + | Int x -> AD.of_int x (* we ignore casts to void* (above)! TODO report UB! *) | Address x -> cast_addr t x (*| Address x -> x*) diff --git a/src/common/util/cilfacade.ml b/src/common/util/cilfacade.ml index 452d0297a5..5eaa5c272d 100644 --- a/src/common/util/cilfacade.ml +++ b/src/common/util/cilfacade.ml @@ -232,6 +232,7 @@ let getFuns fileAST : startfuns = Logs.debug "Cleanup function: %s" mn; set_string "exitfun[+]" mn; add_exit def acc | GFun ({svar={vstorage=NoStorage; vattr; _}; _} as def, _) when get_bool "nonstatic" && not (Cil.hasAttribute "goblint_stub" vattr) -> add_other def acc | GFun ({svar={vattr; _}; _} as def, _) when get_bool "allfuns" && not (Cil.hasAttribute "goblint_stub" vattr) -> add_other def acc + | GFun({svar={vname=mn; vattr=attr; vtype= TFun (t, _, _, _); _}; _} as def, _) when Cil.hasAttribute "c_stub" attr || Cil.hasAttribute "c_stub" (Cil.typeAttrs(t)) -> add_main def acc | _ -> acc in foldGlobals fileAST f ([],[],[]) diff --git a/src/config/options.schema.json b/src/config/options.schema.json index 134ce7eb95..edb271afe4 100644 --- a/src/config/options.schema.json +++ b/src/config/options.schema.json @@ -1501,7 +1501,8 @@ "pcre", "zlib", "liblzma", - "legacy" + "legacy", + "ocaml" ] }, "default": [ @@ -1513,7 +1514,8 @@ "linux-userspace", "goblint", "ncurses", - "legacy" + "legacy", + "ocaml" ] } }, diff --git a/src/util/library/libraryDesc.ml b/src/util/library/libraryDesc.ml index 862205aad1..ad2a95e2b8 100644 --- a/src/util/library/libraryDesc.ml +++ b/src/util/library/libraryDesc.ml @@ -47,6 +47,12 @@ type math = type special = | Alloca of Cil.exp | Malloc of Cil.exp + | OCamlAlloc of Cil.exp + (* TODO: Rethink OCamlParam's placement. *) + | OCamlParam0 + | OCamlParam of Cil.exp list + | OCamlDrop + | OCamlEndRoots | Calloc of { count: Cil.exp; size: Cil.exp; } | Realloc of { ptr: Cil.exp; size: Cil.exp; } | Free of Cil.exp diff --git a/src/util/library/libraryFunctions.ml b/src/util/library/libraryFunctions.ml index ee663d4152..2bd295544f 100644 --- a/src/util/library/libraryFunctions.ml +++ b/src/util/library/libraryFunctions.ml @@ -162,6 +162,7 @@ let c_descs_list: (string * LibraryDesc.t) list = LibraryDsl.[ ("longjmp", special [__ "env" [r]; __ "value" []] @@ fun env value -> Longjmp { env; value }); ("atexit", unknown [drop "function" [if_ (fun () -> not (get_bool "sem.atexit.ignore")) s]]); ("atoi", unknown [drop "nptr" [r]]); + ("atof", unknown [drop "nptr" [r]]); (* TODO: Rethink atof's position in this list. *) ("atol", unknown [drop "nptr" [r]]); ("atoll", unknown [drop "nptr" [r]]); ("setlocale", unknown [drop "category" []; drop "locale" [r]]); @@ -1242,6 +1243,26 @@ let legacy_libs_misc_list: (string * LibraryDesc.t) list = LibraryDsl.[ ] [@@coverage off] +let ocaml_descs_list: (string * LibraryDesc.t) list = LibraryDsl.[ + ("caml_copy_double", special [drop "nptr" []] (OCamlAlloc (GoblintCil.integer 1))); + (* Example: ("malloc", special [__ "size" []] @@ fun size -> Malloc size); *) + ("caml_alloc_small", special [__ "wosize" []; __ "tag" []] @@ fun wosize tag -> OCamlAlloc wosize); + ("camlidl_apron_policy_ptr_c2ml", special [drop "nptr" []] (OCamlAlloc (GoblintCil.integer 1))); + ("caml_alloc_2", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]] @@ fun tag _arg1 _arg2 -> OCamlAlloc (GoblintCil.integer 2)); + ("caml_alloc_3", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]; __ "arg3" [r]] @@ fun tag _arg1 _arg2 _arg3 -> OCamlAlloc (GoblintCil.integer 3)); + ("caml_alloc_4", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]; __ "arg3" [r]; __ "arg4" [r]] @@ fun tag _arg1 _arg2 _arg3 _arg4 -> OCamlAlloc (GoblintCil.integer 4)); + ("caml_alloc_5", special [__ "tag" []; __ "arg1" [r]; __ "arg2" [r]; __ "arg3" [r]; __ "arg4" [r]; __ "arg5" [r]] @@ fun tag _arg1 _arg2 _arg3 _arg4 _arg5 -> OCamlAlloc (GoblintCil.integer 5)); + ("__goblint_caml_param0", special [] @@ OCamlParam0); + ("__goblint_caml_param1", special [__ "param" []] @@ fun param -> OCamlParam [param]); + ("__goblint_caml_param2", special [__ "param1" []; __ "param2" []] @@ fun param1 param2 -> OCamlParam [param1; param2]); + ("__goblint_caml_param3", special [__ "param1" []; __ "param2" []; __ "param3" []] @@ fun param1 param2 param3 -> OCamlParam [param1; param2; param3]); + ("__goblint_caml_param4", special [__ "param1" []; __ "param2" []; __ "param3" []; __ "param4" []] @@ fun param1 param2 param3 param4 -> OCamlParam [param1; param2; param3; param4]); + ("__goblint_caml_param5", special [__ "param1" []; __ "param2" []; __ "param3" []; __ "param4" []; __ "param5" []] @@ fun param1 param2 param3 param4 param5 -> OCamlParam [param1; param2; param3; param4; param5]); + ("__goblint_caml_drop", special [] @@ OCamlDrop); + ("__goblint_caml_end_roots", special [] @@ OCamlEndRoots); + ] +[@@coverage off] + let libraries = Hashtbl.of_list [ ("c", c_descs_list @ math_descs_list); ("posix", posix_descs_list); @@ -1259,6 +1280,7 @@ let libraries = Hashtbl.of_list [ ("zlib", zlib_descs_list); ("liblzma", liblzma_descs_list); ("legacy", legacy_libs_misc_list); + ("ocaml", ocaml_descs_list) ] let libraries = diff --git a/tests/regression/90-ocaml/01-bagnall.c b/tests/regression/90-ocaml/01-bagnall.c new file mode 100644 index 0000000000..dfeca785e9 --- /dev/null +++ b/tests/regression/90-ocaml/01-bagnall.c @@ -0,0 +1,37 @@ +// PARAM: --set "ana.activated[+]" ocaml --disable warn.imprecise --set "exp.extraspecials[+]" printInt + +// Buggy code from https://github.com/xavierleroy/pringo/issues/6 where value v is not registered. + +#include +#include +#include +#include +#include +#include "goblint_caml.h" + +CAMLprim value pringo_LXM_copy(value v) +{ + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // WARN + return res; +} + +CAMLprim value pringo_LXM_copy_correct(value v) +{ + CAMLparam1(v); + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // NOWARN + CAMLreturn(res); +} + +CAMLprim value pringo_LXM_init_unboxed(uint64_t i1, uint64_t i2, + uint64_t i3, uint64_t i4) +{ + value v = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); // NOWARN + struct LXM_state * st = LXM_val(v); + st->a = i1 | 1; /* must be odd */ + st->x[0] = i2 != 0 ? i2 : 1; /* must be nonzero */ + st->x[1] = i3 != 0 ? i3 : 2; /* must be nonzero */ + st->s = i4; + return v; +} \ No newline at end of file diff --git a/tests/regression/90-ocaml/02-floatops.c b/tests/regression/90-ocaml/02-floatops.c new file mode 100644 index 0000000000..9d35150ca5 --- /dev/null +++ b/tests/regression/90-ocaml/02-floatops.c @@ -0,0 +1,152 @@ +// PARAM: --set "ana.activated[+]" ocaml --disable warn.imprecise --set "exp.extraspecials[+]" printInt + +// Code from stubs.c in src/common/cdomains/floatOps that should be correct despite missing CAMLparam, as discussed in https://github.com/goblint/analyzer/issues/1371. + +#define _GNU_SOURCE // necessary for M_PI to be defined +#include +#include +#include +#include +#include +#include +#include +#include "goblint_caml.h" + +// Order must match with round_mode in floatOps.ml +enum round_mode +{ + Nearest, + ToZero, + Up, + Down +}; + +static void change_round_mode(int mode) +{ + switch (mode) + { + case Nearest: + fesetround(FE_TONEAREST); + break; + case ToZero: + fesetround(FE_TOWARDZERO); + break; + case Up: + fesetround(FE_UPWARD); + break; + case Down: + fesetround(FE_DOWNWARD); + break; + default: + // Assert ignored to focus on the OCaml stubs. + assert(0); // FAIL + break; + } +} + +#define UNARY_OP(name, type, op) \ + CAMLprim value name##_##type(value mode, value x) \ + { \ + /* No need to use CAMLparam to keep mode and x as GC roots, + because next GC poll point is at allocation in caml_copy_double. + We have already read their values by then. */ \ + int old_roundingmode = fegetround(); \ + change_round_mode(Int_val(mode)); \ + volatile type r, x1 = Double_val(x); \ + r = op(x1); \ + fesetround(old_roundingmode); \ + return caml_copy_double(r); /* NOWARN */ \ + /* No need to use CAMLreturn because we don't use CAMLparam. */ \ + } + +UNARY_OP(sqrt, double, sqrt); // NOWARN +UNARY_OP(sqrt, float, sqrtf); // NOWARN +UNARY_OP(acos, double, acos); // NOWARN +UNARY_OP(acos, float, acosf); // NOWARN +UNARY_OP(asin, double, asin); // NOWARN +UNARY_OP(asin, float, asinf); // NOWARN +UNARY_OP(atan, double, atan); // NOWARN +UNARY_OP(atan, float, atanf); // NOWARN +UNARY_OP(cos, double, cos); // NOWARN +UNARY_OP(cos, float, cosf); // NOWARN +UNARY_OP(sin, double, sin); // NOWARN +UNARY_OP(sin, float, sinf); // NOWARN +UNARY_OP(tan, double, tan); // NOWARN +UNARY_OP(tan, float, tanf); // NOWARN + +#define BINARY_OP(name, type, op) \ + CAMLprim value name##_##type(value mode, value x, value y) \ + { \ + /* No need to use CAMLparam to keep mode, x and y as GC roots, + because next GC poll point is at allocation in caml_copy_double. + We have already read their values by then. */ \ + int old_roundingmode = fegetround(); \ + change_round_mode(Int_val(mode)); \ + volatile type r, x1 = Double_val(x), y1 = Double_val(y); \ + r = x1 op y1; \ + fesetround(old_roundingmode); \ + return caml_copy_double(r); /* NOWARN */ \ + /* No need to use CAMLreturn because we don't use CAMLparam. */ \ + } + +BINARY_OP(add, double, +); // NOWARN +BINARY_OP(add, float, +); // NOWARN +BINARY_OP(sub, double, -); // NOWARN +BINARY_OP(sub, float, -); // NOWARN +BINARY_OP(mul, double, *); // NOWARN +BINARY_OP(mul, float, *); // NOWARN +BINARY_OP(div, double, /); // NOWARN +BINARY_OP(div, float, /); // NOWARN + +CAMLprim value atof_double(value mode, value str) +{ + // No need to use CAMLparam to keep mode and str as GC roots, + // because next GC poll point is at allocation in caml_copy_double. + // We have already read their values by then. + const char *s = String_val(str); + volatile double r; + int old_roundingmode = fegetround(); // NOWARN + change_round_mode(Int_val(mode)); + r = atof(s); + fesetround(old_roundingmode); + return caml_copy_double(r); // NOWARN + // No need to use CAMLreturn because we don't use CAMLparam. +} + +CAMLprim value atof_float(value mode, value str) +{ + // No need to use CAMLparam to keep mode and str as GC roots, + // because next GC poll point is at allocation in caml_copy_double. + // We have already read their values by then. + const char *s = String_val(str); // Despite not being a value, this is a pointer into the OCaml heap and needs to be checked against GC. Doubles are also pointers. + volatile float r; + int old_roundingmode = fegetround(); // NOWARN + change_round_mode(Int_val(mode)); // This makes a copy of the mode and no problems will arise. + r = (float)atof(s); + fesetround(old_roundingmode); + return caml_copy_double(r); // NOWARN + // No need to use CAMLreturn because we don't use CAMLparam. +} + +// These are only given for floats as these operations involve no rounding and their OCaml implementation (Float module) can be used + +CAMLprim value max_float(value unit) +{ + // No need to use CAMLparam to keep unit as GC root, + // because we don't use it. + return caml_copy_double(FLT_MAX); // NOWARN + // No need to use CAMLreturn because we don't use CAMLparam. +} + +CAMLprim value smallest_float(value unit) +{ + // No need to use CAMLparam to keep unit as GC root, + // because we don't use it. + return caml_copy_double(FLT_MIN); // NOWARN + // No need to use CAMLreturn because we don't use CAMLparam. +} + +CAMLprim value pi_float(value unit) +{ + return caml_copy_double(M_PI); // NOWARN +} diff --git a/tests/regression/90-ocaml/03-obenour.c b/tests/regression/90-ocaml/03-obenour.c new file mode 100644 index 0000000000..0e341f6764 --- /dev/null +++ b/tests/regression/90-ocaml/03-obenour.c @@ -0,0 +1,70 @@ +// PARAM: --set "ana.activated[+]" ocaml --disable warn.imprecise --set "exp.extraspecials[+]" printInt + +// Buggy code from https://github.com/ocaml/ocaml/pull/13370 where unregistered temporary variables may be garbage-collected. + +#include +#include +#include "goblint_caml.h" + +CAMLprim value caml_gc_counters(value v) +{ + CAMLparam0(); /* v is ignored */ + CAMLlocal1(res); + + /* get a copy of these before allocating anything... */ + double minwords = caml_gc_minor_words_unboxed(); + /*double prowords = (double)Caml_state->stat_promoted_words; + double majwords = Caml_state->stat_major_words + + (double) Caml_state->allocated_words;*/ + double prowords = 0; // It does not find the Caml_state so dummy values are used for the test. + double majwords = 0; + + res = caml_alloc_3(0, + caml_copy_double(minwords), + caml_copy_double(prowords), + caml_copy_double(majwords)); // WARN + CAMLreturn(res); +} + +CAMLprim value caml_gc_counters_correct_1(value v) +{ + CAMLparam0(); /* v is ignored */ + CAMLlocal3(minwords_, prowords_, majwords_); + + /* get a copy of these before allocating anything... */ + double minwords = caml_gc_minor_words_unboxed(); + /*double prowords = (double)Caml_state->stat_promoted_words; + double majwords = Caml_state->stat_major_words + + (double) Caml_state->allocated_words;*/ + double prowords = 0; + double majwords = 0; + + minwords_ = caml_copy_double(minwords); + prowords_ = caml_copy_double(prowords); + majwords_ = caml_copy_double(majwords); + v = caml_alloc_small(3, 0); + Field(v, 0) = minwords_; + Field(v, 1) = prowords_; + Field(v, 2) = majwords_; + CAMLreturn(v); +} + +CAMLprim value caml_gc_counters_correct_2(value v) +{ + CAMLparam0(); /* v is ignored */ + CAMLlocal4(minwords_, prowords_, majwords_, res); + + /* get a copy of these before allocating anything... */ + double minwords = caml_gc_minor_words_unboxed(); + /*double prowords = (double)Caml_state->stat_promoted_words; + double majwords = Caml_state->stat_major_words + + (double) Caml_state->allocated_words;*/ + double prowords = 0; + double majwords = 0; + + minwords_ = caml_copy_double(minwords); + prowords_ = caml_copy_double(prowords); + majwords_ = caml_copy_double(majwords); + res = caml_alloc_3(0, minwords_, prowords_, majwords_); + CAMLreturn(res); +} \ No newline at end of file diff --git a/tests/regression/90-ocaml/04-branching.c b/tests/regression/90-ocaml/04-branching.c new file mode 100644 index 0000000000..11460fff38 --- /dev/null +++ b/tests/regression/90-ocaml/04-branching.c @@ -0,0 +1,22 @@ +// PARAM: --set "ana.activated[+]" ocaml --disable warn.imprecise --set "exp.extraspecials[+]" printInt + +// Artificial test where one branch registers the argument v and the other does not, but both branches use v. + +#include +#include +#include +#include +#include +#include +#include "goblint_caml.h" + +CAMLprim value branching_test(value v, bool b) +{ + if (b) + { + CAMLparam1(v); + } + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // WARN + CAMLreturn(res); +} \ No newline at end of file diff --git a/tests/regression/90-ocaml/05-lateparam.c b/tests/regression/90-ocaml/05-lateparam.c new file mode 100644 index 0000000000..f212b41c06 --- /dev/null +++ b/tests/regression/90-ocaml/05-lateparam.c @@ -0,0 +1,19 @@ +// PARAM: --set "ana.activated[+]" ocaml --disable warn.imprecise --set "exp.extraspecials[+]" printInt + +// Artificial test where the argument v is registered after GC could delete it. +// TODO: Add late local as well. + +#include +#include +#include +#include +#include +#include "goblint_caml.h" + +CAMLprim value late_test(value v) +{ + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + CAMLparam1(v); // WARN + memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // WARN + CAMLreturn(res); +} \ No newline at end of file diff --git a/tests/regression/90-ocaml/06-nested.c b/tests/regression/90-ocaml/06-nested.c new file mode 100644 index 0000000000..7820109b6b --- /dev/null +++ b/tests/regression/90-ocaml/06-nested.c @@ -0,0 +1,77 @@ +// PARAM: --set "ana.activated[+]" ocaml --disable warn.imprecise --set "exp.extraspecials[+]" printInt + +// Artificial tests with one C-stub calling another inside it. + +#include +#include +#include +#include +#include +#include "goblint_caml.h" + +CAMLprim value pringo_LXM_copy(value v) +{ + value res1 = pringo_LXM_copy_correct(v); // NOWARN + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // WARN + return res1; // WARN +} + +CAMLprim value pringo_LXM_copy_correct(value v) +{ + CAMLparam1(v); + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // NOWARN + CAMLreturn(res); // NOWARN +} + +CAMLprim value pringo_LXM_copy_1(value v) +{ + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // WARN + return v; // WARN +} + +CAMLprim value pringo_LXM_copy_2(value v) +{ + CAMLparam1(v); + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + v = pringo_LXM_copy_1(v); // WARN + memcpy(LXM_val(res), LXM_val(v), sizeof(struct LXM_state)); // WARN + CAMLreturn(res); // WARN +} + +CAMLprim value pringo_LXM_copy_3(value v) +{ + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + value r = pringo_LXM_copy_1(v); // WARN + memcpy(LXM_val(res), LXM_val(r), sizeof(struct LXM_state)); // WARN + CAMLreturn(res); // WARN +} + +CAMLprim value pringo_LXM_init_unboxed(uint64_t i1, uint64_t i2, + uint64_t i3, uint64_t i4) +{ + value v = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); // NOWARN + struct LXM_state * st = LXM_val(v); + st->a = i1 | 1; /* must be odd */ + st->x[0] = i2 != 0 ? i2 : 1; /* must be nonzero */ + st->x[1] = i3 != 0 ? i3 : 2; /* must be nonzero */ + st->s = i4; + return v; +} + +// If entering does not copy registration status, this function will give false positives. +CAMLprim value enter_test_1(value v) +{ + CAMLparam1(v); + value res = enter_test_2(v); // NOWARN + CAMLreturn(res); // NOWARN +} + +CAMLprim value enter_test_2(value v) +{ + value res = caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + // TODO: Inner function warns of memory leak. Are inner and outer registration the same or different? Investigate. + return v; // WARN +} \ No newline at end of file diff --git a/tests/regression/90-ocaml/07-apron.c b/tests/regression/90-ocaml/07-apron.c new file mode 100644 index 0000000000..b4eab14f90 --- /dev/null +++ b/tests/regression/90-ocaml/07-apron.c @@ -0,0 +1,54 @@ +// PARAM: --set "ana.activated[+]" ocaml --disable warn.imprecise --set "exp.extraspecials[+]" printInt + +// Buggy code from https://github.com/antoinemine/apron/pull/112 where v and v2 are initialised as 0. +// TODO: It should warn when a value initialised as 0 is used without being registered after a garbage collection. + +#include +#include +#include +#include +#include +#include "goblint_caml.h" + +// In place of value, it was ap_policy_optr*. +CAMLprim value camlidl_apron_policy_optr_c2ml(value p) +{ + if (p==NULL){ + return Val_int(0); + } else { + value v,v2=0; + Begin_roots1(v2); // WARN + v2 = camlidl_apron_policy_ptr_c2ml(p); + v = caml_alloc_small(1,0); + Field(v,0) = v2; + End_roots(); + return v; + } +} + +CAMLprim value camlidl_apron_policy_optr_c2ml_correct(value p) +{ + if (p==NULL){ + return Val_int(0); + } else { + value v,v2 = Val_unit; + Begin_roots1(v2); // NOWARN + v2 = camlidl_apron_policy_ptr_c2ml(p); + v = caml_alloc_small(1,0); + Field(v,0) = v2; + End_roots(); + return v; + } +} + + +// This function basically allocates memory with caml_alloc_custom and is simulated with OCamlParam in libraryfunctions.ml. +/*CAMLprim value camlidl_apron_policy_ptr_c2ml(ap_policy_ptr* p) +{ + value v; + assert((*p)->pman!=NULL); + v = caml_alloc_custom(&camlidl_apron_custom_policy_ptr, sizeof(ap_policy_ptr), + 0,1); + *((ap_policy_ptr *) Data_custom_val(v)) = *p; + return v; +}*/ \ No newline at end of file diff --git a/tests/regression/90-ocaml/08-registrations.c b/tests/regression/90-ocaml/08-registrations.c new file mode 100644 index 0000000000..7269d9a235 --- /dev/null +++ b/tests/regression/90-ocaml/08-registrations.c @@ -0,0 +1,80 @@ +// PARAM: --set "ana.activated[+]" ocaml --disable warn.imprecise --set "exp.extraspecials[+]" printInt + +// Artificial tests for assignment to values with different states of registration. + +#include +#include +#include +#include +#include +#include "goblint_caml.h" + +CAMLprim value registration_test_1(value v) +{ + CAMLparam1(v); + CAMLlocal1(res); + res = v; + caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + CAMLreturn(res); // NOWARN +} + +// Though v is not directly registered, it is pointed to by a registered value, so it should not be garbage collected. +// If res is then deregistered, the status of v should follow. Changing the analysis to account for this case would be very hard. +CAMLprim value registration_test_2(value v) +{ + CAMLparam0(); + CAMLlocal1(res); + res = v; + caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + CAMLreturn(res); // NOWARN +} + +// While v will survive the garbage collection, res may no longer point to it. +CAMLprim value registration_test_3(value v) +{ + CAMLparam1(v); + value res = v; + caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + CAMLreturn(res); // TODO NOWARN +} + +CAMLprim value registration_test_4(value v) +{ + value res = v; + caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + return res; // WARN +} + +// Though v is indirectly registered via res, the analysis does not see this. +// This causes res2 to be in R, but not in A: a problem. +// TODO: Fix or count as limitation. +CAMLprim value registration_test_5(value v) +{ + CAMLparam0(); + CAMLlocal2(res, res2); + res = v; + caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + res2 = v; + CAMLreturn(res2); // TODO NOWARN +} + +// The memory leak test. +CAMLprim value registration_test_6(value v) +{ + CAMLparam1(v); + CAMLlocal1(res); + res = v; + caml_alloc_small(Wsizeof(struct LXM_state), Abstract_tag); + return res; // WARN +} + +// With two begin_roots and one end_roots, something stays, but the analysis does not warn. +CAMLprim value registration_test_7(value v) +{ + value res = Val_unit; + Begin_roots1(v); + Begin_roots1(res); + res = v; + End_roots(); + return res; // TODO WARN +} \ No newline at end of file diff --git a/tests/regression/90-ocaml/goblint_caml.h b/tests/regression/90-ocaml/goblint_caml.h new file mode 100644 index 0000000000..e3e9b5dd4b --- /dev/null +++ b/tests/regression/90-ocaml/goblint_caml.h @@ -0,0 +1,53 @@ +/* Redefinitions of caml macros for the C-stub analysis. This file replaces caml/memory.h in tests' imports. */ + +/* A small definition of the LXM state so sizeof works - from AI */ +struct LXM_state { uint64_t a; uint64_t x[2]; uint64_t s; }; + +/* Minimal macros to mimic expected behaviour */ +#define Wsizeof(ty) ((sizeof(ty) + sizeof(value) - 1) / sizeof(value)) +#define LXM_val(v) ((struct LXM_state *) Data_abstract_val(v)) + +// Redefine CAMLprim to have the annotate attribute marking it as c_stub. +#undef CAMLprim +#define CAMLprim __attribute__((c_stub)) + +// Redefine Val_unit to explicitly be of type value. +#undef Val_unit +#define Val_unit ((value) 1) + +// Param and local macros redefined to register variables as GC roots, and CAMLreturn mocked to work with them. +#undef CAMLparam0 +#undef CAMLparam1 +#undef CAMLparam2 +#undef CAMLparam3 +#undef CAMLparam4 +#undef CAMLparam5 +#define CAMLparam0() __goblint_caml_param0() +#define CAMLparam1(x) __goblint_caml_param0(); __goblint_caml_param1(&x) +#define CAMLparam2(x, y) __goblint_caml_param0(); __goblint_caml_param2(&x, &y) +#define CAMLparam3(x, y, z) __goblint_caml_param0(); __goblint_caml_param3(&x, &y, &z) +#define CAMLparam4(x, y, z, t) __goblint_caml_param0(); __goblint_caml_param4(&x, &y, &z, &t) +#define CAMLparam5(x, y, z, t, u) __goblint_caml_param0(); __goblint_caml_param5(&x, &y, &z, &t, &u) + +#undef CAMLlocal1 +#undef CAMLlocal2 +#undef CAMLlocal3 +#undef CAMLlocal4 +#undef CAMLlocal5 +#define CAMLlocal1(x) value x = Val_unit; __goblint_caml_param1(&x) // The local and param functions behave the same for our purposes, registering variables. +#define CAMLlocal2(x, y) value x = Val_unit; value y = Val_unit; __goblint_caml_param2(&x, &y) +#define CAMLlocal3(x, y, z) value x = Val_unit; value y = Val_unit; value z = Val_unit; __goblint_caml_param3(&x, &y, &z) +#define CAMLlocal4(x, y, z, t) value x = Val_unit; value y = Val_unit; value z = Val_unit; value t = Val_unit; __goblint_caml_param4(&x, &y, &z, &t) +#define CAMLlocal5(x, y, z, t, u) value x = Val_unit; value y = Val_unit; value z = Val_unit; value t = Val_unit; value u = Val_unit; __goblint_caml_param5(&x, &y, &z, &t, &u) + +#undef CAMLreturn +#define CAMLreturn(x) __goblint_caml_drop(); return (x) // The real CAMLreturn needs some variable named caml__frame, which is not available in our redefinitions above. + +// Marking roots is like registering and deregistering them. +#undef Begin_roots1 +#undef End_roots +#define Begin_roots1(x) __goblint_caml_param0(); __goblint_caml_param1(&x) +#define End_roots() __goblint_caml_end_roots() + +// A reference to caml_gc_minor_words_unboxed can be found in _opam/lib/ocaml/ml/gc.ml. +#define caml_gc_minor_words_unboxed() (0.0)