diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1857da78c..92aed12ab 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -56,6 +56,7 @@ jobs: - stresstest_large_expr - typeinfer - taylor51 + - luminal-llama - proof_testing_eqsat-basic - proof_testing_unify - proof_testing_typecheck diff --git a/egglog-bridge/src/lib.rs b/egglog-bridge/src/lib.rs index 770ba350b..5ad2d730f 100644 --- a/egglog-bridge/src/lib.rs +++ b/egglog-bridge/src/lib.rs @@ -465,6 +465,7 @@ impl EGraph { fn run_rules_inner(&mut self, rules: &[RuleId]) -> Result { let ts = self.next_ts(); + let uf_size_before = self.db.get_table(self.uf_table).len(); let rule_set_report = run_rules_impl(&mut self.db, &mut self.rules, rules, ts, self.report_level)?; if let Some(message) = self.panic_message.lock().unwrap().take() { @@ -475,7 +476,13 @@ impl EGraph { rule_set_report, rebuild_time: Duration::ZERO, }; - if !iteration_report.changed() { + let uf_size_after = self.db.get_table(self.uf_table).len(); + if uf_size_before == uf_size_after { + // No new unions: skip the full rebuild but still advance the + // timestamp so that seminaive evaluation sees a fresh epoch. + // Rebuilding is only necessary when new unions have been made because ids may need to be updated. + // Adding terms doesn't necessarily touch the union-find, only doing a union between existing ids does. + self.inc_ts(); return Ok(iteration_report); } @@ -804,9 +811,15 @@ impl EGraph { /// Flush the pending update buffers to the EGraph. /// Returns `true` if the database is updated. pub fn flush_updates(&mut self) -> bool { + let uf_size_before = self.db.get_table(self.uf_table).len(); let updated = self.db.merge_all(); self.inc_ts(); - self.rebuild().unwrap(); + let uf_size_after = self.db.get_table(self.uf_table).len(); + if uf_size_before != uf_size_after { + // Rebuilding is only necessary when new unions have been made because ids may need to be updated. 
+ // Adding terms doesn't necessarily touch the union-find, only doing a union between existing ids does. + self.rebuild().unwrap(); + } updated } diff --git a/tests/luminal-llama.egg b/tests/luminal-llama.egg new file mode 100644 index 000000000..03d3567b1 --- /dev/null +++ b/tests/luminal-llama.egg @@ -0,0 +1,3400 @@ +(ruleset expr) +(ruleset cleanup) +(ruleset early) + +; -------- SYMBOLIC ALGEBRA ------- +(datatype* + (Expression + (MNum i64) + (MFloat f64) + (MIter) + (MVar String) + (MAdd Expression Expression) + (MSub Expression Expression) + (MMul Expression Expression) + (MCeilDiv Expression Expression) + (MDiv Expression Expression) + (MMod Expression Expression) + (MMin Expression Expression) + (MMax Expression Expression) + (MAnd Expression Expression) + (MOr Expression Expression) + (MGte Expression Expression) + (MLt Expression Expression) + (MFloorTo Expression Expression) + (MReplace Expression Expression Expression) + ) + + ; eqsort list for vectors of Expression + (EList + (ECons Expression EList) + (ENil) + (MReplaceList EList Expression Expression) + (ReplaceNthFromEnd EList Expression i64) + (RemoveNthFromEnd EList i64) + (RowMajor EList) + ) + + (DType + (F32) + (F16) + (Bf16) + (Int) + ) +) + +; ---- Algebraic rewrites ---- +;(rewrite (MAdd a b) (MAdd b a) :ruleset expr) ; commutativity leads to some explosions +;(rewrite (MMul a b) (MMul b a) :ruleset expr) + +;(rewrite (MAdd (MAdd a b) c) (MAdd a (MAdd b c)) :ruleset expr) ; explodes weirdly, see no_explode test in symbolic.rs +;(rewrite (MMul (MMul a b) c) (MMul a (MMul b c)) :ruleset expr) + +(rewrite (MAdd (MNum a) (MNum b)) (MNum (+ a b)) :ruleset expr) +(rewrite (MSub (MNum a) (MNum b)) (MNum (- a b)) :ruleset expr) +; multiply const folding +(rule + ( + (= ?e (MMul (MNum ?a) (MNum ?b))) + (= ?prod (* ?a ?b)) + ) + ( + (union ?e (MNum ?prod)) + (subsume (MMul (MNum ?a) (MNum ?b))) + ) + :ruleset expr +) +(rewrite (MDiv (MNum a) (MNum b)) (MNum (/ a b)) :when ((!= 0 b) (= 0 (% a b))) 
:ruleset expr) +(rewrite (MCeilDiv (MNum a) (MNum b)) (MNum (/ a b)) :when ((!= 0 b) (= 0 (% a b))) :ruleset expr) +(rewrite (MMax (MNum a) (MNum b)) (MNum (max a b)) :ruleset expr) +(rewrite (MMin (MNum a) (MNum b)) (MNum (min a b)) :ruleset expr) +(rewrite (MAnd (MNum a) (MNum b)) (MNum (& a b)) :ruleset expr) +(rewrite (MFloat -1.0) (MNum -1) :ruleset expr) +(rewrite (MNum -1) (MFloat -1.0) :ruleset expr) +;(rewrite (MDiv (MMul ?x (MNum ?a)) (MNum ?b)) (MMul ?x (MNum (/ ?a ?b))) :when ((< ?b ?a) (= (% ?a ?b) 0)) :ruleset expr) ; why does this explode??? + +(rewrite (MAdd a (MNum 0)) a :ruleset expr) +(rule ((= ?e (MMul ?a (MNum 1)))) ((union ?e ?a)) :ruleset expr) +(rule ((= ?e (MMul ?a (MNum 0)))) ((union ?e (MNum 0)) (subsume (MMul ?a (MNum 0)))) :ruleset expr) +(rewrite (MDiv a (MNum 1)) a :ruleset expr) +(rewrite (MMod (MMul ?x ?y) ?y) (MNum 0) :ruleset expr) +(rewrite (MMod (MMod ?x (MNum ?y)) (MNum ?z)) (MMod ?x (MNum ?y)) + :when ((>= ?z ?y) (= 0 (% ?y ?z))) :ruleset expr) +(rewrite (MMod (MMod ?x (MNum ?y)) (MNum ?z)) (MMod ?x (MNum ?z)) + :when ((>= ?y ?z) (= 0 (% ?z ?y))) :ruleset expr) +(rewrite (MDiv (MDiv a b) c) (MDiv a (MMul b c)) :ruleset expr) +(rewrite (MAdd (MDiv a b) c) (MDiv (MAdd a (MMul c b)) b) :ruleset expr) +(rewrite (MAdd a (MSub b a)) b :ruleset expr) +(rewrite (MAdd (MSub b a) a) b :ruleset expr) +(rewrite (MSub a a) (MNum 0) :ruleset expr) +(rewrite + (MAdd (MSub a (MNum ?b)) (MNum ?c)) + (MSub a (MNum (- ?b ?c))) + :ruleset expr +) +(rewrite + (MAdd (MNum ?c) (MSub a (MNum ?b))) + (MSub a (MNum (- ?b ?c))) + :ruleset expr +) +(rewrite + (MSub (MAdd a (MNum ?b)) (MNum ?c)) + (MAdd a (MNum (- ?b ?c))) + :ruleset expr +) +(rewrite + (MSub (MSub a (MNum ?b)) (MNum ?c)) + (MSub a (MNum (+ ?b ?c))) + :ruleset expr +) +(rewrite (MAdd (MMul a b) (MMul a c)) (MMul a (MAdd b c)) :ruleset expr) +(rewrite (MAdd a a) (MMul (MNum 2) a) :ruleset expr) + +; ---- Replacement over expressions ---- +(rewrite (MReplace ?x ?y ?z) ?z :when ((= ?x ?y)) 
:ruleset expr) +(rewrite (MReplace (MAdd ?a ?b) ?x ?y) (MAdd (MReplace ?a ?x ?y) (MReplace ?b ?x ?y)) :ruleset expr) +(rewrite (MReplace (MSub ?a ?b) ?x ?y) (MSub (MReplace ?a ?x ?y) (MReplace ?b ?x ?y)) :ruleset expr) +(rewrite (MReplace (MMul ?a ?b) ?x ?y) (MMul (MReplace ?a ?x ?y) (MReplace ?b ?x ?y)) :ruleset expr) +(rewrite (MReplace (MDiv ?a ?b) ?x ?y) (MDiv (MReplace ?a ?x ?y) (MReplace ?b ?x ?y)) :ruleset expr) +(rewrite (MReplace (MCeilDiv ?a ?b) ?x ?y) (MCeilDiv (MReplace ?a ?x ?y) (MReplace ?b ?x ?y)) :ruleset expr) +(rewrite (MReplace (MMod ?a ?b) ?x ?y) (MMod (MReplace ?a ?x ?y) (MReplace ?b ?x ?y)) :ruleset expr) +(rewrite (MReplace (MMin ?a ?b) ?x ?y) (MMin (MReplace ?a ?x ?y) (MReplace ?b ?x ?y)) :ruleset expr) +(rewrite (MReplace (MMax ?a ?b) ?x ?y) (MMax (MReplace ?a ?x ?y) (MReplace ?b ?x ?y)) :ruleset expr) +(rewrite (MReplace (MFloorTo ?a ?b) ?x ?y) (MFloorTo (MReplace ?a ?x ?y) (MReplace ?b ?x ?y)) :ruleset expr) +(rewrite (MReplace (MNum ?n) ?x ?y) (MNum ?n) :ruleset expr) +(rewrite (MReplace (MVar ?z) ?find ?replace) (MVar ?z) :when ((!= ?find (MVar ?z))) :ruleset expr) +(rewrite (MReplace (MIter) ?find ?replace) (MIter) :when ((!= ?find (MIter))) :ruleset expr) + +; EList helper functions +(function len (EList) i64 :merge new) +(rule ((= ?e (ENil))) ((set (len ?e) 0)) :ruleset expr) +(rule ((= ?e (ECons ?expr ?list)) (= ?prev_len (len ?list))) ((set (len ?e) (+ ?prev_len 1))) :ruleset expr) + +(function nth_from_end (EList i64) Expression :merge new) +(rule ((= ?e (ECons ?expr ?list)) (= ?list_len (len ?list))) ((set (nth_from_end ?e ?list_len) ?expr)) :ruleset expr) +(rule ((= ?e (ECons ?expr ?list)) (= ?other_nth (nth_from_end ?list ?n))) ((set (nth_from_end ?e ?n) ?other_nth)) :ruleset expr) + +(function n_elements (EList) Expression :merge new) +(rule ((= ?e (ENil))) ((set (n_elements ?e) (MNum 1))) :ruleset expr) +(rule + ( + (= ?e (ECons ?dim ?other)) + (= ?other_elems (n_elements ?other)) + ) + ((set (n_elements ?e) (MMul ?dim 
?other_elems))) + :ruleset expr +) + +(rule + ( + (= ?other (ECons ?other_dim ?other_other)) + (= ?list (ECons ?d ?other)) + (= ?e (RowMajor ?list)) + (= ?n_elems (n_elements ?other)) + ) + ( + (union ?e (ECons ?n_elems (RowMajor ?other))) + ) + :ruleset expr +) +(rewrite (RowMajor (ECons ?dim (ENil))) (ECons (MNum 1) (ENil)) :ruleset expr) + +(rewrite (MReplaceList (ECons ?expr ?list) ?from ?to) (ECons (MReplace ?expr ?from ?to) (MReplaceList ?list ?from ?to)) :ruleset expr) +(rule + ( + (= ?e (ReplaceNthFromEnd (ECons ?expr ?list) ?to ?ind)) + (= ?ind (len ?list)) + ) + ( + (union ?e (ECons ?to ?list)) + ) + :ruleset expr +) +(rule + ( + (= ?e (ReplaceNthFromEnd (ECons ?expr ?list) ?to ?ind)) + (< ?ind (len ?list)) + ) + ( + (union ?e (ECons ?expr (ReplaceNthFromEnd ?list ?to ?ind))) + ) + :ruleset expr +) +(rule + ( + (= ?e (RemoveNthFromEnd (ECons ?expr ?list) ?ind)) + (= ?ind (len ?list)) + ) + ( + (union ?e ?list) + ) + :ruleset expr +) +(rule + ( + (= ?e (RemoveNthFromEnd (ECons ?expr ?list) ?ind)) + (< ?ind (len ?list)) + ) + ( + (union ?e (ECons ?expr (RemoveNthFromEnd ?list ?ind))) + ) + :ruleset expr +) + + + (datatype* + (IR + (OutputJoin IR IR) + (Exp EList IR EList EList) +(Sigmoid EList IR EList EList) +(CubeMul EList EList IR EList Expression Expression Expression IR EList Expression Expression Expression EList Expression Expression Expression) +(TileSum EList EList Expression IR EList Expression Expression Expression EList Expression Expression) +(KernelAdd EList IR EList IR EList EList DType) +(KernelMul EList IR EList IR EList EList DType) +(KernelIota Expression Expression) +(KernelGather EList IR EList IR EList EList DType) +(KernelSum EList Expression IR EList Expression EList DType) +(KernelMax EList Expression IR EList Expression EList DType) +(KernelMean EList Expression IR EList Expression EList DType) +(KernelArgsort IR i64 EList EList EList Expression DType) +(RowAdd EList IR EList IR EList EList Expression) +(RowSwishMul EList IR EList 
IR EList Expression) +(RowRMSNorm EList IR EList Expression IR) +(RowRope EList IR EList Expression IR) +(TileMatmul EList EList Expression IR EList Expression Expression IR EList Expression Expression EList Expression Expression) +(Input i64 String DType) +(Output IR i64) +(CustomOpHLIR IList i64 DType) +(Constant f64) +(Cast IR DType) +(Iota Expression Expression) +(Exp2 EList IR EList EList) +(Log2 EList IR EList EList) +(Sin EList IR EList EList) +(Recip EList IR EList EList) +(Sqrt EList IR EList EList) +(Add EList IR EList IR EList EList) +(Mul EList IR EList IR EList EList) +(Mod EList IR EList IR EList EList) +(LessThan EList IR EList IR EList EList) +(Gather IR EList EList IR EList EList) +(Sum EList Expression IR EList Expression EList) +(Max EList Expression IR EList Expression EList) + ) + (IList + (ICons IR IList) + (INil) + ) + ) + (function dtype (IR) DType :merge new) + +(rule + ( + (= ?exp_const (Constant 1.442695)) + (= ?mul (Mul ?shape ?x ?x_stride ?exp_const ?const_stride ?intermediate_stride)) + (= ?exp2 (Exp2 ?shape ?mul ?intermediate_stride ?out_stride)) + (= ?dt (dtype ?x)) + ) + ( + (let ?exp (Exp ?shape ?x ?x_stride ?out_stride)) + (union ?exp2 ?exp) + (set (dtype ?exp) ?dt) + ) + ) +(rule + ( + (= ?neg_input (Mul ?input_range ?input ?input_stride (Constant -1.0) ?const_stride ?intermediate_stride)) + (= ?exp (Exp ?input_range ?neg_input ?intermediate_stride ?exp_stride)) + (= ?plus_one (Add ?input_range ?exp ?exp_stride (Constant 1.0) ?const_stride ?plus_one_stride)) + (= ?sig_out (Recip ?input_range ?plus_one ?plus_one_stride ?out_stride)) + (= ?dt (dtype ?input)) + ) + ( + (let ?sig (Sigmoid ?input_range ?input ?input_stride ?out_stride)) + (union ?sig_out ?sig) + (set (dtype ?sig) ?dt) + ) + :name "sigmoid" + ) +(rule + ( + ; get mul + (= ?sa (Mul ?shape ?a ?a_stride ?b ?b_stride ?out_stride)) + (= ?shape_last (nth_from_end ?shape 0)) + (= ?shape_second_to_last (nth_from_end ?shape 1)) + (= ?shape_third_to_last (nth_from_end ?shape 2)) 
+ (!= ?shape_last (MNum 0)) + (!= ?shape_second_to_last (MNum 0)) + (!= ?shape_third_to_last (MNum 0)) + ; get m, n, and k strides for A, B, and outputs + (= ?a_n_width (nth_from_end ?a_stride 1)) + (= ?b_n_width (nth_from_end ?b_stride 1)) + (= ?out_n_width (nth_from_end ?out_stride 1)) + (= ?a_m_width (nth_from_end ?a_stride 2)) + (= ?b_m_width (nth_from_end ?b_stride 2)) + (= ?out_m_width (nth_from_end ?out_stride 2)) + (= ?a_k_width (nth_from_end ?a_stride 0)) + (= ?b_k_width (nth_from_end ?b_stride 0)) + (= ?out_k_width (nth_from_end ?out_stride 0)) + (= ?dt (dtype ?a)) + ) + ( + ; divide the last 3 dimensions by TILE_SIZE + (let ?new_shape + (ReplaceNthFromEnd + (ReplaceNthFromEnd + (ReplaceNthFromEnd + ?shape + (MCeilDiv ?shape_last (MNum 64)) 0) + (MCeilDiv ?shape_second_to_last (MNum 64)) 1) + (MCeilDiv ?shape_third_to_last (MNum 64)) 2) + ) + ; multiply last 3 strides by TILE_SIZE + (let ?new_a_stride + (ReplaceNthFromEnd + (ReplaceNthFromEnd + (ReplaceNthFromEnd + ?a_stride + (MMul (MIter) (MNum 64)) 0) + (MMul ?a_n_width (MNum 64)) 1) + (MMul ?a_m_width (MNum 64)) 2) + ) + (let ?new_b_stride + (ReplaceNthFromEnd + (ReplaceNthFromEnd + (ReplaceNthFromEnd + ?b_stride + (MMul (MIter) (MNum 64)) 0) + (MMul ?b_n_width (MNum 64)) 1) + (MMul ?b_m_width (MNum 64)) 2) + ) + (let ?new_out_stride + (ReplaceNthFromEnd + (ReplaceNthFromEnd + (ReplaceNthFromEnd + ?out_stride + (MMul (MIter) (MNum 64)) 0) + (MMul ?out_n_width (MNum 64)) 1) + (MMul ?out_m_width (MNum 64)) 2) + ) + (let ?cm (CubeMul ?new_shape ?shape ?a ?new_a_stride ?a_m_width ?a_n_width ?a_k_width ?b ?new_b_stride ?b_m_width ?b_n_width ?b_k_width ?new_out_stride ?out_m_width ?out_n_width ?out_k_width)) + (union ?sa ?cm) + (set (dtype ?cm) (F32)) + ) + ) +(rule + ( + ; get sum + (= ?sa (Sum ?shape ?iters ?a ?a_stride ?a_k_stride ?out_stride)) + (= ?shape_n (nth_from_end ?shape 0)) (!= ?shape_n (MNum 0)) + (= ?shape_m (nth_from_end ?shape 1)) (!= ?shape_m (MNum 0)) + ; get m and n strides for A + (= 
?a_m_stride (nth_from_end ?a_stride 1)) + (= ?a_n_stride (nth_from_end ?a_stride 0)) + ; get m and n strides for out + (= ?out_m_stride (nth_from_end ?out_stride 1)) + (= ?out_n_stride (nth_from_end ?out_stride 0)) + (= (F32) (dtype ?a)) + ) + ( + ; divide second to last and last dimensions by TILE_SIZE + (let ?new_shape + (ReplaceNthFromEnd + (ReplaceNthFromEnd + ?shape + (MCeilDiv ?shape_n (MNum 64)) 0) + (MCeilDiv ?shape_m (MNum 64)) 1) + ) + ; multiply second to last and last strides by TILE_SIZE + (let ?new_a_stride + (ReplaceNthFromEnd + (ReplaceNthFromEnd + ?a_stride + (MMul ?a_n_stride (MNum 64)) 0) + (MMul ?a_m_stride (MNum 64)) 1) + ) + (let ?new_out_stride + (ReplaceNthFromEnd + (ReplaceNthFromEnd + ?out_stride + (MMul ?out_n_stride (MNum 64)) 0) + (MMul ?out_m_stride (MNum 64)) 1) + ) + (let ?ts (TileSum ?new_shape ?shape ?iters ?a ?new_a_stride ?a_m_stride ?a_n_stride ?a_k_stride ?new_out_stride ?out_m_stride ?out_n_stride)) + (union ?sa ?ts) + (set (dtype ?ts) (F32)) + ) + ) + +(rule + ( + (= ?a (Add ?out_shape ?inp_a ?inp_a_strides ?inp_b ?inp_b_strides ?out_strides)) + (= ?dty (dtype ?inp_a)) + ) + ( + (union ?a (KernelAdd ?out_shape ?inp_a ?inp_a_strides ?inp_b ?inp_b_strides ?out_strides ?dty)) + ) + :name "kernel add" +) + +(rule + ( + (= ?a (Mul ?out_shape ?inp_a ?inp_a_strides ?inp_b ?inp_b_strides ?out_strides)) + (= ?dty (dtype ?inp_a)) + ) + ( + (union ?a (KernelMul ?out_shape ?inp_a ?inp_a_strides ?inp_b ?inp_b_strides ?out_strides ?dty)) + ) + :name "kernel mul" +) + +(rule + ( + (= ?a (Iota ?expr ?range)) + ) + ( + (let ?kernel_iota (KernelIota ?expr ?range)) + (union ?a ?kernel_iota) + (set (dtype ?kernel_iota) (Int)) + ) + :name "kernel iota" +) + +(rule + ( + (= ?a (Gather ?indexes ?out_shape ?index_strides ?data ?data_shape ?data_strides)) + (= ?dty (dtype ?data)) + ) + ( + (let ?out_strides (RowMajor ?out_shape)) + (union ?a (KernelGather ?out_shape ?indexes ?index_strides ?data ?data_strides ?out_strides ?dty)) + ) + :name "kernel 
gather" +) + +(rule + ( + (= ?a (Sum ?out_shape ?iters ?inp ?in_stride ?iter_stride ?out_stride)) + (= ?dty (dtype ?inp)) + ) + ( + (union ?a (KernelSum ?out_shape ?iters ?inp ?in_stride ?iter_stride ?out_stride ?dty)) + ) + :name "kernel sum reduce" +) + +(rule + ( + (= ?a (Max ?out_shape ?iters ?inp ?in_stride ?iter_stride ?out_stride)) + (= ?dty (dtype ?inp)) + ) + ( + (union ?a (KernelMax ?out_shape ?iters ?inp ?in_stride ?iter_stride ?out_stride ?dty)) + ) + :name "kernel max reduce" +) + +(rule + ( + (= ?sum (Sum ?out_shape ?iters ?inp ?in_stride ?iter_stride ?sum_out_stride)) + (= ?iota (Iota ?iters ?one)) + (= ?cast (Cast ?iota (F32))) + (= ?recip (Recip ?r_shape ?cast ?r_in_strides ?r_out_strides)) + (= ?result (Mul ?shape ?sum ?sum_strides ?recip ?recip_strides ?out_strides)) + (= ?dty (dtype ?inp)) + ) + ( + (union ?result (KernelMean ?out_shape ?iters ?inp ?in_stride ?iter_stride ?out_strides ?dty)) + ) + :name "kernel mean reduce" +) + + + (rule + ( + ; Ascending: LessThan(add_eps, input) means a.gt(b) in Rust + (= ?add_eps (Add ?add_shape ?inp ?inp_str1 ?eps ?eps_str ?add_out)) + (= ?cmp (LessThan ?cmp_shape ?add_eps ?add_str ?inp ?inp_str2 ?cmp_out)) + + (= ?sum_cmp (Sum ?sum_shape ?sum_iters ?cmp ?sum_in_strides ?sum_iter_stride ?sum_out_strides)) + (= ?cast (Cast ?sum_cmp (Int))) + (= ?cand_iota (Iota ?cand_expr ?cand_size)) + (= ?pos_iota (Iota ?pos_expr ?pos_size)) + (= ?lt1 (LessThan ?lt1_shape ?cast ?cast_str1 ?cand_iota ?cand_str1 ?lt1_out)) + (= ?lt2 (LessThan ?lt2_shape ?cand_iota ?cand_str2 ?cast ?cast_str2 ?lt2_out)) + (= ?ne (Add ?ne_shape ?lt1 ?lt1_str ?lt2 ?lt2_str ?ne_out)) + (= ?neg1 (Constant -1.000000)) + (= ?neg_ne (Mul ?neg_shape ?ne ?ne_str ?neg1 ?neg1_str ?neg_out)) + (= ?one (Constant 1.000000)) + (= ?eq (Add ?eq_shape ?neg_ne ?neg_str ?one ?one_str ?eq_out)) + (= ?mul_pos (Mul ?mul_shape ?eq ?eq_str ?pos_iota ?pos_str ?mul_out)) + (= ?result (Sum ?final_shape ?final_iters ?mul_pos ?mul_strides ?mul_iter_stride ?out_strides)) + 
(= ?dty (dtype ?inp)) + + ) + ( + (union ?result (KernelArgsort ?inp 0 ?final_shape ?inp_str2 ?out_strides ?sum_iters ?dty)) + ) + :name "kernel argsort ascending" + ) + + (rule + ( + ; Descending: LessThan(input, add_eps) means a.lt(b) in Rust + (= ?add_eps (Add ?add_shape ?inp ?inp_str1 ?eps ?eps_str ?add_out)) + (= ?cmp (LessThan ?cmp_shape ?inp ?inp_str2 ?add_eps ?add_str ?cmp_out)) + + (= ?sum_cmp (Sum ?sum_shape ?sum_iters ?cmp ?sum_in_strides ?sum_iter_stride ?sum_out_strides)) + (= ?cast (Cast ?sum_cmp (Int))) + (= ?cand_iota (Iota ?cand_expr ?cand_size)) + (= ?pos_iota (Iota ?pos_expr ?pos_size)) + (= ?lt1 (LessThan ?lt1_shape ?cast ?cast_str1 ?cand_iota ?cand_str1 ?lt1_out)) + (= ?lt2 (LessThan ?lt2_shape ?cand_iota ?cand_str2 ?cast ?cast_str2 ?lt2_out)) + (= ?ne (Add ?ne_shape ?lt1 ?lt1_str ?lt2 ?lt2_str ?ne_out)) + (= ?neg1 (Constant -1.000000)) + (= ?neg_ne (Mul ?neg_shape ?ne ?ne_str ?neg1 ?neg1_str ?neg_out)) + (= ?one (Constant 1.000000)) + (= ?eq (Add ?eq_shape ?neg_ne ?neg_str ?one ?one_str ?eq_out)) + (= ?mul_pos (Mul ?mul_shape ?eq ?eq_str ?pos_iota ?pos_str ?mul_out)) + (= ?result (Sum ?final_shape ?final_iters ?mul_pos ?mul_strides ?mul_iter_stride ?out_strides)) + (= ?dty (dtype ?inp)) + + ) + ( + (union ?result (KernelArgsort ?inp 1 ?final_shape ?inp_str2 ?out_strides ?sum_iters ?dty)) + ) + :name "kernel argsort descending" + ) +(rule + ( + ; get add + (= ?sa (Add ?shape ?a ?a_stride ?b ?b_stride ?out_stride)) + (= ?row_width (nth_from_end ?shape 0)) + (= (MNum ?row_width_num) ?row_width) + (<= ?row_width_num 4096) ; currently load full row to sram, should instead load chunks in up to capacity and stream rest in + ; assert the row is contiguous + (= (MNum 1) (nth_from_end ?a_stride 0)) + (= (MNum 1) (nth_from_end ?b_stride 0)) + (= (MNum 1) (nth_from_end ?out_stride 0)) + ;(= (F32) (dtype ?a)) + ) + ( + (let ?new_shape (RemoveNthFromEnd ?shape 0)) + (let ?new_a_stride (RemoveNthFromEnd ?a_stride 0)) + (let ?new_b_stride (RemoveNthFromEnd 
?b_stride 0)) + (let ?new_out_stride (RemoveNthFromEnd ?out_stride 0)) + (let ?ra (RowAdd ?new_shape ?a ?new_a_stride ?b ?new_b_stride ?new_out_stride ?row_width)) + (union ?sa ?ra) + (set (dtype ?ra) (F32)) + ) + :name "row add" + ) +(rule + ( + (= ?sigmoid (Sigmoid + (ECons ?batch (ECons ?width (ENil))) + ?self + (ECons ?width (ECons (MNum 1) (ENil))) + (ECons ?width (ECons (MNum 1) (ENil))) + )) + (= ?swish (Mul + (ECons ?batch (ECons ?width (ENil))) + ?self + (ECons ?width (ECons (MNum 1) (ENil))) + ?sigmoid + (ECons ?width (ECons (MNum 1) (ENil))) + (ECons ?width (ECons (MNum 1) (ENil))) + )) + (= ?swishmul (Mul + (ECons ?batch (ECons ?width (ENil))) + ?swish + (ECons ?width (ECons (MNum 1) (ENil))) + ?other + (ECons ?width (ECons (MNum 1) (ENil))) + (ECons ?width (ECons (MNum 1) (ENil))) + )) + ;(= (F32) (dtype ?self)) + ) + ( + (let ?rsm (RowSwishMul + (ECons ?batch (ENil)) + ?self + (ECons ?width (ENil)) + ?other + (ECons ?width (ENil)) + ?width + )) + (union ?swishmul ?rsm) + (set (dtype ?rsm) (F32)) + ) + :name "row swish mul" + ) +(rule + ( + (= ?square (Mul ?inp_range ?x ?inp_stride ?x ?inp_stride ?square_stride)) + (= ?width (nth_from_end ?inp_range 0)) + (= ?batch (nth_from_end ?inp_range 1)) + (= ?square_summed + (Sum + (ECons ?batch (ENil)) + ?width + ?square + (ECons ?width (ENil)) + (MNum 1) + (ECons (MNum 1) (ENil)) + ) + ) + (= ?inv_div_factor + (Recip (ECons ?batch (ENil)) (Cast (Iota ?width (MNum 1)) (F32)) + (ECons (MNum 0) (ENil)) ; broadcast the constant + (ECons (MNum 1) (ENil)))) ; produce per-batch vector + + (= ?mean + (Mul (ECons ?batch (ENil)) + ?square_summed (ECons (MNum 1) (ENil)) + ?inv_div_factor (ECons (MNum 1) (ENil)) + (ECons (MNum 1) (ENil)))) + (= ?eps_add + (Add + (ECons ?batch (ENil)) + ?mean + (ECons (MNum 1) (ENil)) + (Constant ?eps) + (ECons (MNum 0) (ENil)) + (ECons (MNum 1) (ENil)) + ) + ) + (= ?sqrt + (Sqrt + (ECons ?batch (ENil)) + ?eps_add + (ECons (MNum 1) (ENil)) + (ECons (MNum 1) (ENil)) + ) + ) + (= ?recip + 
(Recip + (ECons ?batch (ENil)) + ?sqrt + (ECons (MNum 1) (ENil)) + (ECons (MNum 1) (ENil)) + ) + ) + (= ?std_normed + (Mul + ?inp_range + ?recip + (ECons (MNum 1) (ECons (MNum 0) (ENil))) + ?x + ?inp_stride + ?inp_stride + ) + ) + (= ?final + (Mul + ?inp_range + ?std_normed + ?inp_stride + ?weight + (ECons (MNum 0) (ECons (MNum 1) (ENil))) + ?inp_stride + ) + ) + ;(= (F32) (dtype ?x)) + ) + ( + (let ?new + (RowRMSNorm + (ECons ?batch (ENil)) + ?x + (ECons ?width (ENil)) + ?width + ?weight + ) + ) + (union ?final ?new) + (set (dtype ?new) (F32)) + ) + :name "row rms norm" + ) +(rule + ( + (= ?e (RowRope ?shape ?inp ?stride ?row_width ?pos_ids)) + (= (F32) (dtype ?inp)) + ) + ((set (dtype ?e) (F32))) + ) + + ; Cube mul - Tile sum -> TileMatmul (row major) + (rule + ( + ; get cube mul + (= ?cm (CubeMul ?mul_shape ?untiled_mul_shape ?a ?a_stride ?a_m_stride ?a_n_stride ?a_k_stride ?b ?b_stride ?b_m_stride ?b_n_stride ?b_k_stride ?out_stride ?out_m_stride ?out_n_stride ?out_k_stride)) + ; get tile sum + (= ?ts (TileSum ?sum_shape ?untiled_sum_shape ?iters ?cm ?sum_in_stride ?sum_in_m_stride ?sum_in_n_stride ?sum_in_k_stride ?sum_out_stride ?sum_out_m_stride ?sum_out_n_stride)) + ; assert k stride on the intermediate is 1 + (= ?out_k_stride (MNum 1)) + (= ?sum_in_k_stride (MNum 1)) + ; assert matmul strides + (= ?b_n_stride (MNum 1)) + ; get dimensions + (= ?t_n (nth_from_end ?mul_shape 1)) + (= ?t_k (nth_from_end ?mul_shape 0)) + (= (F32) (dtype ?a)) + ) + ( + ; input strides are same as cube mul but without last element + (let ?new_a_stride (RemoveNthFromEnd ?a_stride 0)) + (let ?new_b_stride (RemoveNthFromEnd ?b_stride 0)) + (let ?tm (TileMatmul ?sum_shape ?untiled_sum_shape ?iters ?a ?new_a_stride (MMul ?t_k (MNum 64)) (MNum 1) ?b ?new_b_stride (MNum 1) (MMul ?t_n (MNum 64)) ?sum_out_stride (MMul ?t_n (MNum 64)) (MNum 1))) + (union ?ts ?tm) + (set (dtype ?tm) (F32)) + ; Subsume TileSum and CubeMul so they aren't chosen over TileMatmul + (subsume (TileSum ?sum_shape 
?untiled_sum_shape ?iters ?cm ?sum_in_stride ?sum_in_m_stride ?sum_in_n_stride ?sum_in_k_stride ?sum_out_stride ?sum_out_m_stride ?sum_out_n_stride)) + (subsume (CubeMul ?mul_shape ?untiled_mul_shape ?a ?a_stride ?a_m_stride ?a_n_stride ?a_k_stride ?b ?b_stride ?b_m_stride ?b_n_stride ?b_k_stride ?out_stride ?out_m_stride ?out_n_stride ?out_k_stride)) + ) + ) + + ; Cube mul - Tile sum -> TileMatmul (A row-major, B col-major, C row-major) + (rule + ( + ; get cube mul + (= ?cm (CubeMul ?mul_shape ?untiled_mul_shape + ?a ?a_stride ?a_m_stride ?a_n_stride ?a_k_stride + ?b ?b_stride ?b_m_stride ?b_n_stride ?b_k_stride + ?out_stride ?out_m_stride ?out_n_stride ?out_k_stride)) + ; get tile sum + (= ?ts (TileSum ?sum_shape ?untiled_sum_shape ?iters ?cm + ?sum_in_stride ?sum_in_m_stride ?sum_in_n_stride ?sum_in_k_stride + ?sum_out_stride ?sum_out_m_stride ?sum_out_n_stride)) + + ; assert k stride on the intermediate is 1 (contiguous) + (= ?out_k_stride (MNum 1)) + (= ?sum_in_k_stride (MNum 1)) + + ; A row-major (contiguous in its last dim k) + (= ?a_k_stride (MNum 1)) + + ; B col-major (contiguous in its first dim k) + (= ?b_k_stride (MNum 1)) + + ; get tile dims + (= ?t_n (nth_from_end ?mul_shape 1)) + (= ?t_k (nth_from_end ?mul_shape 0)) + ;(= (F32) (dtype ?a)) + ) + ( + ; input strides are same as cube mul but without last element + (let ?new_a_stride (RemoveNthFromEnd ?a_stride 0)) + (let ?new_b_stride (RemoveNthFromEnd ?b_stride 0)) + + ; Emit TileMatmul: + ; - A row-major tile strides: m -> t_k*TILE_SIZE, k -> 1 + ; - B col-major tile strides: k -> 1, n -> t_k*TILE_SIZE + ; - C row-major tile strides: m -> t_n*TILE_SIZE, n -> 1 + (let ?tm (TileMatmul ?sum_shape ?untiled_sum_shape ?iters + ?a ?new_a_stride (MMul ?t_k (MNum 64)) (MNum 1) + ?b ?new_b_stride ?b_k_stride (MMul ?t_k (MNum 64)) + ?sum_out_stride (MMul ?t_n (MNum 64)) (MNum 1))) + (union ?ts ?tm) + (set (dtype ?tm) (F32)) + ; Subsume TileSum and CubeMul so they aren't chosen over TileMatmul + (subsume 
(TileSum ?sum_shape ?untiled_sum_shape ?iters ?cm ?sum_in_stride ?sum_in_m_stride ?sum_in_n_stride ?sum_in_k_stride ?sum_out_stride ?sum_out_m_stride ?sum_out_n_stride)) + (subsume (CubeMul ?mul_shape ?untiled_mul_shape ?a ?a_stride ?a_m_stride ?a_n_stride ?a_k_stride ?b ?b_stride ?b_m_stride ?b_n_stride ?b_k_stride ?out_stride ?out_m_stride ?out_n_stride ?out_k_stride)) + ) + :name "cube mul" + ) + +(rule + ((= ?e (Input ?node ?label ?dty))) + ((set (dtype ?e) ?dty)) + ) +(rule + ((= ?e (Output ?inp ?node)) (= ?dty (dtype ?inp))) + ((set (dtype ?e) ?dty)) + ) +(rule + ((= ?e (CustomOpHLIR ?a ?b ?dty))) + ((set (dtype ?e) ?dty)) + ) +(rule + ((= ?e (Constant ?f))) + ((set (dtype ?e) (F32))) + ) +(rule + ((= ?e (Cast ?inp ?dty))) + ((set (dtype ?e) ?dty)) + ) +(rule + ((= ?e (Iota ?expr ?range))) + ((set (dtype ?e) (Int))) + ) +(rule + ((= ?e (Exp2 ?shape ?inp ?a ?b)) (= ?dty (dtype ?inp))) + ((set (dtype ?e) ?dty)) + ) +(rule + ((= ?e (Log2 ?shape ?inp ?a ?b)) (= ?dty (dtype ?inp))) + ((set (dtype ?e) ?dty)) + ) +(rule + ((= ?e (Sin ?shape ?inp ?a ?b)) (= ?dty (dtype ?inp))) + ((set (dtype ?e) ?dty)) + ) +(rule + ((= ?e (Recip ?shape ?inp ?a ?b)) (= ?dty (dtype ?inp))) + ((set (dtype ?e) ?dty)) + ) +(rule + ((= ?e (Sqrt ?shape ?inp ?a ?b)) (= ?dty (dtype ?inp))) + ((set (dtype ?e) ?dty)) + ) +(rule + ((= ?e (Add ?shape ?inp_a ?a ?inp_b ?b ?o)) (= ?dty (dtype ?inp_a))) + ((set (dtype ?e) ?dty)) + ) +(rule + ((= ?e (Mul ?shape ?inp_a ?a ?inp_b ?b ?o)) (= ?dty (dtype ?inp_a))) + ((set (dtype ?e) ?dty)) + ) +(rule + ((= ?e (Mod ?shape ?inp_a ?a ?inp_b ?b ?o)) (= ?dty (dtype ?inp_a))) + ((set (dtype ?e) ?dty)) + ) +(rule + ((= ?e (LessThan ?shape ?inp_a ?a ?inp_b ?b ?o)) (= ?dty (dtype ?inp_a))) + ((set (dtype ?e) ?dty)) + ) +(rule + ((= ?e (Gather ?indexes ?index_shape ?index_stride ?data ?data_shape ?data_stride)) (= ?dty (dtype ?data))) + ((set (dtype ?e) ?dty)) + ) +(rule + ((= ?e (Sum ?shape ?iters ?inp ?a ?stride ?o)) (= ?dty (dtype ?inp))) + ((set (dtype ?e) 
?dty)) + ) +(rule + ((= ?e (Max ?shape ?iters ?inp ?a ?stride ?o)) (= ?dty (dtype ?inp))) + ((set (dtype ?e) ?dty)) + ) + + (rule + ((= ?m (Exp a b c d))) + ((delete (Exp a b c d))) + :ruleset cleanup + ) +(rule + ((= ?m (Sigmoid a b c d))) + ((delete (Sigmoid a b c d))) + :ruleset cleanup + ) +(rule + ((= ?m (CubeMul a b c d e f g h i j k l m n o p))) + ((delete (CubeMul a b c d e f g h i j k l m n o p))) + :ruleset cleanup + ) +(rule + ((= ?m (TileSum a b c d e f g h i j k))) + ((delete (TileSum a b c d e f g h i j k))) + :ruleset cleanup + ) +(rule + ((= ?m (Constant a))) + ((delete (Constant a))) + :ruleset cleanup + ) +(rule + ((= ?m (Cast a b))) + ((delete (Cast a b))) + :ruleset cleanup + ) +(rule + ((= ?m (Iota a b))) + ((delete (Iota a b))) + :ruleset cleanup + ) +(rule + ((= ?m (Exp2 a b c d))) + ((delete (Exp2 a b c d))) + :ruleset cleanup + ) +(rule + ((= ?m (Log2 a b c d))) + ((delete (Log2 a b c d))) + :ruleset cleanup + ) +(rule + ((= ?m (Sin a b c d))) + ((delete (Sin a b c d))) + :ruleset cleanup + ) +(rule + ((= ?m (Recip a b c d))) + ((delete (Recip a b c d))) + :ruleset cleanup + ) +(rule + ((= ?m (Sqrt a b c d))) + ((delete (Sqrt a b c d))) + :ruleset cleanup + ) +(rule + ((= ?m (Add a b c d e f))) + ((delete (Add a b c d e f))) + :ruleset cleanup + ) +(rule + ((= ?m (Mul a b c d e f))) + ((delete (Mul a b c d e f))) + :ruleset cleanup + ) +(rule + ((= ?m (Mod a b c d e f))) + ((delete (Mod a b c d e f))) + :ruleset cleanup + ) +(rule + ((= ?m (LessThan a b c d e f))) + ((delete (LessThan a b c d e f))) + :ruleset cleanup + ) +(rule + ((= ?m (Gather a b c d e f))) + ((delete (Gather a b c d e f))) + :ruleset cleanup + ) +(rule + ((= ?m (Sum a b c d e f))) + ((delete (Sum a b c d e f))) + :ruleset cleanup + ) +(rule + ((= ?m (Max a b c d e f))) + ((delete (Max a b c d e f))) + :ruleset cleanup + ) + +(ruleset base_cleanup) +(rule + ((= ?m (MReplace ?a ?b ?c))) + ((delete (MReplace ?a ?b ?c))) + :ruleset base_cleanup +) +(rule + ((= ?m 
(MReplaceList ?a ?b ?c))) + ((delete (MReplaceList ?a ?b ?c))) + :ruleset base_cleanup +) +(rule + ((= ?m (ReplaceNthFromEnd ?a ?b ?c))) + ((delete (ReplaceNthFromEnd ?a ?b ?c))) + :ruleset base_cleanup +) +(rule + ((= ?m (RemoveNthFromEnd ?a ?b))) + ((delete (RemoveNthFromEnd ?a ?b))) + :ruleset base_cleanup +) +(rule + ((= ?m (len ?x))) + ((delete (len ?x))) + :ruleset base_cleanup +) +(rule + ((= ?m (nth_from_end ?x ?y))) + ((delete (nth_from_end ?x ?y))) + :ruleset base_cleanup +) +(rule + ((= ?m (n_elements ?x))) + ((delete (n_elements ?x))) + :ruleset base_cleanup +) +(rule + ((= ?m (RowMajor ?x))) + ((delete (RowMajor ?x))) + :ruleset base_cleanup +) +(let t0 "s") +(let t1 (MVar t0)) +(let t2 128256) +(let t3 (MNum t2)) +(let t4 (ENil )) +(let t5 (ECons t3 t4)) +(let t6 (ECons t1 t5)) +(let t7 4096) +(let t8 (MNum t7)) +(let t9 (ECons t8 t4)) +(let t10 (ECons t3 t9)) +(let t11 (ECons t1 t10)) +(let t12 (ECons t1 t9)) +(let t13 (ECons t1 t4)) +(let t14 0) +(let t15 "input") +(let t16 (Int )) +(let t17 (Input t14 t15 t16)) +(let t18 1) +(let t19 (MNum t18)) +(let t20 (ECons t19 t4)) +(let t21 (Iota t8 t19)) +(let t22 (MNum t14)) +(let t23 (ECons t22 t4)) +(let t24 (Mul t13 t17 t20 t21 t23 t20)) +(let t25 (ECons t19 t23)) +(let t26 (MIter )) +(let t27 (Iota t26 t8)) +(let t28 (ECons t22 t20)) +(let t29 (ECons t8 t20)) +(let t30 (Add t12 t24 t25 t27 t28 t29)) +(let t31 292) +(let t32 "model.embed_tokens.weight") +(let t33 (F32 )) +(let t34 (Input t31 t32 t33)) +(let t35 (Gather t30 t12 t29 t34 t10 t29)) +(let t36 (ECons t8 t9)) +(let t37 (ECons t1 t36)) +(let t38 (Mul t12 t35 t29 t35 t29 t29)) +(let t39 (Sum t13 t8 t38 t9 t19 t20)) +(let t40 (Cast t21 t33)) +(let t41 (Recip t13 t40 t23 t20)) +(let t42 (Mul t13 t39 t20 t41 t20 t20)) +(let t43 0.00001) +(let t44 (Constant t43)) +(let t45 (Add t13 t42 t20 t44 t23 t20)) +(let t46 (Sqrt t13 t45 t20 t20)) +(let t47 (Recip t13 t46 t20 t20)) +(let t48 (Mul t12 t47 t25 t35 t29 t29)) +(let t49 9) +(let t50 
"model.layers.0.input_layernorm.weight") +(let t51 (Input t49 t50 t33)) +(let t52 (Mul t12 t48 t29 t51 t28 t29)) +(let t53 (ECons t8 t28)) +(let t54 5) +(let t55 "model.layers.0.self_attn.q_proj.weight") +(let t56 (Input t54 t55 t33)) +(let t57 (ECons t22 t29)) +(let t58 16777216) +(let t59 (MNum t58)) +(let t60 (ECons t59 t29)) +(let t61 (Mul t37 t52 t53 t56 t57 t60)) +(let t62 (ECons t59 t9)) +(let t63 (Sum t12 t8 t61 t62 t19 t29)) +(let t64 "token_ids") +(let t65 (Input t18 t64 t16)) +(let t66 (RowRope t13 t63 t9 t8 t65)) +(let t67 1024) +(let t68 (MNum t67)) +(let t69 (ECons t68 t4)) +(let t70 (ECons t1 t69)) +(let t71 (ECons t68 t9)) +(let t72 (ECons t1 t71)) +(let t73 6) +(let t74 "model.layers.0.self_attn.k_proj.weight") +(let t75 (Input t73 t74 t33)) +(let t76 4194304) +(let t77 (MNum t76)) +(let t78 (ECons t77 t29)) +(let t79 (Mul t72 t52 t53 t75 t57 t78)) +(let t80 (ECons t77 t9)) +(let t81 (ECons t68 t20)) +(let t82 (Sum t70 t8 t79 t80 t19 t81)) +(let t83 (RowRope t13 t82 t69 t68 t65)) +(let t84 7) +(let t85 "model.layers.0.self_attn.v_proj.weight") +(let t86 (Input t84 t85 t33)) +(let t87 (Mul t72 t52 t53 t86 t57 t78)) +(let t88 (Sum t70 t8 t87 t80 t19 t81)) +(let t89 (INil )) +(let t90 (ICons t88 t89)) +(let t91 (ICons t83 t90)) +(let t92 (ICons t66 t91)) +(let t93 (CustomOpHLIR t92 t14 t33)) +(let t94 8) +(let t95 "model.layers.0.self_attn.o_proj.weight") +(let t96 (Input t94 t95 t33)) +(let t97 (Mul t37 t93 t53 t96 t57 t60)) +(let t98 (Sum t12 t8 t97 t62 t19 t29)) +(let t99 (Add t12 t35 t29 t98 t29 t29)) +(let t100 14336) +(let t101 (MNum t100)) +(let t102 (ECons t101 t4)) +(let t103 (ECons t8 t102)) +(let t104 (ECons t1 t103)) +(let t105 (ECons t1 t102)) +(let t106 (ECons t101 t9)) +(let t107 (ECons t1 t106)) +(let t108 (Mul t12 t99 t29 t99 t29 t29)) +(let t109 (Sum t13 t8 t108 t9 t19 t20)) +(let t110 (Mul t13 t109 t20 t41 t20 t20)) +(let t111 (Add t13 t110 t20 t44 t23 t20)) +(let t112 (Sqrt t13 t111 t20 t20)) +(let t113 (Recip t13 t112 t20 t20)) 
+(let t114 (Mul t12 t113 t25 t99 t29 t29)) +(let t115 10) +(let t116 "model.layers.0.post_attention_layernorm.weight") +(let t117 (Input t115 t116 t33)) +(let t118 (Mul t12 t114 t29 t117 t28 t29)) +(let t119 3) +(let t120 "model.layers.0.mlp.gate_proj.weight") +(let t121 (Input t119 t120 t33)) +(let t122 58720256) +(let t123 (MNum t122)) +(let t124 (ECons t123 t29)) +(let t125 (Mul t107 t118 t53 t121 t57 t124)) +(let t126 (ECons t123 t9)) +(let t127 (ECons t101 t20)) +(let t128 (Sum t105 t8 t125 t126 t19 t127)) +(let t129 -1.0) +(let t130 (Constant t129)) +(let t131 (ECons t22 t23)) +(let t132 (Mul t105 t128 t127 t130 t131 t127)) +(let t133 1.442695) +(let t134 (Constant t133)) +(let t135 (Mul t105 t132 t127 t134 t131 t127)) +(let t136 (Exp2 t105 t135 t127 t127)) +(let t137 1.0) +(let t138 (Constant t137)) +(let t139 (Add t105 t136 t127 t138 t131 t127)) +(let t140 (Recip t105 t139 t127 t127)) +(let t141 (Mul t105 t128 t127 t140 t127 t127)) +(let t142 2) +(let t143 "model.layers.0.mlp.up_proj.weight") +(let t144 (Input t142 t143 t33)) +(let t145 (Mul t107 t118 t53 t144 t57 t124)) +(let t146 (Sum t105 t8 t145 t126 t19 t127)) +(let t147 (Mul t105 t141 t127 t146 t127 t127)) +(let t148 (ECons t101 t28)) +(let t149 4) +(let t150 "model.layers.0.mlp.down_proj.weight") +(let t151 (Input t149 t150 t33)) +(let t152 (ECons t22 t127)) +(let t153 (ECons t123 t127)) +(let t154 (Mul t104 t147 t148 t151 t152 t153)) +(let t155 (ECons t123 t102)) +(let t156 (Sum t12 t101 t154 t155 t19 t29)) +(let t157 (Add t12 t99 t29 t156 t29 t29)) +(let t158 (Mul t12 t157 t29 t157 t29 t29)) +(let t159 (Sum t13 t8 t158 t9 t19 t20)) +(let t160 (Mul t13 t159 t20 t41 t20 t20)) +(let t161 (Add t13 t160 t20 t44 t23 t20)) +(let t162 (Sqrt t13 t161 t20 t20)) +(let t163 (Recip t13 t162 t20 t20)) +(let t164 (Mul t12 t163 t25 t157 t29 t29)) +(let t165 18) +(let t166 "model.layers.1.input_layernorm.weight") +(let t167 (Input t165 t166 t33)) +(let t168 (Mul t12 t164 t29 t167 t28 t29)) +(let t169 14) +(let t170 
"model.layers.1.self_attn.q_proj.weight") +(let t171 (Input t169 t170 t33)) +(let t172 (Mul t37 t168 t53 t171 t57 t60)) +(let t173 (Sum t12 t8 t172 t62 t19 t29)) +(let t174 (RowRope t13 t173 t9 t8 t65)) +(let t175 15) +(let t176 "model.layers.1.self_attn.k_proj.weight") +(let t177 (Input t175 t176 t33)) +(let t178 (Mul t72 t168 t53 t177 t57 t78)) +(let t179 (Sum t70 t8 t178 t80 t19 t81)) +(let t180 (RowRope t13 t179 t69 t68 t65)) +(let t181 16) +(let t182 "model.layers.1.self_attn.v_proj.weight") +(let t183 (Input t181 t182 t33)) +(let t184 (Mul t72 t168 t53 t183 t57 t78)) +(let t185 (Sum t70 t8 t184 t80 t19 t81)) +(let t186 (ICons t185 t89)) +(let t187 (ICons t180 t186)) +(let t188 (ICons t174 t187)) +(let t189 (CustomOpHLIR t188 t18 t33)) +(let t190 17) +(let t191 "model.layers.1.self_attn.o_proj.weight") +(let t192 (Input t190 t191 t33)) +(let t193 (Mul t37 t189 t53 t192 t57 t60)) +(let t194 (Sum t12 t8 t193 t62 t19 t29)) +(let t195 (Add t12 t157 t29 t194 t29 t29)) +(let t196 (Mul t12 t195 t29 t195 t29 t29)) +(let t197 (Sum t13 t8 t196 t9 t19 t20)) +(let t198 (Mul t13 t197 t20 t41 t20 t20)) +(let t199 (Add t13 t198 t20 t44 t23 t20)) +(let t200 (Sqrt t13 t199 t20 t20)) +(let t201 (Recip t13 t200 t20 t20)) +(let t202 (Mul t12 t201 t25 t195 t29 t29)) +(let t203 19) +(let t204 "model.layers.1.post_attention_layernorm.weight") +(let t205 (Input t203 t204 t33)) +(let t206 (Mul t12 t202 t29 t205 t28 t29)) +(let t207 12) +(let t208 "model.layers.1.mlp.gate_proj.weight") +(let t209 (Input t207 t208 t33)) +(let t210 (Mul t107 t206 t53 t209 t57 t124)) +(let t211 (Sum t105 t8 t210 t126 t19 t127)) +(let t212 (Mul t105 t211 t127 t130 t131 t127)) +(let t213 (Mul t105 t212 t127 t134 t131 t127)) +(let t214 (Exp2 t105 t213 t127 t127)) +(let t215 (Add t105 t214 t127 t138 t131 t127)) +(let t216 (Recip t105 t215 t127 t127)) +(let t217 (Mul t105 t211 t127 t216 t127 t127)) +(let t218 11) +(let t219 "model.layers.1.mlp.up_proj.weight") +(let t220 (Input t218 t219 t33)) +(let t221 (Mul 
t107 t206 t53 t220 t57 t124)) +(let t222 (Sum t105 t8 t221 t126 t19 t127)) +(let t223 (Mul t105 t217 t127 t222 t127 t127)) +(let t224 13) +(let t225 "model.layers.1.mlp.down_proj.weight") +(let t226 (Input t224 t225 t33)) +(let t227 (Mul t104 t223 t148 t226 t152 t153)) +(let t228 (Sum t12 t101 t227 t155 t19 t29)) +(let t229 (Add t12 t195 t29 t228 t29 t29)) +(let t230 (Mul t12 t229 t29 t229 t29 t29)) +(let t231 (Sum t13 t8 t230 t9 t19 t20)) +(let t232 (Mul t13 t231 t20 t41 t20 t20)) +(let t233 (Add t13 t232 t20 t44 t23 t20)) +(let t234 (Sqrt t13 t233 t20 t20)) +(let t235 (Recip t13 t234 t20 t20)) +(let t236 (Mul t12 t235 t25 t229 t29 t29)) +(let t237 27) +(let t238 "model.layers.2.input_layernorm.weight") +(let t239 (Input t237 t238 t33)) +(let t240 (Mul t12 t236 t29 t239 t28 t29)) +(let t241 23) +(let t242 "model.layers.2.self_attn.q_proj.weight") +(let t243 (Input t241 t242 t33)) +(let t244 (Mul t37 t240 t53 t243 t57 t60)) +(let t245 (Sum t12 t8 t244 t62 t19 t29)) +(let t246 (RowRope t13 t245 t9 t8 t65)) +(let t247 24) +(let t248 "model.layers.2.self_attn.k_proj.weight") +(let t249 (Input t247 t248 t33)) +(let t250 (Mul t72 t240 t53 t249 t57 t78)) +(let t251 (Sum t70 t8 t250 t80 t19 t81)) +(let t252 (RowRope t13 t251 t69 t68 t65)) +(let t253 25) +(let t254 "model.layers.2.self_attn.v_proj.weight") +(let t255 (Input t253 t254 t33)) +(let t256 (Mul t72 t240 t53 t255 t57 t78)) +(let t257 (Sum t70 t8 t256 t80 t19 t81)) +(let t258 (ICons t257 t89)) +(let t259 (ICons t252 t258)) +(let t260 (ICons t246 t259)) +(let t261 (CustomOpHLIR t260 t142 t33)) +(let t262 26) +(let t263 "model.layers.2.self_attn.o_proj.weight") +(let t264 (Input t262 t263 t33)) +(let t265 (Mul t37 t261 t53 t264 t57 t60)) +(let t266 (Sum t12 t8 t265 t62 t19 t29)) +(let t267 (Add t12 t229 t29 t266 t29 t29)) +(let t268 (Mul t12 t267 t29 t267 t29 t29)) +(let t269 (Sum t13 t8 t268 t9 t19 t20)) +(let t270 (Mul t13 t269 t20 t41 t20 t20)) +(let t271 (Add t13 t270 t20 t44 t23 t20)) +(let t272 (Sqrt t13 t271 
t20 t20)) +(let t273 (Recip t13 t272 t20 t20)) +(let t274 (Mul t12 t273 t25 t267 t29 t29)) +(let t275 28) +(let t276 "model.layers.2.post_attention_layernorm.weight") +(let t277 (Input t275 t276 t33)) +(let t278 (Mul t12 t274 t29 t277 t28 t29)) +(let t279 21) +(let t280 "model.layers.2.mlp.gate_proj.weight") +(let t281 (Input t279 t280 t33)) +(let t282 (Mul t107 t278 t53 t281 t57 t124)) +(let t283 (Sum t105 t8 t282 t126 t19 t127)) +(let t284 (Mul t105 t283 t127 t130 t131 t127)) +(let t285 (Mul t105 t284 t127 t134 t131 t127)) +(let t286 (Exp2 t105 t285 t127 t127)) +(let t287 (Add t105 t286 t127 t138 t131 t127)) +(let t288 (Recip t105 t287 t127 t127)) +(let t289 (Mul t105 t283 t127 t288 t127 t127)) +(let t290 20) +(let t291 "model.layers.2.mlp.up_proj.weight") +(let t292 (Input t290 t291 t33)) +(let t293 (Mul t107 t278 t53 t292 t57 t124)) +(let t294 (Sum t105 t8 t293 t126 t19 t127)) +(let t295 (Mul t105 t289 t127 t294 t127 t127)) +(let t296 22) +(let t297 "model.layers.2.mlp.down_proj.weight") +(let t298 (Input t296 t297 t33)) +(let t299 (Mul t104 t295 t148 t298 t152 t153)) +(let t300 (Sum t12 t101 t299 t155 t19 t29)) +(let t301 (Add t12 t267 t29 t300 t29 t29)) +(let t302 (Mul t12 t301 t29 t301 t29 t29)) +(let t303 (Sum t13 t8 t302 t9 t19 t20)) +(let t304 (Mul t13 t303 t20 t41 t20 t20)) +(let t305 (Add t13 t304 t20 t44 t23 t20)) +(let t306 (Sqrt t13 t305 t20 t20)) +(let t307 (Recip t13 t306 t20 t20)) +(let t308 (Mul t12 t307 t25 t301 t29 t29)) +(let t309 36) +(let t310 "model.layers.3.input_layernorm.weight") +(let t311 (Input t309 t310 t33)) +(let t312 (Mul t12 t308 t29 t311 t28 t29)) +(let t313 32) +(let t314 "model.layers.3.self_attn.q_proj.weight") +(let t315 (Input t313 t314 t33)) +(let t316 (Mul t37 t312 t53 t315 t57 t60)) +(let t317 (Sum t12 t8 t316 t62 t19 t29)) +(let t318 (RowRope t13 t317 t9 t8 t65)) +(let t319 33) +(let t320 "model.layers.3.self_attn.k_proj.weight") +(let t321 (Input t319 t320 t33)) +(let t322 (Mul t72 t312 t53 t321 t57 t78)) +(let t323 
(Sum t70 t8 t322 t80 t19 t81)) +(let t324 (RowRope t13 t323 t69 t68 t65)) +(let t325 34) +(let t326 "model.layers.3.self_attn.v_proj.weight") +(let t327 (Input t325 t326 t33)) +(let t328 (Mul t72 t312 t53 t327 t57 t78)) +(let t329 (Sum t70 t8 t328 t80 t19 t81)) +(let t330 (ICons t329 t89)) +(let t331 (ICons t324 t330)) +(let t332 (ICons t318 t331)) +(let t333 (CustomOpHLIR t332 t119 t33)) +(let t334 35) +(let t335 "model.layers.3.self_attn.o_proj.weight") +(let t336 (Input t334 t335 t33)) +(let t337 (Mul t37 t333 t53 t336 t57 t60)) +(let t338 (Sum t12 t8 t337 t62 t19 t29)) +(let t339 (Add t12 t301 t29 t338 t29 t29)) +(let t340 (Mul t12 t339 t29 t339 t29 t29)) +(let t341 (Sum t13 t8 t340 t9 t19 t20)) +(let t342 (Mul t13 t341 t20 t41 t20 t20)) +(let t343 (Add t13 t342 t20 t44 t23 t20)) +(let t344 (Sqrt t13 t343 t20 t20)) +(let t345 (Recip t13 t344 t20 t20)) +(let t346 (Mul t12 t345 t25 t339 t29 t29)) +(let t347 37) +(let t348 "model.layers.3.post_attention_layernorm.weight") +(let t349 (Input t347 t348 t33)) +(let t350 (Mul t12 t346 t29 t349 t28 t29)) +(let t351 30) +(let t352 "model.layers.3.mlp.gate_proj.weight") +(let t353 (Input t351 t352 t33)) +(let t354 (Mul t107 t350 t53 t353 t57 t124)) +(let t355 (Sum t105 t8 t354 t126 t19 t127)) +(let t356 (Mul t105 t355 t127 t130 t131 t127)) +(let t357 (Mul t105 t356 t127 t134 t131 t127)) +(let t358 (Exp2 t105 t357 t127 t127)) +(let t359 (Add t105 t358 t127 t138 t131 t127)) +(let t360 (Recip t105 t359 t127 t127)) +(let t361 (Mul t105 t355 t127 t360 t127 t127)) +(let t362 29) +(let t363 "model.layers.3.mlp.up_proj.weight") +(let t364 (Input t362 t363 t33)) +(let t365 (Mul t107 t350 t53 t364 t57 t124)) +(let t366 (Sum t105 t8 t365 t126 t19 t127)) +(let t367 (Mul t105 t361 t127 t366 t127 t127)) +(let t368 31) +(let t369 "model.layers.3.mlp.down_proj.weight") +(let t370 (Input t368 t369 t33)) +(let t371 (Mul t104 t367 t148 t370 t152 t153)) +(let t372 (Sum t12 t101 t371 t155 t19 t29)) +(let t373 (Add t12 t339 t29 t372 t29 t29)) 
+(let t374 (Mul t12 t373 t29 t373 t29 t29)) +(let t375 (Sum t13 t8 t374 t9 t19 t20)) +(let t376 (Mul t13 t375 t20 t41 t20 t20)) +(let t377 (Add t13 t376 t20 t44 t23 t20)) +(let t378 (Sqrt t13 t377 t20 t20)) +(let t379 (Recip t13 t378 t20 t20)) +(let t380 (Mul t12 t379 t25 t373 t29 t29)) +(let t381 45) +(let t382 "model.layers.4.input_layernorm.weight") +(let t383 (Input t381 t382 t33)) +(let t384 (Mul t12 t380 t29 t383 t28 t29)) +(let t385 41) +(let t386 "model.layers.4.self_attn.q_proj.weight") +(let t387 (Input t385 t386 t33)) +(let t388 (Mul t37 t384 t53 t387 t57 t60)) +(let t389 (Sum t12 t8 t388 t62 t19 t29)) +(let t390 (RowRope t13 t389 t9 t8 t65)) +(let t391 42) +(let t392 "model.layers.4.self_attn.k_proj.weight") +(let t393 (Input t391 t392 t33)) +(let t394 (Mul t72 t384 t53 t393 t57 t78)) +(let t395 (Sum t70 t8 t394 t80 t19 t81)) +(let t396 (RowRope t13 t395 t69 t68 t65)) +(let t397 43) +(let t398 "model.layers.4.self_attn.v_proj.weight") +(let t399 (Input t397 t398 t33)) +(let t400 (Mul t72 t384 t53 t399 t57 t78)) +(let t401 (Sum t70 t8 t400 t80 t19 t81)) +(let t402 (ICons t401 t89)) +(let t403 (ICons t396 t402)) +(let t404 (ICons t390 t403)) +(let t405 (CustomOpHLIR t404 t149 t33)) +(let t406 44) +(let t407 "model.layers.4.self_attn.o_proj.weight") +(let t408 (Input t406 t407 t33)) +(let t409 (Mul t37 t405 t53 t408 t57 t60)) +(let t410 (Sum t12 t8 t409 t62 t19 t29)) +(let t411 (Add t12 t373 t29 t410 t29 t29)) +(let t412 (Mul t12 t411 t29 t411 t29 t29)) +(let t413 (Sum t13 t8 t412 t9 t19 t20)) +(let t414 (Mul t13 t413 t20 t41 t20 t20)) +(let t415 (Add t13 t414 t20 t44 t23 t20)) +(let t416 (Sqrt t13 t415 t20 t20)) +(let t417 (Recip t13 t416 t20 t20)) +(let t418 (Mul t12 t417 t25 t411 t29 t29)) +(let t419 46) +(let t420 "model.layers.4.post_attention_layernorm.weight") +(let t421 (Input t419 t420 t33)) +(let t422 (Mul t12 t418 t29 t421 t28 t29)) +(let t423 39) +(let t424 "model.layers.4.mlp.gate_proj.weight") +(let t425 (Input t423 t424 t33)) +(let t426 (Mul 
t107 t422 t53 t425 t57 t124)) +(let t427 (Sum t105 t8 t426 t126 t19 t127)) +(let t428 (Mul t105 t427 t127 t130 t131 t127)) +(let t429 (Mul t105 t428 t127 t134 t131 t127)) +(let t430 (Exp2 t105 t429 t127 t127)) +(let t431 (Add t105 t430 t127 t138 t131 t127)) +(let t432 (Recip t105 t431 t127 t127)) +(let t433 (Mul t105 t427 t127 t432 t127 t127)) +(let t434 38) +(let t435 "model.layers.4.mlp.up_proj.weight") +(let t436 (Input t434 t435 t33)) +(let t437 (Mul t107 t422 t53 t436 t57 t124)) +(let t438 (Sum t105 t8 t437 t126 t19 t127)) +(let t439 (Mul t105 t433 t127 t438 t127 t127)) +(let t440 40) +(let t441 "model.layers.4.mlp.down_proj.weight") +(let t442 (Input t440 t441 t33)) +(let t443 (Mul t104 t439 t148 t442 t152 t153)) +(let t444 (Sum t12 t101 t443 t155 t19 t29)) +(let t445 (Add t12 t411 t29 t444 t29 t29)) +(let t446 (Mul t12 t445 t29 t445 t29 t29)) +(let t447 (Sum t13 t8 t446 t9 t19 t20)) +(let t448 (Mul t13 t447 t20 t41 t20 t20)) +(let t449 (Add t13 t448 t20 t44 t23 t20)) +(let t450 (Sqrt t13 t449 t20 t20)) +(let t451 (Recip t13 t450 t20 t20)) +(let t452 (Mul t12 t451 t25 t445 t29 t29)) +(let t453 54) +(let t454 "model.layers.5.input_layernorm.weight") +(let t455 (Input t453 t454 t33)) +(let t456 (Mul t12 t452 t29 t455 t28 t29)) +(let t457 50) +(let t458 "model.layers.5.self_attn.q_proj.weight") +(let t459 (Input t457 t458 t33)) +(let t460 (Mul t37 t456 t53 t459 t57 t60)) +(let t461 (Sum t12 t8 t460 t62 t19 t29)) +(let t462 (RowRope t13 t461 t9 t8 t65)) +(let t463 51) +(let t464 "model.layers.5.self_attn.k_proj.weight") +(let t465 (Input t463 t464 t33)) +(let t466 (Mul t72 t456 t53 t465 t57 t78)) +(let t467 (Sum t70 t8 t466 t80 t19 t81)) +(let t468 (RowRope t13 t467 t69 t68 t65)) +(let t469 52) +(let t470 "model.layers.5.self_attn.v_proj.weight") +(let t471 (Input t469 t470 t33)) +(let t472 (Mul t72 t456 t53 t471 t57 t78)) +(let t473 (Sum t70 t8 t472 t80 t19 t81)) +(let t474 (ICons t473 t89)) +(let t475 (ICons t468 t474)) +(let t476 (ICons t462 t475)) +(let t477 
(CustomOpHLIR t476 t54 t33)) +(let t478 53) +(let t479 "model.layers.5.self_attn.o_proj.weight") +(let t480 (Input t478 t479 t33)) +(let t481 (Mul t37 t477 t53 t480 t57 t60)) +(let t482 (Sum t12 t8 t481 t62 t19 t29)) +(let t483 (Add t12 t445 t29 t482 t29 t29)) +(let t484 (Mul t12 t483 t29 t483 t29 t29)) +(let t485 (Sum t13 t8 t484 t9 t19 t20)) +(let t486 (Mul t13 t485 t20 t41 t20 t20)) +(let t487 (Add t13 t486 t20 t44 t23 t20)) +(let t488 (Sqrt t13 t487 t20 t20)) +(let t489 (Recip t13 t488 t20 t20)) +(let t490 (Mul t12 t489 t25 t483 t29 t29)) +(let t491 55) +(let t492 "model.layers.5.post_attention_layernorm.weight") +(let t493 (Input t491 t492 t33)) +(let t494 (Mul t12 t490 t29 t493 t28 t29)) +(let t495 48) +(let t496 "model.layers.5.mlp.gate_proj.weight") +(let t497 (Input t495 t496 t33)) +(let t498 (Mul t107 t494 t53 t497 t57 t124)) +(let t499 (Sum t105 t8 t498 t126 t19 t127)) +(let t500 (Mul t105 t499 t127 t130 t131 t127)) +(let t501 (Mul t105 t500 t127 t134 t131 t127)) +(let t502 (Exp2 t105 t501 t127 t127)) +(let t503 (Add t105 t502 t127 t138 t131 t127)) +(let t504 (Recip t105 t503 t127 t127)) +(let t505 (Mul t105 t499 t127 t504 t127 t127)) +(let t506 47) +(let t507 "model.layers.5.mlp.up_proj.weight") +(let t508 (Input t506 t507 t33)) +(let t509 (Mul t107 t494 t53 t508 t57 t124)) +(let t510 (Sum t105 t8 t509 t126 t19 t127)) +(let t511 (Mul t105 t505 t127 t510 t127 t127)) +(let t512 49) +(let t513 "model.layers.5.mlp.down_proj.weight") +(let t514 (Input t512 t513 t33)) +(let t515 (Mul t104 t511 t148 t514 t152 t153)) +(let t516 (Sum t12 t101 t515 t155 t19 t29)) +(let t517 (Add t12 t483 t29 t516 t29 t29)) +(let t518 (Mul t12 t517 t29 t517 t29 t29)) +(let t519 (Sum t13 t8 t518 t9 t19 t20)) +(let t520 (Mul t13 t519 t20 t41 t20 t20)) +(let t521 (Add t13 t520 t20 t44 t23 t20)) +(let t522 (Sqrt t13 t521 t20 t20)) +(let t523 (Recip t13 t522 t20 t20)) +(let t524 (Mul t12 t523 t25 t517 t29 t29)) +(let t525 63) +(let t526 "model.layers.6.input_layernorm.weight") +(let 
t527 (Input t525 t526 t33)) +(let t528 (Mul t12 t524 t29 t527 t28 t29)) +(let t529 59) +(let t530 "model.layers.6.self_attn.q_proj.weight") +(let t531 (Input t529 t530 t33)) +(let t532 (Mul t37 t528 t53 t531 t57 t60)) +(let t533 (Sum t12 t8 t532 t62 t19 t29)) +(let t534 (RowRope t13 t533 t9 t8 t65)) +(let t535 60) +(let t536 "model.layers.6.self_attn.k_proj.weight") +(let t537 (Input t535 t536 t33)) +(let t538 (Mul t72 t528 t53 t537 t57 t78)) +(let t539 (Sum t70 t8 t538 t80 t19 t81)) +(let t540 (RowRope t13 t539 t69 t68 t65)) +(let t541 61) +(let t542 "model.layers.6.self_attn.v_proj.weight") +(let t543 (Input t541 t542 t33)) +(let t544 (Mul t72 t528 t53 t543 t57 t78)) +(let t545 (Sum t70 t8 t544 t80 t19 t81)) +(let t546 (ICons t545 t89)) +(let t547 (ICons t540 t546)) +(let t548 (ICons t534 t547)) +(let t549 (CustomOpHLIR t548 t73 t33)) +(let t550 62) +(let t551 "model.layers.6.self_attn.o_proj.weight") +(let t552 (Input t550 t551 t33)) +(let t553 (Mul t37 t549 t53 t552 t57 t60)) +(let t554 (Sum t12 t8 t553 t62 t19 t29)) +(let t555 (Add t12 t517 t29 t554 t29 t29)) +(let t556 (Mul t12 t555 t29 t555 t29 t29)) +(let t557 (Sum t13 t8 t556 t9 t19 t20)) +(let t558 (Mul t13 t557 t20 t41 t20 t20)) +(let t559 (Add t13 t558 t20 t44 t23 t20)) +(let t560 (Sqrt t13 t559 t20 t20)) +(let t561 (Recip t13 t560 t20 t20)) +(let t562 (Mul t12 t561 t25 t555 t29 t29)) +(let t563 64) +(let t564 "model.layers.6.post_attention_layernorm.weight") +(let t565 (Input t563 t564 t33)) +(let t566 (Mul t12 t562 t29 t565 t28 t29)) +(let t567 57) +(let t568 "model.layers.6.mlp.gate_proj.weight") +(let t569 (Input t567 t568 t33)) +(let t570 (Mul t107 t566 t53 t569 t57 t124)) +(let t571 (Sum t105 t8 t570 t126 t19 t127)) +(let t572 (Mul t105 t571 t127 t130 t131 t127)) +(let t573 (Mul t105 t572 t127 t134 t131 t127)) +(let t574 (Exp2 t105 t573 t127 t127)) +(let t575 (Add t105 t574 t127 t138 t131 t127)) +(let t576 (Recip t105 t575 t127 t127)) +(let t577 (Mul t105 t571 t127 t576 t127 t127)) +(let t578 56) 
+(let t579 "model.layers.6.mlp.up_proj.weight") +(let t580 (Input t578 t579 t33)) +(let t581 (Mul t107 t566 t53 t580 t57 t124)) +(let t582 (Sum t105 t8 t581 t126 t19 t127)) +(let t583 (Mul t105 t577 t127 t582 t127 t127)) +(let t584 58) +(let t585 "model.layers.6.mlp.down_proj.weight") +(let t586 (Input t584 t585 t33)) +(let t587 (Mul t104 t583 t148 t586 t152 t153)) +(let t588 (Sum t12 t101 t587 t155 t19 t29)) +(let t589 (Add t12 t555 t29 t588 t29 t29)) +(let t590 (Mul t12 t589 t29 t589 t29 t29)) +(let t591 (Sum t13 t8 t590 t9 t19 t20)) +(let t592 (Mul t13 t591 t20 t41 t20 t20)) +(let t593 (Add t13 t592 t20 t44 t23 t20)) +(let t594 (Sqrt t13 t593 t20 t20)) +(let t595 (Recip t13 t594 t20 t20)) +(let t596 (Mul t12 t595 t25 t589 t29 t29)) +(let t597 72) +(let t598 "model.layers.7.input_layernorm.weight") +(let t599 (Input t597 t598 t33)) +(let t600 (Mul t12 t596 t29 t599 t28 t29)) +(let t601 68) +(let t602 "model.layers.7.self_attn.q_proj.weight") +(let t603 (Input t601 t602 t33)) +(let t604 (Mul t37 t600 t53 t603 t57 t60)) +(let t605 (Sum t12 t8 t604 t62 t19 t29)) +(let t606 (RowRope t13 t605 t9 t8 t65)) +(let t607 69) +(let t608 "model.layers.7.self_attn.k_proj.weight") +(let t609 (Input t607 t608 t33)) +(let t610 (Mul t72 t600 t53 t609 t57 t78)) +(let t611 (Sum t70 t8 t610 t80 t19 t81)) +(let t612 (RowRope t13 t611 t69 t68 t65)) +(let t613 70) +(let t614 "model.layers.7.self_attn.v_proj.weight") +(let t615 (Input t613 t614 t33)) +(let t616 (Mul t72 t600 t53 t615 t57 t78)) +(let t617 (Sum t70 t8 t616 t80 t19 t81)) +(let t618 (ICons t617 t89)) +(let t619 (ICons t612 t618)) +(let t620 (ICons t606 t619)) +(let t621 (CustomOpHLIR t620 t84 t33)) +(let t622 71) +(let t623 "model.layers.7.self_attn.o_proj.weight") +(let t624 (Input t622 t623 t33)) +(let t625 (Mul t37 t621 t53 t624 t57 t60)) +(let t626 (Sum t12 t8 t625 t62 t19 t29)) +(let t627 (Add t12 t589 t29 t626 t29 t29)) +(let t628 (Mul t12 t627 t29 t627 t29 t29)) +(let t629 (Sum t13 t8 t628 t9 t19 t20)) +(let t630 (Mul 
t13 t629 t20 t41 t20 t20)) +(let t631 (Add t13 t630 t20 t44 t23 t20)) +(let t632 (Sqrt t13 t631 t20 t20)) +(let t633 (Recip t13 t632 t20 t20)) +(let t634 (Mul t12 t633 t25 t627 t29 t29)) +(let t635 73) +(let t636 "model.layers.7.post_attention_layernorm.weight") +(let t637 (Input t635 t636 t33)) +(let t638 (Mul t12 t634 t29 t637 t28 t29)) +(let t639 66) +(let t640 "model.layers.7.mlp.gate_proj.weight") +(let t641 (Input t639 t640 t33)) +(let t642 (Mul t107 t638 t53 t641 t57 t124)) +(let t643 (Sum t105 t8 t642 t126 t19 t127)) +(let t644 (Mul t105 t643 t127 t130 t131 t127)) +(let t645 (Mul t105 t644 t127 t134 t131 t127)) +(let t646 (Exp2 t105 t645 t127 t127)) +(let t647 (Add t105 t646 t127 t138 t131 t127)) +(let t648 (Recip t105 t647 t127 t127)) +(let t649 (Mul t105 t643 t127 t648 t127 t127)) +(let t650 65) +(let t651 "model.layers.7.mlp.up_proj.weight") +(let t652 (Input t650 t651 t33)) +(let t653 (Mul t107 t638 t53 t652 t57 t124)) +(let t654 (Sum t105 t8 t653 t126 t19 t127)) +(let t655 (Mul t105 t649 t127 t654 t127 t127)) +(let t656 67) +(let t657 "model.layers.7.mlp.down_proj.weight") +(let t658 (Input t656 t657 t33)) +(let t659 (Mul t104 t655 t148 t658 t152 t153)) +(let t660 (Sum t12 t101 t659 t155 t19 t29)) +(let t661 (Add t12 t627 t29 t660 t29 t29)) +(let t662 (Mul t12 t661 t29 t661 t29 t29)) +(let t663 (Sum t13 t8 t662 t9 t19 t20)) +(let t664 (Mul t13 t663 t20 t41 t20 t20)) +(let t665 (Add t13 t664 t20 t44 t23 t20)) +(let t666 (Sqrt t13 t665 t20 t20)) +(let t667 (Recip t13 t666 t20 t20)) +(let t668 (Mul t12 t667 t25 t661 t29 t29)) +(let t669 81) +(let t670 "model.layers.8.input_layernorm.weight") +(let t671 (Input t669 t670 t33)) +(let t672 (Mul t12 t668 t29 t671 t28 t29)) +(let t673 77) +(let t674 "model.layers.8.self_attn.q_proj.weight") +(let t675 (Input t673 t674 t33)) +(let t676 (Mul t37 t672 t53 t675 t57 t60)) +(let t677 (Sum t12 t8 t676 t62 t19 t29)) +(let t678 (RowRope t13 t677 t9 t8 t65)) +(let t679 78) +(let t680 
"model.layers.8.self_attn.k_proj.weight") +(let t681 (Input t679 t680 t33)) +(let t682 (Mul t72 t672 t53 t681 t57 t78)) +(let t683 (Sum t70 t8 t682 t80 t19 t81)) +(let t684 (RowRope t13 t683 t69 t68 t65)) +(let t685 79) +(let t686 "model.layers.8.self_attn.v_proj.weight") +(let t687 (Input t685 t686 t33)) +(let t688 (Mul t72 t672 t53 t687 t57 t78)) +(let t689 (Sum t70 t8 t688 t80 t19 t81)) +(let t690 (ICons t689 t89)) +(let t691 (ICons t684 t690)) +(let t692 (ICons t678 t691)) +(let t693 (CustomOpHLIR t692 t94 t33)) +(let t694 80) +(let t695 "model.layers.8.self_attn.o_proj.weight") +(let t696 (Input t694 t695 t33)) +(let t697 (Mul t37 t693 t53 t696 t57 t60)) +(let t698 (Sum t12 t8 t697 t62 t19 t29)) +(let t699 (Add t12 t661 t29 t698 t29 t29)) +(let t700 (Mul t12 t699 t29 t699 t29 t29)) +(let t701 (Sum t13 t8 t700 t9 t19 t20)) +(let t702 (Mul t13 t701 t20 t41 t20 t20)) +(let t703 (Add t13 t702 t20 t44 t23 t20)) +(let t704 (Sqrt t13 t703 t20 t20)) +(let t705 (Recip t13 t704 t20 t20)) +(let t706 (Mul t12 t705 t25 t699 t29 t29)) +(let t707 82) +(let t708 "model.layers.8.post_attention_layernorm.weight") +(let t709 (Input t707 t708 t33)) +(let t710 (Mul t12 t706 t29 t709 t28 t29)) +(let t711 75) +(let t712 "model.layers.8.mlp.gate_proj.weight") +(let t713 (Input t711 t712 t33)) +(let t714 (Mul t107 t710 t53 t713 t57 t124)) +(let t715 (Sum t105 t8 t714 t126 t19 t127)) +(let t716 (Mul t105 t715 t127 t130 t131 t127)) +(let t717 (Mul t105 t716 t127 t134 t131 t127)) +(let t718 (Exp2 t105 t717 t127 t127)) +(let t719 (Add t105 t718 t127 t138 t131 t127)) +(let t720 (Recip t105 t719 t127 t127)) +(let t721 (Mul t105 t715 t127 t720 t127 t127)) +(let t722 74) +(let t723 "model.layers.8.mlp.up_proj.weight") +(let t724 (Input t722 t723 t33)) +(let t725 (Mul t107 t710 t53 t724 t57 t124)) +(let t726 (Sum t105 t8 t725 t126 t19 t127)) +(let t727 (Mul t105 t721 t127 t726 t127 t127)) +(let t728 76) +(let t729 "model.layers.8.mlp.down_proj.weight") +(let t730 (Input t728 t729 t33)) +(let 
t731 (Mul t104 t727 t148 t730 t152 t153)) +(let t732 (Sum t12 t101 t731 t155 t19 t29)) +(let t733 (Add t12 t699 t29 t732 t29 t29)) +(let t734 (Mul t12 t733 t29 t733 t29 t29)) +(let t735 (Sum t13 t8 t734 t9 t19 t20)) +(let t736 (Mul t13 t735 t20 t41 t20 t20)) +(let t737 (Add t13 t736 t20 t44 t23 t20)) +(let t738 (Sqrt t13 t737 t20 t20)) +(let t739 (Recip t13 t738 t20 t20)) +(let t740 (Mul t12 t739 t25 t733 t29 t29)) +(let t741 90) +(let t742 "model.layers.9.input_layernorm.weight") +(let t743 (Input t741 t742 t33)) +(let t744 (Mul t12 t740 t29 t743 t28 t29)) +(let t745 86) +(let t746 "model.layers.9.self_attn.q_proj.weight") +(let t747 (Input t745 t746 t33)) +(let t748 (Mul t37 t744 t53 t747 t57 t60)) +(let t749 (Sum t12 t8 t748 t62 t19 t29)) +(let t750 (RowRope t13 t749 t9 t8 t65)) +(let t751 87) +(let t752 "model.layers.9.self_attn.k_proj.weight") +(let t753 (Input t751 t752 t33)) +(let t754 (Mul t72 t744 t53 t753 t57 t78)) +(let t755 (Sum t70 t8 t754 t80 t19 t81)) +(let t756 (RowRope t13 t755 t69 t68 t65)) +(let t757 88) +(let t758 "model.layers.9.self_attn.v_proj.weight") +(let t759 (Input t757 t758 t33)) +(let t760 (Mul t72 t744 t53 t759 t57 t78)) +(let t761 (Sum t70 t8 t760 t80 t19 t81)) +(let t762 (ICons t761 t89)) +(let t763 (ICons t756 t762)) +(let t764 (ICons t750 t763)) +(let t765 (CustomOpHLIR t764 t49 t33)) +(let t766 89) +(let t767 "model.layers.9.self_attn.o_proj.weight") +(let t768 (Input t766 t767 t33)) +(let t769 (Mul t37 t765 t53 t768 t57 t60)) +(let t770 (Sum t12 t8 t769 t62 t19 t29)) +(let t771 (Add t12 t733 t29 t770 t29 t29)) +(let t772 (Mul t12 t771 t29 t771 t29 t29)) +(let t773 (Sum t13 t8 t772 t9 t19 t20)) +(let t774 (Mul t13 t773 t20 t41 t20 t20)) +(let t775 (Add t13 t774 t20 t44 t23 t20)) +(let t776 (Sqrt t13 t775 t20 t20)) +(let t777 (Recip t13 t776 t20 t20)) +(let t778 (Mul t12 t777 t25 t771 t29 t29)) +(let t779 91) +(let t780 "model.layers.9.post_attention_layernorm.weight") +(let t781 (Input t779 t780 t33)) +(let t782 (Mul t12 t778 t29 
t781 t28 t29)) +(let t783 84) +(let t784 "model.layers.9.mlp.gate_proj.weight") +(let t785 (Input t783 t784 t33)) +(let t786 (Mul t107 t782 t53 t785 t57 t124)) +(let t787 (Sum t105 t8 t786 t126 t19 t127)) +(let t788 (Mul t105 t787 t127 t130 t131 t127)) +(let t789 (Mul t105 t788 t127 t134 t131 t127)) +(let t790 (Exp2 t105 t789 t127 t127)) +(let t791 (Add t105 t790 t127 t138 t131 t127)) +(let t792 (Recip t105 t791 t127 t127)) +(let t793 (Mul t105 t787 t127 t792 t127 t127)) +(let t794 83) +(let t795 "model.layers.9.mlp.up_proj.weight") +(let t796 (Input t794 t795 t33)) +(let t797 (Mul t107 t782 t53 t796 t57 t124)) +(let t798 (Sum t105 t8 t797 t126 t19 t127)) +(let t799 (Mul t105 t793 t127 t798 t127 t127)) +(let t800 85) +(let t801 "model.layers.9.mlp.down_proj.weight") +(let t802 (Input t800 t801 t33)) +(let t803 (Mul t104 t799 t148 t802 t152 t153)) +(let t804 (Sum t12 t101 t803 t155 t19 t29)) +(let t805 (Add t12 t771 t29 t804 t29 t29)) +(let t806 (Mul t12 t805 t29 t805 t29 t29)) +(let t807 (Sum t13 t8 t806 t9 t19 t20)) +(let t808 (Mul t13 t807 t20 t41 t20 t20)) +(let t809 (Add t13 t808 t20 t44 t23 t20)) +(let t810 (Sqrt t13 t809 t20 t20)) +(let t811 (Recip t13 t810 t20 t20)) +(let t812 (Mul t12 t811 t25 t805 t29 t29)) +(let t813 99) +(let t814 "model.layers.10.input_layernorm.weight") +(let t815 (Input t813 t814 t33)) +(let t816 (Mul t12 t812 t29 t815 t28 t29)) +(let t817 95) +(let t818 "model.layers.10.self_attn.q_proj.weight") +(let t819 (Input t817 t818 t33)) +(let t820 (Mul t37 t816 t53 t819 t57 t60)) +(let t821 (Sum t12 t8 t820 t62 t19 t29)) +(let t822 (RowRope t13 t821 t9 t8 t65)) +(let t823 96) +(let t824 "model.layers.10.self_attn.k_proj.weight") +(let t825 (Input t823 t824 t33)) +(let t826 (Mul t72 t816 t53 t825 t57 t78)) +(let t827 (Sum t70 t8 t826 t80 t19 t81)) +(let t828 (RowRope t13 t827 t69 t68 t65)) +(let t829 97) +(let t830 "model.layers.10.self_attn.v_proj.weight") +(let t831 (Input t829 t830 t33)) +(let t832 (Mul t72 t816 t53 t831 t57 t78)) +(let 
t833 (Sum t70 t8 t832 t80 t19 t81)) +(let t834 (ICons t833 t89)) +(let t835 (ICons t828 t834)) +(let t836 (ICons t822 t835)) +(let t837 (CustomOpHLIR t836 t115 t33)) +(let t838 98) +(let t839 "model.layers.10.self_attn.o_proj.weight") +(let t840 (Input t838 t839 t33)) +(let t841 (Mul t37 t837 t53 t840 t57 t60)) +(let t842 (Sum t12 t8 t841 t62 t19 t29)) +(let t843 (Add t12 t805 t29 t842 t29 t29)) +(let t844 (Mul t12 t843 t29 t843 t29 t29)) +(let t845 (Sum t13 t8 t844 t9 t19 t20)) +(let t846 (Mul t13 t845 t20 t41 t20 t20)) +(let t847 (Add t13 t846 t20 t44 t23 t20)) +(let t848 (Sqrt t13 t847 t20 t20)) +(let t849 (Recip t13 t848 t20 t20)) +(let t850 (Mul t12 t849 t25 t843 t29 t29)) +(let t851 100) +(let t852 "model.layers.10.post_attention_layernorm.weight") +(let t853 (Input t851 t852 t33)) +(let t854 (Mul t12 t850 t29 t853 t28 t29)) +(let t855 93) +(let t856 "model.layers.10.mlp.gate_proj.weight") +(let t857 (Input t855 t856 t33)) +(let t858 (Mul t107 t854 t53 t857 t57 t124)) +(let t859 (Sum t105 t8 t858 t126 t19 t127)) +(let t860 (Mul t105 t859 t127 t130 t131 t127)) +(let t861 (Mul t105 t860 t127 t134 t131 t127)) +(let t862 (Exp2 t105 t861 t127 t127)) +(let t863 (Add t105 t862 t127 t138 t131 t127)) +(let t864 (Recip t105 t863 t127 t127)) +(let t865 (Mul t105 t859 t127 t864 t127 t127)) +(let t866 92) +(let t867 "model.layers.10.mlp.up_proj.weight") +(let t868 (Input t866 t867 t33)) +(let t869 (Mul t107 t854 t53 t868 t57 t124)) +(let t870 (Sum t105 t8 t869 t126 t19 t127)) +(let t871 (Mul t105 t865 t127 t870 t127 t127)) +(let t872 94) +(let t873 "model.layers.10.mlp.down_proj.weight") +(let t874 (Input t872 t873 t33)) +(let t875 (Mul t104 t871 t148 t874 t152 t153)) +(let t876 (Sum t12 t101 t875 t155 t19 t29)) +(let t877 (Add t12 t843 t29 t876 t29 t29)) +(let t878 (Mul t12 t877 t29 t877 t29 t29)) +(let t879 (Sum t13 t8 t878 t9 t19 t20)) +(let t880 (Mul t13 t879 t20 t41 t20 t20)) +(let t881 (Add t13 t880 t20 t44 t23 t20)) +(let t882 (Sqrt t13 t881 t20 t20)) +(let t883 
(Recip t13 t882 t20 t20)) +(let t884 (Mul t12 t883 t25 t877 t29 t29)) +(let t885 108) +(let t886 "model.layers.11.input_layernorm.weight") +(let t887 (Input t885 t886 t33)) +(let t888 (Mul t12 t884 t29 t887 t28 t29)) +(let t889 104) +(let t890 "model.layers.11.self_attn.q_proj.weight") +(let t891 (Input t889 t890 t33)) +(let t892 (Mul t37 t888 t53 t891 t57 t60)) +(let t893 (Sum t12 t8 t892 t62 t19 t29)) +(let t894 (RowRope t13 t893 t9 t8 t65)) +(let t895 105) +(let t896 "model.layers.11.self_attn.k_proj.weight") +(let t897 (Input t895 t896 t33)) +(let t898 (Mul t72 t888 t53 t897 t57 t78)) +(let t899 (Sum t70 t8 t898 t80 t19 t81)) +(let t900 (RowRope t13 t899 t69 t68 t65)) +(let t901 106) +(let t902 "model.layers.11.self_attn.v_proj.weight") +(let t903 (Input t901 t902 t33)) +(let t904 (Mul t72 t888 t53 t903 t57 t78)) +(let t905 (Sum t70 t8 t904 t80 t19 t81)) +(let t906 (ICons t905 t89)) +(let t907 (ICons t900 t906)) +(let t908 (ICons t894 t907)) +(let t909 (CustomOpHLIR t908 t218 t33)) +(let t910 107) +(let t911 "model.layers.11.self_attn.o_proj.weight") +(let t912 (Input t910 t911 t33)) +(let t913 (Mul t37 t909 t53 t912 t57 t60)) +(let t914 (Sum t12 t8 t913 t62 t19 t29)) +(let t915 (Add t12 t877 t29 t914 t29 t29)) +(let t916 (Mul t12 t915 t29 t915 t29 t29)) +(let t917 (Sum t13 t8 t916 t9 t19 t20)) +(let t918 (Mul t13 t917 t20 t41 t20 t20)) +(let t919 (Add t13 t918 t20 t44 t23 t20)) +(let t920 (Sqrt t13 t919 t20 t20)) +(let t921 (Recip t13 t920 t20 t20)) +(let t922 (Mul t12 t921 t25 t915 t29 t29)) +(let t923 109) +(let t924 "model.layers.11.post_attention_layernorm.weight") +(let t925 (Input t923 t924 t33)) +(let t926 (Mul t12 t922 t29 t925 t28 t29)) +(let t927 102) +(let t928 "model.layers.11.mlp.gate_proj.weight") +(let t929 (Input t927 t928 t33)) +(let t930 (Mul t107 t926 t53 t929 t57 t124)) +(let t931 (Sum t105 t8 t930 t126 t19 t127)) +(let t932 (Mul t105 t931 t127 t130 t131 t127)) +(let t933 (Mul t105 t932 t127 t134 t131 t127)) +(let t934 (Exp2 t105 t933 t127 
t127)) +(let t935 (Add t105 t934 t127 t138 t131 t127)) +(let t936 (Recip t105 t935 t127 t127)) +(let t937 (Mul t105 t931 t127 t936 t127 t127)) +(let t938 101) +(let t939 "model.layers.11.mlp.up_proj.weight") +(let t940 (Input t938 t939 t33)) +(let t941 (Mul t107 t926 t53 t940 t57 t124)) +(let t942 (Sum t105 t8 t941 t126 t19 t127)) +(let t943 (Mul t105 t937 t127 t942 t127 t127)) +(let t944 103) +(let t945 "model.layers.11.mlp.down_proj.weight") +(let t946 (Input t944 t945 t33)) +(let t947 (Mul t104 t943 t148 t946 t152 t153)) +(let t948 (Sum t12 t101 t947 t155 t19 t29)) +(let t949 (Add t12 t915 t29 t948 t29 t29)) +(let t950 (Mul t12 t949 t29 t949 t29 t29)) +(let t951 (Sum t13 t8 t950 t9 t19 t20)) +(let t952 (Mul t13 t951 t20 t41 t20 t20)) +(let t953 (Add t13 t952 t20 t44 t23 t20)) +(let t954 (Sqrt t13 t953 t20 t20)) +(let t955 (Recip t13 t954 t20 t20)) +(let t956 (Mul t12 t955 t25 t949 t29 t29)) +(let t957 117) +(let t958 "model.layers.12.input_layernorm.weight") +(let t959 (Input t957 t958 t33)) +(let t960 (Mul t12 t956 t29 t959 t28 t29)) +(let t961 113) +(let t962 "model.layers.12.self_attn.q_proj.weight") +(let t963 (Input t961 t962 t33)) +(let t964 (Mul t37 t960 t53 t963 t57 t60)) +(let t965 (Sum t12 t8 t964 t62 t19 t29)) +(let t966 (RowRope t13 t965 t9 t8 t65)) +(let t967 114) +(let t968 "model.layers.12.self_attn.k_proj.weight") +(let t969 (Input t967 t968 t33)) +(let t970 (Mul t72 t960 t53 t969 t57 t78)) +(let t971 (Sum t70 t8 t970 t80 t19 t81)) +(let t972 (RowRope t13 t971 t69 t68 t65)) +(let t973 115) +(let t974 "model.layers.12.self_attn.v_proj.weight") +(let t975 (Input t973 t974 t33)) +(let t976 (Mul t72 t960 t53 t975 t57 t78)) +(let t977 (Sum t70 t8 t976 t80 t19 t81)) +(let t978 (ICons t977 t89)) +(let t979 (ICons t972 t978)) +(let t980 (ICons t966 t979)) +(let t981 (CustomOpHLIR t980 t207 t33)) +(let t982 116) +(let t983 "model.layers.12.self_attn.o_proj.weight") +(let t984 (Input t982 t983 t33)) +(let t985 (Mul t37 t981 t53 t984 t57 t60)) +(let t986 
(Sum t12 t8 t985 t62 t19 t29)) +(let t987 (Add t12 t949 t29 t986 t29 t29)) +(let t988 (Mul t12 t987 t29 t987 t29 t29)) +(let t989 (Sum t13 t8 t988 t9 t19 t20)) +(let t990 (Mul t13 t989 t20 t41 t20 t20)) +(let t991 (Add t13 t990 t20 t44 t23 t20)) +(let t992 (Sqrt t13 t991 t20 t20)) +(let t993 (Recip t13 t992 t20 t20)) +(let t994 (Mul t12 t993 t25 t987 t29 t29)) +(let t995 118) +(let t996 "model.layers.12.post_attention_layernorm.weight") +(let t997 (Input t995 t996 t33)) +(let t998 (Mul t12 t994 t29 t997 t28 t29)) +(let t999 111) +(let t1000 "model.layers.12.mlp.gate_proj.weight") +(let t1001 (Input t999 t1000 t33)) +(let t1002 (Mul t107 t998 t53 t1001 t57 t124)) +(let t1003 (Sum t105 t8 t1002 t126 t19 t127)) +(let t1004 (Mul t105 t1003 t127 t130 t131 t127)) +(let t1005 (Mul t105 t1004 t127 t134 t131 t127)) +(let t1006 (Exp2 t105 t1005 t127 t127)) +(let t1007 (Add t105 t1006 t127 t138 t131 t127)) +(let t1008 (Recip t105 t1007 t127 t127)) +(let t1009 (Mul t105 t1003 t127 t1008 t127 t127)) +(let t1010 110) +(let t1011 "model.layers.12.mlp.up_proj.weight") +(let t1012 (Input t1010 t1011 t33)) +(let t1013 (Mul t107 t998 t53 t1012 t57 t124)) +(let t1014 (Sum t105 t8 t1013 t126 t19 t127)) +(let t1015 (Mul t105 t1009 t127 t1014 t127 t127)) +(let t1016 112) +(let t1017 "model.layers.12.mlp.down_proj.weight") +(let t1018 (Input t1016 t1017 t33)) +(let t1019 (Mul t104 t1015 t148 t1018 t152 t153)) +(let t1020 (Sum t12 t101 t1019 t155 t19 t29)) +(let t1021 (Add t12 t987 t29 t1020 t29 t29)) +(let t1022 (Mul t12 t1021 t29 t1021 t29 t29)) +(let t1023 (Sum t13 t8 t1022 t9 t19 t20)) +(let t1024 (Mul t13 t1023 t20 t41 t20 t20)) +(let t1025 (Add t13 t1024 t20 t44 t23 t20)) +(let t1026 (Sqrt t13 t1025 t20 t20)) +(let t1027 (Recip t13 t1026 t20 t20)) +(let t1028 (Mul t12 t1027 t25 t1021 t29 t29)) +(let t1029 126) +(let t1030 "model.layers.13.input_layernorm.weight") +(let t1031 (Input t1029 t1030 t33)) +(let t1032 (Mul t12 t1028 t29 t1031 t28 t29)) +(let t1033 122) +(let t1034 
"model.layers.13.self_attn.q_proj.weight") +(let t1035 (Input t1033 t1034 t33)) +(let t1036 (Mul t37 t1032 t53 t1035 t57 t60)) +(let t1037 (Sum t12 t8 t1036 t62 t19 t29)) +(let t1038 (RowRope t13 t1037 t9 t8 t65)) +(let t1039 123) +(let t1040 "model.layers.13.self_attn.k_proj.weight") +(let t1041 (Input t1039 t1040 t33)) +(let t1042 (Mul t72 t1032 t53 t1041 t57 t78)) +(let t1043 (Sum t70 t8 t1042 t80 t19 t81)) +(let t1044 (RowRope t13 t1043 t69 t68 t65)) +(let t1045 124) +(let t1046 "model.layers.13.self_attn.v_proj.weight") +(let t1047 (Input t1045 t1046 t33)) +(let t1048 (Mul t72 t1032 t53 t1047 t57 t78)) +(let t1049 (Sum t70 t8 t1048 t80 t19 t81)) +(let t1050 (ICons t1049 t89)) +(let t1051 (ICons t1044 t1050)) +(let t1052 (ICons t1038 t1051)) +(let t1053 (CustomOpHLIR t1052 t224 t33)) +(let t1054 125) +(let t1055 "model.layers.13.self_attn.o_proj.weight") +(let t1056 (Input t1054 t1055 t33)) +(let t1057 (Mul t37 t1053 t53 t1056 t57 t60)) +(let t1058 (Sum t12 t8 t1057 t62 t19 t29)) +(let t1059 (Add t12 t1021 t29 t1058 t29 t29)) +(let t1060 (Mul t12 t1059 t29 t1059 t29 t29)) +(let t1061 (Sum t13 t8 t1060 t9 t19 t20)) +(let t1062 (Mul t13 t1061 t20 t41 t20 t20)) +(let t1063 (Add t13 t1062 t20 t44 t23 t20)) +(let t1064 (Sqrt t13 t1063 t20 t20)) +(let t1065 (Recip t13 t1064 t20 t20)) +(let t1066 (Mul t12 t1065 t25 t1059 t29 t29)) +(let t1067 127) +(let t1068 "model.layers.13.post_attention_layernorm.weight") +(let t1069 (Input t1067 t1068 t33)) +(let t1070 (Mul t12 t1066 t29 t1069 t28 t29)) +(let t1071 120) +(let t1072 "model.layers.13.mlp.gate_proj.weight") +(let t1073 (Input t1071 t1072 t33)) +(let t1074 (Mul t107 t1070 t53 t1073 t57 t124)) +(let t1075 (Sum t105 t8 t1074 t126 t19 t127)) +(let t1076 (Mul t105 t1075 t127 t130 t131 t127)) +(let t1077 (Mul t105 t1076 t127 t134 t131 t127)) +(let t1078 (Exp2 t105 t1077 t127 t127)) +(let t1079 (Add t105 t1078 t127 t138 t131 t127)) +(let t1080 (Recip t105 t1079 t127 t127)) +(let t1081 (Mul t105 t1075 t127 t1080 t127 t127)) 
+(let t1082 119) +(let t1083 "model.layers.13.mlp.up_proj.weight") +(let t1084 (Input t1082 t1083 t33)) +(let t1085 (Mul t107 t1070 t53 t1084 t57 t124)) +(let t1086 (Sum t105 t8 t1085 t126 t19 t127)) +(let t1087 (Mul t105 t1081 t127 t1086 t127 t127)) +(let t1088 121) +(let t1089 "model.layers.13.mlp.down_proj.weight") +(let t1090 (Input t1088 t1089 t33)) +(let t1091 (Mul t104 t1087 t148 t1090 t152 t153)) +(let t1092 (Sum t12 t101 t1091 t155 t19 t29)) +(let t1093 (Add t12 t1059 t29 t1092 t29 t29)) +(let t1094 (Mul t12 t1093 t29 t1093 t29 t29)) +(let t1095 (Sum t13 t8 t1094 t9 t19 t20)) +(let t1096 (Mul t13 t1095 t20 t41 t20 t20)) +(let t1097 (Add t13 t1096 t20 t44 t23 t20)) +(let t1098 (Sqrt t13 t1097 t20 t20)) +(let t1099 (Recip t13 t1098 t20 t20)) +(let t1100 (Mul t12 t1099 t25 t1093 t29 t29)) +(let t1101 135) +(let t1102 "model.layers.14.input_layernorm.weight") +(let t1103 (Input t1101 t1102 t33)) +(let t1104 (Mul t12 t1100 t29 t1103 t28 t29)) +(let t1105 131) +(let t1106 "model.layers.14.self_attn.q_proj.weight") +(let t1107 (Input t1105 t1106 t33)) +(let t1108 (Mul t37 t1104 t53 t1107 t57 t60)) +(let t1109 (Sum t12 t8 t1108 t62 t19 t29)) +(let t1110 (RowRope t13 t1109 t9 t8 t65)) +(let t1111 132) +(let t1112 "model.layers.14.self_attn.k_proj.weight") +(let t1113 (Input t1111 t1112 t33)) +(let t1114 (Mul t72 t1104 t53 t1113 t57 t78)) +(let t1115 (Sum t70 t8 t1114 t80 t19 t81)) +(let t1116 (RowRope t13 t1115 t69 t68 t65)) +(let t1117 133) +(let t1118 "model.layers.14.self_attn.v_proj.weight") +(let t1119 (Input t1117 t1118 t33)) +(let t1120 (Mul t72 t1104 t53 t1119 t57 t78)) +(let t1121 (Sum t70 t8 t1120 t80 t19 t81)) +(let t1122 (ICons t1121 t89)) +(let t1123 (ICons t1116 t1122)) +(let t1124 (ICons t1110 t1123)) +(let t1125 (CustomOpHLIR t1124 t169 t33)) +(let t1126 134) +(let t1127 "model.layers.14.self_attn.o_proj.weight") +(let t1128 (Input t1126 t1127 t33)) +(let t1129 (Mul t37 t1125 t53 t1128 t57 t60)) +(let t1130 (Sum t12 t8 t1129 t62 t19 t29)) +(let 
t1131 (Add t12 t1093 t29 t1130 t29 t29)) +(let t1132 (Mul t12 t1131 t29 t1131 t29 t29)) +(let t1133 (Sum t13 t8 t1132 t9 t19 t20)) +(let t1134 (Mul t13 t1133 t20 t41 t20 t20)) +(let t1135 (Add t13 t1134 t20 t44 t23 t20)) +(let t1136 (Sqrt t13 t1135 t20 t20)) +(let t1137 (Recip t13 t1136 t20 t20)) +(let t1138 (Mul t12 t1137 t25 t1131 t29 t29)) +(let t1139 136) +(let t1140 "model.layers.14.post_attention_layernorm.weight") +(let t1141 (Input t1139 t1140 t33)) +(let t1142 (Mul t12 t1138 t29 t1141 t28 t29)) +(let t1143 129) +(let t1144 "model.layers.14.mlp.gate_proj.weight") +(let t1145 (Input t1143 t1144 t33)) +(let t1146 (Mul t107 t1142 t53 t1145 t57 t124)) +(let t1147 (Sum t105 t8 t1146 t126 t19 t127)) +(let t1148 (Mul t105 t1147 t127 t130 t131 t127)) +(let t1149 (Mul t105 t1148 t127 t134 t131 t127)) +(let t1150 (Exp2 t105 t1149 t127 t127)) +(let t1151 (Add t105 t1150 t127 t138 t131 t127)) +(let t1152 (Recip t105 t1151 t127 t127)) +(let t1153 (Mul t105 t1147 t127 t1152 t127 t127)) +(let t1154 128) +(let t1155 "model.layers.14.mlp.up_proj.weight") +(let t1156 (Input t1154 t1155 t33)) +(let t1157 (Mul t107 t1142 t53 t1156 t57 t124)) +(let t1158 (Sum t105 t8 t1157 t126 t19 t127)) +(let t1159 (Mul t105 t1153 t127 t1158 t127 t127)) +(let t1160 130) +(let t1161 "model.layers.14.mlp.down_proj.weight") +(let t1162 (Input t1160 t1161 t33)) +(let t1163 (Mul t104 t1159 t148 t1162 t152 t153)) +(let t1164 (Sum t12 t101 t1163 t155 t19 t29)) +(let t1165 (Add t12 t1131 t29 t1164 t29 t29)) +(let t1166 (Mul t12 t1165 t29 t1165 t29 t29)) +(let t1167 (Sum t13 t8 t1166 t9 t19 t20)) +(let t1168 (Mul t13 t1167 t20 t41 t20 t20)) +(let t1169 (Add t13 t1168 t20 t44 t23 t20)) +(let t1170 (Sqrt t13 t1169 t20 t20)) +(let t1171 (Recip t13 t1170 t20 t20)) +(let t1172 (Mul t12 t1171 t25 t1165 t29 t29)) +(let t1173 144) +(let t1174 "model.layers.15.input_layernorm.weight") +(let t1175 (Input t1173 t1174 t33)) +(let t1176 (Mul t12 t1172 t29 t1175 t28 t29)) +(let t1177 140) +(let t1178 
"model.layers.15.self_attn.q_proj.weight") +(let t1179 (Input t1177 t1178 t33)) +(let t1180 (Mul t37 t1176 t53 t1179 t57 t60)) +(let t1181 (Sum t12 t8 t1180 t62 t19 t29)) +(let t1182 (RowRope t13 t1181 t9 t8 t65)) +(let t1183 141) +(let t1184 "model.layers.15.self_attn.k_proj.weight") +(let t1185 (Input t1183 t1184 t33)) +(let t1186 (Mul t72 t1176 t53 t1185 t57 t78)) +(let t1187 (Sum t70 t8 t1186 t80 t19 t81)) +(let t1188 (RowRope t13 t1187 t69 t68 t65)) +(let t1189 142) +(let t1190 "model.layers.15.self_attn.v_proj.weight") +(let t1191 (Input t1189 t1190 t33)) +(let t1192 (Mul t72 t1176 t53 t1191 t57 t78)) +(let t1193 (Sum t70 t8 t1192 t80 t19 t81)) +(let t1194 (ICons t1193 t89)) +(let t1195 (ICons t1188 t1194)) +(let t1196 (ICons t1182 t1195)) +(let t1197 (CustomOpHLIR t1196 t175 t33)) +(let t1198 143) +(let t1199 "model.layers.15.self_attn.o_proj.weight") +(let t1200 (Input t1198 t1199 t33)) +(let t1201 (Mul t37 t1197 t53 t1200 t57 t60)) +(let t1202 (Sum t12 t8 t1201 t62 t19 t29)) +(let t1203 (Add t12 t1165 t29 t1202 t29 t29)) +(let t1204 (Mul t12 t1203 t29 t1203 t29 t29)) +(let t1205 (Sum t13 t8 t1204 t9 t19 t20)) +(let t1206 (Mul t13 t1205 t20 t41 t20 t20)) +(let t1207 (Add t13 t1206 t20 t44 t23 t20)) +(let t1208 (Sqrt t13 t1207 t20 t20)) +(let t1209 (Recip t13 t1208 t20 t20)) +(let t1210 (Mul t12 t1209 t25 t1203 t29 t29)) +(let t1211 145) +(let t1212 "model.layers.15.post_attention_layernorm.weight") +(let t1213 (Input t1211 t1212 t33)) +(let t1214 (Mul t12 t1210 t29 t1213 t28 t29)) +(let t1215 138) +(let t1216 "model.layers.15.mlp.gate_proj.weight") +(let t1217 (Input t1215 t1216 t33)) +(let t1218 (Mul t107 t1214 t53 t1217 t57 t124)) +(let t1219 (Sum t105 t8 t1218 t126 t19 t127)) +(let t1220 (Mul t105 t1219 t127 t130 t131 t127)) +(let t1221 (Mul t105 t1220 t127 t134 t131 t127)) +(let t1222 (Exp2 t105 t1221 t127 t127)) +(let t1223 (Add t105 t1222 t127 t138 t131 t127)) +(let t1224 (Recip t105 t1223 t127 t127)) +(let t1225 (Mul t105 t1219 t127 t1224 t127 t127)) 
+(let t1226 137) +(let t1227 "model.layers.15.mlp.up_proj.weight") +(let t1228 (Input t1226 t1227 t33)) +(let t1229 (Mul t107 t1214 t53 t1228 t57 t124)) +(let t1230 (Sum t105 t8 t1229 t126 t19 t127)) +(let t1231 (Mul t105 t1225 t127 t1230 t127 t127)) +(let t1232 139) +(let t1233 "model.layers.15.mlp.down_proj.weight") +(let t1234 (Input t1232 t1233 t33)) +(let t1235 (Mul t104 t1231 t148 t1234 t152 t153)) +(let t1236 (Sum t12 t101 t1235 t155 t19 t29)) +(let t1237 (Add t12 t1203 t29 t1236 t29 t29)) +(let t1238 (Mul t12 t1237 t29 t1237 t29 t29)) +(let t1239 (Sum t13 t8 t1238 t9 t19 t20)) +(let t1240 (Mul t13 t1239 t20 t41 t20 t20)) +(let t1241 (Add t13 t1240 t20 t44 t23 t20)) +(let t1242 (Sqrt t13 t1241 t20 t20)) +(let t1243 (Recip t13 t1242 t20 t20)) +(let t1244 (Mul t12 t1243 t25 t1237 t29 t29)) +(let t1245 153) +(let t1246 "model.layers.16.input_layernorm.weight") +(let t1247 (Input t1245 t1246 t33)) +(let t1248 (Mul t12 t1244 t29 t1247 t28 t29)) +(let t1249 149) +(let t1250 "model.layers.16.self_attn.q_proj.weight") +(let t1251 (Input t1249 t1250 t33)) +(let t1252 (Mul t37 t1248 t53 t1251 t57 t60)) +(let t1253 (Sum t12 t8 t1252 t62 t19 t29)) +(let t1254 (RowRope t13 t1253 t9 t8 t65)) +(let t1255 150) +(let t1256 "model.layers.16.self_attn.k_proj.weight") +(let t1257 (Input t1255 t1256 t33)) +(let t1258 (Mul t72 t1248 t53 t1257 t57 t78)) +(let t1259 (Sum t70 t8 t1258 t80 t19 t81)) +(let t1260 (RowRope t13 t1259 t69 t68 t65)) +(let t1261 151) +(let t1262 "model.layers.16.self_attn.v_proj.weight") +(let t1263 (Input t1261 t1262 t33)) +(let t1264 (Mul t72 t1248 t53 t1263 t57 t78)) +(let t1265 (Sum t70 t8 t1264 t80 t19 t81)) +(let t1266 (ICons t1265 t89)) +(let t1267 (ICons t1260 t1266)) +(let t1268 (ICons t1254 t1267)) +(let t1269 (CustomOpHLIR t1268 t181 t33)) +(let t1270 152) +(let t1271 "model.layers.16.self_attn.o_proj.weight") +(let t1272 (Input t1270 t1271 t33)) +(let t1273 (Mul t37 t1269 t53 t1272 t57 t60)) +(let t1274 (Sum t12 t8 t1273 t62 t19 t29)) +(let 
t1275 (Add t12 t1237 t29 t1274 t29 t29)) +(let t1276 (Mul t12 t1275 t29 t1275 t29 t29)) +(let t1277 (Sum t13 t8 t1276 t9 t19 t20)) +(let t1278 (Mul t13 t1277 t20 t41 t20 t20)) +(let t1279 (Add t13 t1278 t20 t44 t23 t20)) +(let t1280 (Sqrt t13 t1279 t20 t20)) +(let t1281 (Recip t13 t1280 t20 t20)) +(let t1282 (Mul t12 t1281 t25 t1275 t29 t29)) +(let t1283 154) +(let t1284 "model.layers.16.post_attention_layernorm.weight") +(let t1285 (Input t1283 t1284 t33)) +(let t1286 (Mul t12 t1282 t29 t1285 t28 t29)) +(let t1287 147) +(let t1288 "model.layers.16.mlp.gate_proj.weight") +(let t1289 (Input t1287 t1288 t33)) +(let t1290 (Mul t107 t1286 t53 t1289 t57 t124)) +(let t1291 (Sum t105 t8 t1290 t126 t19 t127)) +(let t1292 (Mul t105 t1291 t127 t130 t131 t127)) +(let t1293 (Mul t105 t1292 t127 t134 t131 t127)) +(let t1294 (Exp2 t105 t1293 t127 t127)) +(let t1295 (Add t105 t1294 t127 t138 t131 t127)) +(let t1296 (Recip t105 t1295 t127 t127)) +(let t1297 (Mul t105 t1291 t127 t1296 t127 t127)) +(let t1298 146) +(let t1299 "model.layers.16.mlp.up_proj.weight") +(let t1300 (Input t1298 t1299 t33)) +(let t1301 (Mul t107 t1286 t53 t1300 t57 t124)) +(let t1302 (Sum t105 t8 t1301 t126 t19 t127)) +(let t1303 (Mul t105 t1297 t127 t1302 t127 t127)) +(let t1304 148) +(let t1305 "model.layers.16.mlp.down_proj.weight") +(let t1306 (Input t1304 t1305 t33)) +(let t1307 (Mul t104 t1303 t148 t1306 t152 t153)) +(let t1308 (Sum t12 t101 t1307 t155 t19 t29)) +(let t1309 (Add t12 t1275 t29 t1308 t29 t29)) +(let t1310 (Mul t12 t1309 t29 t1309 t29 t29)) +(let t1311 (Sum t13 t8 t1310 t9 t19 t20)) +(let t1312 (Mul t13 t1311 t20 t41 t20 t20)) +(let t1313 (Add t13 t1312 t20 t44 t23 t20)) +(let t1314 (Sqrt t13 t1313 t20 t20)) +(let t1315 (Recip t13 t1314 t20 t20)) +(let t1316 (Mul t12 t1315 t25 t1309 t29 t29)) +(let t1317 162) +(let t1318 "model.layers.17.input_layernorm.weight") +(let t1319 (Input t1317 t1318 t33)) +(let t1320 (Mul t12 t1316 t29 t1319 t28 t29)) +(let t1321 158) +(let t1322 
"model.layers.17.self_attn.q_proj.weight") +(let t1323 (Input t1321 t1322 t33)) +(let t1324 (Mul t37 t1320 t53 t1323 t57 t60)) +(let t1325 (Sum t12 t8 t1324 t62 t19 t29)) +(let t1326 (RowRope t13 t1325 t9 t8 t65)) +(let t1327 159) +(let t1328 "model.layers.17.self_attn.k_proj.weight") +(let t1329 (Input t1327 t1328 t33)) +(let t1330 (Mul t72 t1320 t53 t1329 t57 t78)) +(let t1331 (Sum t70 t8 t1330 t80 t19 t81)) +(let t1332 (RowRope t13 t1331 t69 t68 t65)) +(let t1333 160) +(let t1334 "model.layers.17.self_attn.v_proj.weight") +(let t1335 (Input t1333 t1334 t33)) +(let t1336 (Mul t72 t1320 t53 t1335 t57 t78)) +(let t1337 (Sum t70 t8 t1336 t80 t19 t81)) +(let t1338 (ICons t1337 t89)) +(let t1339 (ICons t1332 t1338)) +(let t1340 (ICons t1326 t1339)) +(let t1341 (CustomOpHLIR t1340 t190 t33)) +(let t1342 161) +(let t1343 "model.layers.17.self_attn.o_proj.weight") +(let t1344 (Input t1342 t1343 t33)) +(let t1345 (Mul t37 t1341 t53 t1344 t57 t60)) +(let t1346 (Sum t12 t8 t1345 t62 t19 t29)) +(let t1347 (Add t12 t1309 t29 t1346 t29 t29)) +(let t1348 (Mul t12 t1347 t29 t1347 t29 t29)) +(let t1349 (Sum t13 t8 t1348 t9 t19 t20)) +(let t1350 (Mul t13 t1349 t20 t41 t20 t20)) +(let t1351 (Add t13 t1350 t20 t44 t23 t20)) +(let t1352 (Sqrt t13 t1351 t20 t20)) +(let t1353 (Recip t13 t1352 t20 t20)) +(let t1354 (Mul t12 t1353 t25 t1347 t29 t29)) +(let t1355 163) +(let t1356 "model.layers.17.post_attention_layernorm.weight") +(let t1357 (Input t1355 t1356 t33)) +(let t1358 (Mul t12 t1354 t29 t1357 t28 t29)) +(let t1359 156) +(let t1360 "model.layers.17.mlp.gate_proj.weight") +(let t1361 (Input t1359 t1360 t33)) +(let t1362 (Mul t107 t1358 t53 t1361 t57 t124)) +(let t1363 (Sum t105 t8 t1362 t126 t19 t127)) +(let t1364 (Mul t105 t1363 t127 t130 t131 t127)) +(let t1365 (Mul t105 t1364 t127 t134 t131 t127)) +(let t1366 (Exp2 t105 t1365 t127 t127)) +(let t1367 (Add t105 t1366 t127 t138 t131 t127)) +(let t1368 (Recip t105 t1367 t127 t127)) +(let t1369 (Mul t105 t1363 t127 t1368 t127 t127)) 
+(let t1370 155) +(let t1371 "model.layers.17.mlp.up_proj.weight") +(let t1372 (Input t1370 t1371 t33)) +(let t1373 (Mul t107 t1358 t53 t1372 t57 t124)) +(let t1374 (Sum t105 t8 t1373 t126 t19 t127)) +(let t1375 (Mul t105 t1369 t127 t1374 t127 t127)) +(let t1376 157) +(let t1377 "model.layers.17.mlp.down_proj.weight") +(let t1378 (Input t1376 t1377 t33)) +(let t1379 (Mul t104 t1375 t148 t1378 t152 t153)) +(let t1380 (Sum t12 t101 t1379 t155 t19 t29)) +(let t1381 (Add t12 t1347 t29 t1380 t29 t29)) +(let t1382 (Mul t12 t1381 t29 t1381 t29 t29)) +(let t1383 (Sum t13 t8 t1382 t9 t19 t20)) +(let t1384 (Mul t13 t1383 t20 t41 t20 t20)) +(let t1385 (Add t13 t1384 t20 t44 t23 t20)) +(let t1386 (Sqrt t13 t1385 t20 t20)) +(let t1387 (Recip t13 t1386 t20 t20)) +(let t1388 (Mul t12 t1387 t25 t1381 t29 t29)) +(let t1389 171) +(let t1390 "model.layers.18.input_layernorm.weight") +(let t1391 (Input t1389 t1390 t33)) +(let t1392 (Mul t12 t1388 t29 t1391 t28 t29)) +(let t1393 167) +(let t1394 "model.layers.18.self_attn.q_proj.weight") +(let t1395 (Input t1393 t1394 t33)) +(let t1396 (Mul t37 t1392 t53 t1395 t57 t60)) +(let t1397 (Sum t12 t8 t1396 t62 t19 t29)) +(let t1398 (RowRope t13 t1397 t9 t8 t65)) +(let t1399 168) +(let t1400 "model.layers.18.self_attn.k_proj.weight") +(let t1401 (Input t1399 t1400 t33)) +(let t1402 (Mul t72 t1392 t53 t1401 t57 t78)) +(let t1403 (Sum t70 t8 t1402 t80 t19 t81)) +(let t1404 (RowRope t13 t1403 t69 t68 t65)) +(let t1405 169) +(let t1406 "model.layers.18.self_attn.v_proj.weight") +(let t1407 (Input t1405 t1406 t33)) +(let t1408 (Mul t72 t1392 t53 t1407 t57 t78)) +(let t1409 (Sum t70 t8 t1408 t80 t19 t81)) +(let t1410 (ICons t1409 t89)) +(let t1411 (ICons t1404 t1410)) +(let t1412 (ICons t1398 t1411)) +(let t1413 (CustomOpHLIR t1412 t165 t33)) +(let t1414 170) +(let t1415 "model.layers.18.self_attn.o_proj.weight") +(let t1416 (Input t1414 t1415 t33)) +(let t1417 (Mul t37 t1413 t53 t1416 t57 t60)) +(let t1418 (Sum t12 t8 t1417 t62 t19 t29)) +(let 
t1419 (Add t12 t1381 t29 t1418 t29 t29)) +(let t1420 (Mul t12 t1419 t29 t1419 t29 t29)) +(let t1421 (Sum t13 t8 t1420 t9 t19 t20)) +(let t1422 (Mul t13 t1421 t20 t41 t20 t20)) +(let t1423 (Add t13 t1422 t20 t44 t23 t20)) +(let t1424 (Sqrt t13 t1423 t20 t20)) +(let t1425 (Recip t13 t1424 t20 t20)) +(let t1426 (Mul t12 t1425 t25 t1419 t29 t29)) +(let t1427 172) +(let t1428 "model.layers.18.post_attention_layernorm.weight") +(let t1429 (Input t1427 t1428 t33)) +(let t1430 (Mul t12 t1426 t29 t1429 t28 t29)) +(let t1431 165) +(let t1432 "model.layers.18.mlp.gate_proj.weight") +(let t1433 (Input t1431 t1432 t33)) +(let t1434 (Mul t107 t1430 t53 t1433 t57 t124)) +(let t1435 (Sum t105 t8 t1434 t126 t19 t127)) +(let t1436 (Mul t105 t1435 t127 t130 t131 t127)) +(let t1437 (Mul t105 t1436 t127 t134 t131 t127)) +(let t1438 (Exp2 t105 t1437 t127 t127)) +(let t1439 (Add t105 t1438 t127 t138 t131 t127)) +(let t1440 (Recip t105 t1439 t127 t127)) +(let t1441 (Mul t105 t1435 t127 t1440 t127 t127)) +(let t1442 164) +(let t1443 "model.layers.18.mlp.up_proj.weight") +(let t1444 (Input t1442 t1443 t33)) +(let t1445 (Mul t107 t1430 t53 t1444 t57 t124)) +(let t1446 (Sum t105 t8 t1445 t126 t19 t127)) +(let t1447 (Mul t105 t1441 t127 t1446 t127 t127)) +(let t1448 166) +(let t1449 "model.layers.18.mlp.down_proj.weight") +(let t1450 (Input t1448 t1449 t33)) +(let t1451 (Mul t104 t1447 t148 t1450 t152 t153)) +(let t1452 (Sum t12 t101 t1451 t155 t19 t29)) +(let t1453 (Add t12 t1419 t29 t1452 t29 t29)) +(let t1454 (Mul t12 t1453 t29 t1453 t29 t29)) +(let t1455 (Sum t13 t8 t1454 t9 t19 t20)) +(let t1456 (Mul t13 t1455 t20 t41 t20 t20)) +(let t1457 (Add t13 t1456 t20 t44 t23 t20)) +(let t1458 (Sqrt t13 t1457 t20 t20)) +(let t1459 (Recip t13 t1458 t20 t20)) +(let t1460 (Mul t12 t1459 t25 t1453 t29 t29)) +(let t1461 180) +(let t1462 "model.layers.19.input_layernorm.weight") +(let t1463 (Input t1461 t1462 t33)) +(let t1464 (Mul t12 t1460 t29 t1463 t28 t29)) +(let t1465 176) +(let t1466 
"model.layers.19.self_attn.q_proj.weight") +(let t1467 (Input t1465 t1466 t33)) +(let t1468 (Mul t37 t1464 t53 t1467 t57 t60)) +(let t1469 (Sum t12 t8 t1468 t62 t19 t29)) +(let t1470 (RowRope t13 t1469 t9 t8 t65)) +(let t1471 177) +(let t1472 "model.layers.19.self_attn.k_proj.weight") +(let t1473 (Input t1471 t1472 t33)) +(let t1474 (Mul t72 t1464 t53 t1473 t57 t78)) +(let t1475 (Sum t70 t8 t1474 t80 t19 t81)) +(let t1476 (RowRope t13 t1475 t69 t68 t65)) +(let t1477 178) +(let t1478 "model.layers.19.self_attn.v_proj.weight") +(let t1479 (Input t1477 t1478 t33)) +(let t1480 (Mul t72 t1464 t53 t1479 t57 t78)) +(let t1481 (Sum t70 t8 t1480 t80 t19 t81)) +(let t1482 (ICons t1481 t89)) +(let t1483 (ICons t1476 t1482)) +(let t1484 (ICons t1470 t1483)) +(let t1485 (CustomOpHLIR t1484 t203 t33)) +(let t1486 179) +(let t1487 "model.layers.19.self_attn.o_proj.weight") +(let t1488 (Input t1486 t1487 t33)) +(let t1489 (Mul t37 t1485 t53 t1488 t57 t60)) +(let t1490 (Sum t12 t8 t1489 t62 t19 t29)) +(let t1491 (Add t12 t1453 t29 t1490 t29 t29)) +(let t1492 (Mul t12 t1491 t29 t1491 t29 t29)) +(let t1493 (Sum t13 t8 t1492 t9 t19 t20)) +(let t1494 (Mul t13 t1493 t20 t41 t20 t20)) +(let t1495 (Add t13 t1494 t20 t44 t23 t20)) +(let t1496 (Sqrt t13 t1495 t20 t20)) +(let t1497 (Recip t13 t1496 t20 t20)) +(let t1498 (Mul t12 t1497 t25 t1491 t29 t29)) +(let t1499 181) +(let t1500 "model.layers.19.post_attention_layernorm.weight") +(let t1501 (Input t1499 t1500 t33)) +(let t1502 (Mul t12 t1498 t29 t1501 t28 t29)) +(let t1503 174) +(let t1504 "model.layers.19.mlp.gate_proj.weight") +(let t1505 (Input t1503 t1504 t33)) +(let t1506 (Mul t107 t1502 t53 t1505 t57 t124)) +(let t1507 (Sum t105 t8 t1506 t126 t19 t127)) +(let t1508 (Mul t105 t1507 t127 t130 t131 t127)) +(let t1509 (Mul t105 t1508 t127 t134 t131 t127)) +(let t1510 (Exp2 t105 t1509 t127 t127)) +(let t1511 (Add t105 t1510 t127 t138 t131 t127)) +(let t1512 (Recip t105 t1511 t127 t127)) +(let t1513 (Mul t105 t1507 t127 t1512 t127 t127)) 
+(let t1514 173) +(let t1515 "model.layers.19.mlp.up_proj.weight") +(let t1516 (Input t1514 t1515 t33)) +(let t1517 (Mul t107 t1502 t53 t1516 t57 t124)) +(let t1518 (Sum t105 t8 t1517 t126 t19 t127)) +(let t1519 (Mul t105 t1513 t127 t1518 t127 t127)) +(let t1520 175) +(let t1521 "model.layers.19.mlp.down_proj.weight") +(let t1522 (Input t1520 t1521 t33)) +(let t1523 (Mul t104 t1519 t148 t1522 t152 t153)) +(let t1524 (Sum t12 t101 t1523 t155 t19 t29)) +(let t1525 (Add t12 t1491 t29 t1524 t29 t29)) +(let t1526 (Mul t12 t1525 t29 t1525 t29 t29)) +(let t1527 (Sum t13 t8 t1526 t9 t19 t20)) +(let t1528 (Mul t13 t1527 t20 t41 t20 t20)) +(let t1529 (Add t13 t1528 t20 t44 t23 t20)) +(let t1530 (Sqrt t13 t1529 t20 t20)) +(let t1531 (Recip t13 t1530 t20 t20)) +(let t1532 (Mul t12 t1531 t25 t1525 t29 t29)) +(let t1533 189) +(let t1534 "model.layers.20.input_layernorm.weight") +(let t1535 (Input t1533 t1534 t33)) +(let t1536 (Mul t12 t1532 t29 t1535 t28 t29)) +(let t1537 185) +(let t1538 "model.layers.20.self_attn.q_proj.weight") +(let t1539 (Input t1537 t1538 t33)) +(let t1540 (Mul t37 t1536 t53 t1539 t57 t60)) +(let t1541 (Sum t12 t8 t1540 t62 t19 t29)) +(let t1542 (RowRope t13 t1541 t9 t8 t65)) +(let t1543 186) +(let t1544 "model.layers.20.self_attn.k_proj.weight") +(let t1545 (Input t1543 t1544 t33)) +(let t1546 (Mul t72 t1536 t53 t1545 t57 t78)) +(let t1547 (Sum t70 t8 t1546 t80 t19 t81)) +(let t1548 (RowRope t13 t1547 t69 t68 t65)) +(let t1549 187) +(let t1550 "model.layers.20.self_attn.v_proj.weight") +(let t1551 (Input t1549 t1550 t33)) +(let t1552 (Mul t72 t1536 t53 t1551 t57 t78)) +(let t1553 (Sum t70 t8 t1552 t80 t19 t81)) +(let t1554 (ICons t1553 t89)) +(let t1555 (ICons t1548 t1554)) +(let t1556 (ICons t1542 t1555)) +(let t1557 (CustomOpHLIR t1556 t290 t33)) +(let t1558 188) +(let t1559 "model.layers.20.self_attn.o_proj.weight") +(let t1560 (Input t1558 t1559 t33)) +(let t1561 (Mul t37 t1557 t53 t1560 t57 t60)) +(let t1562 (Sum t12 t8 t1561 t62 t19 t29)) +(let 
t1563 (Add t12 t1525 t29 t1562 t29 t29)) +(let t1564 (Mul t12 t1563 t29 t1563 t29 t29)) +(let t1565 (Sum t13 t8 t1564 t9 t19 t20)) +(let t1566 (Mul t13 t1565 t20 t41 t20 t20)) +(let t1567 (Add t13 t1566 t20 t44 t23 t20)) +(let t1568 (Sqrt t13 t1567 t20 t20)) +(let t1569 (Recip t13 t1568 t20 t20)) +(let t1570 (Mul t12 t1569 t25 t1563 t29 t29)) +(let t1571 190) +(let t1572 "model.layers.20.post_attention_layernorm.weight") +(let t1573 (Input t1571 t1572 t33)) +(let t1574 (Mul t12 t1570 t29 t1573 t28 t29)) +(let t1575 183) +(let t1576 "model.layers.20.mlp.gate_proj.weight") +(let t1577 (Input t1575 t1576 t33)) +(let t1578 (Mul t107 t1574 t53 t1577 t57 t124)) +(let t1579 (Sum t105 t8 t1578 t126 t19 t127)) +(let t1580 (Mul t105 t1579 t127 t130 t131 t127)) +(let t1581 (Mul t105 t1580 t127 t134 t131 t127)) +(let t1582 (Exp2 t105 t1581 t127 t127)) +(let t1583 (Add t105 t1582 t127 t138 t131 t127)) +(let t1584 (Recip t105 t1583 t127 t127)) +(let t1585 (Mul t105 t1579 t127 t1584 t127 t127)) +(let t1586 182) +(let t1587 "model.layers.20.mlp.up_proj.weight") +(let t1588 (Input t1586 t1587 t33)) +(let t1589 (Mul t107 t1574 t53 t1588 t57 t124)) +(let t1590 (Sum t105 t8 t1589 t126 t19 t127)) +(let t1591 (Mul t105 t1585 t127 t1590 t127 t127)) +(let t1592 184) +(let t1593 "model.layers.20.mlp.down_proj.weight") +(let t1594 (Input t1592 t1593 t33)) +(let t1595 (Mul t104 t1591 t148 t1594 t152 t153)) +(let t1596 (Sum t12 t101 t1595 t155 t19 t29)) +(let t1597 (Add t12 t1563 t29 t1596 t29 t29)) +(let t1598 (Mul t12 t1597 t29 t1597 t29 t29)) +(let t1599 (Sum t13 t8 t1598 t9 t19 t20)) +(let t1600 (Mul t13 t1599 t20 t41 t20 t20)) +(let t1601 (Add t13 t1600 t20 t44 t23 t20)) +(let t1602 (Sqrt t13 t1601 t20 t20)) +(let t1603 (Recip t13 t1602 t20 t20)) +(let t1604 (Mul t12 t1603 t25 t1597 t29 t29)) +(let t1605 198) +(let t1606 "model.layers.21.input_layernorm.weight") +(let t1607 (Input t1605 t1606 t33)) +(let t1608 (Mul t12 t1604 t29 t1607 t28 t29)) +(let t1609 194) +(let t1610 
"model.layers.21.self_attn.q_proj.weight") +(let t1611 (Input t1609 t1610 t33)) +(let t1612 (Mul t37 t1608 t53 t1611 t57 t60)) +(let t1613 (Sum t12 t8 t1612 t62 t19 t29)) +(let t1614 (RowRope t13 t1613 t9 t8 t65)) +(let t1615 195) +(let t1616 "model.layers.21.self_attn.k_proj.weight") +(let t1617 (Input t1615 t1616 t33)) +(let t1618 (Mul t72 t1608 t53 t1617 t57 t78)) +(let t1619 (Sum t70 t8 t1618 t80 t19 t81)) +(let t1620 (RowRope t13 t1619 t69 t68 t65)) +(let t1621 196) +(let t1622 "model.layers.21.self_attn.v_proj.weight") +(let t1623 (Input t1621 t1622 t33)) +(let t1624 (Mul t72 t1608 t53 t1623 t57 t78)) +(let t1625 (Sum t70 t8 t1624 t80 t19 t81)) +(let t1626 (ICons t1625 t89)) +(let t1627 (ICons t1620 t1626)) +(let t1628 (ICons t1614 t1627)) +(let t1629 (CustomOpHLIR t1628 t279 t33)) +(let t1630 197) +(let t1631 "model.layers.21.self_attn.o_proj.weight") +(let t1632 (Input t1630 t1631 t33)) +(let t1633 (Mul t37 t1629 t53 t1632 t57 t60)) +(let t1634 (Sum t12 t8 t1633 t62 t19 t29)) +(let t1635 (Add t12 t1597 t29 t1634 t29 t29)) +(let t1636 (Mul t12 t1635 t29 t1635 t29 t29)) +(let t1637 (Sum t13 t8 t1636 t9 t19 t20)) +(let t1638 (Mul t13 t1637 t20 t41 t20 t20)) +(let t1639 (Add t13 t1638 t20 t44 t23 t20)) +(let t1640 (Sqrt t13 t1639 t20 t20)) +(let t1641 (Recip t13 t1640 t20 t20)) +(let t1642 (Mul t12 t1641 t25 t1635 t29 t29)) +(let t1643 199) +(let t1644 "model.layers.21.post_attention_layernorm.weight") +(let t1645 (Input t1643 t1644 t33)) +(let t1646 (Mul t12 t1642 t29 t1645 t28 t29)) +(let t1647 192) +(let t1648 "model.layers.21.mlp.gate_proj.weight") +(let t1649 (Input t1647 t1648 t33)) +(let t1650 (Mul t107 t1646 t53 t1649 t57 t124)) +(let t1651 (Sum t105 t8 t1650 t126 t19 t127)) +(let t1652 (Mul t105 t1651 t127 t130 t131 t127)) +(let t1653 (Mul t105 t1652 t127 t134 t131 t127)) +(let t1654 (Exp2 t105 t1653 t127 t127)) +(let t1655 (Add t105 t1654 t127 t138 t131 t127)) +(let t1656 (Recip t105 t1655 t127 t127)) +(let t1657 (Mul t105 t1651 t127 t1656 t127 t127)) 
+(let t1658 191) +(let t1659 "model.layers.21.mlp.up_proj.weight") +(let t1660 (Input t1658 t1659 t33)) +(let t1661 (Mul t107 t1646 t53 t1660 t57 t124)) +(let t1662 (Sum t105 t8 t1661 t126 t19 t127)) +(let t1663 (Mul t105 t1657 t127 t1662 t127 t127)) +(let t1664 193) +(let t1665 "model.layers.21.mlp.down_proj.weight") +(let t1666 (Input t1664 t1665 t33)) +(let t1667 (Mul t104 t1663 t148 t1666 t152 t153)) +(let t1668 (Sum t12 t101 t1667 t155 t19 t29)) +(let t1669 (Add t12 t1635 t29 t1668 t29 t29)) +(let t1670 (Mul t12 t1669 t29 t1669 t29 t29)) +(let t1671 (Sum t13 t8 t1670 t9 t19 t20)) +(let t1672 (Mul t13 t1671 t20 t41 t20 t20)) +(let t1673 (Add t13 t1672 t20 t44 t23 t20)) +(let t1674 (Sqrt t13 t1673 t20 t20)) +(let t1675 (Recip t13 t1674 t20 t20)) +(let t1676 (Mul t12 t1675 t25 t1669 t29 t29)) +(let t1677 207) +(let t1678 "model.layers.22.input_layernorm.weight") +(let t1679 (Input t1677 t1678 t33)) +(let t1680 (Mul t12 t1676 t29 t1679 t28 t29)) +(let t1681 203) +(let t1682 "model.layers.22.self_attn.q_proj.weight") +(let t1683 (Input t1681 t1682 t33)) +(let t1684 (Mul t37 t1680 t53 t1683 t57 t60)) +(let t1685 (Sum t12 t8 t1684 t62 t19 t29)) +(let t1686 (RowRope t13 t1685 t9 t8 t65)) +(let t1687 204) +(let t1688 "model.layers.22.self_attn.k_proj.weight") +(let t1689 (Input t1687 t1688 t33)) +(let t1690 (Mul t72 t1680 t53 t1689 t57 t78)) +(let t1691 (Sum t70 t8 t1690 t80 t19 t81)) +(let t1692 (RowRope t13 t1691 t69 t68 t65)) +(let t1693 205) +(let t1694 "model.layers.22.self_attn.v_proj.weight") +(let t1695 (Input t1693 t1694 t33)) +(let t1696 (Mul t72 t1680 t53 t1695 t57 t78)) +(let t1697 (Sum t70 t8 t1696 t80 t19 t81)) +(let t1698 (ICons t1697 t89)) +(let t1699 (ICons t1692 t1698)) +(let t1700 (ICons t1686 t1699)) +(let t1701 (CustomOpHLIR t1700 t296 t33)) +(let t1702 206) +(let t1703 "model.layers.22.self_attn.o_proj.weight") +(let t1704 (Input t1702 t1703 t33)) +(let t1705 (Mul t37 t1701 t53 t1704 t57 t60)) +(let t1706 (Sum t12 t8 t1705 t62 t19 t29)) +(let 
t1707 (Add t12 t1669 t29 t1706 t29 t29)) +(let t1708 (Mul t12 t1707 t29 t1707 t29 t29)) +(let t1709 (Sum t13 t8 t1708 t9 t19 t20)) +(let t1710 (Mul t13 t1709 t20 t41 t20 t20)) +(let t1711 (Add t13 t1710 t20 t44 t23 t20)) +(let t1712 (Sqrt t13 t1711 t20 t20)) +(let t1713 (Recip t13 t1712 t20 t20)) +(let t1714 (Mul t12 t1713 t25 t1707 t29 t29)) +(let t1715 208) +(let t1716 "model.layers.22.post_attention_layernorm.weight") +(let t1717 (Input t1715 t1716 t33)) +(let t1718 (Mul t12 t1714 t29 t1717 t28 t29)) +(let t1719 201) +(let t1720 "model.layers.22.mlp.gate_proj.weight") +(let t1721 (Input t1719 t1720 t33)) +(let t1722 (Mul t107 t1718 t53 t1721 t57 t124)) +(let t1723 (Sum t105 t8 t1722 t126 t19 t127)) +(let t1724 (Mul t105 t1723 t127 t130 t131 t127)) +(let t1725 (Mul t105 t1724 t127 t134 t131 t127)) +(let t1726 (Exp2 t105 t1725 t127 t127)) +(let t1727 (Add t105 t1726 t127 t138 t131 t127)) +(let t1728 (Recip t105 t1727 t127 t127)) +(let t1729 (Mul t105 t1723 t127 t1728 t127 t127)) +(let t1730 200) +(let t1731 "model.layers.22.mlp.up_proj.weight") +(let t1732 (Input t1730 t1731 t33)) +(let t1733 (Mul t107 t1718 t53 t1732 t57 t124)) +(let t1734 (Sum t105 t8 t1733 t126 t19 t127)) +(let t1735 (Mul t105 t1729 t127 t1734 t127 t127)) +(let t1736 202) +(let t1737 "model.layers.22.mlp.down_proj.weight") +(let t1738 (Input t1736 t1737 t33)) +(let t1739 (Mul t104 t1735 t148 t1738 t152 t153)) +(let t1740 (Sum t12 t101 t1739 t155 t19 t29)) +(let t1741 (Add t12 t1707 t29 t1740 t29 t29)) +(let t1742 (Mul t12 t1741 t29 t1741 t29 t29)) +(let t1743 (Sum t13 t8 t1742 t9 t19 t20)) +(let t1744 (Mul t13 t1743 t20 t41 t20 t20)) +(let t1745 (Add t13 t1744 t20 t44 t23 t20)) +(let t1746 (Sqrt t13 t1745 t20 t20)) +(let t1747 (Recip t13 t1746 t20 t20)) +(let t1748 (Mul t12 t1747 t25 t1741 t29 t29)) +(let t1749 216) +(let t1750 "model.layers.23.input_layernorm.weight") +(let t1751 (Input t1749 t1750 t33)) +(let t1752 (Mul t12 t1748 t29 t1751 t28 t29)) +(let t1753 212) +(let t1754 
"model.layers.23.self_attn.q_proj.weight") +(let t1755 (Input t1753 t1754 t33)) +(let t1756 (Mul t37 t1752 t53 t1755 t57 t60)) +(let t1757 (Sum t12 t8 t1756 t62 t19 t29)) +(let t1758 (RowRope t13 t1757 t9 t8 t65)) +(let t1759 213) +(let t1760 "model.layers.23.self_attn.k_proj.weight") +(let t1761 (Input t1759 t1760 t33)) +(let t1762 (Mul t72 t1752 t53 t1761 t57 t78)) +(let t1763 (Sum t70 t8 t1762 t80 t19 t81)) +(let t1764 (RowRope t13 t1763 t69 t68 t65)) +(let t1765 214) +(let t1766 "model.layers.23.self_attn.v_proj.weight") +(let t1767 (Input t1765 t1766 t33)) +(let t1768 (Mul t72 t1752 t53 t1767 t57 t78)) +(let t1769 (Sum t70 t8 t1768 t80 t19 t81)) +(let t1770 (ICons t1769 t89)) +(let t1771 (ICons t1764 t1770)) +(let t1772 (ICons t1758 t1771)) +(let t1773 (CustomOpHLIR t1772 t241 t33)) +(let t1774 215) +(let t1775 "model.layers.23.self_attn.o_proj.weight") +(let t1776 (Input t1774 t1775 t33)) +(let t1777 (Mul t37 t1773 t53 t1776 t57 t60)) +(let t1778 (Sum t12 t8 t1777 t62 t19 t29)) +(let t1779 (Add t12 t1741 t29 t1778 t29 t29)) +(let t1780 (Mul t12 t1779 t29 t1779 t29 t29)) +(let t1781 (Sum t13 t8 t1780 t9 t19 t20)) +(let t1782 (Mul t13 t1781 t20 t41 t20 t20)) +(let t1783 (Add t13 t1782 t20 t44 t23 t20)) +(let t1784 (Sqrt t13 t1783 t20 t20)) +(let t1785 (Recip t13 t1784 t20 t20)) +(let t1786 (Mul t12 t1785 t25 t1779 t29 t29)) +(let t1787 217) +(let t1788 "model.layers.23.post_attention_layernorm.weight") +(let t1789 (Input t1787 t1788 t33)) +(let t1790 (Mul t12 t1786 t29 t1789 t28 t29)) +(let t1791 210) +(let t1792 "model.layers.23.mlp.gate_proj.weight") +(let t1793 (Input t1791 t1792 t33)) +(let t1794 (Mul t107 t1790 t53 t1793 t57 t124)) +(let t1795 (Sum t105 t8 t1794 t126 t19 t127)) +(let t1796 (Mul t105 t1795 t127 t130 t131 t127)) +(let t1797 (Mul t105 t1796 t127 t134 t131 t127)) +(let t1798 (Exp2 t105 t1797 t127 t127)) +(let t1799 (Add t105 t1798 t127 t138 t131 t127)) +(let t1800 (Recip t105 t1799 t127 t127)) +(let t1801 (Mul t105 t1795 t127 t1800 t127 t127)) 
+(let t1802 209) +(let t1803 "model.layers.23.mlp.up_proj.weight") +(let t1804 (Input t1802 t1803 t33)) +(let t1805 (Mul t107 t1790 t53 t1804 t57 t124)) +(let t1806 (Sum t105 t8 t1805 t126 t19 t127)) +(let t1807 (Mul t105 t1801 t127 t1806 t127 t127)) +(let t1808 211) +(let t1809 "model.layers.23.mlp.down_proj.weight") +(let t1810 (Input t1808 t1809 t33)) +(let t1811 (Mul t104 t1807 t148 t1810 t152 t153)) +(let t1812 (Sum t12 t101 t1811 t155 t19 t29)) +(let t1813 (Add t12 t1779 t29 t1812 t29 t29)) +(let t1814 (Mul t12 t1813 t29 t1813 t29 t29)) +(let t1815 (Sum t13 t8 t1814 t9 t19 t20)) +(let t1816 (Mul t13 t1815 t20 t41 t20 t20)) +(let t1817 (Add t13 t1816 t20 t44 t23 t20)) +(let t1818 (Sqrt t13 t1817 t20 t20)) +(let t1819 (Recip t13 t1818 t20 t20)) +(let t1820 (Mul t12 t1819 t25 t1813 t29 t29)) +(let t1821 225) +(let t1822 "model.layers.24.input_layernorm.weight") +(let t1823 (Input t1821 t1822 t33)) +(let t1824 (Mul t12 t1820 t29 t1823 t28 t29)) +(let t1825 221) +(let t1826 "model.layers.24.self_attn.q_proj.weight") +(let t1827 (Input t1825 t1826 t33)) +(let t1828 (Mul t37 t1824 t53 t1827 t57 t60)) +(let t1829 (Sum t12 t8 t1828 t62 t19 t29)) +(let t1830 (RowRope t13 t1829 t9 t8 t65)) +(let t1831 222) +(let t1832 "model.layers.24.self_attn.k_proj.weight") +(let t1833 (Input t1831 t1832 t33)) +(let t1834 (Mul t72 t1824 t53 t1833 t57 t78)) +(let t1835 (Sum t70 t8 t1834 t80 t19 t81)) +(let t1836 (RowRope t13 t1835 t69 t68 t65)) +(let t1837 223) +(let t1838 "model.layers.24.self_attn.v_proj.weight") +(let t1839 (Input t1837 t1838 t33)) +(let t1840 (Mul t72 t1824 t53 t1839 t57 t78)) +(let t1841 (Sum t70 t8 t1840 t80 t19 t81)) +(let t1842 (ICons t1841 t89)) +(let t1843 (ICons t1836 t1842)) +(let t1844 (ICons t1830 t1843)) +(let t1845 (CustomOpHLIR t1844 t247 t33)) +(let t1846 224) +(let t1847 "model.layers.24.self_attn.o_proj.weight") +(let t1848 (Input t1846 t1847 t33)) +(let t1849 (Mul t37 t1845 t53 t1848 t57 t60)) +(let t1850 (Sum t12 t8 t1849 t62 t19 t29)) +(let 
t1851 (Add t12 t1813 t29 t1850 t29 t29)) +(let t1852 (Mul t12 t1851 t29 t1851 t29 t29)) +(let t1853 (Sum t13 t8 t1852 t9 t19 t20)) +(let t1854 (Mul t13 t1853 t20 t41 t20 t20)) +(let t1855 (Add t13 t1854 t20 t44 t23 t20)) +(let t1856 (Sqrt t13 t1855 t20 t20)) +(let t1857 (Recip t13 t1856 t20 t20)) +(let t1858 (Mul t12 t1857 t25 t1851 t29 t29)) +(let t1859 226) +(let t1860 "model.layers.24.post_attention_layernorm.weight") +(let t1861 (Input t1859 t1860 t33)) +(let t1862 (Mul t12 t1858 t29 t1861 t28 t29)) +(let t1863 219) +(let t1864 "model.layers.24.mlp.gate_proj.weight") +(let t1865 (Input t1863 t1864 t33)) +(let t1866 (Mul t107 t1862 t53 t1865 t57 t124)) +(let t1867 (Sum t105 t8 t1866 t126 t19 t127)) +(let t1868 (Mul t105 t1867 t127 t130 t131 t127)) +(let t1869 (Mul t105 t1868 t127 t134 t131 t127)) +(let t1870 (Exp2 t105 t1869 t127 t127)) +(let t1871 (Add t105 t1870 t127 t138 t131 t127)) +(let t1872 (Recip t105 t1871 t127 t127)) +(let t1873 (Mul t105 t1867 t127 t1872 t127 t127)) +(let t1874 218) +(let t1875 "model.layers.24.mlp.up_proj.weight") +(let t1876 (Input t1874 t1875 t33)) +(let t1877 (Mul t107 t1862 t53 t1876 t57 t124)) +(let t1878 (Sum t105 t8 t1877 t126 t19 t127)) +(let t1879 (Mul t105 t1873 t127 t1878 t127 t127)) +(let t1880 220) +(let t1881 "model.layers.24.mlp.down_proj.weight") +(let t1882 (Input t1880 t1881 t33)) +(let t1883 (Mul t104 t1879 t148 t1882 t152 t153)) +(let t1884 (Sum t12 t101 t1883 t155 t19 t29)) +(let t1885 (Add t12 t1851 t29 t1884 t29 t29)) +(let t1886 (Mul t12 t1885 t29 t1885 t29 t29)) +(let t1887 (Sum t13 t8 t1886 t9 t19 t20)) +(let t1888 (Mul t13 t1887 t20 t41 t20 t20)) +(let t1889 (Add t13 t1888 t20 t44 t23 t20)) +(let t1890 (Sqrt t13 t1889 t20 t20)) +(let t1891 (Recip t13 t1890 t20 t20)) +(let t1892 (Mul t12 t1891 t25 t1885 t29 t29)) +(let t1893 234) +(let t1894 "model.layers.25.input_layernorm.weight") +(let t1895 (Input t1893 t1894 t33)) +(let t1896 (Mul t12 t1892 t29 t1895 t28 t29)) +(let t1897 230) +(let t1898 
"model.layers.25.self_attn.q_proj.weight") +(let t1899 (Input t1897 t1898 t33)) +(let t1900 (Mul t37 t1896 t53 t1899 t57 t60)) +(let t1901 (Sum t12 t8 t1900 t62 t19 t29)) +(let t1902 (RowRope t13 t1901 t9 t8 t65)) +(let t1903 231) +(let t1904 "model.layers.25.self_attn.k_proj.weight") +(let t1905 (Input t1903 t1904 t33)) +(let t1906 (Mul t72 t1896 t53 t1905 t57 t78)) +(let t1907 (Sum t70 t8 t1906 t80 t19 t81)) +(let t1908 (RowRope t13 t1907 t69 t68 t65)) +(let t1909 232) +(let t1910 "model.layers.25.self_attn.v_proj.weight") +(let t1911 (Input t1909 t1910 t33)) +(let t1912 (Mul t72 t1896 t53 t1911 t57 t78)) +(let t1913 (Sum t70 t8 t1912 t80 t19 t81)) +(let t1914 (ICons t1913 t89)) +(let t1915 (ICons t1908 t1914)) +(let t1916 (ICons t1902 t1915)) +(let t1917 (CustomOpHLIR t1916 t253 t33)) +(let t1918 233) +(let t1919 "model.layers.25.self_attn.o_proj.weight") +(let t1920 (Input t1918 t1919 t33)) +(let t1921 (Mul t37 t1917 t53 t1920 t57 t60)) +(let t1922 (Sum t12 t8 t1921 t62 t19 t29)) +(let t1923 (Add t12 t1885 t29 t1922 t29 t29)) +(let t1924 (Mul t12 t1923 t29 t1923 t29 t29)) +(let t1925 (Sum t13 t8 t1924 t9 t19 t20)) +(let t1926 (Mul t13 t1925 t20 t41 t20 t20)) +(let t1927 (Add t13 t1926 t20 t44 t23 t20)) +(let t1928 (Sqrt t13 t1927 t20 t20)) +(let t1929 (Recip t13 t1928 t20 t20)) +(let t1930 (Mul t12 t1929 t25 t1923 t29 t29)) +(let t1931 235) +(let t1932 "model.layers.25.post_attention_layernorm.weight") +(let t1933 (Input t1931 t1932 t33)) +(let t1934 (Mul t12 t1930 t29 t1933 t28 t29)) +(let t1935 228) +(let t1936 "model.layers.25.mlp.gate_proj.weight") +(let t1937 (Input t1935 t1936 t33)) +(let t1938 (Mul t107 t1934 t53 t1937 t57 t124)) +(let t1939 (Sum t105 t8 t1938 t126 t19 t127)) +(let t1940 (Mul t105 t1939 t127 t130 t131 t127)) +(let t1941 (Mul t105 t1940 t127 t134 t131 t127)) +(let t1942 (Exp2 t105 t1941 t127 t127)) +(let t1943 (Add t105 t1942 t127 t138 t131 t127)) +(let t1944 (Recip t105 t1943 t127 t127)) +(let t1945 (Mul t105 t1939 t127 t1944 t127 t127)) 
+(let t1946 227) +(let t1947 "model.layers.25.mlp.up_proj.weight") +(let t1948 (Input t1946 t1947 t33)) +(let t1949 (Mul t107 t1934 t53 t1948 t57 t124)) +(let t1950 (Sum t105 t8 t1949 t126 t19 t127)) +(let t1951 (Mul t105 t1945 t127 t1950 t127 t127)) +(let t1952 229) +(let t1953 "model.layers.25.mlp.down_proj.weight") +(let t1954 (Input t1952 t1953 t33)) +(let t1955 (Mul t104 t1951 t148 t1954 t152 t153)) +(let t1956 (Sum t12 t101 t1955 t155 t19 t29)) +(let t1957 (Add t12 t1923 t29 t1956 t29 t29)) +(let t1958 (Mul t12 t1957 t29 t1957 t29 t29)) +(let t1959 (Sum t13 t8 t1958 t9 t19 t20)) +(let t1960 (Mul t13 t1959 t20 t41 t20 t20)) +(let t1961 (Add t13 t1960 t20 t44 t23 t20)) +(let t1962 (Sqrt t13 t1961 t20 t20)) +(let t1963 (Recip t13 t1962 t20 t20)) +(let t1964 (Mul t12 t1963 t25 t1957 t29 t29)) +(let t1965 243) +(let t1966 "model.layers.26.input_layernorm.weight") +(let t1967 (Input t1965 t1966 t33)) +(let t1968 (Mul t12 t1964 t29 t1967 t28 t29)) +(let t1969 239) +(let t1970 "model.layers.26.self_attn.q_proj.weight") +(let t1971 (Input t1969 t1970 t33)) +(let t1972 (Mul t37 t1968 t53 t1971 t57 t60)) +(let t1973 (Sum t12 t8 t1972 t62 t19 t29)) +(let t1974 (RowRope t13 t1973 t9 t8 t65)) +(let t1975 240) +(let t1976 "model.layers.26.self_attn.k_proj.weight") +(let t1977 (Input t1975 t1976 t33)) +(let t1978 (Mul t72 t1968 t53 t1977 t57 t78)) +(let t1979 (Sum t70 t8 t1978 t80 t19 t81)) +(let t1980 (RowRope t13 t1979 t69 t68 t65)) +(let t1981 241) +(let t1982 "model.layers.26.self_attn.v_proj.weight") +(let t1983 (Input t1981 t1982 t33)) +(let t1984 (Mul t72 t1968 t53 t1983 t57 t78)) +(let t1985 (Sum t70 t8 t1984 t80 t19 t81)) +(let t1986 (ICons t1985 t89)) +(let t1987 (ICons t1980 t1986)) +(let t1988 (ICons t1974 t1987)) +(let t1989 (CustomOpHLIR t1988 t262 t33)) +(let t1990 242) +(let t1991 "model.layers.26.self_attn.o_proj.weight") +(let t1992 (Input t1990 t1991 t33)) +(let t1993 (Mul t37 t1989 t53 t1992 t57 t60)) +(let t1994 (Sum t12 t8 t1993 t62 t19 t29)) +(let 
t1995 (Add t12 t1957 t29 t1994 t29 t29)) +(let t1996 (Mul t12 t1995 t29 t1995 t29 t29)) +(let t1997 (Sum t13 t8 t1996 t9 t19 t20)) +(let t1998 (Mul t13 t1997 t20 t41 t20 t20)) +(let t1999 (Add t13 t1998 t20 t44 t23 t20)) +(let t2000 (Sqrt t13 t1999 t20 t20)) +(let t2001 (Recip t13 t2000 t20 t20)) +(let t2002 (Mul t12 t2001 t25 t1995 t29 t29)) +(let t2003 244) +(let t2004 "model.layers.26.post_attention_layernorm.weight") +(let t2005 (Input t2003 t2004 t33)) +(let t2006 (Mul t12 t2002 t29 t2005 t28 t29)) +(let t2007 237) +(let t2008 "model.layers.26.mlp.gate_proj.weight") +(let t2009 (Input t2007 t2008 t33)) +(let t2010 (Mul t107 t2006 t53 t2009 t57 t124)) +(let t2011 (Sum t105 t8 t2010 t126 t19 t127)) +(let t2012 (Mul t105 t2011 t127 t130 t131 t127)) +(let t2013 (Mul t105 t2012 t127 t134 t131 t127)) +(let t2014 (Exp2 t105 t2013 t127 t127)) +(let t2015 (Add t105 t2014 t127 t138 t131 t127)) +(let t2016 (Recip t105 t2015 t127 t127)) +(let t2017 (Mul t105 t2011 t127 t2016 t127 t127)) +(let t2018 236) +(let t2019 "model.layers.26.mlp.up_proj.weight") +(let t2020 (Input t2018 t2019 t33)) +(let t2021 (Mul t107 t2006 t53 t2020 t57 t124)) +(let t2022 (Sum t105 t8 t2021 t126 t19 t127)) +(let t2023 (Mul t105 t2017 t127 t2022 t127 t127)) +(let t2024 238) +(let t2025 "model.layers.26.mlp.down_proj.weight") +(let t2026 (Input t2024 t2025 t33)) +(let t2027 (Mul t104 t2023 t148 t2026 t152 t153)) +(let t2028 (Sum t12 t101 t2027 t155 t19 t29)) +(let t2029 (Add t12 t1995 t29 t2028 t29 t29)) +(let t2030 (Mul t12 t2029 t29 t2029 t29 t29)) +(let t2031 (Sum t13 t8 t2030 t9 t19 t20)) +(let t2032 (Mul t13 t2031 t20 t41 t20 t20)) +(let t2033 (Add t13 t2032 t20 t44 t23 t20)) +(let t2034 (Sqrt t13 t2033 t20 t20)) +(let t2035 (Recip t13 t2034 t20 t20)) +(let t2036 (Mul t12 t2035 t25 t2029 t29 t29)) +(let t2037 252) +(let t2038 "model.layers.27.input_layernorm.weight") +(let t2039 (Input t2037 t2038 t33)) +(let t2040 (Mul t12 t2036 t29 t2039 t28 t29)) +(let t2041 248) +(let t2042 
"model.layers.27.self_attn.q_proj.weight") +(let t2043 (Input t2041 t2042 t33)) +(let t2044 (Mul t37 t2040 t53 t2043 t57 t60)) +(let t2045 (Sum t12 t8 t2044 t62 t19 t29)) +(let t2046 (RowRope t13 t2045 t9 t8 t65)) +(let t2047 249) +(let t2048 "model.layers.27.self_attn.k_proj.weight") +(let t2049 (Input t2047 t2048 t33)) +(let t2050 (Mul t72 t2040 t53 t2049 t57 t78)) +(let t2051 (Sum t70 t8 t2050 t80 t19 t81)) +(let t2052 (RowRope t13 t2051 t69 t68 t65)) +(let t2053 250) +(let t2054 "model.layers.27.self_attn.v_proj.weight") +(let t2055 (Input t2053 t2054 t33)) +(let t2056 (Mul t72 t2040 t53 t2055 t57 t78)) +(let t2057 (Sum t70 t8 t2056 t80 t19 t81)) +(let t2058 (ICons t2057 t89)) +(let t2059 (ICons t2052 t2058)) +(let t2060 (ICons t2046 t2059)) +(let t2061 (CustomOpHLIR t2060 t237 t33)) +(let t2062 251) +(let t2063 "model.layers.27.self_attn.o_proj.weight") +(let t2064 (Input t2062 t2063 t33)) +(let t2065 (Mul t37 t2061 t53 t2064 t57 t60)) +(let t2066 (Sum t12 t8 t2065 t62 t19 t29)) +(let t2067 (Add t12 t2029 t29 t2066 t29 t29)) +(let t2068 (Mul t12 t2067 t29 t2067 t29 t29)) +(let t2069 (Sum t13 t8 t2068 t9 t19 t20)) +(let t2070 (Mul t13 t2069 t20 t41 t20 t20)) +(let t2071 (Add t13 t2070 t20 t44 t23 t20)) +(let t2072 (Sqrt t13 t2071 t20 t20)) +(let t2073 (Recip t13 t2072 t20 t20)) +(let t2074 (Mul t12 t2073 t25 t2067 t29 t29)) +(let t2075 253) +(let t2076 "model.layers.27.post_attention_layernorm.weight") +(let t2077 (Input t2075 t2076 t33)) +(let t2078 (Mul t12 t2074 t29 t2077 t28 t29)) +(let t2079 246) +(let t2080 "model.layers.27.mlp.gate_proj.weight") +(let t2081 (Input t2079 t2080 t33)) +(let t2082 (Mul t107 t2078 t53 t2081 t57 t124)) +(let t2083 (Sum t105 t8 t2082 t126 t19 t127)) +(let t2084 (Mul t105 t2083 t127 t130 t131 t127)) +(let t2085 (Mul t105 t2084 t127 t134 t131 t127)) +(let t2086 (Exp2 t105 t2085 t127 t127)) +(let t2087 (Add t105 t2086 t127 t138 t131 t127)) +(let t2088 (Recip t105 t2087 t127 t127)) +(let t2089 (Mul t105 t2083 t127 t2088 t127 t127)) 
+(let t2090 245) +(let t2091 "model.layers.27.mlp.up_proj.weight") +(let t2092 (Input t2090 t2091 t33)) +(let t2093 (Mul t107 t2078 t53 t2092 t57 t124)) +(let t2094 (Sum t105 t8 t2093 t126 t19 t127)) +(let t2095 (Mul t105 t2089 t127 t2094 t127 t127)) +(let t2096 247) +(let t2097 "model.layers.27.mlp.down_proj.weight") +(let t2098 (Input t2096 t2097 t33)) +(let t2099 (Mul t104 t2095 t148 t2098 t152 t153)) +(let t2100 (Sum t12 t101 t2099 t155 t19 t29)) +(let t2101 (Add t12 t2067 t29 t2100 t29 t29)) +(let t2102 (Mul t12 t2101 t29 t2101 t29 t29)) +(let t2103 (Sum t13 t8 t2102 t9 t19 t20)) +(let t2104 (Mul t13 t2103 t20 t41 t20 t20)) +(let t2105 (Add t13 t2104 t20 t44 t23 t20)) +(let t2106 (Sqrt t13 t2105 t20 t20)) +(let t2107 (Recip t13 t2106 t20 t20)) +(let t2108 (Mul t12 t2107 t25 t2101 t29 t29)) +(let t2109 261) +(let t2110 "model.layers.28.input_layernorm.weight") +(let t2111 (Input t2109 t2110 t33)) +(let t2112 (Mul t12 t2108 t29 t2111 t28 t29)) +(let t2113 257) +(let t2114 "model.layers.28.self_attn.q_proj.weight") +(let t2115 (Input t2113 t2114 t33)) +(let t2116 (Mul t37 t2112 t53 t2115 t57 t60)) +(let t2117 (Sum t12 t8 t2116 t62 t19 t29)) +(let t2118 (RowRope t13 t2117 t9 t8 t65)) +(let t2119 258) +(let t2120 "model.layers.28.self_attn.k_proj.weight") +(let t2121 (Input t2119 t2120 t33)) +(let t2122 (Mul t72 t2112 t53 t2121 t57 t78)) +(let t2123 (Sum t70 t8 t2122 t80 t19 t81)) +(let t2124 (RowRope t13 t2123 t69 t68 t65)) +(let t2125 259) +(let t2126 "model.layers.28.self_attn.v_proj.weight") +(let t2127 (Input t2125 t2126 t33)) +(let t2128 (Mul t72 t2112 t53 t2127 t57 t78)) +(let t2129 (Sum t70 t8 t2128 t80 t19 t81)) +(let t2130 (ICons t2129 t89)) +(let t2131 (ICons t2124 t2130)) +(let t2132 (ICons t2118 t2131)) +(let t2133 (CustomOpHLIR t2132 t275 t33)) +(let t2134 260) +(let t2135 "model.layers.28.self_attn.o_proj.weight") +(let t2136 (Input t2134 t2135 t33)) +(let t2137 (Mul t37 t2133 t53 t2136 t57 t60)) +(let t2138 (Sum t12 t8 t2137 t62 t19 t29)) +(let 
t2139 (Add t12 t2101 t29 t2138 t29 t29)) +(let t2140 (Mul t12 t2139 t29 t2139 t29 t29)) +(let t2141 (Sum t13 t8 t2140 t9 t19 t20)) +(let t2142 (Mul t13 t2141 t20 t41 t20 t20)) +(let t2143 (Add t13 t2142 t20 t44 t23 t20)) +(let t2144 (Sqrt t13 t2143 t20 t20)) +(let t2145 (Recip t13 t2144 t20 t20)) +(let t2146 (Mul t12 t2145 t25 t2139 t29 t29)) +(let t2147 262) +(let t2148 "model.layers.28.post_attention_layernorm.weight") +(let t2149 (Input t2147 t2148 t33)) +(let t2150 (Mul t12 t2146 t29 t2149 t28 t29)) +(let t2151 255) +(let t2152 "model.layers.28.mlp.gate_proj.weight") +(let t2153 (Input t2151 t2152 t33)) +(let t2154 (Mul t107 t2150 t53 t2153 t57 t124)) +(let t2155 (Sum t105 t8 t2154 t126 t19 t127)) +(let t2156 (Mul t105 t2155 t127 t130 t131 t127)) +(let t2157 (Mul t105 t2156 t127 t134 t131 t127)) +(let t2158 (Exp2 t105 t2157 t127 t127)) +(let t2159 (Add t105 t2158 t127 t138 t131 t127)) +(let t2160 (Recip t105 t2159 t127 t127)) +(let t2161 (Mul t105 t2155 t127 t2160 t127 t127)) +(let t2162 254) +(let t2163 "model.layers.28.mlp.up_proj.weight") +(let t2164 (Input t2162 t2163 t33)) +(let t2165 (Mul t107 t2150 t53 t2164 t57 t124)) +(let t2166 (Sum t105 t8 t2165 t126 t19 t127)) +(let t2167 (Mul t105 t2161 t127 t2166 t127 t127)) +(let t2168 256) +(let t2169 "model.layers.28.mlp.down_proj.weight") +(let t2170 (Input t2168 t2169 t33)) +(let t2171 (Mul t104 t2167 t148 t2170 t152 t153)) +(let t2172 (Sum t12 t101 t2171 t155 t19 t29)) +(let t2173 (Add t12 t2139 t29 t2172 t29 t29)) +(let t2174 (Mul t12 t2173 t29 t2173 t29 t29)) +(let t2175 (Sum t13 t8 t2174 t9 t19 t20)) +(let t2176 (Mul t13 t2175 t20 t41 t20 t20)) +(let t2177 (Add t13 t2176 t20 t44 t23 t20)) +(let t2178 (Sqrt t13 t2177 t20 t20)) +(let t2179 (Recip t13 t2178 t20 t20)) +(let t2180 (Mul t12 t2179 t25 t2173 t29 t29)) +(let t2181 270) +(let t2182 "model.layers.29.input_layernorm.weight") +(let t2183 (Input t2181 t2182 t33)) +(let t2184 (Mul t12 t2180 t29 t2183 t28 t29)) +(let t2185 266) +(let t2186 
"model.layers.29.self_attn.q_proj.weight") +(let t2187 (Input t2185 t2186 t33)) +(let t2188 (Mul t37 t2184 t53 t2187 t57 t60)) +(let t2189 (Sum t12 t8 t2188 t62 t19 t29)) +(let t2190 (RowRope t13 t2189 t9 t8 t65)) +(let t2191 267) +(let t2192 "model.layers.29.self_attn.k_proj.weight") +(let t2193 (Input t2191 t2192 t33)) +(let t2194 (Mul t72 t2184 t53 t2193 t57 t78)) +(let t2195 (Sum t70 t8 t2194 t80 t19 t81)) +(let t2196 (RowRope t13 t2195 t69 t68 t65)) +(let t2197 268) +(let t2198 "model.layers.29.self_attn.v_proj.weight") +(let t2199 (Input t2197 t2198 t33)) +(let t2200 (Mul t72 t2184 t53 t2199 t57 t78)) +(let t2201 (Sum t70 t8 t2200 t80 t19 t81)) +(let t2202 (ICons t2201 t89)) +(let t2203 (ICons t2196 t2202)) +(let t2204 (ICons t2190 t2203)) +(let t2205 (CustomOpHLIR t2204 t362 t33)) +(let t2206 269) +(let t2207 "model.layers.29.self_attn.o_proj.weight") +(let t2208 (Input t2206 t2207 t33)) +(let t2209 (Mul t37 t2205 t53 t2208 t57 t60)) +(let t2210 (Sum t12 t8 t2209 t62 t19 t29)) +(let t2211 (Add t12 t2173 t29 t2210 t29 t29)) +(let t2212 (Mul t12 t2211 t29 t2211 t29 t29)) +(let t2213 (Sum t13 t8 t2212 t9 t19 t20)) +(let t2214 (Mul t13 t2213 t20 t41 t20 t20)) +(let t2215 (Add t13 t2214 t20 t44 t23 t20)) +(let t2216 (Sqrt t13 t2215 t20 t20)) +(let t2217 (Recip t13 t2216 t20 t20)) +(let t2218 (Mul t12 t2217 t25 t2211 t29 t29)) +(let t2219 271) +(let t2220 "model.layers.29.post_attention_layernorm.weight") +(let t2221 (Input t2219 t2220 t33)) +(let t2222 (Mul t12 t2218 t29 t2221 t28 t29)) +(let t2223 264) +(let t2224 "model.layers.29.mlp.gate_proj.weight") +(let t2225 (Input t2223 t2224 t33)) +(let t2226 (Mul t107 t2222 t53 t2225 t57 t124)) +(let t2227 (Sum t105 t8 t2226 t126 t19 t127)) +(let t2228 (Mul t105 t2227 t127 t130 t131 t127)) +(let t2229 (Mul t105 t2228 t127 t134 t131 t127)) +(let t2230 (Exp2 t105 t2229 t127 t127)) +(let t2231 (Add t105 t2230 t127 t138 t131 t127)) +(let t2232 (Recip t105 t2231 t127 t127)) +(let t2233 (Mul t105 t2227 t127 t2232 t127 t127)) 
+(let t2234 263) +(let t2235 "model.layers.29.mlp.up_proj.weight") +(let t2236 (Input t2234 t2235 t33)) +(let t2237 (Mul t107 t2222 t53 t2236 t57 t124)) +(let t2238 (Sum t105 t8 t2237 t126 t19 t127)) +(let t2239 (Mul t105 t2233 t127 t2238 t127 t127)) +(let t2240 265) +(let t2241 "model.layers.29.mlp.down_proj.weight") +(let t2242 (Input t2240 t2241 t33)) +(let t2243 (Mul t104 t2239 t148 t2242 t152 t153)) +(let t2244 (Sum t12 t101 t2243 t155 t19 t29)) +(let t2245 (Add t12 t2211 t29 t2244 t29 t29)) +(let t2246 (Mul t12 t2245 t29 t2245 t29 t29)) +(let t2247 (Sum t13 t8 t2246 t9 t19 t20)) +(let t2248 (Mul t13 t2247 t20 t41 t20 t20)) +(let t2249 (Add t13 t2248 t20 t44 t23 t20)) +(let t2250 (Sqrt t13 t2249 t20 t20)) +(let t2251 (Recip t13 t2250 t20 t20)) +(let t2252 (Mul t12 t2251 t25 t2245 t29 t29)) +(let t2253 279) +(let t2254 "model.layers.30.input_layernorm.weight") +(let t2255 (Input t2253 t2254 t33)) +(let t2256 (Mul t12 t2252 t29 t2255 t28 t29)) +(let t2257 275) +(let t2258 "model.layers.30.self_attn.q_proj.weight") +(let t2259 (Input t2257 t2258 t33)) +(let t2260 (Mul t37 t2256 t53 t2259 t57 t60)) +(let t2261 (Sum t12 t8 t2260 t62 t19 t29)) +(let t2262 (RowRope t13 t2261 t9 t8 t65)) +(let t2263 276) +(let t2264 "model.layers.30.self_attn.k_proj.weight") +(let t2265 (Input t2263 t2264 t33)) +(let t2266 (Mul t72 t2256 t53 t2265 t57 t78)) +(let t2267 (Sum t70 t8 t2266 t80 t19 t81)) +(let t2268 (RowRope t13 t2267 t69 t68 t65)) +(let t2269 277) +(let t2270 "model.layers.30.self_attn.v_proj.weight") +(let t2271 (Input t2269 t2270 t33)) +(let t2272 (Mul t72 t2256 t53 t2271 t57 t78)) +(let t2273 (Sum t70 t8 t2272 t80 t19 t81)) +(let t2274 (ICons t2273 t89)) +(let t2275 (ICons t2268 t2274)) +(let t2276 (ICons t2262 t2275)) +(let t2277 (CustomOpHLIR t2276 t351 t33)) +(let t2278 278) +(let t2279 "model.layers.30.self_attn.o_proj.weight") +(let t2280 (Input t2278 t2279 t33)) +(let t2281 (Mul t37 t2277 t53 t2280 t57 t60)) +(let t2282 (Sum t12 t8 t2281 t62 t19 t29)) +(let 
t2283 (Add t12 t2245 t29 t2282 t29 t29)) +(let t2284 (Mul t12 t2283 t29 t2283 t29 t29)) +(let t2285 (Sum t13 t8 t2284 t9 t19 t20)) +(let t2286 (Mul t13 t2285 t20 t41 t20 t20)) +(let t2287 (Add t13 t2286 t20 t44 t23 t20)) +(let t2288 (Sqrt t13 t2287 t20 t20)) +(let t2289 (Recip t13 t2288 t20 t20)) +(let t2290 (Mul t12 t2289 t25 t2283 t29 t29)) +(let t2291 280) +(let t2292 "model.layers.30.post_attention_layernorm.weight") +(let t2293 (Input t2291 t2292 t33)) +(let t2294 (Mul t12 t2290 t29 t2293 t28 t29)) +(let t2295 273) +(let t2296 "model.layers.30.mlp.gate_proj.weight") +(let t2297 (Input t2295 t2296 t33)) +(let t2298 (Mul t107 t2294 t53 t2297 t57 t124)) +(let t2299 (Sum t105 t8 t2298 t126 t19 t127)) +(let t2300 (Mul t105 t2299 t127 t130 t131 t127)) +(let t2301 (Mul t105 t2300 t127 t134 t131 t127)) +(let t2302 (Exp2 t105 t2301 t127 t127)) +(let t2303 (Add t105 t2302 t127 t138 t131 t127)) +(let t2304 (Recip t105 t2303 t127 t127)) +(let t2305 (Mul t105 t2299 t127 t2304 t127 t127)) +(let t2306 272) +(let t2307 "model.layers.30.mlp.up_proj.weight") +(let t2308 (Input t2306 t2307 t33)) +(let t2309 (Mul t107 t2294 t53 t2308 t57 t124)) +(let t2310 (Sum t105 t8 t2309 t126 t19 t127)) +(let t2311 (Mul t105 t2305 t127 t2310 t127 t127)) +(let t2312 274) +(let t2313 "model.layers.30.mlp.down_proj.weight") +(let t2314 (Input t2312 t2313 t33)) +(let t2315 (Mul t104 t2311 t148 t2314 t152 t153)) +(let t2316 (Sum t12 t101 t2315 t155 t19 t29)) +(let t2317 (Add t12 t2283 t29 t2316 t29 t29)) +(let t2318 (Mul t12 t2317 t29 t2317 t29 t29)) +(let t2319 (Sum t13 t8 t2318 t9 t19 t20)) +(let t2320 (Mul t13 t2319 t20 t41 t20 t20)) +(let t2321 (Add t13 t2320 t20 t44 t23 t20)) +(let t2322 (Sqrt t13 t2321 t20 t20)) +(let t2323 (Recip t13 t2322 t20 t20)) +(let t2324 (Mul t12 t2323 t25 t2317 t29 t29)) +(let t2325 288) +(let t2326 "model.layers.31.input_layernorm.weight") +(let t2327 (Input t2325 t2326 t33)) +(let t2328 (Mul t12 t2324 t29 t2327 t28 t29)) +(let t2329 284) +(let t2330 
"model.layers.31.self_attn.q_proj.weight") +(let t2331 (Input t2329 t2330 t33)) +(let t2332 (Mul t37 t2328 t53 t2331 t57 t60)) +(let t2333 (Sum t12 t8 t2332 t62 t19 t29)) +(let t2334 (RowRope t13 t2333 t9 t8 t65)) +(let t2335 285) +(let t2336 "model.layers.31.self_attn.k_proj.weight") +(let t2337 (Input t2335 t2336 t33)) +(let t2338 (Mul t72 t2328 t53 t2337 t57 t78)) +(let t2339 (Sum t70 t8 t2338 t80 t19 t81)) +(let t2340 (RowRope t13 t2339 t69 t68 t65)) +(let t2341 286) +(let t2342 "model.layers.31.self_attn.v_proj.weight") +(let t2343 (Input t2341 t2342 t33)) +(let t2344 (Mul t72 t2328 t53 t2343 t57 t78)) +(let t2345 (Sum t70 t8 t2344 t80 t19 t81)) +(let t2346 (ICons t2345 t89)) +(let t2347 (ICons t2340 t2346)) +(let t2348 (ICons t2334 t2347)) +(let t2349 (CustomOpHLIR t2348 t368 t33)) +(let t2350 287) +(let t2351 "model.layers.31.self_attn.o_proj.weight") +(let t2352 (Input t2350 t2351 t33)) +(let t2353 (Mul t37 t2349 t53 t2352 t57 t60)) +(let t2354 (Sum t12 t8 t2353 t62 t19 t29)) +(let t2355 (Add t12 t2317 t29 t2354 t29 t29)) +(let t2356 (Mul t12 t2355 t29 t2355 t29 t29)) +(let t2357 (Sum t13 t8 t2356 t9 t19 t20)) +(let t2358 (Mul t13 t2357 t20 t41 t20 t20)) +(let t2359 (Add t13 t2358 t20 t44 t23 t20)) +(let t2360 (Sqrt t13 t2359 t20 t20)) +(let t2361 (Recip t13 t2360 t20 t20)) +(let t2362 (Mul t12 t2361 t25 t2355 t29 t29)) +(let t2363 289) +(let t2364 "model.layers.31.post_attention_layernorm.weight") +(let t2365 (Input t2363 t2364 t33)) +(let t2366 (Mul t12 t2362 t29 t2365 t28 t29)) +(let t2367 282) +(let t2368 "model.layers.31.mlp.gate_proj.weight") +(let t2369 (Input t2367 t2368 t33)) +(let t2370 (Mul t107 t2366 t53 t2369 t57 t124)) +(let t2371 (Sum t105 t8 t2370 t126 t19 t127)) +(let t2372 (Mul t105 t2371 t127 t130 t131 t127)) +(let t2373 (Mul t105 t2372 t127 t134 t131 t127)) +(let t2374 (Exp2 t105 t2373 t127 t127)) +(let t2375 (Add t105 t2374 t127 t138 t131 t127)) +(let t2376 (Recip t105 t2375 t127 t127)) +(let t2377 (Mul t105 t2371 t127 t2376 t127 t127)) 
+(let t2378 281) +(let t2379 "model.layers.31.mlp.up_proj.weight") +(let t2380 (Input t2378 t2379 t33)) +(let t2381 (Mul t107 t2366 t53 t2380 t57 t124)) +(let t2382 (Sum t105 t8 t2381 t126 t19 t127)) +(let t2383 (Mul t105 t2377 t127 t2382 t127 t127)) +(let t2384 283) +(let t2385 "model.layers.31.mlp.down_proj.weight") +(let t2386 (Input t2384 t2385 t33)) +(let t2387 (Mul t104 t2383 t148 t2386 t152 t153)) +(let t2388 (Sum t12 t101 t2387 t155 t19 t29)) +(let t2389 (Add t12 t2355 t29 t2388 t29 t29)) +(let t2390 (Mul t12 t2389 t29 t2389 t29 t29)) +(let t2391 (Sum t13 t8 t2390 t9 t19 t20)) +(let t2392 (Mul t13 t2391 t20 t41 t20 t20)) +(let t2393 (Add t13 t2392 t20 t44 t23 t20)) +(let t2394 (Sqrt t13 t2393 t20 t20)) +(let t2395 (Recip t13 t2394 t20 t20)) +(let t2396 (Mul t12 t2395 t25 t2389 t29 t29)) +(let t2397 290) +(let t2398 "model.norm.weight") +(let t2399 (Input t2397 t2398 t33)) +(let t2400 (Mul t12 t2396 t29 t2399 t28 t29)) +(let t2401 291) +(let t2402 "lm_head.weight") +(let t2403 (Input t2401 t2402 t33)) +(let t2404 525336576) +(let t2405 (MNum t2404)) +(let t2406 (ECons t2405 t29)) +(let t2407 (Mul t11 t2400 t53 t2403 t57 t2406)) +(let t2408 (ECons t2405 t9)) +(let t2409 (ECons t3 t20)) +(let t2410 (Sum t6 t8 t2407 t2408 t19 t2409)) +(let t2411 4887) +(let t2412 (Output t2410 t2411)) + +(run-schedule + (repeat 10 + (saturate expr) + (run) + ) + (saturate expr) + (saturate base_cleanup) + (saturate cleanup) +) \ No newline at end of file diff --git a/tests/snapshots/files__proof_unsupported_files.snap b/tests/snapshots/files__proof_unsupported_files.snap index 9d4c276dc..54b6de055 100644 --- a/tests/snapshots/files__proof_unsupported_files.snap +++ b/tests/snapshots/files__proof_unsupported_files.snap @@ -28,6 +28,7 @@ levenshtein-distance.egg list.egg looking_up_global.egg looking_up_nonconstructor_in_rewrite_good.egg +luminal-llama.egg map.egg math.egg merge_read.egg diff --git a/tests/snapshots/files__shared_snapshot_luminal_llama.snap 
b/tests/snapshots/files__shared_snapshot_luminal_llama.snap new file mode 100644 index 000000000..bd5a5d40b --- /dev/null +++ b/tests/snapshots/files__shared_snapshot_luminal_llama.snap @@ -0,0 +1,74 @@ +--- +source: tests/files.rs +expression: snapshot_content_across_treatments +--- +((Add 0) + (Bf16 0) + (Cast 0) + (Constant 0) + (CubeMul 225) + (CustomOpHLIR 32) + (ECons 128) + (ENil 1) + (Exp 0) + (Exp2 0) + (F16 0) + (F32 1) + (Gather 0) + (ICons 96) + (INil 1) + (Input 293) + (Int 1) + (Iota 0) + (KernelAdd 162) + (KernelArgsort 0) + (KernelGather 1) + (KernelIota 2) + (KernelMax 0) + (KernelMean 65) + (KernelMul 614) + (KernelSum 290) + (LessThan 0) + (Log2 0) + (MAdd 0) + (MAnd 0) + (MCeilDiv 5) + (MDiv 0) + (MFloat 0) + (MFloorTo 0) + (MGte 0) + (MIter 1) + (MLt 0) + (MMax 0) + (MMin 0) + (MMod 0) + (MMul 117) + (MNum 37) + (MOr 0) + (MReplace 0) + (MReplaceList 0) + (MSub 0) + (MVar 1) + (Max 0) + (Mod 0) + (Mul 0) + (Output 1) + (OutputJoin 0) + (Recip 0) + (RemoveNthFromEnd 0) + (ReplaceNthFromEnd 0) + (RowAdd 64) + (RowMajor 0) + (RowRMSNorm 65) + (RowRope 64) + (RowSwishMul 32) + (Sigmoid 0) + (Sin 0) + (Sqrt 0) + (Sum 0) + (TileMatmul 225) + (TileSum 225) + (dtype 1659) + (len 0) + (n_elements 0) + (nth_from_end 0))