@@ -2747,205 +2747,199 @@ struct llama_cparams {
2747
2747
2748
2748
struct llama_layer_posnet {
2749
2749
// resnet
2750
- struct ggml_tensor * norm1 = nullptr;
2750
+ struct ggml_tensor * norm1 = nullptr;
2751
2751
struct ggml_tensor * norm1_b = nullptr;
2752
2752
2753
- struct ggml_tensor * conv1 = nullptr;
2753
+ struct ggml_tensor * conv1 = nullptr;
2754
2754
struct ggml_tensor * conv1_b = nullptr;
2755
2755
2756
- struct ggml_tensor * norm2 = nullptr;
2756
+ struct ggml_tensor * norm2 = nullptr;
2757
2757
struct ggml_tensor * norm2_b = nullptr;
2758
2758
2759
- struct ggml_tensor * conv2 = nullptr;
2759
+ struct ggml_tensor * conv2 = nullptr;
2760
2760
struct ggml_tensor * conv2_b = nullptr;
2761
2761
2762
2762
// attention
2763
- struct ggml_tensor * attn_norm = nullptr;
2763
+ struct ggml_tensor * attn_norm = nullptr;
2764
2764
struct ggml_tensor * attn_norm_b = nullptr;
2765
2765
2766
- struct ggml_tensor * attn_q = nullptr;
2766
+ struct ggml_tensor * attn_q = nullptr;
2767
2767
struct ggml_tensor * attn_q_b = nullptr;
2768
2768
2769
- struct ggml_tensor * attn_k = nullptr;
2769
+ struct ggml_tensor * attn_k = nullptr;
2770
2770
struct ggml_tensor * attn_k_b = nullptr;
2771
2771
2772
- struct ggml_tensor * attn_v = nullptr;
2772
+ struct ggml_tensor * attn_v = nullptr;
2773
2773
struct ggml_tensor * attn_v_b = nullptr;
2774
2774
2775
- struct ggml_tensor * attn_o = nullptr;
2775
+ struct ggml_tensor * attn_o = nullptr;
2776
2776
struct ggml_tensor * attn_o_b = nullptr;
2777
2777
2778
2778
// normalize
2779
- struct ggml_tensor * norm = nullptr;
2779
+ struct ggml_tensor * norm = nullptr;
2780
2780
struct ggml_tensor * norm_b = nullptr;
2781
2781
};
2782
2782
2783
2783
struct llama_layer_convnext {
2784
- struct ggml_tensor * dw;
2785
- struct ggml_tensor * dw_b;
2784
+ struct ggml_tensor * dw = nullptr ;
2785
+ struct ggml_tensor * dw_b = nullptr ;
2786
2786
2787
- struct ggml_tensor * norm;
2788
- struct ggml_tensor * norm_b;
2787
+ struct ggml_tensor * norm = nullptr ;
2788
+ struct ggml_tensor * norm_b = nullptr ;
2789
2789
2790
- struct ggml_tensor * pw1;
2791
- struct ggml_tensor * pw1_b;
2790
+ struct ggml_tensor * pw1 = nullptr ;
2791
+ struct ggml_tensor * pw1_b = nullptr ;
2792
2792
2793
- struct ggml_tensor * pw2;
2794
- struct ggml_tensor * pw2_b;
2793
+ struct ggml_tensor * pw2 = nullptr ;
2794
+ struct ggml_tensor * pw2_b = nullptr ;
2795
2795
2796
- struct ggml_tensor * gamma;
2796
+ struct ggml_tensor * gamma = nullptr ;
2797
2797
};
2798
2798
2799
- // TODO: separate into "llama_layer_enc" and "llama_layer_dec"
2800
2799
struct llama_layer {
2801
- llama_layer() {
2802
- // initialize all pointers to NULL
2803
- std::memset(this, 0, sizeof(*this));
2804
- }
2805
-
2806
2800
// normalization
2807
- struct ggml_tensor * attn_norm;
2808
- struct ggml_tensor * attn_norm_b;
2809
- struct ggml_tensor * attn_norm_2;
2810
- struct ggml_tensor * attn_norm_2_b;
2811
- struct ggml_tensor * attn_q_norm;
2812
- struct ggml_tensor * attn_q_norm_b;
2813
- struct ggml_tensor * attn_k_norm;
2814
- struct ggml_tensor * attn_k_norm_b;
2815
- struct ggml_tensor * attn_out_norm;
2816
- struct ggml_tensor * attn_out_norm_b;
2817
- struct ggml_tensor * attn_q_a_norm;
2818
- struct ggml_tensor * attn_kv_a_norm;
2819
- struct ggml_tensor * attn_sub_norm;
2820
- struct ggml_tensor * attn_post_norm;
2821
- struct ggml_tensor * ffn_sub_norm;
2822
- struct ggml_tensor * attn_norm_cross;
2823
- struct ggml_tensor * attn_norm_enc;
2801
+ struct ggml_tensor * attn_norm = nullptr ;
2802
+ struct ggml_tensor * attn_norm_b = nullptr ;
2803
+ struct ggml_tensor * attn_norm_2 = nullptr ;
2804
+ struct ggml_tensor * attn_norm_2_b = nullptr ;
2805
+ struct ggml_tensor * attn_q_norm = nullptr ;
2806
+ struct ggml_tensor * attn_q_norm_b = nullptr ;
2807
+ struct ggml_tensor * attn_k_norm = nullptr ;
2808
+ struct ggml_tensor * attn_k_norm_b = nullptr ;
2809
+ struct ggml_tensor * attn_out_norm = nullptr ;
2810
+ struct ggml_tensor * attn_out_norm_b = nullptr ;
2811
+ struct ggml_tensor * attn_q_a_norm = nullptr ;
2812
+ struct ggml_tensor * attn_kv_a_norm = nullptr ;
2813
+ struct ggml_tensor * attn_sub_norm = nullptr ;
2814
+ struct ggml_tensor * attn_post_norm = nullptr ;
2815
+ struct ggml_tensor * ffn_sub_norm = nullptr ;
2816
+ struct ggml_tensor * attn_norm_cross = nullptr ;
2817
+ struct ggml_tensor * attn_norm_enc = nullptr ;
2824
2818
2825
2819
// attention
2826
- struct ggml_tensor * wq;
2827
- struct ggml_tensor * wk;
2828
- struct ggml_tensor * wv;
2829
- struct ggml_tensor * wo;
2830
- struct ggml_tensor * wqkv;
2831
- struct ggml_tensor * wq_a;
2832
- struct ggml_tensor * wq_b;
2833
- struct ggml_tensor * wkv_a_mqa;
2834
- struct ggml_tensor * wkv_b;
2835
- struct ggml_tensor * wq_cross;
2836
- struct ggml_tensor * wk_cross;
2837
- struct ggml_tensor * wv_cross;
2838
- struct ggml_tensor * wo_cross;
2839
- struct ggml_tensor * wq_enc;
2840
- struct ggml_tensor * wk_enc;
2841
- struct ggml_tensor * wv_enc;
2842
- struct ggml_tensor * wo_enc;
2820
+ struct ggml_tensor * wq = nullptr ;
2821
+ struct ggml_tensor * wk = nullptr ;
2822
+ struct ggml_tensor * wv = nullptr ;
2823
+ struct ggml_tensor * wo = nullptr ;
2824
+ struct ggml_tensor * wqkv = nullptr ;
2825
+ struct ggml_tensor * wq_a = nullptr ;
2826
+ struct ggml_tensor * wq_b = nullptr ;
2827
+ struct ggml_tensor * wkv_a_mqa = nullptr ;
2828
+ struct ggml_tensor * wkv_b = nullptr ;
2829
+ struct ggml_tensor * wq_cross = nullptr ;
2830
+ struct ggml_tensor * wk_cross = nullptr ;
2831
+ struct ggml_tensor * wv_cross = nullptr ;
2832
+ struct ggml_tensor * wo_cross = nullptr ;
2833
+ struct ggml_tensor * wq_enc = nullptr ;
2834
+ struct ggml_tensor * wk_enc = nullptr ;
2835
+ struct ggml_tensor * wv_enc = nullptr ;
2836
+ struct ggml_tensor * wo_enc = nullptr ;
2843
2837
2844
2838
// attention bias
2845
- struct ggml_tensor * bq;
2846
- struct ggml_tensor * bk;
2847
- struct ggml_tensor * bv;
2848
- struct ggml_tensor * bo;
2849
- struct ggml_tensor * bqkv;
2839
+ struct ggml_tensor * bq = nullptr ;
2840
+ struct ggml_tensor * bk = nullptr ;
2841
+ struct ggml_tensor * bv = nullptr ;
2842
+ struct ggml_tensor * bo = nullptr ;
2843
+ struct ggml_tensor * bqkv = nullptr ;
2850
2844
2851
2845
// relative position bias
2852
- struct ggml_tensor * attn_rel_b;
2853
- struct ggml_tensor * attn_rel_b_enc;
2854
- struct ggml_tensor * attn_rel_b_cross;
2846
+ struct ggml_tensor * attn_rel_b = nullptr ;
2847
+ struct ggml_tensor * attn_rel_b_enc = nullptr ;
2848
+ struct ggml_tensor * attn_rel_b_cross = nullptr ;
2855
2849
2856
2850
// normalization
2857
- struct ggml_tensor * ffn_norm;
2858
- struct ggml_tensor * ffn_norm_b;
2859
- struct ggml_tensor * ffn_post_norm;
2860
- struct ggml_tensor * layer_out_norm;
2861
- struct ggml_tensor * layer_out_norm_b;
2862
- struct ggml_tensor * ffn_norm_exps;
2863
- struct ggml_tensor * ffn_norm_enc;
2851
+ struct ggml_tensor * ffn_norm = nullptr ;
2852
+ struct ggml_tensor * ffn_norm_b = nullptr ;
2853
+ struct ggml_tensor * ffn_post_norm = nullptr ;
2854
+ struct ggml_tensor * layer_out_norm = nullptr ;
2855
+ struct ggml_tensor * layer_out_norm_b = nullptr ;
2856
+ struct ggml_tensor * ffn_norm_exps = nullptr ;
2857
+ struct ggml_tensor * ffn_norm_enc = nullptr ;
2864
2858
2865
2859
// ff
2866
- struct ggml_tensor * ffn_gate; // w1
2867
- struct ggml_tensor * ffn_down; // w2
2868
- struct ggml_tensor * ffn_up; // w3
2869
- struct ggml_tensor * ffn_gate_enc;
2870
- struct ggml_tensor * ffn_down_enc;
2871
- struct ggml_tensor * ffn_up_enc;
2860
+ struct ggml_tensor * ffn_gate = nullptr ; // w1
2861
+ struct ggml_tensor * ffn_down = nullptr ; // w2
2862
+ struct ggml_tensor * ffn_up = nullptr; // w3
2863
+ struct ggml_tensor * ffn_gate_enc = nullptr ;
2864
+ struct ggml_tensor * ffn_down_enc = nullptr ;
2865
+ struct ggml_tensor * ffn_up_enc = nullptr ;
2872
2866
2873
2867
// ff MoE
2874
- struct ggml_tensor * ffn_gate_inp;
2875
- struct ggml_tensor * ffn_gate_exps;
2876
- struct ggml_tensor * ffn_down_exps;
2877
- struct ggml_tensor * ffn_up_exps ;
2868
+ struct ggml_tensor * ffn_gate_inp = nullptr ;
2869
+ struct ggml_tensor * ffn_gate_exps = nullptr ;
2870
+ struct ggml_tensor * ffn_down_exps = nullptr ;
2871
+ struct ggml_tensor * ffn_up_exps = nullptr ;
2878
2872
2879
2873
// ff shared expert (shexp)
2880
- struct ggml_tensor * ffn_gate_inp_shexp;
2881
- struct ggml_tensor * ffn_gate_shexp;
2882
- struct ggml_tensor * ffn_down_shexp;
2883
- struct ggml_tensor * ffn_up_shexp;
2874
+ struct ggml_tensor * ffn_gate_inp_shexp = nullptr ;
2875
+ struct ggml_tensor * ffn_gate_shexp = nullptr ;
2876
+ struct ggml_tensor * ffn_down_shexp = nullptr ;
2877
+ struct ggml_tensor * ffn_up_shexp = nullptr ;
2884
2878
2885
2879
// ff bias
2886
- struct ggml_tensor * ffn_gate_b;
2887
- struct ggml_tensor * ffn_down_b; // b2
2888
- struct ggml_tensor * ffn_up_b; // b3
2889
- struct ggml_tensor * ffn_act;
2880
+ struct ggml_tensor * ffn_gate_b = nullptr ;
2881
+ struct ggml_tensor * ffn_down_b = nullptr ; // b2
2882
+ struct ggml_tensor * ffn_up_b = nullptr ; // b3
2883
+ struct ggml_tensor * ffn_act = nullptr ;
2890
2884
2891
2885
// mamba proj
2892
- struct ggml_tensor * ssm_in;
2893
- struct ggml_tensor * ssm_x;
2894
- struct ggml_tensor * ssm_dt;
2895
- struct ggml_tensor * ssm_out;
2886
+ struct ggml_tensor * ssm_in = nullptr ;
2887
+ struct ggml_tensor * ssm_x = nullptr ;
2888
+ struct ggml_tensor * ssm_dt = nullptr ;
2889
+ struct ggml_tensor * ssm_out = nullptr ;
2896
2890
2897
2891
// mamba
2898
- struct ggml_tensor * ssm_conv1d;
2899
- struct ggml_tensor * ssm_a;
2900
- struct ggml_tensor * ssm_d;
2892
+ struct ggml_tensor * ssm_conv1d = nullptr ;
2893
+ struct ggml_tensor * ssm_a = nullptr ;
2894
+ struct ggml_tensor * ssm_d = nullptr ;
2901
2895
2902
2896
// mamba bias
2903
- struct ggml_tensor * ssm_conv1d_b;
2904
- struct ggml_tensor * ssm_dt_b;
2897
+ struct ggml_tensor * ssm_conv1d_b = nullptr ;
2898
+ struct ggml_tensor * ssm_dt_b = nullptr ;
2905
2899
2906
2900
// rwkv
2907
- struct ggml_tensor * time_mix_w1;
2908
- struct ggml_tensor * time_mix_w2;
2909
- struct ggml_tensor * time_mix_lerp_x;
2910
- struct ggml_tensor * time_mix_lerp_w;
2911
- struct ggml_tensor * time_mix_lerp_k;
2912
- struct ggml_tensor * time_mix_lerp_v;
2913
- struct ggml_tensor * time_mix_lerp_r;
2914
- struct ggml_tensor * time_mix_lerp_g;
2915
-
2916
- struct ggml_tensor * time_mix_first;
2917
- struct ggml_tensor * time_mix_decay;
2918
- struct ggml_tensor * time_mix_decay_w1;
2919
- struct ggml_tensor * time_mix_decay_w2;
2920
- struct ggml_tensor * time_mix_key;
2921
- struct ggml_tensor * time_mix_value;
2922
- struct ggml_tensor * time_mix_receptance;
2923
- struct ggml_tensor * time_mix_gate;
2924
-
2925
- struct ggml_tensor * time_mix_ln;
2926
- struct ggml_tensor * time_mix_ln_b;
2927
- struct ggml_tensor * time_mix_output;
2928
-
2929
- struct ggml_tensor * channel_mix_lerp_k;
2930
- struct ggml_tensor * channel_mix_lerp_r;
2931
-
2932
- struct ggml_tensor * channel_mix_key;
2933
- struct ggml_tensor * channel_mix_receptance;
2934
- struct ggml_tensor * channel_mix_value;
2901
+ struct ggml_tensor * time_mix_w1 = nullptr ;
2902
+ struct ggml_tensor * time_mix_w2 = nullptr ;
2903
+ struct ggml_tensor * time_mix_lerp_x = nullptr ;
2904
+ struct ggml_tensor * time_mix_lerp_w = nullptr ;
2905
+ struct ggml_tensor * time_mix_lerp_k = nullptr ;
2906
+ struct ggml_tensor * time_mix_lerp_v = nullptr ;
2907
+ struct ggml_tensor * time_mix_lerp_r = nullptr ;
2908
+ struct ggml_tensor * time_mix_lerp_g = nullptr ;
2909
+
2910
+ struct ggml_tensor * time_mix_first = nullptr ;
2911
+ struct ggml_tensor * time_mix_decay = nullptr ;
2912
+ struct ggml_tensor * time_mix_decay_w1 = nullptr ;
2913
+ struct ggml_tensor * time_mix_decay_w2 = nullptr ;
2914
+ struct ggml_tensor * time_mix_key = nullptr ;
2915
+ struct ggml_tensor * time_mix_value = nullptr ;
2916
+ struct ggml_tensor * time_mix_receptance = nullptr ;
2917
+ struct ggml_tensor * time_mix_gate = nullptr ;
2918
+
2919
+ struct ggml_tensor * time_mix_ln = nullptr ;
2920
+ struct ggml_tensor * time_mix_ln_b = nullptr ;
2921
+ struct ggml_tensor * time_mix_output = nullptr ;
2922
+
2923
+ struct ggml_tensor * channel_mix_lerp_k = nullptr ;
2924
+ struct ggml_tensor * channel_mix_lerp_r = nullptr ;
2925
+
2926
+ struct ggml_tensor * channel_mix_key = nullptr ;
2927
+ struct ggml_tensor * channel_mix_receptance = nullptr ;
2928
+ struct ggml_tensor * channel_mix_value = nullptr ;
2935
2929
2936
2930
// long rope factors
2937
2931
struct ggml_tensor * rope_long = nullptr;
2938
2932
struct ggml_tensor * rope_short = nullptr;
2939
2933
struct ggml_tensor * rope_freqs = nullptr;
2940
2934
2941
2935
// bitnet scale
2942
- struct ggml_tensor * wq_scale;
2943
- struct ggml_tensor * wk_scale;
2944
- struct ggml_tensor * wv_scale;
2945
- struct ggml_tensor * wo_scale;
2946
- struct ggml_tensor * ffn_gate_scale;
2947
- struct ggml_tensor * ffn_up_scale;
2948
- struct ggml_tensor * ffn_down_scale;
2936
+ struct ggml_tensor * wq_scale = nullptr ;
2937
+ struct ggml_tensor * wk_scale = nullptr ;
2938
+ struct ggml_tensor * wv_scale = nullptr ;
2939
+ struct ggml_tensor * wo_scale = nullptr ;
2940
+ struct ggml_tensor * ffn_gate_scale = nullptr ;
2941
+ struct ggml_tensor * ffn_up_scale = nullptr ;
2942
+ struct ggml_tensor * ffn_down_scale = nullptr ;
2949
2943
2950
2944
struct llama_layer_posnet posnet;
2951
2945
@@ -3167,6 +3161,7 @@ struct llama_sbatch {
3167
3161
// batch indices of the output
3168
3162
std::vector<size_t> out_ids;
3169
3163
std::vector<llama_sbatch_seq> seq;
3164
+
3170
3165
const llama_batch * batch = nullptr;
3171
3166
3172
3167
// buffers for the ubatch
0 commit comments