@@ -46,7 +46,7 @@ class IMatrixCollector {
46
46
common_params m_params;
47
47
std::mutex m_mutex;
48
48
int m_last_call = 0 ;
49
- std::vector<float > m_src1_data;
49
+ std::vector<char > m_src1_data;
50
50
std::vector<char > m_ids; // the expert ids from ggml_mul_mat_id
51
51
};
52
52
@@ -93,11 +93,13 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
93
93
const bool is_host = ggml_backend_buffer_is_host (src1->buffer );
94
94
95
95
if (!is_host) {
96
- m_src1_data.resize (ggml_nelements (src1));
97
- ggml_backend_tensor_get (src1, m_src1_data.data (), 0 , ggml_nbytes (src1));
96
+ const size_t src1_nbytes = ggml_nbytes (src1);
97
+ m_src1_data.resize (src1_nbytes);
98
+ ggml_backend_tensor_get (src1, m_src1_data.data (), 0 , src1_nbytes);
98
99
}
99
100
100
- const float * data = is_host ? (const float *) src1->data : m_src1_data.data ();
101
+ const char * data = is_host ? (const char *) src1->data : m_src1_data.data ();
102
+ GGML_ASSERT (src1->nb [0 ] == ggml_element_size (src1));
101
103
102
104
// this has been adapted to the new format of storing merged experts in a single 3d tensor
103
105
// ref: https://github.com/ggml-org/llama.cpp/pull/6387
@@ -144,7 +146,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
144
146
145
147
const int64_t i11 = idx % src1->ne [1 ];
146
148
const int64_t i12 = row;
147
- const float * x = (const float *)(( const char *) data + i11*src1->nb [1 ] + i12*src1->nb [2 ]);
149
+ const float * x = (const float *)(data + i11*src1->nb [1 ] + i12*src1->nb [2 ]);
148
150
149
151
for (int j = 0 ; j < (int )src1->ne [0 ]; ++j) {
150
152
e.values [e_start + j] += x[j]*x[j];
@@ -180,7 +182,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
180
182
++e.ncall ;
181
183
LOG_DBGV (2 , " %s[%d]: %32s, %s, %5d x %5d, %d\n " , __func__, m_last_call, wname.c_str (), ggml_op_name (t->op ), (int )src1->ne [0 ], (int )src1->ne [1 ], (int )src1->type );
182
184
for (int row = 0 ; row < (int )src1->ne [1 ]; ++row) {
183
- const float * x = data + row * src1->ne [ 0 ] ;
185
+ const float * x = ( const float *) ( data + row * src1->nb [ 1 ]) ;
184
186
for (int j = 0 ; j < (int )src1->ne [0 ]; ++j) {
185
187
e.values [j] += x[j]*x[j];
186
188
e.counts [j]++;
0 commit comments