@@ -454,21 +454,21 @@ const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type
454
454
#define GGML_F32x4_ADD vaddq_f32
455
455
#define GGML_F32x4_MUL vmulq_f32
456
456
#define GGML_F32x4_REDUCE_ONE(x) vaddvq_f32(x)
457
- #define GGML_F32x4_REDUCE(res, x) \
458
- { \
459
- int offset = GGML_F32_ARR >> 1; \
460
- for (int i = 0; i < offset; ++i) { \
461
- (x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
462
- } \
463
- offset >>= 1; \
464
- for (int i = 0; i < offset; ++i) { \
465
- (x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
466
- } \
467
- offset >>= 1; \
468
- for (int i = 0; i < offset; ++i) { \
469
- (x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
470
- } \
471
- (res) = GGML_F32x4_REDUCE_ONE((x)[0]); \
457
+ #define GGML_F32x4_REDUCE(res, x) \
458
+ { \
459
+ int offset = GGML_F32_ARR >> 1; \
460
+ for (int i = 0; i < offset; ++i) { \
461
+ (x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
462
+ } \
463
+ offset >>= 1; \
464
+ for (int i = 0; i < offset; ++i) { \
465
+ (x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
466
+ } \
467
+ offset >>= 1; \
468
+ for (int i = 0; i < offset; ++i) { \
469
+ (x)[i] = vaddq_f32((x)[i], (x)[offset+i]); \
470
+ } \
471
+ (res) = (ggml_float) GGML_F32x4_REDUCE_ONE((x)[0]); \
472
472
}
473
473
474
474
#define GGML_F32_VEC GGML_F32x4
@@ -2395,7 +2395,7 @@ static void ggml_init_arm_arch_features(void) {
2395
2395
uint32_t hwcap2 = getauxval(AT_HWCAP2);
2396
2396
2397
2397
ggml_arm_arch_features.has_neon = !!(hwcap & HWCAP_ASIMD);
2398
- ggml_arm_arch_features.has_dotprod = !!(hwcap && HWCAP_ASIMDDP);
2398
+ ggml_arm_arch_features.has_dotprod = !!(hwcap & HWCAP_ASIMDDP);
2399
2399
ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
2400
2400
ggml_arm_arch_features.has_sve = !!(hwcap & HWCAP_SVE);
2401
2401
0 commit comments