novag ggerganov committed on
Commit
984a856
·
unverified ·
1 Parent(s): 3bd52ce

ggml : fix q4_1 dot product types (#759)

Browse files

Co-authored-by: Georgi Gerganov <[email protected]>

Files changed (1) hide show
  1. ggml.c +6 -6
ggml.c CHANGED
@@ -2344,14 +2344,14 @@ static void ggml_vec_dot_q4_1(const int n, float * restrict s, const void * rest
2344
 
2345
  #if defined(__ARM_FEATURE_DOTPROD)
2346
  // dot product into int32x4_t
2347
- int32x4_t p_0 = vdotq_s32(vdupq_n_s32(0), v0_0l, v1_0l);
2348
- int32x4_t p_1 = vdotq_s32(vdupq_n_s32(0), v0_1l, v1_1l);
2349
 
2350
- p_0 = vdotq_s32(p_0, v0_0h, v1_0h);
2351
- p_1 = vdotq_s32(p_1, v0_1h, v1_1h);
2352
 
2353
- sum11 += x0->d*y0->d*vaddvq_s32(p_0);
2354
- sum11 += x1->d*y1->d*vaddvq_s32(p_1);
2355
  #else
2356
  const uint16x8_t pl0l = vmull_u8(vget_low_u8 (v0_0l), vget_low_u8 (v1_0l));
2357
  const uint16x8_t pl0h = vmull_u8(vget_high_u8(v0_0l), vget_high_u8(v1_0l));
 
2344
 
2345
  #if defined(__ARM_FEATURE_DOTPROD)
2346
  // dot product into int32x4_t
2347
+ uint32x4_t p_0 = vdotq_u32(vdupq_n_u32(0), v0_0l, v1_0l);
2348
+ uint32x4_t p_1 = vdotq_u32(vdupq_n_u32(0), v0_1l, v1_1l);
2349
 
2350
+ p_0 = vdotq_u32(p_0, v0_0h, v1_0h);
2351
+ p_1 = vdotq_u32(p_1, v0_1h, v1_1h);
2352
 
2353
+ sum11 += x0->d*y0->d*vaddvq_u32(p_0);
2354
+ sum11 += x1->d*y1->d*vaddvq_u32(p_1);
2355
  #else
2356
  const uint16x8_t pl0l = vmull_u8(vget_low_u8 (v0_0l), vget_low_u8 (v1_0l));
2357
  const uint16x8_t pl0h = vmull_u8(vget_high_u8(v0_0l), vget_high_u8(v1_0l));