Spaces:
Running
Running
ggml : fix q4_1 dot product types (#759)
Browse files
Co-authored-by: Georgi Gerganov <[email protected]>
ggml.c
CHANGED
|
@@ -2344,14 +2344,14 @@ static void ggml_vec_dot_q4_1(const int n, float * restrict s, const void * rest
|
|
| 2344 |
|
| 2345 |
#if defined(__ARM_FEATURE_DOTPROD)
|
| 2346 |
// dot product into int32x4_t
|
| 2347 |
-
|
| 2348 |
-
|
| 2349 |
|
| 2350 |
-
p_0 =
|
| 2351 |
-
p_1 =
|
| 2352 |
|
| 2353 |
-
sum11 += x0->d*y0->d*
|
| 2354 |
-
sum11 += x1->d*y1->d*
|
| 2355 |
#else
|
| 2356 |
const uint16x8_t pl0l = vmull_u8(vget_low_u8 (v0_0l), vget_low_u8 (v1_0l));
|
| 2357 |
const uint16x8_t pl0h = vmull_u8(vget_high_u8(v0_0l), vget_high_u8(v1_0l));
|
|
|
|
| 2344 |
|
| 2345 |
#if defined(__ARM_FEATURE_DOTPROD)
|
| 2346 |
// dot product into int32x4_t
|
| 2347 |
+
uint32x4_t p_0 = vdotq_u32(vdupq_n_u32(0), v0_0l, v1_0l);
|
| 2348 |
+
uint32x4_t p_1 = vdotq_u32(vdupq_n_u32(0), v0_1l, v1_1l);
|
| 2349 |
|
| 2350 |
+
p_0 = vdotq_u32(p_0, v0_0h, v1_0h);
|
| 2351 |
+
p_1 = vdotq_u32(p_1, v0_1h, v1_1h);
|
| 2352 |
|
| 2353 |
+
sum11 += x0->d*y0->d*vaddvq_u32(p_0);
|
| 2354 |
+
sum11 += x1->d*y1->d*vaddvq_u32(p_1);
|
| 2355 |
#else
|
| 2356 |
const uint16x8_t pl0l = vmull_u8(vget_low_u8 (v0_0l), vget_low_u8 (v1_0l));
|
| 2357 |
const uint16x8_t pl0h = vmull_u8(vget_high_u8(v0_0l), vget_high_u8(v1_0l));
|