Spaces:
Running
Running
ggml : fix 32-bit ARM NEON (#836)
Browse files
* ggml : add support for 32-bit ARM
* ggml : fix
* ggml : fix
ggml.c
CHANGED
|
@@ -668,6 +668,33 @@ uint8x8_t vzip2_u8(uint8x8_t a, uint8x8_t b) {
|
|
| 668 |
return vget_high_u8(vcombine_u8(a, b));
|
| 669 |
}
|
| 670 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 671 |
#endif
|
| 672 |
#endif
|
| 673 |
|
|
|
|
| 668 |
return vget_high_u8(vcombine_u8(a, b));
|
| 669 |
}
|
| 670 |
|
| 671 |
+
// 32-bit ARM fallback for the AArch64-only intrinsic vzip1q_s8.
//
// Interleaves the low halves of a and b:
//   { a[0], b[0], a[1], b[1], ..., a[7], b[7] }
//
// The ARMv7 vzipq_s8 intrinsic performs the full zip and returns both halves;
// val[0] is the interleave of the low halves, which is what vzip1q_s8 yields
// on AArch64. (Simply concatenating the two low halves with vcombine_s8 would
// produce { a[0..7], b[0..7] }, which is not a zip.)
int8x16_t vzip1q_s8(int8x16_t a, int8x16_t b) {
    return vzipq_s8(a, b).val[0];
}
|
| 674 |
+
|
| 675 |
+
// 32-bit ARM fallback for the AArch64-only intrinsic vzip2q_s8.
//
// Interleaves the high halves of a and b:
//   { a[8], b[8], a[9], b[9], ..., a[15], b[15] }
//
// The ARMv7 vzipq_s8 intrinsic performs the full zip and returns both halves;
// val[1] is the interleave of the high halves, which is what vzip2q_s8 yields
// on AArch64. (Simply concatenating the two high halves with vcombine_s8 would
// produce { a[8..15], b[8..15] }, which is not a zip.)
int8x16_t vzip2q_s8(int8x16_t a, int8x16_t b) {
    return vzipq_s8(a, b).val[1];
}
|
| 678 |
+
|
| 679 |
+
// 32-bit ARM fallback for the AArch64-only intrinsic vzip1q_u8.
//
// Interleaves the low halves of a and b:
//   { a[0], b[0], a[1], b[1], ..., a[7], b[7] }
//
// The ARMv7 vzipq_u8 intrinsic performs the full zip and returns both halves;
// val[0] is the interleave of the low halves, which is what vzip1q_u8 yields
// on AArch64. (Simply concatenating the two low halves with vcombine_u8 would
// produce { a[0..7], b[0..7] }, which is not a zip.)
uint8x16_t vzip1q_u8(uint8x16_t a, uint8x16_t b) {
    return vzipq_u8(a, b).val[0];
}
|
| 682 |
+
|
| 683 |
+
// 32-bit ARM fallback for the AArch64-only intrinsic vzip2q_u8.
//
// Interleaves the high halves of a and b:
//   { a[8], b[8], a[9], b[9], ..., a[15], b[15] }
//
// The ARMv7 vzipq_u8 intrinsic performs the full zip and returns both halves;
// val[1] is the interleave of the high halves, which is what vzip2q_u8 yields
// on AArch64. (Simply concatenating the two high halves with vcombine_u8 would
// produce { a[8..15], b[8..15] }, which is not a zip.)
uint8x16_t vzip2q_u8(uint8x16_t a, uint8x16_t b) {
    return vzipq_u8(a, b).val[1];
}
|
| 686 |
+
|
| 687 |
+
// 32-bit ARM fallback for the AArch64-only intrinsic vcvtnq_s32_f32.
//
// Converts each of the four float lanes of v to a signed 32-bit integer using
// round-to-nearest with ties to even, matching the AArch64 instruction:
//   - exact .5 ties go to the even neighbour (2.5f -> 2, 3.5f -> 4)
//   - values outside the int32 range saturate to INT32_MIN / INT32_MAX
//   - NaN converts to 0
//
// Lanes are moved through plain arrays with vst1q/vld1q so the code does not
// depend on the compiler's vector-subscript extension.
int32x4_t vcvtnq_s32_f32(float32x4_t v) {
    float   in[4];
    int32_t out[4];

    vst1q_f32(in, v);

    for (int i = 0; i < 4; ++i) {
        const float x = in[i];

        // roundf() breaks ties away from zero; |r - x| is exactly 0.5 only
        // for a tie, in which case we re-round to the nearest even integer.
        float r = roundf(x);
        if (fabsf(r - x) == 0.5f) {
            r = 2.0f * roundf(x * 0.5f);
        }

        if (r != r) {
            out[i] = 0;                     // NaN
        } else if (r >= 2147483648.0f) {
            out[i] = INT32_MAX;             // saturate instead of UB on overflow
        } else if (r <= -2147483648.0f) {
            out[i] = INT32_MIN;
        } else {
            out[i] = (int32_t) r;
        }
    }

    return vld1q_s32(out);
}
|
| 697 |
+
|
| 698 |
#endif
|
| 699 |
#endif
|
| 700 |
|