ggerganov committed on
Commit
5fa72ca
·
unverified ·
1 Parent(s): 741db99

ggml : fix 32-bit ARM NEON (#836)

Browse files

* ggml : add support for 32-bit ARM

* ggml : fix

* ggml : fix

Files changed (1) hide show
  1. ggml.c +27 -0
ggml.c CHANGED
@@ -668,6 +668,33 @@ uint8x8_t vzip2_u8(uint8x8_t a, uint8x8_t b) {
668
  return vget_high_u8(vcombine_u8(a, b));
669
  }
670
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
671
  #endif
672
  #endif
673
 
 
668
  return vget_high_u8(vcombine_u8(a, b));
669
  }
670
 
671
+ int8x16_t vzip1q_s8(int8x16_t a, int8x16_t b) {
672
+ return vcombine_s8(vget_low_s8(a), vget_low_s8(b));
673
+ }
674
+
675
+ int8x16_t vzip2q_s8(int8x16_t a, int8x16_t b) {
676
+ return vcombine_s8(vget_high_s8(a), vget_high_s8(b));
677
+ }
678
+
679
+ uint8x16_t vzip1q_u8(uint8x16_t a, uint8x16_t b) {
680
+ return vcombine_u8(vget_low_u8(a), vget_low_u8(b));
681
+ }
682
+
683
+ uint8x16_t vzip2q_u8(uint8x16_t a, uint8x16_t b) {
684
+ return vcombine_u8(vget_high_u8(a), vget_high_u8(b));
685
+ }
686
+
687
+ int32x4_t vcvtnq_s32_f32(float32x4_t v) {
688
+ int32x4_t res;
689
+
690
+ res[0] = roundf(vgetq_lane_f32(v, 0));
691
+ res[1] = roundf(vgetq_lane_f32(v, 1));
692
+ res[2] = roundf(vgetq_lane_f32(v, 2));
693
+ res[3] = roundf(vgetq_lane_f32(v, 3));
694
+
695
+ return res;
696
+ }
697
+
698
  #endif
699
  #endif
700