Diego Devesa committed on
Commit
0a93e1b
·
1 Parent(s): 7f269bb

ggml : fix BLAS with unsupported types (llama/9775)

Browse files

* ggml : do not use BLAS with types without to_float

* ggml : return pointer from ggml_internal_get_type_traits to avoid unnecessary copies

* ggml : rename ggml_internal_get_type_traits -> ggml_get_type_traits

it's not really internal if everybody uses it

ggml/include/ggml.h CHANGED
@@ -2536,7 +2536,7 @@ extern "C" {
2536
  typedef void (*ggml_gemm_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
2537
  const void * GGML_RESTRICT y, int nr, int nc);
2538
 
2539
- typedef struct {
2540
  const char * type_name;
2541
  int64_t blck_size;
2542
  int64_t blck_size_interleave; // interleave elements in blocks
@@ -2552,9 +2552,9 @@ extern "C" {
2552
  int64_t ncols; // number of columns to process simultaneously
2553
  ggml_gemv_t gemv;
2554
  ggml_gemm_t gemm;
2555
- } ggml_type_traits_t;
2556
 
2557
- GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
2558
 
2559
  #ifdef __cplusplus
2560
  }
 
2536
  typedef void (*ggml_gemm_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
2537
  const void * GGML_RESTRICT y, int nr, int nc);
2538
 
2539
+ struct ggml_type_traits {
2540
  const char * type_name;
2541
  int64_t blck_size;
2542
  int64_t blck_size_interleave; // interleave elements in blocks
 
2552
  int64_t ncols; // number of columns to process simultaneously
2553
  ggml_gemv_t gemv;
2554
  ggml_gemm_t gemm;
2555
+ };
2556
 
2557
+ GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
2558
 
2559
  #ifdef __cplusplus
2560
  }
ggml/src/ggml-backend.cpp CHANGED
@@ -1177,7 +1177,7 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
1177
  op->type != GGML_TYPE_IQ1_S &&
1178
  op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float
1179
  case GGML_OP_MUL_MAT:
1180
- return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_internal_get_type_traits(op->src[0]->type).vec_dot_type;
1181
  case GGML_OP_ROPE_BACK:
1182
  return op->src[2] == NULL && (op->op_params[2] & 4) == 0;
1183
  case GGML_OP_IM2COL_BACK:
 
1177
  op->type != GGML_TYPE_IQ1_S &&
1178
  op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float
1179
  case GGML_OP_MUL_MAT:
1180
+ return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_get_type_traits(op->src[0]->type)->vec_dot_type;
1181
  case GGML_OP_ROPE_BACK:
1182
  return op->src[2] == NULL && (op->op_params[2] & 4) == 0;
1183
  case GGML_OP_IM2COL_BACK:
ggml/src/ggml-blas.cpp CHANGED
@@ -65,8 +65,8 @@ static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct gg
65
 
66
  // convert src0 to float
67
  if (type != GGML_TYPE_F32) {
68
- ggml_type_traits_t type_traits = ggml_internal_get_type_traits(type);
69
- ggml_to_float_t const to_float = type_traits.to_float;
70
 
71
  for (int64_t i03 = 0; i03 < ne03; i03++) {
72
  for (int64_t i02 = 0; i02 < ne02; i02++) {
@@ -420,19 +420,21 @@ static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const s
420
  // TODO: find the optimal value
421
  const int64_t min_batch = 32;
422
 
423
- return (ggml_is_contiguous(src0) &&
424
- ggml_is_contiguous(src1) &&
425
- src1->type == GGML_TYPE_F32 &&
426
- (ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch));
 
427
  }
428
 
429
  case GGML_OP_OUT_PROD:
430
- return (op->src[0]->type == GGML_TYPE_F32 &&
431
- op->src[1]->type == GGML_TYPE_F32 &&
432
- ggml_is_matrix(src0) &&
433
- ggml_is_matrix(src1) &&
434
- ggml_is_contiguous(src0) &&
435
- (ggml_is_contiguous(src1) || ggml_is_transposed(src1)));
 
436
 
437
  default:
438
  return false;
 
65
 
66
  // convert src0 to float
67
  if (type != GGML_TYPE_F32) {
68
+ const auto * type_traits = ggml_get_type_traits(type);
69
+ ggml_to_float_t const to_float = type_traits->to_float;
70
 
71
  for (int64_t i03 = 0; i03 < ne03; i03++) {
72
  for (int64_t i02 = 0; i02 < ne02; i02++) {
 
420
  // TODO: find the optimal value
421
  const int64_t min_batch = 32;
422
 
423
+ return ggml_is_contiguous(src0) &&
424
+ ggml_is_contiguous(src1) &&
425
+ src1->type == GGML_TYPE_F32 &&
426
+ (ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch) &&
427
+ (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL);
428
  }
429
 
430
  case GGML_OP_OUT_PROD:
431
+ return op->src[0]->type == GGML_TYPE_F32 &&
432
+ op->src[1]->type == GGML_TYPE_F32 &&
433
+ ggml_is_matrix(src0) &&
434
+ ggml_is_matrix(src1) &&
435
+ ggml_is_contiguous(src0) &&
436
+ (ggml_is_contiguous(src1) || ggml_is_transposed(src1)) &&
437
+ (src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL);
438
 
439
  default:
440
  return false;
ggml/src/ggml-vulkan.cpp CHANGED
@@ -5287,9 +5287,9 @@ static void ggml_vk_dequantize_data(const void * from, float * to, size_t ne, gg
5287
  return;
5288
  }
5289
 
5290
- ggml_type_traits_t tt = ggml_internal_get_type_traits(quant);
5291
 
5292
- ggml_to_float_t dequant_fn = tt.to_float;
5293
 
5294
  dequant_fn(from, to, ne);
5295
  }
 
5287
  return;
5288
  }
5289
 
5290
+ const auto * tt = ggml_get_type_traits(quant);
5291
 
5292
+ ggml_to_float_t dequant_fn = tt->to_float;
5293
 
5294
  dequant_fn(from, to, ne);
5295
  }
ggml/src/ggml.c CHANGED
@@ -730,7 +730,7 @@ static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float *
730
  static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc);
731
  static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t * restrict x, size_t bx, ggml_bf16_t * restrict y, size_t by, int nrc);
732
 
733
- static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
734
  [GGML_TYPE_I8] = {
735
  .type_name = "i8",
736
  .blck_size = 1,
@@ -1152,9 +1152,9 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
1152
  };
1153
 
1154
  // For internal test use
1155
- ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
1156
  GGML_ASSERT(type < GGML_TYPE_COUNT);
1157
- return type_traits[type];
1158
  }
1159
 
1160
  //
 
730
  static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc);
731
  static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t * restrict x, size_t bx, ggml_bf16_t * restrict y, size_t by, int nrc);
732
 
733
+ static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
734
  [GGML_TYPE_I8] = {
735
  .type_name = "i8",
736
  .blck_size = 1,
 
1152
  };
1153
 
1154
  // For internal test use
1155
+ const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
1156
  GGML_ASSERT(type < GGML_TYPE_COUNT);
1157
+ return &type_traits[type];
1158
  }
1159
 
1160
  //