Spaces:
Running
Running
Diego Devesa
committed on
Commit
·
0a93e1b
1
Parent(s):
7f269bb
ggml : fix BLAS with unsupported types (llama/9775)
Browse files
* ggml : do not use BLAS with types without to_float
* ggml : return pointer from ggml_internal_get_type_traits to avoid unnecessary copies
* ggml : rename ggml_internal_get_type_traits -> ggml_get_type_traits
it's not really internal if everybody uses it
- ggml/include/ggml.h +3 -3
- ggml/src/ggml-backend.cpp +1 -1
- ggml/src/ggml-blas.cpp +14 -12
- ggml/src/ggml-vulkan.cpp +2 -2
- ggml/src/ggml.c +3 -3
ggml/include/ggml.h
CHANGED
|
@@ -2536,7 +2536,7 @@ extern "C" {
|
|
| 2536 |
typedef void (*ggml_gemm_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
|
| 2537 |
const void * GGML_RESTRICT y, int nr, int nc);
|
| 2538 |
|
| 2539 |
-
|
| 2540 |
const char * type_name;
|
| 2541 |
int64_t blck_size;
|
| 2542 |
int64_t blck_size_interleave; // interleave elements in blocks
|
|
@@ -2552,9 +2552,9 @@ extern "C" {
|
|
| 2552 |
int64_t ncols; // number of columns to process simultaneously
|
| 2553 |
ggml_gemv_t gemv;
|
| 2554 |
ggml_gemm_t gemm;
|
| 2555 |
-
}
|
| 2556 |
|
| 2557 |
-
GGML_API
|
| 2558 |
|
| 2559 |
#ifdef __cplusplus
|
| 2560 |
}
|
|
|
|
| 2536 |
typedef void (*ggml_gemm_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
|
| 2537 |
const void * GGML_RESTRICT y, int nr, int nc);
|
| 2538 |
|
| 2539 |
+
struct ggml_type_traits {
|
| 2540 |
const char * type_name;
|
| 2541 |
int64_t blck_size;
|
| 2542 |
int64_t blck_size_interleave; // interleave elements in blocks
|
|
|
|
| 2552 |
int64_t ncols; // number of columns to process simultaneously
|
| 2553 |
ggml_gemv_t gemv;
|
| 2554 |
ggml_gemm_t gemm;
|
| 2555 |
+
};
|
| 2556 |
|
| 2557 |
+
GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
|
| 2558 |
|
| 2559 |
#ifdef __cplusplus
|
| 2560 |
}
|
ggml/src/ggml-backend.cpp
CHANGED
|
@@ -1177,7 +1177,7 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
|
|
| 1177 |
op->type != GGML_TYPE_IQ1_S &&
|
| 1178 |
op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float
|
| 1179 |
case GGML_OP_MUL_MAT:
|
| 1180 |
-
return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_internal_get_type_traits(op->src[0]->type).vec_dot_type;
|
| 1181 |
case GGML_OP_ROPE_BACK:
|
| 1182 |
return op->src[2] == NULL && (op->op_params[2] & 4) == 0;
|
| 1183 |
case GGML_OP_IM2COL_BACK:
|
|
|
|
| 1177 |
op->type != GGML_TYPE_IQ1_S &&
|
| 1178 |
op->type != GGML_TYPE_IQ1_M; // missing type_traits.from_float
|
| 1179 |
case GGML_OP_MUL_MAT:
|
| 1180 |
+
return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_get_type_traits(op->src[0]->type)->vec_dot_type;
|
| 1181 |
case GGML_OP_ROPE_BACK:
|
| 1182 |
return op->src[2] == NULL && (op->op_params[2] & 4) == 0;
|
| 1183 |
case GGML_OP_IM2COL_BACK:
|
ggml/src/ggml-blas.cpp
CHANGED
|
@@ -65,8 +65,8 @@ static void ggml_backend_blas_mul_mat(ggml_backend_blas_context * ctx, struct gg
|
|
| 65 |
|
| 66 |
// convert src0 to float
|
| 67 |
if (type != GGML_TYPE_F32) {
|
| 68 |
-
|
| 69 |
-
ggml_to_float_t const to_float = type_traits.to_float;
|
| 70 |
|
| 71 |
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
| 72 |
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
|
@@ -420,19 +420,21 @@ static bool ggml_backend_blas_device_supports_op(ggml_backend_dev_t dev, const s
|
|
| 420 |
// TODO: find the optimal value
|
| 421 |
const int64_t min_batch = 32;
|
| 422 |
|
| 423 |
-
return
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
|
|
|
| 427 |
}
|
| 428 |
|
| 429 |
case GGML_OP_OUT_PROD:
|
| 430 |
-
return
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
|
|
|
| 436 |
|
| 437 |
default:
|
| 438 |
return false;
|
|
|
|
| 65 |
|
| 66 |
// convert src0 to float
|
| 67 |
if (type != GGML_TYPE_F32) {
|
| 68 |
+
const auto * type_traits = ggml_get_type_traits(type);
|
| 69 |
+
ggml_to_float_t const to_float = type_traits->to_float;
|
| 70 |
|
| 71 |
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
| 72 |
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
|
|
|
| 420 |
// TODO: find the optimal value
|
| 421 |
const int64_t min_batch = 32;
|
| 422 |
|
| 423 |
+
return ggml_is_contiguous(src0) &&
|
| 424 |
+
ggml_is_contiguous(src1) &&
|
| 425 |
+
src1->type == GGML_TYPE_F32 &&
|
| 426 |
+
(ne0 >= min_batch && ne1 >= min_batch && ne10 >= min_batch) &&
|
| 427 |
+
(src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL);
|
| 428 |
}
|
| 429 |
|
| 430 |
case GGML_OP_OUT_PROD:
|
| 431 |
+
return op->src[0]->type == GGML_TYPE_F32 &&
|
| 432 |
+
op->src[1]->type == GGML_TYPE_F32 &&
|
| 433 |
+
ggml_is_matrix(src0) &&
|
| 434 |
+
ggml_is_matrix(src1) &&
|
| 435 |
+
ggml_is_contiguous(src0) &&
|
| 436 |
+
(ggml_is_contiguous(src1) || ggml_is_transposed(src1)) &&
|
| 437 |
+
(src0->type == GGML_TYPE_F32 || ggml_get_type_traits(src0->type)->to_float != NULL);
|
| 438 |
|
| 439 |
default:
|
| 440 |
return false;
|
ggml/src/ggml-vulkan.cpp
CHANGED
|
@@ -5287,9 +5287,9 @@ static void ggml_vk_dequantize_data(const void * from, float * to, size_t ne, gg
|
|
| 5287 |
return;
|
| 5288 |
}
|
| 5289 |
|
| 5290 |
-
|
| 5291 |
|
| 5292 |
-
ggml_to_float_t dequant_fn = tt.to_float;
|
| 5293 |
|
| 5294 |
dequant_fn(from, to, ne);
|
| 5295 |
}
|
|
|
|
| 5287 |
return;
|
| 5288 |
}
|
| 5289 |
|
| 5290 |
+
const auto * tt = ggml_get_type_traits(quant);
|
| 5291 |
|
| 5292 |
+
ggml_to_float_t dequant_fn = tt->to_float;
|
| 5293 |
|
| 5294 |
dequant_fn(from, to, ne);
|
| 5295 |
}
|
ggml/src/ggml.c
CHANGED
|
@@ -730,7 +730,7 @@ static void ggml_vec_dot_f32(int n, float * restrict s, size_t bs, const float *
|
|
| 730 |
static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc);
|
| 731 |
static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t * restrict x, size_t bx, ggml_bf16_t * restrict y, size_t by, int nrc);
|
| 732 |
|
| 733 |
-
static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
| 734 |
[GGML_TYPE_I8] = {
|
| 735 |
.type_name = "i8",
|
| 736 |
.blck_size = 1,
|
|
@@ -1152,9 +1152,9 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
| 1152 |
};
|
| 1153 |
|
| 1154 |
// For internal test use
|
| 1155 |
-
|
| 1156 |
GGML_ASSERT(type < GGML_TYPE_COUNT);
|
| 1157 |
-
return type_traits[type];
|
| 1158 |
}
|
| 1159 |
|
| 1160 |
//
|
|
|
|
| 730 |
static void ggml_vec_dot_f16(int n, float * restrict s, size_t bs, ggml_fp16_t * restrict x, size_t bx, ggml_fp16_t * restrict y, size_t by, int nrc);
|
| 731 |
static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t * restrict x, size_t bx, ggml_bf16_t * restrict y, size_t by, int nrc);
|
| 732 |
|
| 733 |
+
static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
|
| 734 |
[GGML_TYPE_I8] = {
|
| 735 |
.type_name = "i8",
|
| 736 |
.blck_size = 1,
|
|
|
|
| 1152 |
};
|
| 1153 |
|
| 1154 |
// For internal test use
|
| 1155 |
+
const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
|
| 1156 |
GGML_ASSERT(type < GGML_TYPE_COUNT);
|
| 1157 |
+
return &type_traits[type];
|
| 1158 |
}
|
| 1159 |
|
| 1160 |
//
|