ggml : add `ggml_upscale_ext` (ggml/814)
* initial commit with CPU implementation of upscale to shape and test, cuda implementation next
* experimental commit to see if dst shape is correct
* test version
* test
* removed unnecessary params
* refactor
* fixed tests
* ggml : metal impl + cleanup + sycl dev warnings
* patched ggml_upscale cuda op to handle non-contiguous tensors, added test for non-contiguous behavior
* metal : fix upscale op to support nb00 + style
---------
Co-authored-by: Georgi Gerganov <[email protected]>
- ggml-cuda/upscale.cu +33 -30
- ggml-metal.m +8 -2
- ggml-metal.metal +13 -8
- ggml-sycl.cpp +4 -0
- ggml.c +47 -17
- ggml.h +12 -0
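
Taken together, the change replaces the single integer scale factor with explicit destination dimensions, and the old entry point becomes a thin wrapper over the new one. A minimal sketch of that equivalence, assuming an F32 input of shape [8, 8, 3, 1] (shapes and the helper name are illustrative, not taken from the patch):

#include "ggml.h"

// sketch: with the new API, the old integer-scale call is just a special case
static struct ggml_tensor * upscale_2x_both_ways(struct ggml_context * ctx, struct ggml_tensor * a) {
    struct ggml_tensor * up_old = ggml_upscale    (ctx, a, 2);               // -> [16, 16, 3, 1]
    struct ggml_tensor * up_new = ggml_upscale_ext(ctx, a, 16, 16, 3, 1);    // -> [16, 16, 3, 1]
    (void) up_old;
    return up_new; // both describe the same GGML_OP_UPSCALE node
}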
ggml-cuda/upscale.cu
CHANGED

@@ -1,35 +1,36 @@
 #include "upscale.cuh"

+static __global__ void upscale_f32(const float * x, float * dst,
+        const int nb00, const int nb01, const int nb02, const int nb03,
+        const int ne10, const int ne11, const int ne12, const int ne13,
+        const float sf0, const float sf1, const float sf2, const float sf3) {
+    int index = threadIdx.x + blockIdx.x * blockDim.x;
+    if (index >= ne10 * ne11 * ne12 * ne13) {
         return;
     }
+
+    int i10 = index % ne10;
+    int i11 = (index / ne10) % ne11;
+    int i12 = (index / (ne10 * ne11)) % ne12;
+    int i13 = (index / (ne10 * ne11 * ne12)) % ne13;
+
+    int i00 = i10 / sf0;
+    int i01 = i11 / sf1;
+    int i02 = i12 / sf2;
+    int i03 = i13 / sf3;
+
+    dst[index] = *(float *)((char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00);
 }

+static void upscale_f32_cuda(const float * x, float * dst,
+        const int nb00, const int nb01, const int nb02, const int nb03,
+        const int ne10, const int ne11, const int ne12, const int ne13,
+        const float sf0, const float sf1, const float sf2, const float sf3,
+        cudaStream_t stream) {
+    int dst_size = ne10 * ne11 * ne12 * ne13;
+    int num_blocks = (dst_size + CUDA_UPSCALE_BLOCK_SIZE - 1) / CUDA_UPSCALE_BLOCK_SIZE;
+
+    upscale_f32<<<num_blocks, CUDA_UPSCALE_BLOCK_SIZE,0,stream>>>(x, dst, nb00, nb01, nb02, nb03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3);
 }

 void ggml_cuda_op_upscale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {

@@ -39,10 +40,12 @@ void ggml_cuda_op_upscale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     cudaStream_t stream = ctx.stream();

     GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT(dst->type == GGML_TYPE_F32);
-    GGML_ASSERT(src0->ne[3] == 1 && dst->ne[3] == 1); // just 3D tensors
+    GGML_ASSERT( dst->type == GGML_TYPE_F32);

+    const float sf0 = (float)dst->ne[0]/src0->ne[0];
+    const float sf1 = (float)dst->ne[1]/src0->ne[1];
+    const float sf2 = (float)dst->ne[2]/src0->ne[2];
+    const float sf3 = (float)dst->ne[3]/src0->ne[3];

+    upscale_f32_cuda(src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3, stream);
 }
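The rewritten CUDA kernel launches one thread per destination element, decodes the flat index into the four destination coordinates, and maps each coordinate back to the source by dividing by the per-dimension scale factor; because the source is addressed through its byte strides nb00..nb03, non-contiguous inputs work as well. A host-side C replica of that index arithmetic for a single element (names mirror the kernel; this is an illustration, not part of the patch):

#include <stddef.h>

// illustrative CPU replica of the kernel's nearest-neighbor index math:
// ne1* are destination sizes, nb0* are source byte strides, sf* are dst/src size ratios
static float upscale_fetch(const float * x, int index,
                           int ne10, int ne11, int ne12,
                           size_t nb00, size_t nb01, size_t nb02, size_t nb03,
                           float sf0, float sf1, float sf2, float sf3) {
    const int i10 = index % ne10;                     // destination coordinates
    const int i11 = (index / ne10) % ne11;
    const int i12 = (index / (ne10 * ne11)) % ne12;
    const int i13 =  index / (ne10 * ne11 * ne12);

    const int i00 = i10 / sf0;                        // nearest source coordinates
    const int i01 = i11 / sf1;
    const int i02 = i12 / sf2;
    const int i03 = i13 / sf3;

    return *(const float *)((const char *)x + i03*nb03 + i02*nb02 + i01*nb01 + i00*nb00);
}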
ggml-metal.m
CHANGED

@@ -2353,7 +2353,10 @@ static enum ggml_status ggml_metal_graph_compute(
            {
                GGML_ASSERT(src0->type == GGML_TYPE_F32);

+               const float sf0 = (float)ne0/src0->ne[0];
+               const float sf1 = (float)ne1/src0->ne[1];
+               const float sf2 = (float)ne2/src0->ne[2];
+               const float sf3 = (float)ne3/src0->ne[3];

                const id<MTLComputePipelineState> pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_UPSCALE_F32].pipeline;

@@ -2376,7 +2379,10 @@ static enum ggml_status ggml_metal_graph_compute(
                [encoder setBytes:&nb1 length:sizeof(nb1) atIndex:15];
                [encoder setBytes:&nb2 length:sizeof(nb2) atIndex:16];
                [encoder setBytes:&nb3 length:sizeof(nb3) atIndex:17];
+               [encoder setBytes:&sf0 length:sizeof(sf0) atIndex:18];
+               [encoder setBytes:&sf1 length:sizeof(sf1) atIndex:19];
+               [encoder setBytes:&sf2 length:sizeof(sf2) atIndex:20];
+               [encoder setBytes:&sf3 length:sizeof(sf3) atIndex:21];

                const int nth = MIN((int) pipeline.maxTotalThreadsPerThreadgroup, ne0);
ggml-metal.metal
CHANGED

@@ -1852,7 +1852,10 @@ kernel void kernel_upscale_f32(
        constant uint64_t & nb1,
        constant uint64_t & nb2,
        constant uint64_t & nb3,
+       constant  float   & sf0,
+       constant  float   & sf1,
+       constant  float   & sf2,
+       constant  float   & sf3,
        uint3 tgpig[[threadgroup_position_in_grid]],
        uint3 tpitg[[thread_position_in_threadgroup]],
        uint3 ntg[[threads_per_threadgroup]]) {

@@ -1861,15 +1864,17 @@ kernel void kernel_upscale_f32(
    const int64_t i2 = tgpig.y;
    const int64_t i1 = tgpig.x;

-   const int64_t i03 = i3;
-   const int64_t i02 = i2;
-   device const float * src0_ptr = (device const float *) (src0 + i03*nb03 + i02*nb02 + i01*nb01);
-   device float * dst_ptr = (device float *) (dst + i3*nb3 + i2*nb2 + i1*nb1);
+   const int64_t i03 = i3/sf3;
+   const int64_t i02 = i2/sf2;
+   const int64_t i01 = i1/sf1;

    for (int i0 = tpitg.x; i0 < ne0; i0 += ntg.x) {
+       const int64_t i00 = i0/sf0;
+
+       device const float * src0_ptr = (device const float *) (src0 + i03*nb03 + i02*nb02 + i01*nb01 + i00*nb00);
+       device float * dst_ptr = (device float *) (dst + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
+
+       dst_ptr[0] = src0_ptr[0];
    }
 }
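Both the Metal and CUDA kernels receive the scale factors as floats and rely on truncating division to select the nearest source element, which also covers non-integer ratios. A small worked example of that mapping (numbers chosen for illustration only):

// worked example: non-integer ratio, 8 -> 20 elements along dim 0
static void map_dim0_example(void) {
    const float sf0 = 20.0f / 8.0f;        // 2.5, computed the same way as the host code
    for (int i0 = 0; i0 < 20; i0++) {
        const int i00 = i0 / sf0;          // 0,0,0,1,1,2,2,2,3,3,... (truncating division)
        (void) i00;                        // dst[i0] = src[i00]; nearest-neighbor copy
    }
}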
ggml-sycl.cpp
CHANGED

@@ -13987,6 +13987,10 @@ inline void ggml_sycl_op_upscale(const ggml_tensor *src0,
     GGML_ASSERT(dst->type == GGML_TYPE_F32);
     GGML_ASSERT(src0->ne[3] == 1 && dst->ne[3] == 1); // just 3D tensors

+#pragma message("TODO: generalize upscale operator")
+#pragma message("      https://github.com/ggerganov/ggml/pull/814")
+    GGML_ASSERT(false && "TODO: generalize upscale operator");
+
     const int scale_factor = dst->op_params[0];

     upscale_f32_sycl(src0_dd, dst_dd, src0->ne[0], src0->ne[1], src0->ne[2], scale_factor, main_stream);
ggml.c
CHANGED

@@ -6293,7 +6293,10 @@ struct ggml_tensor * ggml_pool_2d(
 static struct ggml_tensor * ggml_upscale_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
+        int ne0,
+        int ne1,
+        int ne2,
+        int ne3) {
     bool is_node = false;

     if (a->grad) {

@@ -6301,19 +6304,45 @@ static struct ggml_tensor * ggml_upscale_impl(
         is_node = true;
     }

+    GGML_ASSERT(a->ne[0] <= ne0);
+    GGML_ASSERT(a->ne[1] <= ne1);
+    GGML_ASSERT(a->ne[2] <= ne2);
+    GGML_ASSERT(a->ne[3] <= ne3);
+
     struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
+            ne0,
+            ne1,
+            ne2,
+            ne3
+            );

     result->op = GGML_OP_UPSCALE;
+
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;

     return result;
 }

+struct ggml_tensor * ggml_upscale(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        int scale_factor) {
+    return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3]);
+}
+
+struct ggml_tensor * ggml_upscale_ext(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        int ne0,
+        int ne1,
+        int ne2,
+        int ne3) {
+    return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3);
+}
+
+// ggml_pad
+
 struct ggml_tensor * ggml_pad(
         struct ggml_context * ctx,
         struct ggml_tensor * a,

@@ -6338,12 +6367,7 @@ struct ggml_tensor * ggml_pad(
     return result;
 }

-struct ggml_tensor * ggml_upscale(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        int scale_factor) {
-    return ggml_upscale_impl(ctx, a, scale_factor);
-}
+// ggml_arange

 struct ggml_tensor * ggml_arange(
         struct ggml_context * ctx,

@@ -6365,6 +6389,8 @@ struct ggml_tensor * ggml_arange(
     return result;
 }

+// ggml_timestep_embedding
+
 struct ggml_tensor * ggml_timestep_embedding(
         struct ggml_context * ctx,
         struct ggml_tensor * timesteps,

@@ -14820,25 +14846,28 @@ static void ggml_compute_forward_upscale_f32(
         return;
     }

+    GGML_ASSERT(src0->type == GGML_TYPE_F32);

     const int ith = params->ith;
     const int nth = params->nth;

     GGML_TENSOR_UNARY_OP_LOCALS

+    const float sf0 = (float)ne0/src0->ne[0];
+    const float sf1 = (float)ne1/src0->ne[1];
+    const float sf2 = (float)ne2/src0->ne[2];
+    const float sf3 = (float)ne3/src0->ne[3];

     // TODO: optimize

     for (int64_t i3 = 0; i3 < ne3; i3++) {
-        const int64_t i03 = i3;
+        const int64_t i03 = i3 / sf3;
         for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
-            const int64_t i02 = i2;
+            const int64_t i02 = i2 / sf2;
             for (int64_t i1 = 0; i1 < ne1; i1++) {
+                const int64_t i01 = i1 / sf1;
                 for (int64_t i0 = 0; i0 < ne0; i0++) {
+                    const int64_t i00 = i0 / sf0;

                     const float * x = (float *)((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
                     float * y = (float *)((char *) dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);

@@ -14868,6 +14897,7 @@ static void ggml_compute_forward_upscale(
     }
 }

+
 // ggml_compute_forward_pad

 static void ggml_compute_forward_pad_f32(
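Since the CPU forward pass indexes src0 through its byte strides nb00..nb03, an upscale of a non-contiguous view should go through without an explicit ggml_cont. A hypothetical sketch of that (not the test added by this change; the helper name and shapes are invented):

#include "ggml.h"

// hypothetical helper, not from the PR: upscale a transposed (non-contiguous) view;
// ggml_permute only swaps the strides, and the forward loop above reads src0 through them
static struct ggml_tensor * upscale_transposed(struct ggml_context * ctx,
                                               struct ggml_tensor * a /* F32, [6, 4, 1, 1] */) {
    struct ggml_tensor * at = ggml_permute(ctx, a, 1, 0, 2, 3);        // view: [4, 6, 1, 1]
    return ggml_upscale_ext(ctx, at, 8, 12, 1, 1);                     // -> [8, 12, 1, 1]
}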
ggml.h
CHANGED

@@ -1674,12 +1674,24 @@ extern "C" {
             float                 p1);

     // nearest interpolate
+    // multiplies ne0 and ne1 by scale factor
     // used in stable-diffusion
     GGML_API struct ggml_tensor * ggml_upscale(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             int                   scale_factor);

+    // nearest interpolate
+    // nearest interpolate to specified dimensions
+    // used in tortoise.cpp
+    GGML_API struct ggml_tensor * ggml_upscale_ext(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   ne0,
+            int                   ne1,
+            int                   ne2,
+            int                   ne3);
+
     // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
     GGML_API struct ggml_tensor * ggml_pad(
             struct ggml_context * ctx,