Spaces:
Running
Running
| //------------------------------------------------------------------------------ | |
| // gelu | |
| //------------------------------------------------------------------------------ | |
// Elementwise GELU, tanh form, one float per work-item:
//   dst[i] = 0.5*x*(1 + tanh(SQRT_2_OVER_PI*x*(1 + GELU_COEF_A*x*x))),  x = src0[i]
// where i is the work-item's global id in dimension 0.
//
// src0    - input buffer
// offset0 - byte offset added to src0 before indexing
// dst     - output buffer
// offsetd - byte offset added to dst before indexing
//
// SQRT_2_OVER_PI and GELU_COEF_A are defined outside this kernel.
// The index is not bounds-checked here.
kernel void kernel_gelu(
        global float * src0,
        ulong          offset0,
        global float * dst,
        ulong          offsetd
) {
    const size_t gid = get_global_id(0);

    // Offsets are in bytes, so step through char pointers before re-typing.
    global const float * in  = (global const float *)((global const char *)src0 + offset0);
    global float       * out = (global float *)((global char *)dst + offsetd);

    const float x   = in[gid];
    const float arg = SQRT_2_OVER_PI*x*(1.0f + GELU_COEF_A*x*x);

    out[gid] = 0.5f*x*(1.0f + tanh(arg));
}
// float4 variant of the tanh-form GELU: each work-item reads one float4 from
// src0, evaluates
//   0.5*x*(1 + tanh(SQRT_2_OVER_PI*x*(1 + GELU_COEF_A*x*x)))
// on it, and writes the float4 result to dst at the same index
// (global id in dimension 0).
//
// src0    - input buffer, indexed in float4 units
// offset0 - byte offset added to src0 before indexing
// dst     - output buffer, indexed in float4 units
// offsetd - byte offset added to dst before indexing
//
// SQRT_2_OVER_PI and GELU_COEF_A are defined outside this kernel.
// The index is not bounds-checked here.
kernel void kernel_gelu_4(
        global float4 * src0,
        ulong           offset0,
        global float4 * dst,
        ulong           offsetd
) {
    const size_t gid = get_global_id(0);

    // Offsets are in bytes, so step through char pointers before re-typing.
    global const float4 * in  = (global const float4 *)((global const char *)src0 + offset0);
    global float4       * out = (global float4 *)((global char *)dst + offsetd);

    const float4 x   = in[gid];
    const float4 arg = SQRT_2_OVER_PI*x*(1.0f + GELU_COEF_A*x*x);

    out[gid] = 0.5f*x*(1.0f + tanh(arg));
}
// Elementwise GELU, erf form, one float per work-item:
//   dst[i] = 0.5*x*(1 + erf(x*SQRT_2_INV)),  x = src0[i]
// where i is the work-item's global id in dimension 0.
//
// src0    - input buffer
// offset0 - byte offset added to src0 before indexing
// dst     - output buffer
// offsetd - byte offset added to dst before indexing
//
// SQRT_2_INV is defined outside this kernel.
// The index is not bounds-checked here.
kernel void kernel_gelu_erf(
        global float * src0,
        ulong          offset0,
        global float * dst,
        ulong          offsetd
) {
    const size_t gid = get_global_id(0);

    // Offsets are in bytes, so step through char pointers before re-typing.
    global const float * in  = (global const float *)((global const char *)src0 + offset0);
    global float       * out = (global float *)((global char *)dst + offsetd);

    const float x = in[gid];
    const float e = erf(x*SQRT_2_INV);

    out[gid] = 0.5f*x*(1.0f + e);
}
// float4 variant of the erf-form GELU: each work-item reads one float4 from
// src0, evaluates
//   0.5*x*(1 + erf(x*SQRT_2_INV))
// on it, and writes the float4 result to dst at the same index
// (global id in dimension 0).
//
// src0    - input buffer, indexed in float4 units
// offset0 - byte offset added to src0 before indexing
// dst     - output buffer, indexed in float4 units
// offsetd - byte offset added to dst before indexing
//
// SQRT_2_INV is defined outside this kernel.
// The index is not bounds-checked here.
kernel void kernel_gelu_erf_4(
        global float4 * src0,
        ulong           offset0,
        global float4 * dst,
        ulong           offsetd
) {
    const size_t gid = get_global_id(0);

    // Offsets are in bytes, so step through char pointers before re-typing.
    global const float4 * in  = (global const float4 *)((global const char *)src0 + offset0);
    global float4       * out = (global float4 *)((global char *)dst + offsetd);

    const float4 x = in[gid];
    const float4 e = erf(x*SQRT_2_INV);

    out[gid] = 0.5f*x*(1.0f + e);
}
// Elementwise "quick" GELU, one float per work-item:
//   dst[i] = x*(1/(1 + exp(GELU_QUICK_COEF*x))),  x = src0[i]
// where i is the work-item's global id in dimension 0.
//
// src0    - input buffer
// offset0 - byte offset added to src0 before indexing
// dst     - output buffer
// offsetd - byte offset added to dst before indexing
//
// GELU_QUICK_COEF is defined outside this kernel.
// The index is not bounds-checked here.
kernel void kernel_gelu_quick(
        global float * src0,
        ulong          offset0,
        global float * dst,
        ulong          offsetd
) {
    const size_t gid = get_global_id(0);

    // Offsets are in bytes, so step through char pointers before re-typing.
    global const float * in  = (global const float *)((global const char *)src0 + offset0);
    global float       * out = (global float *)((global char *)dst + offsetd);

    const float x = in[gid];
    const float e = exp(GELU_QUICK_COEF*x);

    // Kept as x * (1/(1+e)) rather than x/(1+e) so rounding is unchanged.
    out[gid] = x*(1.0f/(1.0f + e));
}
// float4 variant of the "quick" GELU: each work-item reads one float4 from
// src0, evaluates
//   x*(1/(1 + exp(GELU_QUICK_COEF*x)))
// on it, and writes the float4 result to dst at the same index
// (global id in dimension 0).
//
// src0    - input buffer, indexed in float4 units
// offset0 - byte offset added to src0 before indexing
// dst     - output buffer, indexed in float4 units
// offsetd - byte offset added to dst before indexing
//
// GELU_QUICK_COEF is defined outside this kernel.
// The index is not bounds-checked here.
kernel void kernel_gelu_quick_4(
        global float4 * src0,
        ulong           offset0,
        global float4 * dst,
        ulong           offsetd
) {
    const size_t gid = get_global_id(0);

    // Offsets are in bytes, so step through char pointers before re-typing.
    global const float4 * in  = (global const float4 *)((global const char *)src0 + offset0);
    global float4       * out = (global float4 *)((global char *)dst + offsetd);

    const float4 x = in[gid];
    const float4 e = exp(GELU_QUICK_COEF*x);

    // Kept as x * (1/(1+e)) rather than x/(1+e) so rounding is unchanged.
    out[gid] = x*(1.0f/(1.0f + e));
}