Spaces:
Running
Running
# Builds the CPU feature-detection code for one CPU backend variant.
#
# Arguments:
#   cpu_name - name of the CPU backend target; the detection code is built as
#              an OBJECT library named "<cpu_name>-feats" and linked privately
#              into that target
#   arch     - sub-directory of ggml-cpu/arch/ that provides cpu-feats.cpp
#   ARGN     - compile definitions applied to the detection code
function(ggml_add_cpu_backend_features cpu_name arch)
    # The detection code is a separate target so that it can be compiled
    # without the variant's architecture flags. Several CPU backend variants
    # may coexist in one build, which rules out setting the flags per source
    # file with set_source_files_properties().
    set(feats_target ${cpu_name}-feats)

    add_library(${feats_target} OBJECT ggml-cpu/arch/${arch}/cpu-feats.cpp)
    set_property(TARGET ${feats_target} PROPERTY POSITION_INDEPENDENT_CODE ON)
    target_include_directories(${feats_target} PRIVATE . ../include)
    target_compile_definitions(${feats_target} PRIVATE
        ${ARGN}
        GGML_BACKEND_DL
        GGML_BACKEND_BUILD
        GGML_BACKEND_SHARED)

    target_link_libraries(${cpu_name} PRIVATE ${feats_target})
endfunction()
# Creates one CPU backend library and configures it for the target architecture.
#
# Arguments:
#   tag_name - variant tag; the library is named "ggml-cpu-<tag_name>", or
#              plain "ggml-cpu" when the tag is empty/false
#
# The function collects three lists and applies them to the library at the end:
#   GGML_CPU_SOURCES  - common sources plus architecture-specific ones
#   ARCH_FLAGS        - compiler options selecting the instruction set
#   ARCH_DEFINITIONS  - preprocessor definitions describing enabled features
# The selection is driven by GGML_SYSTEM_ARCH and the GGML_* options that are
# visible in the calling scope.
function(ggml_add_cpu_backend_variant_impl tag_name)
    if (tag_name)
        set(GGML_CPU_NAME ggml-cpu-${tag_name})
    else()
        set(GGML_CPU_NAME ggml-cpu)
    endif()

    ggml_add_backend_library(${GGML_CPU_NAME})

    # Sources shared by every architecture
    list(APPEND GGML_CPU_SOURCES
        ggml-cpu/ggml-cpu.c
        ggml-cpu/ggml-cpu.cpp
        ggml-cpu/repack.cpp
        ggml-cpu/repack.h
        ggml-cpu/hbm.cpp
        ggml-cpu/hbm.h
        ggml-cpu/quants.c
        ggml-cpu/quants.h
        ggml-cpu/traits.cpp
        ggml-cpu/traits.h
        ggml-cpu/amx/amx.cpp
        ggml-cpu/amx/amx.h
        ggml-cpu/amx/mmq.cpp
        ggml-cpu/amx/mmq.h
        ggml-cpu/ggml-cpu-impl.h
        ggml-cpu/common.h
        ggml-cpu/binary-ops.h
        ggml-cpu/binary-ops.cpp
        ggml-cpu/unary-ops.h
        ggml-cpu/unary-ops.cpp
        ggml-cpu/simd-mappings.h
        ggml-cpu/vec.h
        ggml-cpu/vec.cpp
        ggml-cpu/ops.h
        ggml-cpu/ops.cpp
        )

    target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
    target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)

    # Optional: Apple Accelerate framework
    if (APPLE AND GGML_ACCELERATE)
        find_library(ACCELERATE_FRAMEWORK Accelerate)
        if (ACCELERATE_FRAMEWORK)
            message(STATUS "Accelerate framework found")

            target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_ACCELERATE)
            target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_NEW_LAPACK)
            target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_LAPACK_ILP64)

            target_link_libraries(${GGML_CPU_NAME} PRIVATE ${ACCELERATE_FRAMEWORK})
        else()
            message(WARNING "Accelerate framework not found")
        endif()
    endif()

    # Optional: OpenMP. The outcome is recorded in the cache variable
    # GGML_OPENMP_ENABLED.
    if (GGML_OPENMP)
        find_package(OpenMP)
        if (OpenMP_FOUND)
            set(GGML_OPENMP_ENABLED "ON" CACHE INTERNAL "")

            target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_OPENMP)

            target_link_libraries(${GGML_CPU_NAME} PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
        else()
            set(GGML_OPENMP_ENABLED "OFF" CACHE INTERNAL "")
            message(WARNING "OpenMP not found")
        endif()
    endif()

    # Optional: llamafile SGEMM kernels
    if (GGML_LLAMAFILE)
        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_LLAMAFILE)

        list(APPEND GGML_CPU_SOURCES
            ggml-cpu/llamafile/sgemm.cpp
            ggml-cpu/llamafile/sgemm.h)
    endif()

    # Optional: high-bandwidth memory through memkind
    if (GGML_CPU_HBM)
        find_library(memkind memkind REQUIRED)

        message(STATUS "Using memkind for CPU HBM")

        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_HBM)

        target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind)
    endif()

    if (GGML_SYSTEM_ARCH STREQUAL "ARM")
        message(STATUS "ARM detected")
        list(APPEND GGML_CPU_SOURCES
            ggml-cpu/arch/arm/quants.c
            ggml-cpu/arch/arm/repack.cpp
            )

        if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
            message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
        else()
            check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
            if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
                list(APPEND ARCH_FLAGS -mfp16-format=ieee)
            endif()

            # Empty input for the compiler probes below; the null device has a
            # different name on Windows hosts.
            if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
                set(FEAT_INPUT_FILE "NUL")
            else()
                set(FEAT_INPUT_FILE "/dev/null")
            endif()

            if (GGML_NATIVE)
                # -mcpu=native does not always enable all the features in some compilers,
                # so we check for them manually and enable them if available

                # Ask the compiler driver what -mcpu=native resolves to; the
                # verbose output is captured from stderr.
                execute_process(
                    COMMAND ${CMAKE_C_COMPILER} -mcpu=native -E -v -
                    INPUT_FILE "${FEAT_INPUT_FILE}"
                    OUTPUT_QUIET
                    ERROR_VARIABLE ARM_MCPU
                    RESULT_VARIABLE ARM_MCPU_RESULT
                )
                if (NOT ARM_MCPU_RESULT)
                    string(REGEX MATCH "-mcpu=[^ ']+" ARM_MCPU_FLAG "${ARM_MCPU}")
                endif()
                if ("${ARM_MCPU_FLAG}" STREQUAL "")
                    set(ARM_MCPU_FLAG -mcpu=native)
                    message(STATUS "ARM -mcpu not found, -mcpu=native will be used")
                endif()

                include(CheckCXXSourceRuns)

                # Probes one CPU feature by compiling and running `code` with
                # "<mcpu>+<tag>". On success "+<tag>" is appended to
                # ARM_MCPU_FLAG_FIX in the caller's scope; otherwise "+no<tag>"
                # is appended when the compiler accepts that modifier.
                function(check_arm_feature tag code)
                    set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
                    set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+${tag}")
                    check_cxx_source_runs("${code}" GGML_MACHINE_SUPPORTS_${tag})
                    if (GGML_MACHINE_SUPPORTS_${tag})
                        set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+${tag}" PARENT_SCOPE)
                    else()
                        set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+no${tag}")
                        check_cxx_source_compiles("int main() { return 0; }" GGML_MACHINE_SUPPORTS_no${tag})
                        if (GGML_MACHINE_SUPPORTS_no${tag})
                            set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+no${tag}" PARENT_SCOPE)
                        endif()
                    endif()
                    set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
                endfunction()

                check_arm_feature(dotprod "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }")
                check_arm_feature(i8mm "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }")
                check_arm_feature(sve "#include <arm_sve.h>\nint main() { svfloat32_t _a, _b; volatile svfloat32_t _c = svadd_f32_z(svptrue_b8(), _a, _b); return 0; }")
                check_arm_feature(sme "#include <arm_sme.h>\n__arm_locally_streaming int main() { __asm__ volatile(\"smstart; smstop;\"); return 0; }")

                list(APPEND ARCH_FLAGS "${ARM_MCPU_FLAG}${ARM_MCPU_FLAG_FIX}")
            else()
                if (GGML_CPU_ARM_ARCH)
                    list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
                elseif(GGML_CPU_ALL_VARIANTS)
                    # Begin with the lowest baseline
                    set(ARM_MCPU "armv8-a")
                    set(ARCH_TAGS "")
                    set(ARCH_DEFINITIONS "")

                    # When a feature is selected, bump the MCPU to the first
                    # version that supported it
                    if (GGML_INTERNAL_DOTPROD)
                        set(ARM_MCPU "armv8.2-a")
                        set(ARCH_TAGS "${ARCH_TAGS}+dotprod")
                        list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD)
                    endif()
                    if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC)
                        set(ARM_MCPU "armv8.2-a")
                        set(ARCH_TAGS "${ARCH_TAGS}+fp16")
                        list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC)
                    endif()
                    if (GGML_INTERNAL_SVE)
                        set(ARM_MCPU "armv8.2-a")
                        set(ARCH_TAGS "${ARCH_TAGS}+sve")
                        list(APPEND ARCH_DEFINITIONS GGML_USE_SVE)
                    endif()
                    if (GGML_INTERNAL_MATMUL_INT8)
                        set(ARM_MCPU "armv8.6-a")
                        set(ARCH_TAGS "${ARCH_TAGS}+i8mm")
                        list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8)
                    endif()
                    if (GGML_INTERNAL_SVE2)
                        set(ARM_MCPU "armv8.6-a")
                        set(ARCH_TAGS "${ARCH_TAGS}+sve2")
                        list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2)
                    endif()
                    if (GGML_INTERNAL_NOSVE)
                        set(ARCH_TAGS "${ARCH_TAGS}+nosve")
                    endif()
                    if (GGML_INTERNAL_SME)
                        set(ARM_MCPU "armv9.2-a")
                        set(ARCH_TAGS "${ARCH_TAGS}+sme")
                        list(APPEND ARCH_DEFINITIONS GGML_USE_SME)
                    endif()
                    list(APPEND ARCH_FLAGS "-march=${ARM_MCPU}${ARCH_TAGS}")
                    ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS})
                endif()
            endif()

            # show enabled features
            # (dump the compiler's predefined macros for the selected flags
            # and report the __ARM_FEATURE_* ones that are set)
            execute_process(
                COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
                INPUT_FILE ${FEAT_INPUT_FILE}
                OUTPUT_VARIABLE ARM_FEATURE
                RESULT_VARIABLE ARM_FEATURE_RESULT
            )
            if (ARM_FEATURE_RESULT)
                message(WARNING "Failed to get ARM features")
            else()
                foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC SME)
                    string(FIND "${ARM_FEATURE}" "__ARM_FEATURE_${feature} 1" feature_pos)
                    if (NOT ${feature_pos} EQUAL -1)
                        message(STATUS "ARM feature ${feature} enabled")
                    endif()
                endforeach()
            endif()
        endif()
    elseif (GGML_SYSTEM_ARCH STREQUAL "x86")
        message(STATUS "x86 detected")
        list(APPEND GGML_CPU_SOURCES
            ggml-cpu/arch/x86/quants.c
            ggml-cpu/arch/x86/repack.cpp
            )

        if (MSVC)
            # instruction set detection for MSVC only
            if (GGML_NATIVE)
                include(ggml-cpu/cmake/FindSIMD.cmake)
            endif ()
            # Only the highest enabled /arch: level is passed to the compiler
            if (GGML_AVX512)
                list(APPEND ARCH_FLAGS /arch:AVX512)
                # /arch:AVX512 includes: __AVX512F__, __AVX512CD__, __AVX512BW__, __AVX512DQ__, and __AVX512VL__
                # MSVC has no compile-time flags enabling specific
                # AVX512 extensions, neither it defines the
                # macros corresponding to the extensions.
                # Do it manually.
                list(APPEND ARCH_DEFINITIONS GGML_AVX512)
                if (GGML_AVX512_VBMI)
                    list(APPEND ARCH_DEFINITIONS __AVX512VBMI__)
                    if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                        list(APPEND ARCH_FLAGS -mavx512vbmi)
                    endif()
                endif()
                if (GGML_AVX512_VNNI)
                    list(APPEND ARCH_DEFINITIONS __AVX512VNNI__ GGML_AVX512_VNNI)
                    if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                        list(APPEND ARCH_FLAGS -mavx512vnni)
                    endif()
                endif()
                if (GGML_AVX512_BF16)
                    list(APPEND ARCH_DEFINITIONS __AVX512BF16__ GGML_AVX512_BF16)
                    if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                        list(APPEND ARCH_FLAGS -mavx512bf16)
                    endif()
                endif()
                if (GGML_AMX_TILE)
                    list(APPEND ARCH_DEFINITIONS __AMX_TILE__ GGML_AMX_TILE)
                endif()
                if (GGML_AMX_INT8)
                    list(APPEND ARCH_DEFINITIONS __AMX_INT8__ GGML_AMX_INT8)
                endif()
                if (GGML_AMX_BF16)
                    list(APPEND ARCH_DEFINITIONS __AMX_BF16__ GGML_AMX_BF16)
                endif()
            elseif (GGML_AVX2)
                list(APPEND ARCH_FLAGS /arch:AVX2)
                list(APPEND ARCH_DEFINITIONS GGML_AVX2 GGML_FMA GGML_F16C)
            elseif (GGML_AVX)
                list(APPEND ARCH_FLAGS /arch:AVX)
                list(APPEND ARCH_DEFINITIONS GGML_AVX)
            elseif (GGML_SSE42)
                list(APPEND ARCH_FLAGS /arch:SSE4.2)
                list(APPEND ARCH_DEFINITIONS GGML_SSE42)
            endif()
            if (GGML_AVX_VNNI)
                list(APPEND ARCH_DEFINITIONS __AVXVNNI__ GGML_AVX_VNNI)
            endif()
            if (GGML_BMI2)
                # MSVC does not define macro __BMI2__
                list(APPEND ARCH_DEFINITIONS __BMI2__ GGML_BMI2)
            endif()
        else ()
            if (GGML_NATIVE)
                list(APPEND ARCH_FLAGS -march=native)
            else ()
                # One -m flag plus one GGML_* definition per enabled feature
                if (GGML_SSE42)
                    list(APPEND ARCH_FLAGS -msse4.2)
                    list(APPEND ARCH_DEFINITIONS GGML_SSE42)
                endif()
                if (GGML_F16C)
                    list(APPEND ARCH_FLAGS -mf16c)
                    list(APPEND ARCH_DEFINITIONS GGML_F16C)
                endif()
                if (GGML_FMA)
                    list(APPEND ARCH_FLAGS -mfma)
                    list(APPEND ARCH_DEFINITIONS GGML_FMA)
                endif()
                if (GGML_BMI2)
                    list(APPEND ARCH_FLAGS -mbmi2)
                    list(APPEND ARCH_DEFINITIONS GGML_BMI2)
                endif()
                if (GGML_AVX)
                    list(APPEND ARCH_FLAGS -mavx)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX)
                endif()
                if (GGML_AVX2)
                    list(APPEND ARCH_FLAGS -mavx2)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX2)
                endif()
                if (GGML_AVX_VNNI)
                    list(APPEND ARCH_FLAGS -mavxvnni)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX_VNNI)
                endif()
                if (GGML_AVX512)
                    list(APPEND ARCH_FLAGS -mavx512f)
                    list(APPEND ARCH_FLAGS -mavx512cd)
                    list(APPEND ARCH_FLAGS -mavx512vl)
                    list(APPEND ARCH_FLAGS -mavx512dq)
                    list(APPEND ARCH_FLAGS -mavx512bw)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX512)
                endif()
                if (GGML_AVX512_VBMI)
                    list(APPEND ARCH_FLAGS -mavx512vbmi)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX512_VBMI)
                endif()
                if (GGML_AVX512_VNNI)
                    list(APPEND ARCH_FLAGS -mavx512vnni)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX512_VNNI)
                endif()
                if (GGML_AVX512_BF16)
                    list(APPEND ARCH_FLAGS -mavx512bf16)
                    list(APPEND ARCH_DEFINITIONS GGML_AVX512_BF16)
                endif()
                if (GGML_AMX_TILE)
                    list(APPEND ARCH_FLAGS -mamx-tile)
                    list(APPEND ARCH_DEFINITIONS GGML_AMX_TILE)
                endif()
                if (GGML_AMX_INT8)
                    list(APPEND ARCH_FLAGS -mamx-int8)
                    list(APPEND ARCH_DEFINITIONS GGML_AMX_INT8)
                endif()
                if (GGML_AMX_BF16)
                    list(APPEND ARCH_FLAGS -mamx-bf16)
                    list(APPEND ARCH_DEFINITIONS GGML_AMX_BF16)
                endif()
            endif()
        endif()

        if (GGML_BACKEND_DL)
            if (GGML_NATIVE)
                # the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
                message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
            endif()
            ggml_add_cpu_backend_features(${GGML_CPU_NAME} x86 ${ARCH_DEFINITIONS})
        endif()
    elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
        message(STATUS "PowerPC detected")
        list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/powerpc/quants.c)
        if (GGML_NATIVE)
            # Read a description of the host CPU and look for "POWER<n>" in it
            if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
                file(READ "/proc/cpuinfo" POWER10_M)
            elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "powerpc")
                execute_process(COMMAND bash -c "prtconf |grep 'Implementation' | head -n 1" OUTPUT_VARIABLE POWER10_M)
            endif()

            string(TOUPPER "${POWER10_M}" POWER10_M_UPPER)
            string(REGEX MATCHALL "POWER *([0-9]+)" MATCHED_STRING "${POWER10_M_UPPER}")
            string(REGEX REPLACE "POWER *([0-9]+)" "\\1" EXTRACTED_NUMBER "${MATCHED_STRING}")

            if (EXTRACTED_NUMBER GREATER_EQUAL 10)
                list(APPEND ARCH_FLAGS -mcpu=power10 -mpowerpc64)
            elseif (EXTRACTED_NUMBER EQUAL 9)
                list(APPEND ARCH_FLAGS -mcpu=power9 -mpowerpc64)
            elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
                list(APPEND ARCH_FLAGS -mcpu=powerpc64le -mtune=native)
            else()
                list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
            endif()
        elseif(GGML_CPU_ALL_VARIANTS)
            # Begin with the lowest baseline
            set(ARCH_DEFINITIONS "")

            # When a feature is selected, bump the MCPU to the first
            # version that supported it
            # (the loop runs in ascending order, so the highest defined
            # GGML_INTERNAL_POWER<n> decides POWERPC_MCPU)
            foreach(PVER RANGE 7 11)
                if(DEFINED GGML_INTERNAL_POWER${PVER})
                    set(POWERPC_MCPU "power${PVER}")
                    list(APPEND ARCH_DEFINITIONS GGML_USE_POWER${PVER})
                endif()
            endforeach()
            if (GGML_INTERNAL_VSX)
                list(APPEND ARCH_DEFINITIONS GGML_USE_VSX)
                list(APPEND ARCH_FLAGS -mvsx)
            endif()

            if (DEFINED POWERPC_MCPU)
                list(APPEND ARCH_FLAGS -mcpu=${POWERPC_MCPU})
            endif()
            ggml_add_cpu_backend_features(${GGML_CPU_NAME} powerpc ${ARCH_DEFINITIONS})
        else()
            if (GGML_CPU_POWERPC_CPUTYPE)
                list(APPEND ARCH_FLAGS -mcpu=${GGML_CPU_POWERPC_CPUTYPE})
            endif()
        endif()
    elseif (GGML_SYSTEM_ARCH STREQUAL "loongarch64")
        message(STATUS "loongarch64 detected")
        list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/loongarch/quants.c)

        list(APPEND ARCH_FLAGS -march=loongarch64)
        if (GGML_LASX)
            list(APPEND ARCH_FLAGS -mlasx)
        endif()
        if (GGML_LSX)
            list(APPEND ARCH_FLAGS -mlsx)
        endif()
    elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
        message(STATUS "riscv64 detected")
        list(APPEND GGML_CPU_SOURCES
            ggml-cpu/arch/riscv/quants.c
            ggml-cpu/arch/riscv/repack.cpp
            )
        if (GGML_RVV)
            if (GGML_XTHEADVECTOR)
                list(APPEND ARCH_FLAGS -march=rv64gc_xtheadvector -mabi=lp64d)
            elseif (GGML_RV_ZFH)
                list(APPEND ARCH_FLAGS -march=rv64gcv_zfhmin -mabi=lp64d)
            else()
                list(APPEND ARCH_FLAGS -march=rv64gcv -mabi=lp64d)
            endif()
        endif()
    elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
        message(STATUS "s390x detected")
        list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/s390/quants.c)
        file(READ "/proc/cpuinfo" CPUINFO_CONTENTS)

        # Keep only the digits of the first "machine = <number>" entry. The
        # comparisons below must see the machine type alone: matching against
        # the whole cpuinfo text could hit unrelated digits elsewhere in the
        # file. S390X_M stays empty when no entry is found, which selects the
        # "Unknown target" branch.
        set(S390X_M "")
        string(REGEX MATCH "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" S390X_MACHINE_MATCH "${CPUINFO_CONTENTS}")
        if (NOT "${S390X_MACHINE_MATCH}" STREQUAL "")
            set(S390X_M "${CMAKE_MATCH_1}")
        endif()

        # TODO: Separation to determine activation of VX/VXE/VXE2
        if ("${S390X_M}" MATCHES "8561|8562")
            set(GGML_NNPA OFF)
            message(STATUS "z15 target")
            list(APPEND ARCH_FLAGS -march=z15)
        elseif ("${S390X_M}" MATCHES "3931")
            message(STATUS "z16 target")
            list(APPEND ARCH_FLAGS -march=z16)
        elseif ("${S390X_M}" MATCHES "9175|9176")
            # NOTE: Only available from GCC 15.1.0 onwards. Any z17 machine with compile issues must first verify their GCC version.
            # binutils must also be updated to the latest for the -march=z17 flag to work. Otherwise, use -march=arch15.
            message(STATUS "z17 target")
            list(APPEND ARCH_FLAGS -march=arch15)
        else()
            message(STATUS "Unknown target")
            message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.")
            list(APPEND ARCH_FLAGS -march=native -mtune=native)
        endif()

        if (GGML_VXE)
            message(STATUS "VX/VXE/VXE2 enabled")
            list(APPEND ARCH_FLAGS -mvx -mzvector)
            list(APPEND ARCH_DEFINITIONS GGML_VXE)
        endif()

        if (GGML_NNPA)
            message(STATUS "NNPA enabled")
            list(APPEND ARCH_DEFINITIONS GGML_NNPA)
        endif()
    elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "wasm")
        message(STATUS "Wasm detected")
        list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/wasm/quants.c)
    else()
        message(WARNING "Unknown CPU architecture. Falling back to generic implementations.")
        list(APPEND ARCH_FLAGS -DGGML_CPU_GENERIC)
    endif()

    if (GGML_CPU_REPACK)
        target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_REPACK)
    endif()

    if (GGML_CPU_KLEIDIAI)
        message(STATUS "Using KleidiAI optimized kernels if applicable")

        # Disable the KleidiAI tests
        set(KLEIDIAI_BUILD_TESTS OFF)

        # Fetch KleidiAI sources:
        include(FetchContent)
        set(KLEIDIAI_COMMIT_TAG "v1.11.0")
        set(KLEIDIAI_DOWNLOAD_URL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${KLEIDIAI_COMMIT_TAG}.tar.gz")
        set(KLEIDIAI_ARCHIVE_MD5 "3fe9e5ab964c375c53839296eb71eaa2")

        if (POLICY CMP0135)
            cmake_policy(SET CMP0135 NEW)
        endif()

        FetchContent_Declare(KleidiAI_Download
            URL ${KLEIDIAI_DOWNLOAD_URL}
            DOWNLOAD_EXTRACT_TIMESTAMP NEW
            URL_HASH MD5=${KLEIDIAI_ARCHIVE_MD5})

        FetchContent_MakeAvailable(KleidiAI_Download)
        FetchContent_GetProperties(KleidiAI_Download
            SOURCE_DIR KLEIDIAI_SRC
            POPULATED KLEIDIAI_POPULATED)

        if (NOT KLEIDIAI_POPULATED)
            message(FATAL_ERROR "KleidiAI source downloaded failed.")
        endif()

        # NOTE(review): add_compile_definitions() and include_directories()
        # below are directory-scoped, so they reach every target of this
        # directory rather than ${GGML_CPU_NAME} alone. Consider the target_*
        # equivalents once it is confirmed that no other target relies on them.
        add_compile_definitions(GGML_USE_CPU_KLEIDIAI)

        # Remove kleidiai target after fetching it
        if (TARGET kleidiai)
            set_target_properties(kleidiai PROPERTIES EXCLUDE_FROM_ALL TRUE)
        endif()

        list(APPEND GGML_CPU_SOURCES
            ggml-cpu/kleidiai/kleidiai.cpp
            ggml-cpu/kleidiai/kernels.cpp
            ggml-cpu/kleidiai/kleidiai.h
            ggml-cpu/kleidiai/kernels.h
            )

        # KleidiAI
        include_directories(
            ${KLEIDIAI_SRC}/
            ${KLEIDIAI_SRC}/kai/
            ${KLEIDIAI_SRC}/kai/ukernels/
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/)

        # Decide which kernels to build from the architecture flags selected
        # above; when none were selected, fall back to a -march= option found
        # in CMAKE_C_FLAGS.
        set(ARCH_FLAGS_TEMP "${ARCH_FLAGS}")
        if (NOT ARCH_FLAGS_TEMP)
            string(REGEX MATCH "-march=[^ ]+" ARCH_FLAGS_TEMP "${CMAKE_C_FLAGS}")
        endif()
        string(FIND "${ARCH_FLAGS_TEMP}" "+dotprod" DOTPROD_ENABLED)
        string(FIND "${ARCH_FLAGS_TEMP}" "+i8mm" I8MM_ENABLED)
        string(FIND "${ARCH_FLAGS_TEMP}" "+sme" SME_ENABLED)

        set(PRIVATE_ARCH_FLAGS ${ARCH_FLAGS_TEMP})

        list(APPEND GGML_KLEIDIAI_SOURCES
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon.c
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.c
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c)

        # string(FIND) yields -1 when the substring is absent, so compare
        # numerically
        if (NOT DOTPROD_ENABLED EQUAL -1)
            list(APPEND GGML_KLEIDIAI_SOURCES
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod.c)
        endif()

        if (NOT I8MM_ENABLED EQUAL -1)
            list(APPEND GGML_KLEIDIAI_SOURCES ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm.c)
        endif()

        if (NOT SME_ENABLED EQUAL -1)
            list(APPEND GGML_KLEIDIAI_SOURCES
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p2vlx2_f32_sme.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_kxn_bf16p2vlx2b_f32_x32_sme.c)
            # "+sve+sve2" is appended to the end of the flag string, i.e. to
            # its last entry.
            # NOTE(review): assumes the last entry is the -march/-mcpu flag - confirm.
            set(PRIVATE_ARCH_FLAGS "-fno-tree-vectorize;${PRIVATE_ARCH_FLAGS}+sve+sve2")
        endif()

        set_source_files_properties(${GGML_KLEIDIAI_SOURCES} PROPERTIES COMPILE_OPTIONS "${PRIVATE_ARCH_FLAGS}")
        list(APPEND GGML_CPU_SOURCES ${GGML_KLEIDIAI_SOURCES})
    endif()

    # Apply everything collected above to the backend library
    message(STATUS "Adding CPU backend variant ${GGML_CPU_NAME}: ${ARCH_FLAGS} ${ARCH_DEFINITIONS}")
    target_sources(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_SOURCES})
    target_compile_options(${GGML_CPU_NAME} PRIVATE ${ARCH_FLAGS})
    target_compile_definitions(${GGML_CPU_NAME} PRIVATE ${ARCH_DEFINITIONS})

    if (EMSCRIPTEN)
        set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128")
    endif()

    if (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
        # The compiler automatically enables "-ffast-math" which can cause NaNs in tests due to "-fassociative-math"
        target_compile_options(${GGML_CPU_NAME} PRIVATE "-fno-associative-math")
    endif()
endfunction()