mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-05-07 01:24:24 +00:00
Compare commits
101 Commits
ik/i-quant
...
b2382
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
621e86b331 | ||
|
|
77d1ac7e00 | ||
|
|
d894f352bf | ||
|
|
098dbaab44 | ||
|
|
8380ecfb21 | ||
|
|
58308a0ecc | ||
|
|
5b09797321 | ||
|
|
97c09585d6 | ||
|
|
fb215c3832 | ||
|
|
2c4f566c88 | ||
|
|
0db32beaf0 | ||
|
|
8a3012a4ad | ||
|
|
9674aaf35c | ||
|
|
950ba1ab84 | ||
|
|
e1fa9569ba | ||
|
|
fd72d2d2a5 | ||
|
|
c2101a2e90 | ||
|
|
515f7d0d4f | ||
|
|
76e868821a | ||
|
|
e457fb3540 | ||
|
|
af37fd8b30 | ||
|
|
581ed5c4fe | ||
|
|
6cdabe6526 | ||
|
|
89fb735fcf | ||
|
|
55a2a900ff | ||
|
|
2002bc96bf | ||
|
|
ceca1aef07 | ||
|
|
e04e04f8fa | ||
|
|
e25fb4b18f | ||
|
|
1e35d619a6 | ||
|
|
8ced9f7e32 | ||
|
|
652ca2bded | ||
|
|
bd836944f8 | ||
|
|
3de31677d3 | ||
|
|
82cb31eb93 | ||
|
|
b1a4e994fd | ||
|
|
61d1c88e15 | ||
|
|
21b0867433 | ||
|
|
6a87ac3a52 | ||
|
|
29eee40474 | ||
|
|
1d41d6f7c2 | ||
|
|
29ae62d2ae | ||
|
|
e0843afe1b | ||
|
|
a1c6d96ed8 | ||
|
|
efd8533ef8 | ||
|
|
9fa2627347 | ||
|
|
fe52be11e3 | ||
|
|
6d341ab6c5 | ||
|
|
4ffcdce2ff | ||
|
|
a0fc62661f | ||
|
|
7d43c585dc | ||
|
|
82f3e668ad | ||
|
|
5a51cc1bb4 | ||
|
|
67be2ce101 | ||
|
|
231ae28f07 | ||
|
|
475df1d6cf | ||
|
|
87c2e8b279 | ||
|
|
de9692a7d2 | ||
|
|
e6029348e8 | ||
|
|
8ef969afce | ||
|
|
fa974646e1 | ||
|
|
9731134296 | ||
|
|
4a6e2d6142 | ||
|
|
494c870326 | ||
|
|
4d4d2366fc | ||
|
|
c7a0ad8ec9 | ||
|
|
bbde6eb256 | ||
|
|
ef2cd694c4 | ||
|
|
6c32d8c7ad | ||
|
|
802da0091b | ||
|
|
715641391d | ||
|
|
9bf297a02b | ||
|
|
cb5e8f7fc4 | ||
|
|
da3b9ba2b7 | ||
|
|
c29af7e225 | ||
|
|
38d16b1426 | ||
|
|
c2224f003b | ||
|
|
e743386728 | ||
|
|
f49a535686 | ||
|
|
3ab8b3a92e | ||
|
|
9600d59e01 | ||
|
|
5cb02b4a01 | ||
|
|
6ea0f010ff | ||
|
|
f105471ef6 | ||
|
|
38d1521608 | ||
|
|
052051d8ae | ||
|
|
d5ab29757e | ||
|
|
87c91c0766 | ||
|
|
317709b2a8 | ||
|
|
08c5ee87e4 | ||
|
|
78aacf3634 | ||
|
|
8c0e8f4e73 | ||
|
|
2774b0c974 | ||
|
|
5f70671856 | ||
|
|
a693bea1e6 | ||
|
|
adcb12a9ba | ||
|
|
177628bfd8 | ||
|
|
6c4416868d | ||
|
|
efc72253f7 | ||
|
|
7c4263d426 | ||
|
|
cb49e0f8c9 |
@@ -1,5 +1,6 @@
|
||||
{
|
||||
lib,
|
||||
glibc,
|
||||
config,
|
||||
stdenv,
|
||||
mkShell,
|
||||
@@ -30,6 +31,11 @@
|
||||
useRocm ? config.rocmSupport,
|
||||
useVulkan ? false,
|
||||
llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
|
||||
|
||||
# It's necessary to consistently use backendStdenv when building with CUDA support,
|
||||
# otherwise we get libstdc++ errors downstream.
|
||||
effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
|
||||
enableStatic ? effectiveStdenv.hostPlatform.isStatic
|
||||
}@inputs:
|
||||
|
||||
let
|
||||
@@ -41,10 +47,7 @@ let
|
||||
versionOlder
|
||||
;
|
||||
|
||||
# It's necessary to consistently use backendStdenv when building with CUDA support,
|
||||
# otherwise we get libstdc++ errors downstream.
|
||||
stdenv = throw "Use effectiveStdenv instead";
|
||||
effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;
|
||||
|
||||
suffices =
|
||||
lib.optionals useBlas [ "BLAS" ]
|
||||
@@ -167,6 +170,9 @@ effectiveStdenv.mkDerivation (
|
||||
# TODO: Replace with autoAddDriverRunpath
|
||||
# once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
|
||||
cudaPackages.autoAddOpenGLRunpathHook
|
||||
]
|
||||
++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
|
||||
glibc.static
|
||||
];
|
||||
|
||||
buildInputs =
|
||||
@@ -181,7 +187,7 @@ effectiveStdenv.mkDerivation (
|
||||
[
|
||||
(cmakeBool "LLAMA_NATIVE" false)
|
||||
(cmakeBool "LLAMA_BUILD_SERVER" true)
|
||||
(cmakeBool "BUILD_SHARED_LIBS" true)
|
||||
(cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
|
||||
(cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
|
||||
(cmakeBool "LLAMA_BLAS" useBlas)
|
||||
(cmakeBool "LLAMA_CLBLAST" useOpenCL)
|
||||
@@ -190,6 +196,7 @@ effectiveStdenv.mkDerivation (
|
||||
(cmakeBool "LLAMA_METAL" useMetalKit)
|
||||
(cmakeBool "LLAMA_MPI" useMpi)
|
||||
(cmakeBool "LLAMA_VULKAN" useVulkan)
|
||||
(cmakeBool "LLAMA_STATIC" enableStatic)
|
||||
]
|
||||
++ optionals useCuda [
|
||||
(
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
}:
|
||||
|
||||
let
|
||||
optionalInt = cond: x: if cond then x else 0;
|
||||
optionalInt = cond: x: if cond then x else 0;
|
||||
in
|
||||
singularity-tools.buildImage rec {
|
||||
inherit (llama-cpp) name;
|
||||
|
||||
22
.github/workflows/build.yml
vendored
22
.github/workflows/build.yml
vendored
@@ -145,6 +145,28 @@ jobs:
|
||||
cd build
|
||||
ctest -L main --verbose
|
||||
|
||||
ubuntu-22-cmake-vulkan:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
id: checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Dependencies
|
||||
id: depends
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install build-essential libvulkan-dev
|
||||
|
||||
- name: Build
|
||||
id: cmake_build
|
||||
run: |
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DLLAMA_VULKAN=ON ..
|
||||
cmake --build . --config Release -j $(nproc)
|
||||
|
||||
ubuntu-22-cmake-sycl:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
|
||||
@@ -3,12 +3,14 @@ name: Python check requirements.txt
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- '.github/workflows/python-check-requirements.yml'
|
||||
- 'scripts/check-requirements.sh'
|
||||
- 'convert*.py'
|
||||
- 'requirements.txt'
|
||||
- 'requirements/*.txt'
|
||||
pull_request:
|
||||
paths:
|
||||
- '.github/workflows/python-check-requirements.yml'
|
||||
- 'scripts/check-requirements.sh'
|
||||
- 'convert*.py'
|
||||
- 'requirements.txt'
|
||||
@@ -26,4 +28,4 @@ jobs:
|
||||
with:
|
||||
python-version: "3.11"
|
||||
- name: Run check-requirements.sh script
|
||||
run: bash scripts/check-requirements.sh nocleanup
|
||||
run: bash scripts/check-requirements.sh
|
||||
|
||||
25
.github/workflows/server.yml
vendored
25
.github/workflows/server.yml
vendored
@@ -3,6 +3,11 @@ name: Server
|
||||
|
||||
on:
|
||||
workflow_dispatch: # allows manual triggering
|
||||
inputs:
|
||||
slow_tests:
|
||||
description: 'Run slow tests'
|
||||
required: true
|
||||
type: boolean
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
@@ -10,6 +15,8 @@ on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/tests/**.*']
|
||||
schedule:
|
||||
- cron: '0 0 * * *'
|
||||
|
||||
jobs:
|
||||
server:
|
||||
@@ -51,7 +58,8 @@ jobs:
|
||||
cmake \
|
||||
python3-pip \
|
||||
wget \
|
||||
psmisc
|
||||
psmisc \
|
||||
language-pack-en
|
||||
|
||||
- name: Build
|
||||
id: cmake_build
|
||||
@@ -70,14 +78,15 @@ jobs:
|
||||
run: |
|
||||
pip install -r examples/server/tests/requirements.txt
|
||||
|
||||
- name: Download models
|
||||
id: download_models
|
||||
run: |
|
||||
cd examples/server/tests
|
||||
../../../scripts/hf.sh --repo ggml-org/models --file tinyllamas/stories260K.gguf
|
||||
|
||||
- name: Tests
|
||||
id: server_integration_test
|
||||
id: server_integration_tests
|
||||
run: |
|
||||
cd examples/server/tests
|
||||
PORT=8888 ./tests.sh
|
||||
|
||||
- name: Slow tests
|
||||
id: server_integration_tests_slow
|
||||
if: ${{ github.event.schedule != '' && matrix.build_type == 'Release' || github.event.inputs.slow_tests == 'true' }}
|
||||
run: |
|
||||
cd examples/server/tests
|
||||
PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow
|
||||
|
||||
@@ -199,7 +199,8 @@ if (LLAMA_METAL)
|
||||
# get full path to the file
|
||||
#add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
|
||||
|
||||
# copy ggml-metal.metal to bin directory
|
||||
# copy ggml-common.h and ggml-metal.metal to bin directory
|
||||
configure_file(ggml-common.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h COPYONLY)
|
||||
configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY)
|
||||
|
||||
if (LLAMA_METAL_EMBED_LIBRARY)
|
||||
|
||||
13
Makefile
13
Makefile
@@ -201,6 +201,10 @@ ifdef LLAMA_SERVER_VERBOSE
|
||||
MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
|
||||
endif
|
||||
|
||||
ifdef LLAMA_SERVER_SSL
|
||||
MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT
|
||||
MK_LDFLAGS += -lssl -lcrypto
|
||||
endif
|
||||
|
||||
ifdef LLAMA_CODE_COVERAGE
|
||||
MK_CXXFLAGS += -fprofile-arcs -ftest-coverage -dumpbase ''
|
||||
@@ -449,7 +453,7 @@ endif # LLAMA_CUDA_PEER_MAX_BATCH_SIZE
|
||||
ifdef LLAMA_CUDA_CCBIN
|
||||
MK_NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
|
||||
endif
|
||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml-common.h
|
||||
ifdef JETSON_EOL_MODULE_DETECT
|
||||
$(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
|
||||
else
|
||||
@@ -626,7 +630,7 @@ ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
|
||||
ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h
|
||||
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o
|
||||
@@ -724,10 +728,9 @@ save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(C
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
|
||||
|
||||
server: examples/server/server.cpp examples/server/oai.hpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h examples/llava/llava.h examples/llava/llava.cpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
||||
server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
||||
$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
|
||||
$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h %.hpp $< examples/llava/clip.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) -o $@ $(LDFLAGS) $(LWINSOCK2)
|
||||
$(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)
|
||||
|
||||
gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# llama.cpp for SYCL
|
||||
|
||||
- [Background](#background)
|
||||
- [News](#news)
|
||||
- [OS](#os)
|
||||
- [Intel GPU](#intel-gpu)
|
||||
- [Docker](#docker)
|
||||
@@ -25,6 +26,21 @@ The llama.cpp for SYCL is used to support Intel GPUs.
|
||||
|
||||
For Intel CPU, recommend to use llama.cpp for X86 (Intel MKL building).
|
||||
|
||||
## News
|
||||
|
||||
- 2024.3
|
||||
- Support multiple cards: **--split-mode**: [none|layer]; not support [row], it's on developing.
|
||||
- Support to assign main GPU by **--main-gpu**, replace $GGML_SYCL_DEVICE.
|
||||
- Support detecting all GPUs with level-zero and same top **Max compute units**.
|
||||
- Support OPs
|
||||
- hardsigmoid
|
||||
- hardswish
|
||||
- pool2d
|
||||
|
||||
- 2024.1
|
||||
- Create SYCL backend for Intel GPU.
|
||||
- Support Windows build
|
||||
|
||||
## OS
|
||||
|
||||
|OS|Status|Verified|
|
||||
@@ -449,6 +465,7 @@ Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
|
||||
|-|-|-|
|
||||
|GGML_SYCL_DEVICE|0 (default) or 1|Set the device id used. Check the device ids by default running output|
|
||||
|GGML_SYCL_DEBUG|0 (default) or 1|Enable log function by macro: GGML_SYCL_DEBUG|
|
||||
|ZES_ENABLE_SYSMAN| 0 (default) or 1|Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory.<br>Recommended to use when --split-mode = layer|
|
||||
|
||||
## Known Issue
|
||||
|
||||
@@ -458,6 +475,10 @@ Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
|
||||
|
||||
Solution: add **--no-mmap** or **--mmap 0**.
|
||||
|
||||
- Split-mode: [row] is not supported
|
||||
|
||||
It's on developing.
|
||||
|
||||
## Q&A
|
||||
|
||||
- Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.
|
||||
|
||||
23
README.md
23
README.md
@@ -8,12 +8,20 @@
|
||||
|
||||
Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++
|
||||
|
||||
> [!IMPORTANT]
|
||||
> **Quantization blind testing: https://github.com/ggerganov/llama.cpp/discussions/5962**
|
||||
>
|
||||
> Vote for which quantization type provides better responses, all other parameters being the same.
|
||||
|
||||
### Recent API changes
|
||||
|
||||
- [2024 Mar 8] `llama_kv_cache_seq_rm()` returns a `bool` instead of `void`, and new `llama_n_max_seq()` returns the upper limit of acceptable `seq_id` in batches (relevant when dealing with multiple sequences) https://github.com/ggerganov/llama.cpp/pull/5328
|
||||
- [2024 Mar 4] Embeddings API updated https://github.com/ggerganov/llama.cpp/pull/5796
|
||||
- [2024 Mar 3] `struct llama_context_params` https://github.com/ggerganov/llama.cpp/pull/5849
|
||||
|
||||
### Hot topics
|
||||
|
||||
- Support for chat templates: [Wiki (contributions welcome)](https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template)
|
||||
- Support for Gemma models: https://github.com/ggerganov/llama.cpp/pull/5631
|
||||
- Non-linear quantization IQ4_NL: https://github.com/ggerganov/llama.cpp/pull/5590
|
||||
- Looking for contributions to improve and maintain the `server` example: https://github.com/ggerganov/llama.cpp/issues/4216
|
||||
- Initial Mamba support has been added: https://github.com/ggerganov/llama.cpp/pull/5328
|
||||
|
||||
----
|
||||
|
||||
@@ -104,10 +112,11 @@ Typically finetunes of the base models below are supported as well.
|
||||
- [x] [InternLM2](https://huggingface.co/models?search=internlm2)
|
||||
- [x] [CodeShell](https://github.com/WisdomShell/codeshell)
|
||||
- [x] [Gemma](https://ai.google.dev/gemma)
|
||||
- [x] [Mamba](https://github.com/state-spaces/mamba)
|
||||
|
||||
**Multimodal models:**
|
||||
|
||||
- [x] [LLaVA 1.5 models](https://huggingface.co/collections/liuhaotian/llava-15-653aac15d994e992e2677a7e)
|
||||
- [x] [LLaVA 1.5 models](https://huggingface.co/collections/liuhaotian/llava-15-653aac15d994e992e2677a7e), [LLaVA 1.6 models](https://huggingface.co/collections/liuhaotian/llava-16-65b9e40155f60fd046a5ccf2)
|
||||
- [x] [BakLLaVA](https://huggingface.co/models?search=SkunkworksAI/Bakllava)
|
||||
- [x] [Obsidian](https://huggingface.co/NousResearch/Obsidian-3B-V0.5)
|
||||
- [x] [ShareGPT4V](https://huggingface.co/models?search=Lin-Chen/ShareGPT4V)
|
||||
@@ -785,7 +794,7 @@ And after 4.45 hours, you will have the final perplexity.
|
||||
### Interactive mode
|
||||
|
||||
If you want a more ChatGPT-like experience, you can run in interactive mode by passing `-i` as a parameter.
|
||||
In this mode, you can always interrupt generation by pressing Ctrl+C and entering one or more lines of text, which will be converted into tokens and appended to the current context. You can also specify a *reverse prompt* with the parameter `-r "reverse prompt string"`. This will result in user input being prompted whenever the exact tokens of the reverse prompt string are encountered in the generation. A typical use is to use a prompt that makes LLaMa emulate a chat between multiple users, say Alice and Bob, and pass `-r "Alice:"`.
|
||||
In this mode, you can always interrupt generation by pressing Ctrl+C and entering one or more lines of text, which will be converted into tokens and appended to the current context. You can also specify a *reverse prompt* with the parameter `-r "reverse prompt string"`. This will result in user input being prompted whenever the exact tokens of the reverse prompt string are encountered in the generation. A typical use is to use a prompt that makes LLaMA emulate a chat between multiple users, say Alice and Bob, and pass `-r "Alice:"`.
|
||||
|
||||
Here is an example of a few-shot interaction, invoked with the command
|
||||
|
||||
@@ -849,7 +858,7 @@ Sample run:
|
||||
```
|
||||
== Running in interactive mode. ==
|
||||
- Press Ctrl+C to interject at any time.
|
||||
- Press Return to return control to LLaMa.
|
||||
- Press Return to return control to LLaMA.
|
||||
- If you want to submit another line, end your input in '\'.
|
||||
|
||||
Below is an instruction that describes a task. Write a response that appropriately completes the request.
|
||||
|
||||
116
awq-py/README.md
116
awq-py/README.md
@@ -1,116 +0,0 @@
|
||||
# AWQ: Activation-aware Weight Quantization for LLM - version apply to llamacpp
|
||||
[[Paper](https://arxiv.org/abs/2306.00978)][[Original Repo](https://github.com/mit-han-lab/llm-awq)][[Easy-to-use Repo](https://github.com/casper-hansen/AutoAWQ)]
|
||||
|
||||
**Supported models:**
|
||||
|
||||
- [X] LLaMA
|
||||
- [x] LLaMA 2
|
||||
- [X] MPT
|
||||
- [X] Mistral AI v0.1
|
||||
- [ ] Bloom
|
||||
- [ ] Mixtral MoE
|
||||
|
||||
**TODO:**
|
||||
- [x] Update version work with both MPT and MPT-AWQ model
|
||||
- [ ] Add OPT model
|
||||
- [ ] Add Bloom model
|
||||
- [ ] Add Mixtral MoE
|
||||
- [ ] Support w3, w2
|
||||
|
||||
|
||||
## Contents
|
||||
|
||||
- [Install](##Install)
|
||||
- [Convert](##Convert)
|
||||
- [Quantize](##Quantize)
|
||||
- [Test](##Test)
|
||||
- [Benchmark](##Benchmark)
|
||||
- [Results](##Results)
|
||||
|
||||
## Install
|
||||
Install requirements
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
Get the pre-computed AWQ search results for multiple model families, including LLaMA, LLaMA2, MPT, OPT
|
||||
```bash
|
||||
git clone https://huggingface.co/datasets/mit-han-lab/awq-model-zoo awq_cache
|
||||
```
|
||||
|
||||
## Convert
|
||||
Example for llama model
|
||||
```bash
|
||||
# For llama7b and llama2 models
|
||||
python convert.py models/llama-7b/ --awq-path awq_cache/llama-7b-w4-g128.pt --outfile models/llama_7b_fp16.gguf
|
||||
# For mistral and mpt models
|
||||
python convert-hf-to-gguf.py models/mpt-7b/ --awq-path awq_cache/mpt-7b-w4-g128.pt --outfile models/mpt_7b_fp16.gguf
|
||||
```
|
||||
|
||||
## Quantize
|
||||
```bash
|
||||
# We only benchmark and confirm the results on q4_0, q4_1, and q2_k types.
|
||||
./quantize models/llama_7b_fp16.gguf models/llama_7b_q4_0.gguf q4_0
|
||||
```
|
||||
|
||||
## Test
|
||||
```bash
|
||||
# For all models.
|
||||
./build/bin/main -m models/llama_7b_q4_0.gguf -n 128 --prompt "Once upon a time"
|
||||
```
|
||||
|
||||
## Benchmark
|
||||
The perplexity measurements in table above are done against the `wikitext2` test dataset (https://paperswithcode.com/dataset/wikitext-2), with context length of 512.
|
||||
```bash
|
||||
# For llama and llama2, and mistral models.
|
||||
./perplexity -m models/llama_7b_q4_0.gguf -f datasets/wikitext-2-raw/wiki.test.raw
|
||||
```
|
||||
|
||||
## Results
|
||||
Results are run on OpenBLAS (CPU) and CuBLAS (GPU) for fair comparison
|
||||
We use three types of llamacpp quantization methods to work with our version, including q4_0, q4_1, and q2_k
|
||||
|
||||
### Llama 7B (Build with OpenBLAS)
|
||||
|
||||
| Model | Measure | F16 | Q4_0 | Q4_1 | Q2_K |
|
||||
|-----------:|--------------|-------:|-------:|-------:|-------:|
|
||||
|Llama 7B | perplexity | 5.9066 | 6.1214 | 6.0643 | 6.5808 |
|
||||
|Llama 7B | file size | 12.9G | 3.5G | 3.9G | 2.7G |
|
||||
|Llama 7B | bits/weight | 16.0 | 4.5 | 5.0 | 2.6 |
|
||||
|AWQ-LLama 7B| perplexity | 5.9175 | 6.0252 | 5.9987 | 6.3692 |
|
||||
|AWQ-LLama 7B| file size | 12.9G | 3.5G | 3.9G | 2.7G |
|
||||
|AWQ-LLama 7B| bits/weight | 16.0 | 4.5 | 5.0 | 2.6 |
|
||||
|
||||
|
||||
### Llama2 7B (Build with CuBLAS)
|
||||
|
||||
| Model | Measure | F16 | Q4_0 | Q4_1 | Q2_K |
|
||||
|------------:|--------------|-------:|-------:|-------:|-------:|
|
||||
|Llama2 7B | perplexity | 5.8664 | 6.0260 | 6.0656 | 6.4496 |
|
||||
|Llama2 7B | file size | 12.9G | 3.5G | 3.9G | 2.7G |
|
||||
|Llama2 7B | bits/weight | 16.0 | 4.5 | 5.0 | 2.6 |
|
||||
|AWQ-LLama2 7B| perplexity | 5.8801 | 6.0054 | 5.9849 | 6.3650 |
|
||||
|AWQ-LLama2 7B| file size | 12.9G | 3.5G | 3.9G | 2.7G |
|
||||
|AWQ-LLama2 7B| bits/weight | 16.0 | 4.5 | 5.0 | 2.6 |
|
||||
|
||||
|
||||
### Mistral 7B v0.1 (Build with CuBLAS)
|
||||
|
||||
| Model | Measure | F16 | Q4_0 | Q4_1 | Q2_K |
|
||||
|-------------:|--------------|-------:|-------:|-------:|-------:|
|
||||
|Mistral 7B | perplexity | 5.6931 | 5.8202 | 5.8268 | 6.1645 |
|
||||
|Mistral 7B | file size | 14.5G | 4.1G | 4.5G | 3.1G |
|
||||
|Mistral 7B | bits/weight | 16.0 | 4.5 | 5.0 | 2.6 |
|
||||
|AWQ-Mistral 7B| perplexity | 5.6934 | 5.8020 | 5.7691 | 6.0426 |
|
||||
|AWQ-Mistral 7B| file size | 14.5G | 4.1G | 4.5G | 3.1G |
|
||||
|AWQ-Mistral 7B| bits/weight | 16.0 | 4.5 | 5.0 | 2.6 |
|
||||
|
||||
### MPT 7B (Build with OpenBLAS)
|
||||
|
||||
| Model | Measure | F16 | Q4_0 | Q4_1 | Q2_K |
|
||||
|---------:|--------------|-------:|-------:|-------:|--------:|
|
||||
|MPT 7B | perplexity | 8.4369 | 8.7956 | 8.6265 | 11.4913 |
|
||||
|MPT 7B | file size | 13.7G | 3.9G | 4.3G | 2.8G |
|
||||
|MPT 7B | bits/weight | 16.0 | 4.5 | 5.0 | 2.6 |
|
||||
|AWQ-MPT 7B| perplexity | 8.4944 | 8.7053 | 8.6750 | 10.2873|
|
||||
|AWQ-MPT 7B| file size | 13.7G | 3.9G | 4.3G | 2.8G |
|
||||
|AWQ-MPT 7B| bits/weight | 16.0 | 4.5 | 5.0 | 2.6 |
|
||||
@@ -1,254 +0,0 @@
|
||||
"""
|
||||
Implements the AWQ for llama.cpp use cases.
|
||||
Original paper: https://arxiv.org/abs/2306.00978
|
||||
|
||||
This code is based on versions of the AWQ implementation found in the following repositories:
|
||||
* https://github.com/mit-han-lab/llm-awq
|
||||
* https://github.com/casper-hansen/AutoAWQ
|
||||
"""
|
||||
|
||||
import os
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from transformers import AutoModelForCausalLM, AutoConfig
|
||||
from transformers.models.bloom.modeling_bloom import BloomGelu
|
||||
from transformers.models.llama.modeling_llama import LlamaRMSNorm
|
||||
from transformers.activations import GELUActivation
|
||||
|
||||
|
||||
class ScaledActivation(nn.Module):
|
||||
"""
|
||||
ScaledActivation module wraps an existing activation function and applies a
|
||||
scale factor to its output.
|
||||
|
||||
Args:
|
||||
module (nn.Module): The activation function to be scaled.
|
||||
scales (torch.Tensor): A tensor of size (num_features,) containing the initial
|
||||
scale factors for each feature.
|
||||
|
||||
Returns:
|
||||
torch.Tensor: The scaled output of the activation function.
|
||||
"""
|
||||
|
||||
def __init__(self, module, scales):
|
||||
super().__init__()
|
||||
self.act = module
|
||||
self.scales = nn.Parameter(scales.data)
|
||||
|
||||
def forward(self, x):
|
||||
return self.act(x) / self.scales.view(1, 1, -1).to(x.device)
|
||||
|
||||
|
||||
def set_op_by_name(layer, name, new_module):
|
||||
"""
|
||||
Set the new module for given module's name.
|
||||
|
||||
Args:
|
||||
layer (nn.Module): The layer in which to replace the submodule.
|
||||
name (str): The path to the submodule to be replaced, using dot notation
|
||||
to access nested modules.
|
||||
new_module (nn.Module): The new module to replace the existing one.
|
||||
"""
|
||||
levels = name.split(".")
|
||||
if len(levels) > 1:
|
||||
mod_ = layer
|
||||
for l_idx in range(len(levels) - 1):
|
||||
if levels[l_idx].isdigit():
|
||||
mod_ = mod_[int(levels[l_idx])]
|
||||
else:
|
||||
mod_ = getattr(mod_, levels[l_idx])
|
||||
setattr(mod_, levels[-1], new_module)
|
||||
else:
|
||||
setattr(layer, name, new_module)
|
||||
|
||||
|
||||
def get_op_by_name(module, op_name):
|
||||
"""
|
||||
Retrieves a submodule within a given layer based on its name.
|
||||
|
||||
Args:
|
||||
module (nn.Module): The layer containing the submodule to find.
|
||||
op_name (str): The name of the submodule.
|
||||
|
||||
Returns:
|
||||
nn.Module: The requested submodule found within the given layer.
|
||||
|
||||
Raises:
|
||||
ValueError: If the specified submodule cannot be found within the layer.
|
||||
"""
|
||||
for name, m in module.named_modules():
|
||||
if name == op_name:
|
||||
return m
|
||||
raise ValueError(f"Cannot find op {op_name} in module {module}")
|
||||
|
||||
|
||||
@torch.no_grad()
|
||||
def scale_ln_fcs(ln, fcs, scales):
|
||||
"""
|
||||
Scales the weights of a LayerNorm and a list of fully-connected layers proportionally.
|
||||
|
||||
Args:
|
||||
ln (nn.LayerNorm): The LayerNorm module to be scaled.
|
||||
fcs (List[nn.Linear]): A list of fully-connected layers to be scaled.
|
||||
scales (torch.Tensor): A 1D tensor of size (num_features,).
|
||||
"""
|
||||
|
||||
if not isinstance(fcs, list):
|
||||
fcs = [fcs]
|
||||
|
||||
scales = scales.to(ln.weight.device)
|
||||
|
||||
ln.weight.div_(scales)
|
||||
if hasattr(ln, "bias") and ln.bias is not None:
|
||||
ln.bias.div_(scales)
|
||||
|
||||
for fc in fcs:
|
||||
fc.weight.mul_(scales.view(1, -1))
|
||||
|
||||
for p in ln.parameters():
|
||||
assert torch.isnan(p).sum() == 0
|
||||
for fc in fcs:
|
||||
for p in fc.parameters():
|
||||
assert torch.isnan(p).sum() == 0
|
||||
|
||||
|
||||
@torch.no_grad()
|
||||
def scale_fc_fc(fc1, fc2, scales):
|
||||
"""
|
||||
Scales the weights of two fully-connected layers in a specific pattern.
|
||||
|
||||
Args:
|
||||
fc1 (nn.Linear): The first fully-connected layer to be scaled.
|
||||
fc2 (nn.Linear): The second fully-connected layer to be scaled.
|
||||
scales (torch.Tensor): A 1D tensor of size (num_features,).
|
||||
"""
|
||||
assert isinstance(fc1, nn.Linear)
|
||||
assert isinstance(fc2, nn.Linear)
|
||||
|
||||
scales = scales.to(fc1.weight.device)
|
||||
|
||||
fc1.weight[-scales.size(0):].div_(scales.view(-1, 1))
|
||||
if fc1.bias is not None:
|
||||
fc1.bias.div_(scales.view(-1))
|
||||
|
||||
fc2.weight.mul_(scales.view(1, -1))
|
||||
|
||||
for p in fc1.parameters():
|
||||
assert torch.isnan(p).sum() == 0
|
||||
for p in fc2.parameters():
|
||||
assert torch.isnan(p).sum() == 0
|
||||
|
||||
|
||||
@torch.no_grad()
|
||||
def scale_gelu_fc(gelu, fc, scales):
|
||||
"""
|
||||
Scales the weight of a GELU activation and a fully-connected layer proportionally.
|
||||
|
||||
Args:
|
||||
gelu (Union[nn.GELU, BloomGelu, GELUActivation]): The GELU activation module to be scaled.
|
||||
fc (nn.Linear): The fully-connected layer to be scaled.
|
||||
scales (torch.Tensor): A 1D tensor of size (num_features,).
|
||||
|
||||
Raises:
|
||||
TypeError: If the `gelu` module is not of type `nn.GELU`, `BloomGelu`, or `GELUActivation`.
|
||||
TypeError: If the `fc` module is not of type `nn.Linear`.
|
||||
"""
|
||||
assert isinstance(gelu, (nn.GELU, BloomGelu, GELUActivation))
|
||||
assert isinstance(fc, nn.Linear)
|
||||
|
||||
fc.weight.mul_(scales.view(1, -1).to(fc.weight.device))
|
||||
|
||||
for p in fc.parameters():
|
||||
assert torch.isnan(p).sum() == 0
|
||||
|
||||
|
||||
def apply_scale(module, scales_list, input_feat_dict=None):
|
||||
"""
|
||||
Applies different scaling strategies to layers based on their type and hierarchy within a given module.
|
||||
|
||||
Args:
|
||||
module (nn.Module): The module containing the layers to be scaled.
|
||||
scales_list (List[Tuple[str, List[str], torch.Tensor]]): A list of tuples containing:
|
||||
* prev_op_name (str): The name of the preceding operation or module,
|
||||
relative to which the layers to be scaled are located.
|
||||
* layer_names (List[str]): A list of names of the layers to be scaled, relative to the preceding operation.
|
||||
* scales (torch.Tensor): A 1D tensor of size (num_features,) containing the scaling factors for each feature.
|
||||
input_feat_dict (Optional[Dict[str, torch.Tensor]]): A dictionary mapping layer names to their corresponding
|
||||
input features (optional).
|
||||
"""
|
||||
for prev_op_name, layer_names, scales in scales_list:
|
||||
prev_op = get_op_by_name(module, prev_op_name)
|
||||
layers = [get_op_by_name(module, name) for name in layer_names]
|
||||
|
||||
prev_op.cuda()
|
||||
for layer in layers:
|
||||
layer.cuda()
|
||||
scales.cuda()
|
||||
|
||||
if isinstance(prev_op, nn.Linear):
|
||||
assert len(layers) == 1
|
||||
scale_fc_fc(prev_op, layers[0], scales)
|
||||
elif isinstance(prev_op, (nn.LayerNorm, LlamaRMSNorm)) or "rmsnorm" in str(prev_op.__class__).lower():
|
||||
scale_ln_fcs(prev_op, layers, scales)
|
||||
elif isinstance(prev_op, (nn.GELU, BloomGelu, GELUActivation)):
|
||||
new_module = ScaledActivation(prev_op, scales)
|
||||
set_op_by_name(module, prev_op_name, new_module)
|
||||
scale_gelu_fc(prev_op, layers[0], scales)
|
||||
else:
|
||||
raise NotImplementedError(f"prev_op {type(prev_op)} not supported yet!")
|
||||
|
||||
# apply the scaling to input feat if given; prepare it for clipping
|
||||
if input_feat_dict is not None:
|
||||
for layer_name in layer_names:
|
||||
inp = input_feat_dict[layer_name]
|
||||
inp.div_(scales.view(1, -1).to(inp.device))
|
||||
|
||||
prev_op.cpu()
|
||||
for layer in layers:
|
||||
layer.cpu()
|
||||
scales.cpu()
|
||||
|
||||
|
||||
@torch.no_grad()
|
||||
def apply_clip(module, clip_list):
|
||||
"""
|
||||
Applies element-wise clipping to the weight of a specific layer within a given module.
|
||||
|
||||
Args:
|
||||
module (nn.Module): The module containing the layer to be clipped.
|
||||
clip_list (List[Tuple[str, torch.Tensor]]): A list of tuples containing:
|
||||
* name (str): The name of the layer to be clipped, relative to the root of the module.
|
||||
* max_val (torch.Tensor): A 1D or 2D tensor defining the upper bound for each element of the layer's weight.
|
||||
"""
|
||||
for name, max_val in clip_list:
|
||||
layer = get_op_by_name(module, name)
|
||||
layer.cuda()
|
||||
max_val = max_val.to(layer.weight.device)
|
||||
org_shape = layer.weight.shape
|
||||
layer.weight.data = layer.weight.data.reshape(*max_val.shape[:2], -1)
|
||||
layer.weight.data = torch.clamp(layer.weight.data, -max_val, max_val)
|
||||
layer.weight.data = layer.weight.data.reshape(org_shape)
|
||||
layer.cpu()
|
||||
|
||||
|
||||
def add_scale_weights(model_path, scale_path, tmp_path):
|
||||
"""
|
||||
Adds pre-computed Activation Weight Quantization (AWQ) results to a model,
|
||||
including scaling factors and clipping bounds.
|
||||
|
||||
Args:
|
||||
model_path (str): Path to the pre-trained model to be equipped with AWQ.
|
||||
scale_path (str): Path to the AWQ scale factors (.pt file).
|
||||
tmp_path (str): Path to the temporary directory where the equipped model will be saved.
|
||||
"""
|
||||
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_path, config=config, trust_remote_code=True
|
||||
)
|
||||
model.eval()
|
||||
awq_results = torch.load(str(scale_path), map_location="cpu")
|
||||
apply_scale(model, awq_results["scale"])
|
||||
apply_clip(model, awq_results["clip"])
|
||||
model.save_pretrained(str(tmp_path))
|
||||
os.system(f"cp {str(model_path)}/tokenizer* {str(tmp_path)}")
|
||||
@@ -1,2 +0,0 @@
|
||||
torch>=2.1.1
|
||||
transformers>=4.32.0
|
||||
37
ci/run.sh
37
ci/run.sh
@@ -45,7 +45,8 @@ fi
|
||||
|
||||
if [ ! -z ${GG_BUILD_SYCL} ]; then
|
||||
if [ -z ${ONEAPI_ROOT} ]; then
|
||||
echo "Not detected ONEAPI_ROOT, please install oneAPI base toolkit and enable it by:\n source /opt/intel/oneapi/setvars.sh"
|
||||
echo "Not detected ONEAPI_ROOT, please install oneAPI base toolkit and enable it by:"
|
||||
echo "source /opt/intel/oneapi/setvars.sh"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -272,19 +273,19 @@ function gg_run_open_llama_3b_v2 {
|
||||
(time ./bin/main --model ${model_q5_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||
(time ./bin/main --model ${model_q6_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||
|
||||
(time ./bin/perplexity --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||
(time ./bin/perplexity --model ${model_q8_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||
(time ./bin/perplexity --model ${model_q4_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||
(time ./bin/perplexity --model ${model_q4_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||
(time ./bin/perplexity --model ${model_q5_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||
(time ./bin/perplexity --model ${model_q5_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||
(time ./bin/perplexity --model ${model_q2_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||
(time ./bin/perplexity --model ${model_q3_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||
(time ./bin/perplexity --model ${model_q4_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||
(time ./bin/perplexity --model ${model_q5_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||
(time ./bin/perplexity --model ${model_q6_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||
(time ./bin/perplexity --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||
(time ./bin/perplexity --model ${model_q8_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||
(time ./bin/perplexity --model ${model_q4_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||
(time ./bin/perplexity --model ${model_q4_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||
(time ./bin/perplexity --model ${model_q5_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||
(time ./bin/perplexity --model ${model_q5_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||
(time ./bin/perplexity --model ${model_q2_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||
(time ./bin/perplexity --model ${model_q3_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||
(time ./bin/perplexity --model ${model_q4_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||
(time ./bin/perplexity --model ${model_q5_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||
(time ./bin/perplexity --model ${model_q6_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||
|
||||
(time ./bin/imatrix --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
||||
(time ./bin/imatrix --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
||||
|
||||
(time ./bin/save-load-state --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||
|
||||
@@ -343,17 +344,17 @@ function gg_run_open_llama_3b_v2 {
|
||||
python3 ../convert-lora-to-ggml.py ${path_lora}
|
||||
|
||||
# f16
|
||||
(time ./bin/perplexity --model ${model_f16} -f ${shakespeare} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-f16.log
|
||||
(time ./bin/perplexity --model ${model_f16} -f ${shakespeare} --lora ${lora_shakespeare} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-f16.log
|
||||
(time ./bin/perplexity --model ${model_f16} -f ${shakespeare} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-f16.log
|
||||
(time ./bin/perplexity --model ${model_f16} -f ${shakespeare} --lora ${lora_shakespeare} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-f16.log
|
||||
compare_ppl "f16 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-f16.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log
|
||||
|
||||
# q8_0
|
||||
(time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-q8_0.log
|
||||
(time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0.log
|
||||
(time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-q8_0.log
|
||||
(time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0.log
|
||||
compare_ppl "q8_0 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log
|
||||
|
||||
# q8_0 + f16 lora-base
|
||||
(time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} --lora-base ${model_f16} -c 128 -b 128 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log
|
||||
(time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} --lora-base ${model_f16} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log
|
||||
compare_ppl "q8_0 / f16 base shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log
|
||||
|
||||
set +e
|
||||
|
||||
@@ -19,7 +19,12 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(GIT_INDEX "${GIT_DIR}/index")
|
||||
if(EXISTS "${GIT_DIR}/index")
|
||||
set(GIT_INDEX "${GIT_DIR}/index")
|
||||
else()
|
||||
message(WARNING "Git index not found in git repository.")
|
||||
set(GIT_INDEX "")
|
||||
endif()
|
||||
else()
|
||||
message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.")
|
||||
set(GIT_INDEX "")
|
||||
|
||||
@@ -335,6 +335,16 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
|
||||
break;
|
||||
}
|
||||
params.yarn_beta_slow = std::stof(argv[i]);
|
||||
} else if (arg == "--pooling") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
std::string value(argv[i]);
|
||||
/**/ if (value == "none") { params.pooling_type = LLAMA_POOLING_TYPE_NONE; }
|
||||
else if (value == "mean") { params.pooling_type = LLAMA_POOLING_TYPE_MEAN; }
|
||||
else if (value == "cls") { params.pooling_type = LLAMA_POOLING_TYPE_CLS; }
|
||||
else { invalid_param = true; break; }
|
||||
} else if (arg == "--defrag-thold" || arg == "-dt") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
@@ -503,12 +513,6 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
|
||||
break;
|
||||
}
|
||||
params.n_sequences = std::stoi(argv[i]);
|
||||
} else if (arg == "--p-accept" || arg == "-pa") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.p_accept = std::stof(argv[i]);
|
||||
} else if (arg == "--p-split" || arg == "-ps") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
@@ -640,6 +644,10 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
|
||||
} else if (arg_next == "layer") {
|
||||
params.split_mode = LLAMA_SPLIT_MODE_LAYER;
|
||||
} else if (arg_next == "row") {
|
||||
#ifdef GGML_USE_SYCL
|
||||
fprintf(stderr, "warning: The split mode value:[row] is not supported by llama.cpp with SYCL. It's developing.\nExit!\n");
|
||||
exit(1);
|
||||
#endif // GGML_USE_SYCL
|
||||
params.split_mode = LLAMA_SPLIT_MODE_ROW;
|
||||
} else {
|
||||
invalid_param = true;
|
||||
@@ -1010,12 +1018,14 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
|
||||
printf(" --yarn-attn-factor N YaRN: scale sqrt(t) or attention magnitude (default: 1.0)\n");
|
||||
printf(" --yarn-beta-slow N YaRN: high correction dim or alpha (default: %.1f)\n", params.yarn_beta_slow);
|
||||
printf(" --yarn-beta-fast N YaRN: low correction dim or beta (default: %.1f)\n", params.yarn_beta_fast);
|
||||
printf(" --pooling {none,mean,cls}\n");
|
||||
printf(" pooling type for embeddings, use model default if unspecified\n");
|
||||
printf(" -dt N, --defrag-thold N\n");
|
||||
printf(" KV cache defragmentation threshold (default: %.1f, < 0 - disabled)\n", params.defrag_thold);
|
||||
printf(" --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n");
|
||||
printf(" --no-penalize-nl do not penalize newline token\n");
|
||||
printf(" --temp N temperature (default: %.1f)\n", (double)sparams.temp);
|
||||
printf(" --logits-all return logits for all tokens in the batch (default: disabled)\n");
|
||||
printf(" --all-logits return logits for all tokens in the batch (default: disabled)\n");
|
||||
printf(" --hellaswag compute HellaSwag score over random tasks from datafile supplied with -f\n");
|
||||
printf(" --hellaswag-tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks);
|
||||
printf(" --winogrande compute Winogrande score over random tasks from datafile supplied with -f\n");
|
||||
@@ -1028,7 +1038,6 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
|
||||
printf(" --chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks);
|
||||
printf(" -np N, --parallel N number of parallel sequences to decode (default: %d)\n", params.n_parallel);
|
||||
printf(" -ns N, --sequences N number of sequences to decode (default: %d)\n", params.n_sequences);
|
||||
printf(" -pa N, --p-accept N speculative decoding accept probability (default: %.1f)\n", (double)params.p_accept);
|
||||
printf(" -ps N, --p-split N speculative decoding split probability (default: %.1f)\n", (double)params.p_split);
|
||||
printf(" -cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: disabled)\n");
|
||||
printf(" --mmproj MMPROJ_FILE path to a multimodal projector file for LLaVA. see examples/llava/README.md\n");
|
||||
@@ -1279,12 +1288,12 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
|
||||
|
||||
cparams.n_ctx = params.n_ctx;
|
||||
cparams.n_batch = params.n_batch;
|
||||
cparams.n_parallel = params.n_parallel;
|
||||
cparams.n_threads = params.n_threads;
|
||||
cparams.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
|
||||
cparams.mul_mat_q = params.mul_mat_q;
|
||||
cparams.seed = params.seed;
|
||||
cparams.logits_all = params.logits_all;
|
||||
cparams.embedding = params.embedding;
|
||||
cparams.embeddings = params.embedding;
|
||||
cparams.rope_scaling_type = params.rope_scaling_type;
|
||||
cparams.rope_freq_base = params.rope_freq_base;
|
||||
cparams.rope_freq_scale = params.rope_freq_scale;
|
||||
@@ -1293,6 +1302,7 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
|
||||
cparams.yarn_beta_fast = params.yarn_beta_fast;
|
||||
cparams.yarn_beta_slow = params.yarn_beta_slow;
|
||||
cparams.yarn_orig_ctx = params.yarn_orig_ctx;
|
||||
cparams.pooling_type = params.pooling_type;
|
||||
cparams.defrag_thold = params.defrag_thold;
|
||||
cparams.offload_kqv = !params.no_kv_offload;
|
||||
|
||||
@@ -1725,7 +1735,6 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
|
||||
fprintf(stream, "n_predict: %d # default: -1 (unlimited)\n", params.n_predict);
|
||||
fprintf(stream, "n_probs: %d # only used by server binary, default: 0\n", sparams.n_probs);
|
||||
fprintf(stream, "no_mmap: %s # default: false\n", !params.use_mmap ? "true" : "false");
|
||||
fprintf(stream, "no_mul_mat_q: %s # default: false\n", !params.mul_mat_q ? "true" : "false");
|
||||
fprintf(stream, "no_penalize_nl: %s # default: false\n", !sparams.penalize_nl ? "true" : "false");
|
||||
fprintf(stream, "ppl_output_type: %d # default: 0\n", params.ppl_output_type);
|
||||
fprintf(stream, "ppl_stride: %d # default: 0\n", params.ppl_stride);
|
||||
@@ -1843,3 +1852,18 @@ void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size) {
|
||||
|
||||
printf("\n=== Done dumping\n");
|
||||
}
|
||||
|
||||
void llama_embd_normalize(const float * inp, float * out, int n) {
|
||||
double sum = 0.0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
sum += inp[i] * inp[i];
|
||||
}
|
||||
sum = sqrt(sum);
|
||||
|
||||
const float norm = sum > 0.0 ? 1.0f / sum : 0.0f;
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
out[i] = inp[i] * norm;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ extern char const *LLAMA_BUILD_TARGET;
|
||||
int32_t get_num_physical_cores();
|
||||
|
||||
struct gpt_params {
|
||||
uint32_t seed = -1; // RNG seed
|
||||
uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed
|
||||
|
||||
int32_t n_threads = get_num_physical_cores();
|
||||
int32_t n_threads_draft = -1;
|
||||
@@ -53,11 +53,10 @@ struct gpt_params {
|
||||
int32_t n_ctx = 512; // context size
|
||||
int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
|
||||
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
||||
int32_t n_draft = 8; // number of tokens to draft during speculative decoding
|
||||
int32_t n_draft = 5; // number of tokens to draft during speculative decoding
|
||||
int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited)
|
||||
int32_t n_parallel = 1; // number of parallel sequences to decode
|
||||
int32_t n_sequences = 1; // number of sequences to decode
|
||||
float p_accept = 0.5f; // speculative decoding accept probability
|
||||
float p_split = 0.1f; // speculative decoding split probability
|
||||
int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default)
|
||||
int32_t n_gpu_layers_draft = -1; // number of layers to store in VRAM for the draft model (-1 - use default)
|
||||
@@ -76,8 +75,11 @@ struct gpt_params {
|
||||
float yarn_beta_slow = 1.0f; // YaRN high correction dim
|
||||
int32_t yarn_orig_ctx = 0; // YaRN original context length
|
||||
float defrag_thold = -1.0f; // KV cache defragmentation threshold
|
||||
int32_t rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
|
||||
ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;
|
||||
|
||||
ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;
|
||||
|
||||
llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
|
||||
llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
|
||||
|
||||
// // sampling parameters
|
||||
struct llama_sampling_params sparams;
|
||||
@@ -115,7 +117,6 @@ struct gpt_params {
|
||||
|
||||
bool kl_divergence = false; // compute KL-divergence
|
||||
|
||||
bool mul_mat_q = true; // if true, use mul_mat_q kernels instead of cuBLAS
|
||||
bool random_prompt = false; // do not randomize prompt if none provided
|
||||
bool use_color = false; // use color to distinguish generations and inputs
|
||||
bool interactive = false; // interactive mode
|
||||
@@ -259,3 +260,10 @@ void dump_kv_cache_view(const llama_kv_cache_view & view, int row_size = 80);
|
||||
|
||||
// Dump the KV cache view showing individual sequences in each cell (long output).
|
||||
void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
|
||||
|
||||
//
|
||||
// Embedding utils
|
||||
//
|
||||
|
||||
void llama_embd_normalize(const float * inp, float * out, int n);
|
||||
|
||||
|
||||
@@ -297,7 +297,7 @@ inline std::string log_filename_generator_impl(LogTriState multilog, const std::
|
||||
#ifndef _MSC_VER
|
||||
#define LOG(...) LOG_IMPL(__VA_ARGS__, "")
|
||||
#else
|
||||
#define LOG(str, ...) LOG_IMPL("%s" str, "", __VA_ARGS__, "")
|
||||
#define LOG(str, ...) LOG_IMPL("%s" str, "", ##__VA_ARGS__, "")
|
||||
#endif
|
||||
|
||||
// Main TEE macro.
|
||||
@@ -311,7 +311,7 @@ inline std::string log_filename_generator_impl(LogTriState multilog, const std::
|
||||
#ifndef _MSC_VER
|
||||
#define LOG_TEE(...) LOG_TEE_IMPL(__VA_ARGS__, "")
|
||||
#else
|
||||
#define LOG_TEE(str, ...) LOG_TEE_IMPL("%s" str, "", __VA_ARGS__, "")
|
||||
#define LOG_TEE(str, ...) LOG_TEE_IMPL("%s" str, "", ##__VA_ARGS__, "")
|
||||
#endif
|
||||
|
||||
// LOG macro variants with auto endline.
|
||||
@@ -319,8 +319,8 @@ inline std::string log_filename_generator_impl(LogTriState multilog, const std::
|
||||
#define LOGLN(...) LOG_IMPL(__VA_ARGS__, "\n")
|
||||
#define LOG_TEELN(...) LOG_TEE_IMPL(__VA_ARGS__, "\n")
|
||||
#else
|
||||
#define LOGLN(str, ...) LOG_IMPL("%s" str, "", __VA_ARGS__, "\n")
|
||||
#define LOG_TEELN(str, ...) LOG_TEE_IMPL("%s" str, "", __VA_ARGS__, "\n")
|
||||
#define LOGLN(str, ...) LOG_IMPL("%s" str, "", ##__VA_ARGS__, "\n")
|
||||
#define LOG_TEELN(str, ...) LOG_TEE_IMPL("%s" str, "", ##__VA_ARGS__, "\n")
|
||||
#endif
|
||||
|
||||
// INTERNAL, DO NOT USE
|
||||
|
||||
@@ -295,6 +295,77 @@ static llama_token llama_sampling_sample_impl(
|
||||
return id;
|
||||
}
|
||||
|
||||
static llama_token_data_array llama_sample_probability_distribution_impl(
|
||||
struct llama_sampling_context * ctx_sampling,
|
||||
struct llama_context * ctx_main,
|
||||
struct llama_context * ctx_cfg,
|
||||
const int idx) {
|
||||
const llama_sampling_params & params = ctx_sampling->params;
|
||||
|
||||
const int n_vocab = llama_n_vocab(llama_get_model(ctx_main));
|
||||
|
||||
const int32_t penalty_last_n = params.penalty_last_n < 0 ? params.n_prev : params.penalty_last_n;
|
||||
const float penalty_repeat = params.penalty_repeat;
|
||||
const float penalty_freq = params.penalty_freq;
|
||||
const float penalty_present = params.penalty_present;
|
||||
const bool penalize_nl = params.penalize_nl;
|
||||
|
||||
auto & prev = ctx_sampling->prev;
|
||||
auto & cur = ctx_sampling->cur;
|
||||
|
||||
// Get a pointer to the logits
|
||||
float * logits = llama_get_logits_ith(ctx_main, idx);
|
||||
|
||||
// Declare original_logits at the beginning of the function scope
|
||||
std::vector<float> original_logits;
|
||||
|
||||
// apply params.logit_bias map
|
||||
for (auto it = params.logit_bias.begin(); it != params.logit_bias.end(); it++) {
|
||||
logits[it->first] += it->second;
|
||||
}
|
||||
|
||||
if (ctx_cfg) {
|
||||
float * logits_guidance = llama_get_logits_ith(ctx_cfg, idx);
|
||||
llama_sample_apply_guidance(ctx_main, logits, logits_guidance, params.cfg_scale);
|
||||
}
|
||||
|
||||
cur.clear();
|
||||
|
||||
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
|
||||
cur.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
|
||||
}
|
||||
|
||||
llama_token_data_array cur_p = { cur.data(), cur.size(), false };
|
||||
|
||||
// apply penalties
|
||||
const auto& penalty_tokens = params.use_penalty_prompt_tokens ? params.penalty_prompt_tokens : prev;
|
||||
const int penalty_tokens_used_size = std::min((int)penalty_tokens.size(), penalty_last_n);
|
||||
if (penalty_tokens_used_size) {
|
||||
const float nl_logit = logits[llama_token_nl(llama_get_model(ctx_main))];
|
||||
|
||||
llama_sample_repetition_penalties(ctx_main, &cur_p,
|
||||
penalty_tokens.data() + penalty_tokens.size() - penalty_tokens_used_size,
|
||||
penalty_tokens_used_size, penalty_repeat, penalty_freq, penalty_present);
|
||||
|
||||
if (!penalize_nl) {
|
||||
for (size_t idx = 0; idx < cur_p.size; idx++) {
|
||||
if (cur_p.data[idx].id == llama_token_nl(llama_get_model(ctx_main))) {
|
||||
cur_p.data[idx].logit = nl_logit;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// apply grammar checks
|
||||
if (ctx_sampling->grammar != NULL) {
|
||||
llama_sample_grammar(ctx_main, &cur_p, ctx_sampling->grammar);
|
||||
}
|
||||
|
||||
llama_sample_softmax(ctx_main, &cur_p);
|
||||
return cur_p;
|
||||
}
|
||||
|
||||
llama_token llama_sampling_sample(
|
||||
struct llama_sampling_context * ctx_sampling,
|
||||
struct llama_context * ctx_main,
|
||||
@@ -304,6 +375,14 @@ llama_token llama_sampling_sample(
|
||||
return llama_sampling_sample_impl(ctx_sampling, ctx_main, ctx_cfg, idx, false);
|
||||
}
|
||||
|
||||
llama_token_data_array llama_sampling_probability_distribution(
|
||||
struct llama_sampling_context * ctx_sampling,
|
||||
struct llama_context * ctx_main,
|
||||
struct llama_context * ctx_cfg,
|
||||
const int idx) {
|
||||
return llama_sample_probability_distribution_impl(ctx_sampling,ctx_main, ctx_cfg, idx);
|
||||
}
|
||||
|
||||
void llama_sampling_accept(
|
||||
struct llama_sampling_context * ctx_sampling,
|
||||
struct llama_context * ctx_main,
|
||||
|
||||
@@ -131,6 +131,13 @@ llama_token llama_sampling_sample(
|
||||
struct llama_context * ctx_cfg,
|
||||
int idx = 0);
|
||||
|
||||
// returns the probability that token of given id will be sampled
|
||||
llama_token_data_array llama_sampling_probability_distribution(
|
||||
struct llama_sampling_context * ctx_sampling,
|
||||
struct llama_context * ctx_main,
|
||||
struct llama_context * ctx_cfg,
|
||||
int idx = 0);
|
||||
|
||||
void llama_sampling_accept(
|
||||
struct llama_sampling_context * ctx_sampling,
|
||||
struct llama_context * ctx_main,
|
||||
|
||||
@@ -8,9 +8,10 @@ import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from abc import ABC, abstractmethod
|
||||
from enum import IntEnum
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any, ContextManager, Iterator, Sequence, cast
|
||||
from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterator, Sequence, TypeVar, cast
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
@@ -36,7 +37,12 @@ class SentencePieceTokenTypes(IntEnum):
|
||||
BYTE = 6
|
||||
|
||||
|
||||
class Model:
|
||||
AnyModel = TypeVar("AnyModel", bound="type[Model]")
|
||||
|
||||
|
||||
class Model(ABC):
|
||||
_model_classes: dict[str, type[Model]] = {}
|
||||
|
||||
def __init__(self, dir_model: Path, ftype: int, fname_out: Path, is_big_endian: bool):
|
||||
self.dir_model = dir_model
|
||||
self.ftype = ftype
|
||||
@@ -47,10 +53,14 @@ class Model:
|
||||
self.num_parts = Model.count_model_parts(self.dir_model, ".safetensors" if self.is_safetensors else ".bin")
|
||||
self.part_names = self._get_part_names()
|
||||
self.hparams = Model.load_hparams(self.dir_model)
|
||||
self.model_arch = self._get_model_architecture()
|
||||
self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=False)
|
||||
self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer"])
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def model_arch(self) -> gguf.MODEL_ARCH:
|
||||
pass
|
||||
|
||||
def find_hparam(self, keys: Sequence[str], optional: bool = False) -> Any:
|
||||
key = next((k for k in keys if k in self.hparams), None)
|
||||
if key is not None:
|
||||
@@ -96,9 +106,11 @@ class Model:
|
||||
if (n_head_kv := self.hparams.get("num_key_value_heads")) is not None:
|
||||
self.gguf_writer.add_head_count_kv(n_head_kv)
|
||||
|
||||
if (rope_theta := self.hparams.get("rope_theta")) is not None:
|
||||
self.gguf_writer.add_rope_freq_base(rope_theta)
|
||||
if (f_rms_eps := self.hparams.get("rms_norm_eps")) is not None:
|
||||
self.gguf_writer.add_layer_norm_rms_eps(f_rms_eps)
|
||||
if (f_norm_eps := self.find_hparam(["layer_norm_eps", "layer_norm_epsilon"], optional=True)) is not None:
|
||||
if (f_norm_eps := self.find_hparam(["layer_norm_eps", "layer_norm_epsilon", "norm_epsilon"], optional=True)) is not None:
|
||||
self.gguf_writer.add_layer_norm_eps(f_norm_eps)
|
||||
if (n_experts := self.hparams.get("num_local_experts")) is not None:
|
||||
self.gguf_writer.add_expert_count(n_experts)
|
||||
@@ -174,53 +186,22 @@ class Model:
|
||||
with open(dir_model / "config.json", "r", encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
|
||||
@staticmethod
|
||||
def from_model_architecture(model_architecture):
|
||||
if model_architecture == "GPTNeoXForCausalLM":
|
||||
return GPTNeoXModel
|
||||
if model_architecture == "BloomForCausalLM":
|
||||
return BloomModel
|
||||
if model_architecture == "MPTForCausalLM":
|
||||
return MPTModel
|
||||
if model_architecture in ("BaichuanForCausalLM", "BaiChuanForCausalLM"):
|
||||
return BaichuanModel
|
||||
if model_architecture in ("FalconForCausalLM", "RWForCausalLM"):
|
||||
return FalconModel
|
||||
if model_architecture == "GPTBigCodeForCausalLM":
|
||||
return StarCoderModel
|
||||
if model_architecture == "GPTRefactForCausalLM":
|
||||
return RefactModel
|
||||
if model_architecture == "PersimmonForCausalLM":
|
||||
return PersimmonModel
|
||||
if model_architecture in ("StableLmForCausalLM", "StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
|
||||
return StableLMModel
|
||||
if model_architecture == "QWenLMHeadModel":
|
||||
return QwenModel
|
||||
if model_architecture == "Qwen2ForCausalLM":
|
||||
return Model
|
||||
if model_architecture == "MixtralForCausalLM":
|
||||
return MixtralModel
|
||||
if model_architecture == "GPT2LMHeadModel":
|
||||
return GPT2Model
|
||||
if model_architecture == "PhiForCausalLM":
|
||||
return Phi2Model
|
||||
if model_architecture == "PlamoForCausalLM":
|
||||
return PlamoModel
|
||||
if model_architecture == "CodeShellForCausalLM":
|
||||
return CodeShellModel
|
||||
if model_architecture == "OrionForCausalLM":
|
||||
return OrionModel
|
||||
if model_architecture == "InternLM2ForCausalLM":
|
||||
return InternLM2Model
|
||||
if model_architecture == "MiniCPMForCausalLM":
|
||||
return MiniCPMModel
|
||||
if model_architecture == "BertModel":
|
||||
return BertModel
|
||||
if model_architecture == "NomicBertModel":
|
||||
return NomicBertModel
|
||||
if model_architecture == "GemmaForCausalLM":
|
||||
return GemmaModel
|
||||
return Model
|
||||
@classmethod
|
||||
def register(cls, *names: str) -> Callable[[AnyModel], AnyModel]:
|
||||
assert names
|
||||
|
||||
def func(modelcls: type[Model]):
|
||||
for name in names:
|
||||
cls._model_classes[name] = modelcls
|
||||
return modelcls
|
||||
return func
|
||||
|
||||
@classmethod
|
||||
def from_model_architecture(cls, arch):
|
||||
try:
|
||||
return cls._model_classes[arch]
|
||||
except KeyError:
|
||||
raise NotImplementedError(f'Architecture {arch!r} not supported!') from None
|
||||
|
||||
def _is_model_safetensors(self) -> bool:
|
||||
return Model.count_model_parts(self.dir_model, ".safetensors") > 0
|
||||
@@ -235,55 +216,6 @@ class Model:
|
||||
return ("pytorch_model.bin",)
|
||||
return (f"pytorch_model-{n:05}-of-{self.num_parts:05}.bin" for n in range(1, self.num_parts + 1))
|
||||
|
||||
def _get_model_architecture(self) -> gguf.MODEL_ARCH:
|
||||
arch = self.hparams["architectures"][0]
|
||||
if arch == "GPTNeoXForCausalLM":
|
||||
return gguf.MODEL_ARCH.GPTNEOX
|
||||
if arch == "BloomForCausalLM":
|
||||
return gguf.MODEL_ARCH.BLOOM
|
||||
if arch == "MPTForCausalLM":
|
||||
return gguf.MODEL_ARCH.MPT
|
||||
if arch in ("BaichuanForCausalLM", "BaiChuanForCausalLM"):
|
||||
return gguf.MODEL_ARCH.BAICHUAN
|
||||
if arch in ("FalconForCausalLM", "RWForCausalLM"):
|
||||
return gguf.MODEL_ARCH.FALCON
|
||||
if arch == "GPTBigCodeForCausalLM":
|
||||
return gguf.MODEL_ARCH.STARCODER
|
||||
if arch == "GPTRefactForCausalLM":
|
||||
return gguf.MODEL_ARCH.REFACT
|
||||
if arch == "PersimmonForCausalLM":
|
||||
return gguf.MODEL_ARCH.PERSIMMON
|
||||
if arch in ("StableLmForCausalLM", "StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
|
||||
return gguf.MODEL_ARCH.STABLELM
|
||||
if arch == "QWenLMHeadModel":
|
||||
return gguf.MODEL_ARCH.QWEN
|
||||
if arch == "Qwen2ForCausalLM":
|
||||
return gguf.MODEL_ARCH.QWEN2
|
||||
if arch == "MixtralForCausalLM":
|
||||
return gguf.MODEL_ARCH.LLAMA
|
||||
if arch == "GPT2LMHeadModel":
|
||||
return gguf.MODEL_ARCH.GPT2
|
||||
if arch == "PhiForCausalLM":
|
||||
return gguf.MODEL_ARCH.PHI2
|
||||
if arch == "PlamoForCausalLM":
|
||||
return gguf.MODEL_ARCH.PLAMO
|
||||
if arch == "CodeShellForCausalLM":
|
||||
return gguf.MODEL_ARCH.CODESHELL
|
||||
if arch == "OrionForCausalLM":
|
||||
return gguf.MODEL_ARCH.ORION
|
||||
if arch == "InternLM2ForCausalLM":
|
||||
return gguf.MODEL_ARCH.INTERNLM2
|
||||
if arch == "MiniCPMForCausalLM":
|
||||
return gguf.MODEL_ARCH.MINICPM
|
||||
if arch == "BertModel":
|
||||
return gguf.MODEL_ARCH.BERT
|
||||
if arch == "NomicBertModel":
|
||||
return gguf.MODEL_ARCH.NOMIC_BERT
|
||||
if arch == "GemmaForCausalLM":
|
||||
return gguf.MODEL_ARCH.GEMMA
|
||||
|
||||
raise NotImplementedError(f'Architecture "{arch}" not supported!')
|
||||
|
||||
def _set_vocab_gpt2(self):
|
||||
dir_model = self.dir_model
|
||||
hparams = self.hparams
|
||||
@@ -451,7 +383,10 @@ class Model:
|
||||
special_vocab.add_to_gguf(self.gguf_writer)
|
||||
|
||||
|
||||
@Model.register("GPTNeoXForCausalLM")
|
||||
class GPTNeoXModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.GPTNEOX
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
block_count = self.hparams["num_hidden_layers"]
|
||||
|
||||
@@ -468,7 +403,10 @@ class GPTNeoXModel(Model):
|
||||
self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_eps"])
|
||||
|
||||
|
||||
@Model.register("BloomForCausalLM")
|
||||
class BloomModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.BLOOM
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
self.gguf_writer.add_name("Bloom")
|
||||
n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed"))
|
||||
@@ -560,7 +498,10 @@ class BloomModel(Model):
|
||||
print(name, f"=> output.weight, shape = {data.shape}, {old_dtype} --> {data.dtype}")
|
||||
|
||||
|
||||
@Model.register("MPTForCausalLM")
|
||||
class MPTModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.MPT
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
block_count = self.hparams["n_layers"]
|
||||
self.gguf_writer.add_name(self.dir_model.name)
|
||||
@@ -623,7 +564,10 @@ class MPTModel(Model):
|
||||
self.gguf_writer.add_tensor(new_name, data)
|
||||
|
||||
|
||||
@Model.register("OrionForCausalLM")
|
||||
class OrionModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.ORION
|
||||
|
||||
def set_vocab(self):
|
||||
self._set_vocab_sentencepiece()
|
||||
|
||||
@@ -702,7 +646,10 @@ class OrionModel(Model):
|
||||
self.gguf_writer.add_tensor(new_name, data)
|
||||
|
||||
|
||||
@Model.register("BaichuanForCausalLM", "BaiChuanForCausalLM")
|
||||
class BaichuanModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.BAICHUAN
|
||||
|
||||
def set_vocab(self):
|
||||
self._set_vocab_sentencepiece()
|
||||
|
||||
@@ -817,7 +764,10 @@ class BaichuanModel(Model):
|
||||
return weights[r * n_part:r * n_part + r, ...]
|
||||
|
||||
|
||||
@Model.register("FalconForCausalLM", "RWForCausalLM")
|
||||
class FalconModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.FALCON
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
block_count = self.hparams.get("num_hidden_layers")
|
||||
if block_count is None:
|
||||
@@ -910,7 +860,10 @@ class FalconModel(Model):
|
||||
self.gguf_writer.add_tensor(new_name, data)
|
||||
|
||||
|
||||
@Model.register("GPTBigCodeForCausalLM")
|
||||
class StarCoderModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.STARCODER
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
block_count = self.hparams["n_layer"]
|
||||
|
||||
@@ -925,7 +878,10 @@ class StarCoderModel(Model):
|
||||
self.gguf_writer.add_file_type(self.ftype)
|
||||
|
||||
|
||||
@Model.register("GPTRefactForCausalLM")
|
||||
class RefactModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.REFACT
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
hidden_dim = self.hparams["n_embd"]
|
||||
inner_dim = 4 * hidden_dim
|
||||
@@ -1009,7 +965,10 @@ class RefactModel(Model):
|
||||
self.gguf_writer.add_tensor(new_name, data)
|
||||
|
||||
|
||||
@Model.register("PersimmonForCausalLM")
|
||||
class PersimmonModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.PERSIMMON
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
block_count = self.hparams.get("num_layers", self.hparams.get("num_hidden_layers"))
|
||||
head_count = self.hparams["num_attention_heads"]
|
||||
@@ -1057,7 +1016,10 @@ class PersimmonModel(Model):
|
||||
self.gguf_writer.add_tensor(new_name, data)
|
||||
|
||||
|
||||
@Model.register("StableLmForCausalLM", "StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM")
|
||||
class StableLMModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.STABLELM
|
||||
|
||||
def set_vocab(self):
|
||||
if (self.dir_model / "tokenizer.json").is_file():
|
||||
self._set_vocab_gpt2()
|
||||
@@ -1081,12 +1043,18 @@ class StableLMModel(Model):
|
||||
self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_eps", "norm_eps"]))
|
||||
|
||||
|
||||
@Model.register("MixtralForCausalLM")
|
||||
class MixtralModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.LLAMA
|
||||
|
||||
def set_vocab(self):
|
||||
self._set_vocab_sentencepiece()
|
||||
|
||||
|
||||
@Model.register("MiniCPMForCausalLM")
|
||||
class MiniCPMModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.MINICPM
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
block_count = self.hparams["num_hidden_layers"]
|
||||
self.gguf_writer.add_name("MiniCPM")
|
||||
@@ -1163,7 +1131,10 @@ class MiniCPMModel(Model):
|
||||
self.gguf_writer.add_tensor(new_name, data)
|
||||
|
||||
|
||||
@Model.register("QWenLMHeadModel")
|
||||
class QwenModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.QWEN
|
||||
|
||||
@staticmethod
|
||||
def token_bytes_to_string(b):
|
||||
from transformers.models.gpt2.tokenization_gpt2 import bytes_to_unicode
|
||||
@@ -1243,7 +1214,15 @@ class QwenModel(Model):
|
||||
self.gguf_writer.add_tensor(new_name, data)
|
||||
|
||||
|
||||
@Model.register("Qwen2ForCausalLM")
|
||||
class Qwen2Model(Model):
|
||||
model_arch = gguf.MODEL_ARCH.QWEN2
|
||||
|
||||
|
||||
@Model.register("GPT2LMHeadModel")
|
||||
class GPT2Model(Model):
|
||||
model_arch = gguf.MODEL_ARCH.GPT2
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
self.gguf_writer.add_name(self.dir_model.name)
|
||||
self.gguf_writer.add_block_count(self.hparams["n_layer"])
|
||||
@@ -1305,7 +1284,10 @@ class GPT2Model(Model):
|
||||
self.gguf_writer.add_tensor("output.weight", data)
|
||||
|
||||
|
||||
@Model.register("PhiForCausalLM")
|
||||
class Phi2Model(Model):
|
||||
model_arch = gguf.MODEL_ARCH.PHI2
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
block_count = self.find_hparam(["num_hidden_layers", "n_layer"])
|
||||
|
||||
@@ -1327,7 +1309,10 @@ class Phi2Model(Model):
|
||||
self.gguf_writer.add_add_bos_token(False)
|
||||
|
||||
|
||||
@Model.register("PlamoForCausalLM")
|
||||
class PlamoModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.PLAMO
|
||||
|
||||
def set_vocab(self):
|
||||
self._set_vocab_sentencepiece()
|
||||
|
||||
@@ -1406,7 +1391,10 @@ class PlamoModel(Model):
|
||||
self.gguf_writer.add_tensor(new_name, data)
|
||||
|
||||
|
||||
@Model.register("CodeShellForCausalLM")
|
||||
class CodeShellModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.CODESHELL
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
block_count = self.hparams["n_layer"]
|
||||
|
||||
@@ -1471,7 +1459,10 @@ class CodeShellModel(Model):
|
||||
print(name, f"=> output.weight, shape = {data.shape}, {old_dtype} --> {data.dtype}")
|
||||
|
||||
|
||||
@Model.register("InternLM2ForCausalLM")
|
||||
class InternLM2Model(Model):
|
||||
model_arch = gguf.MODEL_ARCH.INTERNLM2
|
||||
|
||||
def set_vocab(self):
|
||||
# (TODO): Is there a better way?
|
||||
# Copy from _set_vocab_sentencepiece, The only difference is that we will treat the character
|
||||
@@ -1643,7 +1634,10 @@ in chat mode so that the conversation can end normally.")
|
||||
self.post_write_tensors(tensor_map, name, data_torch)
|
||||
|
||||
|
||||
@Model.register("BertModel")
|
||||
class BertModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.BERT
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.vocab_size = None
|
||||
@@ -1653,16 +1647,17 @@ class BertModel(Model):
|
||||
self.gguf_writer.add_causal_attention(False)
|
||||
|
||||
# get pooling path
|
||||
with open(self.dir_model / "modules.json", encoding="utf-8") as f:
|
||||
modules = json.load(f)
|
||||
pooling_path = None
|
||||
for mod in modules:
|
||||
if mod["type"] == "sentence_transformers.models.Pooling":
|
||||
pooling_path = mod["path"]
|
||||
break
|
||||
module_path = self.dir_model / "modules.json"
|
||||
if module_path.is_file():
|
||||
with open(module_path, encoding="utf-8") as f:
|
||||
modules = json.load(f)
|
||||
for mod in modules:
|
||||
if mod["type"] == "sentence_transformers.models.Pooling":
|
||||
pooling_path = mod["path"]
|
||||
break
|
||||
|
||||
# get pooling type
|
||||
pooling_type = gguf.PoolingType.NONE
|
||||
if pooling_path is not None:
|
||||
with open(self.dir_model / pooling_path / "config.json", encoding="utf-8") as f:
|
||||
pooling = json.load(f)
|
||||
@@ -1672,8 +1667,7 @@ class BertModel(Model):
|
||||
pooling_type = gguf.PoolingType.CLS
|
||||
else:
|
||||
raise NotImplementedError("Only MEAN and CLS pooling types supported")
|
||||
|
||||
self.gguf_writer.add_pooling_type(pooling_type.value)
|
||||
self.gguf_writer.add_pooling_type(pooling_type)
|
||||
|
||||
def set_vocab(self):
|
||||
path = self.dir_model
|
||||
@@ -1749,7 +1743,10 @@ class BertModel(Model):
|
||||
self.gguf_writer.add_tensor(new_name, data)
|
||||
|
||||
|
||||
@Model.register("NomicBertModel")
|
||||
class NomicBertModel(BertModel):
|
||||
model_arch = gguf.MODEL_ARCH.NOMIC_BERT
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
@@ -1786,7 +1783,10 @@ class NomicBertModel(BertModel):
|
||||
yield name, data
|
||||
|
||||
|
||||
@Model.register("GemmaForCausalLM")
|
||||
class GemmaModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.GEMMA
|
||||
|
||||
def set_vocab(self):
|
||||
self._set_vocab_sentencepiece()
|
||||
|
||||
@@ -1811,16 +1811,15 @@ class GemmaModel(Model):
|
||||
tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
|
||||
|
||||
for name, data_torch in self.get_tensors():
|
||||
# ref: https://github.com/huggingface/transformers/blob/fc37f38915372c15992b540dfcbbe00a916d4fc6/src/transformers/models/gemma/modeling_gemma.py#L89
|
||||
if name.endswith("norm.weight"):
|
||||
data_torch = data_torch + 1
|
||||
|
||||
old_dtype = data_torch.dtype
|
||||
|
||||
# convert any unsupported data types to float32
|
||||
if data_torch.dtype not in (torch.float16, torch.float32):
|
||||
data_torch = data_torch.to(torch.float32)
|
||||
|
||||
# ref: https://github.com/huggingface/transformers/blob/fc37f38915372c15992b540dfcbbe00a916d4fc6/src/transformers/models/gemma/modeling_gemma.py#L89
|
||||
if name.endswith("norm.weight"):
|
||||
data_torch = data_torch + 1
|
||||
data = data_torch.squeeze().numpy()
|
||||
|
||||
# map tensor names
|
||||
@@ -1843,6 +1842,129 @@ class GemmaModel(Model):
|
||||
self.gguf_writer.add_tensor(new_name, data)
|
||||
|
||||
|
||||
@Model.register("Starcoder2ForCausalLM")
|
||||
class StarCoder2Model(Model):
|
||||
model_arch = gguf.MODEL_ARCH.STARCODER2
|
||||
|
||||
|
||||
@Model.register("MambaForCausalLM", "MambaLMHeadModel")
|
||||
class MambaModel(Model):
|
||||
model_arch = gguf.MODEL_ARCH.MAMBA
|
||||
|
||||
def set_vocab(self):
|
||||
vocab_size = self.hparams["vocab_size"]
|
||||
# Round vocab size to next multiple of 8
|
||||
pad_vocab = self.hparams.get("pad_vocab_size_multiple", 8)
|
||||
# pad using ceiling division
|
||||
# ref: https://stackoverflow.com/a/17511341/22827863
|
||||
vocab_size = -(vocab_size // -pad_vocab) * pad_vocab
|
||||
self.hparams["vocab_size"] = vocab_size
|
||||
|
||||
if (self.dir_model / "tokenizer.json").is_file():
|
||||
self._set_vocab_gpt2()
|
||||
else:
|
||||
# Use the GPT-NeoX tokenizer when no tokenizer files are present
|
||||
tokenizer_path = Path(sys.path[0]) / "models" / "ggml-vocab-gpt-neox.gguf"
|
||||
print(f"Using tokenizer from '{os.path.relpath(tokenizer_path, os.getcwd())}'")
|
||||
neox_reader = gguf.GGUFReader(tokenizer_path, "r")
|
||||
|
||||
field = neox_reader.get_field(gguf.Keys.Tokenizer.MODEL)
|
||||
self.gguf_writer.add_tokenizer_model(bytes(field.parts[-1]))
|
||||
field = neox_reader.get_field(gguf.Keys.Tokenizer.LIST)
|
||||
self.gguf_writer.add_token_list([bytes(field.parts[i]) for i in field.data][:vocab_size])
|
||||
field = neox_reader.get_field(gguf.Keys.Tokenizer.TOKEN_TYPE)
|
||||
self.gguf_writer.add_token_types([field.parts[i].tolist()[0] for i in field.data][:vocab_size])
|
||||
field = neox_reader.get_field(gguf.Keys.Tokenizer.MERGES)
|
||||
self.gguf_writer.add_token_merges([bytes(field.parts[i]) for i in field.data])
|
||||
field = neox_reader.get_field(gguf.Keys.Tokenizer.BOS_ID)
|
||||
self.gguf_writer.add_bos_token_id(field.parts[-1].tolist()[0])
|
||||
field = neox_reader.get_field(gguf.Keys.Tokenizer.EOS_ID)
|
||||
self.gguf_writer.add_eos_token_id(field.parts[-1].tolist()[0])
|
||||
field = neox_reader.get_field(gguf.Keys.Tokenizer.UNK_ID)
|
||||
self.gguf_writer.add_unk_token_id(field.parts[-1].tolist()[0])
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
d_model = self.find_hparam(["hidden_size", "d_model"])
|
||||
d_conv = self.find_hparam(["conv_kernel", "d_conv"], optional=True) or 4
|
||||
d_inner = self.find_hparam(["intermediate_size", "d_inner"], optional=True) or 2 * d_model
|
||||
d_state = self.find_hparam(["state_size", "d_state"], optional=True) or 16
|
||||
# ceiling division
|
||||
# ref: https://stackoverflow.com/a/17511341/22827863
|
||||
# ref: https://github.com/state-spaces/mamba/blob/ce59daea3a090d011d6476c6e5b97f6d58ddad8b/mamba_ssm/modules/mamba_simple.py#L58
|
||||
dt_rank = self.find_hparam(["time_step_rank", "dt_rank"], optional=True) or -(d_model // -16)
|
||||
rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5
|
||||
|
||||
# Fail early for models which don't have a block expansion factor of 2
|
||||
assert d_inner == 2 * d_model
|
||||
|
||||
self.gguf_writer.add_name(self.dir_model.name)
|
||||
self.gguf_writer.add_context_length(2**20) # arbitrary value; for those who use the default
|
||||
self.gguf_writer.add_embedding_length(d_model)
|
||||
self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading
|
||||
self.gguf_writer.add_head_count(0) # unused, but seemingly required when loading
|
||||
self.gguf_writer.add_block_count(self.hparams["n_layer"])
|
||||
self.gguf_writer.add_ssm_conv_kernel(d_conv)
|
||||
self.gguf_writer.add_ssm_inner_size(d_inner)
|
||||
self.gguf_writer.add_ssm_state_size(d_state)
|
||||
self.gguf_writer.add_ssm_time_step_rank(dt_rank)
|
||||
self.gguf_writer.add_layer_norm_rms_eps(rms_norm_eps)
|
||||
self.gguf_writer.add_file_type(self.ftype)
|
||||
|
||||
def write_tensors(self):
|
||||
block_count = self.hparams["n_layer"]
|
||||
tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
|
||||
|
||||
tok_embd = None
|
||||
tok_embd_name = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.TOKEN_EMBD] + ".weight"
|
||||
output_name = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.OUTPUT] + ".weight"
|
||||
|
||||
for name, data_torch in self.get_tensors():
|
||||
old_dtype = data_torch.dtype
|
||||
|
||||
# convert any unsupported data types to float32
|
||||
if data_torch.dtype not in (torch.float16, torch.float32):
|
||||
data_torch = data_torch.to(torch.float32)
|
||||
|
||||
# map tensor names
|
||||
new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
|
||||
if new_name is None:
|
||||
print(f"Can not map tensor {name!r}")
|
||||
sys.exit()
|
||||
|
||||
if name.endswith(".A_log"):
|
||||
print("A_log --> A ==> " + new_name)
|
||||
data_torch = -torch.exp(data_torch)
|
||||
|
||||
# assuming token_embd.weight is seen before output.weight
|
||||
if tok_embd is not None and new_name == output_name:
|
||||
if torch.equal(tok_embd, data_torch):
|
||||
print(f"{output_name} is equivalent to {tok_embd_name}, omitting")
|
||||
continue
|
||||
if new_name == tok_embd_name:
|
||||
tok_embd = data_torch
|
||||
|
||||
data = data_torch.squeeze().numpy()
|
||||
|
||||
n_dims = len(data.shape)
|
||||
data_dtype = data.dtype
|
||||
|
||||
# if f32 desired, convert any float16 to float32
|
||||
if self.ftype == 0 and data_dtype == np.float16:
|
||||
data = data.astype(np.float32)
|
||||
|
||||
# TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
|
||||
if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
|
||||
data = data.astype(np.float32)
|
||||
|
||||
# if f16 desired, convert big float32 2-dim weight tensors to float16
|
||||
if self.ftype == 1 and data_dtype == np.float32 and new_name.removesuffix(".weight").endswith((".ssm_in", ".ssm_out", "token_embd", "output")) and n_dims == 2:
|
||||
data = data.astype(np.float16)
|
||||
|
||||
print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
|
||||
|
||||
self.gguf_writer.add_tensor(new_name, data)
|
||||
|
||||
|
||||
###### CONVERSION LOGIC ######
|
||||
|
||||
|
||||
|
||||
@@ -373,7 +373,7 @@ def handle_metadata(cfg, hp):
|
||||
raise ValueError('Unable to load metadata')
|
||||
vocab_path = Path(cfg.vocab_dir if cfg.vocab_dir is not None else cfg.model_metadata_dir)
|
||||
vocab_factory = convert.VocabFactory(vocab_path)
|
||||
vocab, special_vocab = vocab_factory.load_vocab(cfg.vocabtype, cfg.model_metadata_dir)
|
||||
vocab, special_vocab = vocab_factory.load_vocab(cfg.vocabtype.split(","), cfg.model_metadata_dir)
|
||||
convert.check_vocab_size(params, vocab)
|
||||
return params, vocab, special_vocab
|
||||
|
||||
@@ -398,8 +398,8 @@ def handle_args():
|
||||
help ='Load HuggingFace/.pth vocab and metadata from the specified directory')
|
||||
parser.add_argument("--vocab-dir", type=Path,
|
||||
help="directory containing tokenizer.model, if separate from model file - only meaningful with --model-metadata-dir")
|
||||
parser.add_argument("--vocabtype", choices=["spm", "bpe"], default="spm",
|
||||
help="vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm)")
|
||||
parser.add_argument("--vocabtype", default="spm,hfft",
|
||||
help="vocab format - only meaningful with --model-metadata-dir and/or --vocab-dir (default: spm,hfft)")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
|
||||
85
convert.py
85
convert.py
@@ -1282,35 +1282,32 @@ def load_some_model(path: Path) -> ModelPlus:
|
||||
|
||||
|
||||
class VocabFactory:
|
||||
_FILES = {"spm": "tokenizer.model", "bpe": "vocab.json", "hfft": "tokenizer.json"}
|
||||
|
||||
def __init__(self, path: Path):
|
||||
self.path = path
|
||||
self.files: dict[str, Path | None] = {
|
||||
"tokenizer.model": None,
|
||||
"vocab.json": None,
|
||||
"tokenizer.json": None,
|
||||
}
|
||||
self._detect_files()
|
||||
self.file_paths = self._detect_files()
|
||||
print(f"Found vocab files: {self.file_paths}")
|
||||
|
||||
def _detect_files(self):
|
||||
for file in self.files.keys():
|
||||
file_path = self.path / file
|
||||
parent_file_path = self.path.parent / file
|
||||
if file_path.exists():
|
||||
self.files[file] = file_path
|
||||
elif parent_file_path.exists():
|
||||
self.files[file] = parent_file_path
|
||||
print(f"Found vocab files: {self.files}")
|
||||
def _detect_files(self) -> dict[str, Path | None]:
|
||||
def locate(file: str) -> Path | None:
|
||||
if (path := self.path / file).exists():
|
||||
return path
|
||||
if (path := self.path.parent / file).exists():
|
||||
return path
|
||||
return None
|
||||
|
||||
def _select_file(self, vocabtype: str | None) -> Path:
|
||||
if vocabtype in ["spm", "bpe"]:
|
||||
for file_key in self.files.keys():
|
||||
if (file := self.files[file_key]) is not None:
|
||||
return file
|
||||
raise FileNotFoundError(f"{vocabtype} vocab not found.")
|
||||
if vocabtype == "hfft":
|
||||
# For Hugging Face Fast Tokenizer, return the directory path instead of a specific file
|
||||
return self.path
|
||||
raise ValueError(f"Unsupported vocabulary type {vocabtype}")
|
||||
return {vt: locate(f) for vt, f in self._FILES.items()}
|
||||
|
||||
def _select_file(self, vocab_types: list[str]) -> tuple[str, Path]:
|
||||
for vtype in vocab_types:
|
||||
try:
|
||||
path = self.file_paths[vtype]
|
||||
except KeyError:
|
||||
raise ValueError(f"Unsupported vocabulary type {vtype}") from None
|
||||
if path is not None:
|
||||
return vtype, path
|
||||
raise FileNotFoundError(f"Could not find any of {[self._FILES[vt] for vt in vocab_types]}")
|
||||
|
||||
def _create_special_vocab(self, vocab: Vocab, vocabtype: str, model_parent_path: Path) -> gguf.SpecialVocab:
|
||||
load_merges = vocabtype == "bpe"
|
||||
@@ -1322,30 +1319,30 @@ class VocabFactory:
|
||||
n_vocab=n_vocab,
|
||||
)
|
||||
|
||||
def load_vocab(self, vocabtype: str, model_parent_path: Path) -> tuple[Vocab, gguf.SpecialVocab]:
|
||||
path = self._select_file(vocabtype)
|
||||
print(f"Loading vocab file '{path}', type '{vocabtype}'")
|
||||
def load_vocab(self, vocab_types: list[str], model_parent_path: Path) -> tuple[Vocab, gguf.SpecialVocab]:
|
||||
vocab_type, path = self._select_file(vocab_types)
|
||||
print(f"Loading vocab file {path!r}, type {vocab_type!r}")
|
||||
|
||||
added_tokens_path = path.parent / "added_tokens.json"
|
||||
vocab: Vocab
|
||||
if vocabtype == "bpe":
|
||||
if vocab_type == "bpe":
|
||||
vocab = BpeVocab(
|
||||
path, added_tokens_path if added_tokens_path.exists() else None
|
||||
)
|
||||
elif vocabtype == "spm":
|
||||
elif vocab_type == "spm":
|
||||
vocab = SentencePieceVocab(
|
||||
path, added_tokens_path if added_tokens_path.exists() else None
|
||||
)
|
||||
elif vocabtype == "hfft":
|
||||
elif vocab_type == "hfft":
|
||||
vocab = HfVocab(
|
||||
path, added_tokens_path if added_tokens_path.exists() else None
|
||||
path.parent, added_tokens_path if added_tokens_path.exists() else None
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unsupported vocabulary type {vocabtype}")
|
||||
raise ValueError(vocab_type)
|
||||
# FIXME: Respect --vocab-dir?
|
||||
special_vocab = self._create_special_vocab(
|
||||
vocab,
|
||||
vocabtype,
|
||||
vocab_type,
|
||||
model_parent_path,
|
||||
)
|
||||
return vocab, special_vocab
|
||||
@@ -1379,15 +1376,13 @@ def main(args_in: list[str] | None = None) -> None:
|
||||
if np.uint32(1) == np.uint32(1).newbyteorder("<"):
|
||||
# We currently only support Q8_0 output on little endian systems.
|
||||
output_choices.append("q8_0")
|
||||
vocab_types = ["spm", "bpe", "hfft"]
|
||||
parser = argparse.ArgumentParser(description="Convert a LLaMa model to a GGML compatible file")
|
||||
parser.add_argument("--awq-path", type=Path, help="Path to scale awq cache file", default=None)
|
||||
parser = argparse.ArgumentParser(description="Convert a LLaMA model to a GGML compatible file")
|
||||
parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model")
|
||||
parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file")
|
||||
parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab")
|
||||
parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
|
||||
parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file")
|
||||
parser.add_argument("--vocab-type", choices=vocab_types, help="The vocabulary format used to define the tokenizer model (default: spm)", default="spm")
|
||||
parser.add_argument("--vocab-type", help="vocab types to try in order, choose from 'spm', 'bpe', 'hfft' (default: spm,hfft)", default="spm,hfft")
|
||||
parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
|
||||
parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)")
|
||||
parser.add_argument("--ctx", type=int, help="model training context (default: based on input)")
|
||||
@@ -1397,18 +1392,6 @@ def main(args_in: list[str] | None = None) -> None:
|
||||
parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing")
|
||||
|
||||
args = parser.parse_args(args_in)
|
||||
if args.awq_path:
|
||||
sys.path.insert(1, str(Path(__file__).parent / 'awq-py'))
|
||||
from awq.apply_awq import add_scale_weights # type: ignore[import-not-found]
|
||||
tmp_model_path = args.model / "weighted_model"
|
||||
if tmp_model_path.is_dir():
|
||||
print(f"{tmp_model_path} exists as a weighted model.")
|
||||
else:
|
||||
tmp_model_path.mkdir(parents=True, exist_ok=True)
|
||||
print("Saving new weighted model ...")
|
||||
add_scale_weights(str(args.model), str(args.awq_path), str(tmp_model_path))
|
||||
print(f"Saved weighted model at {tmp_model_path}.")
|
||||
args.model = tmp_model_path
|
||||
|
||||
if args.dump_single:
|
||||
model_plus = lazy_load_file(args.model)
|
||||
@@ -1448,7 +1431,7 @@ def main(args_in: list[str] | None = None) -> None:
|
||||
model_parent_path = model_plus.paths[0].parent
|
||||
vocab_path = Path(args.vocab_dir or args.model or model_parent_path)
|
||||
vocab_factory = VocabFactory(vocab_path)
|
||||
vocab, special_vocab = vocab_factory.load_vocab(args.vocab_type, model_parent_path)
|
||||
vocab, special_vocab = vocab_factory.load_vocab(args.vocab_type.split(","), model_parent_path)
|
||||
|
||||
if args.vocab_only:
|
||||
if not args.outfile:
|
||||
|
||||
@@ -32,16 +32,15 @@ int main(int argc, char ** argv) {
|
||||
gpt_params params;
|
||||
|
||||
if (argc == 1 || argv[1][0] == '-') {
|
||||
printf("usage: %s MODEL_PATH [N_KV_MAX] [IS_PP_SHARED] [NGL] [MMQ] <PP> <TG> <PL>\n" , argv[0]);
|
||||
printf("usage: %s MODEL_PATH [N_KV_MAX] [IS_PP_SHARED] [NGL] <PP> <TG> <PL>\n" , argv[0]);
|
||||
printf(" <PP>, <TG> and PL are comma-separated lists of numbers without spaces\n\n");
|
||||
printf(" example: %s ggml-model-f16.gguf 2048 0 999 0 128,256,512 128,256 1,2,4,8,16,32\n\n", argv[0]);
|
||||
printf(" example: %s ggml-model-f16.gguf 2048 0 999 128,256,512 128,256 1,2,4,8,16,32\n\n", argv[0]);
|
||||
return 1 ;
|
||||
}
|
||||
|
||||
int n_kv_max = 2048;
|
||||
int is_pp_shared = 0;
|
||||
int n_gpu_layers = 0;
|
||||
int mmq = 0;
|
||||
|
||||
std::vector<int> n_pp = { 128, 256, 512, 1024, 2048, 3584, 7680, };
|
||||
std::vector<int> n_tg = { 128, 256, };
|
||||
@@ -65,19 +64,15 @@ int main(int argc, char ** argv) {
|
||||
}
|
||||
|
||||
if (argc >= 6) {
|
||||
mmq = std::atoi(argv[5]);
|
||||
n_pp = parse_list(argv[5]);
|
||||
}
|
||||
|
||||
if (argc >= 7) {
|
||||
n_pp = parse_list(argv[6]);
|
||||
n_tg = parse_list(argv[6]);
|
||||
}
|
||||
|
||||
if (argc >= 8) {
|
||||
n_tg = parse_list(argv[7]);
|
||||
}
|
||||
|
||||
if (argc >= 9) {
|
||||
n_pl = parse_list(argv[8]);
|
||||
n_pl = parse_list(argv[7]);
|
||||
}
|
||||
|
||||
// init LLM
|
||||
@@ -106,11 +101,13 @@ int main(int argc, char ** argv) {
|
||||
ctx_params.seed = 1234;
|
||||
ctx_params.n_ctx = n_kv_max;
|
||||
ctx_params.n_batch = 512;
|
||||
ctx_params.mul_mat_q = mmq;
|
||||
|
||||
ctx_params.n_threads = params.n_threads;
|
||||
ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
|
||||
|
||||
// ensure enough sequences are available
|
||||
ctx_params.n_parallel = *std::max_element(n_pl.begin(), n_pl.end());
|
||||
|
||||
llama_context * ctx = llama_new_context_with_model(model, ctx_params);
|
||||
|
||||
if (ctx == NULL) {
|
||||
@@ -159,7 +156,7 @@ int main(int argc, char ** argv) {
|
||||
}
|
||||
|
||||
LOG_TEE("\n");
|
||||
LOG_TEE("%s: n_kv_max = %d, is_pp_shared = %d, n_gpu_layers = %d, mmq = %d, n_threads = %u, n_threads_batch = %u\n", __func__, n_kv_max, is_pp_shared, n_gpu_layers, mmq, ctx_params.n_threads, ctx_params.n_threads_batch);
|
||||
LOG_TEE("%s: n_kv_max = %d, is_pp_shared = %d, n_gpu_layers = %d, n_threads = %u, n_threads_batch = %u\n", __func__, n_kv_max, is_pp_shared, n_gpu_layers, ctx_params.n_threads, ctx_params.n_threads_batch);
|
||||
LOG_TEE("\n");
|
||||
|
||||
LOG_TEE("|%6s | %6s | %4s | %6s | %8s | %8s | %8s | %8s | %8s | %8s |\n", "PP", "TG", "B", "N_KV", "T_PP s", "S_PP t/s", "T_TG s", "S_TG t/s", "T s", "S t/s");
|
||||
@@ -180,10 +177,10 @@ int main(int argc, char ** argv) {
|
||||
|
||||
llama_batch_clear(batch);
|
||||
|
||||
const int n_tokens = is_pp_shared ? pp : pl*pp;
|
||||
|
||||
for (int i = 0; i < n_tokens; ++i) {
|
||||
llama_batch_add(batch, 0, i, { 0 }, false);
|
||||
for (int i = 0; i < pp; ++i) {
|
||||
for (int j = 0; j < (is_pp_shared ? 1 : pl); ++j) {
|
||||
llama_batch_add(batch, 0, i, { j }, false);
|
||||
}
|
||||
}
|
||||
batch.logits[batch.n_tokens - 1] = true;
|
||||
|
||||
@@ -198,7 +195,7 @@ int main(int argc, char ** argv) {
|
||||
|
||||
if (is_pp_shared) {
|
||||
for (int32_t i = 1; i < pl; ++i) {
|
||||
llama_kv_cache_seq_cp(ctx, 0, i, 0, pp);
|
||||
llama_kv_cache_seq_cp(ctx, 0, i, -1, -1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -80,6 +80,7 @@ int main(int argc, char ** argv) {
|
||||
ctx_params.seed = 1234;
|
||||
ctx_params.n_ctx = n_kv_req;
|
||||
ctx_params.n_batch = std::max(n_len, n_parallel);
|
||||
ctx_params.n_parallel = n_parallel;
|
||||
ctx_params.n_threads = params.n_threads;
|
||||
ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
|
||||
|
||||
@@ -132,7 +133,7 @@ int main(int argc, char ** argv) {
|
||||
// assign the system KV cache to all parallel sequences
|
||||
// this way, the parallel sequences will "reuse" the prompt tokens without having to copy them
|
||||
for (int32_t i = 1; i < n_parallel; ++i) {
|
||||
llama_kv_cache_seq_cp(ctx, 0, i, 0, batch.n_tokens);
|
||||
llama_kv_cache_seq_cp(ctx, 0, i, -1, -1);
|
||||
}
|
||||
|
||||
if (n_parallel > 1) {
|
||||
|
||||
@@ -189,12 +189,10 @@ int main(int argc, char ** argv) {
|
||||
|
||||
int32_t nelements = sizex*sizey;
|
||||
|
||||
std::vector<int64_t> hist_cur(1 << 4, 0);
|
||||
|
||||
// Set up a the benchmark matrices
|
||||
// printf("Creating new tensor q11 & Running quantize\n");
|
||||
struct ggml_tensor * q11 = ggml_new_tensor_2d(ctx, qtype, sizex, sizey);
|
||||
ggml_quantize_chunk(qtype, (const float *) m11->data, q11->data, 0, nelements/m11->ne[0], m11->ne[0], hist_cur.data(), nullptr);
|
||||
ggml_quantize_chunk(qtype, (const float *) m11->data, q11->data, 0, nelements/m11->ne[0], m11->ne[0], nullptr);
|
||||
|
||||
// Set up a the compute graph
|
||||
// printf("Creating new tensor q31\n");
|
||||
@@ -207,7 +205,7 @@ int main(int argc, char ** argv) {
|
||||
// Set up a second graph computation to make sure we override the CPU cache lines
|
||||
// printf("Creating new tensor q12 & Running quantize\n");
|
||||
struct ggml_tensor * q12 = ggml_new_tensor_2d(ctx, qtype, sizex, sizey);
|
||||
ggml_quantize_chunk(qtype, (const float *) m12->data, q12->data, 0, nelements/m12->ne[0], m12->ne[0], hist_cur.data(), nullptr);
|
||||
ggml_quantize_chunk(qtype, (const float *) m12->data, q12->data, 0, nelements/m12->ne[0], m12->ne[0], nullptr);
|
||||
|
||||
// printf("Creating new tensor q32\n");
|
||||
struct ggml_tensor * q32 = ggml_mul_mat(ctx, q12, m2);
|
||||
|
||||
@@ -19,18 +19,7 @@ static std::vector<std::string> split_lines(const std::string & s) {
|
||||
|
||||
static void batch_add_seq(llama_batch & batch, const std::vector<int32_t> & tokens, int seq_id) {
|
||||
for (size_t i = 0; i < tokens.size(); i++) {
|
||||
llama_batch_add(batch, tokens[i], i, { seq_id }, false);
|
||||
}
|
||||
}
|
||||
|
||||
static void normalize(float * vec, float * out, int n) {
|
||||
float norm = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
norm += vec[i] * vec[i];
|
||||
}
|
||||
norm = sqrt(norm);
|
||||
for (int i = 0; i < n; i++) {
|
||||
out[i] = vec[i] / norm;
|
||||
llama_batch_add(batch, tokens[i], i, { seq_id }, i == tokens.size() - 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -44,11 +33,23 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
|
||||
fprintf(stderr, "%s : failed to decode\n", __func__);
|
||||
}
|
||||
|
||||
// normalize on copy
|
||||
for (int k = 0; k < n_seq; k++) {
|
||||
float * emb = llama_get_embeddings_ith(ctx, k);
|
||||
float * out = output + k * n_embd;
|
||||
normalize(emb, out, n_embd);
|
||||
for (int i = 0; i < batch.n_tokens; i++) {
|
||||
if (!batch.logits[i]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// try to get sequence embeddings - supported only when pooling_type is not NONE
|
||||
const float * embd = llama_get_embeddings_seq(ctx, batch.seq_id[i][0]);
|
||||
if (embd == NULL) {
|
||||
embd = llama_get_embeddings_ith(ctx, i);
|
||||
if (embd == NULL) {
|
||||
fprintf(stderr, "%s: failed to get embeddings for token %d\n", __func__, i);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
float * out = output + batch.seq_id[i][0] * n_embd;
|
||||
llama_embd_normalize(embd, out, n_embd);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -132,7 +133,7 @@ int main(int argc, char ** argv) {
|
||||
|
||||
// initialize batch
|
||||
const int n_prompts = prompts.size();
|
||||
struct llama_batch batch = llama_batch_init(n_batch, 0, n_prompts);
|
||||
struct llama_batch batch = llama_batch_init(n_batch, 0, 1);
|
||||
|
||||
// allocate output
|
||||
const int n_embd = llama_n_embd(model);
|
||||
@@ -145,6 +146,7 @@ int main(int argc, char ** argv) {
|
||||
for (int k = 0; k < n_prompts; k++) {
|
||||
// clamp to n_batch tokens
|
||||
auto & inp = inputs[k];
|
||||
|
||||
const uint64_t n_toks = inp.size();
|
||||
|
||||
// encode if at capacity
|
||||
|
||||
@@ -378,10 +378,10 @@ int main(int argc, char ** argv) {
|
||||
if (params.interactive) {
|
||||
const char *control_message;
|
||||
if (params.multiline_input) {
|
||||
control_message = " - To return control to LLaMa, end your input with '\\'.\n"
|
||||
control_message = " - To return control to LLaMA, end your input with '\\'.\n"
|
||||
" - To return control without starting a new line, end your input with '/'.\n";
|
||||
} else {
|
||||
control_message = " - Press Return to return control to LLaMa.\n"
|
||||
control_message = " - Press Return to return control to LLaMA.\n"
|
||||
" - To return control without starting a new line, end your input with '/'.\n"
|
||||
" - If you want to submit another line, end your input with '\\'.\n";
|
||||
}
|
||||
|
||||
@@ -35,7 +35,6 @@ options:
|
||||
-mg, --main-gpu <i> (default: 0)
|
||||
-nkvo, --no-kv-offload <0|1> (default: 0)
|
||||
-mmp, --mmap <0|1> (default: 1)
|
||||
-mmq, --mul-mat-q <0|1> (default: 1)
|
||||
-ts, --tensor_split <ts0/ts1/..> (default: 0)
|
||||
-r, --repetitions <n> (default: 5)
|
||||
-o, --output <csv|json|md|sql> (default: md)
|
||||
|
||||
@@ -123,20 +123,15 @@ static std::string get_gpu_info() {
|
||||
}
|
||||
#endif
|
||||
#ifdef GGML_USE_SYCL
|
||||
int device_list[GGML_SYCL_MAX_DEVICES];
|
||||
ggml_sycl_get_gpu_list(device_list, GGML_SYCL_MAX_DEVICES);
|
||||
|
||||
for (int i = 0; i < GGML_SYCL_MAX_DEVICES; i++) {
|
||||
if (device_list[i] >0 ){
|
||||
char buf[128];
|
||||
ggml_sycl_get_device_description(i, buf, sizeof(buf));
|
||||
id += buf;
|
||||
int count = ggml_backend_sycl_get_device_count();
|
||||
for (int i = 0; i < count; i++) {
|
||||
char buf[128];
|
||||
ggml_sycl_get_device_description(i, buf, sizeof(buf));
|
||||
id += buf;
|
||||
if (i < count - 1) {
|
||||
id += "/";
|
||||
}
|
||||
}
|
||||
if (id.length() >2 ) {
|
||||
id.pop_back();
|
||||
}
|
||||
#endif
|
||||
// TODO: other backends
|
||||
return id;
|
||||
@@ -176,9 +171,9 @@ struct cmd_params {
|
||||
std::vector<llama_split_mode> split_mode;
|
||||
std::vector<int> main_gpu;
|
||||
std::vector<bool> no_kv_offload;
|
||||
std::vector<bool> mul_mat_q;
|
||||
std::vector<std::vector<float>> tensor_split;
|
||||
std::vector<bool> use_mmap;
|
||||
std::vector<bool> embeddings;
|
||||
int reps;
|
||||
bool verbose;
|
||||
output_formats output_format;
|
||||
@@ -196,9 +191,9 @@ static const cmd_params cmd_params_defaults = {
|
||||
/* split_mode */ {LLAMA_SPLIT_MODE_LAYER},
|
||||
/* main_gpu */ {0},
|
||||
/* no_kv_offload */ {false},
|
||||
/* mul_mat_q */ {true},
|
||||
/* tensor_split */ {std::vector<float>(llama_max_devices(), 0.0f)},
|
||||
/* use_mmap */ {true},
|
||||
/* embeddings */ {false},
|
||||
/* reps */ 5,
|
||||
/* verbose */ false,
|
||||
/* output_format */ MARKDOWN
|
||||
@@ -221,7 +216,7 @@ static void print_usage(int /* argc */, char ** argv) {
|
||||
printf(" -mg, --main-gpu <i> (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str());
|
||||
printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str());
|
||||
printf(" -mmp, --mmap <0|1> (default: %s)\n", join(cmd_params_defaults.use_mmap, ",").c_str());
|
||||
printf(" -mmq, --mul-mat-q <0|1> (default: %s)\n", join(cmd_params_defaults.mul_mat_q, ",").c_str());
|
||||
printf(" -embd, --embeddings <0|1> (default: %s)\n", join(cmd_params_defaults.embeddings, ",").c_str());
|
||||
printf(" -ts, --tensor_split <ts0/ts1/..> (default: 0)\n");
|
||||
printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
|
||||
printf(" -o, --output <csv|json|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
|
||||
@@ -383,13 +378,6 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
||||
}
|
||||
auto p = split<bool>(argv[i], split_delim);
|
||||
params.no_kv_offload.insert(params.no_kv_offload.end(), p.begin(), p.end());
|
||||
} else if (arg == "-mmq" || arg == "--mul-mat-q") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
auto p = split<bool>(argv[i], split_delim);
|
||||
params.mul_mat_q.insert(params.mul_mat_q.end(), p.begin(), p.end());
|
||||
} else if (arg == "-mmp" || arg == "--mmap") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
@@ -397,6 +385,13 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
||||
}
|
||||
auto p = split<bool>(argv[i], split_delim);
|
||||
params.use_mmap.insert(params.use_mmap.end(), p.begin(), p.end());
|
||||
} else if (arg == "-embd" || arg == "--embeddings") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
auto p = split<bool>(argv[i], split_delim);
|
||||
params.embeddings.insert(params.embeddings.end(), p.begin(), p.end());
|
||||
} else if (arg == "-ts" || arg == "--tensor-split") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
@@ -466,9 +461,9 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
||||
if (params.split_mode.empty()) { params.split_mode = cmd_params_defaults.split_mode; }
|
||||
if (params.main_gpu.empty()) { params.main_gpu = cmd_params_defaults.main_gpu; }
|
||||
if (params.no_kv_offload.empty()){ params.no_kv_offload = cmd_params_defaults.no_kv_offload; }
|
||||
if (params.mul_mat_q.empty()) { params.mul_mat_q = cmd_params_defaults.mul_mat_q; }
|
||||
if (params.tensor_split.empty()) { params.tensor_split = cmd_params_defaults.tensor_split; }
|
||||
if (params.use_mmap.empty()) { params.use_mmap = cmd_params_defaults.use_mmap; }
|
||||
if (params.embeddings.empty()) { params.embeddings = cmd_params_defaults.embeddings; }
|
||||
if (params.n_threads.empty()) { params.n_threads = cmd_params_defaults.n_threads; }
|
||||
|
||||
return params;
|
||||
@@ -486,9 +481,9 @@ struct cmd_params_instance {
|
||||
llama_split_mode split_mode;
|
||||
int main_gpu;
|
||||
bool no_kv_offload;
|
||||
bool mul_mat_q;
|
||||
std::vector<float> tensor_split;
|
||||
bool use_mmap;
|
||||
bool embeddings;
|
||||
|
||||
llama_model_params to_llama_mparams() const {
|
||||
llama_model_params mparams = llama_model_default_params();
|
||||
@@ -518,8 +513,8 @@ struct cmd_params_instance {
|
||||
cparams.n_batch = n_batch;
|
||||
cparams.type_k = type_k;
|
||||
cparams.type_v = type_v;
|
||||
cparams.mul_mat_q = mul_mat_q;
|
||||
cparams.offload_kqv = !no_kv_offload;
|
||||
cparams.embeddings = embeddings;
|
||||
|
||||
return cparams;
|
||||
}
|
||||
@@ -535,10 +530,10 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
||||
for (const auto & mg : params.main_gpu)
|
||||
for (const auto & ts : params.tensor_split)
|
||||
for (const auto & mmp : params.use_mmap)
|
||||
for (const auto & embd : params.embeddings)
|
||||
for (const auto & nb : params.n_batch)
|
||||
for (const auto & tk : params.type_k)
|
||||
for (const auto & tv : params.type_v)
|
||||
for (const auto & mmq : params.mul_mat_q)
|
||||
for (const auto & nkvo : params.no_kv_offload)
|
||||
for (const auto & nt : params.n_threads) {
|
||||
for (const auto & n_prompt : params.n_prompt) {
|
||||
@@ -557,9 +552,9 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
||||
/* .split_mode = */ sm,
|
||||
/* .main_gpu = */ mg,
|
||||
/* .no_kv_offload= */ nkvo,
|
||||
/* .mul_mat_q = */ mmq,
|
||||
/* .tensor_split = */ ts,
|
||||
/* .use_mmap = */ mmp,
|
||||
/* .embeddings = */ embd,
|
||||
};
|
||||
instances.push_back(instance);
|
||||
}
|
||||
@@ -580,9 +575,9 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
||||
/* .split_mode = */ sm,
|
||||
/* .main_gpu = */ mg,
|
||||
/* .no_kv_offload= */ nkvo,
|
||||
/* .mul_mat_q = */ mmq,
|
||||
/* .tensor_split = */ ts,
|
||||
/* .use_mmap = */ mmp,
|
||||
/* .embeddings = */ embd,
|
||||
};
|
||||
instances.push_back(instance);
|
||||
}
|
||||
@@ -616,9 +611,9 @@ struct test {
|
||||
llama_split_mode split_mode;
|
||||
int main_gpu;
|
||||
bool no_kv_offload;
|
||||
bool mul_mat_q;
|
||||
std::vector<float> tensor_split;
|
||||
bool use_mmap;
|
||||
bool embeddings;
|
||||
int n_prompt;
|
||||
int n_gen;
|
||||
std::string test_time;
|
||||
@@ -639,9 +634,9 @@ struct test {
|
||||
split_mode = inst.split_mode;
|
||||
main_gpu = inst.main_gpu;
|
||||
no_kv_offload = inst.no_kv_offload;
|
||||
mul_mat_q = inst.mul_mat_q;
|
||||
tensor_split = inst.tensor_split;
|
||||
use_mmap = inst.use_mmap;
|
||||
embeddings = inst.embeddings;
|
||||
n_prompt = inst.n_prompt;
|
||||
n_gen = inst.n_gen;
|
||||
// RFC 3339 date-time format
|
||||
@@ -713,7 +708,7 @@ struct test {
|
||||
"n_batch", "n_threads", "type_k", "type_v",
|
||||
"n_gpu_layers", "split_mode",
|
||||
"main_gpu", "no_kv_offload",
|
||||
"mul_mat_q", "tensor_split", "use_mmap",
|
||||
"tensor_split", "use_mmap", "embeddings",
|
||||
"n_prompt", "n_gen", "test_time",
|
||||
"avg_ns", "stddev_ns",
|
||||
"avg_ts", "stddev_ts"
|
||||
@@ -733,7 +728,7 @@ struct test {
|
||||
}
|
||||
if (field == "cuda" || field == "opencl" || field == "vulkan" || field == "kompute" || field == "metal" ||
|
||||
field == "gpu_blas" || field == "blas" || field == "sycl" ||field == "f16_kv" || field == "no_kv_offload" ||
|
||||
field == "mul_mat_q" || field == "use_mmap") {
|
||||
field == "use_mmap" || field == "embeddings") {
|
||||
return BOOL;
|
||||
}
|
||||
if (field == "avg_ts" || field == "stddev_ts") {
|
||||
@@ -767,7 +762,7 @@ struct test {
|
||||
std::to_string(n_batch), std::to_string(n_threads), ggml_type_name(type_k), ggml_type_name(type_v),
|
||||
std::to_string(n_gpu_layers), split_mode_str(split_mode),
|
||||
std::to_string(main_gpu), std::to_string(no_kv_offload),
|
||||
std::to_string(mul_mat_q), tensor_split_str, std::to_string(use_mmap),
|
||||
tensor_split_str, std::to_string(use_mmap), std::to_string(embeddings),
|
||||
std::to_string(n_prompt), std::to_string(n_gen), test_time,
|
||||
std::to_string(avg_ns()), std::to_string(stdev_ns()),
|
||||
std::to_string(avg_ts()), std::to_string(stdev_ts())
|
||||
@@ -931,15 +926,15 @@ struct markdown_printer : public printer {
|
||||
if (field == "n_threads") {
|
||||
return "threads";
|
||||
}
|
||||
if (field == "mul_mat_q") {
|
||||
return "mmq";
|
||||
}
|
||||
if (field == "no_kv_offload") {
|
||||
return "nkvo";
|
||||
}
|
||||
if (field == "use_mmap") {
|
||||
return "mmap";
|
||||
}
|
||||
if (field == "embeddings") {
|
||||
return "embd";
|
||||
}
|
||||
if (field == "tensor_split") {
|
||||
return "ts";
|
||||
}
|
||||
@@ -974,9 +969,6 @@ struct markdown_printer : public printer {
|
||||
if (params.split_mode.size() > 1 || params.split_mode != cmd_params_defaults.split_mode) {
|
||||
fields.emplace_back("split_mode");
|
||||
}
|
||||
if (params.mul_mat_q.size() > 1 || params.mul_mat_q != cmd_params_defaults.mul_mat_q) {
|
||||
fields.emplace_back("mul_mat_q");
|
||||
}
|
||||
if (params.no_kv_offload.size() > 1 || params.no_kv_offload != cmd_params_defaults.no_kv_offload) {
|
||||
fields.emplace_back("no_kv_offload");
|
||||
}
|
||||
@@ -986,6 +978,9 @@ struct markdown_printer : public printer {
|
||||
if (params.use_mmap.size() > 1 || params.use_mmap != cmd_params_defaults.use_mmap) {
|
||||
fields.emplace_back("use_mmap");
|
||||
}
|
||||
if (params.embeddings.size() > 1 || params.embeddings != cmd_params_defaults.embeddings) {
|
||||
fields.emplace_back("embeddings");
|
||||
}
|
||||
fields.emplace_back("test");
|
||||
fields.emplace_back("t/s");
|
||||
|
||||
|
||||
@@ -1862,7 +1862,6 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
|
||||
|
||||
std::vector<uint8_t> work(512);
|
||||
std::vector<float> conv_buf(512);
|
||||
std::vector<int64_t> hist_all(1 << 4, 0);
|
||||
size_t total_size_org = 0;
|
||||
size_t total_size_new = 0;
|
||||
|
||||
@@ -1917,48 +1916,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
|
||||
}
|
||||
new_data = work.data();
|
||||
|
||||
std::vector<int64_t> hist_cur(1 << 4, 0);
|
||||
|
||||
switch (new_type) {
|
||||
case GGML_TYPE_Q4_0: {
|
||||
new_size = ggml_quantize_q4_0(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
||||
} break;
|
||||
case GGML_TYPE_Q4_1: {
|
||||
new_size = ggml_quantize_q4_1(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
||||
} break;
|
||||
case GGML_TYPE_Q5_0: {
|
||||
new_size = ggml_quantize_q5_0(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
||||
} break;
|
||||
case GGML_TYPE_Q5_1: {
|
||||
new_size = ggml_quantize_q5_1(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
||||
} break;
|
||||
case GGML_TYPE_Q8_0: {
|
||||
new_size = ggml_quantize_q8_0(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
||||
} break;
|
||||
case GGML_TYPE_Q2_K: {
|
||||
new_size = ggml_quantize_q2_K(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
||||
} break;
|
||||
case GGML_TYPE_Q3_K: {
|
||||
new_size = ggml_quantize_q3_K(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
||||
} break;
|
||||
case GGML_TYPE_Q4_K: {
|
||||
new_size = ggml_quantize_q4_K(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
||||
} break;
|
||||
case GGML_TYPE_Q5_K: {
|
||||
new_size = ggml_quantize_q5_K(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
||||
} break;
|
||||
case GGML_TYPE_Q6_K: {
|
||||
new_size = ggml_quantize_q6_K(f32_data, new_data, n_elms, cur->ne[0], hist_cur.data());
|
||||
} break;
|
||||
default: {
|
||||
fprintf(stderr, "%s: unsupported quantization type %d\n", __func__, new_type);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t j = 0; j < hist_cur.size(); ++j) {
|
||||
hist_all[j] += hist_cur[j];
|
||||
}
|
||||
new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, n_elms/cur->ne[0], cur->ne[0], nullptr);
|
||||
} else {
|
||||
new_type = cur->type;
|
||||
new_data = cur->data;
|
||||
@@ -1993,17 +1951,6 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
|
||||
{
|
||||
printf("%s: original size = %8.2f MB\n", __func__, total_size_org / 1024.0 / 1024.0);
|
||||
printf("%s: quantized size = %8.2f MB\n", __func__, total_size_new / 1024.0 / 1024.0);
|
||||
|
||||
int64_t sum_all = 0;
|
||||
for (size_t i = 0; i < hist_all.size(); ++i) {
|
||||
sum_all += hist_all[i];
|
||||
}
|
||||
|
||||
printf("%s: hist: ", __func__);
|
||||
for (size_t i = 0; i < hist_all.size(); ++i) {
|
||||
printf("%5.3f ", hist_all[i] / (float)sum_all);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
@@ -511,6 +511,14 @@ int main(int argc, char ** argv) {
|
||||
std::vector<llama_token> embd;
|
||||
std::vector<llama_token> embd_guidance;
|
||||
|
||||
// tokenized antiprompts
|
||||
std::vector<std::vector<llama_token>> antiprompt_ids;
|
||||
|
||||
antiprompt_ids.reserve(params.antiprompt.size());
|
||||
for (const std::string & antiprompt : params.antiprompt) {
|
||||
antiprompt_ids.emplace_back(::llama_tokenize(ctx, antiprompt, false, true));
|
||||
}
|
||||
|
||||
struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams);
|
||||
|
||||
while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
|
||||
@@ -769,6 +777,18 @@ int main(int argc, char ** argv) {
|
||||
}
|
||||
}
|
||||
|
||||
// check for reverse prompt using special tokens
|
||||
llama_token last_token = llama_sampling_last(ctx_sampling);
|
||||
for (std::vector<llama_token> ids : antiprompt_ids) {
|
||||
if (ids.size() == 1 && last_token == ids[0]) {
|
||||
if (params.interactive) {
|
||||
is_interacting = true;
|
||||
}
|
||||
is_antiprompt = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_antiprompt) {
|
||||
LOG("found antiprompt: %s\n", last_output.c_str());
|
||||
}
|
||||
|
||||
@@ -107,6 +107,9 @@ int main(int argc, char ** argv) {
|
||||
// number of simultaneous "clients" to simulate
|
||||
const int32_t n_clients = params.n_parallel;
|
||||
|
||||
// dedicate one sequence to the system prompt
|
||||
params.n_parallel += 1;
|
||||
|
||||
// requests to simulate
|
||||
const int32_t n_seq = params.n_sequences;
|
||||
|
||||
@@ -196,8 +199,8 @@ int main(int argc, char ** argv) {
|
||||
}
|
||||
|
||||
// assign the system KV cache to all parallel sequences
|
||||
for (int32_t i = 1; i < n_clients; ++i) {
|
||||
llama_kv_cache_seq_cp(ctx, 0, i, 0, n_tokens_system);
|
||||
for (int32_t i = 1; i <= n_clients; ++i) {
|
||||
llama_kv_cache_seq_cp(ctx, 0, i, -1, -1);
|
||||
}
|
||||
|
||||
LOG_TEE("\n");
|
||||
@@ -221,15 +224,17 @@ int main(int argc, char ** argv) {
|
||||
|
||||
client.i_batch = batch.n_tokens;
|
||||
|
||||
llama_batch_add(batch, client.sampled, n_tokens_system + client.n_prompt + client.n_decoded, { client.id }, true);
|
||||
llama_batch_add(batch, client.sampled, n_tokens_system + client.n_prompt + client.n_decoded, { client.id + 1 }, true);
|
||||
|
||||
client.n_decoded += 1;
|
||||
}
|
||||
|
||||
if (batch.n_tokens == 0) {
|
||||
// all sequences have ended - clear the entire KV cache
|
||||
for (int i = 0; i < n_clients; ++i) {
|
||||
llama_kv_cache_seq_rm(ctx, i, n_tokens_system, -1);
|
||||
for (int i = 1; i <= n_clients; ++i) {
|
||||
llama_kv_cache_seq_rm(ctx, i, -1, -1);
|
||||
// but keep the system prompt
|
||||
llama_kv_cache_seq_cp(ctx, 0, i, -1, -1);
|
||||
}
|
||||
|
||||
LOG_TEE("%s: clearing the KV cache\n", __func__);
|
||||
@@ -255,7 +260,7 @@ int main(int argc, char ** argv) {
|
||||
tokens_prompt = ::llama_tokenize(ctx, client.prompt, false);
|
||||
|
||||
for (size_t i = 0; i < tokens_prompt.size(); ++i) {
|
||||
llama_batch_add(batch, tokens_prompt[i], i + n_tokens_system, { client.id }, false);
|
||||
llama_batch_add(batch, tokens_prompt[i], i + n_tokens_system, { client.id + 1 }, false);
|
||||
}
|
||||
|
||||
// extract the logits only for the last token
|
||||
@@ -366,7 +371,8 @@ int main(int argc, char ** argv) {
|
||||
}
|
||||
|
||||
// delete only the generated part of the sequence, i.e. keep the system prompt in the cache
|
||||
llama_kv_cache_seq_rm(ctx, client.id, n_tokens_system, -1);
|
||||
llama_kv_cache_seq_rm(ctx, client.id + 1, -1, -1);
|
||||
llama_kv_cache_seq_cp(ctx, 0, client.id + 1, -1, -1);
|
||||
|
||||
const auto t_main_end = ggml_time_us();
|
||||
|
||||
|
||||
@@ -442,7 +442,7 @@ static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params &
|
||||
return {tokens, std::exp(nll / count), logit_history, prob_history};
|
||||
}
|
||||
|
||||
static results_perplexity perplexity(llama_context * ctx, const gpt_params & params) {
|
||||
static results_perplexity perplexity(llama_context * ctx, const gpt_params & params, const int32_t n_ctx) {
|
||||
if (params.ppl_stride > 0) {
|
||||
return perplexity_v2(ctx, params);
|
||||
}
|
||||
@@ -453,7 +453,6 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
|
||||
// BOS tokens will be added for each chunk before eval
|
||||
|
||||
const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
|
||||
const int n_ctx = llama_n_ctx(ctx);
|
||||
|
||||
std::ofstream logits_stream;
|
||||
if (!params.logits_file.empty()) {
|
||||
@@ -499,13 +498,19 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
|
||||
double nll2 = 0.0;
|
||||
|
||||
const int num_batches = (n_ctx + n_batch - 1) / n_batch;
|
||||
const int n_seq = std::max(1, n_batch / n_ctx);
|
||||
|
||||
GGML_ASSERT(n_batch < n_ctx || n_batch % n_ctx == 0);
|
||||
GGML_ASSERT(params.n_ctx == n_seq * n_ctx);
|
||||
|
||||
llama_batch batch = llama_batch_init(std::min(n_batch, n_ctx*n_seq), 0, 1);
|
||||
|
||||
std::vector<float> logits;
|
||||
if (num_batches > 1) {
|
||||
logits.reserve((size_t)n_ctx * n_vocab);
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s: calculating perplexity over %d chunks, batch_size=%d\n", __func__, n_chunk, n_batch);
|
||||
fprintf(stderr, "%s: calculating perplexity over %d chunks, n_ctx=%d, batch_size=%d, n_seq=%d\n", __func__, n_chunk, n_ctx, n_batch, n_seq);
|
||||
|
||||
std::vector<std::thread> workers(std::thread::hardware_concurrency() - 1);
|
||||
|
||||
@@ -518,10 +523,26 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
|
||||
log_probs.resize(n_ctx * nv);
|
||||
}
|
||||
|
||||
for (int i = 0; i < n_chunk; ++i) {
|
||||
// We get the logits for all the tokens in the context window (params.n_ctx)
|
||||
// from llama_eval above. Now, based on https://huggingface.co/docs/transformers/perplexity,
|
||||
// calculate the perplexity over the last half of the window (so the model always has
|
||||
// some context to predict the token).
|
||||
//
|
||||
// We rely on the fact that attention in the forward pass only looks at previous
|
||||
// tokens here, so the logits returned for each token are an accurate representation
|
||||
// of what the model would have predicted at that point.
|
||||
//
|
||||
// Example, we have a context window of 512, we will compute perplexity for each of the
|
||||
// last 256 tokens. Then, we split the input up into context window size chunks to
|
||||
// process the entire prompt.
|
||||
const int first = n_ctx/2;
|
||||
|
||||
for (int i = 0; i < n_chunk; i += n_seq) {
|
||||
const int start = i * n_ctx;
|
||||
const int end = start + n_ctx;
|
||||
|
||||
const int n_seq_batch = std::min(n_seq, n_chunk - i);
|
||||
|
||||
const auto t_start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// clear the KV cache
|
||||
@@ -531,22 +552,37 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
|
||||
const int batch_start = start + j * n_batch;
|
||||
const int batch_size = std::min(end - batch_start, n_batch);
|
||||
|
||||
// save original token and restore it after eval
|
||||
const auto token_org = tokens[batch_start];
|
||||
batch.n_tokens = 0;
|
||||
for (int seq = 0; seq < n_seq_batch; seq++) {
|
||||
int seq_start = batch_start + seq*n_ctx;
|
||||
|
||||
// add BOS token for the first batch of each chunk
|
||||
if (add_bos && j == 0) {
|
||||
tokens[batch_start] = llama_token_bos(llama_get_model(ctx));
|
||||
// save original token and restore it after eval
|
||||
const auto token_org = tokens[seq_start];
|
||||
|
||||
// add BOS token for the first batch of each chunk
|
||||
if (add_bos && j == 0) {
|
||||
tokens[seq_start] = llama_token_bos(llama_get_model(ctx));
|
||||
}
|
||||
|
||||
for (int k = 0; k < batch_size; ++k) {
|
||||
const int idx = seq*n_ctx + k;
|
||||
batch.token[idx] = tokens[seq_start + k];
|
||||
batch.pos[idx] = j*n_batch + k;
|
||||
batch.n_seq_id[idx] = 1;
|
||||
batch.seq_id[idx][0] = seq;
|
||||
batch.logits[idx] = batch.pos[idx] >= first ? 1 : 0;
|
||||
}
|
||||
batch.n_tokens += batch_size;
|
||||
|
||||
// restore the original token in case it was set to BOS
|
||||
tokens[seq_start] = token_org;
|
||||
}
|
||||
|
||||
if (llama_decode(ctx, llama_batch_get_one(tokens.data() + batch_start, batch_size, j * n_batch, 0))) {
|
||||
if (llama_decode(ctx, batch)) {
|
||||
fprintf(stderr, "%s : failed to eval\n", __func__);
|
||||
return {tokens, -1, logit_history, prob_history};
|
||||
}
|
||||
|
||||
// restore the original token in case it was set to BOS
|
||||
tokens[batch_start] = token_org;
|
||||
|
||||
if (num_batches > 1) {
|
||||
const auto * batch_logits = llama_get_logits(ctx);
|
||||
logits.insert(logits.end(), batch_logits, batch_logits + batch_size * n_vocab);
|
||||
@@ -558,7 +594,7 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
|
||||
if (i == 0) {
|
||||
const float t_total = std::chrono::duration<float>(t_end - t_start).count();
|
||||
fprintf(stderr, "%s: %.2f seconds per pass - ETA ", __func__, t_total);
|
||||
int total_seconds = (int)(t_total * n_chunk);
|
||||
int total_seconds = (int)(t_total*n_chunk/n_seq);
|
||||
if (total_seconds >= 60*60) {
|
||||
fprintf(stderr, "%d hours ", total_seconds / (60*60));
|
||||
total_seconds = total_seconds % (60*60);
|
||||
@@ -566,37 +602,31 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
|
||||
fprintf(stderr, "%.2f minutes\n", total_seconds / 60.0);
|
||||
}
|
||||
|
||||
// We get the logits for all the tokens in the context window (params.n_ctx)
|
||||
// from llama_eval above. Now, based on https://huggingface.co/docs/transformers/perplexity,
|
||||
// calculate the perplexity over the last half of the window (so the model always has
|
||||
// some context to predict the token).
|
||||
//
|
||||
// We rely on the fact that attention in the forward pass only looks at previous
|
||||
// tokens here, so the logits returned for each token are an accurate representation
|
||||
// of what the model would have predicted at that point.
|
||||
//
|
||||
// Example, we have a context window of 512, we will compute perplexity for each of the
|
||||
// last 256 tokens. Then, we split the input up into context window size chunks to
|
||||
// process the entire prompt.
|
||||
const int first = n_ctx/2;
|
||||
const float * all_logits = num_batches > 1 ? logits.data() : llama_get_logits(ctx);
|
||||
if (!params.logits_file.empty()) {
|
||||
process_logits(logits_stream, n_vocab, all_logits + first*n_vocab, tokens.data() + start + first, n_ctx - 1 - first,
|
||||
workers, log_probs, nll, nll2);
|
||||
} else {
|
||||
process_logits(n_vocab, all_logits + first*n_vocab, tokens.data() + start + first, n_ctx - 1 - first,
|
||||
workers, nll, nll2, logit_history.data() + start + first, prob_history.data() + start + first);
|
||||
}
|
||||
count += n_ctx - first - 1;
|
||||
for (int seq = 0; seq < n_seq_batch; seq++) {
|
||||
const float * all_logits = num_batches > 1 ? logits.data() : llama_get_logits_ith(ctx, seq*n_ctx);
|
||||
llama_token * tokens_data = tokens.data() + start + seq*n_ctx + first;
|
||||
if (!params.logits_file.empty()) {
|
||||
process_logits(logits_stream, n_vocab, all_logits + first*n_vocab,
|
||||
tokens_data, n_ctx - 1 - first,
|
||||
workers, log_probs, nll, nll2);
|
||||
} else {
|
||||
process_logits(n_vocab, all_logits + first*n_vocab,
|
||||
tokens_data, n_ctx - 1 - first,
|
||||
workers, nll, nll2,
|
||||
logit_history.data() + start + seq*n_ctx + first,
|
||||
prob_history.data() + start + seq*n_ctx + first);
|
||||
}
|
||||
count += n_ctx - first - 1;
|
||||
|
||||
// perplexity is e^(average negative log-likelihood)
|
||||
if (params.ppl_output_type == 0) {
|
||||
printf("[%d]%.4lf,", i + 1, std::exp(nll / count));
|
||||
} else {
|
||||
double av = nll/count;
|
||||
double av2 = nll2/count - av*av;
|
||||
if (av2 > 0) av2 = sqrt(av2/(count-1));
|
||||
printf("%8d %.4lf %4lf %4lf\n", i*n_ctx, std::exp(nll / count), av, av2);
|
||||
// perplexity is e^(average negative log-likelihood)
|
||||
if (params.ppl_output_type == 0) {
|
||||
printf("[%d]%.4lf,", i + seq + 1, std::exp(nll / count));
|
||||
} else {
|
||||
double av = nll/count;
|
||||
double av2 = nll2/count - av*av;
|
||||
if (av2 > 0) av2 = sqrt(av2/(count-1));
|
||||
printf("%8d %.4lf %4lf %4lf\n", i*n_ctx, std::exp(nll / count), av, av2);
|
||||
}
|
||||
}
|
||||
fflush(stdout);
|
||||
|
||||
@@ -615,6 +645,8 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
|
||||
printf("Unexpected negative standard deviation of log(prob)\n");
|
||||
}
|
||||
|
||||
llama_batch_free(batch);
|
||||
|
||||
return {tokens, ppl, logit_history, prob_history};
|
||||
}
|
||||
|
||||
@@ -809,7 +841,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
|
||||
const int n_batch = params.n_batch;
|
||||
|
||||
const int max_tasks_per_batch = 32;
|
||||
const int max_seq = 4*max_tasks_per_batch;
|
||||
const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_max_seq(ctx));
|
||||
|
||||
llama_batch batch = llama_batch_init(n_ctx, 0, max_seq);
|
||||
|
||||
@@ -1086,7 +1118,7 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
|
||||
const int n_batch = params.n_batch;
|
||||
|
||||
const int max_tasks_per_batch = 128;
|
||||
const int max_seq = 2*max_tasks_per_batch;
|
||||
const int max_seq = std::min(2*max_tasks_per_batch, (int) llama_n_max_seq(ctx));
|
||||
|
||||
llama_batch batch = llama_batch_init(n_ctx, 0, max_seq);
|
||||
|
||||
@@ -1438,7 +1470,7 @@ static void multiple_choice_score(llama_context * ctx, const gpt_params & params
|
||||
const int n_batch = params.n_batch;
|
||||
|
||||
const int max_tasks_per_batch = 32;
|
||||
const int max_seq = 4*max_tasks_per_batch;
|
||||
const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_max_seq(ctx));
|
||||
|
||||
llama_batch batch = llama_batch_init(n_ctx, 0, max_seq);
|
||||
|
||||
@@ -1782,13 +1814,24 @@ static void kl_divergence(llama_context * ctx, const gpt_params & params) {
|
||||
int main(int argc, char ** argv) {
|
||||
gpt_params params;
|
||||
|
||||
params.n_batch = 512;
|
||||
if (!gpt_params_parse(argc, argv, params)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
params.logits_all = true;
|
||||
params.n_batch = std::min(params.n_batch, params.n_ctx);
|
||||
|
||||
const int32_t n_ctx = params.n_ctx;
|
||||
|
||||
const bool ppl = !params.hellaswag && !params.winogrande && !params.multiple_choice && !params.kl_divergence;
|
||||
if (ppl) {
|
||||
int n_seq = std::max(1, params.n_batch / n_ctx);
|
||||
int32_t n_kv = n_seq * n_ctx;
|
||||
params.n_parallel = n_seq;
|
||||
params.n_ctx = n_kv;
|
||||
params.n_batch = std::min(params.n_batch, n_kv);
|
||||
} else {
|
||||
params.n_batch = std::min(params.n_batch, params.n_ctx);
|
||||
}
|
||||
|
||||
if (params.ppl_stride > 0) {
|
||||
fprintf(stderr, "Will perform strided perplexity calculation -> adjusting context size from %d to %d\n",
|
||||
@@ -1815,6 +1858,9 @@ int main(int argc, char ** argv) {
|
||||
llama_model * model;
|
||||
llama_context * ctx;
|
||||
|
||||
// ensure there's at least enough seq_ids for HellaSwag
|
||||
params.n_parallel = std::max(4, params.n_parallel);
|
||||
|
||||
// load the model and apply lora adapter, if any
|
||||
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
||||
if (model == NULL) {
|
||||
@@ -1844,7 +1890,7 @@ int main(int argc, char ** argv) {
|
||||
} else if (params.kl_divergence) {
|
||||
kl_divergence(ctx, params);
|
||||
} else {
|
||||
results = perplexity(ctx, params);
|
||||
results = perplexity(ctx, params, n_ctx);
|
||||
}
|
||||
|
||||
llama_print_timings(ctx);
|
||||
|
||||
34
examples/server-embd.py
Normal file
34
examples/server-embd.py
Normal file
@@ -0,0 +1,34 @@
|
||||
import asyncio
|
||||
import requests
|
||||
import numpy as np
|
||||
|
||||
n = 8
|
||||
|
||||
result = []
|
||||
|
||||
async def requests_post_async(*args, **kwargs):
|
||||
return await asyncio.to_thread(requests.post, *args, **kwargs)
|
||||
|
||||
async def main():
|
||||
model_url = "http://127.0.0.1:6900"
|
||||
responses: list[requests.Response] = await asyncio.gather(*[requests_post_async(
|
||||
url= f"{model_url}/embedding",
|
||||
json= {"content": str(0)*1024}
|
||||
) for i in range(n)])
|
||||
|
||||
for response in responses:
|
||||
embedding = response.json()["embedding"]
|
||||
print(embedding[-8:])
|
||||
result.append(embedding)
|
||||
|
||||
asyncio.run(main())
|
||||
|
||||
# compute cosine similarity
|
||||
|
||||
for i in range(n-1):
|
||||
for j in range(i+1, n):
|
||||
embedding1 = np.array(result[i])
|
||||
embedding2 = np.array(result[j])
|
||||
similarity = np.dot(embedding1, embedding2) / (np.linalg.norm(embedding1) * np.linalg.norm(embedding2))
|
||||
print(f"Similarity between {i} and {j}: {similarity:.2f}")
|
||||
|
||||
@@ -1,12 +1,18 @@
|
||||
set(TARGET server)
|
||||
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
|
||||
option(LLAMA_SERVER_SSL "Build SSL support for the server" OFF)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
add_executable(${TARGET} server.cpp oai.hpp utils.hpp json.hpp httplib.h)
|
||||
add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h)
|
||||
install(TARGETS ${TARGET} RUNTIME)
|
||||
target_compile_definitions(${TARGET} PRIVATE
|
||||
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
|
||||
)
|
||||
target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
|
||||
if (LLAMA_SERVER_SSL)
|
||||
find_package(OpenSSL REQUIRED)
|
||||
target_link_libraries(${TARGET} PRIVATE OpenSSL::SSL OpenSSL::Crypto)
|
||||
target_compile_definitions(${TARGET} PRIVATE CPPHTTPLIB_OPENSSL_SUPPORT)
|
||||
endif()
|
||||
if (WIN32)
|
||||
TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
|
||||
endif()
|
||||
|
||||
@@ -18,6 +18,7 @@ The project is under active development, and we are [looking for feedback and co
|
||||
|
||||
- `--threads N`, `-t N`: Set the number of threads to use during generation.
|
||||
- `-tb N, --threads-batch N`: Set the number of threads to use during batch and prompt processing. If not specified, the number of threads will be set to the number of threads used for generation.
|
||||
- `--threads-http N`: number of threads in the http server pool to process requests (default: `max(std::thread::hardware_concurrency() - 1, --parallel N + 2)`)
|
||||
- `-m FNAME`, `--model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.gguf`).
|
||||
- `-a ALIAS`, `--alias ALIAS`: Set an alias for the model. The alias will be returned in API responses.
|
||||
- `-c N`, `--ctx-size N`: Set the size of the prompt context. The default is 512, but LLaMA models were built with a context of 2048, which will provide better results for longer input/inference. The size may differ in other models, for example, baichuan models were build with a context of 4096.
|
||||
@@ -41,7 +42,7 @@ see https://github.com/ggerganov/llama.cpp/issues/1437
|
||||
- `-to N`, `--timeout N`: Server read/write timeout in seconds. Default `600`.
|
||||
- `--host`: Set the hostname or ip address to listen. Default `127.0.0.1`.
|
||||
- `--port`: Set the port to listen. Default: `8080`.
|
||||
- `--path`: path from which to serve static files (default examples/server/public)
|
||||
- `--path`: path from which to serve static files (default: disabled)
|
||||
- `--api-key`: Set an api key for request authorization. By default the server responds to every request. With an api key set, the requests must have the Authorization header set with the api key as Bearer token. May be used multiple times to enable multiple valid keys.
|
||||
- `--api-key-file`: path to file containing api keys delimited by new lines. If set, requests must include one of the keys for access. May be used in conjunction with `--api-key`'s.
|
||||
- `--embedding`: Enable embedding extraction, Default: disabled.
|
||||
@@ -58,6 +59,10 @@ see https://github.com/ggerganov/llama.cpp/issues/1437
|
||||
- `--log-disable`: Output logs to stdout only, default: enabled.
|
||||
- `--log-format FORMAT`: Define the log output to FORMAT: json or text (default: json)
|
||||
|
||||
**If compiled with `LLAMA_SERVER_SSL=ON`**
|
||||
- `--ssl-key-file FNAME`: path to file a PEM-encoded SSL private key
|
||||
- `--ssl-cert-file FNAME`: path to file a PEM-encoded SSL certificate
|
||||
|
||||
## Build
|
||||
|
||||
server is build alongside everything else from the root of the project
|
||||
@@ -74,6 +79,28 @@ server is build alongside everything else from the root of the project
|
||||
cmake --build . --config Release
|
||||
```
|
||||
|
||||
## Build with SSL
|
||||
|
||||
server can also be built with SSL support using OpenSSL 3
|
||||
|
||||
- Using `make`:
|
||||
|
||||
```bash
|
||||
# NOTE: For non-system openssl, use the following:
|
||||
# CXXFLAGS="-I /path/to/openssl/include"
|
||||
# LDFLAGS="-L /path/to/openssl/lib"
|
||||
make LLAMA_SERVER_SSL=true server
|
||||
```
|
||||
|
||||
- Using `CMake`:
|
||||
|
||||
```bash
|
||||
mkdir build
|
||||
cd build
|
||||
cmake .. -DLLAMA_SERVER_SSL=ON
|
||||
make server
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
To get started right away, run the following command, making sure to use the correct path for the model you have:
|
||||
@@ -168,7 +195,11 @@ node index.js
|
||||
|
||||
*Options:*
|
||||
|
||||
`prompt`: Provide the prompt for this completion as a string or as an array of strings or numbers representing tokens. Internally, the prompt is compared to the previous completion and only the "unseen" suffix is evaluated. If the prompt is a string or an array with the first element given as a string, a `bos` token is inserted in the front like `main` does.
|
||||
`prompt`: Provide the prompt for this completion as a string or as an array of strings or numbers representing tokens. Internally, if `cache_prompt` is `true`, the prompt is compared to the previous completion and only the "unseen" suffix is evaluated. A `BOS` token is inserted at the start, if all of the following conditions are true:
|
||||
|
||||
- The prompt is a string or an array with the first element given as a string
|
||||
- The model's `tokenizer.ggml.add_bos_token` metadata is `true`
|
||||
- The system prompt is empty
|
||||
|
||||
`temperature`: Adjust the randomness of the generated text (default: 0.8).
|
||||
|
||||
@@ -281,7 +312,7 @@ Notice that each `probs` is an array of length `n_probs`.
|
||||
|
||||
`content`: Set the text to tokenize.
|
||||
|
||||
Note that the special `BOS` token is not added in front of the text and also a space character is not inserted automatically as it is for `/completion`.
|
||||
Note that a special `BOS` token is never inserted.
|
||||
|
||||
- **POST** `/detokenize`: Convert tokens to text.
|
||||
|
||||
@@ -325,7 +356,7 @@ Notice that each `probs` is an array of length `n_probs`.
|
||||
- `default_generation_settings` - the default generation settings for the `/completion` endpoint, has the same fields as the `generation_settings` response object from the `/completion` endpoint.
|
||||
- `total_slots` - the total number of slots for process requests (defined by `--parallel` option)
|
||||
|
||||
- **POST** `/v1/chat/completions`: OpenAI-compatible Chat Completions API. Given a ChatML-formatted json description in `messages`, it returns the predicted completion. Both synchronous and streaming mode are supported, so scripted and interactive applications work fine. While no strong claims of compatibility with OpenAI API spec is being made, in our experience it suffices to support many apps. Only ChatML-tuned models, such as Dolphin, OpenOrca, OpenHermes, OpenChat-3.5, etc can be used with this endpoint. Compared to `api_like_OAI.py` this API implementation does not require a wrapper to be served.
|
||||
- **POST** `/v1/chat/completions`: OpenAI-compatible Chat Completions API. Given a ChatML-formatted json description in `messages`, it returns the predicted completion. Both synchronous and streaming mode are supported, so scripted and interactive applications work fine. While no strong claims of compatibility with OpenAI API spec is being made, in our experience it suffices to support many apps. Only ChatML-tuned models, such as Dolphin, OpenOrca, OpenHermes, OpenChat-3.5, etc can be used with this endpoint.
|
||||
|
||||
*Options:*
|
||||
|
||||
@@ -435,7 +466,7 @@ Notice that each `probs` is an array of length `n_probs`.
|
||||
"next_token": {
|
||||
"has_next_token": true,
|
||||
"n_remain": -1,
|
||||
"num_tokens_predicted": 0,
|
||||
"n_decoded": 0,
|
||||
"stopped_eos": false,
|
||||
"stopped_limit": false,
|
||||
"stopped_word": false,
|
||||
@@ -527,24 +558,11 @@ bash chat.sh
|
||||
|
||||
### API like OAI
|
||||
|
||||
API example using Python Flask: [api_like_OAI.py](api_like_OAI.py)
|
||||
This example must be used with server.cpp
|
||||
|
||||
```sh
|
||||
python api_like_OAI.py
|
||||
```
|
||||
|
||||
After running the API server, you can use it in Python by setting the API base URL.
|
||||
|
||||
```python
|
||||
openai.api_base = "http://<Your api-server IP>:port"
|
||||
```
|
||||
|
||||
Then you can utilize llama.cpp as an OpenAI's **chat.completion** or **text_completion** API
|
||||
The HTTP server supports OAI-like API
|
||||
|
||||
### Extending or building alternative Web Front End
|
||||
|
||||
The default location for the static files is `examples/server/public`. You can extend the front end by running the server binary with `--path` set to `./your-directory` and importing `/completion.js` to get access to the llamaComplete() method.
|
||||
You can extend the front end by running the server binary with `--path` set to `./your-directory` and importing `/completion.js` to get access to the llamaComplete() method.
|
||||
|
||||
Read the documentation in `/completion.js` to see convenient ways to access llama.
|
||||
|
||||
|
||||
@@ -1,228 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
from flask import Flask, jsonify, request, Response
|
||||
import urllib.parse
|
||||
import requests
|
||||
import time
|
||||
import json
|
||||
|
||||
|
||||
app = Flask(__name__)
|
||||
slot_id = -1
|
||||
|
||||
parser = argparse.ArgumentParser(description="An example of using server.cpp with a similar API to OAI. It must be used together with server.cpp.")
|
||||
parser.add_argument("--chat-prompt", type=str, help="the top prompt in chat completions(default: 'A chat between a curious user and an artificial intelligence assistant. The assistant follows the given rules no matter what.')", default='A chat between a curious user and an artificial intelligence assistant. The assistant follows the given rules no matter what.')
|
||||
parser.add_argument("--user-name", type=str, help="USER name in chat completions(default: 'USER: ')", default="USER: ")
|
||||
parser.add_argument("--ai-name", type=str, help="ASSISTANT name in chat completions(default: 'ASSISTANT: ')", default="ASSISTANT: ")
|
||||
parser.add_argument("--system-name", type=str, help="SYSTEM name in chat completions(default: 'ASSISTANT's RULE: ')", default="ASSISTANT's RULE: ")
|
||||
parser.add_argument("--stop", type=str, help="the end of response in chat completions(default: '</s>')", default="</s>")
|
||||
parser.add_argument("--llama-api", type=str, help="Set the address of server.cpp in llama.cpp(default: http://127.0.0.1:8080)", default='http://127.0.0.1:8080')
|
||||
parser.add_argument("--api-key", type=str, help="Set the api key to allow only few user(default: NULL)", default="")
|
||||
parser.add_argument("--host", type=str, help="Set the ip address to listen.(default: 127.0.0.1)", default='127.0.0.1')
|
||||
parser.add_argument("--port", type=int, help="Set the port to listen.(default: 8081)", default=8081)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
def is_present(json, key):
|
||||
try:
|
||||
buf = json[key]
|
||||
except KeyError:
|
||||
return False
|
||||
if json[key] == None:
|
||||
return False
|
||||
return True
|
||||
|
||||
#convert chat to prompt
|
||||
def convert_chat(messages):
|
||||
|
||||
system_n = args.system_name
|
||||
user_n = args.user_name
|
||||
ai_n = args.ai_name
|
||||
stop = args.stop
|
||||
|
||||
prompt = "" + args.chat_prompt + stop
|
||||
|
||||
for line in messages:
|
||||
if (line["role"] == "system"):
|
||||
prompt += f"{system_n}{line['content']}{stop}"
|
||||
if (line["role"] == "user"):
|
||||
prompt += f"{user_n}{line['content']}{stop}"
|
||||
if (line["role"] == "assistant"):
|
||||
prompt += f"{ai_n}{line['content']}{stop}"
|
||||
prompt += ai_n.rstrip()
|
||||
|
||||
return prompt
|
||||
|
||||
def make_postData(body, chat=False, stream=False):
|
||||
postData = {}
|
||||
if (chat):
|
||||
postData["prompt"] = convert_chat(body["messages"])
|
||||
else:
|
||||
postData["prompt"] = body["prompt"]
|
||||
if(is_present(body, "temperature")): postData["temperature"] = body["temperature"]
|
||||
if(is_present(body, "top_k")): postData["top_k"] = body["top_k"]
|
||||
if(is_present(body, "top_p")): postData["top_p"] = body["top_p"]
|
||||
if(is_present(body, "max_tokens")): postData["n_predict"] = body["max_tokens"]
|
||||
if(is_present(body, "presence_penalty")): postData["presence_penalty"] = body["presence_penalty"]
|
||||
if(is_present(body, "frequency_penalty")): postData["frequency_penalty"] = body["frequency_penalty"]
|
||||
if(is_present(body, "repeat_penalty")): postData["repeat_penalty"] = body["repeat_penalty"]
|
||||
if(is_present(body, "mirostat")): postData["mirostat"] = body["mirostat"]
|
||||
if(is_present(body, "mirostat_tau")): postData["mirostat_tau"] = body["mirostat_tau"]
|
||||
if(is_present(body, "mirostat_eta")): postData["mirostat_eta"] = body["mirostat_eta"]
|
||||
if(is_present(body, "seed")): postData["seed"] = body["seed"]
|
||||
if(is_present(body, "grammar")): postData["grammar"] = body["grammar"]
|
||||
if(is_present(body, "logit_bias")): postData["logit_bias"] = [[int(token), body["logit_bias"][token]] for token in body["logit_bias"].keys()]
|
||||
if (args.stop != ""):
|
||||
postData["stop"] = [args.stop]
|
||||
else:
|
||||
postData["stop"] = []
|
||||
if(is_present(body, "stop")): postData["stop"] += body["stop"]
|
||||
postData["n_keep"] = -1
|
||||
postData["stream"] = stream
|
||||
postData["cache_prompt"] = True
|
||||
postData["slot_id"] = slot_id
|
||||
return postData
|
||||
|
||||
def make_resData(data, chat=False, promptToken=[]):
|
||||
resData = {
|
||||
"id": "chatcmpl" if (chat) else "cmpl",
|
||||
"object": "chat.completion" if (chat) else "text_completion",
|
||||
"created": int(time.time()),
|
||||
"truncated": data["truncated"],
|
||||
"model": "LLaMA_CPP",
|
||||
"usage": {
|
||||
"prompt_tokens": data["tokens_evaluated"],
|
||||
"completion_tokens": data["tokens_predicted"],
|
||||
"total_tokens": data["tokens_evaluated"] + data["tokens_predicted"]
|
||||
}
|
||||
}
|
||||
if (len(promptToken) != 0):
|
||||
resData["promptToken"] = promptToken
|
||||
if (chat):
|
||||
#only one choice is supported
|
||||
resData["choices"] = [{
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": data["content"],
|
||||
},
|
||||
"finish_reason": "stop" if (data["stopped_eos"] or data["stopped_word"]) else "length"
|
||||
}]
|
||||
else:
|
||||
#only one choice is supported
|
||||
resData["choices"] = [{
|
||||
"text": data["content"],
|
||||
"index": 0,
|
||||
"logprobs": None,
|
||||
"finish_reason": "stop" if (data["stopped_eos"] or data["stopped_word"]) else "length"
|
||||
}]
|
||||
return resData
|
||||
|
||||
def make_resData_stream(data, chat=False, time_now = 0, start=False):
|
||||
resData = {
|
||||
"id": "chatcmpl" if (chat) else "cmpl",
|
||||
"object": "chat.completion.chunk" if (chat) else "text_completion.chunk",
|
||||
"created": time_now,
|
||||
"model": "LLaMA_CPP",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": None,
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
slot_id = data.get("slot_id")
|
||||
if (chat):
|
||||
if (start):
|
||||
resData["choices"][0]["delta"] = {
|
||||
"role": "assistant"
|
||||
}
|
||||
else:
|
||||
resData["choices"][0]["delta"] = {
|
||||
"content": data["content"]
|
||||
}
|
||||
if (data["stop"]):
|
||||
resData["choices"][0]["finish_reason"] = "stop" if (data["stopped_eos"] or data["stopped_word"]) else "length"
|
||||
else:
|
||||
resData["choices"][0]["text"] = data["content"]
|
||||
if (data["stop"]):
|
||||
resData["choices"][0]["finish_reason"] = "stop" if (data["stopped_eos"] or data["stopped_word"]) else "length"
|
||||
|
||||
return resData
|
||||
|
||||
|
||||
@app.route('/chat/completions', methods=['POST', 'OPTIONS'])
|
||||
@app.route('/v1/chat/completions', methods=['POST', 'OPTIONS'])
|
||||
def chat_completions():
|
||||
if (args.api_key != "" and request.headers["Authorization"].split()[1] != args.api_key):
|
||||
return Response(status=403)
|
||||
if request.method == 'OPTIONS':
|
||||
return Response(headers={"Access-Control-Allow-Origin": "*", "Access-Control-Allow-Headers": "*"})
|
||||
body = request.get_json()
|
||||
stream = False
|
||||
tokenize = False
|
||||
if(is_present(body, "stream")): stream = body["stream"]
|
||||
if(is_present(body, "tokenize")): tokenize = body["tokenize"]
|
||||
postData = make_postData(body, chat=True, stream=stream)
|
||||
|
||||
promptToken = []
|
||||
if (tokenize):
|
||||
tokenData = requests.request("POST", urllib.parse.urljoin(args.llama_api, "/tokenize"), data=json.dumps({"content": postData["prompt"]})).json()
|
||||
promptToken = tokenData["tokens"]
|
||||
|
||||
if (not stream):
|
||||
data = requests.request("POST", urllib.parse.urljoin(args.llama_api, "/completion"), data=json.dumps(postData))
|
||||
print(data.json())
|
||||
resData = make_resData(data.json(), chat=True, promptToken=promptToken)
|
||||
return jsonify(resData)
|
||||
else:
|
||||
def generate():
|
||||
data = requests.request("POST", urllib.parse.urljoin(args.llama_api, "/completion"), data=json.dumps(postData), stream=True)
|
||||
time_now = int(time.time())
|
||||
resData = make_resData_stream({}, chat=True, time_now=time_now, start=True)
|
||||
yield 'data: {}\n\n'.format(json.dumps(resData))
|
||||
for line in data.iter_lines():
|
||||
if line:
|
||||
decoded_line = line.decode('utf-8')
|
||||
resData = make_resData_stream(json.loads(decoded_line[6:]), chat=True, time_now=time_now)
|
||||
yield 'data: {}\n\n'.format(json.dumps(resData))
|
||||
return Response(generate(), mimetype='text/event-stream', headers={"Access-Control-Allow-Origin": "*", "Access-Control-Allow-Headers": "*"})
|
||||
|
||||
|
||||
@app.route('/completions', methods=['POST', 'OPTIONS'])
|
||||
@app.route('/v1/completions', methods=['POST', 'OPTIONS'])
|
||||
def completion():
|
||||
if (args.api_key != "" and request.headers["Authorization"].split()[1] != args.api_key):
|
||||
return Response(status=403)
|
||||
if request.method == 'OPTIONS':
|
||||
return Response(headers={"Access-Control-Allow-Origin": "*", "Access-Control-Allow-Headers": "*"})
|
||||
body = request.get_json()
|
||||
stream = False
|
||||
tokenize = False
|
||||
if(is_present(body, "stream")): stream = body["stream"]
|
||||
if(is_present(body, "tokenize")): tokenize = body["tokenize"]
|
||||
postData = make_postData(body, chat=False, stream=stream)
|
||||
|
||||
promptToken = []
|
||||
if (tokenize):
|
||||
tokenData = requests.request("POST", urllib.parse.urljoin(args.llama_api, "/tokenize"), data=json.dumps({"content": postData["prompt"]})).json()
|
||||
promptToken = tokenData["tokens"]
|
||||
|
||||
if (not stream):
|
||||
data = requests.request("POST", urllib.parse.urljoin(args.llama_api, "/completion"), data=json.dumps(postData))
|
||||
print(data.json())
|
||||
resData = make_resData(data.json(), chat=False, promptToken=promptToken)
|
||||
return jsonify(resData)
|
||||
else:
|
||||
def generate():
|
||||
data = requests.request("POST", urllib.parse.urljoin(args.llama_api, "/completion"), data=json.dumps(postData), stream=True)
|
||||
time_now = int(time.time())
|
||||
for line in data.iter_lines():
|
||||
if line:
|
||||
decoded_line = line.decode('utf-8')
|
||||
resData = make_resData_stream(json.loads(decoded_line[6:]), chat=False, time_now=time_now)
|
||||
yield 'data: {}\n\n'.format(json.dumps(resData))
|
||||
return Response(generate(), mimetype='text/event-stream', headers={"Access-Control-Allow-Origin": "*", "Access-Control-Allow-Headers": "*"})
|
||||
|
||||
if __name__ == '__main__':
|
||||
app.run(args.host, port=args.port)
|
||||
88
examples/server/bench/README.md
Normal file
88
examples/server/bench/README.md
Normal file
@@ -0,0 +1,88 @@
|
||||
### Server benchmark tools
|
||||
|
||||
Benchmark is using [k6](https://k6.io/).
|
||||
|
||||
##### Install k6
|
||||
|
||||
Follow instruction from: https://k6.io/docs/get-started/installation/
|
||||
|
||||
Example for ubuntu:
|
||||
```shell
|
||||
snap install k6
|
||||
```
|
||||
|
||||
#### Download a dataset
|
||||
|
||||
This dataset was originally proposed in [vLLM benchmarks](https://github.com/vllm-project/vllm/blob/main/benchmarks/README.md).
|
||||
|
||||
```shell
|
||||
wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
|
||||
```
|
||||
|
||||
#### Download a model
|
||||
Example for PHI-2
|
||||
|
||||
```shell
|
||||
../../../scripts/hf.sh --repo ggml-org/models --file phi-2/ggml-model-q4_0.gguf
|
||||
```
|
||||
|
||||
#### Start the server
|
||||
The server must answer OAI Chat completion requests on `http://localhost:8080/v1` or according to the environment variable `SERVER_BENCH_URL`.
|
||||
|
||||
Example:
|
||||
```shell
|
||||
server --host localhost --port 8080 \
|
||||
--model ggml-model-q4_0.gguf \
|
||||
--cont-batching \
|
||||
--metrics \
|
||||
--parallel 8 \
|
||||
--batch-size 512 \
|
||||
--ctx-size 4096 \
|
||||
--log-format text \
|
||||
-ngl 33
|
||||
```
|
||||
|
||||
#### Run the benchmark
|
||||
|
||||
For 500 chat completions request with 8 concurrent users during maximum 10 minutes, run:
|
||||
```shell
|
||||
k6 run script.js --duration 10m --iterations 500 --vus 8
|
||||
```
|
||||
|
||||
The benchmark values can be overridden with:
|
||||
- `SERVER_BENCH_URL` server url prefix for chat completions, default `http://localhost:8080/v1`
|
||||
- `SERVER_BENCH_N_PROMPTS` total prompts to randomly select in the benchmark, default `480`
|
||||
- `SERVER_BENCH_MODEL_ALIAS` model alias to pass in the completion request, default `my-model`
|
||||
- `SERVER_BENCH_MAX_TOKENS` max tokens to predict, default: `512`
|
||||
- `SERVER_BENCH_DATASET` path to the benchmark dataset file
|
||||
- `SERVER_BENCH_MAX_PROMPT_TOKENS` maximum prompt tokens to filter out in the dataset: default `1024`
|
||||
- `SERVER_BENCH_MAX_CONTEXT` maximum context size of the completions request to filter out in the dataset: prompt + predicted tokens, default `2048`
|
||||
|
||||
Note: the local tokenizer is just a string space split, real number of tokens will differ.
|
||||
|
||||
Or with [k6 options](https://k6.io/docs/using-k6/k6-options/reference/):
|
||||
|
||||
```shell
|
||||
SERVER_BENCH_N_PROMPTS=500 k6 run script.js --duration 10m --iterations 500 --vus 8
|
||||
```
|
||||
|
||||
To [debug http request](https://k6.io/docs/using-k6/http-debugging/) use `--http-debug="full"`.
|
||||
|
||||
#### Metrics
|
||||
|
||||
Following metrics are available computed from the OAI chat completions response `usage`:
|
||||
- `llamacpp_tokens_second` Trend of `usage.total_tokens / request duration`
|
||||
- `llamacpp_prompt_tokens` Trend of `usage.prompt_tokens`
|
||||
- `llamacpp_prompt_tokens_total_counter` Counter of `usage.prompt_tokens`
|
||||
- `llamacpp_completion_tokens` Trend of `usage.completion_tokens`
|
||||
- `llamacpp_completion_tokens_total_counter` Counter of `usage.completion_tokens`
|
||||
- `llamacpp_completions_truncated_rate` Rate of completions truncated, i.e. if `finish_reason === 'length'`
|
||||
- `llamacpp_completions_stop_rate` Rate of completions stopped by the model, i.e. if `finish_reason === 'stop'`
|
||||
|
||||
The script will fail if too many completions are truncated, see `llamacpp_completions_truncated_rate`.
|
||||
|
||||
K6 metrics might be compared against [server metrics](../README.md), with:
|
||||
|
||||
```shell
|
||||
curl http://localhost:8080/metrics
|
||||
```
|
||||
120
examples/server/bench/script.js
Normal file
120
examples/server/bench/script.js
Normal file
@@ -0,0 +1,120 @@
|
||||
import http from 'k6/http'
|
||||
import {check, sleep} from 'k6'
|
||||
import {SharedArray} from 'k6/data'
|
||||
import {Counter, Rate, Trend} from 'k6/metrics'
|
||||
import exec from 'k6/execution';
|
||||
|
||||
// Server chat completions prefix
|
||||
const server_url = __ENV.SERVER_BENCH_URL ? __ENV.SERVER_BENCH_URL : 'http://localhost:8080/v1'
|
||||
|
||||
// Number of total prompts in the dataset - default 10m / 10 seconds/request * number of users
|
||||
const n_prompt = __ENV.SERVER_BENCH_N_PROMPTS ? parseInt(__ENV.SERVER_BENCH_N_PROMPTS) : 600 / 10 * 8
|
||||
|
||||
// Model name to request
|
||||
const model = __ENV.SERVER_BENCH_MODEL_ALIAS ? __ENV.SERVER_BENCH_MODEL_ALIAS : 'my-model'
|
||||
|
||||
// Dataset path
|
||||
const dataset_path = __ENV.SERVER_BENCH_DATASET ? __ENV.SERVER_BENCH_DATASET : './ShareGPT_V3_unfiltered_cleaned_split.json'
|
||||
|
||||
// Max tokens to predict
|
||||
const max_tokens = __ENV.SERVER_BENCH_MAX_TOKENS ? parseInt(__ENV.SERVER_BENCH_MAX_TOKENS) : 512
|
||||
|
||||
// Max prompt tokens
|
||||
const n_prompt_tokens = __ENV.SERVER_BENCH_MAX_PROMPT_TOKENS ? parseInt(__ENV.SERVER_BENCH_MAX_PROMPT_TOKENS) : 1024
|
||||
|
||||
// Max slot context
|
||||
const n_ctx_slot = __ENV.SERVER_BENCH_MAX_CONTEXT ? parseInt(__ENV.SERVER_BENCH_MAX_CONTEXT) : 2048
|
||||
|
||||
export function setup() {
|
||||
console.info(`Benchmark config: server_url=${server_url} n_prompt=${n_prompt} model=${model} dataset_path=${dataset_path} max_tokens=${max_tokens}`)
|
||||
}
|
||||
|
||||
const data = new SharedArray('conversations', function () {
|
||||
const tokenizer = (message) => message.split(/[\s,'".?]/)
|
||||
|
||||
return JSON.parse(open(dataset_path))
|
||||
// Filter out the conversations with less than 2 turns.
|
||||
.filter(data => data["conversations"].length >= 2)
|
||||
.filter(data => data["conversations"][0]["from"] === "human")
|
||||
.map(data => {
|
||||
return {
|
||||
prompt: data["conversations"][0]["value"],
|
||||
n_prompt_tokens: tokenizer(data["conversations"][0]["value"]).length,
|
||||
n_completion_tokens: tokenizer(data["conversations"][1]["value"]).length,
|
||||
}
|
||||
})
|
||||
// Filter out too short sequences
|
||||
.filter(conv => conv.n_prompt_tokens >= 4 && conv.n_completion_tokens >= 4)
|
||||
// Filter out too long sequences.
|
||||
.filter(conv => conv.n_prompt_tokens <= n_prompt_tokens && conv.n_prompt_tokens + conv.n_completion_tokens <= n_ctx_slot)
|
||||
// Keep only first n prompts
|
||||
.slice(0, n_prompt)
|
||||
})
|
||||
|
||||
const llamacpp_prompt_tokens = new Trend('llamacpp_prompt_tokens')
|
||||
const llamacpp_completion_tokens = new Trend('llamacpp_completion_tokens')
|
||||
const llamacpp_tokens_second = new Trend('llamacpp_tokens_second')
|
||||
|
||||
const llamacpp_prompt_tokens_total_counter = new Counter('llamacpp_prompt_tokens_total_counter')
|
||||
const llamacpp_completion_tokens_total_counter = new Counter('llamacpp_completion_tokens_total_counter')
|
||||
|
||||
const llamacpp_completions_truncated_rate = new Rate('llamacpp_completions_truncated_rate')
|
||||
const llamacpp_completions_stop_rate = new Rate('llamacpp_completions_stop_rate')
|
||||
|
||||
export const options = {
|
||||
thresholds: {
|
||||
llamacpp_completions_truncated_rate: [
|
||||
// more than 80% of truncated input will abort the test
|
||||
{threshold: 'rate < 0.8', abortOnFail: true, delayAbortEval: '1m'},
|
||||
],
|
||||
},
|
||||
duration: '10m',
|
||||
vus: 8,
|
||||
}
|
||||
|
||||
export default function () {
|
||||
const conversation = data[exec.scenario.iterationInInstance % data.length]
|
||||
const payload = {
|
||||
"messages": [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are ChatGPT, an AI assistant.",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": conversation.prompt,
|
||||
}
|
||||
],
|
||||
"model": model,
|
||||
"stream": false,
|
||||
"max_tokens": max_tokens
|
||||
}
|
||||
|
||||
const body = JSON.stringify(payload)
|
||||
|
||||
let res = http.post(`${server_url}/chat/completions`, body, {
|
||||
headers: {'Content-Type': 'application/json'},
|
||||
timeout: '300s'
|
||||
})
|
||||
|
||||
check(res, {'success completion': (r) => r.status === 200})
|
||||
|
||||
if (res.status === 200) {
|
||||
const completions = res.json()
|
||||
|
||||
llamacpp_prompt_tokens.add(completions.usage.prompt_tokens)
|
||||
llamacpp_prompt_tokens_total_counter.add(completions.usage.prompt_tokens)
|
||||
|
||||
llamacpp_completion_tokens.add(completions.usage.completion_tokens)
|
||||
llamacpp_completion_tokens_total_counter.add(completions.usage.completion_tokens)
|
||||
|
||||
llamacpp_completions_truncated_rate.add(completions.choices[0].finish_reason === 'length')
|
||||
llamacpp_completions_stop_rate.add(completions.choices[0].finish_reason === 'stop')
|
||||
|
||||
llamacpp_tokens_second.add(completions.usage.total_tokens / res.timings.duration * 1.e3)
|
||||
} else {
|
||||
console.error(`response: ${res.body} request=${payload}`)
|
||||
}
|
||||
|
||||
sleep(0.3)
|
||||
}
|
||||
@@ -1,225 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "json.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
#define DEFAULT_OAICOMPAT_MODEL "gpt-3.5-turbo-0613"
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
inline static json oaicompat_completion_params_parse(
|
||||
const struct llama_model * model,
|
||||
const json &body, /* openai api json semantics */
|
||||
const std::string &chat_template)
|
||||
{
|
||||
json llama_params;
|
||||
|
||||
llama_params["__oaicompat"] = true;
|
||||
|
||||
// Map OpenAI parameters to llama.cpp parameters
|
||||
//
|
||||
// For parameters that are defined by the OpenAI documentation (e.g.
|
||||
// temperature), we explicitly specify OpenAI's intended default; we
|
||||
// need to do that because sometimes OpenAI disagrees with llama.cpp
|
||||
//
|
||||
// https://platform.openai.com/docs/api-reference/chat/create
|
||||
llama_sampling_params default_sparams;
|
||||
llama_params["model"] = json_value(body, "model", std::string("unknown"));
|
||||
llama_params["prompt"] = format_chat(model, chat_template, body["messages"]);
|
||||
llama_params["cache_prompt"] = json_value(body, "cache_prompt", false);
|
||||
llama_params["temperature"] = json_value(body, "temperature", 0.0);
|
||||
llama_params["top_k"] = json_value(body, "top_k", default_sparams.top_k);
|
||||
llama_params["top_p"] = json_value(body, "top_p", 1.0);
|
||||
llama_params["n_predict"] = json_value(body, "max_tokens", -1);
|
||||
llama_params["logit_bias"] = json_value(body, "logit_bias",json::object());
|
||||
llama_params["frequency_penalty"] = json_value(body, "frequency_penalty", 0.0);
|
||||
llama_params["presence_penalty"] = json_value(body, "presence_penalty", 0.0);
|
||||
llama_params["seed"] = json_value(body, "seed", LLAMA_DEFAULT_SEED);
|
||||
llama_params["stream"] = json_value(body, "stream", false);
|
||||
llama_params["mirostat"] = json_value(body, "mirostat", default_sparams.mirostat);
|
||||
llama_params["mirostat_tau"] = json_value(body, "mirostat_tau", default_sparams.mirostat_tau);
|
||||
llama_params["mirostat_eta"] = json_value(body, "mirostat_eta", default_sparams.mirostat_eta);
|
||||
llama_params["penalize_nl"] = json_value(body, "penalize_nl", default_sparams.penalize_nl);
|
||||
llama_params["typical_p"] = json_value(body, "typical_p", default_sparams.typical_p);
|
||||
llama_params["repeat_last_n"] = json_value(body, "repeat_last_n", default_sparams.penalty_last_n);
|
||||
llama_params["ignore_eos"] = json_value(body, "ignore_eos", false);
|
||||
llama_params["tfs_z"] = json_value(body, "tfs_z", default_sparams.tfs_z);
|
||||
|
||||
if (body.count("grammar") != 0) {
|
||||
llama_params["grammar"] = json_value(body, "grammar", json::object());
|
||||
}
|
||||
|
||||
// Handle 'stop' field
|
||||
if (body.contains("stop") && body["stop"].is_string()) {
|
||||
llama_params["stop"] = json::array({body["stop"].get<std::string>()});
|
||||
} else {
|
||||
llama_params["stop"] = json_value(body, "stop", json::array());
|
||||
}
|
||||
|
||||
// Ensure there is ChatML-specific end sequence among stop words
|
||||
llama_params["stop"].push_back("<|im_end|>");
|
||||
|
||||
return llama_params;
|
||||
}
|
||||
|
||||
inline static json format_final_response_oaicompat(const json &request, const task_result &response, bool streaming = false)
|
||||
{
|
||||
json result = response.result_json;
|
||||
|
||||
bool stopped_word = result.count("stopped_word") != 0;
|
||||
bool stopped_eos = json_value(result, "stopped_eos", false);
|
||||
int num_tokens_predicted = json_value(result, "tokens_predicted", 0);
|
||||
int num_prompt_tokens = json_value(result, "tokens_evaluated", 0);
|
||||
std::string content = json_value(result, "content", std::string(""));
|
||||
|
||||
std::string finish_reason = "length";
|
||||
if (stopped_word || stopped_eos) {
|
||||
finish_reason = "stop";
|
||||
}
|
||||
|
||||
json choices =
|
||||
streaming ? json::array({json{{"finish_reason", finish_reason},
|
||||
{"index", 0},
|
||||
{"delta", json::object()}}})
|
||||
: json::array({json{{"finish_reason", finish_reason},
|
||||
{"index", 0},
|
||||
{"message", json{{"content", content},
|
||||
{"role", "assistant"}}}}});
|
||||
|
||||
std::time_t t = std::time(0);
|
||||
|
||||
json res =
|
||||
json{{"choices", choices},
|
||||
{"created", t},
|
||||
{"model",
|
||||
json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
|
||||
{"object", streaming ? "chat.completion.chunk" : "chat.completion"},
|
||||
{"usage",
|
||||
json{{"completion_tokens", num_tokens_predicted},
|
||||
{"prompt_tokens", num_prompt_tokens},
|
||||
{"total_tokens", num_tokens_predicted + num_prompt_tokens}}},
|
||||
{"id", gen_chatcmplid()}};
|
||||
|
||||
if (server_verbose) {
|
||||
res["__verbose"] = result;
|
||||
}
|
||||
|
||||
if (result.contains("completion_probabilities")) {
|
||||
res["completion_probabilities"] = json_value(result, "completion_probabilities", json::array());
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
// return value is vector as there is one case where we might need to generate two responses
|
||||
inline static std::vector<json> format_partial_response_oaicompat(const task_result &response) {
|
||||
json result = response.result_json;
|
||||
|
||||
if (!result.contains("model") || !result.contains("oaicompat_token_ctr")) {
|
||||
return std::vector<json>({response.result_json});
|
||||
}
|
||||
|
||||
bool first = json_value(result, "oaicompat_token_ctr", 0) == 0;
|
||||
std::string modelname = json_value(result, "model", std::string(DEFAULT_OAICOMPAT_MODEL));
|
||||
|
||||
bool stopped_word = json_value(result, "stopped_word", false);
|
||||
bool stopped_eos = json_value(result, "stopped_eos", false);
|
||||
bool stopped_limit = json_value(result, "stopped_limit", false);
|
||||
std::string content = json_value(result, "content", std::string(""));
|
||||
|
||||
std::string finish_reason;
|
||||
if (stopped_word || stopped_eos) {
|
||||
finish_reason = "stop";
|
||||
}
|
||||
if (stopped_limit) {
|
||||
finish_reason = "length";
|
||||
}
|
||||
|
||||
std::time_t t = std::time(0);
|
||||
|
||||
json choices;
|
||||
|
||||
if (!finish_reason.empty()) {
|
||||
choices = json::array({json{{"finish_reason", finish_reason},
|
||||
{"index", 0},
|
||||
{"delta", json::object()}}});
|
||||
} else {
|
||||
if (first) {
|
||||
if (content.empty()) {
|
||||
choices = json::array({json{{"finish_reason", nullptr},
|
||||
{"index", 0},
|
||||
{"delta", json{{"role", "assistant"}}}}});
|
||||
} else {
|
||||
// We have to send this as two updates to conform to openai behavior
|
||||
json initial_ret = json{{"choices", json::array({json{
|
||||
{"finish_reason", nullptr},
|
||||
{"index", 0},
|
||||
{"delta", json{
|
||||
{"role", "assistant"}
|
||||
}}}})},
|
||||
{"created", t},
|
||||
{"id", gen_chatcmplid()},
|
||||
{"model", modelname},
|
||||
{"object", "chat.completion.chunk"}};
|
||||
|
||||
json second_ret = json{
|
||||
{"choices", json::array({json{{"finish_reason", nullptr},
|
||||
{"index", 0},
|
||||
{"delta", json{
|
||||
{"content", content}}}
|
||||
}})},
|
||||
{"created", t},
|
||||
{"id", gen_chatcmplid()},
|
||||
{"model", modelname},
|
||||
{"object", "chat.completion.chunk"}};
|
||||
|
||||
return std::vector<json>({initial_ret, second_ret});
|
||||
}
|
||||
} else {
|
||||
// Some idiosyncrasy in task processing logic makes several trailing calls
|
||||
// with empty content, we ignore these at the calee site.
|
||||
if (content.empty()) {
|
||||
return std::vector<json>({json::object()});
|
||||
}
|
||||
|
||||
choices = json::array({json{
|
||||
{"finish_reason", nullptr},
|
||||
{"index", 0},
|
||||
{"delta",
|
||||
json{
|
||||
{"content", content},
|
||||
}},
|
||||
}});
|
||||
}
|
||||
}
|
||||
|
||||
json ret = json{{"choices", choices},
|
||||
{"created", t},
|
||||
{"id", gen_chatcmplid()},
|
||||
{"model", modelname},
|
||||
{"object", "chat.completion.chunk"}};
|
||||
|
||||
return std::vector<json>({ret});
|
||||
}
|
||||
|
||||
inline static json format_embeddings_response_oaicompat(const json &request, const json &embeddings)
|
||||
{
|
||||
json res =
|
||||
json{
|
||||
{"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
|
||||
{"object", "list"},
|
||||
{"usage",
|
||||
json{{"prompt_tokens", 0},
|
||||
{"total_tokens", 0}}},
|
||||
{"data", embeddings}
|
||||
};
|
||||
return res;
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,22 +1,30 @@
|
||||
# Server tests
|
||||
|
||||
Python based server tests scenario using [BDD](https://en.wikipedia.org/wiki/Behavior-driven_development) and [behave](https://behave.readthedocs.io/en/latest/):
|
||||
* [issues.feature](./features/issues.feature) Pending issues scenario
|
||||
* [parallel.feature](./features/parallel.feature) Scenario involving multi slots and concurrent requests
|
||||
* [security.feature](./features/security.feature) Security, CORS and API Key
|
||||
* [server.feature](./features/server.feature) Server base scenario: completion, embedding, tokenization, etc...
|
||||
Python based server tests scenario using [BDD](https://en.wikipedia.org/wiki/Behavior-driven_development)
|
||||
and [behave](https://behave.readthedocs.io/en/latest/):
|
||||
|
||||
* [issues.feature](./features/issues.feature) Pending issues scenario
|
||||
* [parallel.feature](./features/parallel.feature) Scenario involving multi slots and concurrent requests
|
||||
* [security.feature](./features/security.feature) Security, CORS and API Key
|
||||
* [server.feature](./features/server.feature) Server base scenario: completion, embedding, tokenization, etc...
|
||||
|
||||
Tests target GitHub workflows job runners with 4 vCPU.
|
||||
|
||||
Requests are using [aiohttp](https://docs.aiohttp.org/en/stable/client_reference.html), [asyncio](https://docs.python.org/fr/3/library/asyncio.html) based http client.
|
||||
Requests are
|
||||
using [aiohttp](https://docs.aiohttp.org/en/stable/client_reference.html), [asyncio](https://docs.python.org/fr/3/library/asyncio.html)
|
||||
based http client.
|
||||
|
||||
Note: If the host architecture inference speed is faster than GitHub runners one, parallel scenario may randomly fail. To mitigate it, you can increase values in `n_predict`, `kv_size`.
|
||||
Note: If the host architecture inference speed is faster than GitHub runners one, parallel scenario may randomly fail.
|
||||
To mitigate it, you can increase values in `n_predict`, `kv_size`.
|
||||
|
||||
### Install dependencies
|
||||
|
||||
`pip install -r requirements.txt`
|
||||
|
||||
### Run tests
|
||||
|
||||
1. Build the server
|
||||
|
||||
```shell
|
||||
cd ../../..
|
||||
mkdir build
|
||||
@@ -24,24 +32,36 @@ cd build
|
||||
cmake ../
|
||||
cmake --build . --target server
|
||||
```
|
||||
2. download required models:
|
||||
1. `../../../scripts/hf.sh --repo ggml-org/models --file tinyllamas/stories260K.gguf`
|
||||
3. Start the test: `./tests.sh`
|
||||
|
||||
2. Start the test: `./tests.sh`
|
||||
|
||||
It's possible to override some scenario steps values with environment variables:
|
||||
- `PORT` -> `context.server_port` to set the listening port of the server during scenario, default: `8080`
|
||||
- `LLAMA_SERVER_BIN_PATH` -> to change the server binary path, default: `../../../build/bin/server`
|
||||
- `DEBUG` -> "ON" to enable steps and server verbose mode `--verbose`
|
||||
- `SERVER_LOG_FORMAT_JSON` -> if set switch server logs to json format
|
||||
|
||||
| variable | description |
|
||||
|--------------------------|------------------------------------------------------------------------------------------------|
|
||||
| `PORT` | `context.server_port` to set the listening port of the server during scenario, default: `8080` |
|
||||
| `LLAMA_SERVER_BIN_PATH` | to change the server binary path, default: `../../../build/bin/server` |
|
||||
| `DEBUG` | "ON" to enable steps and server verbose mode `--verbose` |
|
||||
| `SERVER_LOG_FORMAT_JSON` | if set switch server logs to json format |
|
||||
| `N_GPU_LAYERS` | number of model layers to offload to VRAM `-ngl --n-gpu-layers` |
|
||||
|
||||
### Run @bug, @wip or @wrong_usage annotated scenario
|
||||
|
||||
Feature or Scenario must be annotated with `@llama.cpp` to be included in the default scope.
|
||||
|
||||
- `@bug` annotation aims to link a scenario with a GitHub issue.
|
||||
- `@wrong_usage` are meant to show user issue that are actually an expected behavior
|
||||
- `@wip` to focus on a scenario working in progress
|
||||
- `@slow` heavy test, disabled by default
|
||||
|
||||
To run a scenario annotated with `@bug`, start:
|
||||
`DEBUG=ON ./tests.sh --no-skipped --tags bug`
|
||||
|
||||
```shell
|
||||
DEBUG=ON ./tests.sh --no-skipped --tags bug
|
||||
```
|
||||
|
||||
After changing logic in `steps.py`, ensure that `@bug` and `@wrong_usage` scenario are updated.
|
||||
|
||||
```shell
|
||||
./tests.sh --no-skipped --tags bug,wrong_usage || echo "should failed but compile"
|
||||
```
|
||||
|
||||
94
examples/server/tests/features/embeddings.feature
Normal file
94
examples/server/tests/features/embeddings.feature
Normal file
@@ -0,0 +1,94 @@
|
||||
@llama.cpp
|
||||
@embeddings
|
||||
Feature: llama.cpp server
|
||||
|
||||
Background: Server startup
|
||||
Given a server listening on localhost:8080
|
||||
And a model file bert-bge-small/ggml-model-f16.gguf from HF repo ggml-org/models
|
||||
And a model alias bert-bge-small
|
||||
And 42 as server seed
|
||||
And 2 slots
|
||||
And 1024 as batch size
|
||||
And 2048 KV cache size
|
||||
And embeddings extraction
|
||||
Then the server is starting
|
||||
Then the server is healthy
|
||||
|
||||
Scenario: Embedding
|
||||
When embeddings are computed for:
|
||||
"""
|
||||
What is the capital of Bulgaria ?
|
||||
"""
|
||||
Then embeddings are generated
|
||||
|
||||
Scenario: OAI Embeddings compatibility
|
||||
Given a model bert-bge-small
|
||||
When an OAI compatible embeddings computation request for:
|
||||
"""
|
||||
What is the capital of Spain ?
|
||||
"""
|
||||
Then embeddings are generated
|
||||
|
||||
Scenario: OAI Embeddings compatibility with multiple inputs
|
||||
Given a model bert-bge-small
|
||||
Given a prompt:
|
||||
"""
|
||||
In which country Paris is located ?
|
||||
"""
|
||||
And a prompt:
|
||||
"""
|
||||
Is Madrid the capital of Spain ?
|
||||
"""
|
||||
When an OAI compatible embeddings computation request for multiple inputs
|
||||
Then embeddings are generated
|
||||
|
||||
Scenario: Multi users embeddings
|
||||
Given a prompt:
|
||||
"""
|
||||
Write a very long story about AI.
|
||||
"""
|
||||
And a prompt:
|
||||
"""
|
||||
Write another very long music lyrics.
|
||||
"""
|
||||
And a prompt:
|
||||
"""
|
||||
Write a very long poem.
|
||||
"""
|
||||
And a prompt:
|
||||
"""
|
||||
Write a very long joke.
|
||||
"""
|
||||
Given concurrent embedding requests
|
||||
Then the server is busy
|
||||
Then the server is idle
|
||||
Then all embeddings are generated
|
||||
|
||||
Scenario: Multi users OAI compatibility embeddings
|
||||
Given a prompt:
|
||||
"""
|
||||
In which country Paris is located ?
|
||||
"""
|
||||
And a prompt:
|
||||
"""
|
||||
Is Madrid the capital of Spain ?
|
||||
"""
|
||||
And a prompt:
|
||||
"""
|
||||
What is the biggest US city ?
|
||||
"""
|
||||
And a prompt:
|
||||
"""
|
||||
What is the capital of Bulgaria ?
|
||||
"""
|
||||
And a model bert-bge-small
|
||||
Given concurrent OAI embedding requests
|
||||
Then the server is busy
|
||||
Then the server is idle
|
||||
Then all embeddings are generated
|
||||
|
||||
Scenario: All embeddings should be the same
|
||||
Given 10 fixed prompts
|
||||
And a model bert-bge-small
|
||||
Given concurrent OAI embedding requests
|
||||
Then all embeddings are the same
|
||||
@@ -7,7 +7,10 @@ from signal import SIGKILL
|
||||
|
||||
|
||||
def before_scenario(context, scenario):
|
||||
print(f"\x1b[33;42mStarting new scenario: {scenario.name}!\x1b[0m")
|
||||
context.debug = 'DEBUG' in os.environ and os.environ['DEBUG'] == 'ON'
|
||||
if context.debug:
|
||||
print("DEBUG=ON\n")
|
||||
print(f"\x1b[33;42mStarting new scenario: {scenario.name}!\x1b[0m\n")
|
||||
port = 8080
|
||||
if 'PORT' in os.environ:
|
||||
port = int(os.environ['PORT'])
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
# List of ongoing issues
|
||||
# run with: DEBUG=ON ./tests.sh --no-skipped --tags bug
|
||||
@bug
|
||||
Feature: Issues
|
||||
# No confirmed issue at the moment
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
@llama.cpp
|
||||
@parallel
|
||||
Feature: Parallel
|
||||
|
||||
Background: Server startup
|
||||
Given a server listening on localhost:8080
|
||||
And a model file stories260K.gguf
|
||||
And a model alias tinyllama-2
|
||||
And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
|
||||
And 42 as server seed
|
||||
And 64 KV cache size
|
||||
And 128 as batch size
|
||||
And 256 KV cache size
|
||||
And 2 slots
|
||||
And embeddings extraction
|
||||
And continuous batching
|
||||
Then the server is starting
|
||||
Then the server is healthy
|
||||
@@ -54,6 +54,29 @@ Feature: Parallel
|
||||
| disabled | 128 |
|
||||
| enabled | 64 |
|
||||
|
||||
Scenario Outline: Multi users OAI completions compatibility no v1
|
||||
Given a system prompt You are a writer.
|
||||
And a model tinyllama-2
|
||||
Given a prompt:
|
||||
"""
|
||||
Write a very long book.
|
||||
"""
|
||||
And a prompt:
|
||||
"""
|
||||
Write another a poem.
|
||||
"""
|
||||
And <n_predict> max tokens to predict
|
||||
And streaming is <streaming>
|
||||
Given concurrent OAI completions requests no v1
|
||||
Then the server is busy
|
||||
Then the server is idle
|
||||
Then all prompts are predicted with <n_predict> tokens
|
||||
Examples:
|
||||
| streaming | n_predict |
|
||||
| disabled | 128 |
|
||||
| enabled | 64 |
|
||||
|
||||
|
||||
Scenario: Multi users with total number of tokens to predict exceeds the KV Cache size #3969
|
||||
Given a prompt:
|
||||
"""
|
||||
@@ -76,48 +99,3 @@ Feature: Parallel
|
||||
Then the server is busy
|
||||
Then the server is idle
|
||||
Then all prompts are predicted
|
||||
|
||||
Scenario: Multi users embeddings
|
||||
Given a prompt:
|
||||
"""
|
||||
Write a very long story about AI.
|
||||
"""
|
||||
And a prompt:
|
||||
"""
|
||||
Write another very long music lyrics.
|
||||
"""
|
||||
And a prompt:
|
||||
"""
|
||||
Write a very long poem.
|
||||
"""
|
||||
And a prompt:
|
||||
"""
|
||||
Write a very long joke.
|
||||
"""
|
||||
Given concurrent embedding requests
|
||||
Then the server is busy
|
||||
Then the server is idle
|
||||
Then all embeddings are generated
|
||||
|
||||
Scenario: Multi users OAI compatibility embeddings
|
||||
Given a prompt:
|
||||
"""
|
||||
In which country Paris is located ?
|
||||
"""
|
||||
And a prompt:
|
||||
"""
|
||||
Is Madrid the capital of Spain ?
|
||||
"""
|
||||
And a prompt:
|
||||
"""
|
||||
What is the biggest US city ?
|
||||
"""
|
||||
And a prompt:
|
||||
"""
|
||||
What is the capital of Bulgaria ?
|
||||
"""
|
||||
And a model tinyllama-2
|
||||
Given concurrent OAI embedding requests
|
||||
Then the server is busy
|
||||
Then the server is idle
|
||||
Then all embeddings are generated
|
||||
|
||||
55
examples/server/tests/features/passkey.feature
Normal file
55
examples/server/tests/features/passkey.feature
Normal file
@@ -0,0 +1,55 @@
|
||||
# run with: ./tests.sh --no-skipped --tags passkey
|
||||
@passkey
|
||||
@slow
|
||||
Feature: Passkey / Self-extend with context shift
|
||||
|
||||
Background: Server startup
|
||||
Given a server listening on localhost:8080
|
||||
|
||||
# Generates a long text of junk and inserts a secret passkey number inside it.
|
||||
# Then we query the LLM for the secret passkey.
|
||||
# see #3856 and #4810
|
||||
Scenario Outline: Passkey
|
||||
Given a model file <hf_file> from HF repo <hf_repo>
|
||||
And <n_batch> as batch size
|
||||
And <n_junk> as number of junk
|
||||
And <n_predicted> server max tokens to predict
|
||||
And 42 as seed
|
||||
And <n_ctx> KV cache size
|
||||
And 1 slots
|
||||
And <n_ga> group attention factor to extend context size through self-extend
|
||||
And <n_ga_w> group attention width to extend context size through self-extend
|
||||
# Can be override with N_GPU_LAYERS
|
||||
And <ngl> GPU offloaded layers
|
||||
Then the server is starting
|
||||
Then the server is healthy
|
||||
Given available models
|
||||
Then model 0 is trained on <n_ctx_train> tokens context
|
||||
Given a prefix prompt:
|
||||
"""
|
||||
here is an important info hidden inside a lot of irrelevant text. Find it and memorize them. I will quiz you about the important information there.
|
||||
"""
|
||||
And a passkey prompt template:
|
||||
"""
|
||||
The pass key is <passkey> Remember it. <passkey> is the pass key.
|
||||
"""
|
||||
And a junk suffix prompt:
|
||||
"""
|
||||
The grass is green. The sky is blue. The sun is yellow. Here we go. There and back again.
|
||||
"""
|
||||
And a suffix prompt:
|
||||
"""
|
||||
What is the pass key? The pass key is
|
||||
"""
|
||||
Given a "<passkey>" passkey challenge prompt with the passkey inserted every <i_pos> junk
|
||||
And a completion request with no api error
|
||||
Then <n_predicted> tokens are predicted matching <re_content>
|
||||
|
||||
Examples:
|
||||
| hf_repo | hf_file | n_ctx_train | ngl | n_ctx | n_batch | n_ga | n_ga_w | n_junk | i_pos | passkey | n_predicted | re_content |
|
||||
| TheBloke/phi-2-GGUF | phi-2.Q4_K_M.gguf | 2048 | 5 | 8192 | 512 | 4 | 512 | 250 | 50 | 42 | 1 | 42 |
|
||||
| TheBloke/phi-2-GGUF | phi-2.Q4_K_M.gguf | 2048 | 5 | 8192 | 512 | 2 | 512 | 250 | 50 | 42 | 1 | \b((?!42)\w)+\b |
|
||||
#| TheBloke/Llama-2-7B-GGUF | llama-2-7b.Q2_K.gguf | 4096 | 3 | 16384 | 512 | 4 | 512 | 500 | 300 | 1234 | 5 | 1234 |
|
||||
#| TheBloke/Mixtral-8x7B-v0.1-GGUF | mixtral-8x7b-v0.1.Q2_K.gguf | 32768 | 2 | 16384 | 512 | 4 | 512 | 500 | 100 | 0987 | 5 | 0
|
||||
# 987 |
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
@llama.cpp
|
||||
@security
|
||||
Feature: Security
|
||||
|
||||
Background: Server startup with an api key defined
|
||||
Given a server listening on localhost:8080
|
||||
And a model file stories260K.gguf
|
||||
And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
|
||||
And a server api key llama.cpp
|
||||
Then the server is starting
|
||||
Then the server is healthy
|
||||
@@ -38,8 +39,9 @@ Feature: Security
|
||||
|
||||
|
||||
Scenario Outline: CORS Options
|
||||
When an OPTIONS request is sent from <origin>
|
||||
Then CORS header <cors_header> is set to <cors_header_value>
|
||||
Given a user api key llama.cpp
|
||||
When an OPTIONS request is sent from <origin>
|
||||
Then CORS header <cors_header> is set to <cors_header_value>
|
||||
|
||||
Examples: Headers
|
||||
| origin | cors_header | cors_header_value |
|
||||
|
||||
@@ -1,18 +1,19 @@
|
||||
@llama.cpp
|
||||
@server
|
||||
Feature: llama.cpp server
|
||||
|
||||
Background: Server startup
|
||||
Given a server listening on localhost:8080
|
||||
And a model file stories260K.gguf
|
||||
And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
|
||||
And a model alias tinyllama-2
|
||||
And 42 as server seed
|
||||
# KV Cache corresponds to the total amount of tokens
|
||||
# that can be stored across all independent sequences: #4130
|
||||
# see --ctx-size and #5568
|
||||
And 32 KV cache size
|
||||
And 1 slots
|
||||
And embeddings extraction
|
||||
And 32 server max tokens to predict
|
||||
And 256 KV cache size
|
||||
And 32 as batch size
|
||||
And 2 slots
|
||||
And 64 server max tokens to predict
|
||||
And prometheus compatible metrics exposed
|
||||
Then the server is starting
|
||||
Then the server is healthy
|
||||
@@ -21,17 +22,35 @@ Feature: llama.cpp server
|
||||
Then the server is ready
|
||||
And all slots are idle
|
||||
|
||||
|
||||
Scenario Outline: Completion
|
||||
Given a prompt <prompt>
|
||||
And <n_predict> max tokens to predict
|
||||
And a completion request with no api error
|
||||
Then <n_predicted> tokens are predicted matching <re_content>
|
||||
And the completion is <truncated> truncated
|
||||
And <n_prompt> prompt tokens are processed
|
||||
And prometheus metrics are exposed
|
||||
And metric llamacpp:tokens_predicted is <n_predicted>
|
||||
|
||||
Examples: Prompts
|
||||
| prompt | n_predict | re_content | n_predicted |
|
||||
| I believe the meaning of life is | 8 | (read<or>going)+ | 8 |
|
||||
| Write a joke about AI | 64 | (park<or>friends<or>scared<or>always)+ | 32 |
|
||||
| prompt | n_predict | re_content | n_prompt | n_predicted | truncated |
|
||||
| I believe the meaning of life is | 8 | (read\|going)+ | 18 | 8 | not |
|
||||
| Write a joke about AI from a very long prompt which will not be truncated | 256 | (princesses\|everyone\|kids)+ | 46 | 64 | not |
|
||||
|
||||
Scenario: Completion prompt truncated
|
||||
Given a prompt:
|
||||
"""
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
|
||||
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
|
||||
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
|
||||
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||
"""
|
||||
And a completion request with no api error
|
||||
Then 64 tokens are predicted matching fun|Annaks|popcorns
|
||||
And the completion is truncated
|
||||
And 109 prompt tokens are processed
|
||||
|
||||
|
||||
Scenario Outline: OAI Compatibility
|
||||
Given a model <model>
|
||||
@@ -41,39 +60,13 @@ Feature: llama.cpp server
|
||||
And streaming is <enable_streaming>
|
||||
Given an OAI compatible chat completions request with no api error
|
||||
Then <n_predicted> tokens are predicted matching <re_content>
|
||||
And <n_prompt> prompt tokens are processed
|
||||
And the completion is <truncated> truncated
|
||||
|
||||
Examples: Prompts
|
||||
| model | system_prompt | user_prompt | max_tokens | re_content | n_predicted | enable_streaming |
|
||||
| llama-2 | Book | What is the best book | 8 | (Mom<or>what)+ | 8 | disabled |
|
||||
| codellama70b | You are a coding assistant. | Write the fibonacci function in c++. | 64 | (thanks<or>happy<or>bird)+ | 32 | enabled |
|
||||
|
||||
Scenario: Embedding
|
||||
When embeddings are computed for:
|
||||
"""
|
||||
What is the capital of Bulgaria ?
|
||||
"""
|
||||
Then embeddings are generated
|
||||
|
||||
Scenario: OAI Embeddings compatibility
|
||||
Given a model tinyllama-2
|
||||
When an OAI compatible embeddings computation request for:
|
||||
"""
|
||||
What is the capital of Spain ?
|
||||
"""
|
||||
Then embeddings are generated
|
||||
|
||||
Scenario: OAI Embeddings compatibility with multiple inputs
|
||||
Given a model tinyllama-2
|
||||
Given a prompt:
|
||||
"""
|
||||
In which country Paris is located ?
|
||||
"""
|
||||
And a prompt:
|
||||
"""
|
||||
Is Madrid the capital of Spain ?
|
||||
"""
|
||||
When an OAI compatible embeddings computation request for multiple inputs
|
||||
Then embeddings are generated
|
||||
| model | system_prompt | user_prompt | max_tokens | re_content | n_prompt | n_predicted | enable_streaming | truncated |
|
||||
| llama-2 | Book | What is the best book | 8 | (Here\|what)+ | 77 | 8 | disabled | not |
|
||||
| codellama70b | You are a coding assistant. | Write the fibonacci function in c++. | 128 | (thanks\|happy\|bird)+ | -1 | 64 | enabled | |
|
||||
|
||||
|
||||
Scenario: Tokenize / Detokenize
|
||||
@@ -82,3 +75,9 @@ Feature: llama.cpp server
|
||||
What is the capital of France ?
|
||||
"""
|
||||
Then tokens can be detokenize
|
||||
|
||||
Scenario: Models available
|
||||
Given available models
|
||||
Then 1 models are supported
|
||||
Then model 0 is identified by tinyllama-2
|
||||
Then model 0 is trained on 128 tokens context
|
||||
|
||||
@@ -10,9 +10,11 @@ from contextlib import closing
|
||||
from re import RegexFlag
|
||||
|
||||
import aiohttp
|
||||
import numpy as np
|
||||
import openai
|
||||
from behave import step
|
||||
from behave.api.async_step import async_run_until_complete
|
||||
from huggingface_hub import hf_hub_download
|
||||
from prometheus_client import parser
|
||||
|
||||
|
||||
@@ -23,20 +25,30 @@ def step_server_config(context, server_fqdn, server_port):
|
||||
if 'PORT' in os.environ:
|
||||
context.server_port = int(os.environ['PORT'])
|
||||
print(f"$PORT set, overriding server port with to {context.server_port}")
|
||||
if 'FQDN' in os.environ:
|
||||
context.server_fqdn = os.environ['FQDN']
|
||||
print(f"$FQDN set, overriding server fqdn with to {context.server_fqdn}")
|
||||
|
||||
context.base_url = f'http://{context.server_fqdn}:{context.server_port}'
|
||||
|
||||
context.debug = 'DEBUG' in os.environ and os.environ['DEBUG'] == 'ON'
|
||||
context.model_alias = None
|
||||
context.n_batch = None
|
||||
context.n_ctx = None
|
||||
context.n_ga = None
|
||||
context.n_ga_w = None
|
||||
context.n_gpu_layer = None
|
||||
context.n_predict = None
|
||||
context.n_prompts = 0
|
||||
context.n_server_predict = None
|
||||
context.n_slots = None
|
||||
context.prompt_prefix = None
|
||||
context.prompt_suffix = None
|
||||
context.server_api_key = None
|
||||
context.server_continuous_batching = False
|
||||
context.server_embeddings = False
|
||||
context.server_metrics = False
|
||||
context.server_process = None
|
||||
context.seed = None
|
||||
context.server_seed = None
|
||||
context.user_api_key = None
|
||||
|
||||
@@ -45,9 +57,11 @@ def step_server_config(context, server_fqdn, server_port):
|
||||
context.prompts = []
|
||||
|
||||
|
||||
@step(u'a model file {model_file}')
|
||||
def step_model_file(context, model_file):
|
||||
context.model_file = model_file
|
||||
@step(u'a model file {hf_file} from HF repo {hf_repo}')
|
||||
def step_download_hf_model(context, hf_file, hf_repo):
|
||||
context.model_file = hf_hub_download(repo_id=hf_repo, filename=hf_file)
|
||||
if context.debug:
|
||||
print(f"model file: {context.model_file}\n")
|
||||
|
||||
|
||||
@step(u'a model alias {model_alias}')
|
||||
@@ -55,24 +69,34 @@ def step_model_alias(context, model_alias):
|
||||
context.model_alias = model_alias
|
||||
|
||||
|
||||
@step(u'{seed} as server seed')
|
||||
@step(u'{seed:d} as server seed')
|
||||
def step_seed(context, seed):
|
||||
context.server_seed = int(seed)
|
||||
context.server_seed = seed
|
||||
|
||||
|
||||
@step(u'{n_ctx} KV cache size')
|
||||
@step(u'{ngl:d} GPU offloaded layers')
|
||||
def step_n_gpu_layer(context, ngl):
|
||||
if 'N_GPU_LAYERS' in os.environ:
|
||||
new_ngl = int(os.environ['N_GPU_LAYERS'])
|
||||
if context.debug:
|
||||
print(f"-ngl upgraded from {ngl} to {new_ngl}")
|
||||
ngl = new_ngl
|
||||
context.n_gpu_layer = ngl
|
||||
|
||||
|
||||
@step(u'{n_ctx:d} KV cache size')
|
||||
def step_n_ctx(context, n_ctx):
|
||||
context.n_ctx = int(n_ctx)
|
||||
context.n_ctx = n_ctx
|
||||
|
||||
|
||||
@step(u'{n_slots} slots')
|
||||
@step(u'{n_slots:d} slots')
|
||||
def step_n_slots(context, n_slots):
|
||||
context.n_slots = int(n_slots)
|
||||
context.n_slots = n_slots
|
||||
|
||||
|
||||
@step(u'{n_predict} server max tokens to predict')
|
||||
@step(u'{n_predict:d} server max tokens to predict')
|
||||
def step_server_n_predict(context, n_predict):
|
||||
context.n_server_predict = int(n_predict)
|
||||
context.n_server_predict = n_predict
|
||||
|
||||
|
||||
@step(u'continuous batching')
|
||||
@@ -116,11 +140,13 @@ async def step_wait_for_the_server_to_be_started(context, expecting_status):
|
||||
|
||||
case 'ready' | 'idle':
|
||||
await wait_for_health_status(context, context.base_url, 200, 'ok',
|
||||
timeout=10,
|
||||
params={'fail_on_no_slot': 0, 'include_slots': 0},
|
||||
slots_idle=context.n_slots,
|
||||
slots_processing=0,
|
||||
expected_slots=[{'id': slot_id, 'state': 0}
|
||||
for slot_id in range(context.n_slots)])
|
||||
for slot_id in
|
||||
range(context.n_slots if context.n_slots else 1)])
|
||||
case 'busy':
|
||||
await wait_for_health_status(context, context.base_url, 503,
|
||||
'no slot available',
|
||||
@@ -128,7 +154,8 @@ async def step_wait_for_the_server_to_be_started(context, expecting_status):
|
||||
slots_idle=0,
|
||||
slots_processing=context.n_slots,
|
||||
expected_slots=[{'id': slot_id, 'state': 1}
|
||||
for slot_id in range(context.n_slots)])
|
||||
for slot_id in
|
||||
range(context.n_slots if context.n_slots else 1)])
|
||||
case _:
|
||||
assert False, "unknown status"
|
||||
|
||||
@@ -157,29 +184,48 @@ async def step_request_completion(context, api_error):
|
||||
context.base_url,
|
||||
debug=context.debug,
|
||||
n_predict=context.n_predict,
|
||||
server_seed=context.server_seed,
|
||||
seed=await completions_seed(context),
|
||||
expect_api_error=expect_api_error,
|
||||
user_api_key=context.user_api_key)
|
||||
context.tasks_result.append(completion)
|
||||
if context.debug:
|
||||
print(f"Completion response: {completion}")
|
||||
print(f"Completion response: {completion}\n")
|
||||
if expect_api_error:
|
||||
assert completion == 401, f"completion must be an 401 status code: {completion}"
|
||||
|
||||
|
||||
@step(u'{predicted_n} tokens are predicted matching {re_content}')
|
||||
@step(u'{predicted_n:d} tokens are predicted matching {re_content}')
|
||||
def step_n_tokens_predicted_with_content(context, predicted_n, re_content):
|
||||
assert_n_tokens_predicted(context.tasks_result.pop(), int(predicted_n), re_content)
|
||||
context.completion = context.tasks_result.pop()
|
||||
assert_n_tokens_predicted(context.completion, predicted_n, re_content)
|
||||
|
||||
|
||||
@step(u'{predicted_n} tokens are predicted')
|
||||
@step(u'{predicted_n:d} tokens are predicted')
|
||||
def step_n_tokens_predicted(context, predicted_n):
|
||||
assert_n_tokens_predicted(context.tasks_result.pop(), int(predicted_n))
|
||||
context.completion = context.tasks_result.pop()
|
||||
assert_n_tokens_predicted(context.completion, predicted_n)
|
||||
|
||||
|
||||
@step(u'the completion is truncated')
|
||||
def step_assert_completion_truncated(context):
|
||||
step_assert_completion_truncated(context, '')
|
||||
|
||||
|
||||
@step(u'the completion is {truncated} truncated')
|
||||
def step_assert_completion_truncated(context, truncated):
|
||||
truncated = truncated != "not"
|
||||
assert context.completion['truncated'] == truncated, f'{context.completion}'
|
||||
|
||||
|
||||
@step(u'{n_prompt:d} prompt tokens are processed')
|
||||
def step_impl(context, n_prompt):
|
||||
assert n_prompt < 0 or n_prompt == context.completion['timings']['prompt_n'], f"n_prompt={context.completion['timings']['prompt_n']}"
|
||||
|
||||
|
||||
@step(u'a user prompt {user_prompt}')
|
||||
def step_user_prompt(context, user_prompt):
|
||||
context.prompts.append(user_prompt)
|
||||
context.n_prompts = len(context.prompts)
|
||||
|
||||
|
||||
@step(u'a system prompt {system_prompt}')
|
||||
@@ -192,9 +238,9 @@ def step_model(context, model):
|
||||
context.model = model
|
||||
|
||||
|
||||
@step(u'{max_tokens} max tokens to predict')
|
||||
@step(u'{max_tokens:d} max tokens to predict')
|
||||
def step_max_tokens(context, max_tokens):
|
||||
context.n_predict = int(max_tokens)
|
||||
context.n_predict = max_tokens
|
||||
|
||||
|
||||
@step(u'streaming is {enable_streaming}')
|
||||
@@ -222,15 +268,82 @@ def step_server_api_key(context, server_api_key):
|
||||
context.server_api_key = server_api_key
|
||||
|
||||
|
||||
@step(u'{n_junk:d} as number of junk')
|
||||
def step_n_junk(context, n_junk):
|
||||
context.n_junk = n_junk
|
||||
|
||||
|
||||
@step(u'{n_batch:d} as batch size')
|
||||
def step_n_batch(context, n_batch):
|
||||
context.n_batch = n_batch
|
||||
|
||||
|
||||
@step(u'{seed:d} as seed')
|
||||
def step_seed(context, seed):
|
||||
context.seed = seed
|
||||
|
||||
|
||||
@step(u'a prefix prompt')
|
||||
def step_prompt_prefix(context):
|
||||
context.prompt_prefix = context.text
|
||||
|
||||
|
||||
@step(u'a junk suffix prompt')
|
||||
def step_prompt_junk_suffix(context):
|
||||
context.prompt_junk_suffix = context.text
|
||||
|
||||
|
||||
@step(u'a suffix prompt')
|
||||
def step_prompt_suffix(context):
|
||||
context.prompt_suffix = context.text
|
||||
|
||||
|
||||
@step(u'{n_ga:d} group attention factor'
|
||||
u' to extend context size through self-extend')
|
||||
def step_impl(context, n_ga):
|
||||
context.n_ga = n_ga
|
||||
|
||||
|
||||
@step(u'{n_ga_w:d} group attention width to extend context size through self-extend')
|
||||
def step_impl(context, n_ga_w):
|
||||
context.n_ga_w = n_ga_w
|
||||
|
||||
|
||||
@step(u'a passkey prompt template')
|
||||
def step_prompt_passkey(context):
|
||||
context.prompt_passkey = context.text
|
||||
|
||||
|
||||
@step(u'{n_prompts:d} fixed prompts')
|
||||
def step_fixed_prompts(context, n_prompts):
|
||||
context.prompts.extend([str(0)*(context.n_batch if context.n_batch is not None else 512) for i in range(n_prompts)])
|
||||
context.n_prompts = n_prompts
|
||||
|
||||
|
||||
@step(u'a "{passkey}" passkey challenge prompt with the passkey inserted every {i_pos:d} junk')
|
||||
def step_prompt_passkey(context, passkey, i_pos):
|
||||
prompt = ""
|
||||
for i in range(context.n_junk):
|
||||
if i % context.n_junk == i_pos:
|
||||
prompt += context.prompt_passkey # the passkey is already substituted
|
||||
prompt += context.prompt_junk_suffix
|
||||
if context.debug:
|
||||
passkey_highlight = "\x1b[33m" + passkey + "\x1b[0m"
|
||||
print(f"Passkey challenge:\n```{prompt.replace(passkey, passkey_highlight)}```\n")
|
||||
context.prompts.append(context.prompt_prefix + prompt + context.prompt_suffix)
|
||||
context.n_prompts = len(context.prompts)
|
||||
|
||||
|
||||
@step(u'an OAI compatible chat completions request with {api_error} api error')
|
||||
@async_run_until_complete
|
||||
async def step_oai_chat_completions(context, api_error):
|
||||
if context.debug:
|
||||
print(f"Submitting OAI compatible completions request...")
|
||||
print(f"Submitting OAI compatible completions request...\n")
|
||||
expect_api_error = api_error == 'raised'
|
||||
completion = await oai_chat_completions(context.prompts.pop(),
|
||||
context.system_prompt,
|
||||
context.base_url,
|
||||
'/v1/chat',
|
||||
False,
|
||||
model=context.model if hasattr(context, 'model') else None,
|
||||
|
||||
@@ -240,8 +353,7 @@ async def step_oai_chat_completions(context, api_error):
|
||||
enable_streaming=context.enable_streaming
|
||||
if hasattr(context, 'enable_streaming') else None,
|
||||
|
||||
server_seed=context.server_seed
|
||||
if hasattr(context, 'server_seed') else None,
|
||||
seed=await completions_seed(context),
|
||||
|
||||
user_api_key=context.user_api_key
|
||||
if hasattr(context, 'user_api_key') else None,
|
||||
@@ -260,11 +372,13 @@ async def step_oai_chat_completions(context, api_error):
|
||||
@step(u'a prompt')
|
||||
def step_a_prompt(context):
|
||||
context.prompts.append(context.text)
|
||||
context.n_prompts = len(context.prompts)
|
||||
|
||||
|
||||
@step(u'a prompt {prompt}')
|
||||
def step_a_prompt_prompt(context, prompt):
|
||||
context.prompts.append(prompt)
|
||||
context.n_prompts = len(context.prompts)
|
||||
|
||||
|
||||
@step(u'concurrent completion requests')
|
||||
@@ -275,8 +389,10 @@ async def step_concurrent_completion_requests(context):
|
||||
# prompt is inserted automatically
|
||||
context.base_url,
|
||||
debug=context.debug,
|
||||
prompt_prefix=context.prompt_prefix,
|
||||
prompt_suffix=context.prompt_suffix,
|
||||
n_predict=context.n_predict if hasattr(context, 'n_predict') else None,
|
||||
server_seed=context.server_seed if hasattr(context, 'server_seed') else None,
|
||||
seed=await completions_seed(context),
|
||||
user_api_key=context.user_api_key if hasattr(context,
|
||||
'user_api_key') else None)
|
||||
|
||||
@@ -288,6 +404,7 @@ async def step_oai_chat_completions(context):
|
||||
# user_prompt is inserted automatically
|
||||
context.system_prompt,
|
||||
context.base_url,
|
||||
'/v1/chat/completions',
|
||||
True, # async_client
|
||||
model=context.model
|
||||
if hasattr(context, 'model') else None,
|
||||
@@ -295,7 +412,29 @@ async def step_oai_chat_completions(context):
|
||||
if hasattr(context, 'n_predict') else None,
|
||||
enable_streaming=context.enable_streaming
|
||||
if hasattr(context, 'enable_streaming') else None,
|
||||
server_seed=context.server_seed
|
||||
seed=await completions_seed(context),
|
||||
user_api_key=context.user_api_key
|
||||
if hasattr(context, 'user_api_key') else None)
|
||||
|
||||
|
||||
@step(u'concurrent OAI completions requests no v1')
|
||||
@async_run_until_complete
|
||||
async def step_oai_chat_completions(context):
|
||||
await concurrent_requests(context, oai_chat_completions,
|
||||
# user_prompt is inserted automatically
|
||||
context.system_prompt,
|
||||
context.base_url,
|
||||
'/chat/completions',
|
||||
True, # async_client
|
||||
model=context.model
|
||||
if hasattr(context, 'model') else None,
|
||||
n_predict=context.n_predict
|
||||
if hasattr(context, 'n_predict') else None,
|
||||
enable_streaming=context.enable_streaming
|
||||
if hasattr(context, 'enable_streaming') else None,
|
||||
seed=context.seed
|
||||
if hasattr(context, 'seed') else
|
||||
context.server_seed
|
||||
if hasattr(context, 'server_seed') else None,
|
||||
user_api_key=context.user_api_key
|
||||
if hasattr(context, 'user_api_key') else None)
|
||||
@@ -307,11 +446,10 @@ async def step_all_prompts_are_predicted(context):
|
||||
await all_prompts_are_predicted(context)
|
||||
|
||||
|
||||
@step(u'all prompts are predicted with {n_predict} tokens')
|
||||
@step(u'all prompts are predicted with {n_expected_predicted:d} tokens')
|
||||
@async_run_until_complete
|
||||
async def step_all_prompts_are_predicted_with_n_tokens(context, n_predict):
|
||||
expected_predicted_n = int(n_predict)
|
||||
await all_prompts_are_predicted(context, expected_predicted_n)
|
||||
async def step_all_prompts_are_predicted_with_n_tokens(context, n_expected_predicted):
|
||||
await all_prompts_are_predicted(context, n_expected_predicted)
|
||||
|
||||
|
||||
async def all_prompts_are_predicted(context, expected_predicted_n=None):
|
||||
@@ -325,25 +463,47 @@ async def all_prompts_are_predicted(context, expected_predicted_n=None):
|
||||
@step(u'embeddings are computed for')
|
||||
@async_run_until_complete
|
||||
async def step_compute_embedding(context):
|
||||
context.n_prompts = 1
|
||||
context.embeddings = await request_embedding(context.text, base_url=context.base_url)
|
||||
|
||||
|
||||
@step(u'all embeddings are the same')
|
||||
@async_run_until_complete
|
||||
async def step_all_embeddings_are_the_same(context):
|
||||
n_embedding_requests = await gather_tasks_results(context)
|
||||
assert n_embedding_requests > 0
|
||||
embeddings = []
|
||||
for i in range(n_embedding_requests):
|
||||
embedding = context.tasks_result.pop().pop()
|
||||
embeddings.append(embedding)
|
||||
assert_embeddings(embedding)
|
||||
n = len(embeddings)
|
||||
for i in range(n-1):
|
||||
for j in range(i+1, n):
|
||||
embedding1 = np.array(embeddings[i])
|
||||
embedding2 = np.array(embeddings[j])
|
||||
if context.debug:
|
||||
print(f"embedding1: {embedding1[-8:]}\n")
|
||||
print(f"embedding2: {embedding2[-8:]}\n")
|
||||
similarity = np.dot(embedding1, embedding2) / (np.linalg.norm(embedding1) * np.linalg.norm(embedding2))
|
||||
msg = f"Similarity between {i} and {j}: {similarity:.10f}"
|
||||
if context.debug:
|
||||
print(f"{msg}\n")
|
||||
assert np.isclose(similarity, 1.0, rtol=1e-05, atol=1e-08, equal_nan=False), msg
|
||||
|
||||
@step(u'embeddings are generated')
|
||||
def step_assert_embeddings(context):
|
||||
if len(context.prompts) == 0:
|
||||
assert_embeddings(context.embeddings)
|
||||
else:
|
||||
assert len(context.embeddings) == len(context.prompts), (f"unexpected response:\n"
|
||||
f"context.prompts={context.prompts}\n"
|
||||
f"context.embeddings={context.embeddings}")
|
||||
for embedding in context.embeddings:
|
||||
context.prompts.pop()
|
||||
assert_embeddings(embedding)
|
||||
assert context.n_prompts == len(context.embeddings), (f"unexpected response:\n"
|
||||
f"context.n_prompts={context.n_prompts}\n"
|
||||
f"context.embeddings={context.embeddings}")
|
||||
for embedding in context.embeddings:
|
||||
assert_embeddings(embedding)
|
||||
|
||||
|
||||
@step(u'an OAI compatible embeddings computation request for')
|
||||
@async_run_until_complete
|
||||
async def step_oai_compute_embeddings(context):
|
||||
context.n_prompts = 1
|
||||
context.embeddings = await request_oai_embeddings(context.text,
|
||||
base_url=context.base_url,
|
||||
user_api_key=context.user_api_key,
|
||||
@@ -357,6 +517,7 @@ async def step_oai_compute_embeddings_multiple_inputs(context):
|
||||
base_url=context.base_url,
|
||||
user_api_key=context.user_api_key,
|
||||
model=context.model)
|
||||
context.prompts.clear()
|
||||
|
||||
|
||||
@step(u'concurrent embedding requests')
|
||||
@@ -383,9 +544,9 @@ async def step_concurrent_oai_embedding_requests(context):
|
||||
@async_run_until_complete()
|
||||
async def all_embeddings_are_generated(context):
|
||||
n_embedding_requests = await gather_tasks_results(context)
|
||||
assert n_embedding_requests > 0
|
||||
assert n_embedding_requests == context.n_prompts
|
||||
for i in range(n_embedding_requests):
|
||||
assert_embeddings(context.tasks_result.pop())
|
||||
assert_embeddings(context.tasks_result.pop().pop())
|
||||
|
||||
|
||||
@step(u'tokenizing')
|
||||
@@ -421,8 +582,9 @@ async def step_detokenize(context):
|
||||
@async_run_until_complete
|
||||
async def step_options_request(context, origin):
|
||||
async with aiohttp.ClientSession() as session:
|
||||
headers = {'Authorization': f'Bearer {context.user_api_key}', 'Origin': origin}
|
||||
async with session.options(f'{context.base_url}/v1/chat/completions',
|
||||
headers={"Origin": origin}) as response:
|
||||
headers=headers) as response:
|
||||
assert response.status == 200
|
||||
context.options_response = response
|
||||
|
||||
@@ -441,20 +603,63 @@ async def step_prometheus_metrics_exported(context):
|
||||
assert metrics_response.headers['Content-Type'] == "text/plain; version=0.0.4"
|
||||
metrics_raw = await metrics_response.text()
|
||||
metric_exported = False
|
||||
if context.debug:
|
||||
print(f"/metrics answer:\n{metrics_raw}\n")
|
||||
context.metrics = {}
|
||||
for metric in parser.text_string_to_metric_families(metrics_raw):
|
||||
match metric.name:
|
||||
case "llamacpp:kv_cache_usage_ratio":
|
||||
assert len(metric.samples) > 0
|
||||
metric_exported = True
|
||||
context.metrics[metric.name] = metric
|
||||
assert int(metrics_response.headers["Process-Start-Time-Unix"]) > 0, "no header process start time"
|
||||
assert metric_exported, "No metrics exported"
|
||||
|
||||
|
||||
async def concurrent_requests(context, f_completion, *args, **kwargs):
|
||||
n_prompts = len(context.prompts)
|
||||
@step(u'metric {metric_name} is {metric_value:d}')
|
||||
def step_assert_metric_value(context, metric_name, metric_value):
|
||||
if metric_name not in context.metrics:
|
||||
assert False, f"no metric {metric_name} in {context.metrics.keys()}"
|
||||
assert context.metrics[metric_name].samples[0].value == metric_value, f"metric: {context.metrics[metric_name]}"
|
||||
|
||||
|
||||
@step(u'available models')
|
||||
def step_available_models(context):
|
||||
# openai client always expects an api_key
|
||||
openai.api_key = context.user_api_key if context.user_api_key is not None else 'nope'
|
||||
openai.api_base = f'{context.base_url}/v1'
|
||||
context.models = openai.Model.list().data
|
||||
|
||||
|
||||
@step(u'{n_model:d} models are supported')
|
||||
def step_supported_models(context, n_model):
|
||||
if context.debug:
|
||||
print(f"starting {n_prompts} concurrent completion requests...")
|
||||
assert n_prompts > 0
|
||||
for prompt_no in range(n_prompts):
|
||||
print("server models available:", context.models)
|
||||
assert len(context.models) == n_model
|
||||
|
||||
|
||||
@step(u'model {i_model:d} is {param} {preposition} {param_value}')
|
||||
def step_supported_models(context, i_model, param, preposition, param_value):
|
||||
assert i_model < len(context.models)
|
||||
model = context.models[i_model]
|
||||
|
||||
param_value = param_value.split(' ', 1)[0]
|
||||
match param:
|
||||
case 'identified':
|
||||
value = model.id
|
||||
case 'trained':
|
||||
value = str(model.meta.n_ctx_train)
|
||||
case _:
|
||||
assert False, "param {param} not supported"
|
||||
assert param_value == value, f"model param {param} {value} != {param_value}"
|
||||
|
||||
|
||||
async def concurrent_requests(context, f_completion, *args, **kwargs):
|
||||
context.n_prompts = len(context.prompts)
|
||||
if context.debug:
|
||||
print(f"starting {context.n_prompts} concurrent completion requests...")
|
||||
assert context.n_prompts > 0
|
||||
for prompt_no in range(context.n_prompts):
|
||||
shifted_args = [context.prompts.pop(), *args]
|
||||
context.concurrent_tasks.append(asyncio.create_task(f_completion(*shifted_args, **kwargs)))
|
||||
await asyncio.sleep(0.1)
|
||||
@@ -463,8 +668,10 @@ async def concurrent_requests(context, f_completion, *args, **kwargs):
|
||||
async def request_completion(prompt,
|
||||
base_url,
|
||||
debug=False,
|
||||
prompt_prefix=None,
|
||||
prompt_suffix=None,
|
||||
n_predict=None,
|
||||
server_seed=None,
|
||||
seed=None,
|
||||
expect_api_error=None,
|
||||
user_api_key=None):
|
||||
if debug:
|
||||
@@ -481,11 +688,14 @@ async def request_completion(prompt,
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(f'{base_url}/completion',
|
||||
json={
|
||||
"input_prefix": prompt_prefix,
|
||||
"prompt": prompt,
|
||||
"n_predict": int(n_predict) if n_predict is not None else -1,
|
||||
"seed": server_seed if server_seed is not None else 42
|
||||
"input_suffix": prompt_suffix,
|
||||
"n_predict": n_predict if n_predict is not None else -1,
|
||||
"seed": seed if seed is not None else 42
|
||||
},
|
||||
headers=headers) as response:
|
||||
headers=headers,
|
||||
timeout=3600) as response:
|
||||
if expect_api_error is None or not expect_api_error:
|
||||
assert response.status == 200
|
||||
assert response.headers['Access-Control-Allow-Origin'] == origin
|
||||
@@ -497,19 +707,20 @@ async def request_completion(prompt,
|
||||
async def oai_chat_completions(user_prompt,
|
||||
system_prompt,
|
||||
base_url,
|
||||
base_path,
|
||||
async_client,
|
||||
debug=False,
|
||||
model=None,
|
||||
n_predict=None,
|
||||
enable_streaming=None,
|
||||
server_seed=None,
|
||||
seed=None,
|
||||
user_api_key=None,
|
||||
expect_api_error=None):
|
||||
if debug:
|
||||
print(f"Sending OAI Chat completions request: {user_prompt}")
|
||||
# openai client always expects an api key
|
||||
user_api_key = user_api_key if user_api_key is not None else 'nope'
|
||||
seed = server_seed if server_seed is not None else 42
|
||||
seed = seed if seed is not None else 42
|
||||
enable_streaming = enable_streaming if enable_streaming is not None else False
|
||||
payload = {
|
||||
"messages": [
|
||||
@@ -530,14 +741,15 @@ async def oai_chat_completions(user_prompt,
|
||||
completion_response = {
|
||||
'content': '',
|
||||
'timings': {
|
||||
'predicted_n': 0
|
||||
'predicted_n': 0,
|
||||
'prompt_n': 0
|
||||
}
|
||||
}
|
||||
if async_client:
|
||||
origin = 'llama.cpp'
|
||||
headers = {'Authorization': f'Bearer {user_api_key}', 'Origin': origin}
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(f'{base_url}/v1/chat/completions',
|
||||
async with session.post(f'{base_url}{base_path}',
|
||||
json=payload,
|
||||
headers=headers) as response:
|
||||
if enable_streaming:
|
||||
@@ -571,7 +783,8 @@ async def oai_chat_completions(user_prompt,
|
||||
completion_response = {
|
||||
'content': chat_completion_raw['choices'][0]['message'],
|
||||
'timings': {
|
||||
'predicted_n': chat_completion_raw['usage']['completion_tokens']
|
||||
'predicted_n': chat_completion_raw['usage']['completion_tokens'],
|
||||
'prompt_n': chat_completion_raw['usage']['prompt_tokens']
|
||||
}
|
||||
}
|
||||
else:
|
||||
@@ -579,7 +792,7 @@ async def oai_chat_completions(user_prompt,
|
||||
else:
|
||||
try:
|
||||
openai.api_key = user_api_key
|
||||
openai.api_base = f'{base_url}/v1/chat'
|
||||
openai.api_base = f'{base_url}{base_path}'
|
||||
chat_completion = openai.Completion.create(
|
||||
messages=payload['messages'],
|
||||
model=model,
|
||||
@@ -600,13 +813,16 @@ async def oai_chat_completions(user_prompt,
|
||||
if 'content' in delta:
|
||||
completion_response['content'] += delta['content']
|
||||
completion_response['timings']['predicted_n'] += 1
|
||||
completion_response['truncated'] = chunk.choices[0].finish_reason != 'stop'
|
||||
else:
|
||||
assert len(chat_completion.choices) == 1
|
||||
completion_response = {
|
||||
'content': chat_completion.choices[0].message.content,
|
||||
'timings': {
|
||||
'predicted_n': chat_completion.usage.completion_tokens
|
||||
}
|
||||
'predicted_n': chat_completion.usage.completion_tokens,
|
||||
'prompt_n': chat_completion.usage.prompt_tokens
|
||||
},
|
||||
'truncated': chat_completion.choices[0].finish_reason != 'stop'
|
||||
}
|
||||
if debug:
|
||||
print("OAI response formatted to llama.cpp:", completion_response)
|
||||
@@ -621,7 +837,7 @@ async def request_embedding(content, base_url=None):
|
||||
}) as response:
|
||||
assert response.status == 200
|
||||
response_json = await response.json()
|
||||
return response_json['embedding']
|
||||
return [response_json['embedding']]
|
||||
|
||||
|
||||
async def request_oai_embeddings(input,
|
||||
@@ -631,6 +847,7 @@ async def request_oai_embeddings(input,
|
||||
user_api_key = user_api_key if user_api_key is not None else 'nope'
|
||||
if async_client:
|
||||
origin = 'llama.cpp'
|
||||
headers=[]
|
||||
if user_api_key is not None:
|
||||
headers = {'Authorization': f'Bearer {user_api_key}', 'Origin': origin}
|
||||
async with aiohttp.ClientSession() as session:
|
||||
@@ -639,14 +856,21 @@ async def request_oai_embeddings(input,
|
||||
"input": input,
|
||||
"model": model,
|
||||
},
|
||||
headers=headers) as response:
|
||||
headers=headers,
|
||||
timeout=3600) as response:
|
||||
assert response.status == 200, f"received status code not expected: {response.status}"
|
||||
assert response.headers['Access-Control-Allow-Origin'] == origin
|
||||
assert response.headers['Content-Type'] == "application/json; charset=utf-8"
|
||||
response_json = await response.json()
|
||||
assert response_json['model'] == model, f"invalid model received: {response_json['model']}"
|
||||
assert response_json['object'] == 'list'
|
||||
return response_json['data']
|
||||
if isinstance(input, collections.abc.Sequence):
|
||||
embeddings = []
|
||||
for an_oai_embeddings in response_json['data']:
|
||||
embeddings.append(an_oai_embeddings['embedding'])
|
||||
else:
|
||||
embeddings = [response_json['data']['embedding']]
|
||||
return embeddings
|
||||
else:
|
||||
openai.api_key = user_api_key
|
||||
openai.api_base = f'{base_url}/v1'
|
||||
@@ -660,7 +884,7 @@ async def request_oai_embeddings(input,
|
||||
for an_oai_embeddings in oai_embeddings.data:
|
||||
embeddings.append(an_oai_embeddings.embedding)
|
||||
else:
|
||||
embeddings = oai_embeddings.data.embedding
|
||||
embeddings = [oai_embeddings.data.embedding]
|
||||
return embeddings
|
||||
|
||||
|
||||
@@ -668,20 +892,31 @@ def assert_n_tokens_predicted(completion_response, expected_predicted_n=None, re
|
||||
content = completion_response['content']
|
||||
n_predicted = completion_response['timings']['predicted_n']
|
||||
assert len(content) > 0, "no token predicted"
|
||||
if expected_predicted_n is not None:
|
||||
if re_content is not None:
|
||||
p = re.compile(re_content, flags=RegexFlag.IGNORECASE | RegexFlag.MULTILINE | RegexFlag.DOTALL)
|
||||
matches = p.finditer(content)
|
||||
last_match = 0
|
||||
highlighted = ''
|
||||
for match in matches:
|
||||
start, end = match.span()
|
||||
highlighted += content[last_match: start]
|
||||
highlighted += '\x1b[33m'
|
||||
highlighted += content[start: end]
|
||||
highlighted += '\x1b[0m'
|
||||
last_match = end
|
||||
highlighted += content[last_match:]
|
||||
if 'DEBUG' in os.environ and os.environ['DEBUG'] == 'ON':
|
||||
print(f"Checking completion response: {highlighted}\n")
|
||||
assert last_match > 0, f'/{re_content}/ must match ```{highlighted}```'
|
||||
if expected_predicted_n and expected_predicted_n > 0:
|
||||
assert n_predicted == expected_predicted_n, (f'invalid number of tokens predicted:'
|
||||
f' {n_predicted} <> {expected_predicted_n}')
|
||||
if re_content is not None:
|
||||
re_content = '^.*' + re_content.replace('<or>', '|') + '.*$'
|
||||
assert re.match(re_content, content, flags=RegexFlag.IGNORECASE | RegexFlag.MULTILINE | RegexFlag.DOTALL), (
|
||||
f'invalid tokens predicted:'
|
||||
f' ```\n{content}\n``` do not match /{re_content}/')
|
||||
|
||||
|
||||
async def gather_tasks_results(context):
|
||||
n_tasks = len(context.concurrent_tasks)
|
||||
if context.debug:
|
||||
print(f"Waiting for all {n_tasks} tasks results...")
|
||||
print(f"Waiting for all {n_tasks} tasks results...\n")
|
||||
for task_no in range(n_tasks):
|
||||
context.tasks_result.append(await context.concurrent_tasks.pop())
|
||||
n_completions = len(context.tasks_result)
|
||||
@@ -692,15 +927,13 @@ async def wait_for_health_status(context,
|
||||
base_url,
|
||||
expected_http_status_code,
|
||||
expected_health_status,
|
||||
timeout=3,
|
||||
params=None,
|
||||
slots_idle=None,
|
||||
slots_processing=None,
|
||||
expected_slots=None):
|
||||
if context.debug:
|
||||
print(f"Starting checking for health for expected_health_status={expected_health_status}")
|
||||
timeout = 3 # seconds
|
||||
if expected_health_status == 'ok':
|
||||
timeout = 10 # CI slow inference
|
||||
print(f"Starting checking for health for expected_health_status={expected_health_status}\n")
|
||||
interval = 0.5
|
||||
counter = 0
|
||||
async with aiohttp.ClientSession() as session:
|
||||
@@ -710,7 +943,7 @@ async def wait_for_health_status(context,
|
||||
health = await health_response.json()
|
||||
if context.debug:
|
||||
print(f"HEALTH - response for expected health status='{expected_health_status}' on "
|
||||
f"'{base_url}/health'?{params} is {health}")
|
||||
f"'{base_url}/health'?{params} is {health}\n")
|
||||
if (status_code == expected_http_status_code
|
||||
and health['status'] == expected_health_status
|
||||
and (slots_idle is None or health['slots_idle'] == slots_idle)
|
||||
@@ -733,7 +966,7 @@ async def wait_for_health_status(context,
|
||||
if expected_http_status_code == 503:
|
||||
if len(context.tasks_result) == 0:
|
||||
print("\x1b[5;37;43mWARNING: forcing concurrent tasks,"
|
||||
" busy health check missed, probably too fast inference\x1b[0m")
|
||||
" busy health check missed, probably too fast inference\x1b[0m\n")
|
||||
n_completions = await gather_tasks_results(context)
|
||||
if n_completions > 0:
|
||||
return
|
||||
@@ -745,6 +978,8 @@ def assert_embeddings(embeddings):
|
||||
assert len(embeddings) > 0
|
||||
embeddings_computed = False
|
||||
for emb in embeddings:
|
||||
if not isinstance(emb, float):
|
||||
assert False, f"Bad embeddings: {embeddings}"
|
||||
if emb != 0:
|
||||
embeddings_computed = True
|
||||
assert embeddings_computed, f"Embeddings: {embeddings}"
|
||||
@@ -767,6 +1002,11 @@ def assert_slots_status(slots, expected_slots):
|
||||
f" = {expected[key]} != {slot[key]}")
|
||||
|
||||
|
||||
async def completions_seed(context):
|
||||
return context.seed if hasattr(context, 'seed') and context.seed is not None \
|
||||
else context.server_seed if hasattr(context, 'server_seed') else None
|
||||
|
||||
|
||||
def start_server_background(context):
|
||||
context.server_path = '../../../build/bin/server'
|
||||
if 'LLAMA_SERVER_BIN_PATH' in os.environ:
|
||||
@@ -776,27 +1016,35 @@ def start_server_background(context):
|
||||
'--port', context.server_port,
|
||||
'--model', context.model_file
|
||||
]
|
||||
if context.n_batch:
|
||||
server_args.extend(['--batch-size', context.n_batch])
|
||||
if context.n_gpu_layer:
|
||||
server_args.extend(['--n-gpu-layers', context.n_gpu_layer])
|
||||
if context.server_continuous_batching:
|
||||
server_args.append('--cont-batching')
|
||||
if context.server_embeddings:
|
||||
server_args.append('--embedding')
|
||||
if context.server_metrics:
|
||||
server_args.append('--metrics')
|
||||
if context.model_alias is not None:
|
||||
if context.model_alias:
|
||||
server_args.extend(['--alias', context.model_alias])
|
||||
if context.n_ctx is not None:
|
||||
if context.n_ctx:
|
||||
server_args.extend(['--ctx-size', context.n_ctx])
|
||||
if context.n_slots is not None:
|
||||
if context.n_slots:
|
||||
server_args.extend(['--parallel', context.n_slots])
|
||||
if context.n_server_predict is not None:
|
||||
if context.n_server_predict:
|
||||
server_args.extend(['--n-predict', context.n_server_predict])
|
||||
if context.server_api_key is not None:
|
||||
if context.server_api_key:
|
||||
server_args.extend(['--api-key', context.server_api_key])
|
||||
if context.n_ga:
|
||||
server_args.extend(['--grp-attn-n', context.n_ga])
|
||||
if context.n_ga_w:
|
||||
server_args.extend(['--grp-attn-w', context.n_ga_w])
|
||||
if context.debug:
|
||||
server_args.append('--verbose')
|
||||
if 'SERVER_LOG_FORMAT_JSON' not in os.environ:
|
||||
server_args.extend(['--log-format', "text"])
|
||||
print(f"starting server with: {context.server_path}", *server_args)
|
||||
print(f"starting server with: {context.server_path} {server_args}\n")
|
||||
context.server_process = subprocess.Popen(
|
||||
[str(arg) for arg in [context.server_path, *server_args]],
|
||||
close_fds=True)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# run with ./test.sh --tags wrong_usage
|
||||
# run with: ./tests.sh --no-skipped --tags wrong_usage
|
||||
@wrong_usage
|
||||
Feature: Wrong usage of llama.cpp server
|
||||
|
||||
@@ -7,7 +7,7 @@ Feature: Wrong usage of llama.cpp server
|
||||
# or pass n_predict/max_tokens in the request.
|
||||
Scenario: Infinite loop
|
||||
Given a server listening on localhost:8080
|
||||
And a model file stories260K.gguf
|
||||
And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
|
||||
# Uncomment below to fix the issue
|
||||
#And 64 server max tokens to predict
|
||||
Then the server is starting
|
||||
@@ -18,4 +18,5 @@ Feature: Wrong usage of llama.cpp server
|
||||
# Uncomment below to fix the issue
|
||||
#And 128 max tokens to predict
|
||||
Given concurrent completion requests
|
||||
Then the server is idle
|
||||
Then all prompts are predicted
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
aiohttp~=3.9.3
|
||||
behave~=1.2.6
|
||||
huggingface_hub~=0.20.3
|
||||
numpy~=1.24.4
|
||||
openai~=0.25.0
|
||||
prometheus-client~=0.20.0
|
||||
|
||||
@@ -5,7 +5,7 @@ set -eu
|
||||
if [ $# -lt 1 ]
|
||||
then
|
||||
# Start @llama.cpp scenario
|
||||
behave --summary --stop --no-capture --exclude 'issues|wrong_usages' --tags llama.cpp
|
||||
behave --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
|
||||
else
|
||||
behave "$@"
|
||||
fi
|
||||
|
||||
@@ -1,15 +1,16 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <unordered_map>
|
||||
#include "llama.h"
|
||||
#include "common.h"
|
||||
|
||||
#include "json.hpp"
|
||||
|
||||
#include "../llava/clip.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include <random>
|
||||
|
||||
#define DEFAULT_OAICOMPAT_MODEL "gpt-3.5-turbo-0613"
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
@@ -37,125 +38,35 @@ extern bool server_log_json;
|
||||
#define LOG_WARNING(MSG, ...) server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__)
|
||||
#define LOG_INFO( MSG, ...) server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__)
|
||||
|
||||
//
|
||||
// parallel
|
||||
//
|
||||
template <typename T>
|
||||
static T json_value(const json &body, const std::string &key, const T &default_value) {
|
||||
// Fallback null to default value
|
||||
return body.contains(key) && !body.at(key).is_null()
|
||||
? body.value(key, default_value)
|
||||
: default_value;
|
||||
}
|
||||
|
||||
enum server_state {
|
||||
SERVER_STATE_LOADING_MODEL, // Server is starting up, model not fully loaded yet
|
||||
SERVER_STATE_READY, // Server is ready and model is loaded
|
||||
SERVER_STATE_ERROR // An error occurred, load_model failed
|
||||
};
|
||||
|
||||
enum task_type {
|
||||
TASK_TYPE_COMPLETION,
|
||||
TASK_TYPE_CANCEL,
|
||||
TASK_TYPE_NEXT_RESPONSE,
|
||||
TASK_TYPE_METRICS
|
||||
};
|
||||
|
||||
struct task_server {
|
||||
int id = -1; // to be filled by llama_server_queue
|
||||
int target_id;
|
||||
task_type type;
|
||||
json data;
|
||||
bool infill_mode = false;
|
||||
bool embedding_mode = false;
|
||||
int multitask_id = -1;
|
||||
};
|
||||
|
||||
struct task_result {
|
||||
int id;
|
||||
int multitask_id = -1;
|
||||
bool stop;
|
||||
bool error;
|
||||
json result_json;
|
||||
};
|
||||
|
||||
struct task_multi {
|
||||
int id;
|
||||
std::set<int> subtasks_remaining{};
|
||||
std::vector<task_result> results{};
|
||||
};
|
||||
|
||||
// TODO: can become bool if we can't find use of more states
|
||||
enum slot_state
|
||||
{
|
||||
IDLE,
|
||||
PROCESSING,
|
||||
};
|
||||
|
||||
enum slot_command
|
||||
{
|
||||
NONE,
|
||||
LOAD_PROMPT,
|
||||
RELEASE,
|
||||
};
|
||||
|
||||
struct slot_params
|
||||
{
|
||||
bool stream = true;
|
||||
bool cache_prompt = false; // remember the prompt to avoid reprocessing all prompt
|
||||
|
||||
uint32_t seed = -1; // RNG seed
|
||||
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
||||
int32_t n_predict = -1; // new tokens to predict
|
||||
|
||||
std::vector<std::string> antiprompt;
|
||||
|
||||
json input_prefix;
|
||||
json input_suffix;
|
||||
};
|
||||
|
||||
struct slot_image
|
||||
{
|
||||
int32_t id;
|
||||
|
||||
bool request_encode_image = false;
|
||||
float * image_embedding = nullptr;
|
||||
int32_t image_tokens = 0;
|
||||
|
||||
clip_image_u8 * img_data;
|
||||
|
||||
std::string prefix_prompt; // before of this image
|
||||
};
|
||||
|
||||
// completion token output with probabilities
|
||||
struct completion_token_output
|
||||
{
|
||||
struct token_prob
|
||||
{
|
||||
llama_token tok;
|
||||
float prob;
|
||||
};
|
||||
|
||||
std::vector<token_prob> probs;
|
||||
llama_token tok;
|
||||
std::string text_to_send;
|
||||
};
|
||||
|
||||
static inline void server_log(const char *level, const char *function, int line, const char *message, const nlohmann::ordered_json &extra)
|
||||
{
|
||||
static inline void server_log(const char *level, const char *function, int line, const char *message, const nlohmann::ordered_json &extra) {
|
||||
std::stringstream ss_tid;
|
||||
ss_tid << std::this_thread::get_id();
|
||||
json log = nlohmann::ordered_json{
|
||||
{"tid", ss_tid.str()},
|
||||
{"tid", ss_tid.str()},
|
||||
{"timestamp", time(nullptr)},
|
||||
};
|
||||
|
||||
if (server_log_json) {
|
||||
log.merge_patch(
|
||||
{
|
||||
{"level", level},
|
||||
{"function", function},
|
||||
{"line", line},
|
||||
{"msg", message},
|
||||
});
|
||||
log.merge_patch( {
|
||||
{"level", level},
|
||||
{"function", function},
|
||||
{"line", line},
|
||||
{"msg", message},
|
||||
});
|
||||
|
||||
if (!extra.empty()) {
|
||||
log.merge_patch(extra);
|
||||
}
|
||||
|
||||
std::cout << log.dump(-1, ' ', false, json::error_handler_t::replace) << "\n" << std::flush;
|
||||
printf("%s\n", log.dump(-1, ' ', false, json::error_handler_t::replace).c_str());
|
||||
} else {
|
||||
char buf[1024];
|
||||
snprintf(buf, 1024, "%4s [%24s] %s", level, function, message);
|
||||
@@ -168,8 +79,7 @@ static inline void server_log(const char *level, const char *function, int line,
|
||||
for (const auto& el : log.items())
|
||||
{
|
||||
const std::string value = el.value().dump(-1, ' ', false, json::error_handler_t::replace);
|
||||
snprintf(buf, 1024, " %s=%s", el.key().c_str(), value.c_str());
|
||||
ss << buf;
|
||||
ss << " " << el.key() << "=" << value;
|
||||
}
|
||||
|
||||
const std::string str = ss.str();
|
||||
@@ -179,36 +89,25 @@ static inline void server_log(const char *level, const char *function, int line,
|
||||
}
|
||||
|
||||
//
|
||||
// server utils
|
||||
// chat template utils
|
||||
//
|
||||
|
||||
template <typename T>
|
||||
static T json_value(const json &body, const std::string &key, const T &default_value)
|
||||
{
|
||||
// Fallback null to default value
|
||||
return body.contains(key) && !body.at(key).is_null()
|
||||
? body.value(key, default_value)
|
||||
: default_value;
|
||||
}
|
||||
|
||||
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
|
||||
inline bool verify_custom_template(const std::string & tmpl) {
|
||||
llama_chat_message chat[] = {{"user", "test"}};
|
||||
std::vector<char> buf(1);
|
||||
int res = llama_chat_apply_template(nullptr, tmpl.c_str(), chat, 1, true, buf.data(), buf.size());
|
||||
int res = llama_chat_apply_template(nullptr, tmpl.c_str(), chat, 1, true, nullptr, 0);
|
||||
return res >= 0;
|
||||
}
|
||||
|
||||
// Format given chat. If tmpl is empty, we take the template from model metadata
|
||||
inline std::string format_chat(const struct llama_model * model, const std::string & tmpl, const std::vector<json> & messages)
|
||||
{
|
||||
inline std::string format_chat(const struct llama_model * model, const std::string & tmpl, const std::vector<json> & messages) {
|
||||
size_t alloc_size = 0;
|
||||
// vector holding all allocated string to be passed to llama_chat_apply_template
|
||||
std::vector<std::string> str(messages.size() * 2);
|
||||
std::vector<llama_chat_message> chat(messages.size());
|
||||
|
||||
for (size_t i = 0; i < messages.size(); ++i) {
|
||||
auto &curr_msg = messages[i];
|
||||
const auto & curr_msg = messages[i];
|
||||
str[i*2 + 0] = json_value(curr_msg, "role", std::string(""));
|
||||
str[i*2 + 1] = json_value(curr_msg, "content", std::string(""));
|
||||
alloc_size += str[i*2 + 1].length();
|
||||
@@ -228,252 +127,13 @@ inline std::string format_chat(const struct llama_model * model, const std::stri
|
||||
res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
|
||||
}
|
||||
|
||||
std::string formatted_chat(buf.data(), res);
|
||||
const std::string formatted_chat(buf.data(), res);
|
||||
|
||||
LOG_VERBOSE("formatted_chat", {{"text", formatted_chat.c_str()}});
|
||||
|
||||
return formatted_chat;
|
||||
}
|
||||
|
||||
//
|
||||
// work queue utils
|
||||
//
|
||||
|
||||
struct llama_server_queue {
|
||||
int id = 0;
|
||||
std::mutex mutex_tasks;
|
||||
bool running;
|
||||
// queues
|
||||
std::vector<task_server> queue_tasks;
|
||||
std::vector<task_server> queue_tasks_deferred;
|
||||
std::vector<task_multi> queue_multitasks;
|
||||
std::condition_variable condition_tasks;
|
||||
// callback functions
|
||||
std::function<void(task_server&)> callback_new_task;
|
||||
std::function<void(task_multi&)> callback_finish_multitask;
|
||||
std::function<void(void)> callback_all_task_finished;
|
||||
|
||||
// Add a new task to the end of the queue
|
||||
int post(task_server task) {
|
||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||
if (task.id == -1) {
|
||||
task.id = id++;
|
||||
LOG_VERBOSE("new task id", {{"new_id", task.id}});
|
||||
}
|
||||
queue_tasks.push_back(std::move(task));
|
||||
condition_tasks.notify_one();
|
||||
return task.id;
|
||||
}
|
||||
|
||||
// Add a new task, but defer until one slot is available
|
||||
void defer(task_server task) {
|
||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||
queue_tasks_deferred.push_back(std::move(task));
|
||||
}
|
||||
|
||||
// Get the next id for creating anew task
|
||||
int get_new_id() {
|
||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||
int new_id = id++;
|
||||
LOG_VERBOSE("new task id", {{"new_id", new_id}});
|
||||
return new_id;
|
||||
}
|
||||
|
||||
// Register function to process a new task
|
||||
void on_new_task(std::function<void(task_server&)> callback) {
|
||||
callback_new_task = callback;
|
||||
}
|
||||
|
||||
// Register function to process a multitask
|
||||
void on_finish_multitask(std::function<void(task_multi&)> callback) {
|
||||
callback_finish_multitask = callback;
|
||||
}
|
||||
|
||||
// Register the function to be called when the batch of tasks is finished
|
||||
void on_all_tasks_finished(std::function<void(void)> callback) {
|
||||
callback_all_task_finished = callback;
|
||||
}
|
||||
|
||||
// Call when the state of one slot is changed
|
||||
void notify_slot_changed() {
|
||||
// move deferred tasks back to main loop
|
||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||
for (auto & task : queue_tasks_deferred) {
|
||||
queue_tasks.push_back(std::move(task));
|
||||
}
|
||||
queue_tasks_deferred.clear();
|
||||
}
|
||||
|
||||
// end the start_loop routine
|
||||
void terminate() {
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||
running = false;
|
||||
}
|
||||
condition_tasks.notify_all();
|
||||
}
|
||||
|
||||
// Start the main loop.
|
||||
void start_loop() {
|
||||
running = true;
|
||||
while (true) {
|
||||
LOG_VERBOSE("new task may arrive", {});
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||
if (queue_tasks.empty()) {
|
||||
lock.unlock();
|
||||
break;
|
||||
}
|
||||
task_server task = queue_tasks.front();
|
||||
queue_tasks.erase(queue_tasks.begin());
|
||||
lock.unlock();
|
||||
LOG_VERBOSE("callback_new_task", {{"task_id", task.id}});
|
||||
callback_new_task(task);
|
||||
}
|
||||
LOG_VERBOSE("callback_all_task_finished", {});
|
||||
// process and update all the multitasks
|
||||
auto queue_iterator = queue_multitasks.begin();
|
||||
while (queue_iterator != queue_multitasks.end())
|
||||
{
|
||||
if (queue_iterator->subtasks_remaining.empty())
|
||||
{
|
||||
// all subtasks done == multitask is done
|
||||
task_multi current_multitask = *queue_iterator;
|
||||
callback_finish_multitask(current_multitask);
|
||||
// remove this multitask
|
||||
queue_iterator = queue_multitasks.erase(queue_iterator);
|
||||
}
|
||||
else
|
||||
{
|
||||
++queue_iterator;
|
||||
}
|
||||
}
|
||||
// all tasks in the current loop is finished
|
||||
callback_all_task_finished();
|
||||
}
|
||||
LOG_VERBOSE("wait for new task", {});
|
||||
// wait for new task
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||
if (queue_tasks.empty()) {
|
||||
if (!running) {
|
||||
LOG_VERBOSE("ending start_loop", {});
|
||||
return;
|
||||
}
|
||||
condition_tasks.wait(lock, [&]{
|
||||
return (!queue_tasks.empty() || !running);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// functions to manage multitasks
|
||||
//
|
||||
|
||||
// add a multitask by specifying the id of all subtask (subtask is a task_server)
|
||||
void add_multitask(int multitask_id, std::vector<int>& sub_ids)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mutex_tasks);
|
||||
task_multi multi;
|
||||
multi.id = multitask_id;
|
||||
std::copy(sub_ids.begin(), sub_ids.end(), std::inserter(multi.subtasks_remaining, multi.subtasks_remaining.end()));
|
||||
queue_multitasks.push_back(multi);
|
||||
}
|
||||
|
||||
// updatethe remaining subtasks, while appending results to multitask
|
||||
void update_multitask(int multitask_id, int subtask_id, task_result& result)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mutex_tasks);
|
||||
for (auto& multitask : queue_multitasks)
|
||||
{
|
||||
if (multitask.id == multitask_id)
|
||||
{
|
||||
multitask.subtasks_remaining.erase(subtask_id);
|
||||
multitask.results.push_back(result);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct llama_server_response {
|
||||
typedef std::function<void(int, int, task_result&)> callback_multitask_t;
|
||||
callback_multitask_t callback_update_multitask;
|
||||
// for keeping track of all tasks waiting for the result
|
||||
std::set<int> waiting_task_ids;
|
||||
// the main result queue
|
||||
std::vector<task_result> queue_results;
|
||||
std::mutex mutex_results;
|
||||
std::condition_variable condition_results;
|
||||
|
||||
void add_waiting_task_id(int task_id) {
|
||||
LOG_VERBOSE("waiting for task id", {{"task_id", task_id}});
|
||||
std::unique_lock<std::mutex> lock(mutex_results);
|
||||
waiting_task_ids.insert(task_id);
|
||||
}
|
||||
|
||||
void remove_waiting_task_id(int task_id) {
|
||||
LOG_VERBOSE("remove waiting for task id", {{"task_id", task_id}});
|
||||
std::unique_lock<std::mutex> lock(mutex_results);
|
||||
waiting_task_ids.erase(task_id);
|
||||
}
|
||||
|
||||
// This function blocks the thread until there is a response for this task_id
|
||||
task_result recv(int task_id) {
|
||||
while (true)
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex_results);
|
||||
condition_results.wait(lock, [&]{
|
||||
return !queue_results.empty();
|
||||
});
|
||||
|
||||
for (int i = 0; i < (int) queue_results.size(); i++)
|
||||
{
|
||||
if (queue_results[i].id == task_id)
|
||||
{
|
||||
assert(queue_results[i].multitask_id == -1);
|
||||
task_result res = queue_results[i];
|
||||
queue_results.erase(queue_results.begin() + i);
|
||||
return res;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// should never reach here
|
||||
}
|
||||
|
||||
// Register the function to update multitask
|
||||
void on_multitask_update(callback_multitask_t callback) {
|
||||
callback_update_multitask = callback;
|
||||
}
|
||||
|
||||
// Send a new result to a waiting task_id
|
||||
void send(task_result result) {
|
||||
std::unique_lock<std::mutex> lock(mutex_results);
|
||||
LOG_VERBOSE("send new result", {{"task_id", result.id}});
|
||||
for (auto& task_id : waiting_task_ids) {
|
||||
// LOG_TEE("waiting task id %i \n", task_id);
|
||||
// for now, tasks that have associated parent multitasks just get erased once multitask picks up the result
|
||||
if (result.multitask_id == task_id)
|
||||
{
|
||||
LOG_VERBOSE("callback_update_multitask", {{"task_id", task_id}});
|
||||
callback_update_multitask(task_id, result.id, result);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (result.id == task_id)
|
||||
{
|
||||
LOG_VERBOSE("queue_results.push_back", {{"task_id", task_id}});
|
||||
queue_results.push_back(result);
|
||||
condition_results.notify_all();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
//
|
||||
// base64 utils (TODO: move to common in the future)
|
||||
//
|
||||
@@ -483,13 +143,11 @@ static const std::string base64_chars =
|
||||
"abcdefghijklmnopqrstuvwxyz"
|
||||
"0123456789+/";
|
||||
|
||||
static inline bool is_base64(uint8_t c)
|
||||
{
|
||||
static inline bool is_base64(uint8_t c) {
|
||||
return (isalnum(c) || (c == '+') || (c == '/'));
|
||||
}
|
||||
|
||||
static inline std::vector<uint8_t> base64_decode(const std::string & encoded_string)
|
||||
{
|
||||
static inline std::vector<uint8_t> base64_decode(const std::string & encoded_string) {
|
||||
int i = 0;
|
||||
int j = 0;
|
||||
int in_ = 0;
|
||||
@@ -501,13 +159,10 @@ static inline std::vector<uint8_t> base64_decode(const std::string & encoded_str
|
||||
|
||||
std::vector<uint8_t> ret;
|
||||
|
||||
while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_]))
|
||||
{
|
||||
while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_])) {
|
||||
char_array_4[i++] = encoded_string[in_]; in_++;
|
||||
if (i == 4)
|
||||
{
|
||||
for (i = 0; i <4; i++)
|
||||
{
|
||||
if (i == 4) {
|
||||
for (i = 0; i < 4; i++) {
|
||||
char_array_4[i] = base64_chars.find(char_array_4[i]);
|
||||
}
|
||||
|
||||
@@ -515,23 +170,20 @@ static inline std::vector<uint8_t> base64_decode(const std::string & encoded_str
|
||||
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
|
||||
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
|
||||
|
||||
for (i = 0; (i < 3); i++)
|
||||
{
|
||||
for (i = 0; (i < 3); i++) {
|
||||
ret.push_back(char_array_3[i]);
|
||||
}
|
||||
|
||||
i = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (i)
|
||||
{
|
||||
for (j = i; j <4; j++)
|
||||
{
|
||||
if (i) {
|
||||
for (j = i; j < 4; j++) {
|
||||
char_array_4[j] = 0;
|
||||
}
|
||||
|
||||
for (j = 0; j <4; j++)
|
||||
{
|
||||
for (j = 0; j < 4; j++) {
|
||||
char_array_4[j] = base64_chars.find(char_array_4[j]);
|
||||
}
|
||||
|
||||
@@ -539,8 +191,7 @@ static inline std::vector<uint8_t> base64_decode(const std::string & encoded_str
|
||||
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
|
||||
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
|
||||
|
||||
for (j = 0; (j < i - 1); j++)
|
||||
{
|
||||
for (j = 0; j < i - 1; j++) {
|
||||
ret.push_back(char_array_3[j]);
|
||||
}
|
||||
}
|
||||
@@ -552,8 +203,7 @@ static inline std::vector<uint8_t> base64_decode(const std::string & encoded_str
|
||||
// random string / id
|
||||
//
|
||||
|
||||
static std::string random_string()
|
||||
{
|
||||
static std::string random_string() {
|
||||
static const std::string str("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
|
||||
|
||||
std::random_device rd;
|
||||
@@ -568,9 +218,327 @@ static std::string random_string()
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::string gen_chatcmplid()
|
||||
{
|
||||
static std::string gen_chatcmplid() {
|
||||
std::stringstream chatcmplid;
|
||||
chatcmplid << "chatcmpl-" << random_string();
|
||||
|
||||
return chatcmplid.str();
|
||||
}
|
||||
|
||||
//
|
||||
// other common utils
|
||||
//
|
||||
|
||||
static size_t common_part(const std::vector<llama_token> & a, const std::vector<llama_token> & b) {
|
||||
size_t i;
|
||||
for (i = 0; i < a.size() && i < b.size() && a[i] == b[i]; i++) {}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
static bool ends_with(const std::string & str, const std::string & suffix) {
|
||||
return str.size() >= suffix.size() && 0 == str.compare(str.size() - suffix.size(), suffix.size(), suffix);
|
||||
}
|
||||
|
||||
static size_t find_partial_stop_string(const std::string &stop, const std::string &text) {
|
||||
if (!text.empty() && !stop.empty()) {
|
||||
const char text_last_char = text.back();
|
||||
for (int64_t char_index = stop.size() - 1; char_index >= 0; char_index--) {
|
||||
if (stop[char_index] == text_last_char) {
|
||||
const std::string current_partial = stop.substr(0, char_index + 1);
|
||||
if (ends_with(text, current_partial)) {
|
||||
return text.size() - char_index - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return std::string::npos;
|
||||
}
|
||||
|
||||
// TODO: reuse llama_detokenize
|
||||
template <class Iter>
|
||||
static std::string tokens_to_str(llama_context * ctx, Iter begin, Iter end) {
|
||||
std::string ret;
|
||||
for (; begin != end; ++begin) {
|
||||
ret += llama_token_to_piece(ctx, *begin);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// format incomplete utf-8 multibyte character for output
|
||||
static std::string tokens_to_output_formatted_string(const llama_context * ctx, const llama_token token) {
|
||||
std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
|
||||
|
||||
// if the size is 1 and first bit is 1, meaning it's a partial character
|
||||
// (size > 1 meaning it's already a known token)
|
||||
if (out.size() == 1 && (out[0] & 0x80) == 0x80) {
|
||||
std::stringstream ss;
|
||||
ss << std::hex << (out[0] & 0xff);
|
||||
std::string res(ss.str());
|
||||
out = "byte: \\x" + res;
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
struct completion_token_output {
|
||||
llama_token tok;
|
||||
std::string text_to_send;
|
||||
|
||||
struct token_prob {
|
||||
llama_token tok;
|
||||
float prob;
|
||||
};
|
||||
|
||||
std::vector<token_prob> probs;
|
||||
};
|
||||
|
||||
// convert a vector of completion_token_output to json
|
||||
static json probs_vector_to_json(const llama_context * ctx, const std::vector<completion_token_output> & probs) {
|
||||
json out = json::array();
|
||||
|
||||
for (const auto & prob : probs) {
|
||||
json probs_for_token = json::array();
|
||||
|
||||
for (const auto & p : prob.probs) {
|
||||
const std::string tok_str = tokens_to_output_formatted_string(ctx, p.tok);
|
||||
probs_for_token.push_back(json {
|
||||
{"tok_str", tok_str},
|
||||
{"prob", p.prob},
|
||||
});
|
||||
}
|
||||
|
||||
const std::string tok_str = tokens_to_output_formatted_string(ctx, prob.tok);
|
||||
out.push_back(json {
|
||||
{"content", tok_str},
|
||||
{"probs", probs_for_token},
|
||||
});
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
//
|
||||
// OAI utils
|
||||
//
|
||||
|
||||
static json oaicompat_completion_params_parse(
|
||||
const struct llama_model * model,
|
||||
const json & body, /* openai api json semantics */
|
||||
const std::string & chat_template) {
|
||||
json llama_params;
|
||||
|
||||
llama_params["__oaicompat"] = true;
|
||||
|
||||
// Map OpenAI parameters to llama.cpp parameters
|
||||
//
|
||||
// For parameters that are defined by the OpenAI documentation (e.g.
|
||||
// temperature), we explicitly specify OpenAI's intended default; we
|
||||
// need to do that because sometimes OpenAI disagrees with llama.cpp
|
||||
//
|
||||
// https://platform.openai.com/docs/api-reference/chat/create
|
||||
llama_sampling_params default_sparams;
|
||||
llama_params["model"] = json_value(body, "model", std::string("unknown"));
|
||||
llama_params["prompt"] = format_chat(model, chat_template, body["messages"]);
|
||||
llama_params["cache_prompt"] = json_value(body, "cache_prompt", false);
|
||||
llama_params["temperature"] = json_value(body, "temperature", 0.0);
|
||||
llama_params["top_k"] = json_value(body, "top_k", default_sparams.top_k);
|
||||
llama_params["top_p"] = json_value(body, "top_p", 1.0);
|
||||
llama_params["n_predict"] = json_value(body, "max_tokens", -1);
|
||||
llama_params["logit_bias"] = json_value(body, "logit_bias", json::object());
|
||||
llama_params["frequency_penalty"] = json_value(body, "frequency_penalty", 0.0);
|
||||
llama_params["presence_penalty"] = json_value(body, "presence_penalty", 0.0);
|
||||
llama_params["seed"] = json_value(body, "seed", LLAMA_DEFAULT_SEED);
|
||||
llama_params["stream"] = json_value(body, "stream", false);
|
||||
llama_params["mirostat"] = json_value(body, "mirostat", default_sparams.mirostat);
|
||||
llama_params["mirostat_tau"] = json_value(body, "mirostat_tau", default_sparams.mirostat_tau);
|
||||
llama_params["mirostat_eta"] = json_value(body, "mirostat_eta", default_sparams.mirostat_eta);
|
||||
llama_params["penalize_nl"] = json_value(body, "penalize_nl", default_sparams.penalize_nl);
|
||||
llama_params["typical_p"] = json_value(body, "typical_p", default_sparams.typical_p);
|
||||
llama_params["repeat_last_n"] = json_value(body, "repeat_last_n", default_sparams.penalty_last_n);
|
||||
llama_params["ignore_eos"] = json_value(body, "ignore_eos", false);
|
||||
llama_params["tfs_z"] = json_value(body, "tfs_z", default_sparams.tfs_z);
|
||||
|
||||
if (body.count("grammar") != 0) {
|
||||
llama_params["grammar"] = json_value(body, "grammar", json::object());
|
||||
}
|
||||
|
||||
// Handle 'stop' field
|
||||
if (body.contains("stop") && body["stop"].is_string()) {
|
||||
llama_params["stop"] = json::array({body["stop"].get<std::string>()});
|
||||
} else {
|
||||
llama_params["stop"] = json_value(body, "stop", json::array());
|
||||
}
|
||||
|
||||
// Ensure there is ChatML-specific end sequence among stop words
|
||||
llama_params["stop"].push_back("<|im_end|>");
|
||||
|
||||
return llama_params;
|
||||
}
|
||||
|
||||
static json format_final_response_oaicompat(const json & request, json result, bool streaming = false) {
|
||||
bool stopped_word = result.count("stopped_word") != 0;
|
||||
bool stopped_eos = json_value(result, "stopped_eos", false);
|
||||
int num_tokens_predicted = json_value(result, "tokens_predicted", 0);
|
||||
int num_prompt_tokens = json_value(result, "tokens_evaluated", 0);
|
||||
std::string content = json_value(result, "content", std::string(""));
|
||||
|
||||
std::string finish_reason = "length";
|
||||
if (stopped_word || stopped_eos) {
|
||||
finish_reason = "stop";
|
||||
}
|
||||
|
||||
json choices =
|
||||
streaming ? json::array({json{{"finish_reason", finish_reason},
|
||||
{"index", 0},
|
||||
{"delta", json::object()}}})
|
||||
: json::array({json{{"finish_reason", finish_reason},
|
||||
{"index", 0},
|
||||
{"message", json{{"content", content},
|
||||
{"role", "assistant"}}}}});
|
||||
|
||||
std::time_t t = std::time(0);
|
||||
|
||||
json res = json {
|
||||
{"choices", choices},
|
||||
{"created", t},
|
||||
{"model",
|
||||
json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
|
||||
{"object", streaming ? "chat.completion.chunk" : "chat.completion"},
|
||||
{"usage", json {
|
||||
{"completion_tokens", num_tokens_predicted},
|
||||
{"prompt_tokens", num_prompt_tokens},
|
||||
{"total_tokens", num_tokens_predicted + num_prompt_tokens}
|
||||
}},
|
||||
{"id", gen_chatcmplid()}
|
||||
};
|
||||
|
||||
if (server_verbose) {
|
||||
res["__verbose"] = result;
|
||||
}
|
||||
|
||||
if (result.contains("completion_probabilities")) {
|
||||
res["completion_probabilities"] = json_value(result, "completion_probabilities", json::array());
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
// return value is vector as there is one case where we might need to generate two responses
|
||||
static std::vector<json> format_partial_response_oaicompat(json result) {
|
||||
if (!result.contains("model") || !result.contains("oaicompat_token_ctr")) {
|
||||
return std::vector<json>({result});
|
||||
}
|
||||
|
||||
bool first = json_value(result, "oaicompat_token_ctr", 0) == 0;
|
||||
std::string modelname = json_value(result, "model", std::string(DEFAULT_OAICOMPAT_MODEL));
|
||||
|
||||
bool stopped_word = json_value(result, "stopped_word", false);
|
||||
bool stopped_eos = json_value(result, "stopped_eos", false);
|
||||
bool stopped_limit = json_value(result, "stopped_limit", false);
|
||||
std::string content = json_value(result, "content", std::string(""));
|
||||
|
||||
std::string finish_reason;
|
||||
if (stopped_word || stopped_eos) {
|
||||
finish_reason = "stop";
|
||||
}
|
||||
if (stopped_limit) {
|
||||
finish_reason = "length";
|
||||
}
|
||||
|
||||
std::time_t t = std::time(0);
|
||||
|
||||
json choices;
|
||||
|
||||
if (!finish_reason.empty()) {
|
||||
choices = json::array({json{{"finish_reason", finish_reason},
|
||||
{"index", 0},
|
||||
{"delta", json::object()}}});
|
||||
} else {
|
||||
if (first) {
|
||||
if (content.empty()) {
|
||||
choices = json::array({json{{"finish_reason", nullptr},
|
||||
{"index", 0},
|
||||
{"delta", json{{"role", "assistant"}}}}});
|
||||
} else {
|
||||
// We have to send this as two updates to conform to openai behavior
|
||||
json initial_ret = json{{"choices", json::array({json{
|
||||
{"finish_reason", nullptr},
|
||||
{"index", 0},
|
||||
{"delta", json{
|
||||
{"role", "assistant"}
|
||||
}}}})},
|
||||
{"created", t},
|
||||
{"id", gen_chatcmplid()},
|
||||
{"model", modelname},
|
||||
{"object", "chat.completion.chunk"}};
|
||||
|
||||
json second_ret = json{
|
||||
{"choices", json::array({json{{"finish_reason", nullptr},
|
||||
{"index", 0},
|
||||
{"delta", json{
|
||||
{"content", content}}}
|
||||
}})},
|
||||
{"created", t},
|
||||
{"id", gen_chatcmplid()},
|
||||
{"model", modelname},
|
||||
{"object", "chat.completion.chunk"}};
|
||||
|
||||
return std::vector<json>({initial_ret, second_ret});
|
||||
}
|
||||
} else {
|
||||
// Some idiosyncrasy in task processing logic makes several trailing calls
|
||||
// with empty content, we ignore these at the calee site.
|
||||
if (content.empty()) {
|
||||
return std::vector<json>({json::object()});
|
||||
}
|
||||
|
||||
choices = json::array({json{
|
||||
{"finish_reason", nullptr},
|
||||
{"index", 0},
|
||||
{"delta",
|
||||
json{
|
||||
{"content", content},
|
||||
}},
|
||||
}});
|
||||
}
|
||||
}
|
||||
|
||||
json ret = json {
|
||||
{"choices", choices},
|
||||
{"created", t},
|
||||
{"id", gen_chatcmplid()},
|
||||
{"model", modelname},
|
||||
{"object", "chat.completion.chunk"}
|
||||
};
|
||||
|
||||
return std::vector<json>({ret});
|
||||
}
|
||||
|
||||
static json format_embeddings_response_oaicompat(const json & request, const json & embeddings) {
|
||||
json res = json {
|
||||
{"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
|
||||
{"object", "list"},
|
||||
{"usage", json {
|
||||
{"prompt_tokens", 0},
|
||||
{"total_tokens", 0}
|
||||
}},
|
||||
{"data", embeddings}
|
||||
};
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static json format_tokenizer_response(const std::vector<llama_token> & tokens) {
|
||||
return json {
|
||||
{"tokens", tokens}
|
||||
};
|
||||
}
|
||||
|
||||
static json format_detokenized_response(const std::string & content) {
|
||||
return json {
|
||||
{"content", content}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -6,3 +6,4 @@ More info:
|
||||
|
||||
- https://github.com/ggerganov/llama.cpp/pull/2926
|
||||
- https://github.com/ggerganov/llama.cpp/pull/3624
|
||||
- https://github.com/ggerganov/llama.cpp/pull/5625
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
|
||||
#define SPEC_VOCAB_MAX_SIZE_DIFFERENCE 100
|
||||
#define SPEC_VOCAB_CHECK_START_TOKEN_ID 5
|
||||
@@ -18,6 +19,7 @@ struct seq_draft {
|
||||
std::vector<int> i_batch_tgt;
|
||||
|
||||
std::vector<llama_token> tokens;
|
||||
std::vector<std::vector<llama_token_data>> dists;
|
||||
|
||||
struct llama_sampling_context * ctx_sampling;
|
||||
};
|
||||
@@ -37,12 +39,15 @@ int main(int argc, char ** argv) {
|
||||
// max number of parallel drafting sequences (i.e. tree branches)
|
||||
const int n_seq_dft = params.n_parallel;
|
||||
|
||||
// probability threshold for accepting a token from the draft model
|
||||
const float p_accept = params.p_accept;
|
||||
|
||||
// probability threshold for splitting a draft branch (only for n_seq_dft > 1)
|
||||
const float p_split = params.p_split;
|
||||
|
||||
if (params.seed == LLAMA_DEFAULT_SEED) {
|
||||
params.seed = time(NULL);
|
||||
}
|
||||
std::default_random_engine rng(params.seed);
|
||||
std::uniform_real_distribution<> u_dist;
|
||||
|
||||
#ifndef LOG_DISABLE_LOGS
|
||||
log_set_target(log_filename_generator("speculative", "log"));
|
||||
LOG_TEE("Log start\n");
|
||||
@@ -166,7 +171,9 @@ int main(int argc, char ** argv) {
|
||||
std::vector<seq_draft> drafts(n_seq_dft);
|
||||
|
||||
params.sparams.grammar.clear(); // the draft samplers will copy the target sampler's grammar
|
||||
params.sparams.temp = -1.0f; // force greedy sampling with probs for the draft model
|
||||
if (params.sparams.temp == 0) {
|
||||
params.sparams.temp = -1.0f; // force greedy sampling with probs for the draft model
|
||||
}
|
||||
|
||||
for (int s = 0; s < n_seq_dft; ++s) {
|
||||
drafts[s].ctx_sampling = llama_sampling_init(params.sparams);
|
||||
@@ -182,12 +189,15 @@ int main(int argc, char ** argv) {
|
||||
drafts[0].i_batch_tgt[0] = 0;
|
||||
|
||||
while (true) {
|
||||
std::set<int> active_seqs = {};
|
||||
|
||||
// print current draft sequences
|
||||
for (int s = 0; s < n_seq_dft; ++s) {
|
||||
if (!drafts[s].active) {
|
||||
continue;
|
||||
}
|
||||
|
||||
active_seqs.insert(s);
|
||||
const auto & tokens = drafts[s].tokens;
|
||||
|
||||
LOG("draft %d: %s\n", s, LOG_TOKENS_TOSTR_PRETTY(ctx_dft, tokens).c_str());
|
||||
@@ -196,48 +206,156 @@ int main(int argc, char ** argv) {
|
||||
int i_dft = 0;
|
||||
int s_keep = 0;
|
||||
|
||||
llama_token token_id;
|
||||
std::string token_str;
|
||||
|
||||
// loop until we fail to accept a drafted token or we run out of drafted tokens
|
||||
while (true) {
|
||||
LOG("sampling target: s_keep = %3d, i_dft = %3d, i_batch_tgt = %3d\n", s_keep, i_dft, drafts[s_keep].i_batch_tgt[i_dft]);
|
||||
|
||||
// sample from the target model
|
||||
llama_token id = llama_sampling_sample(ctx_sampling, ctx_tgt, NULL, drafts[s_keep].i_batch_tgt[i_dft]);
|
||||
|
||||
llama_sampling_accept(ctx_sampling, ctx_tgt, id, true);
|
||||
|
||||
//LOG("last: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx_tgt, ctx_sampling->prev).c_str());
|
||||
|
||||
const std::string token_str = llama_token_to_piece(ctx_tgt, id);
|
||||
|
||||
if (!params.use_color) {
|
||||
printf("%s", token_str.c_str());
|
||||
}
|
||||
|
||||
if (id == llama_token_eos(model_tgt)) {
|
||||
has_eos = true;
|
||||
}
|
||||
|
||||
++n_predict;
|
||||
|
||||
// check if the target token matches any of the drafts
|
||||
// for stochastic sampling, attempt to match the token with the drafted tokens
|
||||
{
|
||||
bool matches = false;
|
||||
bool accept = false;
|
||||
if (params.sparams.temp > 0) {
|
||||
// stochastic verification
|
||||
|
||||
for (int s = 0; s < n_seq_dft; ++s) {
|
||||
if (!drafts[s].active) {
|
||||
continue;
|
||||
llama_token_data_array dist_tgt = llama_sampling_probability_distribution(ctx_sampling, ctx_tgt, NULL, drafts[s_keep].i_batch_tgt[i_dft]);
|
||||
float p_tgt = 0, p_dft = 0;
|
||||
|
||||
// GGML_ASSERT(dist_tgt.size() == dist_dft.size());
|
||||
|
||||
while (active_seqs.size() > 0) {
|
||||
// randomly select a sequence to verify from active sequences
|
||||
std::uniform_int_distribution<unsigned int> u_int_dist(0, active_seqs.size() - 1);
|
||||
int s = *std::next(active_seqs.begin(), u_int_dist(rng));
|
||||
if (i_dft >= (int) drafts[s].tokens.size()) {
|
||||
drafts[s].active = false;
|
||||
active_seqs.erase(s);
|
||||
continue;
|
||||
}
|
||||
if (accept) {
|
||||
// if we already accepted a token, we can skip the rest
|
||||
if (drafts[s].tokens[i_dft] != drafts[s_keep].tokens[i_dft]) {
|
||||
drafts[s].active = false;
|
||||
active_seqs.erase(s);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
LOG("verifying sequence #%d at pos #%d from %d active sequence(s)\n", s, i_dft, (int) active_seqs.size());
|
||||
float r = u_dist(rng);
|
||||
llama_token_data_array dist_dft = { drafts[s].dists[i_dft].data() , drafts[s].dists[i_dft].size(), true };
|
||||
// acquire the token probabilities assigned by the draft and target models
|
||||
for (size_t i = 0; i < dist_tgt.size; i++) {
|
||||
if (dist_tgt.data[i].id == drafts[s].tokens[i_dft]) {
|
||||
p_tgt = dist_tgt.data[i].p;
|
||||
}
|
||||
if (dist_dft.data[i].id == drafts[s].tokens[i_dft]) {
|
||||
p_dft = dist_dft.data[i].p;
|
||||
}
|
||||
if (p_tgt && p_dft) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
LOG("r = %f, p_dft = %f, p_tgt = %f\n", r, p_dft, p_tgt);
|
||||
if (r <= p_tgt / p_dft) {
|
||||
s_keep = s;
|
||||
accept = true;
|
||||
token_id = drafts[s].tokens[i_dft];
|
||||
token_str = llama_token_to_piece(ctx_tgt, token_id);
|
||||
llama_sampling_accept(ctx_sampling, ctx_tgt, token_id, true);
|
||||
|
||||
LOG("draft token %d of sequence %d (%d, '%s') accepted\n", i_dft, s, token_id, token_str.c_str());
|
||||
break;
|
||||
} else {
|
||||
LOG("draft token %d of sequence %d (%d, '%s') rejected\n", i_dft, s, drafts[s].tokens[i_dft], llama_token_to_piece(ctx_tgt, drafts[s].tokens[i_dft]).c_str());
|
||||
drafts[s].active = false;
|
||||
|
||||
// calculate residual probability
|
||||
GGML_ASSERT(dist_tgt.sorted);
|
||||
GGML_ASSERT(dist_dft.sorted);
|
||||
float sum_probs = 0.0f;
|
||||
|
||||
// sort dist by id
|
||||
std::sort(dist_tgt.data, dist_tgt.data + dist_tgt.size, [](const llama_token_data &a, const llama_token_data &b) {
|
||||
return a.id < b.id;
|
||||
});
|
||||
std::sort(dist_dft.data, dist_dft.data + dist_dft.size, [](const llama_token_data &a, const llama_token_data &b) {
|
||||
return a.id < b.id;
|
||||
});
|
||||
|
||||
for (size_t i = 0; i < dist_tgt.size; i++) {
|
||||
dist_tgt.data[i].p = std::max(0.0f, dist_tgt.data[i].p - dist_dft.data[i].p);
|
||||
sum_probs += dist_tgt.data[i].p;
|
||||
}
|
||||
for (size_t i = 0; i < dist_tgt.size; i++) {
|
||||
dist_tgt.data[i].p /= sum_probs;
|
||||
}
|
||||
|
||||
// sort dist_tgt by p desc
|
||||
std::sort(dist_tgt.data, dist_tgt.data + dist_tgt.size, [](const llama_token_data &a, const llama_token_data &b) {
|
||||
return a.p > b.p;
|
||||
});
|
||||
}
|
||||
|
||||
active_seqs.erase(s);
|
||||
for(int i = 0; i < n_seq_dft; i++) {
|
||||
if (i == s) {
|
||||
continue;
|
||||
}
|
||||
if (drafts[i].tokens[i_dft] == drafts[s].tokens[i_dft]) {
|
||||
// synchronize active status for sequences with the same drafted token
|
||||
drafts[i].active = drafts[i].active && accept;
|
||||
if (!drafts[i].active) {
|
||||
active_seqs.erase(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (i_dft < (int) drafts[s].tokens.size() && id == drafts[s].tokens[i_dft]) {
|
||||
LOG("the sampled target token matches the %dth drafted token of sequence %d (%d, '%s') - accepted\n", i_dft, s, id, token_str.c_str());
|
||||
if (!accept) {
|
||||
// all drafted tokens were rejected
|
||||
// sample from the target model
|
||||
LOG("all drafted tokens were rejected, sampling from residual distribution\n");
|
||||
token_id = llama_sample_token(ctx_tgt, &dist_tgt);
|
||||
llama_sampling_accept(ctx_sampling, ctx_tgt, token_id, true);
|
||||
token_str = llama_token_to_piece(ctx_tgt, token_id);
|
||||
}
|
||||
|
||||
s_keep = s;
|
||||
matches = true;
|
||||
} else {
|
||||
drafts[s].active = false;
|
||||
} else {
|
||||
// greedy verification
|
||||
|
||||
// sample from the target model
|
||||
LOG("sampling target: s_keep = %3d, i_dft = %3d, i_batch_tgt = %3d\n", s_keep, i_dft, drafts[s_keep].i_batch_tgt[i_dft]);
|
||||
token_id = llama_sampling_sample(ctx_sampling, ctx_tgt, NULL, drafts[s_keep].i_batch_tgt[i_dft]);
|
||||
|
||||
llama_sampling_accept(ctx_sampling, ctx_tgt, token_id, true);
|
||||
|
||||
//LOG("last: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx_tgt, ctx_sampling->prev).c_str());
|
||||
|
||||
token_str = llama_token_to_piece(ctx_tgt, token_id);
|
||||
|
||||
for (int s = 0; s < n_seq_dft; ++s) {
|
||||
if (!drafts[s].active) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (i_dft < (int) drafts[s].tokens.size() && token_id == drafts[s].tokens[i_dft]) {
|
||||
LOG("the sampled target token matches the %dth drafted token of sequence %d (%d, '%s') - accepted\n", i_dft, s, token_id, token_str.c_str());
|
||||
|
||||
s_keep = s;
|
||||
accept = true;
|
||||
} else {
|
||||
drafts[s].active = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (matches) {
|
||||
if (token_id == llama_token_eos(model_tgt)) {
|
||||
has_eos = true;
|
||||
}
|
||||
++n_predict;
|
||||
|
||||
if (accept) {
|
||||
++n_accept;
|
||||
++n_past_tgt;
|
||||
++n_past_dft;
|
||||
@@ -245,17 +363,21 @@ int main(int argc, char ** argv) {
|
||||
if (params.use_color) {
|
||||
// Color token according to its origin sequence
|
||||
printf("\u001b[%dm%s\u001b[37m", (36 - s_keep % 6), token_str.c_str());
|
||||
fflush(stdout);
|
||||
} else {
|
||||
printf("%s", token_str.c_str());
|
||||
}
|
||||
fflush(stdout);
|
||||
continue;
|
||||
} else {
|
||||
printf("%s", token_str.c_str());
|
||||
fflush(stdout);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (params.use_color) {
|
||||
printf("%s", token_str.c_str());
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
LOG("the sampled target token (%d, '%s') did not match, or we ran out of drafted tokens\n", id, token_str.c_str());
|
||||
{
|
||||
LOG("the sampled target token (%d, '%s') did not match, or we ran out of drafted tokens\n", token_id, token_str.c_str());
|
||||
|
||||
// TODO: simplify
|
||||
{
|
||||
@@ -275,21 +397,21 @@ int main(int argc, char ** argv) {
|
||||
drafts[s].active = false;
|
||||
drafts[s].tokens.clear();
|
||||
drafts[s].i_batch_tgt.clear();
|
||||
drafts[s].dists.clear();
|
||||
}
|
||||
// note: will be erased after the speculation phase
|
||||
drafts[0].tokens.push_back(id);
|
||||
drafts[0].tokens.push_back(token_id);
|
||||
drafts[0].dists.push_back(std::vector<llama_token_data>());
|
||||
drafts[0].i_batch_tgt.push_back(0);
|
||||
|
||||
llama_batch_clear(batch_dft);
|
||||
llama_batch_add (batch_dft, id, n_past_dft, { 0 }, true);
|
||||
llama_batch_add (batch_dft, token_id, n_past_dft, { 0 }, true);
|
||||
|
||||
llama_kv_cache_seq_rm(ctx_dft, 0, n_past_dft, -1);
|
||||
// LOG("dft batch: %s\n", LOG_BATCH_TOSTR_PRETTY(ctx_dft, batch_dft).c_str());
|
||||
llama_decode (ctx_dft, batch_dft);
|
||||
llama_decode(ctx_dft, batch_dft);
|
||||
|
||||
++n_past_dft;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if (n_predict > params.n_predict || has_eos) {
|
||||
@@ -334,12 +456,6 @@ int main(int argc, char ** argv) {
|
||||
k, s, i, cur_p[k].id, cur_p[k].p, llama_token_to_piece(ctx_dft, cur_p[k].id).c_str());
|
||||
}
|
||||
|
||||
if (cur_p[0].p < p_accept) {
|
||||
LOG("stopping drafting for seq %3d, probability too low: %.3f < %.3f\n", s, cur_p[0].p, p_accept);
|
||||
drafts[s].drafting = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
std::vector<int> sa(1, s);
|
||||
|
||||
// attempt to split the branch if the probability is high enough
|
||||
@@ -367,6 +483,7 @@ int main(int argc, char ** argv) {
|
||||
drafts[n_seq_cur].skip = true;
|
||||
|
||||
drafts[n_seq_cur].tokens = drafts[s].tokens;
|
||||
drafts[n_seq_cur].dists = drafts[s].dists;
|
||||
drafts[n_seq_cur].i_batch_dft = drafts[s].i_batch_dft;
|
||||
drafts[n_seq_cur].i_batch_tgt = drafts[s].i_batch_tgt;
|
||||
|
||||
@@ -389,6 +506,8 @@ int main(int argc, char ** argv) {
|
||||
llama_sampling_accept(drafts[s].ctx_sampling, ctx_dft, id, true);
|
||||
|
||||
drafts[s].tokens.push_back(id);
|
||||
// save cur_p.data into drafts[s].dists
|
||||
drafts[s].dists.push_back(cur_p);
|
||||
|
||||
// add unique drafted tokens to the target batch
|
||||
drafts[s].i_batch_tgt.push_back(batch_tgt.n_tokens);
|
||||
@@ -440,6 +559,7 @@ int main(int argc, char ** argv) {
|
||||
}
|
||||
|
||||
drafts[s].tokens.erase(drafts[s].tokens.begin());
|
||||
drafts[s].dists.erase(drafts[s].dists.begin());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
#include "ggml-sycl.h"
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
int main() {
|
||||
ggml_backend_sycl_print_sycl_devices();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -8,12 +8,19 @@ INPUT2="Building a website can be done in 10 simple steps:\nStep 1:"
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
|
||||
if [ $# -gt 0 ]; then
|
||||
export GGML_SYCL_DEVICE=$1
|
||||
GGML_SYCL_DEVICE=$1
|
||||
else
|
||||
export GGML_SYCL_DEVICE=0
|
||||
GGML_SYCL_DEVICE=0
|
||||
fi
|
||||
echo GGML_SYCL_DEVICE=$GGML_SYCL_DEVICE
|
||||
echo "use $GGML_SYCL_DEVICE as main GPU"
|
||||
#export GGML_SYCL_DEBUG=1
|
||||
./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
|
||||
#./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 5 -e -ngl 33 -t 1 -s 0
|
||||
|
||||
|
||||
#ZES_ENABLE_SYSMAN=1, Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory. Recommended to use when --split-mode = layer.
|
||||
|
||||
#use all GPUs with same max compute units
|
||||
ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
|
||||
|
||||
#use main GPU only
|
||||
#ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -mg $GGML_SYCL_DEVICE -sm none
|
||||
|
||||
|
||||
18
flake.lock
generated
18
flake.lock
generated
@@ -5,11 +5,11 @@
|
||||
"nixpkgs-lib": "nixpkgs-lib"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1706830856,
|
||||
"narHash": "sha256-a0NYyp+h9hlb7ddVz4LUn1vT/PLwqfrWYcHMvFB1xYg=",
|
||||
"lastModified": 1709336216,
|
||||
"narHash": "sha256-Dt/wOWeW6Sqm11Yh+2+t0dfEWxoMxGBvv3JpIocFl9E=",
|
||||
"owner": "hercules-ci",
|
||||
"repo": "flake-parts",
|
||||
"rev": "b253292d9c0a5ead9bc98c4e9a26c6312e27d69f",
|
||||
"rev": "f7b3c975cf067e56e7cda6cb098ebe3fb4d74ca2",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -20,11 +20,11 @@
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1708655239,
|
||||
"narHash": "sha256-ZrP/yACUvDB+zbqYJsln4iwotbH6CTZiTkANJ0AgDv4=",
|
||||
"lastModified": 1709237383,
|
||||
"narHash": "sha256-cy6ArO4k5qTx+l5o+0mL9f5fa86tYUX3ozE1S+Txlds=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "cbc4211f0afffe6dfd2478a62615dd5175a13f9a",
|
||||
"rev": "1536926ef5621b09bba54035ae2bb6d806d72ac8",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@@ -37,11 +37,11 @@
|
||||
"nixpkgs-lib": {
|
||||
"locked": {
|
||||
"dir": "lib",
|
||||
"lastModified": 1706550542,
|
||||
"narHash": "sha256-UcsnCG6wx++23yeER4Hg18CXWbgNpqNXcHIo5/1Y+hc=",
|
||||
"lastModified": 1709237383,
|
||||
"narHash": "sha256-cy6ArO4k5qTx+l5o+0mL9f5fa86tYUX3ozE1S+Txlds=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "97b17f32362e475016f942bbdfda4a4a72a8a652",
|
||||
"rev": "1536926ef5621b09bba54035ae2bb6d806d72ac8",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
||||
10
flake.nix
10
flake.nix
@@ -107,11 +107,12 @@
|
||||
# ```
|
||||
#
|
||||
# Cf. https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-flake.html?highlight=flake#flake-format
|
||||
flake.overlays.default =
|
||||
(final: prev: {
|
||||
flake.overlays.default = (
|
||||
final: prev: {
|
||||
llamaPackages = final.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
|
||||
inherit (final.llamaPackages) llama-cpp;
|
||||
});
|
||||
}
|
||||
);
|
||||
|
||||
systems = [
|
||||
"aarch64-darwin"
|
||||
@@ -131,6 +132,9 @@
|
||||
...
|
||||
}:
|
||||
{
|
||||
# For standardised reproducible formatting with `nix fmt`
|
||||
formatter = pkgs.nixfmt-rfc-style;
|
||||
|
||||
# Unlike `.#packages`, legacyPackages may contain values of
|
||||
# arbitrary types (including nested attrsets) and may even throw
|
||||
# exceptions. This attribute isn't recursed into by `nix flake
|
||||
|
||||
@@ -91,19 +91,22 @@ extern "C" {
|
||||
// (optional) complete all pending operations
|
||||
void (*GGML_CALL synchronize)(ggml_backend_t backend);
|
||||
|
||||
// compute graph with a plan
|
||||
// create a plan for ggml_cgraph and free it
|
||||
ggml_backend_graph_plan_t (*GGML_CALL graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
|
||||
void (*GGML_CALL graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||
void (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||
|
||||
// compute graph with a plan
|
||||
enum ggml_status (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||
// compute graph without a plan (async)
|
||||
bool (*GGML_CALL graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
enum ggml_status (*GGML_CALL graph_compute) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
|
||||
// check if the backend supports an operation
|
||||
bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||
};
|
||||
|
||||
struct ggml_backend {
|
||||
ggml_guid_t guid;
|
||||
|
||||
struct ggml_backend_i iface;
|
||||
|
||||
ggml_backend_context_t context;
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
|
||||
// backend buffer type
|
||||
|
||||
const char * ggml_backend_buft_name(ggml_backend_buffer_type_t buft) {
|
||||
@@ -159,6 +158,13 @@ bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml
|
||||
|
||||
// backend
|
||||
|
||||
ggml_guid_t ggml_backend_guid(ggml_backend_t backend) {
|
||||
if (backend == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
return backend->guid;
|
||||
}
|
||||
|
||||
const char * ggml_backend_name(ggml_backend_t backend) {
|
||||
if (backend == NULL) {
|
||||
return "NULL";
|
||||
@@ -256,11 +262,11 @@ void ggml_backend_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_pla
|
||||
backend->iface.graph_plan_free(backend, plan);
|
||||
}
|
||||
|
||||
void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
|
||||
backend->iface.graph_plan_compute(backend, plan);
|
||||
enum ggml_status ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
|
||||
return backend->iface.graph_plan_compute(backend, plan);
|
||||
}
|
||||
|
||||
bool ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
||||
enum ggml_status ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
||||
return backend->iface.graph_compute(backend, cgraph);
|
||||
}
|
||||
|
||||
@@ -726,15 +732,15 @@ GGML_CALL static void ggml_backend_cpu_graph_plan_free(ggml_backend_t backend, g
|
||||
GGML_UNUSED(backend);
|
||||
}
|
||||
|
||||
GGML_CALL static void ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
|
||||
GGML_CALL static enum ggml_status ggml_backend_cpu_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
|
||||
struct ggml_backend_plan_cpu * cpu_plan = (struct ggml_backend_plan_cpu *)plan;
|
||||
|
||||
ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
|
||||
return ggml_graph_compute(&cpu_plan->cgraph, &cpu_plan->cplan);
|
||||
|
||||
GGML_UNUSED(backend);
|
||||
}
|
||||
|
||||
GGML_CALL static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
||||
GGML_CALL static enum ggml_status ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
||||
struct ggml_backend_cpu_context * cpu_ctx = (struct ggml_backend_cpu_context *)backend->context;
|
||||
|
||||
struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
|
||||
@@ -749,8 +755,7 @@ GGML_CALL static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, str
|
||||
cplan.abort_callback = cpu_ctx->abort_callback;
|
||||
cplan.abort_callback_data = cpu_ctx->abort_callback_data;
|
||||
|
||||
ggml_graph_compute(cgraph, &cplan);
|
||||
return true;
|
||||
return ggml_graph_compute(cgraph, &cplan);
|
||||
}
|
||||
|
||||
GGML_CALL static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
|
||||
@@ -781,6 +786,11 @@ static struct ggml_backend_i cpu_backend_i = {
|
||||
/* .supports_op = */ ggml_backend_cpu_supports_op,
|
||||
};
|
||||
|
||||
static ggml_guid_t ggml_backend_cpu_guid(void) {
|
||||
static ggml_guid guid = { 0xaa, 0x67, 0xc7, 0x43, 0x96, 0xe6, 0xa3, 0x8a, 0xe3, 0xaf, 0xea, 0x92, 0x36, 0xbc, 0xfc, 0x89 };
|
||||
return &guid;
|
||||
}
|
||||
|
||||
ggml_backend_t ggml_backend_cpu_init(void) {
|
||||
struct ggml_backend_cpu_context * ctx = malloc(sizeof(struct ggml_backend_cpu_context));
|
||||
if (ctx == NULL) {
|
||||
@@ -800,6 +810,7 @@ ggml_backend_t ggml_backend_cpu_init(void) {
|
||||
}
|
||||
|
||||
*cpu_backend = (struct ggml_backend) {
|
||||
/* .guid = */ ggml_backend_cpu_guid(),
|
||||
/* .interface = */ cpu_backend_i,
|
||||
/* .context = */ ctx
|
||||
};
|
||||
@@ -807,7 +818,7 @@ ggml_backend_t ggml_backend_cpu_init(void) {
|
||||
}
|
||||
|
||||
GGML_CALL bool ggml_backend_is_cpu(ggml_backend_t backend) {
|
||||
return backend && backend->iface.get_name == ggml_backend_cpu_name;
|
||||
return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_cpu_guid());
|
||||
}
|
||||
|
||||
void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) {
|
||||
@@ -1425,7 +1436,7 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
|
||||
static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
|
||||
uint64_t copy_us[GGML_MAX_BACKENDS] = {0};
|
||||
uint64_t compute_us[GGML_MAX_BACKENDS] = {0};
|
||||
|
||||
@@ -1460,8 +1471,9 @@ static bool ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
|
||||
|
||||
uint64_t compute_start_us = ggml_time_us();
|
||||
if (!sched->callback_eval) {
|
||||
if (!ggml_backend_graph_compute(split_backend, &split->graph)) {
|
||||
return false;
|
||||
enum ggml_status ec = ggml_backend_graph_compute(split_backend, &split->graph);
|
||||
if (ec != GGML_STATUS_SUCCESS) {
|
||||
return ec;
|
||||
}
|
||||
//ggml_backend_synchronize(split_backend); // necessary to measure compute time
|
||||
} else {
|
||||
@@ -1482,8 +1494,9 @@ static bool ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
|
||||
|
||||
struct ggml_cgraph gv = ggml_graph_view(&split->graph, j0, j1 + 1);
|
||||
|
||||
if (!ggml_backend_graph_compute(split_backend, &gv)) {
|
||||
return false;
|
||||
enum ggml_status ec = ggml_backend_graph_compute(split_backend, &gv);
|
||||
if (ec != GGML_STATUS_SUCCESS) {
|
||||
return ec;
|
||||
}
|
||||
|
||||
if (need && !sched->callback_eval(t, false, sched->callback_eval_user_data)) {
|
||||
@@ -1507,7 +1520,7 @@ static bool ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
|
||||
}
|
||||
#endif
|
||||
|
||||
return true;
|
||||
return GGML_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size) {
|
||||
@@ -1569,7 +1582,7 @@ bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph *
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
||||
enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
||||
GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS);
|
||||
|
||||
if (!sched->is_reset) {
|
||||
@@ -1578,14 +1591,10 @@ bool ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cg
|
||||
|
||||
ggml_backend_sched_split_graph(sched, graph);
|
||||
if (!ggml_backend_sched_alloc_splits(sched)) {
|
||||
return false;
|
||||
return GGML_STATUS_ALLOC_FAILED;
|
||||
}
|
||||
|
||||
if (!ggml_backend_sched_compute_splits(sched)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
return ggml_backend_sched_compute_splits(sched);
|
||||
}
|
||||
|
||||
void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) {
|
||||
|
||||
@@ -49,7 +49,7 @@ extern "C" {
|
||||
// Backend
|
||||
//
|
||||
|
||||
|
||||
GGML_API ggml_guid_t ggml_backend_guid(ggml_backend_t backend);
|
||||
GGML_API const char * ggml_backend_name(ggml_backend_t backend);
|
||||
GGML_API void ggml_backend_free(ggml_backend_t backend);
|
||||
|
||||
@@ -66,12 +66,13 @@ extern "C" {
|
||||
|
||||
GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
|
||||
|
||||
GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
GGML_API ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||
|
||||
GGML_API void ggml_backend_graph_plan_free (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||
GGML_API void ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||
GGML_API bool ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
GGML_API bool ggml_backend_supports_op (ggml_backend_t backend, const struct ggml_tensor * op);
|
||||
GGML_API enum ggml_status ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||
GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
|
||||
GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||
|
||||
// tensor copy between different backends
|
||||
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||
@@ -157,26 +158,26 @@ extern "C" {
|
||||
typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
|
||||
|
||||
// Initialize a backend scheduler
|
||||
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size);
|
||||
GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
|
||||
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size);
|
||||
GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
|
||||
// Initialize backend buffers from a measure graph
|
||||
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
|
||||
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
|
||||
// Get the number of splits of the last graph
|
||||
GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
|
||||
GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
|
||||
|
||||
GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
|
||||
GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
|
||||
|
||||
GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
|
||||
GGML_API ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
|
||||
GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
|
||||
GGML_API ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
|
||||
|
||||
// Allocate and compute graph on the backend scheduler
|
||||
GGML_API bool ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||
GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||
|
||||
// Reset all assignments and allocators - must be called before changing the node backends
|
||||
GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
|
||||
GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
|
||||
|
||||
// Set a callback to be called for each resulting node during graph compute
|
||||
GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
|
||||
GGML_API void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data);
|
||||
|
||||
//
|
||||
// Utils
|
||||
|
||||
779
ggml-common.h
Normal file
779
ggml-common.h
Normal file
@@ -0,0 +1,779 @@
|
||||
#pragma once
|
||||
|
||||
#if defined(GGML_COMMON_IMPL_C)
|
||||
#include <stdint.h>
|
||||
|
||||
#define GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
|
||||
#define GGML_TABLE_END() };
|
||||
|
||||
#define GGML_COMMON_IMPL
|
||||
#elif defined(GGML_COMMON_IMPL_METAL)
|
||||
#include <metal_stdlib>
|
||||
|
||||
#define GGML_TABLE_BEGIN(type, name, size) static const constant type name[size] = {
|
||||
#define GGML_TABLE_END() };
|
||||
|
||||
#define GGML_COMMON_IMPL
|
||||
#elif defined(GGML_COMMON_IMPL_CUDA)
|
||||
#include <cstdint>
|
||||
|
||||
#define GGML_TABLE_BEGIN(type, name, size) static const __device__ __constant__ type name[size] = {
|
||||
#define GGML_TABLE_END() };
|
||||
|
||||
#define GGML_COMMON_IMPL
|
||||
#elif defined(GGML_COMMON_IMPL_SYCL)
|
||||
#include <cstdint>
|
||||
|
||||
#define GGML_TABLE_BEGIN(type, name, size) static dpct::global_memory<const type, 1> name(sycl::range<1>(size), {
|
||||
#define GGML_TABLE_END() });
|
||||
|
||||
#define GGML_COMMON_IMPL
|
||||
#endif
|
||||
|
||||
#if defined(GGML_COMMON_IMPL)
|
||||
|
||||
GGML_TABLE_BEGIN(uint8_t, kmask_iq2xs, 8)
|
||||
1, 2, 4, 8, 16, 32, 64, 128
|
||||
GGML_TABLE_END()
|
||||
|
||||
GGML_TABLE_BEGIN(uint8_t, ksigns_iq2xs, 128)
|
||||
0, 129, 130, 3, 132, 5, 6, 135, 136, 9, 10, 139, 12, 141, 142, 15,
|
||||
144, 17, 18, 147, 20, 149, 150, 23, 24, 153, 154, 27, 156, 29, 30, 159,
|
||||
160, 33, 34, 163, 36, 165, 166, 39, 40, 169, 170, 43, 172, 45, 46, 175,
|
||||
48, 177, 178, 51, 180, 53, 54, 183, 184, 57, 58, 187, 60, 189, 190, 63,
|
||||
192, 65, 66, 195, 68, 197, 198, 71, 72, 201, 202, 75, 204, 77, 78, 207,
|
||||
80, 209, 210, 83, 212, 85, 86, 215, 216, 89, 90, 219, 92, 221, 222, 95,
|
||||
96, 225, 226, 99, 228, 101, 102, 231, 232, 105, 106, 235, 108, 237, 238, 111,
|
||||
240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
|
||||
GGML_TABLE_END()
|
||||
|
||||
//#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
|
||||
GGML_TABLE_BEGIN(uint64_t, ksigns64, 128)
|
||||
0x0000000000000000, 0xff000000000000ff, 0xff0000000000ff00, 0x000000000000ffff,
|
||||
0xff00000000ff0000, 0x0000000000ff00ff, 0x0000000000ffff00, 0xff00000000ffffff,
|
||||
0xff000000ff000000, 0x00000000ff0000ff, 0x00000000ff00ff00, 0xff000000ff00ffff,
|
||||
0x00000000ffff0000, 0xff000000ffff00ff, 0xff000000ffffff00, 0x00000000ffffffff,
|
||||
0xff0000ff00000000, 0x000000ff000000ff, 0x000000ff0000ff00, 0xff0000ff0000ffff,
|
||||
0x000000ff00ff0000, 0xff0000ff00ff00ff, 0xff0000ff00ffff00, 0x000000ff00ffffff,
|
||||
0x000000ffff000000, 0xff0000ffff0000ff, 0xff0000ffff00ff00, 0x000000ffff00ffff,
|
||||
0xff0000ffffff0000, 0x000000ffffff00ff, 0x000000ffffffff00, 0xff0000ffffffffff,
|
||||
0xff00ff0000000000, 0x0000ff00000000ff, 0x0000ff000000ff00, 0xff00ff000000ffff,
|
||||
0x0000ff0000ff0000, 0xff00ff0000ff00ff, 0xff00ff0000ffff00, 0x0000ff0000ffffff,
|
||||
0x0000ff00ff000000, 0xff00ff00ff0000ff, 0xff00ff00ff00ff00, 0x0000ff00ff00ffff,
|
||||
0xff00ff00ffff0000, 0x0000ff00ffff00ff, 0x0000ff00ffffff00, 0xff00ff00ffffffff,
|
||||
0x0000ffff00000000, 0xff00ffff000000ff, 0xff00ffff0000ff00, 0x0000ffff0000ffff,
|
||||
0xff00ffff00ff0000, 0x0000ffff00ff00ff, 0x0000ffff00ffff00, 0xff00ffff00ffffff,
|
||||
0xff00ffffff000000, 0x0000ffffff0000ff, 0x0000ffffff00ff00, 0xff00ffffff00ffff,
|
||||
0x0000ffffffff0000, 0xff00ffffffff00ff, 0xff00ffffffffff00, 0x0000ffffffffffff,
|
||||
0xffff000000000000, 0x00ff0000000000ff, 0x00ff00000000ff00, 0xffff00000000ffff,
|
||||
0x00ff000000ff0000, 0xffff000000ff00ff, 0xffff000000ffff00, 0x00ff000000ffffff,
|
||||
0x00ff0000ff000000, 0xffff0000ff0000ff, 0xffff0000ff00ff00, 0x00ff0000ff00ffff,
|
||||
0xffff0000ffff0000, 0x00ff0000ffff00ff, 0x00ff0000ffffff00, 0xffff0000ffffffff,
|
||||
0x00ff00ff00000000, 0xffff00ff000000ff, 0xffff00ff0000ff00, 0x00ff00ff0000ffff,
|
||||
0xffff00ff00ff0000, 0x00ff00ff00ff00ff, 0x00ff00ff00ffff00, 0xffff00ff00ffffff,
|
||||
0xffff00ffff000000, 0x00ff00ffff0000ff, 0x00ff00ffff00ff00, 0xffff00ffff00ffff,
|
||||
0x00ff00ffffff0000, 0xffff00ffffff00ff, 0xffff00ffffffff00, 0x00ff00ffffffffff,
|
||||
0x00ffff0000000000, 0xffffff00000000ff, 0xffffff000000ff00, 0x00ffff000000ffff,
|
||||
0xffffff0000ff0000, 0x00ffff0000ff00ff, 0x00ffff0000ffff00, 0xffffff0000ffffff,
|
||||
0xffffff00ff000000, 0x00ffff00ff0000ff, 0x00ffff00ff00ff00, 0xffffff00ff00ffff,
|
||||
0x00ffff00ffff0000, 0xffffff00ffff00ff, 0xffffff00ffffff00, 0x00ffff00ffffffff,
|
||||
0xffffffff00000000, 0x00ffffff000000ff, 0x00ffffff0000ff00, 0xffffffff0000ffff,
|
||||
0x00ffffff00ff0000, 0xffffffff00ff00ff, 0xffffffff00ffff00, 0x00ffffff00ffffff,
|
||||
0x00ffffffff000000, 0xffffffffff0000ff, 0xffffffffff00ff00, 0x00ffffffff00ffff,
|
||||
0xffffffffffff0000, 0x00ffffffffff00ff, 0x00ffffffffffff00, 0xffffffffffffffff,
|
||||
GGML_TABLE_END()
|
||||
//#endif
|
||||
|
||||
|
||||
GGML_TABLE_BEGIN(uint64_t, iq2xxs_grid, 256)
|
||||
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
||||
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x08080808082b0808,
|
||||
0x08080808082b082b, 0x08080808082b2b08, 0x08080808082b2b2b, 0x0808080819080819,
|
||||
0x0808080819081908, 0x0808080819190808, 0x0808080819192b08, 0x08080808192b0819,
|
||||
0x08080808192b1908, 0x080808082b080808, 0x080808082b08082b, 0x080808082b082b2b,
|
||||
0x080808082b2b082b, 0x0808081908080819, 0x0808081908081908, 0x0808081908190808,
|
||||
0x0808081908191919, 0x0808081919080808, 0x080808192b081908, 0x080808192b192b08,
|
||||
0x0808082b08080808, 0x0808082b0808082b, 0x0808082b082b082b, 0x0808082b2b08082b,
|
||||
0x0808190808080819, 0x0808190808081908, 0x0808190808190808, 0x08081908082b0819,
|
||||
0x08081908082b1908, 0x0808190819080808, 0x080819081908082b, 0x0808190819082b08,
|
||||
0x08081908192b0808, 0x080819082b080819, 0x080819082b081908, 0x080819082b190808,
|
||||
0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b, 0x0808191908082b08,
|
||||
0x08081919082b0808, 0x080819191908192b, 0x08081919192b2b19, 0x080819192b080808,
|
||||
0x080819192b190819, 0x0808192b08082b19, 0x0808192b08190808, 0x0808192b19080808,
|
||||
0x0808192b2b081908, 0x0808192b2b2b1908, 0x08082b0808080808, 0x08082b0808081919,
|
||||
0x08082b0808082b08, 0x08082b0808191908, 0x08082b08082b2b08, 0x08082b0819080819,
|
||||
0x08082b0819081908, 0x08082b0819190808, 0x08082b081919082b, 0x08082b082b082b08,
|
||||
0x08082b1908081908, 0x08082b1919080808, 0x08082b2b0808082b, 0x08082b2b08191908,
|
||||
0x0819080808080819, 0x0819080808081908, 0x0819080808190808, 0x08190808082b0819,
|
||||
0x0819080819080808, 0x08190808192b0808, 0x081908082b081908, 0x081908082b190808,
|
||||
0x081908082b191919, 0x0819081908080808, 0x0819081908082b08, 0x08190819082b0808,
|
||||
0x0819081919190808, 0x0819081919192b2b, 0x081908192b080808, 0x0819082b082b1908,
|
||||
0x0819082b19081919, 0x0819190808080808, 0x0819190808082b08, 0x08191908082b0808,
|
||||
0x08191908082b1919, 0x0819190819082b19, 0x081919082b080808, 0x0819191908192b08,
|
||||
0x08191919192b082b, 0x0819192b08080808, 0x0819192b0819192b, 0x08192b0808080819,
|
||||
0x08192b0808081908, 0x08192b0808190808, 0x08192b0819080808, 0x08192b082b080819,
|
||||
0x08192b1908080808, 0x08192b1908081919, 0x08192b192b2b0808, 0x08192b2b19190819,
|
||||
0x082b080808080808, 0x082b08080808082b, 0x082b080808082b2b, 0x082b080819081908,
|
||||
0x082b0808192b0819, 0x082b08082b080808, 0x082b08082b08082b, 0x082b0819082b2b19,
|
||||
0x082b081919082b08, 0x082b082b08080808, 0x082b082b0808082b, 0x082b190808080819,
|
||||
0x082b190808081908, 0x082b190808190808, 0x082b190819080808, 0x082b19081919192b,
|
||||
0x082b191908080808, 0x082b191919080819, 0x082b1919192b1908, 0x082b192b2b190808,
|
||||
0x082b2b0808082b08, 0x082b2b08082b0808, 0x082b2b082b191908, 0x082b2b2b19081908,
|
||||
0x1908080808080819, 0x1908080808081908, 0x1908080808190808, 0x1908080808192b08,
|
||||
0x19080808082b0819, 0x19080808082b1908, 0x1908080819080808, 0x1908080819082b08,
|
||||
0x190808081919192b, 0x19080808192b0808, 0x190808082b080819, 0x190808082b081908,
|
||||
0x190808082b190808, 0x1908081908080808, 0x19080819082b0808, 0x19080819192b0819,
|
||||
0x190808192b080808, 0x190808192b081919, 0x1908082b08080819, 0x1908082b08190808,
|
||||
0x1908082b19082b08, 0x1908082b1919192b, 0x1908082b192b2b08, 0x1908190808080808,
|
||||
0x1908190808082b08, 0x19081908082b0808, 0x190819082b080808, 0x190819082b192b19,
|
||||
0x190819190819082b, 0x19081919082b1908, 0x1908192b08080808, 0x19082b0808080819,
|
||||
0x19082b0808081908, 0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919,
|
||||
0x19082b1908080808, 0x19082b1919192b08, 0x19082b19192b0819, 0x19082b192b08082b,
|
||||
0x19082b2b19081919, 0x19082b2b2b190808, 0x1919080808080808, 0x1919080808082b08,
|
||||
0x1919080808190819, 0x1919080808192b19, 0x19190808082b0808, 0x191908082b080808,
|
||||
0x191908082b082b08, 0x1919081908081908, 0x191908191908082b, 0x191908192b2b1908,
|
||||
0x1919082b2b190819, 0x191919082b190808, 0x191919082b19082b, 0x1919191908082b2b,
|
||||
0x1919192b08080819, 0x1919192b19191908, 0x19192b0808080808, 0x19192b0808190819,
|
||||
0x19192b0808192b19, 0x19192b08192b1908, 0x19192b1919080808, 0x19192b2b08082b08,
|
||||
0x192b080808081908, 0x192b080808190808, 0x192b080819080808, 0x192b0808192b2b08,
|
||||
0x192b081908080808, 0x192b081919191919, 0x192b082b08192b08, 0x192b082b192b0808,
|
||||
0x192b190808080808, 0x192b190808081919, 0x192b191908190808, 0x192b19190819082b,
|
||||
0x192b19192b081908, 0x192b2b081908082b, 0x2b08080808080808, 0x2b0808080808082b,
|
||||
0x2b08080808082b2b, 0x2b08080819080819, 0x2b0808082b08082b, 0x2b08081908081908,
|
||||
0x2b08081908192b08, 0x2b08081919080808, 0x2b08082b08190819, 0x2b08190808080819,
|
||||
0x2b08190808081908, 0x2b08190808190808, 0x2b08190808191919, 0x2b08190819080808,
|
||||
0x2b081908192b0808, 0x2b08191908080808, 0x2b0819191908192b, 0x2b0819192b191908,
|
||||
0x2b08192b08082b19, 0x2b08192b19080808, 0x2b08192b192b0808, 0x2b082b080808082b,
|
||||
0x2b082b1908081908, 0x2b082b2b08190819, 0x2b19080808081908, 0x2b19080808190808,
|
||||
0x2b190808082b1908, 0x2b19080819080808, 0x2b1908082b2b0819, 0x2b1908190819192b,
|
||||
0x2b1908192b080808, 0x2b19082b19081919, 0x2b19190808080808, 0x2b191908082b082b,
|
||||
0x2b19190819081908, 0x2b19191919190819, 0x2b192b082b080819, 0x2b192b19082b0808,
|
||||
0x2b2b08080808082b, 0x2b2b080819190808, 0x2b2b08082b081919, 0x2b2b081908082b19,
|
||||
0x2b2b082b08080808, 0x2b2b190808192b08, 0x2b2b2b0819190808, 0x2b2b2b1908081908,
|
||||
GGML_TABLE_END()
|
||||
|
||||
GGML_TABLE_BEGIN(uint64_t, iq2xs_grid, 512)
|
||||
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
||||
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
|
||||
0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
|
||||
0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
|
||||
0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
|
||||
0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x080808082b080808,
|
||||
0x080808082b08082b, 0x080808082b081919, 0x080808082b082b08, 0x080808082b190819,
|
||||
0x080808082b191908, 0x080808082b192b19, 0x080808082b2b0808, 0x0808081908080819,
|
||||
0x0808081908081908, 0x080808190808192b, 0x0808081908082b19, 0x0808081908190808,
|
||||
0x080808190819082b, 0x0808081908191919, 0x0808081908192b08, 0x0808081908192b2b,
|
||||
0x08080819082b0819, 0x08080819082b1908, 0x0808081919080808, 0x080808191908082b,
|
||||
0x0808081919081919, 0x0808081919082b08, 0x0808081919190819, 0x0808081919191908,
|
||||
0x08080819192b0808, 0x08080819192b2b08, 0x080808192b080819, 0x080808192b081908,
|
||||
0x080808192b190808, 0x0808082b08080808, 0x0808082b0808082b, 0x0808082b08081919,
|
||||
0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908, 0x0808082b082b0808,
|
||||
0x0808082b19080819, 0x0808082b19081908, 0x0808082b19190808, 0x0808082b19191919,
|
||||
0x0808082b2b080808, 0x0808082b2b082b2b, 0x0808190808080819, 0x0808190808081908,
|
||||
0x080819080808192b, 0x0808190808082b19, 0x0808190808190808, 0x080819080819082b,
|
||||
0x0808190808191919, 0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908,
|
||||
0x0808190819080808, 0x080819081908082b, 0x0808190819081919, 0x0808190819082b08,
|
||||
0x0808190819190819, 0x0808190819191908, 0x080819081919192b, 0x08081908192b0808,
|
||||
0x080819082b080819, 0x080819082b081908, 0x080819082b190808, 0x0808191908080808,
|
||||
0x080819190808082b, 0x0808191908081919, 0x0808191908082b08, 0x0808191908190819,
|
||||
0x0808191908191908, 0x08081919082b0808, 0x0808191919080819, 0x0808191919081908,
|
||||
0x0808191919190808, 0x08081919192b0819, 0x080819192b080808, 0x0808192b08080819,
|
||||
0x0808192b08081908, 0x0808192b08190808, 0x0808192b082b192b, 0x0808192b19080808,
|
||||
0x0808192b1908082b, 0x0808192b2b081908, 0x08082b0808080808, 0x08082b080808082b,
|
||||
0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808082b2b, 0x08082b0808190819,
|
||||
0x08082b0808191908, 0x08082b08082b0808, 0x08082b08082b1919, 0x08082b0819080819,
|
||||
0x08082b0819081908, 0x08082b0819190808, 0x08082b0819192b08, 0x08082b082b080808,
|
||||
0x08082b082b2b0808, 0x08082b082b2b2b2b, 0x08082b1908080819, 0x08082b1908081908,
|
||||
0x08082b1908190808, 0x08082b1919080808, 0x08082b192b080819, 0x08082b192b082b19,
|
||||
0x08082b2b08080808, 0x08082b2b082b0808, 0x08082b2b082b2b08, 0x08082b2b2b19192b,
|
||||
0x08082b2b2b2b0808, 0x0819080808080819, 0x0819080808081908, 0x081908080808192b,
|
||||
0x0819080808082b19, 0x0819080808190808, 0x081908080819082b, 0x0819080808191919,
|
||||
0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908, 0x0819080819080808,
|
||||
0x081908081908082b, 0x0819080819081919, 0x0819080819082b08, 0x0819080819190819,
|
||||
0x0819080819191908, 0x08190808192b0808, 0x08190808192b2b2b, 0x081908082b080819,
|
||||
0x081908082b081908, 0x081908082b190808, 0x0819081908080808, 0x081908190808082b,
|
||||
0x0819081908081919, 0x0819081908082b08, 0x0819081908190819, 0x0819081908191908,
|
||||
0x08190819082b0808, 0x0819081919080819, 0x0819081919081908, 0x0819081919190808,
|
||||
0x081908192b080808, 0x081908192b191908, 0x081908192b19192b, 0x0819082b08080819,
|
||||
0x0819082b08081908, 0x0819082b0808192b, 0x0819082b08190808, 0x0819082b19080808,
|
||||
0x0819082b192b0808, 0x0819190808080808, 0x081919080808082b, 0x0819190808081919,
|
||||
0x0819190808082b08, 0x0819190808190819, 0x0819190808191908, 0x08191908082b0808,
|
||||
0x0819190819080819, 0x0819190819081908, 0x0819190819082b19, 0x0819190819190808,
|
||||
0x08191908192b1908, 0x081919082b080808, 0x0819191908080819, 0x0819191908081908,
|
||||
0x0819191908190808, 0x0819191919080808, 0x0819192b08080808, 0x0819192b08191908,
|
||||
0x0819192b19082b19, 0x08192b0808080819, 0x08192b0808081908, 0x08192b0808190808,
|
||||
0x08192b080819082b, 0x08192b0819080808, 0x08192b0819191908, 0x08192b082b08192b,
|
||||
0x08192b1908080808, 0x08192b1908081919, 0x08192b19192b192b, 0x08192b2b19190819,
|
||||
0x08192b2b2b2b2b19, 0x082b080808080808, 0x082b08080808082b, 0x082b080808081919,
|
||||
0x082b080808082b08, 0x082b080808082b2b, 0x082b080808190819, 0x082b080808191908,
|
||||
0x082b0808082b0808, 0x082b080819080819, 0x082b080819081908, 0x082b080819190808,
|
||||
0x082b08082b080808, 0x082b08082b2b0808, 0x082b081908080819, 0x082b081908081908,
|
||||
0x082b081908190808, 0x082b081919080808, 0x082b081919082b08, 0x082b0819192b1919,
|
||||
0x082b082b08080808, 0x082b082b082b082b, 0x082b082b2b080808, 0x082b082b2b2b2b08,
|
||||
0x082b190808080819, 0x082b190808081908, 0x082b190808190808, 0x082b1908082b2b19,
|
||||
0x082b190819080808, 0x082b191908080808, 0x082b191919080819, 0x082b19191919082b,
|
||||
0x082b19192b192b19, 0x082b192b08080819, 0x082b192b08192b2b, 0x082b192b2b2b192b,
|
||||
0x082b2b0808080808, 0x082b2b0808082b08, 0x082b2b0808082b2b, 0x082b2b08082b0808,
|
||||
0x082b2b0819191919, 0x082b2b082b082b08, 0x082b2b082b2b082b, 0x082b2b19192b2b08,
|
||||
0x082b2b192b190808, 0x082b2b2b08082b08, 0x082b2b2b082b0808, 0x082b2b2b2b08082b,
|
||||
0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819, 0x1908080808081908,
|
||||
0x190808080808192b, 0x1908080808082b19, 0x1908080808190808, 0x190808080819082b,
|
||||
0x1908080808191919, 0x1908080808192b08, 0x19080808082b0819, 0x19080808082b1908,
|
||||
0x1908080819080808, 0x190808081908082b, 0x1908080819081919, 0x1908080819082b08,
|
||||
0x1908080819082b2b, 0x1908080819190819, 0x1908080819191908, 0x19080808192b0808,
|
||||
0x19080808192b1919, 0x190808082b080819, 0x190808082b081908, 0x190808082b190808,
|
||||
0x1908081908080808, 0x190808190808082b, 0x1908081908081919, 0x1908081908082b08,
|
||||
0x1908081908190819, 0x1908081908191908, 0x19080819082b0808, 0x1908081919080819,
|
||||
0x1908081919081908, 0x1908081919190808, 0x190808192b080808, 0x190808192b081919,
|
||||
0x190808192b2b082b, 0x1908082b08080819, 0x1908082b08081908, 0x1908082b08190808,
|
||||
0x1908082b0819082b, 0x1908082b082b2b19, 0x1908082b19080808, 0x1908190808080808,
|
||||
0x190819080808082b, 0x1908190808081919, 0x1908190808082b08, 0x1908190808190819,
|
||||
0x1908190808191908, 0x1908190808192b19, 0x19081908082b0808, 0x1908190819080819,
|
||||
0x1908190819081908, 0x1908190819190808, 0x190819082b080808, 0x190819082b191908,
|
||||
0x1908191908080819, 0x1908191908081908, 0x1908191908190808, 0x19081919082b1908,
|
||||
0x1908191919080808, 0x190819192b192b2b, 0x1908192b08080808, 0x1908192b08082b2b,
|
||||
0x1908192b19081908, 0x1908192b19190808, 0x19082b0808080819, 0x19082b0808081908,
|
||||
0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919, 0x19082b0819191908,
|
||||
0x19082b08192b082b, 0x19082b1908080808, 0x19082b1908190819, 0x19082b1919081908,
|
||||
0x19082b1919190808, 0x19082b19192b2b19, 0x19082b2b08081908, 0x1919080808080808,
|
||||
0x191908080808082b, 0x1919080808081919, 0x1919080808082b08, 0x1919080808190819,
|
||||
0x1919080808191908, 0x19190808082b0808, 0x19190808082b2b08, 0x1919080819080819,
|
||||
0x1919080819081908, 0x1919080819190808, 0x191908082b080808, 0x1919081908080819,
|
||||
0x1919081908081908, 0x1919081908190808, 0x1919081908191919, 0x1919081919080808,
|
||||
0x191908191908082b, 0x1919082b08080808, 0x1919082b19081908, 0x1919082b2b2b2b2b,
|
||||
0x1919190808080819, 0x1919190808081908, 0x1919190808190808, 0x19191908082b0819,
|
||||
0x1919190819080808, 0x19191908192b0808, 0x191919082b080819, 0x191919082b2b0819,
|
||||
0x1919191908080808, 0x1919191908082b08, 0x191919192b080808, 0x191919192b082b08,
|
||||
0x1919192b082b0819, 0x1919192b192b2b08, 0x1919192b2b2b0819, 0x19192b0808080808,
|
||||
0x19192b0808191908, 0x19192b0819080819, 0x19192b0819190808, 0x19192b082b192b19,
|
||||
0x19192b1908192b2b, 0x19192b1919080808, 0x19192b191908082b, 0x19192b2b2b081919,
|
||||
0x192b080808080819, 0x192b080808081908, 0x192b080808190808, 0x192b080819080808,
|
||||
0x192b080819191908, 0x192b0808192b082b, 0x192b08082b08192b, 0x192b08082b2b2b19,
|
||||
0x192b081908080808, 0x192b082b082b1908, 0x192b082b19082b2b, 0x192b082b2b19082b,
|
||||
0x192b190808080808, 0x192b19080819192b, 0x192b191908190808, 0x192b191919080808,
|
||||
0x192b191919081919, 0x192b19192b2b1908, 0x192b2b0808080819, 0x192b2b08192b2b2b,
|
||||
0x192b2b19082b1919, 0x192b2b2b0808192b, 0x192b2b2b19191908, 0x192b2b2b192b082b,
|
||||
0x2b08080808080808, 0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08,
|
||||
0x2b08080808190819, 0x2b08080808191908, 0x2b080808082b0808, 0x2b080808082b2b2b,
|
||||
0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808082b080808,
|
||||
0x2b0808082b08082b, 0x2b0808082b2b2b08, 0x2b0808082b2b2b2b, 0x2b08081908080819,
|
||||
0x2b08081908081908, 0x2b0808190808192b, 0x2b08081908190808, 0x2b08081919080808,
|
||||
0x2b08081919190819, 0x2b08081919192b19, 0x2b08082b08080808, 0x2b08082b082b0808,
|
||||
0x2b08082b2b080808, 0x2b08082b2b08082b, 0x2b08082b2b2b0808, 0x2b08082b2b2b2b08,
|
||||
0x2b08190808080819, 0x2b08190808081908, 0x2b08190808190808, 0x2b0819080819082b,
|
||||
0x2b08190808191919, 0x2b08190819080808, 0x2b081908192b0808, 0x2b0819082b082b19,
|
||||
0x2b08191908080808, 0x2b08191919081908, 0x2b0819192b2b1919, 0x2b08192b08192b08,
|
||||
0x2b08192b192b2b2b, 0x2b082b0808080808, 0x2b082b0808082b08, 0x2b082b08082b1919,
|
||||
0x2b082b0819192b2b, 0x2b082b082b080808, 0x2b082b082b08082b, 0x2b082b082b2b2b08,
|
||||
0x2b082b190808192b, 0x2b082b2b082b082b, 0x2b082b2b2b080808, 0x2b082b2b2b082b08,
|
||||
0x2b082b2b2b19192b, 0x2b082b2b2b2b2b08, 0x2b19080808080819, 0x2b19080808081908,
|
||||
0x2b19080808190808, 0x2b19080819080808, 0x2b1908081919192b, 0x2b1908082b081908,
|
||||
0x2b19081908080808, 0x2b190819082b082b, 0x2b190819192b1908, 0x2b19082b1919192b,
|
||||
0x2b19082b2b082b19, 0x2b19190808080808, 0x2b19190808081919, 0x2b19190819081908,
|
||||
0x2b19190819190808, 0x2b19190819192b08, 0x2b191919082b2b19, 0x2b1919192b190808,
|
||||
0x2b1919192b19082b, 0x2b19192b19080819, 0x2b192b0819190819, 0x2b192b082b2b192b,
|
||||
0x2b192b1919082b19, 0x2b192b2b08191919, 0x2b192b2b192b0808, 0x2b2b080808080808,
|
||||
0x2b2b08080808082b, 0x2b2b080808082b08, 0x2b2b080808082b2b, 0x2b2b0808082b0808,
|
||||
0x2b2b0808082b2b2b, 0x2b2b08082b2b0808, 0x2b2b081919190819, 0x2b2b081919192b19,
|
||||
0x2b2b08192b2b192b, 0x2b2b082b08080808, 0x2b2b082b0808082b, 0x2b2b082b08082b08,
|
||||
0x2b2b082b082b2b2b, 0x2b2b082b2b080808, 0x2b2b082b2b2b0808, 0x2b2b190819080808,
|
||||
0x2b2b19082b191919, 0x2b2b192b192b1919, 0x2b2b192b2b192b08, 0x2b2b2b0808082b2b,
|
||||
0x2b2b2b08082b0808, 0x2b2b2b08082b082b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b0808,
|
||||
0x2b2b2b082b2b2b08, 0x2b2b2b1908081908, 0x2b2b2b192b081908, 0x2b2b2b192b08192b,
|
||||
0x2b2b2b2b082b2b08, 0x2b2b2b2b082b2b2b, 0x2b2b2b2b2b190819, 0x2b2b2b2b2b2b2b2b,
|
||||
GGML_TABLE_END()
|
||||
|
||||
GGML_TABLE_BEGIN(uint64_t, iq2s_grid, 1024)
|
||||
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
||||
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
|
||||
0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
|
||||
0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
|
||||
0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
|
||||
0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x08080808192b192b,
|
||||
0x08080808192b2b19, 0x080808082b080808, 0x080808082b08082b, 0x080808082b081919,
|
||||
0x080808082b082b08, 0x080808082b190819, 0x080808082b191908, 0x080808082b2b0808,
|
||||
0x080808082b2b1919, 0x080808082b2b2b2b, 0x0808081908080819, 0x0808081908081908,
|
||||
0x080808190808192b, 0x0808081908082b19, 0x0808081908190808, 0x080808190819082b,
|
||||
0x0808081908191919, 0x0808081908192b08, 0x08080819082b0819, 0x08080819082b1908,
|
||||
0x0808081919080808, 0x080808191908082b, 0x0808081919081919, 0x0808081919082b08,
|
||||
0x0808081919190819, 0x0808081919191908, 0x080808191919192b, 0x0808081919192b19,
|
||||
0x08080819192b0808, 0x08080819192b1919, 0x08080819192b2b08, 0x080808192b080819,
|
||||
0x080808192b081908, 0x080808192b190808, 0x080808192b19082b, 0x080808192b191919,
|
||||
0x080808192b2b0819, 0x080808192b2b1908, 0x0808082b08080808, 0x0808082b0808082b,
|
||||
0x0808082b08081919, 0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908,
|
||||
0x0808082b082b0808, 0x0808082b082b2b2b, 0x0808082b19080819, 0x0808082b19081908,
|
||||
0x0808082b1908192b, 0x0808082b19082b19, 0x0808082b19190808, 0x0808082b19191919,
|
||||
0x0808082b2b080808, 0x0808082b2b081919, 0x0808082b2b082b2b, 0x0808082b2b191908,
|
||||
0x0808082b2b2b082b, 0x0808190808080819, 0x0808190808081908, 0x080819080808192b,
|
||||
0x0808190808082b19, 0x0808190808190808, 0x080819080819082b, 0x0808190808191919,
|
||||
0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908, 0x08081908082b192b,
|
||||
0x08081908082b2b19, 0x0808190819080808, 0x080819081908082b, 0x0808190819081919,
|
||||
0x0808190819082b08, 0x0808190819082b2b, 0x0808190819190819, 0x0808190819191908,
|
||||
0x080819081919192b, 0x0808190819192b19, 0x08081908192b0808, 0x08081908192b082b,
|
||||
0x08081908192b1919, 0x080819082b080819, 0x080819082b081908, 0x080819082b08192b,
|
||||
0x080819082b082b19, 0x080819082b190808, 0x080819082b191919, 0x080819082b192b08,
|
||||
0x080819082b2b0819, 0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b,
|
||||
0x0808191908081919, 0x0808191908082b08, 0x0808191908082b2b, 0x0808191908190819,
|
||||
0x0808191908191908, 0x080819190819192b, 0x0808191908192b19, 0x08081919082b0808,
|
||||
0x08081919082b1919, 0x08081919082b2b08, 0x0808191919080819, 0x0808191919081908,
|
||||
0x080819191908192b, 0x0808191919082b19, 0x0808191919190808, 0x080819191919082b,
|
||||
0x0808191919191919, 0x0808191919192b08, 0x08081919192b0819, 0x08081919192b1908,
|
||||
0x080819192b080808, 0x080819192b08082b, 0x080819192b081919, 0x080819192b082b08,
|
||||
0x080819192b190819, 0x080819192b191908, 0x080819192b2b0808, 0x0808192b08080819,
|
||||
0x0808192b08081908, 0x0808192b0808192b, 0x0808192b08082b19, 0x0808192b08190808,
|
||||
0x0808192b08191919, 0x0808192b19080808, 0x0808192b19081919, 0x0808192b19082b08,
|
||||
0x0808192b19190819, 0x0808192b19191908, 0x0808192b192b0808, 0x0808192b2b080819,
|
||||
0x0808192b2b081908, 0x0808192b2b190808, 0x08082b0808080808, 0x08082b080808082b,
|
||||
0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808190819, 0x08082b0808191908,
|
||||
0x08082b080819192b, 0x08082b0808192b19, 0x08082b08082b0808, 0x08082b08082b1919,
|
||||
0x08082b08082b2b2b, 0x08082b0819080819, 0x08082b0819081908, 0x08082b081908192b,
|
||||
0x08082b0819082b19, 0x08082b0819190808, 0x08082b081919082b, 0x08082b0819191919,
|
||||
0x08082b0819192b08, 0x08082b08192b0819, 0x08082b08192b1908, 0x08082b082b080808,
|
||||
0x08082b082b081919, 0x08082b082b191908, 0x08082b082b2b2b2b, 0x08082b1908080819,
|
||||
0x08082b1908081908, 0x08082b1908190808, 0x08082b190819082b, 0x08082b1908191919,
|
||||
0x08082b1908192b08, 0x08082b19082b0819, 0x08082b1919080808, 0x08082b1919081919,
|
||||
0x08082b1919082b08, 0x08082b1919190819, 0x08082b1919191908, 0x08082b19192b0808,
|
||||
0x08082b192b080819, 0x08082b192b190808, 0x08082b2b08080808, 0x08082b2b08190819,
|
||||
0x08082b2b08191908, 0x08082b2b082b082b, 0x08082b2b082b2b08, 0x08082b2b082b2b2b,
|
||||
0x08082b2b19190808, 0x08082b2b2b192b19, 0x0819080808080819, 0x0819080808081908,
|
||||
0x081908080808192b, 0x0819080808082b19, 0x0819080808190808, 0x081908080819082b,
|
||||
0x0819080808191919, 0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908,
|
||||
0x08190808082b192b, 0x0819080819080808, 0x081908081908082b, 0x0819080819081919,
|
||||
0x0819080819082b08, 0x0819080819190819, 0x0819080819191908, 0x081908081919192b,
|
||||
0x0819080819192b19, 0x08190808192b0808, 0x08190808192b082b, 0x08190808192b1919,
|
||||
0x08190808192b2b08, 0x081908082b080819, 0x081908082b081908, 0x081908082b08192b,
|
||||
0x081908082b190808, 0x081908082b191919, 0x081908082b192b08, 0x081908082b2b0819,
|
||||
0x081908082b2b1908, 0x0819081908080808, 0x081908190808082b, 0x0819081908081919,
|
||||
0x0819081908082b08, 0x0819081908082b2b, 0x0819081908190819, 0x0819081908191908,
|
||||
0x081908190819192b, 0x0819081908192b19, 0x08190819082b0808, 0x08190819082b082b,
|
||||
0x08190819082b1919, 0x08190819082b2b08, 0x0819081919080819, 0x0819081919081908,
|
||||
0x081908191908192b, 0x0819081919082b19, 0x0819081919190808, 0x081908191919082b,
|
||||
0x0819081919191919, 0x0819081919192b08, 0x08190819192b0819, 0x08190819192b1908,
|
||||
0x081908192b080808, 0x081908192b08082b, 0x081908192b081919, 0x081908192b082b08,
|
||||
0x081908192b190819, 0x081908192b191908, 0x0819082b08080819, 0x0819082b08081908,
|
||||
0x0819082b08082b19, 0x0819082b08190808, 0x0819082b08191919, 0x0819082b082b0819,
|
||||
0x0819082b082b1908, 0x0819082b19080808, 0x0819082b19081919, 0x0819082b19190819,
|
||||
0x0819082b19191908, 0x0819082b2b080819, 0x0819082b2b081908, 0x0819082b2b190808,
|
||||
0x0819190808080808, 0x081919080808082b, 0x0819190808081919, 0x0819190808082b08,
|
||||
0x0819190808190819, 0x0819190808191908, 0x081919080819192b, 0x0819190808192b19,
|
||||
0x08191908082b0808, 0x08191908082b1919, 0x08191908082b2b08, 0x0819190819080819,
|
||||
0x0819190819081908, 0x081919081908192b, 0x0819190819082b19, 0x0819190819190808,
|
||||
0x081919081919082b, 0x0819190819191919, 0x0819190819192b08, 0x08191908192b0819,
|
||||
0x08191908192b1908, 0x081919082b080808, 0x081919082b08082b, 0x081919082b081919,
|
||||
0x081919082b082b08, 0x081919082b190819, 0x081919082b191908, 0x081919082b2b0808,
|
||||
0x0819191908080819, 0x0819191908081908, 0x081919190808192b, 0x0819191908082b19,
|
||||
0x0819191908190808, 0x081919190819082b, 0x0819191908191919, 0x0819191908192b08,
|
||||
0x08191919082b0819, 0x08191919082b1908, 0x0819191919080808, 0x081919191908082b,
|
||||
0x0819191919081919, 0x0819191919082b08, 0x0819191919190819, 0x0819191919191908,
|
||||
0x08191919192b0808, 0x081919192b080819, 0x081919192b081908, 0x081919192b190808,
|
||||
0x0819192b08080808, 0x0819192b08081919, 0x0819192b08082b08, 0x0819192b08190819,
|
||||
0x0819192b08191908, 0x0819192b082b0808, 0x0819192b19080819, 0x0819192b19081908,
|
||||
0x0819192b19190808, 0x0819192b2b080808, 0x0819192b2b2b2b2b, 0x08192b0808080819,
|
||||
0x08192b0808081908, 0x08192b080808192b, 0x08192b0808082b19, 0x08192b0808190808,
|
||||
0x08192b0808191919, 0x08192b0808192b08, 0x08192b08082b0819, 0x08192b0819080808,
|
||||
0x08192b081908082b, 0x08192b0819081919, 0x08192b0819082b08, 0x08192b0819190819,
|
||||
0x08192b0819191908, 0x08192b08192b0808, 0x08192b082b080819, 0x08192b082b081908,
|
||||
0x08192b1908080808, 0x08192b190808082b, 0x08192b1908081919, 0x08192b1908082b08,
|
||||
0x08192b1908190819, 0x08192b1908191908, 0x08192b19082b0808, 0x08192b1919080819,
|
||||
0x08192b1919081908, 0x08192b1919190808, 0x08192b19192b2b19, 0x08192b192b2b082b,
|
||||
0x08192b2b08081908, 0x08192b2b08190808, 0x08192b2b19080808, 0x08192b2b1919192b,
|
||||
0x082b080808080808, 0x082b08080808082b, 0x082b080808081919, 0x082b080808082b08,
|
||||
0x082b080808190819, 0x082b080808191908, 0x082b08080819192b, 0x082b080808192b19,
|
||||
0x082b0808082b0808, 0x082b0808082b1919, 0x082b0808082b2b2b, 0x082b080819080819,
|
||||
0x082b080819081908, 0x082b080819190808, 0x082b08081919082b, 0x082b080819191919,
|
||||
0x082b0808192b1908, 0x082b08082b080808, 0x082b08082b082b2b, 0x082b08082b191908,
|
||||
0x082b08082b2b2b2b, 0x082b081908080819, 0x082b081908081908, 0x082b081908190808,
|
||||
0x082b08190819082b, 0x082b081908191919, 0x082b0819082b0819, 0x082b081919080808,
|
||||
0x082b08191908082b, 0x082b081919081919, 0x082b081919190819, 0x082b081919191908,
|
||||
0x082b0819192b0808, 0x082b08192b080819, 0x082b08192b081908, 0x082b08192b190808,
|
||||
0x082b082b08080808, 0x082b082b08082b2b, 0x082b082b082b082b, 0x082b082b082b2b08,
|
||||
0x082b082b082b2b2b, 0x082b082b19081908, 0x082b082b19190808, 0x082b082b2b082b08,
|
||||
0x082b082b2b082b2b, 0x082b082b2b2b2b08, 0x082b190808080819, 0x082b190808081908,
|
||||
0x082b19080808192b, 0x082b190808082b19, 0x082b190808190808, 0x082b190808191919,
|
||||
0x082b190808192b08, 0x082b1908082b0819, 0x082b1908082b1908, 0x082b190819080808,
|
||||
0x082b19081908082b, 0x082b190819081919, 0x082b190819082b08, 0x082b190819190819,
|
||||
0x082b190819191908, 0x082b1908192b0808, 0x082b19082b080819, 0x082b19082b081908,
|
||||
0x082b19082b190808, 0x082b191908080808, 0x082b191908081919, 0x082b191908082b08,
|
||||
0x082b191908190819, 0x082b191908191908, 0x082b1919082b0808, 0x082b191919080819,
|
||||
0x082b191919081908, 0x082b191919190808, 0x082b1919192b192b, 0x082b19192b080808,
|
||||
0x082b192b08080819, 0x082b192b08081908, 0x082b192b08190808, 0x082b192b19080808,
|
||||
0x082b192b19192b19, 0x082b2b0808080808, 0x082b2b0808081919, 0x082b2b0808190819,
|
||||
0x082b2b0808191908, 0x082b2b0819080819, 0x082b2b0819081908, 0x082b2b0819190808,
|
||||
0x082b2b082b082b2b, 0x082b2b082b2b2b2b, 0x082b2b1908080819, 0x082b2b1908081908,
|
||||
0x082b2b1908190808, 0x082b2b192b191919, 0x082b2b2b08082b2b, 0x082b2b2b082b082b,
|
||||
0x082b2b2b192b1908, 0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819,
|
||||
0x1908080808081908, 0x190808080808192b, 0x1908080808082b19, 0x1908080808190808,
|
||||
0x190808080819082b, 0x1908080808191919, 0x1908080808192b08, 0x1908080808192b2b,
|
||||
0x19080808082b0819, 0x19080808082b1908, 0x19080808082b192b, 0x1908080819080808,
|
||||
0x190808081908082b, 0x1908080819081919, 0x1908080819082b08, 0x1908080819082b2b,
|
||||
0x1908080819190819, 0x1908080819191908, 0x190808081919192b, 0x1908080819192b19,
|
||||
0x19080808192b0808, 0x19080808192b082b, 0x19080808192b1919, 0x190808082b080819,
|
||||
0x190808082b081908, 0x190808082b190808, 0x190808082b191919, 0x190808082b192b08,
|
||||
0x190808082b2b0819, 0x190808082b2b1908, 0x1908081908080808, 0x190808190808082b,
|
||||
0x1908081908081919, 0x1908081908082b08, 0x1908081908190819, 0x1908081908191908,
|
||||
0x190808190819192b, 0x1908081908192b19, 0x19080819082b0808, 0x19080819082b082b,
|
||||
0x19080819082b1919, 0x1908081919080819, 0x1908081919081908, 0x190808191908192b,
|
||||
0x1908081919082b19, 0x1908081919190808, 0x190808191919082b, 0x1908081919191919,
|
||||
0x1908081919192b08, 0x19080819192b0819, 0x19080819192b1908, 0x190808192b080808,
|
||||
0x190808192b08082b, 0x190808192b081919, 0x190808192b082b08, 0x190808192b190819,
|
||||
0x190808192b191908, 0x190808192b2b0808, 0x1908082b08080819, 0x1908082b08081908,
|
||||
0x1908082b08190808, 0x1908082b0819082b, 0x1908082b08191919, 0x1908082b08192b08,
|
||||
0x1908082b082b1908, 0x1908082b19080808, 0x1908082b19081919, 0x1908082b19082b08,
|
||||
0x1908082b19190819, 0x1908082b19191908, 0x1908082b192b0808, 0x1908082b2b080819,
|
||||
0x1908082b2b081908, 0x1908190808080808, 0x190819080808082b, 0x1908190808081919,
|
||||
0x1908190808082b08, 0x1908190808082b2b, 0x1908190808190819, 0x1908190808191908,
|
||||
0x190819080819192b, 0x1908190808192b19, 0x19081908082b0808, 0x19081908082b082b,
|
||||
0x19081908082b1919, 0x19081908082b2b08, 0x1908190819080819, 0x1908190819081908,
|
||||
0x190819081908192b, 0x1908190819082b19, 0x1908190819190808, 0x190819081919082b,
|
||||
0x1908190819191919, 0x1908190819192b08, 0x19081908192b0819, 0x19081908192b1908,
|
||||
0x190819082b080808, 0x190819082b08082b, 0x190819082b081919, 0x190819082b082b08,
|
||||
0x190819082b190819, 0x190819082b191908, 0x190819082b2b0808, 0x1908191908080819,
|
||||
0x1908191908081908, 0x190819190808192b, 0x1908191908082b19, 0x1908191908190808,
|
||||
0x190819190819082b, 0x1908191908191919, 0x1908191908192b08, 0x19081919082b0819,
|
||||
0x19081919082b1908, 0x1908191919080808, 0x190819191908082b, 0x1908191919081919,
|
||||
0x1908191919082b08, 0x1908191919190819, 0x1908191919191908, 0x19081919192b0808,
|
||||
0x19081919192b2b2b, 0x190819192b080819, 0x190819192b081908, 0x190819192b190808,
|
||||
0x1908192b08080808, 0x1908192b0808082b, 0x1908192b08081919, 0x1908192b08082b08,
|
||||
0x1908192b08190819, 0x1908192b08191908, 0x1908192b082b0808, 0x1908192b19080819,
|
||||
0x1908192b19081908, 0x1908192b19190808, 0x1908192b2b080808, 0x1908192b2b2b1919,
|
||||
0x19082b0808080819, 0x19082b0808081908, 0x19082b0808082b19, 0x19082b0808190808,
|
||||
0x19082b080819082b, 0x19082b0808191919, 0x19082b0808192b08, 0x19082b08082b0819,
|
||||
0x19082b08082b1908, 0x19082b0819080808, 0x19082b081908082b, 0x19082b0819081919,
|
||||
0x19082b0819082b08, 0x19082b0819190819, 0x19082b0819191908, 0x19082b08192b0808,
|
||||
0x19082b082b081908, 0x19082b082b190808, 0x19082b1908080808, 0x19082b190808082b,
|
||||
0x19082b1908081919, 0x19082b1908082b08, 0x19082b1908190819, 0x19082b1908191908,
|
||||
0x19082b19082b0808, 0x19082b1919080819, 0x19082b1919081908, 0x19082b1919190808,
|
||||
0x19082b192b080808, 0x19082b192b19192b, 0x19082b2b08080819, 0x19082b2b08081908,
|
||||
0x19082b2b08190808, 0x19082b2b19080808, 0x1919080808080808, 0x191908080808082b,
|
||||
0x1919080808081919, 0x1919080808082b08, 0x1919080808190819, 0x1919080808191908,
|
||||
0x191908080819192b, 0x1919080808192b19, 0x19190808082b0808, 0x19190808082b082b,
|
||||
0x19190808082b1919, 0x19190808082b2b08, 0x1919080819080819, 0x1919080819081908,
|
||||
0x191908081908192b, 0x1919080819082b19, 0x1919080819190808, 0x191908081919082b,
|
||||
0x1919080819191919, 0x1919080819192b08, 0x19190808192b0819, 0x19190808192b1908,
|
||||
0x191908082b080808, 0x191908082b08082b, 0x191908082b081919, 0x191908082b082b08,
|
||||
0x191908082b190819, 0x191908082b191908, 0x1919081908080819, 0x1919081908081908,
|
||||
0x191908190808192b, 0x1919081908082b19, 0x1919081908190808, 0x191908190819082b,
|
||||
0x1919081908191919, 0x1919081908192b08, 0x19190819082b0819, 0x19190819082b1908,
|
||||
0x1919081919080808, 0x191908191908082b, 0x1919081919081919, 0x1919081919082b08,
|
||||
0x1919081919190819, 0x1919081919191908, 0x19190819192b0808, 0x191908192b080819,
|
||||
0x191908192b081908, 0x191908192b190808, 0x1919082b08080808, 0x1919082b08081919,
|
||||
0x1919082b08082b08, 0x1919082b08190819, 0x1919082b08191908, 0x1919082b082b0808,
|
||||
0x1919082b19080819, 0x1919082b19081908, 0x1919082b19190808, 0x1919082b192b2b19,
|
||||
0x1919082b2b080808, 0x1919190808080819, 0x1919190808081908, 0x191919080808192b,
|
||||
0x1919190808082b19, 0x1919190808190808, 0x191919080819082b, 0x1919190808191919,
|
||||
0x1919190808192b08, 0x19191908082b0819, 0x19191908082b1908, 0x1919190819080808,
|
||||
0x191919081908082b, 0x1919190819081919, 0x1919190819082b08, 0x1919190819190819,
|
||||
0x1919190819191908, 0x19191908192b0808, 0x191919082b080819, 0x191919082b081908,
|
||||
0x191919082b190808, 0x1919191908080808, 0x191919190808082b, 0x1919191908081919,
|
||||
0x1919191908082b08, 0x1919191908190819, 0x1919191908191908, 0x19191919082b0808,
|
||||
0x1919191919080819, 0x1919191919081908, 0x1919191919190808, 0x191919192b080808,
|
||||
0x1919192b08080819, 0x1919192b08081908, 0x1919192b08190808, 0x1919192b082b192b,
|
||||
0x1919192b19080808, 0x19192b0808080808, 0x19192b080808082b, 0x19192b0808081919,
|
||||
0x19192b0808082b08, 0x19192b0808190819, 0x19192b0808191908, 0x19192b08082b0808,
|
||||
0x19192b0819080819, 0x19192b0819081908, 0x19192b0819190808, 0x19192b0819192b2b,
|
||||
0x19192b082b080808, 0x19192b1908080819, 0x19192b1908081908, 0x19192b1908190808,
|
||||
0x19192b1919080808, 0x19192b2b08080808, 0x19192b2b08192b19, 0x19192b2b2b081919,
|
||||
0x19192b2b2b2b2b08, 0x192b080808080819, 0x192b080808081908, 0x192b08080808192b,
|
||||
0x192b080808190808, 0x192b08080819082b, 0x192b080808191919, 0x192b080808192b08,
|
||||
0x192b0808082b0819, 0x192b0808082b1908, 0x192b080819080808, 0x192b080819081919,
|
||||
0x192b080819082b08, 0x192b080819190819, 0x192b080819191908, 0x192b0808192b0808,
|
||||
0x192b08082b081908, 0x192b08082b190808, 0x192b081908080808, 0x192b08190808082b,
|
||||
0x192b081908081919, 0x192b081908082b08, 0x192b081908190819, 0x192b081908191908,
|
||||
0x192b0819082b0808, 0x192b081919080819, 0x192b081919081908, 0x192b081919190808,
|
||||
0x192b08192b080808, 0x192b08192b192b19, 0x192b082b08081908, 0x192b082b08190808,
|
||||
0x192b082b19080808, 0x192b082b1919192b, 0x192b082b2b2b0819, 0x192b190808080808,
|
||||
0x192b190808081919, 0x192b190808082b08, 0x192b190808190819, 0x192b190808191908,
|
||||
0x192b1908082b0808, 0x192b190819080819, 0x192b190819081908, 0x192b190819190808,
|
||||
0x192b19082b080808, 0x192b191908080819, 0x192b191908081908, 0x192b191908190808,
|
||||
0x192b191919080808, 0x192b191919082b2b, 0x192b1919192b2b08, 0x192b19192b19082b,
|
||||
0x192b192b08080808, 0x192b192b2b191908, 0x192b2b0808080819, 0x192b2b0808081908,
|
||||
0x192b2b0808190808, 0x192b2b08192b1919, 0x192b2b082b192b08, 0x192b2b1908080808,
|
||||
0x192b2b19082b2b2b, 0x192b2b2b1908082b, 0x192b2b2b2b2b0819, 0x2b08080808080808,
|
||||
0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08, 0x2b08080808190819,
|
||||
0x2b08080808191908, 0x2b08080808192b19, 0x2b080808082b0808, 0x2b080808082b1919,
|
||||
0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808081919082b,
|
||||
0x2b08080819191919, 0x2b08080819192b08, 0x2b080808192b0819, 0x2b0808082b080808,
|
||||
0x2b0808082b081919, 0x2b0808082b190819, 0x2b0808082b191908, 0x2b08081908080819,
|
||||
0x2b08081908081908, 0x2b08081908082b19, 0x2b08081908190808, 0x2b0808190819082b,
|
||||
0x2b08081908191919, 0x2b08081908192b08, 0x2b080819082b0819, 0x2b080819082b1908,
|
||||
0x2b08081919080808, 0x2b0808191908082b, 0x2b08081919081919, 0x2b08081919082b08,
|
||||
0x2b08081919190819, 0x2b08081919191908, 0x2b0808192b080819, 0x2b0808192b081908,
|
||||
0x2b0808192b190808, 0x2b0808192b2b2b19, 0x2b08082b08080808, 0x2b08082b08081919,
|
||||
0x2b08082b08082b2b, 0x2b08082b08190819, 0x2b08082b08191908, 0x2b08082b19080819,
|
||||
0x2b08082b19081908, 0x2b08082b19190808, 0x2b08190808080819, 0x2b08190808081908,
|
||||
0x2b0819080808192b, 0x2b08190808082b19, 0x2b08190808190808, 0x2b0819080819082b,
|
||||
0x2b08190808191919, 0x2b08190808192b08, 0x2b081908082b0819, 0x2b08190819080808,
|
||||
0x2b0819081908082b, 0x2b08190819081919, 0x2b08190819082b08, 0x2b08190819190819,
|
||||
0x2b08190819191908, 0x2b081908192b0808, 0x2b0819082b080819, 0x2b0819082b081908,
|
||||
0x2b0819082b190808, 0x2b08191908080808, 0x2b0819190808082b, 0x2b08191908081919,
|
||||
0x2b08191908082b08, 0x2b08191908190819, 0x2b08191908191908, 0x2b081919082b0808,
|
||||
0x2b08191919080819, 0x2b08191919081908, 0x2b08191919190808, 0x2b0819192b080808,
|
||||
0x2b0819192b082b2b, 0x2b08192b08080819, 0x2b08192b08081908, 0x2b08192b08190808,
|
||||
0x2b08192b082b2b19, 0x2b08192b19080808, 0x2b082b0808080808, 0x2b082b0808081919,
|
||||
0x2b082b0808190819, 0x2b082b0808191908, 0x2b082b0819080819, 0x2b082b0819081908,
|
||||
0x2b082b0819190808, 0x2b082b082b2b082b, 0x2b082b1908080819, 0x2b082b1908081908,
|
||||
0x2b082b1919080808, 0x2b082b19192b1919, 0x2b082b2b082b082b, 0x2b082b2b19192b08,
|
||||
0x2b082b2b19192b2b, 0x2b082b2b2b08082b, 0x2b082b2b2b2b082b, 0x2b19080808080819,
|
||||
0x2b19080808081908, 0x2b19080808082b19, 0x2b19080808190808, 0x2b1908080819082b,
|
||||
0x2b19080808191919, 0x2b19080808192b08, 0x2b190808082b1908, 0x2b19080819080808,
|
||||
0x2b1908081908082b, 0x2b19080819081919, 0x2b19080819082b08, 0x2b19080819190819,
|
||||
0x2b19080819191908, 0x2b190808192b0808, 0x2b1908082b080819, 0x2b1908082b081908,
|
||||
0x2b1908082b190808, 0x2b19081908080808, 0x2b19081908081919, 0x2b19081908190819,
|
||||
0x2b19081908191908, 0x2b19081919080819, 0x2b19081919081908, 0x2b19081919190808,
|
||||
0x2b19081919192b2b, 0x2b19082b08080819, 0x2b19082b08081908, 0x2b19082b08190808,
|
||||
0x2b19082b19080808, 0x2b19082b2b2b192b, 0x2b19190808080808, 0x2b1919080808082b,
|
||||
0x2b19190808081919, 0x2b19190808082b08, 0x2b19190808190819, 0x2b19190808191908,
|
||||
0x2b191908082b0808, 0x2b19190819080819, 0x2b19190819081908, 0x2b19190819190808,
|
||||
0x2b1919082b080808, 0x2b1919082b19192b, 0x2b19191908080819, 0x2b19191908081908,
|
||||
0x2b19191908190808, 0x2b19191919080808, 0x2b1919192b192b08, 0x2b1919192b2b0819,
|
||||
0x2b19192b08080808, 0x2b19192b1908192b, 0x2b19192b192b1908, 0x2b192b0808080819,
|
||||
0x2b192b0808081908, 0x2b192b0808190808, 0x2b192b08082b192b, 0x2b192b0819080808,
|
||||
0x2b192b082b2b2b19, 0x2b192b1908080808, 0x2b192b1919082b19, 0x2b192b191919082b,
|
||||
0x2b192b2b2b190808, 0x2b2b080808080808, 0x2b2b080808081919, 0x2b2b080808082b2b,
|
||||
0x2b2b080808191908, 0x2b2b0808082b082b, 0x2b2b0808082b2b2b, 0x2b2b080819080819,
|
||||
0x2b2b080819081908, 0x2b2b080819190808, 0x2b2b08082b2b082b, 0x2b2b08082b2b2b2b,
|
||||
0x2b2b081919080808, 0x2b2b0819192b1919, 0x2b2b082b0808082b, 0x2b2b082b08082b2b,
|
||||
0x2b2b082b082b082b, 0x2b2b082b082b2b08, 0x2b2b082b082b2b2b, 0x2b2b082b2b08082b,
|
||||
0x2b2b082b2b082b08, 0x2b2b082b2b082b2b, 0x2b2b082b2b2b2b08, 0x2b2b190808080819,
|
||||
0x2b2b190808081908, 0x2b2b190808190808, 0x2b2b190819080808, 0x2b2b19082b082b19,
|
||||
0x2b2b19082b2b1908, 0x2b2b191908080808, 0x2b2b191908192b19, 0x2b2b192b19190819,
|
||||
0x2b2b2b0808082b2b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b082b, 0x2b2b2b1919191908,
|
||||
0x2b2b2b192b08192b, 0x2b2b2b2b08082b08, 0x2b2b2b2b08082b2b, 0x2b2b2b2b082b0808,
|
||||
0x2b2b2b2b082b082b, 0x2b2b2b2b082b2b08, 0x2b2b2b2b2b082b08, 0x2b2b2b2b2b2b2b2b,
|
||||
GGML_TABLE_END()
|
||||
|
||||
GGML_TABLE_BEGIN(uint32_t, iq3xxs_grid, 256)
|
||||
0x04040404, 0x04040414, 0x04040424, 0x04040c0c, 0x04040c1c, 0x04040c3e, 0x04041404, 0x04041414,
|
||||
0x04041c0c, 0x04042414, 0x04043e1c, 0x04043e2c, 0x040c040c, 0x040c041c, 0x040c0c04, 0x040c0c14,
|
||||
0x040c140c, 0x040c142c, 0x040c1c04, 0x040c1c14, 0x040c240c, 0x040c2c24, 0x040c3e04, 0x04140404,
|
||||
0x04140414, 0x04140424, 0x04140c0c, 0x04141404, 0x04141414, 0x04141c0c, 0x04141c1c, 0x04141c3e,
|
||||
0x04142c0c, 0x04142c3e, 0x04143e2c, 0x041c040c, 0x041c043e, 0x041c0c04, 0x041c0c14, 0x041c142c,
|
||||
0x041c3e04, 0x04240c1c, 0x04241c3e, 0x04242424, 0x04242c3e, 0x04243e1c, 0x04243e2c, 0x042c040c,
|
||||
0x042c043e, 0x042c1c14, 0x042c2c14, 0x04341c2c, 0x04343424, 0x043e0c04, 0x043e0c24, 0x043e0c34,
|
||||
0x043e241c, 0x043e340c, 0x0c04040c, 0x0c04041c, 0x0c040c04, 0x0c040c14, 0x0c04140c, 0x0c04141c,
|
||||
0x0c041c04, 0x0c041c14, 0x0c041c24, 0x0c04243e, 0x0c042c04, 0x0c0c0404, 0x0c0c0414, 0x0c0c0c0c,
|
||||
0x0c0c1404, 0x0c0c1414, 0x0c14040c, 0x0c14041c, 0x0c140c04, 0x0c140c14, 0x0c14140c, 0x0c141c04,
|
||||
0x0c143e14, 0x0c1c0404, 0x0c1c0414, 0x0c1c1404, 0x0c1c1c0c, 0x0c1c2434, 0x0c1c3434, 0x0c24040c,
|
||||
0x0c24042c, 0x0c242c04, 0x0c2c1404, 0x0c2c1424, 0x0c2c2434, 0x0c2c3e0c, 0x0c34042c, 0x0c3e1414,
|
||||
0x0c3e2404, 0x14040404, 0x14040414, 0x14040c0c, 0x14040c1c, 0x14041404, 0x14041414, 0x14041434,
|
||||
0x14041c0c, 0x14042414, 0x140c040c, 0x140c041c, 0x140c042c, 0x140c0c04, 0x140c0c14, 0x140c140c,
|
||||
0x140c1c04, 0x140c341c, 0x140c343e, 0x140c3e04, 0x14140404, 0x14140414, 0x14140c0c, 0x14140c3e,
|
||||
0x14141404, 0x14141414, 0x14141c3e, 0x14142404, 0x14142c2c, 0x141c040c, 0x141c0c04, 0x141c0c24,
|
||||
0x141c3e04, 0x141c3e24, 0x14241c2c, 0x14242c1c, 0x142c041c, 0x142c143e, 0x142c240c, 0x142c3e24,
|
||||
0x143e040c, 0x143e041c, 0x143e0c34, 0x143e242c, 0x1c04040c, 0x1c040c04, 0x1c040c14, 0x1c04140c,
|
||||
0x1c04141c, 0x1c042c04, 0x1c04342c, 0x1c043e14, 0x1c0c0404, 0x1c0c0414, 0x1c0c1404, 0x1c0c1c0c,
|
||||
0x1c0c2424, 0x1c0c2434, 0x1c14040c, 0x1c14041c, 0x1c140c04, 0x1c14142c, 0x1c142c14, 0x1c143e14,
|
||||
0x1c1c0c0c, 0x1c1c1c1c, 0x1c241c04, 0x1c24243e, 0x1c243e14, 0x1c2c0404, 0x1c2c0434, 0x1c2c1414,
|
||||
0x1c2c2c2c, 0x1c340c24, 0x1c341c34, 0x1c34341c, 0x1c3e1c1c, 0x1c3e3404, 0x24040424, 0x24040c3e,
|
||||
0x24041c2c, 0x24041c3e, 0x24042c1c, 0x24042c3e, 0x240c3e24, 0x24141404, 0x24141c3e, 0x24142404,
|
||||
0x24143404, 0x24143434, 0x241c043e, 0x241c242c, 0x24240424, 0x24242c0c, 0x24243424, 0x242c142c,
|
||||
0x242c241c, 0x242c3e04, 0x243e042c, 0x243e0c04, 0x243e0c14, 0x243e1c04, 0x2c040c14, 0x2c04240c,
|
||||
0x2c043e04, 0x2c0c0404, 0x2c0c0434, 0x2c0c1434, 0x2c0c2c2c, 0x2c140c24, 0x2c141c14, 0x2c143e14,
|
||||
0x2c1c0414, 0x2c1c2c1c, 0x2c240c04, 0x2c24141c, 0x2c24143e, 0x2c243e14, 0x2c2c0414, 0x2c2c1c0c,
|
||||
0x2c342c04, 0x2c3e1424, 0x2c3e2414, 0x34041424, 0x34042424, 0x34042434, 0x34043424, 0x340c140c,
|
||||
0x340c340c, 0x34140c3e, 0x34143424, 0x341c1c04, 0x341c1c34, 0x34242424, 0x342c042c, 0x342c2c14,
|
||||
0x34341c1c, 0x343e041c, 0x343e140c, 0x3e04041c, 0x3e04042c, 0x3e04043e, 0x3e040c04, 0x3e041c14,
|
||||
0x3e042c14, 0x3e0c1434, 0x3e0c2404, 0x3e140c14, 0x3e14242c, 0x3e142c14, 0x3e1c0404, 0x3e1c0c2c,
|
||||
0x3e1c1c1c, 0x3e1c3404, 0x3e24140c, 0x3e24240c, 0x3e2c0404, 0x3e2c0414, 0x3e2c1424, 0x3e341c04,
|
||||
GGML_TABLE_END()
|
||||
|
||||
GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512)
|
||||
0x01010101, 0x01010103, 0x01010105, 0x0101010b, 0x0101010f, 0x01010301, 0x01010303, 0x01010305,
|
||||
0x01010309, 0x0101030d, 0x01010501, 0x01010503, 0x0101050b, 0x01010707, 0x01010901, 0x01010905,
|
||||
0x0101090b, 0x0101090f, 0x01010b03, 0x01010b07, 0x01010d01, 0x01010d05, 0x01010f03, 0x01010f09,
|
||||
0x01010f0f, 0x01030101, 0x01030103, 0x01030105, 0x01030109, 0x01030301, 0x01030303, 0x0103030b,
|
||||
0x01030501, 0x01030507, 0x0103050f, 0x01030703, 0x0103070b, 0x01030909, 0x01030d03, 0x01030d0b,
|
||||
0x01030f05, 0x01050101, 0x01050103, 0x0105010b, 0x0105010f, 0x01050301, 0x01050307, 0x0105030d,
|
||||
0x01050503, 0x0105050b, 0x01050701, 0x01050709, 0x01050905, 0x0105090b, 0x0105090f, 0x01050b03,
|
||||
0x01050b07, 0x01050f01, 0x01050f07, 0x01070107, 0x01070303, 0x0107030b, 0x01070501, 0x01070505,
|
||||
0x01070703, 0x01070707, 0x0107070d, 0x01070909, 0x01070b01, 0x01070b05, 0x01070d0f, 0x01070f03,
|
||||
0x01070f0b, 0x01090101, 0x01090307, 0x0109030f, 0x01090503, 0x01090509, 0x01090705, 0x01090901,
|
||||
0x01090907, 0x01090b03, 0x01090f01, 0x010b0105, 0x010b0109, 0x010b0501, 0x010b0505, 0x010b050d,
|
||||
0x010b0707, 0x010b0903, 0x010b090b, 0x010b090f, 0x010b0d0d, 0x010b0f07, 0x010d010d, 0x010d0303,
|
||||
0x010d0307, 0x010d0703, 0x010d0b05, 0x010d0f03, 0x010f0101, 0x010f0105, 0x010f0109, 0x010f0501,
|
||||
0x010f0505, 0x010f050d, 0x010f0707, 0x010f0b01, 0x010f0b09, 0x03010101, 0x03010103, 0x03010105,
|
||||
0x03010109, 0x03010301, 0x03010303, 0x03010307, 0x0301030b, 0x0301030f, 0x03010501, 0x03010505,
|
||||
0x03010703, 0x03010709, 0x0301070d, 0x03010b09, 0x03010b0d, 0x03010d03, 0x03010f05, 0x03030101,
|
||||
0x03030103, 0x03030107, 0x0303010d, 0x03030301, 0x03030309, 0x03030503, 0x03030701, 0x03030707,
|
||||
0x03030903, 0x03030b01, 0x03030b05, 0x03030f01, 0x03030f0d, 0x03050101, 0x03050305, 0x0305030b,
|
||||
0x0305030f, 0x03050501, 0x03050509, 0x03050705, 0x03050901, 0x03050907, 0x03050b0b, 0x03050d01,
|
||||
0x03050f05, 0x03070103, 0x03070109, 0x0307010f, 0x03070301, 0x03070307, 0x03070503, 0x0307050f,
|
||||
0x03070701, 0x03070709, 0x03070903, 0x03070d05, 0x03070f01, 0x03090107, 0x0309010b, 0x03090305,
|
||||
0x03090309, 0x03090703, 0x03090707, 0x03090905, 0x0309090d, 0x03090b01, 0x03090b09, 0x030b0103,
|
||||
0x030b0301, 0x030b0307, 0x030b0503, 0x030b0701, 0x030b0705, 0x030b0b03, 0x030d0501, 0x030d0509,
|
||||
0x030d050f, 0x030d0909, 0x030d090d, 0x030f0103, 0x030f0107, 0x030f0301, 0x030f0305, 0x030f0503,
|
||||
0x030f070b, 0x030f0903, 0x030f0d05, 0x030f0f01, 0x05010101, 0x05010103, 0x05010107, 0x0501010b,
|
||||
0x0501010f, 0x05010301, 0x05010305, 0x05010309, 0x0501030d, 0x05010503, 0x05010507, 0x0501050f,
|
||||
0x05010701, 0x05010705, 0x05010903, 0x05010907, 0x0501090b, 0x05010b01, 0x05010b05, 0x05010d0f,
|
||||
0x05010f01, 0x05010f07, 0x05010f0b, 0x05030101, 0x05030105, 0x05030301, 0x05030307, 0x0503030f,
|
||||
0x05030505, 0x0503050b, 0x05030703, 0x05030709, 0x05030905, 0x05030b03, 0x05050103, 0x05050109,
|
||||
0x0505010f, 0x05050503, 0x05050507, 0x05050701, 0x0505070f, 0x05050903, 0x05050b07, 0x05050b0f,
|
||||
0x05050f03, 0x05050f09, 0x05070101, 0x05070105, 0x0507010b, 0x05070303, 0x05070505, 0x05070509,
|
||||
0x05070703, 0x05070707, 0x05070905, 0x05070b01, 0x05070d0d, 0x05090103, 0x0509010f, 0x05090501,
|
||||
0x05090507, 0x05090705, 0x0509070b, 0x05090903, 0x05090f05, 0x05090f0b, 0x050b0109, 0x050b0303,
|
||||
0x050b0505, 0x050b070f, 0x050b0901, 0x050b0b07, 0x050b0f01, 0x050d0101, 0x050d0105, 0x050d010f,
|
||||
0x050d0503, 0x050d0b0b, 0x050d0d03, 0x050f010b, 0x050f0303, 0x050f050d, 0x050f0701, 0x050f0907,
|
||||
0x050f0b01, 0x07010105, 0x07010303, 0x07010307, 0x0701030b, 0x0701030f, 0x07010505, 0x07010703,
|
||||
0x07010707, 0x0701070b, 0x07010905, 0x07010909, 0x0701090f, 0x07010b03, 0x07010d07, 0x07010f03,
|
||||
0x07030103, 0x07030107, 0x0703010b, 0x07030309, 0x07030503, 0x07030507, 0x07030901, 0x07030d01,
|
||||
0x07030f05, 0x07030f0d, 0x07050101, 0x07050305, 0x07050501, 0x07050705, 0x07050709, 0x07050b01,
|
||||
0x07070103, 0x07070301, 0x07070309, 0x07070503, 0x07070507, 0x0707050f, 0x07070701, 0x07070903,
|
||||
0x07070907, 0x0707090f, 0x07070b0b, 0x07070f07, 0x07090107, 0x07090303, 0x0709030d, 0x07090505,
|
||||
0x07090703, 0x07090b05, 0x07090d01, 0x07090d09, 0x070b0103, 0x070b0301, 0x070b0305, 0x070b050b,
|
||||
0x070b0705, 0x070b0909, 0x070b0b0d, 0x070b0f07, 0x070d030d, 0x070d0903, 0x070f0103, 0x070f0107,
|
||||
0x070f0501, 0x070f0505, 0x070f070b, 0x09010101, 0x09010109, 0x09010305, 0x09010501, 0x09010509,
|
||||
0x0901050f, 0x09010705, 0x09010903, 0x09010b01, 0x09010f01, 0x09030105, 0x0903010f, 0x09030303,
|
||||
0x09030307, 0x09030505, 0x09030701, 0x0903070b, 0x09030907, 0x09030b03, 0x09030b0b, 0x09050103,
|
||||
0x09050107, 0x09050301, 0x0905030b, 0x09050503, 0x09050707, 0x09050901, 0x09050b0f, 0x09050d05,
|
||||
0x09050f01, 0x09070109, 0x09070303, 0x09070307, 0x09070501, 0x09070505, 0x09070703, 0x0907070b,
|
||||
0x09090101, 0x09090105, 0x09090509, 0x0909070f, 0x09090901, 0x09090f03, 0x090b010b, 0x090b010f,
|
||||
0x090b0503, 0x090b0d05, 0x090d0307, 0x090d0709, 0x090d0d01, 0x090f0301, 0x090f030b, 0x090f0701,
|
||||
0x090f0907, 0x090f0b03, 0x0b010105, 0x0b010301, 0x0b010309, 0x0b010505, 0x0b010901, 0x0b010909,
|
||||
0x0b01090f, 0x0b010b05, 0x0b010d0d, 0x0b010f09, 0x0b030103, 0x0b030107, 0x0b03010b, 0x0b030305,
|
||||
0x0b030503, 0x0b030705, 0x0b030f05, 0x0b050101, 0x0b050303, 0x0b050507, 0x0b050701, 0x0b05070d,
|
||||
0x0b050b07, 0x0b070105, 0x0b07010f, 0x0b070301, 0x0b07050f, 0x0b070909, 0x0b070b03, 0x0b070d0b,
|
||||
0x0b070f07, 0x0b090103, 0x0b090109, 0x0b090501, 0x0b090705, 0x0b09090d, 0x0b0b0305, 0x0b0b050d,
|
||||
0x0b0b0b03, 0x0b0b0b07, 0x0b0d0905, 0x0b0f0105, 0x0b0f0109, 0x0b0f0505, 0x0d010303, 0x0d010307,
|
||||
0x0d01030b, 0x0d010703, 0x0d010707, 0x0d010d01, 0x0d030101, 0x0d030501, 0x0d03050f, 0x0d030d09,
|
||||
0x0d050305, 0x0d050709, 0x0d050905, 0x0d050b0b, 0x0d050d05, 0x0d050f01, 0x0d070101, 0x0d070309,
|
||||
0x0d070503, 0x0d070901, 0x0d09050b, 0x0d090907, 0x0d090d05, 0x0d0b0101, 0x0d0b0107, 0x0d0b0709,
|
||||
0x0d0b0d01, 0x0d0d010b, 0x0d0d0901, 0x0d0f0303, 0x0d0f0307, 0x0f010101, 0x0f010109, 0x0f01010f,
|
||||
0x0f010501, 0x0f010505, 0x0f01070d, 0x0f010901, 0x0f010b09, 0x0f010d05, 0x0f030105, 0x0f030303,
|
||||
0x0f030509, 0x0f030907, 0x0f03090b, 0x0f050103, 0x0f050109, 0x0f050301, 0x0f05030d, 0x0f050503,
|
||||
0x0f050701, 0x0f050b03, 0x0f070105, 0x0f070705, 0x0f07070b, 0x0f070b07, 0x0f090103, 0x0f09010b,
|
||||
0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
|
||||
GGML_TABLE_END()
|
||||
|
||||
#define NGRID_IQ2XXS 512
|
||||
GGML_TABLE_BEGIN(uint64_t, iq1s_grid, NGRID_IQ2XXS)
|
||||
0xffffffffffff0101, 0xffffffffff01ff00, 0xffffffffff010100, 0xffffffff00000000,
|
||||
0xffffffff01ff00ff, 0xffffffff01ff0001, 0xffffffff0101ffff, 0xffffffff0101ff01,
|
||||
0xffffff00ff000000, 0xffffff000000ff00, 0xffffff00000000ff, 0xffffff0000000100,
|
||||
0xffffff0000010000, 0xffffff0001000000, 0xffffff01ffff00ff, 0xffffff01ff01ff00,
|
||||
0xffffff01ff010100, 0xffffff0100000001, 0xffffff0101ffff00, 0xffffff0101ff0101,
|
||||
0xffffff0101010100, 0xffff00ffff00ff01, 0xffff00ffff0000ff, 0xffff00ff00ff0100,
|
||||
0xffff00ff0100ff00, 0xffff00ff010001ff, 0xffff0000ff0101ff, 0xffff000000ffff00,
|
||||
0xffff000000000000, 0xffff00000001ff01, 0xffff000001000101, 0xffff0000010100ff,
|
||||
0xffff0001ffff0100, 0xffff00010000ff00, 0xffff000100010101, 0xffff000101000000,
|
||||
0xffff01ffffff0000, 0xffff01ffff01ffff, 0xffff01ffff010100, 0xffff01ff00000000,
|
||||
0xffff01ff01ffffff, 0xffff01ff01ff0001, 0xffff01ff0101ffff, 0xffff01ff01010001,
|
||||
0xffff0100ffffff01, 0xffff01000000ffff, 0xffff010000000100, 0xffff010001ff01ff,
|
||||
0xffff010001000000, 0xffff0101ff000000, 0xffff0101000101ff, 0xffff010101ffff01,
|
||||
0xffff01010101ff00, 0xff00ffffff000000, 0xff00ffff00ffff00, 0xff00ffff00000001,
|
||||
0xff00ffff000001ff, 0xff00ffff01010000, 0xff00ff00ffff0000, 0xff00ff00ff00ff00,
|
||||
0xff00ff00ff0000ff, 0xff00ff00ff000100, 0xff00ff00ff010001, 0xff00ff0000ff0001,
|
||||
0xff00ff000000ffff, 0xff00ff0000000000, 0xff00ff000001ff00, 0xff00ff0000010100,
|
||||
0xff00ff0001ff0000, 0xff00ff000100ff00, 0xff00ff0001000100, 0xff00ff01ff000000,
|
||||
0xff00ff0100ff0000, 0xff00ff01000001ff, 0xff00ff0101010001, 0xff0000ff00000000,
|
||||
0xff0000ff0001ff00, 0xff0000ff00010100, 0xff000000ffff0101, 0xff000000ff000000,
|
||||
0xff000000ff01ff00, 0xff00000000ff0000, 0xff0000000000ff00, 0xff000000000000ff,
|
||||
0xff00000000000000, 0xff00000000000001, 0xff00000000000100, 0xff0000000001ffff,
|
||||
0xff00000000010000, 0xff00000001000000, 0xff00000001010100, 0xff000001ff00ff01,
|
||||
0xff000001ff0100ff, 0xff00000100000000, 0xff0000010001ff00, 0xff00000101ff0100,
|
||||
0xff0000010100ff00, 0xff0001ff00ff00ff, 0xff0001ff00000101, 0xff0001ff000100ff,
|
||||
0xff0001ff01000000, 0xff000100ff0001ff, 0xff0001000000ff01, 0xff00010000000000,
|
||||
0xff00010000010001, 0xff00010000010100, 0xff00010001ffff00, 0xff00010001ff0101,
|
||||
0xff00010001010000, 0xff000101ffffffff, 0xff000101ff000101, 0xff00010101ff00ff,
|
||||
0xff00010101000001, 0xff000101010100ff, 0xff01ffffff000101, 0xff01ffffff01ffff,
|
||||
0xff01ffffff01ff01, 0xff01ffffff0101ff, 0xff01ffff00000000, 0xff01ffff01ff0001,
|
||||
0xff01ffff0101ff01, 0xff01ff00ff000000, 0xff01ff0000ff0100, 0xff01ff000000ff01,
|
||||
0xff01ff0000010000, 0xff01ff00010000ff, 0xff01ff01ff01ff00, 0xff01ff0100000101,
|
||||
0xff0100ffffff0000, 0xff0100ffff010000, 0xff0100ff01ff00ff, 0xff0100ff01000100,
|
||||
0xff0100ff010100ff, 0xff010000ffffff01, 0xff01000000000000, 0xff0100000101ff00,
|
||||
0xff010001ffff00ff, 0xff010001ff000100, 0xff01000100ffff00, 0xff01000100010001,
|
||||
0xff01000101ff0001, 0xff010001010001ff, 0xff0101ffffffffff, 0xff0101ffff01ffff,
|
||||
0xff0101ffff010101, 0xff0101ff0000ff00, 0xff0101ff01010001, 0xff010100ff000000,
|
||||
0xff010100ff01ff01, 0xff01010000ff0001, 0xff01010000000100, 0xff01010001000000,
|
||||
0xff0101010100ffff, 0x00ffffff0000ff01, 0x00ffffff000000ff, 0x00ffffff00000100,
|
||||
0x00ffffff00010000, 0x00ffff00ffff0001, 0x00ffff00ff0000ff, 0x00ffff00ff000100,
|
||||
0x00ffff0000000000, 0x00ffff0001000100, 0x00ffff0001010001, 0x00ffff01ff00ff01,
|
||||
0x00ffff0100ff0100, 0x00ffff010000ff00, 0x00ffff01000100ff, 0x00ffff0101ff00ff,
|
||||
0x00ffff010101ff00, 0x00ff00ffffffffff, 0x00ff00ffffff01ff, 0x00ff00ffff000101,
|
||||
0x00ff00ff00000000, 0x00ff00ff000101ff, 0x00ff00ff01010101, 0x00ff0000ff000000,
|
||||
0x00ff0000ff01ffff, 0x00ff000000ff0000, 0x00ff00000000ff00, 0x00ff0000000000ff,
|
||||
0x00ff000000000000, 0x00ff000000000001, 0x00ff000000000100, 0x00ff000000010000,
|
||||
0x00ff000001ffff01, 0x00ff000001000000, 0x00ff0001ff000101, 0x00ff000100ffffff,
|
||||
0x00ff000100000000, 0x00ff0001010001ff, 0x00ff01ffff000000, 0x00ff01ff0001ff00,
|
||||
0x00ff01ff01ff0100, 0x00ff0100ff01ff01, 0x00ff010000ff00ff, 0x00ff010000ff0101,
|
||||
0x00ff010000000000, 0x00ff010000010101, 0x00ff01000100ff00, 0x00ff010001010000,
|
||||
0x00ff0101ffffff00, 0x00ff01010000ff01, 0x00ff010100000100, 0x00ff010101ff0000,
|
||||
0x0000ffffffff0100, 0x0000ffffff00ff00, 0x0000ffffff0000ff, 0x0000ffffff010000,
|
||||
0x0000ffff00000000, 0x0000ffff00010101, 0x0000ffff01ffff01, 0x0000ffff01000100,
|
||||
0x0000ff00ff000000, 0x0000ff00ff01ff00, 0x0000ff00ff0101ff, 0x0000ff0000ff0000,
|
||||
0x0000ff000000ff00, 0x0000ff00000000ff, 0x0000ff0000000000, 0x0000ff0000000001,
|
||||
0x0000ff0000000100, 0x0000ff0000010000, 0x0000ff0001ffffff, 0x0000ff0001ff01ff,
|
||||
0x0000ff0001000000, 0x0000ff000101ffff, 0x0000ff01ffff0101, 0x0000ff01ff010000,
|
||||
0x0000ff0100000000, 0x0000ff0101000101, 0x000000ffffff0001, 0x000000ffff000000,
|
||||
0x000000ff00ff0000, 0x000000ff0000ff00, 0x000000ff000000ff, 0x000000ff00000000,
|
||||
0x000000ff00000001, 0x000000ff00000100, 0x000000ff00010000, 0x000000ff01000000,
|
||||
0x000000ff0101ff00, 0x00000000ffff0000, 0x00000000ff00ff00, 0x00000000ff0000ff,
|
||||
0x00000000ff000000, 0x00000000ff000001, 0x00000000ff000100, 0x00000000ff010000,
|
||||
0x0000000000ffff00, 0x0000000000ff00ff, 0x0000000000ff0000, 0x0000000000ff0001,
|
||||
0x0000000000ff0100, 0x000000000000ffff, 0x000000000000ff00, 0x000000000000ff01,
|
||||
0x00000000000000ff, 0x0000000000000001, 0x00000000000001ff, 0x0000000000000100,
|
||||
0x0000000000000101, 0x000000000001ff00, 0x00000000000100ff, 0x0000000000010000,
|
||||
0x0000000000010001, 0x0000000000010100, 0x0000000001ff0000, 0x000000000100ff00,
|
||||
0x00000000010000ff, 0x0000000001000000, 0x0000000001000001, 0x0000000001000100,
|
||||
0x0000000001010000, 0x00000001ffff01ff, 0x00000001ff000000, 0x0000000100ff0000,
|
||||
0x000000010000ff00, 0x00000001000000ff, 0x0000000100000000, 0x0000000100000001,
|
||||
0x0000000100000100, 0x0000000100010000, 0x0000000101000000, 0x000001ffff00ff00,
|
||||
0x000001ffff010001, 0x000001ffff0101ff, 0x000001ff00ffff01, 0x000001ff0000ffff,
|
||||
0x000001ff00000000, 0x000001ff010000ff, 0x000001ff01010100, 0x00000100ffff0100,
|
||||
0x00000100ff000000, 0x0000010000ff0000, 0x000001000000ff00, 0x00000100000000ff,
|
||||
0x0000010000000000, 0x0000010000000001, 0x0000010000000100, 0x0000010000010000,
|
||||
0x0000010001000000, 0x000001000101ff01, 0x00000101ffff0001, 0x00000101ff01ffff,
|
||||
0x0000010100000000, 0x0000010101010100, 0x0001ffffff000000, 0x0001ffff00ffffff,
|
||||
0x0001ffff00000100, 0x0001ffff0001ff00, 0x0001ffff01000000, 0x0001ff00ffffff00,
|
||||
0x0001ff00ffff01ff, 0x0001ff00ff010000, 0x0001ff0000000000, 0x0001ff0000010001,
|
||||
0x0001ff0001ff0000, 0x0001ff0001010100, 0x0001ff01ff0000ff, 0x0001ff01ff000001,
|
||||
0x0001ff0100ffffff, 0x0001ff010001ffff, 0x0001ff01000101ff, 0x0001ff010100ff01,
|
||||
0x000100ffff00ffff, 0x000100ffff00ff01, 0x000100ffff000100, 0x000100ff00000000,
|
||||
0x000100ff000101ff, 0x000100ff01ff0101, 0x000100ff0100ffff, 0x000100ff01010101,
|
||||
0x00010000ff000000, 0x00010000ff010100, 0x0001000000ff0000, 0x000100000000ff00,
|
||||
0x00010000000000ff, 0x0001000000000000, 0x0001000000000001, 0x0001000000000100,
|
||||
0x0001000000010000, 0x0001000001ffff01, 0x0001000001000000, 0x0001000100ff0101,
|
||||
0x0001000100000000, 0x00010001010100ff, 0x000101ffffff01ff, 0x000101ffffff0101,
|
||||
0x000101ff00010000, 0x000101ff01ff0000, 0x000101ff0100ff01, 0x00010100ffff0000,
|
||||
0x0001010000000000, 0x000101000001ffff, 0x0001010000010101, 0x00010100010001ff,
|
||||
0x00010101ff00ff00, 0x00010101ff010001, 0x0001010100ffffff, 0x0001010100ff01ff,
|
||||
0x00010101000101ff, 0x0001010101ff0000, 0x000101010100ff01, 0x0001010101000101,
|
||||
0x01ffffffffff0101, 0x01ffffffff01ffff, 0x01ffffffff01ff01, 0x01ffffffff0101ff,
|
||||
0x01ffffffff010101, 0x01ffffff00000000, 0x01ffffff01ff01ff, 0x01ffffff01000101,
|
||||
0x01ffffff0101ff01, 0x01ffffff010100ff, 0x01ffff000000ff00, 0x01ffff0000000001,
|
||||
0x01ffff00000001ff, 0x01ffff0000010000, 0x01ffff0001ff0000, 0x01ffff01ffffffff,
|
||||
0x01ffff01ffff01ff, 0x01ffff01ff000000, 0x01ffff01ff01ffff, 0x01ffff01ff0101ff,
|
||||
0x01ffff010100ffff, 0x01ff00ffffff0000, 0x01ff00ffff010000, 0x01ff00ff00ffff01,
|
||||
0x01ff0000ff0000ff, 0x01ff000000000000, 0x01ff00000001ff01, 0x01ff000001ffffff,
|
||||
0x01ff000001010100, 0x01ff0001ffffff01, 0x01ff0001ff010001, 0x01ff000101ff0100,
|
||||
0x01ff000101000001, 0x01ff0001010100ff, 0x01ff01ffff00ffff, 0x01ff01ff00010001,
|
||||
0x01ff01ff01000000, 0x01ff01ff010101ff, 0x01ff0100ff000001, 0x01ff010000ffff00,
|
||||
0x01ff010000000100, 0x01ff010001ff01ff, 0x01ff01000101ffff, 0x01ff0101ffff00ff,
|
||||
0x01ff0101ffff0101, 0x01ff0101ff0101ff, 0x01ff010100010000, 0x0100ffff00ff00ff,
|
||||
0x0100ffff00ff0001, 0x0100ffff00000100, 0x0100ffff0100ff00, 0x0100ff00ffff0000,
|
||||
0x0100ff00ff00ffff, 0x0100ff00ff00ff01, 0x0100ff00ff000100, 0x0100ff00ff010000,
|
||||
0x0100ff0000000000, 0x0100ff00000100ff, 0x0100ff0001ff0101, 0x0100ff0001010101,
|
||||
0x0100ff0100ff00ff, 0x0100ff0100ff0001, 0x0100ff0100000100, 0x0100ff0100010001,
|
||||
0x0100ff0101000000, 0x010000ffff00ff00, 0x010000ff0000ffff, 0x010000ff00000000,
|
||||
0x010000ff010001ff, 0x010000ff01010001, 0x01000000ffffff00, 0x01000000ffff0101,
|
||||
0x01000000ff000000, 0x01000000ff0100ff, 0x01000000ff010101, 0x0100000000ff0000,
|
||||
0x010000000000ff00, 0x01000000000000ff, 0x0100000000000000, 0x0100000000000001,
|
||||
0x0100000000000100, 0x0100000000010000, 0x0100000001000000, 0x0100000100000000,
|
||||
0x01000001000101ff, 0x0100000101ffff01, 0x010001ffff000101, 0x010001ff00ff0100,
|
||||
0x010001ff0000ff00, 0x010001ff000100ff, 0x010001ff01ffffff, 0x01000100ffff0000,
|
||||
0x01000100ff0001ff, 0x0100010000000000, 0x010001000001ff00, 0x0100010001ff0000,
|
||||
0x01000100010000ff, 0x0100010001000101, 0x01000101ff00ff01, 0x0100010100ff0100,
|
||||
0x010001010000ffff, 0x0100010101010001, 0x0101ffffffff0101, 0x0101ffffff0001ff,
|
||||
0x0101ffffff01ffff, 0x0101ffffff010101, 0x0101ffff00000000, 0x0101ffff0101ffff,
|
||||
0x0101ffff010101ff, 0x0101ff00ff000000, 0x0101ff0000ff0100, 0x0101ff000000ff00,
|
||||
0x0101ff0000010000, 0x0101ff00010000ff, 0x0101ff0001000001, 0x0101ff01ff010101,
|
||||
0x0101ff0100000000, 0x0101ff010101ff00, 0x010100ffffff0000, 0x010100ffff010000,
|
||||
0x010100ff00ff01ff, 0x010100ff000000ff, 0x010100ff00000101, 0x010100ff01ffff00,
|
||||
0x01010000ffffff01, 0x01010000ff000100, 0x01010000ff01ff01, 0x0101000000000000,
|
||||
0x01010000000100ff, 0x010100000101ff01, 0x01010001ffff0000, 0x01010001ff00ffff,
|
||||
0x01010001ff010000, 0x0101000101ffffff, 0x0101000101ff01ff, 0x0101000101010101,
|
||||
0x010101ffff01ffff, 0x010101ff00000000, 0x010101ff0001ff01, 0x010101ff0101ffff,
|
||||
0x010101ff010101ff, 0x01010100ffffffff, 0x01010100ff000001, 0x010101000000ff00,
|
||||
0x0101010001010000, 0x0101010100ff0001, 0x010101010001ff01, 0x010101010101ffff,
|
||||
GGML_TABLE_END()
|
||||
|
||||
#endif // GGML_COMMON_IMPL
|
||||
1023
ggml-cuda.cu
1023
ggml-cuda.cu
File diff suppressed because it is too large
Load Diff
@@ -1927,10 +1927,10 @@ static ggml_backend_buffer_type_t ggml_backend_kompute_get_default_buffer_type(g
|
||||
return ggml_backend_kompute_buffer_type(ctx->device);
|
||||
}
|
||||
|
||||
static bool ggml_backend_kompute_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
||||
static ggml_status ggml_backend_kompute_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
||||
auto * ctx = static_cast<ggml_kompute_context *>(backend->context);
|
||||
ggml_vk_graph_compute(ctx, cgraph);
|
||||
return true;
|
||||
return GGML_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
static bool ggml_backend_kompute_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
|
||||
@@ -1953,11 +1953,17 @@ static struct ggml_backend_i kompute_backend_i = {
|
||||
/* .supports_op = */ ggml_backend_kompute_supports_op,
|
||||
};
|
||||
|
||||
static ggml_guid_t ggml_backend_kompute_guid() {
|
||||
static ggml_guid guid = { 0x7b, 0x57, 0xdc, 0xaf, 0xde, 0x12, 0x1d, 0x49, 0xfb, 0x35, 0xfa, 0x9b, 0x18, 0x31, 0x1d, 0xca };
|
||||
return &guid;
|
||||
}
|
||||
|
||||
ggml_backend_t ggml_backend_kompute_init(int device) {
|
||||
GGML_ASSERT(s_kompute_context == nullptr);
|
||||
s_kompute_context = new ggml_kompute_context(device);
|
||||
|
||||
ggml_backend_t kompute_backend = new ggml_backend {
|
||||
/* .guid = */ ggml_backend_kompute_guid(),
|
||||
/* .interface = */ kompute_backend_i,
|
||||
/* .context = */ s_kompute_context,
|
||||
};
|
||||
@@ -1966,7 +1972,7 @@ ggml_backend_t ggml_backend_kompute_init(int device) {
|
||||
}
|
||||
|
||||
bool ggml_backend_is_kompute(ggml_backend_t backend) {
|
||||
return backend && backend->iface.get_name == ggml_backend_kompute_name;
|
||||
return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_kompute_guid());
|
||||
}
|
||||
|
||||
static ggml_backend_t ggml_backend_reg_kompute_init(const char * params, void * user_data) {
|
||||
|
||||
78
ggml-metal.m
78
ggml-metal.m
@@ -163,6 +163,8 @@ enum ggml_metal_kernel_type {
|
||||
GGML_METAL_KERNEL_TYPE_IM2COL_F32,
|
||||
GGML_METAL_KERNEL_TYPE_UPSCALE_F32,
|
||||
GGML_METAL_KERNEL_TYPE_PAD_F32,
|
||||
GGML_METAL_KERNEL_TYPE_ARANGE_F32,
|
||||
GGML_METAL_KERNEL_TYPE_TIMESTEP_EMBEDDING_F32,
|
||||
GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_ASC,
|
||||
GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_DESC,
|
||||
GGML_METAL_KERNEL_TYPE_LEAKY_RELU_F32,
|
||||
@@ -569,6 +571,8 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_IM2COL_F32, im2col_f32, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_UPSCALE_F32, upscale_f32, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_PAD_F32, pad_f32, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_TIMESTEP_EMBEDDING_F32, timestep_embedding_f32, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ARANGE_F32, arange_f32, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_ASC, argsort_f32_i32_asc, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_DESC, argsort_f32_i32_desc, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_LEAKY_RELU_F32, leaky_relu_f32, true);
|
||||
@@ -697,6 +701,8 @@ static bool ggml_metal_supports_op(const struct ggml_metal_context * ctx, const
|
||||
return false;
|
||||
case GGML_OP_UPSCALE:
|
||||
case GGML_OP_PAD:
|
||||
case GGML_OP_ARANGE:
|
||||
case GGML_OP_TIMESTEP_EMBEDDING:
|
||||
case GGML_OP_ARGSORT:
|
||||
case GGML_OP_LEAKY_RELU:
|
||||
return true;
|
||||
@@ -742,7 +748,7 @@ static bool ggml_metal_supports_op(const struct ggml_metal_context * ctx, const
|
||||
}
|
||||
}
|
||||
|
||||
static bool ggml_metal_graph_compute(
|
||||
static enum ggml_status ggml_metal_graph_compute(
|
||||
struct ggml_metal_context * ctx,
|
||||
struct ggml_cgraph * gf) {
|
||||
|
||||
@@ -1091,7 +1097,8 @@ static bool ggml_metal_graph_compute(
|
||||
{
|
||||
GGML_ASSERT(ggml_is_contiguous(src0));
|
||||
|
||||
const float scale = *(const float *) dst->op_params;
|
||||
float scale;
|
||||
memcpy(&scale, dst->op_params, sizeof(scale));
|
||||
|
||||
int64_t n = ggml_nelements(dst);
|
||||
|
||||
@@ -1250,11 +1257,15 @@ static bool ggml_metal_graph_compute(
|
||||
pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SOFT_MAX].pipeline;
|
||||
}
|
||||
|
||||
const float scale = ((float *) dst->op_params)[0];
|
||||
const float max_bias = ((float *) dst->op_params)[1];
|
||||
float scale;
|
||||
float max_bias;
|
||||
|
||||
memcpy(&scale, ((int32_t *) dst->op_params) + 0, sizeof(scale));
|
||||
memcpy(&max_bias, ((int32_t *) dst->op_params) + 1, sizeof(max_bias));
|
||||
|
||||
const int64_t nrows_x = ggml_nrows(src0);
|
||||
const int64_t nrows_y = src0->ne[1];
|
||||
|
||||
const uint32_t n_head_kv = nrows_x/nrows_y;
|
||||
const uint32_t n_head_log2 = 1u << (uint32_t) floorf(log2f((float) n_head_kv));
|
||||
|
||||
@@ -2086,6 +2097,7 @@ static bool ggml_metal_graph_compute(
|
||||
|
||||
//const int n_past = ((int32_t *) dst->op_params)[0];
|
||||
const int n_head = ((int32_t *) dst->op_params)[1];
|
||||
|
||||
float max_bias;
|
||||
memcpy(&max_bias, (int32_t *) dst->op_params + 2, sizeof(float));
|
||||
|
||||
@@ -2300,6 +2312,50 @@ static bool ggml_metal_graph_compute(
|
||||
|
||||
[encoder dispatchThreadgroups:MTLSizeMake(ne1, ne2, ne3) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
|
||||
} break;
|
||||
case GGML_OP_ARANGE:
|
||||
{
|
||||
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
||||
|
||||
float start;
|
||||
float step;
|
||||
|
||||
memcpy(&start, ((int32_t *) dst->op_params) + 0, sizeof(float));
|
||||
memcpy(&step, ((int32_t *) dst->op_params) + 2, sizeof(float));
|
||||
|
||||
id<MTLComputePipelineState> pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ARANGE_F32].pipeline;
|
||||
|
||||
[encoder setComputePipelineState:pipeline];
|
||||
[encoder setBuffer:id_dst offset:offs_dst atIndex:0];
|
||||
[encoder setBytes:&ne0 length:sizeof(ne0) atIndex:1];
|
||||
[encoder setBytes:&start length:sizeof(start) atIndex:2];
|
||||
[encoder setBytes:&step length:sizeof(step) atIndex:3];
|
||||
|
||||
const int nth = MIN(1024, ne0);
|
||||
|
||||
[encoder dispatchThreadgroups:MTLSizeMake(1, 1, 1) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
|
||||
} break;
|
||||
case GGML_OP_TIMESTEP_EMBEDDING:
|
||||
{
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
||||
|
||||
const int dim = dst->op_params[0];
|
||||
const int max_period = dst->op_params[1];
|
||||
|
||||
const int half = dim / 2;
|
||||
|
||||
id<MTLComputePipelineState> pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_TIMESTEP_EMBEDDING_F32].pipeline;
|
||||
|
||||
[encoder setComputePipelineState:pipeline];
|
||||
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
|
||||
[encoder setBuffer:id_dst offset:offs_dst atIndex:1];
|
||||
[encoder setBytes:&nb1 length:sizeof(nb1) atIndex:2];
|
||||
[encoder setBytes:&dim length:sizeof(dim) atIndex:3];
|
||||
[encoder setBytes:&max_period length:sizeof(max_period) atIndex:4];
|
||||
|
||||
const int nth = MIN(1024, half);
|
||||
|
||||
[encoder dispatchThreadgroups:MTLSizeMake(ne00, 1, 1) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
|
||||
} break;
|
||||
case GGML_OP_ARGSORT:
|
||||
{
|
||||
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
||||
@@ -2428,7 +2484,7 @@ static bool ggml_metal_graph_compute(
|
||||
MTLCommandBufferStatus status = [command_buffer status];
|
||||
if (status != MTLCommandBufferStatusCompleted) {
|
||||
GGML_METAL_LOG_INFO("%s: command buffer %d failed with status %lu\n", __func__, i, status);
|
||||
return false;
|
||||
return GGML_STATUS_FAILED;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2437,7 +2493,7 @@ static bool ggml_metal_graph_compute(
|
||||
}
|
||||
|
||||
}
|
||||
return true;
|
||||
return GGML_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -2739,7 +2795,7 @@ GGML_CALL static ggml_backend_buffer_type_t ggml_backend_metal_get_default_buffe
|
||||
UNUSED(backend);
|
||||
}
|
||||
|
||||
GGML_CALL static bool ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
||||
GGML_CALL static enum ggml_status ggml_backend_metal_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
||||
struct ggml_metal_context * metal_ctx = (struct ggml_metal_context *)backend->context;
|
||||
|
||||
return ggml_metal_graph_compute(metal_ctx, cgraph);
|
||||
@@ -2771,6 +2827,11 @@ void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void *
|
||||
ggml_metal_log_user_data = user_data;
|
||||
}
|
||||
|
||||
static ggml_guid_t ggml_backend_metal_guid(void) {
|
||||
static ggml_guid guid = { 0x81, 0xa1, 0x8b, 0x1e, 0x71, 0xec, 0x79, 0xed, 0x2b, 0x85, 0xdc, 0x8a, 0x61, 0x98, 0x30, 0xe6 };
|
||||
return &guid;
|
||||
}
|
||||
|
||||
ggml_backend_t ggml_backend_metal_init(void) {
|
||||
struct ggml_metal_context * ctx = ggml_metal_init(GGML_DEFAULT_N_THREADS);
|
||||
|
||||
@@ -2781,6 +2842,7 @@ ggml_backend_t ggml_backend_metal_init(void) {
|
||||
ggml_backend_t metal_backend = malloc(sizeof(struct ggml_backend));
|
||||
|
||||
*metal_backend = (struct ggml_backend) {
|
||||
/* .guid = */ ggml_backend_metal_guid(),
|
||||
/* .interface = */ ggml_backend_metal_i,
|
||||
/* .context = */ ctx,
|
||||
};
|
||||
@@ -2789,7 +2851,7 @@ ggml_backend_t ggml_backend_metal_init(void) {
|
||||
}
|
||||
|
||||
bool ggml_backend_is_metal(ggml_backend_t backend) {
|
||||
return backend && backend->iface.get_name == ggml_backend_metal_name;
|
||||
return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_metal_guid());
|
||||
}
|
||||
|
||||
void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
|
||||
|
||||
830
ggml-metal.metal
830
ggml-metal.metal
@@ -1,5 +1,8 @@
|
||||
#include <metal_stdlib>
|
||||
|
||||
#define GGML_COMMON_IMPL_METAL
|
||||
#include "ggml-common.h"
|
||||
|
||||
using namespace metal;
|
||||
|
||||
#define MAX(x, y) ((x) > (y) ? (x) : (y))
|
||||
@@ -1959,6 +1962,49 @@ kernel void kernel_pad_f32(
|
||||
}
|
||||
}
|
||||
|
||||
kernel void kernel_arange_f32(
|
||||
device char * dst,
|
||||
constant int64_t & ne0,
|
||||
constant float & start,
|
||||
constant float & step,
|
||||
uint3 tgpig[[threadgroup_position_in_grid]],
|
||||
uint3 tpitg[[thread_position_in_threadgroup]],
|
||||
uint3 ntg[[threads_per_threadgroup]]) {
|
||||
|
||||
device float * dst_ptr = (device float *) dst;
|
||||
|
||||
for (int i0 = tpitg.x; i0 < ne0; i0 += ntg.x) {
|
||||
dst_ptr[i0] = start + step * i0;
|
||||
}
|
||||
}
|
||||
|
||||
kernel void kernel_timestep_embedding_f32(
|
||||
device const char * src0,
|
||||
device char * dst,
|
||||
constant uint64_t & nb1,
|
||||
constant int & dim,
|
||||
constant int & max_period,
|
||||
uint3 tgpig[[threadgroup_position_in_grid]],
|
||||
uint3 tpitg[[thread_position_in_threadgroup]],
|
||||
uint3 ntg[[threads_per_threadgroup]]) {
|
||||
|
||||
int i = tgpig.x;
|
||||
device float * embed_data = (device float *)(dst + i*nb1);
|
||||
|
||||
int half_ = dim / 2;
|
||||
for (int j = tpitg.x; j < half_; j += ntg.x) {
|
||||
float timestep = ((device float *)src0)[i];
|
||||
float freq = (float)exp(-log((float)max_period) * j / half_);
|
||||
float arg = timestep * freq;
|
||||
embed_data[j ] = cos(arg);
|
||||
embed_data[j + half_] = sin(arg);
|
||||
}
|
||||
|
||||
if (dim % 2 != 0 && tpitg.x == 0) {
|
||||
embed_data[dim] = 0.f;
|
||||
}
|
||||
}
|
||||
|
||||
// bitonic sort implementation following the CUDA kernels as reference
|
||||
typedef void (argsort_t)(
|
||||
device const float * x,
|
||||
@@ -2560,12 +2606,16 @@ typedef struct {
|
||||
uint8_t qs[QK4_NL/2];
|
||||
} block_iq4_nl;
|
||||
|
||||
#if QK_K == 64
|
||||
#define block_iq4_xs block_iq4_nl
|
||||
#else
|
||||
typedef struct {
|
||||
half d;
|
||||
uint16_t scales_h;
|
||||
uint8_t scales_l[QK_K/64];
|
||||
uint8_t qs[QK_K/2];
|
||||
} block_iq4_xs;
|
||||
#endif
|
||||
|
||||
//====================================== dot products =========================
|
||||
|
||||
@@ -3591,710 +3641,6 @@ kernel void kernel_mul_mv_q6_K_f32(
|
||||
|
||||
// ======================= "True" 2-bit
|
||||
|
||||
constexpr constant static uint64_t iq2xxs_grid[256] = {
|
||||
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
||||
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x08080808082b0808,
|
||||
0x08080808082b082b, 0x08080808082b2b08, 0x08080808082b2b2b, 0x0808080819080819,
|
||||
0x0808080819081908, 0x0808080819190808, 0x0808080819192b08, 0x08080808192b0819,
|
||||
0x08080808192b1908, 0x080808082b080808, 0x080808082b08082b, 0x080808082b082b2b,
|
||||
0x080808082b2b082b, 0x0808081908080819, 0x0808081908081908, 0x0808081908190808,
|
||||
0x0808081908191919, 0x0808081919080808, 0x080808192b081908, 0x080808192b192b08,
|
||||
0x0808082b08080808, 0x0808082b0808082b, 0x0808082b082b082b, 0x0808082b2b08082b,
|
||||
0x0808190808080819, 0x0808190808081908, 0x0808190808190808, 0x08081908082b0819,
|
||||
0x08081908082b1908, 0x0808190819080808, 0x080819081908082b, 0x0808190819082b08,
|
||||
0x08081908192b0808, 0x080819082b080819, 0x080819082b081908, 0x080819082b190808,
|
||||
0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b, 0x0808191908082b08,
|
||||
0x08081919082b0808, 0x080819191908192b, 0x08081919192b2b19, 0x080819192b080808,
|
||||
0x080819192b190819, 0x0808192b08082b19, 0x0808192b08190808, 0x0808192b19080808,
|
||||
0x0808192b2b081908, 0x0808192b2b2b1908, 0x08082b0808080808, 0x08082b0808081919,
|
||||
0x08082b0808082b08, 0x08082b0808191908, 0x08082b08082b2b08, 0x08082b0819080819,
|
||||
0x08082b0819081908, 0x08082b0819190808, 0x08082b081919082b, 0x08082b082b082b08,
|
||||
0x08082b1908081908, 0x08082b1919080808, 0x08082b2b0808082b, 0x08082b2b08191908,
|
||||
0x0819080808080819, 0x0819080808081908, 0x0819080808190808, 0x08190808082b0819,
|
||||
0x0819080819080808, 0x08190808192b0808, 0x081908082b081908, 0x081908082b190808,
|
||||
0x081908082b191919, 0x0819081908080808, 0x0819081908082b08, 0x08190819082b0808,
|
||||
0x0819081919190808, 0x0819081919192b2b, 0x081908192b080808, 0x0819082b082b1908,
|
||||
0x0819082b19081919, 0x0819190808080808, 0x0819190808082b08, 0x08191908082b0808,
|
||||
0x08191908082b1919, 0x0819190819082b19, 0x081919082b080808, 0x0819191908192b08,
|
||||
0x08191919192b082b, 0x0819192b08080808, 0x0819192b0819192b, 0x08192b0808080819,
|
||||
0x08192b0808081908, 0x08192b0808190808, 0x08192b0819080808, 0x08192b082b080819,
|
||||
0x08192b1908080808, 0x08192b1908081919, 0x08192b192b2b0808, 0x08192b2b19190819,
|
||||
0x082b080808080808, 0x082b08080808082b, 0x082b080808082b2b, 0x082b080819081908,
|
||||
0x082b0808192b0819, 0x082b08082b080808, 0x082b08082b08082b, 0x082b0819082b2b19,
|
||||
0x082b081919082b08, 0x082b082b08080808, 0x082b082b0808082b, 0x082b190808080819,
|
||||
0x082b190808081908, 0x082b190808190808, 0x082b190819080808, 0x082b19081919192b,
|
||||
0x082b191908080808, 0x082b191919080819, 0x082b1919192b1908, 0x082b192b2b190808,
|
||||
0x082b2b0808082b08, 0x082b2b08082b0808, 0x082b2b082b191908, 0x082b2b2b19081908,
|
||||
0x1908080808080819, 0x1908080808081908, 0x1908080808190808, 0x1908080808192b08,
|
||||
0x19080808082b0819, 0x19080808082b1908, 0x1908080819080808, 0x1908080819082b08,
|
||||
0x190808081919192b, 0x19080808192b0808, 0x190808082b080819, 0x190808082b081908,
|
||||
0x190808082b190808, 0x1908081908080808, 0x19080819082b0808, 0x19080819192b0819,
|
||||
0x190808192b080808, 0x190808192b081919, 0x1908082b08080819, 0x1908082b08190808,
|
||||
0x1908082b19082b08, 0x1908082b1919192b, 0x1908082b192b2b08, 0x1908190808080808,
|
||||
0x1908190808082b08, 0x19081908082b0808, 0x190819082b080808, 0x190819082b192b19,
|
||||
0x190819190819082b, 0x19081919082b1908, 0x1908192b08080808, 0x19082b0808080819,
|
||||
0x19082b0808081908, 0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919,
|
||||
0x19082b1908080808, 0x19082b1919192b08, 0x19082b19192b0819, 0x19082b192b08082b,
|
||||
0x19082b2b19081919, 0x19082b2b2b190808, 0x1919080808080808, 0x1919080808082b08,
|
||||
0x1919080808190819, 0x1919080808192b19, 0x19190808082b0808, 0x191908082b080808,
|
||||
0x191908082b082b08, 0x1919081908081908, 0x191908191908082b, 0x191908192b2b1908,
|
||||
0x1919082b2b190819, 0x191919082b190808, 0x191919082b19082b, 0x1919191908082b2b,
|
||||
0x1919192b08080819, 0x1919192b19191908, 0x19192b0808080808, 0x19192b0808190819,
|
||||
0x19192b0808192b19, 0x19192b08192b1908, 0x19192b1919080808, 0x19192b2b08082b08,
|
||||
0x192b080808081908, 0x192b080808190808, 0x192b080819080808, 0x192b0808192b2b08,
|
||||
0x192b081908080808, 0x192b081919191919, 0x192b082b08192b08, 0x192b082b192b0808,
|
||||
0x192b190808080808, 0x192b190808081919, 0x192b191908190808, 0x192b19190819082b,
|
||||
0x192b19192b081908, 0x192b2b081908082b, 0x2b08080808080808, 0x2b0808080808082b,
|
||||
0x2b08080808082b2b, 0x2b08080819080819, 0x2b0808082b08082b, 0x2b08081908081908,
|
||||
0x2b08081908192b08, 0x2b08081919080808, 0x2b08082b08190819, 0x2b08190808080819,
|
||||
0x2b08190808081908, 0x2b08190808190808, 0x2b08190808191919, 0x2b08190819080808,
|
||||
0x2b081908192b0808, 0x2b08191908080808, 0x2b0819191908192b, 0x2b0819192b191908,
|
||||
0x2b08192b08082b19, 0x2b08192b19080808, 0x2b08192b192b0808, 0x2b082b080808082b,
|
||||
0x2b082b1908081908, 0x2b082b2b08190819, 0x2b19080808081908, 0x2b19080808190808,
|
||||
0x2b190808082b1908, 0x2b19080819080808, 0x2b1908082b2b0819, 0x2b1908190819192b,
|
||||
0x2b1908192b080808, 0x2b19082b19081919, 0x2b19190808080808, 0x2b191908082b082b,
|
||||
0x2b19190819081908, 0x2b19191919190819, 0x2b192b082b080819, 0x2b192b19082b0808,
|
||||
0x2b2b08080808082b, 0x2b2b080819190808, 0x2b2b08082b081919, 0x2b2b081908082b19,
|
||||
0x2b2b082b08080808, 0x2b2b190808192b08, 0x2b2b2b0819190808, 0x2b2b2b1908081908,
|
||||
};
|
||||
|
||||
constexpr constant static uint64_t iq2xs_grid[512] = {
|
||||
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
||||
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
|
||||
0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
|
||||
0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
|
||||
0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
|
||||
0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x080808082b080808,
|
||||
0x080808082b08082b, 0x080808082b081919, 0x080808082b082b08, 0x080808082b190819,
|
||||
0x080808082b191908, 0x080808082b192b19, 0x080808082b2b0808, 0x0808081908080819,
|
||||
0x0808081908081908, 0x080808190808192b, 0x0808081908082b19, 0x0808081908190808,
|
||||
0x080808190819082b, 0x0808081908191919, 0x0808081908192b08, 0x0808081908192b2b,
|
||||
0x08080819082b0819, 0x08080819082b1908, 0x0808081919080808, 0x080808191908082b,
|
||||
0x0808081919081919, 0x0808081919082b08, 0x0808081919190819, 0x0808081919191908,
|
||||
0x08080819192b0808, 0x08080819192b2b08, 0x080808192b080819, 0x080808192b081908,
|
||||
0x080808192b190808, 0x0808082b08080808, 0x0808082b0808082b, 0x0808082b08081919,
|
||||
0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908, 0x0808082b082b0808,
|
||||
0x0808082b19080819, 0x0808082b19081908, 0x0808082b19190808, 0x0808082b19191919,
|
||||
0x0808082b2b080808, 0x0808082b2b082b2b, 0x0808190808080819, 0x0808190808081908,
|
||||
0x080819080808192b, 0x0808190808082b19, 0x0808190808190808, 0x080819080819082b,
|
||||
0x0808190808191919, 0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908,
|
||||
0x0808190819080808, 0x080819081908082b, 0x0808190819081919, 0x0808190819082b08,
|
||||
0x0808190819190819, 0x0808190819191908, 0x080819081919192b, 0x08081908192b0808,
|
||||
0x080819082b080819, 0x080819082b081908, 0x080819082b190808, 0x0808191908080808,
|
||||
0x080819190808082b, 0x0808191908081919, 0x0808191908082b08, 0x0808191908190819,
|
||||
0x0808191908191908, 0x08081919082b0808, 0x0808191919080819, 0x0808191919081908,
|
||||
0x0808191919190808, 0x08081919192b0819, 0x080819192b080808, 0x0808192b08080819,
|
||||
0x0808192b08081908, 0x0808192b08190808, 0x0808192b082b192b, 0x0808192b19080808,
|
||||
0x0808192b1908082b, 0x0808192b2b081908, 0x08082b0808080808, 0x08082b080808082b,
|
||||
0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808082b2b, 0x08082b0808190819,
|
||||
0x08082b0808191908, 0x08082b08082b0808, 0x08082b08082b1919, 0x08082b0819080819,
|
||||
0x08082b0819081908, 0x08082b0819190808, 0x08082b0819192b08, 0x08082b082b080808,
|
||||
0x08082b082b2b0808, 0x08082b082b2b2b2b, 0x08082b1908080819, 0x08082b1908081908,
|
||||
0x08082b1908190808, 0x08082b1919080808, 0x08082b192b080819, 0x08082b192b082b19,
|
||||
0x08082b2b08080808, 0x08082b2b082b0808, 0x08082b2b082b2b08, 0x08082b2b2b19192b,
|
||||
0x08082b2b2b2b0808, 0x0819080808080819, 0x0819080808081908, 0x081908080808192b,
|
||||
0x0819080808082b19, 0x0819080808190808, 0x081908080819082b, 0x0819080808191919,
|
||||
0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908, 0x0819080819080808,
|
||||
0x081908081908082b, 0x0819080819081919, 0x0819080819082b08, 0x0819080819190819,
|
||||
0x0819080819191908, 0x08190808192b0808, 0x08190808192b2b2b, 0x081908082b080819,
|
||||
0x081908082b081908, 0x081908082b190808, 0x0819081908080808, 0x081908190808082b,
|
||||
0x0819081908081919, 0x0819081908082b08, 0x0819081908190819, 0x0819081908191908,
|
||||
0x08190819082b0808, 0x0819081919080819, 0x0819081919081908, 0x0819081919190808,
|
||||
0x081908192b080808, 0x081908192b191908, 0x081908192b19192b, 0x0819082b08080819,
|
||||
0x0819082b08081908, 0x0819082b0808192b, 0x0819082b08190808, 0x0819082b19080808,
|
||||
0x0819082b192b0808, 0x0819190808080808, 0x081919080808082b, 0x0819190808081919,
|
||||
0x0819190808082b08, 0x0819190808190819, 0x0819190808191908, 0x08191908082b0808,
|
||||
0x0819190819080819, 0x0819190819081908, 0x0819190819082b19, 0x0819190819190808,
|
||||
0x08191908192b1908, 0x081919082b080808, 0x0819191908080819, 0x0819191908081908,
|
||||
0x0819191908190808, 0x0819191919080808, 0x0819192b08080808, 0x0819192b08191908,
|
||||
0x0819192b19082b19, 0x08192b0808080819, 0x08192b0808081908, 0x08192b0808190808,
|
||||
0x08192b080819082b, 0x08192b0819080808, 0x08192b0819191908, 0x08192b082b08192b,
|
||||
0x08192b1908080808, 0x08192b1908081919, 0x08192b19192b192b, 0x08192b2b19190819,
|
||||
0x08192b2b2b2b2b19, 0x082b080808080808, 0x082b08080808082b, 0x082b080808081919,
|
||||
0x082b080808082b08, 0x082b080808082b2b, 0x082b080808190819, 0x082b080808191908,
|
||||
0x082b0808082b0808, 0x082b080819080819, 0x082b080819081908, 0x082b080819190808,
|
||||
0x082b08082b080808, 0x082b08082b2b0808, 0x082b081908080819, 0x082b081908081908,
|
||||
0x082b081908190808, 0x082b081919080808, 0x082b081919082b08, 0x082b0819192b1919,
|
||||
0x082b082b08080808, 0x082b082b082b082b, 0x082b082b2b080808, 0x082b082b2b2b2b08,
|
||||
0x082b190808080819, 0x082b190808081908, 0x082b190808190808, 0x082b1908082b2b19,
|
||||
0x082b190819080808, 0x082b191908080808, 0x082b191919080819, 0x082b19191919082b,
|
||||
0x082b19192b192b19, 0x082b192b08080819, 0x082b192b08192b2b, 0x082b192b2b2b192b,
|
||||
0x082b2b0808080808, 0x082b2b0808082b08, 0x082b2b0808082b2b, 0x082b2b08082b0808,
|
||||
0x082b2b0819191919, 0x082b2b082b082b08, 0x082b2b082b2b082b, 0x082b2b19192b2b08,
|
||||
0x082b2b192b190808, 0x082b2b2b08082b08, 0x082b2b2b082b0808, 0x082b2b2b2b08082b,
|
||||
0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819, 0x1908080808081908,
|
||||
0x190808080808192b, 0x1908080808082b19, 0x1908080808190808, 0x190808080819082b,
|
||||
0x1908080808191919, 0x1908080808192b08, 0x19080808082b0819, 0x19080808082b1908,
|
||||
0x1908080819080808, 0x190808081908082b, 0x1908080819081919, 0x1908080819082b08,
|
||||
0x1908080819082b2b, 0x1908080819190819, 0x1908080819191908, 0x19080808192b0808,
|
||||
0x19080808192b1919, 0x190808082b080819, 0x190808082b081908, 0x190808082b190808,
|
||||
0x1908081908080808, 0x190808190808082b, 0x1908081908081919, 0x1908081908082b08,
|
||||
0x1908081908190819, 0x1908081908191908, 0x19080819082b0808, 0x1908081919080819,
|
||||
0x1908081919081908, 0x1908081919190808, 0x190808192b080808, 0x190808192b081919,
|
||||
0x190808192b2b082b, 0x1908082b08080819, 0x1908082b08081908, 0x1908082b08190808,
|
||||
0x1908082b0819082b, 0x1908082b082b2b19, 0x1908082b19080808, 0x1908190808080808,
|
||||
0x190819080808082b, 0x1908190808081919, 0x1908190808082b08, 0x1908190808190819,
|
||||
0x1908190808191908, 0x1908190808192b19, 0x19081908082b0808, 0x1908190819080819,
|
||||
0x1908190819081908, 0x1908190819190808, 0x190819082b080808, 0x190819082b191908,
|
||||
0x1908191908080819, 0x1908191908081908, 0x1908191908190808, 0x19081919082b1908,
|
||||
0x1908191919080808, 0x190819192b192b2b, 0x1908192b08080808, 0x1908192b08082b2b,
|
||||
0x1908192b19081908, 0x1908192b19190808, 0x19082b0808080819, 0x19082b0808081908,
|
||||
0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919, 0x19082b0819191908,
|
||||
0x19082b08192b082b, 0x19082b1908080808, 0x19082b1908190819, 0x19082b1919081908,
|
||||
0x19082b1919190808, 0x19082b19192b2b19, 0x19082b2b08081908, 0x1919080808080808,
|
||||
0x191908080808082b, 0x1919080808081919, 0x1919080808082b08, 0x1919080808190819,
|
||||
0x1919080808191908, 0x19190808082b0808, 0x19190808082b2b08, 0x1919080819080819,
|
||||
0x1919080819081908, 0x1919080819190808, 0x191908082b080808, 0x1919081908080819,
|
||||
0x1919081908081908, 0x1919081908190808, 0x1919081908191919, 0x1919081919080808,
|
||||
0x191908191908082b, 0x1919082b08080808, 0x1919082b19081908, 0x1919082b2b2b2b2b,
|
||||
0x1919190808080819, 0x1919190808081908, 0x1919190808190808, 0x19191908082b0819,
|
||||
0x1919190819080808, 0x19191908192b0808, 0x191919082b080819, 0x191919082b2b0819,
|
||||
0x1919191908080808, 0x1919191908082b08, 0x191919192b080808, 0x191919192b082b08,
|
||||
0x1919192b082b0819, 0x1919192b192b2b08, 0x1919192b2b2b0819, 0x19192b0808080808,
|
||||
0x19192b0808191908, 0x19192b0819080819, 0x19192b0819190808, 0x19192b082b192b19,
|
||||
0x19192b1908192b2b, 0x19192b1919080808, 0x19192b191908082b, 0x19192b2b2b081919,
|
||||
0x192b080808080819, 0x192b080808081908, 0x192b080808190808, 0x192b080819080808,
|
||||
0x192b080819191908, 0x192b0808192b082b, 0x192b08082b08192b, 0x192b08082b2b2b19,
|
||||
0x192b081908080808, 0x192b082b082b1908, 0x192b082b19082b2b, 0x192b082b2b19082b,
|
||||
0x192b190808080808, 0x192b19080819192b, 0x192b191908190808, 0x192b191919080808,
|
||||
0x192b191919081919, 0x192b19192b2b1908, 0x192b2b0808080819, 0x192b2b08192b2b2b,
|
||||
0x192b2b19082b1919, 0x192b2b2b0808192b, 0x192b2b2b19191908, 0x192b2b2b192b082b,
|
||||
0x2b08080808080808, 0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08,
|
||||
0x2b08080808190819, 0x2b08080808191908, 0x2b080808082b0808, 0x2b080808082b2b2b,
|
||||
0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808082b080808,
|
||||
0x2b0808082b08082b, 0x2b0808082b2b2b08, 0x2b0808082b2b2b2b, 0x2b08081908080819,
|
||||
0x2b08081908081908, 0x2b0808190808192b, 0x2b08081908190808, 0x2b08081919080808,
|
||||
0x2b08081919190819, 0x2b08081919192b19, 0x2b08082b08080808, 0x2b08082b082b0808,
|
||||
0x2b08082b2b080808, 0x2b08082b2b08082b, 0x2b08082b2b2b0808, 0x2b08082b2b2b2b08,
|
||||
0x2b08190808080819, 0x2b08190808081908, 0x2b08190808190808, 0x2b0819080819082b,
|
||||
0x2b08190808191919, 0x2b08190819080808, 0x2b081908192b0808, 0x2b0819082b082b19,
|
||||
0x2b08191908080808, 0x2b08191919081908, 0x2b0819192b2b1919, 0x2b08192b08192b08,
|
||||
0x2b08192b192b2b2b, 0x2b082b0808080808, 0x2b082b0808082b08, 0x2b082b08082b1919,
|
||||
0x2b082b0819192b2b, 0x2b082b082b080808, 0x2b082b082b08082b, 0x2b082b082b2b2b08,
|
||||
0x2b082b190808192b, 0x2b082b2b082b082b, 0x2b082b2b2b080808, 0x2b082b2b2b082b08,
|
||||
0x2b082b2b2b19192b, 0x2b082b2b2b2b2b08, 0x2b19080808080819, 0x2b19080808081908,
|
||||
0x2b19080808190808, 0x2b19080819080808, 0x2b1908081919192b, 0x2b1908082b081908,
|
||||
0x2b19081908080808, 0x2b190819082b082b, 0x2b190819192b1908, 0x2b19082b1919192b,
|
||||
0x2b19082b2b082b19, 0x2b19190808080808, 0x2b19190808081919, 0x2b19190819081908,
|
||||
0x2b19190819190808, 0x2b19190819192b08, 0x2b191919082b2b19, 0x2b1919192b190808,
|
||||
0x2b1919192b19082b, 0x2b19192b19080819, 0x2b192b0819190819, 0x2b192b082b2b192b,
|
||||
0x2b192b1919082b19, 0x2b192b2b08191919, 0x2b192b2b192b0808, 0x2b2b080808080808,
|
||||
0x2b2b08080808082b, 0x2b2b080808082b08, 0x2b2b080808082b2b, 0x2b2b0808082b0808,
|
||||
0x2b2b0808082b2b2b, 0x2b2b08082b2b0808, 0x2b2b081919190819, 0x2b2b081919192b19,
|
||||
0x2b2b08192b2b192b, 0x2b2b082b08080808, 0x2b2b082b0808082b, 0x2b2b082b08082b08,
|
||||
0x2b2b082b082b2b2b, 0x2b2b082b2b080808, 0x2b2b082b2b2b0808, 0x2b2b190819080808,
|
||||
0x2b2b19082b191919, 0x2b2b192b192b1919, 0x2b2b192b2b192b08, 0x2b2b2b0808082b2b,
|
||||
0x2b2b2b08082b0808, 0x2b2b2b08082b082b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b0808,
|
||||
0x2b2b2b082b2b2b08, 0x2b2b2b1908081908, 0x2b2b2b192b081908, 0x2b2b2b192b08192b,
|
||||
0x2b2b2b2b082b2b08, 0x2b2b2b2b082b2b2b, 0x2b2b2b2b2b190819, 0x2b2b2b2b2b2b2b2b,
|
||||
};
|
||||
|
||||
constexpr constant static uint64_t iq2s_grid[1024] = {
|
||||
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
||||
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
|
||||
0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
|
||||
0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
|
||||
0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
|
||||
0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x08080808192b192b,
|
||||
0x08080808192b2b19, 0x080808082b080808, 0x080808082b08082b, 0x080808082b081919,
|
||||
0x080808082b082b08, 0x080808082b190819, 0x080808082b191908, 0x080808082b2b0808,
|
||||
0x080808082b2b1919, 0x080808082b2b2b2b, 0x0808081908080819, 0x0808081908081908,
|
||||
0x080808190808192b, 0x0808081908082b19, 0x0808081908190808, 0x080808190819082b,
|
||||
0x0808081908191919, 0x0808081908192b08, 0x08080819082b0819, 0x08080819082b1908,
|
||||
0x0808081919080808, 0x080808191908082b, 0x0808081919081919, 0x0808081919082b08,
|
||||
0x0808081919190819, 0x0808081919191908, 0x080808191919192b, 0x0808081919192b19,
|
||||
0x08080819192b0808, 0x08080819192b1919, 0x08080819192b2b08, 0x080808192b080819,
|
||||
0x080808192b081908, 0x080808192b190808, 0x080808192b19082b, 0x080808192b191919,
|
||||
0x080808192b2b0819, 0x080808192b2b1908, 0x0808082b08080808, 0x0808082b0808082b,
|
||||
0x0808082b08081919, 0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908,
|
||||
0x0808082b082b0808, 0x0808082b082b2b2b, 0x0808082b19080819, 0x0808082b19081908,
|
||||
0x0808082b1908192b, 0x0808082b19082b19, 0x0808082b19190808, 0x0808082b19191919,
|
||||
0x0808082b2b080808, 0x0808082b2b081919, 0x0808082b2b082b2b, 0x0808082b2b191908,
|
||||
0x0808082b2b2b082b, 0x0808190808080819, 0x0808190808081908, 0x080819080808192b,
|
||||
0x0808190808082b19, 0x0808190808190808, 0x080819080819082b, 0x0808190808191919,
|
||||
0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908, 0x08081908082b192b,
|
||||
0x08081908082b2b19, 0x0808190819080808, 0x080819081908082b, 0x0808190819081919,
|
||||
0x0808190819082b08, 0x0808190819082b2b, 0x0808190819190819, 0x0808190819191908,
|
||||
0x080819081919192b, 0x0808190819192b19, 0x08081908192b0808, 0x08081908192b082b,
|
||||
0x08081908192b1919, 0x080819082b080819, 0x080819082b081908, 0x080819082b08192b,
|
||||
0x080819082b082b19, 0x080819082b190808, 0x080819082b191919, 0x080819082b192b08,
|
||||
0x080819082b2b0819, 0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b,
|
||||
0x0808191908081919, 0x0808191908082b08, 0x0808191908082b2b, 0x0808191908190819,
|
||||
0x0808191908191908, 0x080819190819192b, 0x0808191908192b19, 0x08081919082b0808,
|
||||
0x08081919082b1919, 0x08081919082b2b08, 0x0808191919080819, 0x0808191919081908,
|
||||
0x080819191908192b, 0x0808191919082b19, 0x0808191919190808, 0x080819191919082b,
|
||||
0x0808191919191919, 0x0808191919192b08, 0x08081919192b0819, 0x08081919192b1908,
|
||||
0x080819192b080808, 0x080819192b08082b, 0x080819192b081919, 0x080819192b082b08,
|
||||
0x080819192b190819, 0x080819192b191908, 0x080819192b2b0808, 0x0808192b08080819,
|
||||
0x0808192b08081908, 0x0808192b0808192b, 0x0808192b08082b19, 0x0808192b08190808,
|
||||
0x0808192b08191919, 0x0808192b19080808, 0x0808192b19081919, 0x0808192b19082b08,
|
||||
0x0808192b19190819, 0x0808192b19191908, 0x0808192b192b0808, 0x0808192b2b080819,
|
||||
0x0808192b2b081908, 0x0808192b2b190808, 0x08082b0808080808, 0x08082b080808082b,
|
||||
0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808190819, 0x08082b0808191908,
|
||||
0x08082b080819192b, 0x08082b0808192b19, 0x08082b08082b0808, 0x08082b08082b1919,
|
||||
0x08082b08082b2b2b, 0x08082b0819080819, 0x08082b0819081908, 0x08082b081908192b,
|
||||
0x08082b0819082b19, 0x08082b0819190808, 0x08082b081919082b, 0x08082b0819191919,
|
||||
0x08082b0819192b08, 0x08082b08192b0819, 0x08082b08192b1908, 0x08082b082b080808,
|
||||
0x08082b082b081919, 0x08082b082b191908, 0x08082b082b2b2b2b, 0x08082b1908080819,
|
||||
0x08082b1908081908, 0x08082b1908190808, 0x08082b190819082b, 0x08082b1908191919,
|
||||
0x08082b1908192b08, 0x08082b19082b0819, 0x08082b1919080808, 0x08082b1919081919,
|
||||
0x08082b1919082b08, 0x08082b1919190819, 0x08082b1919191908, 0x08082b19192b0808,
|
||||
0x08082b192b080819, 0x08082b192b190808, 0x08082b2b08080808, 0x08082b2b08190819,
|
||||
0x08082b2b08191908, 0x08082b2b082b082b, 0x08082b2b082b2b08, 0x08082b2b082b2b2b,
|
||||
0x08082b2b19190808, 0x08082b2b2b192b19, 0x0819080808080819, 0x0819080808081908,
|
||||
0x081908080808192b, 0x0819080808082b19, 0x0819080808190808, 0x081908080819082b,
|
||||
0x0819080808191919, 0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908,
|
||||
0x08190808082b192b, 0x0819080819080808, 0x081908081908082b, 0x0819080819081919,
|
||||
0x0819080819082b08, 0x0819080819190819, 0x0819080819191908, 0x081908081919192b,
|
||||
0x0819080819192b19, 0x08190808192b0808, 0x08190808192b082b, 0x08190808192b1919,
|
||||
0x08190808192b2b08, 0x081908082b080819, 0x081908082b081908, 0x081908082b08192b,
|
||||
0x081908082b190808, 0x081908082b191919, 0x081908082b192b08, 0x081908082b2b0819,
|
||||
0x081908082b2b1908, 0x0819081908080808, 0x081908190808082b, 0x0819081908081919,
|
||||
0x0819081908082b08, 0x0819081908082b2b, 0x0819081908190819, 0x0819081908191908,
|
||||
0x081908190819192b, 0x0819081908192b19, 0x08190819082b0808, 0x08190819082b082b,
|
||||
0x08190819082b1919, 0x08190819082b2b08, 0x0819081919080819, 0x0819081919081908,
|
||||
0x081908191908192b, 0x0819081919082b19, 0x0819081919190808, 0x081908191919082b,
|
||||
0x0819081919191919, 0x0819081919192b08, 0x08190819192b0819, 0x08190819192b1908,
|
||||
0x081908192b080808, 0x081908192b08082b, 0x081908192b081919, 0x081908192b082b08,
|
||||
0x081908192b190819, 0x081908192b191908, 0x0819082b08080819, 0x0819082b08081908,
|
||||
0x0819082b08082b19, 0x0819082b08190808, 0x0819082b08191919, 0x0819082b082b0819,
|
||||
0x0819082b082b1908, 0x0819082b19080808, 0x0819082b19081919, 0x0819082b19190819,
|
||||
0x0819082b19191908, 0x0819082b2b080819, 0x0819082b2b081908, 0x0819082b2b190808,
|
||||
0x0819190808080808, 0x081919080808082b, 0x0819190808081919, 0x0819190808082b08,
|
||||
0x0819190808190819, 0x0819190808191908, 0x081919080819192b, 0x0819190808192b19,
|
||||
0x08191908082b0808, 0x08191908082b1919, 0x08191908082b2b08, 0x0819190819080819,
|
||||
0x0819190819081908, 0x081919081908192b, 0x0819190819082b19, 0x0819190819190808,
|
||||
0x081919081919082b, 0x0819190819191919, 0x0819190819192b08, 0x08191908192b0819,
|
||||
0x08191908192b1908, 0x081919082b080808, 0x081919082b08082b, 0x081919082b081919,
|
||||
0x081919082b082b08, 0x081919082b190819, 0x081919082b191908, 0x081919082b2b0808,
|
||||
0x0819191908080819, 0x0819191908081908, 0x081919190808192b, 0x0819191908082b19,
|
||||
0x0819191908190808, 0x081919190819082b, 0x0819191908191919, 0x0819191908192b08,
|
||||
0x08191919082b0819, 0x08191919082b1908, 0x0819191919080808, 0x081919191908082b,
|
||||
0x0819191919081919, 0x0819191919082b08, 0x0819191919190819, 0x0819191919191908,
|
||||
0x08191919192b0808, 0x081919192b080819, 0x081919192b081908, 0x081919192b190808,
|
||||
0x0819192b08080808, 0x0819192b08081919, 0x0819192b08082b08, 0x0819192b08190819,
|
||||
0x0819192b08191908, 0x0819192b082b0808, 0x0819192b19080819, 0x0819192b19081908,
|
||||
0x0819192b19190808, 0x0819192b2b080808, 0x0819192b2b2b2b2b, 0x08192b0808080819,
|
||||
0x08192b0808081908, 0x08192b080808192b, 0x08192b0808082b19, 0x08192b0808190808,
|
||||
0x08192b0808191919, 0x08192b0808192b08, 0x08192b08082b0819, 0x08192b0819080808,
|
||||
0x08192b081908082b, 0x08192b0819081919, 0x08192b0819082b08, 0x08192b0819190819,
|
||||
0x08192b0819191908, 0x08192b08192b0808, 0x08192b082b080819, 0x08192b082b081908,
|
||||
0x08192b1908080808, 0x08192b190808082b, 0x08192b1908081919, 0x08192b1908082b08,
|
||||
0x08192b1908190819, 0x08192b1908191908, 0x08192b19082b0808, 0x08192b1919080819,
|
||||
0x08192b1919081908, 0x08192b1919190808, 0x08192b19192b2b19, 0x08192b192b2b082b,
|
||||
0x08192b2b08081908, 0x08192b2b08190808, 0x08192b2b19080808, 0x08192b2b1919192b,
|
||||
0x082b080808080808, 0x082b08080808082b, 0x082b080808081919, 0x082b080808082b08,
|
||||
0x082b080808190819, 0x082b080808191908, 0x082b08080819192b, 0x082b080808192b19,
|
||||
0x082b0808082b0808, 0x082b0808082b1919, 0x082b0808082b2b2b, 0x082b080819080819,
|
||||
0x082b080819081908, 0x082b080819190808, 0x082b08081919082b, 0x082b080819191919,
|
||||
0x082b0808192b1908, 0x082b08082b080808, 0x082b08082b082b2b, 0x082b08082b191908,
|
||||
0x082b08082b2b2b2b, 0x082b081908080819, 0x082b081908081908, 0x082b081908190808,
|
||||
0x082b08190819082b, 0x082b081908191919, 0x082b0819082b0819, 0x082b081919080808,
|
||||
0x082b08191908082b, 0x082b081919081919, 0x082b081919190819, 0x082b081919191908,
|
||||
0x082b0819192b0808, 0x082b08192b080819, 0x082b08192b081908, 0x082b08192b190808,
|
||||
0x082b082b08080808, 0x082b082b08082b2b, 0x082b082b082b082b, 0x082b082b082b2b08,
|
||||
0x082b082b082b2b2b, 0x082b082b19081908, 0x082b082b19190808, 0x082b082b2b082b08,
|
||||
0x082b082b2b082b2b, 0x082b082b2b2b2b08, 0x082b190808080819, 0x082b190808081908,
|
||||
0x082b19080808192b, 0x082b190808082b19, 0x082b190808190808, 0x082b190808191919,
|
||||
0x082b190808192b08, 0x082b1908082b0819, 0x082b1908082b1908, 0x082b190819080808,
|
||||
0x082b19081908082b, 0x082b190819081919, 0x082b190819082b08, 0x082b190819190819,
|
||||
0x082b190819191908, 0x082b1908192b0808, 0x082b19082b080819, 0x082b19082b081908,
|
||||
0x082b19082b190808, 0x082b191908080808, 0x082b191908081919, 0x082b191908082b08,
|
||||
0x082b191908190819, 0x082b191908191908, 0x082b1919082b0808, 0x082b191919080819,
|
||||
0x082b191919081908, 0x082b191919190808, 0x082b1919192b192b, 0x082b19192b080808,
|
||||
0x082b192b08080819, 0x082b192b08081908, 0x082b192b08190808, 0x082b192b19080808,
|
||||
0x082b192b19192b19, 0x082b2b0808080808, 0x082b2b0808081919, 0x082b2b0808190819,
|
||||
0x082b2b0808191908, 0x082b2b0819080819, 0x082b2b0819081908, 0x082b2b0819190808,
|
||||
0x082b2b082b082b2b, 0x082b2b082b2b2b2b, 0x082b2b1908080819, 0x082b2b1908081908,
|
||||
0x082b2b1908190808, 0x082b2b192b191919, 0x082b2b2b08082b2b, 0x082b2b2b082b082b,
|
||||
0x082b2b2b192b1908, 0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819,
|
||||
0x1908080808081908, 0x190808080808192b, 0x1908080808082b19, 0x1908080808190808,
|
||||
0x190808080819082b, 0x1908080808191919, 0x1908080808192b08, 0x1908080808192b2b,
|
||||
0x19080808082b0819, 0x19080808082b1908, 0x19080808082b192b, 0x1908080819080808,
|
||||
0x190808081908082b, 0x1908080819081919, 0x1908080819082b08, 0x1908080819082b2b,
|
||||
0x1908080819190819, 0x1908080819191908, 0x190808081919192b, 0x1908080819192b19,
|
||||
0x19080808192b0808, 0x19080808192b082b, 0x19080808192b1919, 0x190808082b080819,
|
||||
0x190808082b081908, 0x190808082b190808, 0x190808082b191919, 0x190808082b192b08,
|
||||
0x190808082b2b0819, 0x190808082b2b1908, 0x1908081908080808, 0x190808190808082b,
|
||||
0x1908081908081919, 0x1908081908082b08, 0x1908081908190819, 0x1908081908191908,
|
||||
0x190808190819192b, 0x1908081908192b19, 0x19080819082b0808, 0x19080819082b082b,
|
||||
0x19080819082b1919, 0x1908081919080819, 0x1908081919081908, 0x190808191908192b,
|
||||
0x1908081919082b19, 0x1908081919190808, 0x190808191919082b, 0x1908081919191919,
|
||||
0x1908081919192b08, 0x19080819192b0819, 0x19080819192b1908, 0x190808192b080808,
|
||||
0x190808192b08082b, 0x190808192b081919, 0x190808192b082b08, 0x190808192b190819,
|
||||
0x190808192b191908, 0x190808192b2b0808, 0x1908082b08080819, 0x1908082b08081908,
|
||||
0x1908082b08190808, 0x1908082b0819082b, 0x1908082b08191919, 0x1908082b08192b08,
|
||||
0x1908082b082b1908, 0x1908082b19080808, 0x1908082b19081919, 0x1908082b19082b08,
|
||||
0x1908082b19190819, 0x1908082b19191908, 0x1908082b192b0808, 0x1908082b2b080819,
|
||||
0x1908082b2b081908, 0x1908190808080808, 0x190819080808082b, 0x1908190808081919,
|
||||
0x1908190808082b08, 0x1908190808082b2b, 0x1908190808190819, 0x1908190808191908,
|
||||
0x190819080819192b, 0x1908190808192b19, 0x19081908082b0808, 0x19081908082b082b,
|
||||
0x19081908082b1919, 0x19081908082b2b08, 0x1908190819080819, 0x1908190819081908,
|
||||
0x190819081908192b, 0x1908190819082b19, 0x1908190819190808, 0x190819081919082b,
|
||||
0x1908190819191919, 0x1908190819192b08, 0x19081908192b0819, 0x19081908192b1908,
|
||||
0x190819082b080808, 0x190819082b08082b, 0x190819082b081919, 0x190819082b082b08,
|
||||
0x190819082b190819, 0x190819082b191908, 0x190819082b2b0808, 0x1908191908080819,
|
||||
0x1908191908081908, 0x190819190808192b, 0x1908191908082b19, 0x1908191908190808,
|
||||
0x190819190819082b, 0x1908191908191919, 0x1908191908192b08, 0x19081919082b0819,
|
||||
0x19081919082b1908, 0x1908191919080808, 0x190819191908082b, 0x1908191919081919,
|
||||
0x1908191919082b08, 0x1908191919190819, 0x1908191919191908, 0x19081919192b0808,
|
||||
0x19081919192b2b2b, 0x190819192b080819, 0x190819192b081908, 0x190819192b190808,
|
||||
0x1908192b08080808, 0x1908192b0808082b, 0x1908192b08081919, 0x1908192b08082b08,
|
||||
0x1908192b08190819, 0x1908192b08191908, 0x1908192b082b0808, 0x1908192b19080819,
|
||||
0x1908192b19081908, 0x1908192b19190808, 0x1908192b2b080808, 0x1908192b2b2b1919,
|
||||
0x19082b0808080819, 0x19082b0808081908, 0x19082b0808082b19, 0x19082b0808190808,
|
||||
0x19082b080819082b, 0x19082b0808191919, 0x19082b0808192b08, 0x19082b08082b0819,
|
||||
0x19082b08082b1908, 0x19082b0819080808, 0x19082b081908082b, 0x19082b0819081919,
|
||||
0x19082b0819082b08, 0x19082b0819190819, 0x19082b0819191908, 0x19082b08192b0808,
|
||||
0x19082b082b081908, 0x19082b082b190808, 0x19082b1908080808, 0x19082b190808082b,
|
||||
0x19082b1908081919, 0x19082b1908082b08, 0x19082b1908190819, 0x19082b1908191908,
|
||||
0x19082b19082b0808, 0x19082b1919080819, 0x19082b1919081908, 0x19082b1919190808,
|
||||
0x19082b192b080808, 0x19082b192b19192b, 0x19082b2b08080819, 0x19082b2b08081908,
|
||||
0x19082b2b08190808, 0x19082b2b19080808, 0x1919080808080808, 0x191908080808082b,
|
||||
0x1919080808081919, 0x1919080808082b08, 0x1919080808190819, 0x1919080808191908,
|
||||
0x191908080819192b, 0x1919080808192b19, 0x19190808082b0808, 0x19190808082b082b,
|
||||
0x19190808082b1919, 0x19190808082b2b08, 0x1919080819080819, 0x1919080819081908,
|
||||
0x191908081908192b, 0x1919080819082b19, 0x1919080819190808, 0x191908081919082b,
|
||||
0x1919080819191919, 0x1919080819192b08, 0x19190808192b0819, 0x19190808192b1908,
|
||||
0x191908082b080808, 0x191908082b08082b, 0x191908082b081919, 0x191908082b082b08,
|
||||
0x191908082b190819, 0x191908082b191908, 0x1919081908080819, 0x1919081908081908,
|
||||
0x191908190808192b, 0x1919081908082b19, 0x1919081908190808, 0x191908190819082b,
|
||||
0x1919081908191919, 0x1919081908192b08, 0x19190819082b0819, 0x19190819082b1908,
|
||||
0x1919081919080808, 0x191908191908082b, 0x1919081919081919, 0x1919081919082b08,
|
||||
0x1919081919190819, 0x1919081919191908, 0x19190819192b0808, 0x191908192b080819,
|
||||
0x191908192b081908, 0x191908192b190808, 0x1919082b08080808, 0x1919082b08081919,
|
||||
0x1919082b08082b08, 0x1919082b08190819, 0x1919082b08191908, 0x1919082b082b0808,
|
||||
0x1919082b19080819, 0x1919082b19081908, 0x1919082b19190808, 0x1919082b192b2b19,
|
||||
0x1919082b2b080808, 0x1919190808080819, 0x1919190808081908, 0x191919080808192b,
|
||||
0x1919190808082b19, 0x1919190808190808, 0x191919080819082b, 0x1919190808191919,
|
||||
0x1919190808192b08, 0x19191908082b0819, 0x19191908082b1908, 0x1919190819080808,
|
||||
0x191919081908082b, 0x1919190819081919, 0x1919190819082b08, 0x1919190819190819,
|
||||
0x1919190819191908, 0x19191908192b0808, 0x191919082b080819, 0x191919082b081908,
|
||||
0x191919082b190808, 0x1919191908080808, 0x191919190808082b, 0x1919191908081919,
|
||||
0x1919191908082b08, 0x1919191908190819, 0x1919191908191908, 0x19191919082b0808,
|
||||
0x1919191919080819, 0x1919191919081908, 0x1919191919190808, 0x191919192b080808,
|
||||
0x1919192b08080819, 0x1919192b08081908, 0x1919192b08190808, 0x1919192b082b192b,
|
||||
0x1919192b19080808, 0x19192b0808080808, 0x19192b080808082b, 0x19192b0808081919,
|
||||
0x19192b0808082b08, 0x19192b0808190819, 0x19192b0808191908, 0x19192b08082b0808,
|
||||
0x19192b0819080819, 0x19192b0819081908, 0x19192b0819190808, 0x19192b0819192b2b,
|
||||
0x19192b082b080808, 0x19192b1908080819, 0x19192b1908081908, 0x19192b1908190808,
|
||||
0x19192b1919080808, 0x19192b2b08080808, 0x19192b2b08192b19, 0x19192b2b2b081919,
|
||||
0x19192b2b2b2b2b08, 0x192b080808080819, 0x192b080808081908, 0x192b08080808192b,
|
||||
0x192b080808190808, 0x192b08080819082b, 0x192b080808191919, 0x192b080808192b08,
|
||||
0x192b0808082b0819, 0x192b0808082b1908, 0x192b080819080808, 0x192b080819081919,
|
||||
0x192b080819082b08, 0x192b080819190819, 0x192b080819191908, 0x192b0808192b0808,
|
||||
0x192b08082b081908, 0x192b08082b190808, 0x192b081908080808, 0x192b08190808082b,
|
||||
0x192b081908081919, 0x192b081908082b08, 0x192b081908190819, 0x192b081908191908,
|
||||
0x192b0819082b0808, 0x192b081919080819, 0x192b081919081908, 0x192b081919190808,
|
||||
0x192b08192b080808, 0x192b08192b192b19, 0x192b082b08081908, 0x192b082b08190808,
|
||||
0x192b082b19080808, 0x192b082b1919192b, 0x192b082b2b2b0819, 0x192b190808080808,
|
||||
0x192b190808081919, 0x192b190808082b08, 0x192b190808190819, 0x192b190808191908,
|
||||
0x192b1908082b0808, 0x192b190819080819, 0x192b190819081908, 0x192b190819190808,
|
||||
0x192b19082b080808, 0x192b191908080819, 0x192b191908081908, 0x192b191908190808,
|
||||
0x192b191919080808, 0x192b191919082b2b, 0x192b1919192b2b08, 0x192b19192b19082b,
|
||||
0x192b192b08080808, 0x192b192b2b191908, 0x192b2b0808080819, 0x192b2b0808081908,
|
||||
0x192b2b0808190808, 0x192b2b08192b1919, 0x192b2b082b192b08, 0x192b2b1908080808,
|
||||
0x192b2b19082b2b2b, 0x192b2b2b1908082b, 0x192b2b2b2b2b0819, 0x2b08080808080808,
|
||||
0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08, 0x2b08080808190819,
|
||||
0x2b08080808191908, 0x2b08080808192b19, 0x2b080808082b0808, 0x2b080808082b1919,
|
||||
0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808081919082b,
|
||||
0x2b08080819191919, 0x2b08080819192b08, 0x2b080808192b0819, 0x2b0808082b080808,
|
||||
0x2b0808082b081919, 0x2b0808082b190819, 0x2b0808082b191908, 0x2b08081908080819,
|
||||
0x2b08081908081908, 0x2b08081908082b19, 0x2b08081908190808, 0x2b0808190819082b,
|
||||
0x2b08081908191919, 0x2b08081908192b08, 0x2b080819082b0819, 0x2b080819082b1908,
|
||||
0x2b08081919080808, 0x2b0808191908082b, 0x2b08081919081919, 0x2b08081919082b08,
|
||||
0x2b08081919190819, 0x2b08081919191908, 0x2b0808192b080819, 0x2b0808192b081908,
|
||||
0x2b0808192b190808, 0x2b0808192b2b2b19, 0x2b08082b08080808, 0x2b08082b08081919,
|
||||
0x2b08082b08082b2b, 0x2b08082b08190819, 0x2b08082b08191908, 0x2b08082b19080819,
|
||||
0x2b08082b19081908, 0x2b08082b19190808, 0x2b08190808080819, 0x2b08190808081908,
|
||||
0x2b0819080808192b, 0x2b08190808082b19, 0x2b08190808190808, 0x2b0819080819082b,
|
||||
0x2b08190808191919, 0x2b08190808192b08, 0x2b081908082b0819, 0x2b08190819080808,
|
||||
0x2b0819081908082b, 0x2b08190819081919, 0x2b08190819082b08, 0x2b08190819190819,
|
||||
0x2b08190819191908, 0x2b081908192b0808, 0x2b0819082b080819, 0x2b0819082b081908,
|
||||
0x2b0819082b190808, 0x2b08191908080808, 0x2b0819190808082b, 0x2b08191908081919,
|
||||
0x2b08191908082b08, 0x2b08191908190819, 0x2b08191908191908, 0x2b081919082b0808,
|
||||
0x2b08191919080819, 0x2b08191919081908, 0x2b08191919190808, 0x2b0819192b080808,
|
||||
0x2b0819192b082b2b, 0x2b08192b08080819, 0x2b08192b08081908, 0x2b08192b08190808,
|
||||
0x2b08192b082b2b19, 0x2b08192b19080808, 0x2b082b0808080808, 0x2b082b0808081919,
|
||||
0x2b082b0808190819, 0x2b082b0808191908, 0x2b082b0819080819, 0x2b082b0819081908,
|
||||
0x2b082b0819190808, 0x2b082b082b2b082b, 0x2b082b1908080819, 0x2b082b1908081908,
|
||||
0x2b082b1919080808, 0x2b082b19192b1919, 0x2b082b2b082b082b, 0x2b082b2b19192b08,
|
||||
0x2b082b2b19192b2b, 0x2b082b2b2b08082b, 0x2b082b2b2b2b082b, 0x2b19080808080819,
|
||||
0x2b19080808081908, 0x2b19080808082b19, 0x2b19080808190808, 0x2b1908080819082b,
|
||||
0x2b19080808191919, 0x2b19080808192b08, 0x2b190808082b1908, 0x2b19080819080808,
|
||||
0x2b1908081908082b, 0x2b19080819081919, 0x2b19080819082b08, 0x2b19080819190819,
|
||||
0x2b19080819191908, 0x2b190808192b0808, 0x2b1908082b080819, 0x2b1908082b081908,
|
||||
0x2b1908082b190808, 0x2b19081908080808, 0x2b19081908081919, 0x2b19081908190819,
|
||||
0x2b19081908191908, 0x2b19081919080819, 0x2b19081919081908, 0x2b19081919190808,
|
||||
0x2b19081919192b2b, 0x2b19082b08080819, 0x2b19082b08081908, 0x2b19082b08190808,
|
||||
0x2b19082b19080808, 0x2b19082b2b2b192b, 0x2b19190808080808, 0x2b1919080808082b,
|
||||
0x2b19190808081919, 0x2b19190808082b08, 0x2b19190808190819, 0x2b19190808191908,
|
||||
0x2b191908082b0808, 0x2b19190819080819, 0x2b19190819081908, 0x2b19190819190808,
|
||||
0x2b1919082b080808, 0x2b1919082b19192b, 0x2b19191908080819, 0x2b19191908081908,
|
||||
0x2b19191908190808, 0x2b19191919080808, 0x2b1919192b192b08, 0x2b1919192b2b0819,
|
||||
0x2b19192b08080808, 0x2b19192b1908192b, 0x2b19192b192b1908, 0x2b192b0808080819,
|
||||
0x2b192b0808081908, 0x2b192b0808190808, 0x2b192b08082b192b, 0x2b192b0819080808,
|
||||
0x2b192b082b2b2b19, 0x2b192b1908080808, 0x2b192b1919082b19, 0x2b192b191919082b,
|
||||
0x2b192b2b2b190808, 0x2b2b080808080808, 0x2b2b080808081919, 0x2b2b080808082b2b,
|
||||
0x2b2b080808191908, 0x2b2b0808082b082b, 0x2b2b0808082b2b2b, 0x2b2b080819080819,
|
||||
0x2b2b080819081908, 0x2b2b080819190808, 0x2b2b08082b2b082b, 0x2b2b08082b2b2b2b,
|
||||
0x2b2b081919080808, 0x2b2b0819192b1919, 0x2b2b082b0808082b, 0x2b2b082b08082b2b,
|
||||
0x2b2b082b082b082b, 0x2b2b082b082b2b08, 0x2b2b082b082b2b2b, 0x2b2b082b2b08082b,
|
||||
0x2b2b082b2b082b08, 0x2b2b082b2b082b2b, 0x2b2b082b2b2b2b08, 0x2b2b190808080819,
|
||||
0x2b2b190808081908, 0x2b2b190808190808, 0x2b2b190819080808, 0x2b2b19082b082b19,
|
||||
0x2b2b19082b2b1908, 0x2b2b191908080808, 0x2b2b191908192b19, 0x2b2b192b19190819,
|
||||
0x2b2b2b0808082b2b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b082b, 0x2b2b2b1919191908,
|
||||
0x2b2b2b192b08192b, 0x2b2b2b2b08082b08, 0x2b2b2b2b08082b2b, 0x2b2b2b2b082b0808,
|
||||
0x2b2b2b2b082b082b, 0x2b2b2b2b082b2b08, 0x2b2b2b2b2b082b08, 0x2b2b2b2b2b2b2b2b,
|
||||
};
|
||||
|
||||
constexpr constant static uint32_t iq3xxs_grid[256] = {
|
||||
0x04040404, 0x04040414, 0x04040424, 0x04040c0c, 0x04040c1c, 0x04040c3e, 0x04041404, 0x04041414,
|
||||
0x04041c0c, 0x04042414, 0x04043e1c, 0x04043e2c, 0x040c040c, 0x040c041c, 0x040c0c04, 0x040c0c14,
|
||||
0x040c140c, 0x040c142c, 0x040c1c04, 0x040c1c14, 0x040c240c, 0x040c2c24, 0x040c3e04, 0x04140404,
|
||||
0x04140414, 0x04140424, 0x04140c0c, 0x04141404, 0x04141414, 0x04141c0c, 0x04141c1c, 0x04141c3e,
|
||||
0x04142c0c, 0x04142c3e, 0x04143e2c, 0x041c040c, 0x041c043e, 0x041c0c04, 0x041c0c14, 0x041c142c,
|
||||
0x041c3e04, 0x04240c1c, 0x04241c3e, 0x04242424, 0x04242c3e, 0x04243e1c, 0x04243e2c, 0x042c040c,
|
||||
0x042c043e, 0x042c1c14, 0x042c2c14, 0x04341c2c, 0x04343424, 0x043e0c04, 0x043e0c24, 0x043e0c34,
|
||||
0x043e241c, 0x043e340c, 0x0c04040c, 0x0c04041c, 0x0c040c04, 0x0c040c14, 0x0c04140c, 0x0c04141c,
|
||||
0x0c041c04, 0x0c041c14, 0x0c041c24, 0x0c04243e, 0x0c042c04, 0x0c0c0404, 0x0c0c0414, 0x0c0c0c0c,
|
||||
0x0c0c1404, 0x0c0c1414, 0x0c14040c, 0x0c14041c, 0x0c140c04, 0x0c140c14, 0x0c14140c, 0x0c141c04,
|
||||
0x0c143e14, 0x0c1c0404, 0x0c1c0414, 0x0c1c1404, 0x0c1c1c0c, 0x0c1c2434, 0x0c1c3434, 0x0c24040c,
|
||||
0x0c24042c, 0x0c242c04, 0x0c2c1404, 0x0c2c1424, 0x0c2c2434, 0x0c2c3e0c, 0x0c34042c, 0x0c3e1414,
|
||||
0x0c3e2404, 0x14040404, 0x14040414, 0x14040c0c, 0x14040c1c, 0x14041404, 0x14041414, 0x14041434,
|
||||
0x14041c0c, 0x14042414, 0x140c040c, 0x140c041c, 0x140c042c, 0x140c0c04, 0x140c0c14, 0x140c140c,
|
||||
0x140c1c04, 0x140c341c, 0x140c343e, 0x140c3e04, 0x14140404, 0x14140414, 0x14140c0c, 0x14140c3e,
|
||||
0x14141404, 0x14141414, 0x14141c3e, 0x14142404, 0x14142c2c, 0x141c040c, 0x141c0c04, 0x141c0c24,
|
||||
0x141c3e04, 0x141c3e24, 0x14241c2c, 0x14242c1c, 0x142c041c, 0x142c143e, 0x142c240c, 0x142c3e24,
|
||||
0x143e040c, 0x143e041c, 0x143e0c34, 0x143e242c, 0x1c04040c, 0x1c040c04, 0x1c040c14, 0x1c04140c,
|
||||
0x1c04141c, 0x1c042c04, 0x1c04342c, 0x1c043e14, 0x1c0c0404, 0x1c0c0414, 0x1c0c1404, 0x1c0c1c0c,
|
||||
0x1c0c2424, 0x1c0c2434, 0x1c14040c, 0x1c14041c, 0x1c140c04, 0x1c14142c, 0x1c142c14, 0x1c143e14,
|
||||
0x1c1c0c0c, 0x1c1c1c1c, 0x1c241c04, 0x1c24243e, 0x1c243e14, 0x1c2c0404, 0x1c2c0434, 0x1c2c1414,
|
||||
0x1c2c2c2c, 0x1c340c24, 0x1c341c34, 0x1c34341c, 0x1c3e1c1c, 0x1c3e3404, 0x24040424, 0x24040c3e,
|
||||
0x24041c2c, 0x24041c3e, 0x24042c1c, 0x24042c3e, 0x240c3e24, 0x24141404, 0x24141c3e, 0x24142404,
|
||||
0x24143404, 0x24143434, 0x241c043e, 0x241c242c, 0x24240424, 0x24242c0c, 0x24243424, 0x242c142c,
|
||||
0x242c241c, 0x242c3e04, 0x243e042c, 0x243e0c04, 0x243e0c14, 0x243e1c04, 0x2c040c14, 0x2c04240c,
|
||||
0x2c043e04, 0x2c0c0404, 0x2c0c0434, 0x2c0c1434, 0x2c0c2c2c, 0x2c140c24, 0x2c141c14, 0x2c143e14,
|
||||
0x2c1c0414, 0x2c1c2c1c, 0x2c240c04, 0x2c24141c, 0x2c24143e, 0x2c243e14, 0x2c2c0414, 0x2c2c1c0c,
|
||||
0x2c342c04, 0x2c3e1424, 0x2c3e2414, 0x34041424, 0x34042424, 0x34042434, 0x34043424, 0x340c140c,
|
||||
0x340c340c, 0x34140c3e, 0x34143424, 0x341c1c04, 0x341c1c34, 0x34242424, 0x342c042c, 0x342c2c14,
|
||||
0x34341c1c, 0x343e041c, 0x343e140c, 0x3e04041c, 0x3e04042c, 0x3e04043e, 0x3e040c04, 0x3e041c14,
|
||||
0x3e042c14, 0x3e0c1434, 0x3e0c2404, 0x3e140c14, 0x3e14242c, 0x3e142c14, 0x3e1c0404, 0x3e1c0c2c,
|
||||
0x3e1c1c1c, 0x3e1c3404, 0x3e24140c, 0x3e24240c, 0x3e2c0404, 0x3e2c0414, 0x3e2c1424, 0x3e341c04,
|
||||
};
|
||||
|
||||
constexpr constant static uint32_t iq3xs_grid[512] = {
|
||||
0x04040404, 0x0404040c, 0x04040414, 0x0404042c, 0x0404043e, 0x04040c04, 0x04040c0c, 0x04040c14,
|
||||
0x04040c24, 0x04040c34, 0x04041404, 0x0404140c, 0x0404142c, 0x04041c1c, 0x04042404, 0x04042414,
|
||||
0x0404242c, 0x0404243e, 0x04042c0c, 0x04042c1c, 0x04043404, 0x04043414, 0x04043e0c, 0x04043e24,
|
||||
0x04043e3e, 0x040c0404, 0x040c040c, 0x040c0414, 0x040c0424, 0x040c0c04, 0x040c0c0c, 0x040c0c2c,
|
||||
0x040c1404, 0x040c141c, 0x040c143e, 0x040c1c0c, 0x040c1c2c, 0x040c2424, 0x040c340c, 0x040c342c,
|
||||
0x040c3e14, 0x04140404, 0x0414040c, 0x0414042c, 0x0414043e, 0x04140c04, 0x04140c1c, 0x04140c34,
|
||||
0x0414140c, 0x0414142c, 0x04141c04, 0x04141c24, 0x04142414, 0x0414242c, 0x0414243e, 0x04142c0c,
|
||||
0x04142c1c, 0x04143e04, 0x04143e1c, 0x041c041c, 0x041c0c0c, 0x041c0c2c, 0x041c1404, 0x041c1414,
|
||||
0x041c1c0c, 0x041c1c1c, 0x041c1c34, 0x041c2424, 0x041c2c04, 0x041c2c14, 0x041c343e, 0x041c3e0c,
|
||||
0x041c3e2c, 0x04240404, 0x04240c1c, 0x04240c3e, 0x0424140c, 0x04241424, 0x04241c14, 0x04242404,
|
||||
0x0424241c, 0x04242c0c, 0x04243e04, 0x042c0414, 0x042c0424, 0x042c1404, 0x042c1414, 0x042c1434,
|
||||
0x042c1c1c, 0x042c240c, 0x042c242c, 0x042c243e, 0x042c3434, 0x042c3e1c, 0x04340434, 0x04340c0c,
|
||||
0x04340c1c, 0x04341c0c, 0x04342c14, 0x04343e0c, 0x043e0404, 0x043e0414, 0x043e0424, 0x043e1404,
|
||||
0x043e1414, 0x043e1434, 0x043e1c1c, 0x043e2c04, 0x043e2c24, 0x0c040404, 0x0c04040c, 0x0c040414,
|
||||
0x0c040424, 0x0c040c04, 0x0c040c0c, 0x0c040c1c, 0x0c040c2c, 0x0c040c3e, 0x0c041404, 0x0c041414,
|
||||
0x0c041c0c, 0x0c041c24, 0x0c041c34, 0x0c042c24, 0x0c042c34, 0x0c04340c, 0x0c043e14, 0x0c0c0404,
|
||||
0x0c0c040c, 0x0c0c041c, 0x0c0c0434, 0x0c0c0c04, 0x0c0c0c24, 0x0c0c140c, 0x0c0c1c04, 0x0c0c1c1c,
|
||||
0x0c0c240c, 0x0c0c2c04, 0x0c0c2c14, 0x0c0c3e04, 0x0c0c3e34, 0x0c140404, 0x0c140c14, 0x0c140c2c,
|
||||
0x0c140c3e, 0x0c141404, 0x0c141424, 0x0c141c14, 0x0c142404, 0x0c14241c, 0x0c142c2c, 0x0c143404,
|
||||
0x0c143e14, 0x0c1c040c, 0x0c1c0424, 0x0c1c043e, 0x0c1c0c04, 0x0c1c0c1c, 0x0c1c140c, 0x0c1c143e,
|
||||
0x0c1c1c04, 0x0c1c1c24, 0x0c1c240c, 0x0c1c3414, 0x0c1c3e04, 0x0c24041c, 0x0c24042c, 0x0c240c14,
|
||||
0x0c240c24, 0x0c241c0c, 0x0c241c1c, 0x0c242414, 0x0c242434, 0x0c242c04, 0x0c242c24, 0x0c2c040c,
|
||||
0x0c2c0c04, 0x0c2c0c1c, 0x0c2c140c, 0x0c2c1c04, 0x0c2c1c14, 0x0c2c2c0c, 0x0c341404, 0x0c341424,
|
||||
0x0c34143e, 0x0c342424, 0x0c342434, 0x0c3e040c, 0x0c3e041c, 0x0c3e0c04, 0x0c3e0c14, 0x0c3e140c,
|
||||
0x0c3e1c2c, 0x0c3e240c, 0x0c3e3414, 0x0c3e3e04, 0x14040404, 0x1404040c, 0x1404041c, 0x1404042c,
|
||||
0x1404043e, 0x14040c04, 0x14040c14, 0x14040c24, 0x14040c34, 0x1404140c, 0x1404141c, 0x1404143e,
|
||||
0x14041c04, 0x14041c14, 0x1404240c, 0x1404241c, 0x1404242c, 0x14042c04, 0x14042c14, 0x1404343e,
|
||||
0x14043e04, 0x14043e1c, 0x14043e2c, 0x140c0404, 0x140c0414, 0x140c0c04, 0x140c0c1c, 0x140c0c3e,
|
||||
0x140c1414, 0x140c142c, 0x140c1c0c, 0x140c1c24, 0x140c2414, 0x140c2c0c, 0x1414040c, 0x14140424,
|
||||
0x1414043e, 0x1414140c, 0x1414141c, 0x14141c04, 0x14141c3e, 0x1414240c, 0x14142c1c, 0x14142c3e,
|
||||
0x14143e0c, 0x14143e24, 0x141c0404, 0x141c0414, 0x141c042c, 0x141c0c0c, 0x141c1414, 0x141c1424,
|
||||
0x141c1c0c, 0x141c1c1c, 0x141c2414, 0x141c2c04, 0x141c3434, 0x1424040c, 0x1424043e, 0x14241404,
|
||||
0x1424141c, 0x14241c14, 0x14241c2c, 0x1424240c, 0x14243e14, 0x14243e2c, 0x142c0424, 0x142c0c0c,
|
||||
0x142c1414, 0x142c1c3e, 0x142c2404, 0x142c2c1c, 0x142c3e04, 0x14340404, 0x14340414, 0x1434043e,
|
||||
0x1434140c, 0x14342c2c, 0x1434340c, 0x143e042c, 0x143e0c0c, 0x143e1434, 0x143e1c04, 0x143e241c,
|
||||
0x143e2c04, 0x1c040414, 0x1c040c0c, 0x1c040c1c, 0x1c040c2c, 0x1c040c3e, 0x1c041414, 0x1c041c0c,
|
||||
0x1c041c1c, 0x1c041c2c, 0x1c042414, 0x1c042424, 0x1c04243e, 0x1c042c0c, 0x1c04341c, 0x1c043e0c,
|
||||
0x1c0c040c, 0x1c0c041c, 0x1c0c042c, 0x1c0c0c24, 0x1c0c140c, 0x1c0c141c, 0x1c0c2404, 0x1c0c3404,
|
||||
0x1c0c3e14, 0x1c0c3e34, 0x1c140404, 0x1c140c14, 0x1c141404, 0x1c141c14, 0x1c141c24, 0x1c142c04,
|
||||
0x1c1c040c, 0x1c1c0c04, 0x1c1c0c24, 0x1c1c140c, 0x1c1c141c, 0x1c1c143e, 0x1c1c1c04, 0x1c1c240c,
|
||||
0x1c1c241c, 0x1c1c243e, 0x1c1c2c2c, 0x1c1c3e1c, 0x1c24041c, 0x1c240c0c, 0x1c240c34, 0x1c241414,
|
||||
0x1c241c0c, 0x1c242c14, 0x1c243404, 0x1c243424, 0x1c2c040c, 0x1c2c0c04, 0x1c2c0c14, 0x1c2c142c,
|
||||
0x1c2c1c14, 0x1c2c2424, 0x1c2c2c34, 0x1c2c3e1c, 0x1c340c34, 0x1c34240c, 0x1c3e040c, 0x1c3e041c,
|
||||
0x1c3e1404, 0x1c3e1414, 0x1c3e1c2c, 0x24040404, 0x24040424, 0x24040c14, 0x24041404, 0x24041424,
|
||||
0x2404143e, 0x24041c14, 0x2404240c, 0x24042c04, 0x24043e04, 0x240c0414, 0x240c043e, 0x240c0c0c,
|
||||
0x240c0c1c, 0x240c1414, 0x240c1c04, 0x240c1c2c, 0x240c241c, 0x240c2c0c, 0x240c2c2c, 0x2414040c,
|
||||
0x2414041c, 0x24140c04, 0x24140c2c, 0x2414140c, 0x24141c1c, 0x24142404, 0x24142c3e, 0x24143414,
|
||||
0x24143e04, 0x241c0424, 0x241c0c0c, 0x241c0c1c, 0x241c1404, 0x241c1414, 0x241c1c0c, 0x241c1c2c,
|
||||
0x24240404, 0x24240414, 0x24241424, 0x24241c3e, 0x24242404, 0x24243e0c, 0x242c042c, 0x242c043e,
|
||||
0x242c140c, 0x242c3414, 0x24340c1c, 0x24341c24, 0x24343404, 0x243e0c04, 0x243e0c2c, 0x243e1c04,
|
||||
0x243e241c, 0x243e2c0c, 0x2c040414, 0x2c040c04, 0x2c040c24, 0x2c041414, 0x2c042404, 0x2c042424,
|
||||
0x2c04243e, 0x2c042c14, 0x2c043434, 0x2c043e24, 0x2c0c040c, 0x2c0c041c, 0x2c0c042c, 0x2c0c0c14,
|
||||
0x2c0c140c, 0x2c0c1c14, 0x2c0c3e14, 0x2c140404, 0x2c140c0c, 0x2c14141c, 0x2c141c04, 0x2c141c34,
|
||||
0x2c142c1c, 0x2c1c0414, 0x2c1c043e, 0x2c1c0c04, 0x2c1c143e, 0x2c1c2424, 0x2c1c2c0c, 0x2c1c342c,
|
||||
0x2c1c3e1c, 0x2c24040c, 0x2c240424, 0x2c241404, 0x2c241c14, 0x2c242434, 0x2c2c0c14, 0x2c2c1434,
|
||||
0x2c2c2c0c, 0x2c2c2c1c, 0x2c342414, 0x2c3e0414, 0x2c3e0424, 0x2c3e1414, 0x34040c0c, 0x34040c1c,
|
||||
0x34040c2c, 0x34041c0c, 0x34041c1c, 0x34043404, 0x340c0404, 0x340c1404, 0x340c143e, 0x340c3424,
|
||||
0x34140c14, 0x34141c24, 0x34142414, 0x34142c2c, 0x34143414, 0x34143e04, 0x341c0404, 0x341c0c24,
|
||||
0x341c140c, 0x341c2404, 0x3424142c, 0x3424241c, 0x34243414, 0x342c0404, 0x342c041c, 0x342c1c24,
|
||||
0x342c3404, 0x3434042c, 0x34342404, 0x343e0c0c, 0x343e0c1c, 0x3e040404, 0x3e040424, 0x3e04043e,
|
||||
0x3e041404, 0x3e041414, 0x3e041c34, 0x3e042404, 0x3e042c24, 0x3e043414, 0x3e0c0414, 0x3e0c0c0c,
|
||||
0x3e0c1424, 0x3e0c241c, 0x3e0c242c, 0x3e14040c, 0x3e140424, 0x3e140c04, 0x3e140c34, 0x3e14140c,
|
||||
0x3e141c04, 0x3e142c0c, 0x3e1c0414, 0x3e1c1c14, 0x3e1c1c2c, 0x3e1c2c1c, 0x3e24040c, 0x3e24042c,
|
||||
0x3e240c1c, 0x3e241404, 0x3e242c04, 0x3e2c1414, 0x3e2c2414, 0x3e340414, 0x3e341c0c, 0x3e3e0404,
|
||||
};
|
||||
|
||||
#define NGRID_IQ1S 512
|
||||
constexpr constant static uint64_t iq1s_grid[NGRID_IQ1S] = {
|
||||
0xffffffffffff0101, 0xffffffffff01ff00, 0xffffffffff010100, 0xffffffff00000000,
|
||||
0xffffffff01ff00ff, 0xffffffff01ff0001, 0xffffffff0101ffff, 0xffffffff0101ff01,
|
||||
0xffffff00ff000000, 0xffffff000000ff00, 0xffffff00000000ff, 0xffffff0000000100,
|
||||
0xffffff0000010000, 0xffffff0001000000, 0xffffff01ffff00ff, 0xffffff01ff01ff00,
|
||||
0xffffff01ff010100, 0xffffff0100000001, 0xffffff0101ffff00, 0xffffff0101ff0101,
|
||||
0xffffff0101010100, 0xffff00ffff00ff01, 0xffff00ffff0000ff, 0xffff00ff00ff0100,
|
||||
0xffff00ff0100ff00, 0xffff00ff010001ff, 0xffff0000ff0101ff, 0xffff000000ffff00,
|
||||
0xffff000000000000, 0xffff00000001ff01, 0xffff000001000101, 0xffff0000010100ff,
|
||||
0xffff0001ffff0100, 0xffff00010000ff00, 0xffff000100010101, 0xffff000101000000,
|
||||
0xffff01ffffff0000, 0xffff01ffff01ffff, 0xffff01ffff010100, 0xffff01ff00000000,
|
||||
0xffff01ff01ffffff, 0xffff01ff01ff0001, 0xffff01ff0101ffff, 0xffff01ff01010001,
|
||||
0xffff0100ffffff01, 0xffff01000000ffff, 0xffff010000000100, 0xffff010001ff01ff,
|
||||
0xffff010001000000, 0xffff0101ff000000, 0xffff0101000101ff, 0xffff010101ffff01,
|
||||
0xffff01010101ff00, 0xff00ffffff000000, 0xff00ffff00ffff00, 0xff00ffff00000001,
|
||||
0xff00ffff000001ff, 0xff00ffff01010000, 0xff00ff00ffff0000, 0xff00ff00ff00ff00,
|
||||
0xff00ff00ff0000ff, 0xff00ff00ff000100, 0xff00ff00ff010001, 0xff00ff0000ff0001,
|
||||
0xff00ff000000ffff, 0xff00ff0000000000, 0xff00ff000001ff00, 0xff00ff0000010100,
|
||||
0xff00ff0001ff0000, 0xff00ff000100ff00, 0xff00ff0001000100, 0xff00ff01ff000000,
|
||||
0xff00ff0100ff0000, 0xff00ff01000001ff, 0xff00ff0101010001, 0xff0000ff00000000,
|
||||
0xff0000ff0001ff00, 0xff0000ff00010100, 0xff000000ffff0101, 0xff000000ff000000,
|
||||
0xff000000ff01ff00, 0xff00000000ff0000, 0xff0000000000ff00, 0xff000000000000ff,
|
||||
0xff00000000000000, 0xff00000000000001, 0xff00000000000100, 0xff0000000001ffff,
|
||||
0xff00000000010000, 0xff00000001000000, 0xff00000001010100, 0xff000001ff00ff01,
|
||||
0xff000001ff0100ff, 0xff00000100000000, 0xff0000010001ff00, 0xff00000101ff0100,
|
||||
0xff0000010100ff00, 0xff0001ff00ff00ff, 0xff0001ff00000101, 0xff0001ff000100ff,
|
||||
0xff0001ff01000000, 0xff000100ff0001ff, 0xff0001000000ff01, 0xff00010000000000,
|
||||
0xff00010000010001, 0xff00010000010100, 0xff00010001ffff00, 0xff00010001ff0101,
|
||||
0xff00010001010000, 0xff000101ffffffff, 0xff000101ff000101, 0xff00010101ff00ff,
|
||||
0xff00010101000001, 0xff000101010100ff, 0xff01ffffff000101, 0xff01ffffff01ffff,
|
||||
0xff01ffffff01ff01, 0xff01ffffff0101ff, 0xff01ffff00000000, 0xff01ffff01ff0001,
|
||||
0xff01ffff0101ff01, 0xff01ff00ff000000, 0xff01ff0000ff0100, 0xff01ff000000ff01,
|
||||
0xff01ff0000010000, 0xff01ff00010000ff, 0xff01ff01ff01ff00, 0xff01ff0100000101,
|
||||
0xff0100ffffff0000, 0xff0100ffff010000, 0xff0100ff01ff00ff, 0xff0100ff01000100,
|
||||
0xff0100ff010100ff, 0xff010000ffffff01, 0xff01000000000000, 0xff0100000101ff00,
|
||||
0xff010001ffff00ff, 0xff010001ff000100, 0xff01000100ffff00, 0xff01000100010001,
|
||||
0xff01000101ff0001, 0xff010001010001ff, 0xff0101ffffffffff, 0xff0101ffff01ffff,
|
||||
0xff0101ffff010101, 0xff0101ff0000ff00, 0xff0101ff01010001, 0xff010100ff000000,
|
||||
0xff010100ff01ff01, 0xff01010000ff0001, 0xff01010000000100, 0xff01010001000000,
|
||||
0xff0101010100ffff, 0x00ffffff0000ff01, 0x00ffffff000000ff, 0x00ffffff00000100,
|
||||
0x00ffffff00010000, 0x00ffff00ffff0001, 0x00ffff00ff0000ff, 0x00ffff00ff000100,
|
||||
0x00ffff0000000000, 0x00ffff0001000100, 0x00ffff0001010001, 0x00ffff01ff00ff01,
|
||||
0x00ffff0100ff0100, 0x00ffff010000ff00, 0x00ffff01000100ff, 0x00ffff0101ff00ff,
|
||||
0x00ffff010101ff00, 0x00ff00ffffffffff, 0x00ff00ffffff01ff, 0x00ff00ffff000101,
|
||||
0x00ff00ff00000000, 0x00ff00ff000101ff, 0x00ff00ff01010101, 0x00ff0000ff000000,
|
||||
0x00ff0000ff01ffff, 0x00ff000000ff0000, 0x00ff00000000ff00, 0x00ff0000000000ff,
|
||||
0x00ff000000000000, 0x00ff000000000001, 0x00ff000000000100, 0x00ff000000010000,
|
||||
0x00ff000001ffff01, 0x00ff000001000000, 0x00ff0001ff000101, 0x00ff000100ffffff,
|
||||
0x00ff000100000000, 0x00ff0001010001ff, 0x00ff01ffff000000, 0x00ff01ff0001ff00,
|
||||
0x00ff01ff01ff0100, 0x00ff0100ff01ff01, 0x00ff010000ff00ff, 0x00ff010000ff0101,
|
||||
0x00ff010000000000, 0x00ff010000010101, 0x00ff01000100ff00, 0x00ff010001010000,
|
||||
0x00ff0101ffffff00, 0x00ff01010000ff01, 0x00ff010100000100, 0x00ff010101ff0000,
|
||||
0x0000ffffffff0100, 0x0000ffffff00ff00, 0x0000ffffff0000ff, 0x0000ffffff010000,
|
||||
0x0000ffff00000000, 0x0000ffff00010101, 0x0000ffff01ffff01, 0x0000ffff01000100,
|
||||
0x0000ff00ff000000, 0x0000ff00ff01ff00, 0x0000ff00ff0101ff, 0x0000ff0000ff0000,
|
||||
0x0000ff000000ff00, 0x0000ff00000000ff, 0x0000ff0000000000, 0x0000ff0000000001,
|
||||
0x0000ff0000000100, 0x0000ff0000010000, 0x0000ff0001ffffff, 0x0000ff0001ff01ff,
|
||||
0x0000ff0001000000, 0x0000ff000101ffff, 0x0000ff01ffff0101, 0x0000ff01ff010000,
|
||||
0x0000ff0100000000, 0x0000ff0101000101, 0x000000ffffff0001, 0x000000ffff000000,
|
||||
0x000000ff00ff0000, 0x000000ff0000ff00, 0x000000ff000000ff, 0x000000ff00000000,
|
||||
0x000000ff00000001, 0x000000ff00000100, 0x000000ff00010000, 0x000000ff01000000,
|
||||
0x000000ff0101ff00, 0x00000000ffff0000, 0x00000000ff00ff00, 0x00000000ff0000ff,
|
||||
0x00000000ff000000, 0x00000000ff000001, 0x00000000ff000100, 0x00000000ff010000,
|
||||
0x0000000000ffff00, 0x0000000000ff00ff, 0x0000000000ff0000, 0x0000000000ff0001,
|
||||
0x0000000000ff0100, 0x000000000000ffff, 0x000000000000ff00, 0x000000000000ff01,
|
||||
0x00000000000000ff, 0x0000000000000001, 0x00000000000001ff, 0x0000000000000100,
|
||||
0x0000000000000101, 0x000000000001ff00, 0x00000000000100ff, 0x0000000000010000,
|
||||
0x0000000000010001, 0x0000000000010100, 0x0000000001ff0000, 0x000000000100ff00,
|
||||
0x00000000010000ff, 0x0000000001000000, 0x0000000001000001, 0x0000000001000100,
|
||||
0x0000000001010000, 0x00000001ffff01ff, 0x00000001ff000000, 0x0000000100ff0000,
|
||||
0x000000010000ff00, 0x00000001000000ff, 0x0000000100000000, 0x0000000100000001,
|
||||
0x0000000100000100, 0x0000000100010000, 0x0000000101000000, 0x000001ffff00ff00,
|
||||
0x000001ffff010001, 0x000001ffff0101ff, 0x000001ff00ffff01, 0x000001ff0000ffff,
|
||||
0x000001ff00000000, 0x000001ff010000ff, 0x000001ff01010100, 0x00000100ffff0100,
|
||||
0x00000100ff000000, 0x0000010000ff0000, 0x000001000000ff00, 0x00000100000000ff,
|
||||
0x0000010000000000, 0x0000010000000001, 0x0000010000000100, 0x0000010000010000,
|
||||
0x0000010001000000, 0x000001000101ff01, 0x00000101ffff0001, 0x00000101ff01ffff,
|
||||
0x0000010100000000, 0x0000010101010100, 0x0001ffffff000000, 0x0001ffff00ffffff,
|
||||
0x0001ffff00000100, 0x0001ffff0001ff00, 0x0001ffff01000000, 0x0001ff00ffffff00,
|
||||
0x0001ff00ffff01ff, 0x0001ff00ff010000, 0x0001ff0000000000, 0x0001ff0000010001,
|
||||
0x0001ff0001ff0000, 0x0001ff0001010100, 0x0001ff01ff0000ff, 0x0001ff01ff000001,
|
||||
0x0001ff0100ffffff, 0x0001ff010001ffff, 0x0001ff01000101ff, 0x0001ff010100ff01,
|
||||
0x000100ffff00ffff, 0x000100ffff00ff01, 0x000100ffff000100, 0x000100ff00000000,
|
||||
0x000100ff000101ff, 0x000100ff01ff0101, 0x000100ff0100ffff, 0x000100ff01010101,
|
||||
0x00010000ff000000, 0x00010000ff010100, 0x0001000000ff0000, 0x000100000000ff00,
|
||||
0x00010000000000ff, 0x0001000000000000, 0x0001000000000001, 0x0001000000000100,
|
||||
0x0001000000010000, 0x0001000001ffff01, 0x0001000001000000, 0x0001000100ff0101,
|
||||
0x0001000100000000, 0x00010001010100ff, 0x000101ffffff01ff, 0x000101ffffff0101,
|
||||
0x000101ff00010000, 0x000101ff01ff0000, 0x000101ff0100ff01, 0x00010100ffff0000,
|
||||
0x0001010000000000, 0x000101000001ffff, 0x0001010000010101, 0x00010100010001ff,
|
||||
0x00010101ff00ff00, 0x00010101ff010001, 0x0001010100ffffff, 0x0001010100ff01ff,
|
||||
0x00010101000101ff, 0x0001010101ff0000, 0x000101010100ff01, 0x0001010101000101,
|
||||
0x01ffffffffff0101, 0x01ffffffff01ffff, 0x01ffffffff01ff01, 0x01ffffffff0101ff,
|
||||
0x01ffffffff010101, 0x01ffffff00000000, 0x01ffffff01ff01ff, 0x01ffffff01000101,
|
||||
0x01ffffff0101ff01, 0x01ffffff010100ff, 0x01ffff000000ff00, 0x01ffff0000000001,
|
||||
0x01ffff00000001ff, 0x01ffff0000010000, 0x01ffff0001ff0000, 0x01ffff01ffffffff,
|
||||
0x01ffff01ffff01ff, 0x01ffff01ff000000, 0x01ffff01ff01ffff, 0x01ffff01ff0101ff,
|
||||
0x01ffff010100ffff, 0x01ff00ffffff0000, 0x01ff00ffff010000, 0x01ff00ff00ffff01,
|
||||
0x01ff0000ff0000ff, 0x01ff000000000000, 0x01ff00000001ff01, 0x01ff000001ffffff,
|
||||
0x01ff000001010100, 0x01ff0001ffffff01, 0x01ff0001ff010001, 0x01ff000101ff0100,
|
||||
0x01ff000101000001, 0x01ff0001010100ff, 0x01ff01ffff00ffff, 0x01ff01ff00010001,
|
||||
0x01ff01ff01000000, 0x01ff01ff010101ff, 0x01ff0100ff000001, 0x01ff010000ffff00,
|
||||
0x01ff010000000100, 0x01ff010001ff01ff, 0x01ff01000101ffff, 0x01ff0101ffff00ff,
|
||||
0x01ff0101ffff0101, 0x01ff0101ff0101ff, 0x01ff010100010000, 0x0100ffff00ff00ff,
|
||||
0x0100ffff00ff0001, 0x0100ffff00000100, 0x0100ffff0100ff00, 0x0100ff00ffff0000,
|
||||
0x0100ff00ff00ffff, 0x0100ff00ff00ff01, 0x0100ff00ff000100, 0x0100ff00ff010000,
|
||||
0x0100ff0000000000, 0x0100ff00000100ff, 0x0100ff0001ff0101, 0x0100ff0001010101,
|
||||
0x0100ff0100ff00ff, 0x0100ff0100ff0001, 0x0100ff0100000100, 0x0100ff0100010001,
|
||||
0x0100ff0101000000, 0x010000ffff00ff00, 0x010000ff0000ffff, 0x010000ff00000000,
|
||||
0x010000ff010001ff, 0x010000ff01010001, 0x01000000ffffff00, 0x01000000ffff0101,
|
||||
0x01000000ff000000, 0x01000000ff0100ff, 0x01000000ff010101, 0x0100000000ff0000,
|
||||
0x010000000000ff00, 0x01000000000000ff, 0x0100000000000000, 0x0100000000000001,
|
||||
0x0100000000000100, 0x0100000000010000, 0x0100000001000000, 0x0100000100000000,
|
||||
0x01000001000101ff, 0x0100000101ffff01, 0x010001ffff000101, 0x010001ff00ff0100,
|
||||
0x010001ff0000ff00, 0x010001ff000100ff, 0x010001ff01ffffff, 0x01000100ffff0000,
|
||||
0x01000100ff0001ff, 0x0100010000000000, 0x010001000001ff00, 0x0100010001ff0000,
|
||||
0x01000100010000ff, 0x0100010001000101, 0x01000101ff00ff01, 0x0100010100ff0100,
|
||||
0x010001010000ffff, 0x0100010101010001, 0x0101ffffffff0101, 0x0101ffffff0001ff,
|
||||
0x0101ffffff01ffff, 0x0101ffffff010101, 0x0101ffff00000000, 0x0101ffff0101ffff,
|
||||
0x0101ffff010101ff, 0x0101ff00ff000000, 0x0101ff0000ff0100, 0x0101ff000000ff00,
|
||||
0x0101ff0000010000, 0x0101ff00010000ff, 0x0101ff0001000001, 0x0101ff01ff010101,
|
||||
0x0101ff0100000000, 0x0101ff010101ff00, 0x010100ffffff0000, 0x010100ffff010000,
|
||||
0x010100ff00ff01ff, 0x010100ff000000ff, 0x010100ff00000101, 0x010100ff01ffff00,
|
||||
0x01010000ffffff01, 0x01010000ff000100, 0x01010000ff01ff01, 0x0101000000000000,
|
||||
0x01010000000100ff, 0x010100000101ff01, 0x01010001ffff0000, 0x01010001ff00ffff,
|
||||
0x01010001ff010000, 0x0101000101ffffff, 0x0101000101ff01ff, 0x0101000101010101,
|
||||
0x010101ffff01ffff, 0x010101ff00000000, 0x010101ff0001ff01, 0x010101ff0101ffff,
|
||||
0x010101ff010101ff, 0x01010100ffffffff, 0x01010100ff000001, 0x010101000000ff00,
|
||||
0x0101010001010000, 0x0101010100ff0001, 0x010101010001ff01, 0x010101010101ffff,
|
||||
};
|
||||
|
||||
constexpr constant static uint8_t ksigns_iq2xs[128] = {
|
||||
0, 129, 130, 3, 132, 5, 6, 135, 136, 9, 10, 139, 12, 141, 142, 15,
|
||||
144, 17, 18, 147, 20, 149, 150, 23, 24, 153, 154, 27, 156, 29, 30, 159,
|
||||
160, 33, 34, 163, 36, 165, 166, 39, 40, 169, 170, 43, 172, 45, 46, 175,
|
||||
48, 177, 178, 51, 180, 53, 54, 183, 184, 57, 58, 187, 60, 189, 190, 63,
|
||||
192, 65, 66, 195, 68, 197, 198, 71, 72, 201, 202, 75, 204, 77, 78, 207,
|
||||
80, 209, 210, 83, 212, 85, 86, 215, 216, 89, 90, 219, 92, 221, 222, 95,
|
||||
96, 225, 226, 99, 228, 101, 102, 231, 232, 105, 106, 235, 108, 237, 238, 111,
|
||||
240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
|
||||
};
|
||||
|
||||
constexpr constant static uint8_t kmask_iq2xs[8] = {1, 2, 4, 8, 16, 32, 64, 128};
|
||||
|
||||
void kernel_mul_mv_iq2_xxs_f32_impl(
|
||||
device const void * src0,
|
||||
device const float * src1,
|
||||
@@ -4346,7 +3692,6 @@ void kernel_mul_mv_iq2_xxs_f32_impl(
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
}
|
||||
|
||||
#if QK_K == 256
|
||||
const int ix = tiisg;
|
||||
|
||||
device const float * y4 = y + 32 * ix;
|
||||
@@ -4387,12 +3732,6 @@ void kernel_mul_mv_iq2_xxs_f32_impl(
|
||||
|
||||
y4 += 32 * 32;
|
||||
}
|
||||
#else
|
||||
(void) x;
|
||||
(void) y;
|
||||
(void) yl;
|
||||
(void) nb32;
|
||||
#endif
|
||||
|
||||
for (int row = 0; row < N_DST; ++row) {
|
||||
all_sum = simd_sum(sumf[row]);
|
||||
@@ -4482,7 +3821,6 @@ void kernel_mul_mv_iq2_xs_f32_impl(
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
}
|
||||
|
||||
#if QK_K == 256
|
||||
const int ix = tiisg;
|
||||
|
||||
device const float * y4 = y + 32 * ix;
|
||||
@@ -4533,12 +3871,6 @@ void kernel_mul_mv_iq2_xs_f32_impl(
|
||||
|
||||
y4 += 32 * 32;
|
||||
}
|
||||
#else
|
||||
(void) x;
|
||||
(void) y;
|
||||
(void) yl;
|
||||
(void) nb32;
|
||||
#endif
|
||||
|
||||
for (int row = 0; row < N_DST; ++row) {
|
||||
all_sum = simd_sum(sumf[row]);
|
||||
@@ -4628,7 +3960,6 @@ void kernel_mul_mv_iq3_xxs_f32_impl(
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
}
|
||||
|
||||
#if QK_K == 256
|
||||
const int ix = tiisg;
|
||||
|
||||
device const float * y4 = y + 32 * ix;
|
||||
@@ -4672,12 +4003,6 @@ void kernel_mul_mv_iq3_xxs_f32_impl(
|
||||
|
||||
y4 += 32 * 32;
|
||||
}
|
||||
#else
|
||||
(void) x;
|
||||
(void) y;
|
||||
(void) yl;
|
||||
(void) nb32;
|
||||
#endif
|
||||
|
||||
for (int row = 0; row < N_DST; ++row) {
|
||||
all_sum = simd_sum(sumf[row]);
|
||||
@@ -4759,7 +4084,7 @@ void kernel_mul_mv_iq3_s_f32_impl(
|
||||
{
|
||||
int nval = 8;
|
||||
int pos = (32*sgitg + tiisg)*nval;
|
||||
for (int i = 0; i < nval; ++i) values[pos + i] = iq3xs_grid[pos + i];
|
||||
for (int i = 0; i < nval; ++i) values[pos + i] = iq3s_grid[pos + i];
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
}
|
||||
|
||||
@@ -4786,12 +4111,14 @@ void kernel_mul_mv_iq3_s_f32_impl(
|
||||
for (int row = 0; row < N_DST; row++) {
|
||||
|
||||
const float db = dh[0];
|
||||
const float d = db * (0.5f + ((sc[0] >> 4*(ib%2)) & 0xf));
|
||||
const float d = db * (1 + 2*((sc[0] >> 4*(ib%2)) & 0xf));
|
||||
|
||||
float2 sum = {0};
|
||||
for (int l = 0; l < 4; ++l) {
|
||||
const threadgroup uint8_t * grid1 = (const threadgroup uint8_t *)(values + (qs[2*l+0] | ((qh[0] << (8-2*l)) & 256)));
|
||||
const threadgroup uint8_t * grid2 = (const threadgroup uint8_t *)(values + (qs[2*l+1] | ((qh[0] << (7-2*l)) & 256)));
|
||||
const threadgroup uint32_t * table1 = qh[0] & kmask_iq2xs[2*l+0] ? values + 256 : values;
|
||||
const threadgroup uint32_t * table2 = qh[0] & kmask_iq2xs[2*l+1] ? values + 256 : values;
|
||||
const threadgroup uint8_t * grid1 = (const threadgroup uint8_t *)(table1 + qs[2*l+0]);
|
||||
const threadgroup uint8_t * grid2 = (const threadgroup uint8_t *)(table2 + qs[2*l+1]);
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
sum[0] += yl[8*l + j + 0] * grid1[j] * select(1, -1, signs[l] & kmask_iq2xs[j+0]);
|
||||
sum[1] += yl[8*l + j + 4] * grid2[j] * select(1, -1, signs[l] & kmask_iq2xs[j+4]);
|
||||
@@ -4812,7 +4139,7 @@ void kernel_mul_mv_iq3_s_f32_impl(
|
||||
for (int row = 0; row < N_DST; ++row) {
|
||||
all_sum = simd_sum(sumf[row]);
|
||||
if (tiisg == 0) {
|
||||
dst[r1*ne0 + im*ne0*ne1 + first_row + row] = all_sum * 0.5f;
|
||||
dst[r1*ne0 + im*ne0*ne1 + first_row + row] = all_sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -5016,7 +4343,6 @@ void kernel_mul_mv_iq1_s_f32_impl(
|
||||
|
||||
const int nb32 = nb * (QK_K / 32);
|
||||
|
||||
#if QK_K == 256
|
||||
const int ix = tiisg/2;
|
||||
const int il = tiisg%2;
|
||||
|
||||
@@ -5055,12 +4381,6 @@ void kernel_mul_mv_iq1_s_f32_impl(
|
||||
|
||||
y4 += 16 * 32;
|
||||
}
|
||||
#else
|
||||
(void) x;
|
||||
(void) y;
|
||||
(void) yl;
|
||||
(void) nb32;
|
||||
#endif
|
||||
|
||||
for (int row = 0; row < N_DST; ++row) {
|
||||
all_sum = simd_sum(sumf[row]);
|
||||
@@ -5167,6 +4487,7 @@ void kernel_mul_mv_iq4_nl_f32_impl(
|
||||
}
|
||||
}
|
||||
|
||||
#if QK_K != 64
|
||||
void kernel_mul_mv_iq4_xs_f32_impl(
|
||||
device const void * src0,
|
||||
device const float * src1,
|
||||
@@ -5260,6 +4581,7 @@ void kernel_mul_mv_iq4_xs_f32_impl(
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
[[host_name("kernel_mul_mv_iq1_s_f32")]]
|
||||
kernel void kernel_mul_mv_iq1_s_f32(
|
||||
@@ -5344,7 +4666,11 @@ kernel void kernel_mul_mv_iq4_xs_f32(
|
||||
uint tiisg[[thread_index_in_simdgroup]],
|
||||
uint sgitg[[simdgroup_index_in_threadgroup]]) {
|
||||
|
||||
#if QK_K == 64
|
||||
kernel_mul_mv_iq4_nl_f32_impl(src0, src1, dst, ne00, ne01, ne02, ne10, ne12, ne0, ne1, r2, r3, shared_values, tgpig, tiisg, sgitg);
|
||||
#else
|
||||
kernel_mul_mv_iq4_xs_f32_impl(src0, src1, dst, ne00, ne01, ne02, ne10, ne12, ne0, ne1, r2, r3, shared_values, tgpig, tiisg, sgitg);
|
||||
#endif
|
||||
}
|
||||
|
||||
//============================= templates and their specializations =============================
|
||||
@@ -5703,15 +5029,15 @@ void dequantize_iq3_s(device const block_iq3_s * xb, short il, thread type4x4 &
|
||||
device const uint8_t * qs = xb->qs + 8*ib32;
|
||||
device const uint8_t * signs = xb->signs + 4*ib32 + 2*il;
|
||||
const uint8_t qh = xb->qh[ib32] >> 4*il;
|
||||
const float dl = d * (0.5f + ((xb->scales[ib32/2] >> 4*(ib32%2)) & 0xf)) * 0.5f;
|
||||
constant uint8_t * grid1 = (constant uint8_t *)(iq3xs_grid + (qs[4*il+0] | ((qh << 8) & 256)));
|
||||
constant uint8_t * grid2 = (constant uint8_t *)(iq3xs_grid + (qs[4*il+1] | ((qh << 7) & 256)));
|
||||
const float dl = d * (1 + 2*((xb->scales[ib32/2] >> 4*(ib32%2)) & 0xf));
|
||||
constant uint8_t * grid1 = (constant uint8_t *)(iq3s_grid + (qs[4*il+0] | ((qh << 8) & 256)));
|
||||
constant uint8_t * grid2 = (constant uint8_t *)(iq3s_grid + (qs[4*il+1] | ((qh << 7) & 256)));
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
reg[0][i] = dl * grid1[i] * select(1, -1, signs[0] & kmask_iq2xs[i+0]);
|
||||
reg[1][i] = dl * grid2[i] * select(1, -1, signs[0] & kmask_iq2xs[i+4]);
|
||||
}
|
||||
grid1 = (constant uint8_t *)(iq3xs_grid + (qs[4*il+2] | ((qh << 6) & 256)));
|
||||
grid2 = (constant uint8_t *)(iq3xs_grid + (qs[4*il+3] | ((qh << 5) & 256)));
|
||||
grid1 = (constant uint8_t *)(iq3s_grid + (qs[4*il+2] | ((qh << 6) & 256)));
|
||||
grid2 = (constant uint8_t *)(iq3s_grid + (qs[4*il+3] | ((qh << 5) & 256)));
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
reg[2][i] = dl * grid1[i] * select(1, -1, signs[1] & kmask_iq2xs[i+0]);
|
||||
reg[3][i] = dl * grid2[i] * select(1, -1, signs[1] & kmask_iq2xs[i+4]);
|
||||
@@ -5770,6 +5096,9 @@ void dequantize_iq4_nl(device const block_iq4_nl * xb, short il, thread type4x4
|
||||
|
||||
template <typename type4x4>
|
||||
void dequantize_iq4_xs(device const block_iq4_xs * xb, short il, thread type4x4 & reg) {
|
||||
#if QK_K == 64
|
||||
dequantize_iq4_nl(xb, il, reg);
|
||||
#else
|
||||
// il is 0...15 for QK_K = 256 => index of block of 32 is il/2
|
||||
const int ib32 = il/2;
|
||||
il = il%2;
|
||||
@@ -5786,6 +5115,7 @@ void dequantize_iq4_xs(device const block_iq4_xs * xb, short il, thread type4x4
|
||||
reg[i][2] = d * kvalues_iq4nl_f[q8[2]];
|
||||
reg[i][3] = d * kvalues_iq4nl_f[q8[3]];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
template<typename block_q, short nl, void (*dequantize_func)(device const block_q *, short, thread float4x4 &)>
|
||||
@@ -6334,7 +5664,11 @@ template [[host_name("kernel_get_rows_iq3_s")]] kernel get_rows_t kernel_get_r
|
||||
template [[host_name("kernel_get_rows_iq2_s")]] kernel get_rows_t kernel_get_rows<block_iq2_s, QK_NL, dequantize_iq2_s>;
|
||||
template [[host_name("kernel_get_rows_iq1_s")]] kernel get_rows_t kernel_get_rows<block_iq1_s, QK_NL, dequantize_iq1_s>;
|
||||
template [[host_name("kernel_get_rows_iq4_nl")]] kernel get_rows_t kernel_get_rows<block_iq4_nl, 2, dequantize_iq4_nl>;
|
||||
#if QK_K == 64
|
||||
template [[host_name("kernel_get_rows_iq4_xs")]] kernel get_rows_t kernel_get_rows<block_iq4_xs, 2, dequantize_iq4_xs>;
|
||||
#else
|
||||
template [[host_name("kernel_get_rows_iq4_xs")]] kernel get_rows_t kernel_get_rows<block_iq4_xs, QK_NL, dequantize_iq4_xs>;
|
||||
#endif
|
||||
|
||||
//
|
||||
// matrix-matrix multiplication
|
||||
@@ -6378,7 +5712,11 @@ template [[host_name("kernel_mul_mm_iq3_s_f32")]] kernel mat_mm_t kernel_mul_m
|
||||
template [[host_name("kernel_mul_mm_iq2_s_f32")]] kernel mat_mm_t kernel_mul_mm<block_iq2_s, QK_NL, dequantize_iq2_s>;
|
||||
template [[host_name("kernel_mul_mm_iq1_s_f32")]] kernel mat_mm_t kernel_mul_mm<block_iq1_s, QK_NL, dequantize_iq1_s>;
|
||||
template [[host_name("kernel_mul_mm_iq4_nl_f32")]] kernel mat_mm_t kernel_mul_mm<block_iq4_nl, 2, dequantize_iq4_nl>;
|
||||
#if QK_K == 64
|
||||
template [[host_name("kernel_mul_mm_iq4_xs_f32")]] kernel mat_mm_t kernel_mul_mm<block_iq4_nl, 2, dequantize_iq4_xs>;
|
||||
#else
|
||||
template [[host_name("kernel_mul_mm_iq4_xs_f32")]] kernel mat_mm_t kernel_mul_mm<block_iq4_xs, QK_NL, dequantize_iq4_xs>;
|
||||
#endif
|
||||
|
||||
//
|
||||
// indirect matrix-matrix multiplication
|
||||
@@ -6434,7 +5772,11 @@ template [[host_name("kernel_mul_mm_id_iq3_s_f32")]] kernel mat_mm_id_t kernel
|
||||
template [[host_name("kernel_mul_mm_id_iq2_s_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_iq2_s, QK_NL, dequantize_iq2_s>;
|
||||
template [[host_name("kernel_mul_mm_id_iq1_s_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_iq1_s, QK_NL, dequantize_iq1_s>;
|
||||
template [[host_name("kernel_mul_mm_id_iq4_nl_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_iq4_nl, 2, dequantize_iq4_nl>;
|
||||
#if QK_K == 64
|
||||
template [[host_name("kernel_mul_mm_id_iq4_xs_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_iq4_xs, 2, dequantize_iq4_xs>;
|
||||
#else
|
||||
template [[host_name("kernel_mul_mm_id_iq4_xs_f32")]] kernel mat_mm_id_t kernel_mul_mm_id<block_iq4_xs, QK_NL, dequantize_iq4_xs>;
|
||||
#endif
|
||||
|
||||
//
|
||||
// matrix-vector multiplication
|
||||
@@ -7707,7 +7049,11 @@ kernel void kernel_mul_mv_id_iq4_xs_f32(
|
||||
|
||||
const int32_t id = ((device int32_t *) (ids + bid*nbi1))[idx];
|
||||
|
||||
#if QK_K == 64
|
||||
kernel_mul_mv_iq4_nl_f32_impl(
|
||||
#else
|
||||
kernel_mul_mv_iq4_xs_f32_impl(
|
||||
#endif
|
||||
src0[id],
|
||||
(device const float *) (src1 + bid*nb11),
|
||||
dst + bid*ne0,
|
||||
|
||||
@@ -2231,7 +2231,7 @@ static ggml_backend_buffer_type_t ggml_backend_opencl_get_default_buffer_type(gg
|
||||
GGML_UNUSED(backend);
|
||||
}
|
||||
|
||||
static bool ggml_backend_opencl_graph_compute(ggml_backend_t backend, ggml_cgraph * graph) {
|
||||
static ggml_status ggml_backend_opencl_graph_compute(ggml_backend_t backend, ggml_cgraph * graph) {
|
||||
for (int i = 0; i < graph->n_nodes; ++i) {
|
||||
ggml_tensor * node = graph->nodes[i];
|
||||
switch (node->op) {
|
||||
@@ -2246,7 +2246,7 @@ static bool ggml_backend_opencl_graph_compute(ggml_backend_t backend, ggml_cgrap
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return GGML_STATUS_SUCCESS;
|
||||
|
||||
GGML_UNUSED(backend);
|
||||
}
|
||||
|
||||
1186
ggml-quants.c
1186
ggml-quants.c
File diff suppressed because it is too large
Load Diff
@@ -1,9 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
#include "ggml-impl.h"
|
||||
|
||||
// GGML internal header
|
||||
|
||||
#include "ggml-impl.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
@@ -230,6 +230,10 @@ typedef struct {
|
||||
} block_iq4_nl;
|
||||
static_assert(sizeof(block_iq4_nl) == sizeof(ggml_fp16_t) + QK4_NL/2, "wrong iq4_nl block size/padding");
|
||||
|
||||
#if QK_K == 64
|
||||
#define block_iq4_xs block_iq4_nl
|
||||
//typedef struct block_iq4_nl block_iq4_xs;
|
||||
#else
|
||||
typedef struct {
|
||||
ggml_fp16_t d;
|
||||
uint16_t scales_h;
|
||||
@@ -237,6 +241,7 @@ typedef struct {
|
||||
uint8_t qs[QK_K/2];
|
||||
} block_iq4_xs;
|
||||
static_assert(sizeof(block_iq4_xs) == sizeof(ggml_fp16_t) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding");
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -256,6 +261,7 @@ void quantize_row_q4_K_reference(const float * GGML_RESTRICT x, block_q4_K * GGM
|
||||
void quantize_row_q5_K_reference(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int k);
|
||||
void quantize_row_q6_K_reference(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int k);
|
||||
void quantize_row_q8_K_reference(const float * GGML_RESTRICT x, block_q8_K * GGML_RESTRICT y, int k);
|
||||
|
||||
void quantize_row_iq3_xxs_reference(const float * GGML_RESTRICT x, block_iq3_xxs * GGML_RESTRICT y, int k);
|
||||
void quantize_row_iq4_nl_reference (const float * GGML_RESTRICT x, block_iq4_nl * GGML_RESTRICT y, int k);
|
||||
void quantize_row_iq4_xs_reference (const float * GGML_RESTRICT x, block_iq4_xs * GGML_RESTRICT y, int k);
|
||||
@@ -275,6 +281,7 @@ void quantize_row_q4_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, in
|
||||
void quantize_row_q5_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
||||
void quantize_row_q6_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
||||
void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
||||
|
||||
void quantize_row_iq3_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
||||
void quantize_row_iq4_nl (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
||||
void quantize_row_iq4_xs (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
|
||||
@@ -295,6 +302,7 @@ void dequantize_row_q4_K(const block_q4_K * GGML_RESTRICT x, float * GGML_RESTRI
|
||||
void dequantize_row_q5_K(const block_q5_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
||||
void dequantize_row_q6_K(const block_q6_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
||||
void dequantize_row_q8_K(const block_q8_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
||||
|
||||
void dequantize_row_iq2_xxs(const block_iq2_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
||||
void dequantize_row_iq2_xs (const block_iq2_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
||||
void dequantize_row_iq2_s (const block_iq2_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
|
||||
@@ -316,6 +324,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi
|
||||
void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||
void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||
void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||
|
||||
void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||
void ggml_vec_dot_iq2_xs_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||
void ggml_vec_dot_iq2_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||
@@ -325,26 +334,26 @@ void ggml_vec_dot_iq4_nl_q8_0 (int n, float * GGML_RESTRICT s, size_t bs, const
|
||||
void ggml_vec_dot_iq4_xs_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||
void ggml_vec_dot_iq3_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
|
||||
|
||||
//
|
||||
// Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
|
||||
//
|
||||
size_t quantize_iq2_xxs(const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
||||
size_t quantize_iq2_xs (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
||||
size_t quantize_iq2_s (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
||||
size_t quantize_iq3_xxs(const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
||||
size_t quantize_iq1_s (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
||||
size_t quantize_iq4_nl (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
||||
size_t quantize_iq4_xs (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
||||
size_t quantize_iq3_s (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
||||
size_t quantize_q2_K (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
||||
size_t quantize_q3_K (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
||||
size_t quantize_q4_K (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
||||
size_t quantize_q5_K (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
||||
size_t quantize_q6_K (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
||||
size_t quantize_q4_0 (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
||||
size_t quantize_q4_1 (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
||||
size_t quantize_q5_0 (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
||||
size_t quantize_q5_1 (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
||||
size_t quantize_iq2_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||
size_t quantize_iq2_xs (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||
size_t quantize_iq2_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||
size_t quantize_iq3_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||
size_t quantize_iq1_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||
size_t quantize_iq4_nl (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||
size_t quantize_iq4_xs (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||
size_t quantize_iq3_s (const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||
|
||||
size_t quantize_q2_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||
size_t quantize_q3_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||
size_t quantize_q4_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||
size_t quantize_q5_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||
size_t quantize_q6_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||
size_t quantize_q4_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||
size_t quantize_q4_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||
size_t quantize_q5_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||
size_t quantize_q5_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||
size_t quantize_q8_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int nrows, int n_per_row, const float * imatrix);
|
||||
|
||||
void iq2xs_init_impl(enum ggml_type type);
|
||||
void iq2xs_free_impl(enum ggml_type type);
|
||||
|
||||
3931
ggml-sycl.cpp
3931
ggml-sycl.cpp
File diff suppressed because it is too large
Load Diff
@@ -24,6 +24,11 @@ GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
|
||||
GGML_API void ggml_backend_sycl_print_sycl_devices(void);
|
||||
GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len);
|
||||
GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
|
||||
GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
|
||||
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
|
||||
GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
|
||||
GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
86931
ggml-vulkan-shaders.hpp
86931
ggml-vulkan-shaders.hpp
File diff suppressed because it is too large
Load Diff
2128
ggml-vulkan.cpp
2128
ggml-vulkan.cpp
File diff suppressed because it is too large
Load Diff
@@ -10,6 +10,7 @@ extern "C" {
|
||||
#define GGML_VK_NAME "Vulkan"
|
||||
#define GGML_VK_MAX_DEVICES 16
|
||||
|
||||
GGML_API void ggml_vk_instance_init(void);
|
||||
GGML_API void ggml_vk_init_cpu_assist(void);
|
||||
|
||||
GGML_API void ggml_vk_preallocate_buffers_graph_cpu_assist(struct ggml_tensor * node);
|
||||
|
||||
86
ggml.h
86
ggml.h
@@ -315,6 +315,16 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum ggml_status {
|
||||
GGML_STATUS_ALLOC_FAILED = -2,
|
||||
GGML_STATUS_FAILED = -1,
|
||||
GGML_STATUS_SUCCESS = 0,
|
||||
GGML_STATUS_ABORTED = 1,
|
||||
};
|
||||
|
||||
// get ggml_status name string
|
||||
GGML_API GGML_CALL const char * ggml_status_to_string(enum ggml_status status);
|
||||
|
||||
typedef uint16_t ggml_fp16_t;
|
||||
|
||||
// convert FP16 <-> FP32
|
||||
@@ -454,12 +464,16 @@ extern "C" {
|
||||
GGML_OP_POOL_2D,
|
||||
GGML_OP_UPSCALE, // nearest interpolate
|
||||
GGML_OP_PAD,
|
||||
GGML_OP_ARANGE,
|
||||
GGML_OP_TIMESTEP_EMBEDDING,
|
||||
GGML_OP_ARGSORT,
|
||||
GGML_OP_LEAKY_RELU,
|
||||
|
||||
GGML_OP_FLASH_ATTN,
|
||||
GGML_OP_FLASH_FF,
|
||||
GGML_OP_FLASH_ATTN_BACK,
|
||||
GGML_OP_SSM_CONV,
|
||||
GGML_OP_SSM_SCAN,
|
||||
GGML_OP_WIN_PART,
|
||||
GGML_OP_WIN_UNPART,
|
||||
GGML_OP_GET_REL_POS,
|
||||
@@ -672,6 +686,16 @@ extern "C" {
|
||||
GGML_NUMA_STRATEGY_COUNT
|
||||
};
|
||||
|
||||
//
|
||||
// GUID
|
||||
//
|
||||
|
||||
// GUID types
|
||||
typedef uint8_t ggml_guid[16];
|
||||
typedef ggml_guid * ggml_guid_t;
|
||||
|
||||
GGML_API bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b);
|
||||
|
||||
// misc
|
||||
|
||||
GGML_API void ggml_time_init(void); // call this once at the beginning of the program
|
||||
@@ -1651,6 +1675,15 @@ extern "C" {
|
||||
int p2,
|
||||
int p3);
|
||||
|
||||
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
||||
// timesteps: [N,]
|
||||
// return: [N, dim]
|
||||
GGML_API struct ggml_tensor * ggml_timestep_embedding(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * timesteps,
|
||||
int dim,
|
||||
int max_period);
|
||||
|
||||
// sort rows
|
||||
enum ggml_sort_order {
|
||||
GGML_SORT_ORDER_ASC,
|
||||
@@ -1662,6 +1695,12 @@ extern "C" {
|
||||
struct ggml_tensor * a,
|
||||
enum ggml_sort_order order);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_arange(
|
||||
struct ggml_context * ctx,
|
||||
float start,
|
||||
float stop,
|
||||
float step);
|
||||
|
||||
// top k elements per row
|
||||
GGML_API struct ggml_tensor * ggml_top_k(
|
||||
struct ggml_context * ctx,
|
||||
@@ -1691,6 +1730,23 @@ extern "C" {
|
||||
struct ggml_tensor * c0,
|
||||
struct ggml_tensor * c1);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_ssm_conv(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * s,
|
||||
struct ggml_tensor * x,
|
||||
struct ggml_tensor * c,
|
||||
struct ggml_tensor * sq);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_ssm_scan(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * s,
|
||||
struct ggml_tensor * x,
|
||||
struct ggml_tensor * dt,
|
||||
struct ggml_tensor * A,
|
||||
struct ggml_tensor * B,
|
||||
struct ggml_tensor * C,
|
||||
struct ggml_tensor * sq);
|
||||
|
||||
// partition into non-overlapping windows with padding if needed
|
||||
// example:
|
||||
// a: 768 64 64 1
|
||||
@@ -1913,12 +1969,11 @@ extern "C" {
|
||||
|
||||
// ggml_graph_plan() has to be called before ggml_graph_compute()
|
||||
// when plan.work_size > 0, caller must allocate memory for plan.work_data
|
||||
GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
|
||||
GGML_API int ggml_graph_compute( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
|
||||
|
||||
GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
|
||||
GGML_API enum ggml_status ggml_graph_compute ( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
|
||||
// same as ggml_graph_compute() but the work data is allocated as a part of the context
|
||||
// note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
|
||||
GGML_API void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
|
||||
GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
|
||||
|
||||
@@ -2139,25 +2194,18 @@ extern "C" {
|
||||
GGML_API void ggml_quantize_init(enum ggml_type type);
|
||||
GGML_API void ggml_quantize_free(void);
|
||||
|
||||
// TODO: these would probably get removed in favor of the more general ggml_quantize_chunk
|
||||
GGML_API size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
|
||||
GGML_API size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
|
||||
GGML_API size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);
|
||||
GGML_API size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);
|
||||
GGML_API size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);
|
||||
|
||||
GGML_API size_t ggml_quantize_q2_K(const float * src, void * dst, int n, int k, int64_t * hist);
|
||||
GGML_API size_t ggml_quantize_q3_K(const float * src, void * dst, int n, int k, int64_t * hist);
|
||||
GGML_API size_t ggml_quantize_q4_K(const float * src, void * dst, int n, int k, int64_t * hist);
|
||||
GGML_API size_t ggml_quantize_q5_K(const float * src, void * dst, int n, int k, int64_t * hist);
|
||||
GGML_API size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist);
|
||||
|
||||
// some quantization type cannot be used without an importance matrix
|
||||
GGML_API bool ggml_quantize_requires_imatrix(enum ggml_type type);
|
||||
|
||||
// calls ggml_quantize_init internally (i.e. can allocate memory)
|
||||
GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst,
|
||||
int start, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
|
||||
GGML_API size_t ggml_quantize_chunk(
|
||||
enum ggml_type type,
|
||||
const float * src,
|
||||
void * dst,
|
||||
int start,
|
||||
int nrows,
|
||||
int n_per_row,
|
||||
const float * imatrix);
|
||||
|
||||
//
|
||||
// gguf
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -61,6 +61,12 @@ class Keys:
|
||||
SCALING_ORIG_CTX_LEN = "{arch}.rope.scaling.original_context_length"
|
||||
SCALING_FINETUNED = "{arch}.rope.scaling.finetuned"
|
||||
|
||||
class SSM:
|
||||
CONV_KERNEL = "{arch}.ssm.conv_kernel"
|
||||
INNER_SIZE = "{arch}.ssm.inner_size"
|
||||
STATE_SIZE = "{arch}.ssm.state_size"
|
||||
TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
|
||||
|
||||
class Tokenizer:
|
||||
MODEL = "tokenizer.ggml.model"
|
||||
LIST = "tokenizer.ggml.tokens"
|
||||
@@ -112,6 +118,8 @@ class MODEL_ARCH(IntEnum):
|
||||
INTERNLM2 = auto()
|
||||
MINICPM = auto()
|
||||
GEMMA = auto()
|
||||
STARCODER2 = auto()
|
||||
MAMBA = auto()
|
||||
|
||||
|
||||
class MODEL_TENSOR(IntEnum):
|
||||
@@ -143,6 +151,13 @@ class MODEL_TENSOR(IntEnum):
|
||||
ATTN_Q_NORM = auto()
|
||||
ATTN_K_NORM = auto()
|
||||
LAYER_OUT_NORM = auto()
|
||||
SSM_IN = auto()
|
||||
SSM_CONV1D = auto()
|
||||
SSM_X = auto()
|
||||
SSM_DT = auto()
|
||||
SSM_A = auto()
|
||||
SSM_D = auto()
|
||||
SSM_OUT = auto()
|
||||
|
||||
|
||||
MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
||||
@@ -169,6 +184,8 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
||||
MODEL_ARCH.INTERNLM2: "internlm2",
|
||||
MODEL_ARCH.MINICPM: "minicpm",
|
||||
MODEL_ARCH.GEMMA: "gemma",
|
||||
MODEL_ARCH.STARCODER2: "starcoder2",
|
||||
MODEL_ARCH.MAMBA: "mamba",
|
||||
}
|
||||
|
||||
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
||||
@@ -200,6 +217,13 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
||||
MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down.{xid}",
|
||||
MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up.{xid}",
|
||||
MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
|
||||
MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
|
||||
MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
|
||||
MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
|
||||
MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt",
|
||||
MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
|
||||
MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
|
||||
MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
|
||||
}
|
||||
|
||||
MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||
@@ -526,6 +550,34 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||
MODEL_TENSOR.FFN_UP,
|
||||
MODEL_TENSOR.FFN_NORM,
|
||||
],
|
||||
MODEL_ARCH.STARCODER2: [
|
||||
MODEL_TENSOR.TOKEN_EMBD,
|
||||
MODEL_TENSOR.OUTPUT_NORM,
|
||||
MODEL_TENSOR.OUTPUT,
|
||||
MODEL_TENSOR.ROPE_FREQS,
|
||||
MODEL_TENSOR.ATTN_NORM,
|
||||
MODEL_TENSOR.ATTN_Q,
|
||||
MODEL_TENSOR.ATTN_K,
|
||||
MODEL_TENSOR.ATTN_V,
|
||||
MODEL_TENSOR.ATTN_OUT,
|
||||
MODEL_TENSOR.ATTN_ROT_EMBD,
|
||||
MODEL_TENSOR.FFN_NORM,
|
||||
MODEL_TENSOR.FFN_DOWN,
|
||||
MODEL_TENSOR.FFN_UP,
|
||||
],
|
||||
MODEL_ARCH.MAMBA: [
|
||||
MODEL_TENSOR.TOKEN_EMBD,
|
||||
MODEL_TENSOR.OUTPUT_NORM,
|
||||
MODEL_TENSOR.OUTPUT,
|
||||
MODEL_TENSOR.ATTN_NORM,
|
||||
MODEL_TENSOR.SSM_IN,
|
||||
MODEL_TENSOR.SSM_CONV1D,
|
||||
MODEL_TENSOR.SSM_X,
|
||||
MODEL_TENSOR.SSM_DT,
|
||||
MODEL_TENSOR.SSM_A,
|
||||
MODEL_TENSOR.SSM_D,
|
||||
MODEL_TENSOR.SSM_OUT,
|
||||
],
|
||||
# TODO
|
||||
}
|
||||
|
||||
@@ -554,6 +606,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||
MODEL_TENSOR.ROPE_FREQS,
|
||||
MODEL_TENSOR.ATTN_ROT_EMBD,
|
||||
],
|
||||
MODEL_ARCH.STARCODER2: [
|
||||
MODEL_TENSOR.ROPE_FREQS,
|
||||
MODEL_TENSOR.ATTN_ROT_EMBD,
|
||||
],
|
||||
}
|
||||
|
||||
#
|
||||
@@ -583,20 +639,28 @@ class PoolingType(IntEnum):
|
||||
|
||||
|
||||
class GGMLQuantizationType(IntEnum):
|
||||
F32 = 0
|
||||
F16 = 1
|
||||
Q4_0 = 2
|
||||
Q4_1 = 3
|
||||
Q5_0 = 6
|
||||
Q5_1 = 7
|
||||
Q8_0 = 8
|
||||
Q8_1 = 9
|
||||
Q2_K = 10
|
||||
Q3_K = 11
|
||||
Q4_K = 12
|
||||
Q5_K = 13
|
||||
Q6_K = 14
|
||||
Q8_K = 15
|
||||
F32 = 0
|
||||
F16 = 1
|
||||
Q4_0 = 2
|
||||
Q4_1 = 3
|
||||
Q5_0 = 6
|
||||
Q5_1 = 7
|
||||
Q8_0 = 8
|
||||
Q8_1 = 9
|
||||
Q2_K = 10
|
||||
Q3_K = 11
|
||||
Q4_K = 12
|
||||
Q5_K = 13
|
||||
Q6_K = 14
|
||||
Q8_K = 15
|
||||
IQ2_XXS = 16
|
||||
IQ2_XS = 17
|
||||
IQ3_XXS = 18
|
||||
IQ1_S = 19
|
||||
IQ4_NL = 20
|
||||
IQ3_S = 21
|
||||
IQ2_S = 22
|
||||
IQ4_XS = 23
|
||||
|
||||
|
||||
class GGUFEndian(IntEnum):
|
||||
@@ -641,20 +705,28 @@ class GGUFValueType(IntEnum):
|
||||
QK_K = 256
|
||||
# Items here are (block size, type size)
|
||||
GGML_QUANT_SIZES = {
|
||||
GGMLQuantizationType.F32: (1, 4),
|
||||
GGMLQuantizationType.F16: (1, 2),
|
||||
GGMLQuantizationType.Q4_0: (32, 2 + 16),
|
||||
GGMLQuantizationType.Q4_1: (32, 2 + 2 + 16),
|
||||
GGMLQuantizationType.Q5_0: (32, 2 + 4 + 16),
|
||||
GGMLQuantizationType.Q5_1: (32, 2 + 2 + 4 + 16),
|
||||
GGMLQuantizationType.Q8_0: (32, 2 + 32),
|
||||
GGMLQuantizationType.Q8_1: (32, 4 + 4 + 32),
|
||||
GGMLQuantizationType.Q2_K: (256, 2 + 2 + QK_K // 16 + QK_K // 4),
|
||||
GGMLQuantizationType.Q3_K: (256, 2 + QK_K // 4 + QK_K // 8 + 12),
|
||||
GGMLQuantizationType.Q4_K: (256, 2 + 2 + QK_K // 2 + 12),
|
||||
GGMLQuantizationType.Q5_K: (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
|
||||
GGMLQuantizationType.Q6_K: (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
|
||||
GGMLQuantizationType.Q8_K: (256, 4 + QK_K + QK_K // 8),
|
||||
GGMLQuantizationType.F32: (1, 4),
|
||||
GGMLQuantizationType.F16: (1, 2),
|
||||
GGMLQuantizationType.Q4_0: (32, 2 + 16),
|
||||
GGMLQuantizationType.Q4_1: (32, 2 + 2 + 16),
|
||||
GGMLQuantizationType.Q5_0: (32, 2 + 4 + 16),
|
||||
GGMLQuantizationType.Q5_1: (32, 2 + 2 + 4 + 16),
|
||||
GGMLQuantizationType.Q8_0: (32, 2 + 32),
|
||||
GGMLQuantizationType.Q8_1: (32, 4 + 4 + 32),
|
||||
GGMLQuantizationType.Q2_K: (256, 2 + 2 + QK_K // 16 + QK_K // 4),
|
||||
GGMLQuantizationType.Q3_K: (256, 2 + QK_K // 4 + QK_K // 8 + 12),
|
||||
GGMLQuantizationType.Q4_K: (256, 2 + 2 + QK_K // 2 + 12),
|
||||
GGMLQuantizationType.Q5_K: (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
|
||||
GGMLQuantizationType.Q6_K: (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
|
||||
GGMLQuantizationType.Q8_K: (256, 4 + QK_K + QK_K // 8),
|
||||
GGMLQuantizationType.IQ2_XXS: (256, 2 + QK_K // 4),
|
||||
GGMLQuantizationType.IQ2_XS: (256, 2 + QK_K // 4 + QK_K // 32),
|
||||
GGMLQuantizationType.IQ3_XXS: (256, 2 + QK_K // 4 + QK_K // 8),
|
||||
GGMLQuantizationType.IQ1_S: (256, 2 + QK_K // 8 + QK_K // 16),
|
||||
GGMLQuantizationType.IQ4_NL: (32, 2 + 16),
|
||||
GGMLQuantizationType.IQ3_S: (256, 2 + QK_K // 4 + QK_K // 8 + QK_K // 32 + 4),
|
||||
GGMLQuantizationType.IQ2_S: (256, 2 + QK_K // 4 + QK_K // 16),
|
||||
GGMLQuantizationType.IQ4_XS: (256, 2 + 2 + QK_K // 2 + QK_K // 64),
|
||||
}
|
||||
|
||||
|
||||
@@ -697,6 +769,12 @@ KEY_ROPE_SCALING_FACTOR = Keys.Rope.SCALING_FACTOR
|
||||
KEY_ROPE_SCALING_ORIG_CTX_LEN = Keys.Rope.SCALING_ORIG_CTX_LEN
|
||||
KEY_ROPE_SCALING_FINETUNED = Keys.Rope.SCALING_FINETUNED
|
||||
|
||||
# SSM
|
||||
KEY_SSM_CONV_KERNEL = Keys.SSM.CONV_KERNEL
|
||||
KEY_SSM_INNER_SIZE = Keys.SSM.INNER_SIZE
|
||||
KEY_SSM_STATE_SIZE = Keys.SSM.STATE_SIZE
|
||||
KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
|
||||
|
||||
# tokenization
|
||||
KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL
|
||||
KEY_TOKENIZER_LIST = Keys.Tokenizer.LIST
|
||||
|
||||
@@ -362,7 +362,7 @@ class GGUFWriter:
|
||||
self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)
|
||||
|
||||
def add_pooling_type(self, value: PoolingType) -> None:
|
||||
self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value)
|
||||
self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value.value)
|
||||
|
||||
def add_rope_dimension_count(self, count: int) -> None:
|
||||
self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
|
||||
@@ -382,6 +382,18 @@ class GGUFWriter:
|
||||
def add_rope_scaling_finetuned(self, value: bool) -> None:
|
||||
self.add_bool(Keys.Rope.SCALING_FINETUNED.format(arch=self.arch), value)
|
||||
|
||||
def add_ssm_conv_kernel(self, value: int) -> None:
|
||||
self.add_uint32(Keys.SSM.CONV_KERNEL.format(arch=self.arch), value)
|
||||
|
||||
def add_ssm_inner_size(self, value: int) -> None:
|
||||
self.add_uint32(Keys.SSM.INNER_SIZE.format(arch=self.arch), value)
|
||||
|
||||
def add_ssm_state_size(self, value: int) -> None:
|
||||
self.add_uint32(Keys.SSM.STATE_SIZE.format(arch=self.arch), value)
|
||||
|
||||
def add_ssm_time_step_rank(self, value: int) -> None:
|
||||
self.add_uint32(Keys.SSM.TIME_STEP_RANK.format(arch=self.arch), value)
|
||||
|
||||
def add_tokenizer_model(self, model: str) -> None:
|
||||
self.add_string(Keys.Tokenizer.MODEL, model)
|
||||
|
||||
|
||||
@@ -20,6 +20,9 @@ class TensorNameMap:
|
||||
"wte", # gpt2
|
||||
"transformer.embd.wte", # phi2
|
||||
"model.tok_embeddings", # internlm2
|
||||
"model.embedding", # mamba-qbert
|
||||
"backbone.embedding", # mamba
|
||||
"backbone.embeddings", # mamba-hf
|
||||
),
|
||||
|
||||
# Token type embeddings
|
||||
@@ -44,7 +47,7 @@ class TensorNameMap:
|
||||
# Output
|
||||
MODEL_TENSOR.OUTPUT: (
|
||||
"embed_out", # gptneox
|
||||
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen
|
||||
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba
|
||||
"output", # llama-pth bloom internlm2
|
||||
"word_embeddings_for_head", # persimmon
|
||||
"lm_head.linear", # phi2
|
||||
@@ -61,6 +64,8 @@ class TensorNameMap:
|
||||
"language_model.encoder.final_layernorm", # persimmon
|
||||
"model.final_layernorm", # persimmon
|
||||
"lm_head.ln", # phi2
|
||||
"model.norm_f", # mamba-qbert
|
||||
"backbone.norm_f", # mamba
|
||||
),
|
||||
|
||||
# Rope frequencies
|
||||
@@ -86,6 +91,8 @@ class TensorNameMap:
|
||||
"transformer.h.{bid}.ln", # phi2
|
||||
"model.layers.layers.{bid}.norm", # plamo
|
||||
"model.layers.{bid}.attention_norm", # internlm2
|
||||
"model.layers.{bid}.norm", # mamba-qbert
|
||||
"backbone.layers.{bid}.norm", # mamba
|
||||
),
|
||||
|
||||
# Attention norm 2
|
||||
@@ -210,6 +217,7 @@ class TensorNameMap:
|
||||
"model.layers.layers.{bid}.mlp.up_proj", # plamo
|
||||
"model.layers.{bid}.feed_forward.w3", # internlm2
|
||||
"encoder.layers.{bid}.mlp.fc11", # nomic-bert
|
||||
"model.layers.{bid}.mlp.c_fc", # starcoder2
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_UP_EXP: (
|
||||
@@ -256,6 +264,7 @@ class TensorNameMap:
|
||||
"model.layers.layers.{bid}.mlp.down_proj", # plamo
|
||||
"model.layers.{bid}.feed_forward.w2", # internlm2
|
||||
"encoder.layers.{bid}.mlp.fc2", # nomic-bert
|
||||
"model.layers.{bid}.mlp.c_proj", # starcoder2
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_DOWN_EXP: (
|
||||
@@ -280,7 +289,42 @@ class TensorNameMap:
|
||||
MODEL_TENSOR.LAYER_OUT_NORM: (
|
||||
"encoder.layer.{bid}.output.LayerNorm", # bert
|
||||
"encoder.layers.{bid}.norm2", # nomic-bert
|
||||
)
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_IN: (
|
||||
"model.layers.{bid}.in_proj",
|
||||
"backbone.layers.{bid}.mixer.in_proj",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_CONV1D: (
|
||||
"model.layers.{bid}.conv1d",
|
||||
"backbone.layers.{bid}.mixer.conv1d",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_X: (
|
||||
"model.layers.{bid}.x_proj",
|
||||
"backbone.layers.{bid}.mixer.x_proj",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_DT: (
|
||||
"model.layers.{bid}.dt_proj",
|
||||
"backbone.layers.{bid}.mixer.dt_proj",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_A: (
|
||||
"model.layers.{bid}.A_log",
|
||||
"backbone.layers.{bid}.mixer.A_log",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_D: (
|
||||
"model.layers.{bid}.D",
|
||||
"backbone.layers.{bid}.mixer.D",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_OUT: (
|
||||
"model.layers.{bid}.out_proj",
|
||||
"backbone.layers.{bid}.mixer.out_proj",
|
||||
),
|
||||
}
|
||||
|
||||
mapping: dict[str, tuple[MODEL_TENSOR, str]]
|
||||
|
||||
@@ -15,7 +15,7 @@ array ::=
|
||||
|
||||
string ::=
|
||||
"\"" (
|
||||
[^"\\] |
|
||||
[^"\\\x7F\x00-\x1F] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
|
||||
)* "\"" ws
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ array ::=
|
||||
|
||||
string ::=
|
||||
"\"" (
|
||||
[^"\\] |
|
||||
[^"\\\x7F\x00-\x1F] |
|
||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
|
||||
)* "\"" ws
|
||||
|
||||
|
||||
90
llama.h
90
llama.h
@@ -129,6 +129,7 @@ extern "C" {
|
||||
};
|
||||
|
||||
enum llama_pooling_type {
|
||||
LLAMA_POOLING_TYPE_UNSPECIFIED = -1,
|
||||
LLAMA_POOLING_TYPE_NONE = 0,
|
||||
LLAMA_POOLING_TYPE_MEAN = 1,
|
||||
LLAMA_POOLING_TYPE_CLS = 2,
|
||||
@@ -162,7 +163,7 @@ extern "C" {
|
||||
// - embd : token embeddings (i.e. float vector of size n_embd) (used when token is NULL)
|
||||
// - pos : the positions of the respective token in the sequence
|
||||
// - seq_id : the sequence to which the respective token belongs
|
||||
// - logits : if zero, the logits for the respective token will not be output
|
||||
// - logits : if zero, the logits (and/or the embeddings) for the respective token will not be output
|
||||
//
|
||||
typedef struct llama_batch {
|
||||
int32_t n_tokens;
|
||||
@@ -172,7 +173,7 @@ extern "C" {
|
||||
llama_pos * pos;
|
||||
int32_t * n_seq_id;
|
||||
llama_seq_id ** seq_id;
|
||||
int8_t * logits;
|
||||
int8_t * logits; // TODO: rename this to "output"
|
||||
|
||||
// NOTE: helpers for smooth API transition - can be deprecated in the future
|
||||
// for future-proof code, use the above fields instead and ignore everything below
|
||||
@@ -234,9 +235,13 @@ extern "C" {
|
||||
uint32_t seed; // RNG seed, -1 for random
|
||||
uint32_t n_ctx; // text context, 0 = from model
|
||||
uint32_t n_batch; // prompt processing maximum batch size
|
||||
uint32_t n_parallel; // number of parallel sequences (i.e. distinct states for recurrent models)
|
||||
uint32_t n_threads; // number of threads to use for generation
|
||||
uint32_t n_threads_batch; // number of threads to use for batch processing
|
||||
int32_t rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type`
|
||||
|
||||
enum llama_rope_scaling_type rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type`
|
||||
enum llama_pooling_type pooling_type; // whether to pool (sum) embedding results by sequence id
|
||||
// (ignored if no pooling layer)
|
||||
|
||||
// ref: https://github.com/ggerganov/llama.cpp/pull/2054
|
||||
float rope_freq_base; // RoPE base frequency, 0 = from model
|
||||
@@ -255,11 +260,15 @@ extern "C" {
|
||||
enum ggml_type type_v; // data type for V cache
|
||||
|
||||
// Keep the booleans together to avoid misalignment during copy-by-value.
|
||||
bool mul_mat_q; // if true, use experimental mul_mat_q kernels (DEPRECATED - always true)
|
||||
bool logits_all; // the llama_eval() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
|
||||
bool embedding; // embedding mode only
|
||||
bool logits_all; // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
|
||||
bool embeddings; // if true, extract embeddings (together with logits)
|
||||
bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
|
||||
bool do_pooling; // whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
|
||||
|
||||
// Abort callback
|
||||
// if it returns true, execution of llama_decode() will be aborted
|
||||
// currently works only with CPU execution
|
||||
ggml_abort_callback abort_callback;
|
||||
void * abort_callback_data;
|
||||
};
|
||||
|
||||
// model quantization parameters
|
||||
@@ -364,13 +373,11 @@ extern "C" {
|
||||
LLAMA_API bool llama_supports_mlock (void);
|
||||
LLAMA_API bool llama_supports_gpu_offload(void);
|
||||
|
||||
LLAMA_API DEPRECATED(bool llama_mmap_supported (void), "use llama_supports_mmap() instead");
|
||||
LLAMA_API DEPRECATED(bool llama_mlock_supported(void), "use llama_supports_mlock() instead");
|
||||
|
||||
LLAMA_API const struct llama_model * llama_get_model(const struct llama_context * ctx);
|
||||
|
||||
LLAMA_API uint32_t llama_n_ctx (const struct llama_context * ctx);
|
||||
LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx);
|
||||
LLAMA_API uint32_t llama_n_max_seq (const struct llama_context * ctx);
|
||||
|
||||
LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model);
|
||||
LLAMA_API enum llama_rope_type llama_rope_type (const struct llama_model * model);
|
||||
@@ -423,14 +430,6 @@ extern "C" {
|
||||
// The model needs to be reloaded before applying a new adapter, otherwise the adapter
|
||||
// will be applied on top of the previous one
|
||||
// Returns 0 on success
|
||||
LLAMA_API DEPRECATED(int32_t llama_apply_lora_from_file(
|
||||
struct llama_context * ctx,
|
||||
const char * path_lora,
|
||||
float scale,
|
||||
const char * path_base_model,
|
||||
int32_t n_threads),
|
||||
"use llama_model_apply_lora_from_file instead");
|
||||
|
||||
LLAMA_API int32_t llama_model_apply_lora_from_file(
|
||||
const struct llama_model * model,
|
||||
const char * path_lora,
|
||||
@@ -505,7 +504,7 @@ extern "C" {
|
||||
// seq_id < 0 : match any sequence
|
||||
// p0 < 0 : [0, p1]
|
||||
// p1 < 0 : [p0, inf)
|
||||
LLAMA_API void llama_kv_cache_seq_rm(
|
||||
LLAMA_API bool llama_kv_cache_seq_rm(
|
||||
struct llama_context * ctx,
|
||||
llama_seq_id seq_id,
|
||||
llama_pos p0,
|
||||
@@ -586,7 +585,7 @@ extern "C" {
|
||||
// Returns the number of bytes read
|
||||
LLAMA_API size_t llama_set_state_data(
|
||||
struct llama_context * ctx,
|
||||
uint8_t * src);
|
||||
const uint8_t * src);
|
||||
|
||||
// Save/load session file
|
||||
LLAMA_API bool llama_load_session_file(
|
||||
@@ -606,27 +605,6 @@ extern "C" {
|
||||
// Decoding
|
||||
//
|
||||
|
||||
// Run the llama inference to obtain the logits and probabilities for the next token(s).
|
||||
// tokens + n_tokens is the provided batch of new tokens to process
|
||||
// n_past is the number of tokens to use from previous eval calls
|
||||
// Returns 0 on success
|
||||
// DEPRECATED: use llama_decode() instead
|
||||
LLAMA_API DEPRECATED(int llama_eval(
|
||||
struct llama_context * ctx,
|
||||
llama_token * tokens,
|
||||
int32_t n_tokens,
|
||||
int32_t n_past),
|
||||
"use llama_decode() instead");
|
||||
|
||||
// Same as llama_eval, but use float matrix input directly.
|
||||
// DEPRECATED: use llama_decode() instead
|
||||
LLAMA_API DEPRECATED(int llama_eval_embd(
|
||||
struct llama_context * ctx,
|
||||
float * embd,
|
||||
int32_t n_tokens,
|
||||
int32_t n_past),
|
||||
"use llama_decode() instead");
|
||||
|
||||
// Return batch for single sequence of tokens starting at pos_0
|
||||
//
|
||||
// NOTE: this is a helper function to facilitate transition to the new batch API - avoid using it
|
||||
@@ -665,7 +643,10 @@ extern "C" {
|
||||
// n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens)
|
||||
LLAMA_API void llama_set_n_threads(struct llama_context * ctx, uint32_t n_threads, uint32_t n_threads_batch);
|
||||
|
||||
// Token logits obtained from the last call to llama_eval()
|
||||
// Set abort callback
|
||||
LLAMA_API void llama_set_abort_callback(struct llama_context * ctx, ggml_abort_callback abort_callback, void * abort_callback_data);
|
||||
|
||||
// Token logits obtained from the last call to llama_decode()
|
||||
// The logits for the last token are stored in the last row
|
||||
// Logits for which llama_batch.logits[i] == 0 are undefined
|
||||
// Rows: n_tokens provided with llama_batch
|
||||
@@ -676,14 +657,20 @@ extern "C" {
|
||||
// llama_get_logits(ctx) + i*n_vocab
|
||||
LLAMA_API float * llama_get_logits_ith(struct llama_context * ctx, int32_t i);
|
||||
|
||||
// Get the embeddings for the input
|
||||
// shape: [n_embd] (1-dimensional)
|
||||
// Get all output token embeddings
|
||||
// shape: [n_tokens*n_embd] (1-dimensional)
|
||||
LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);
|
||||
|
||||
// Get the embeddings for the ith sequence
|
||||
// Get the embeddings for the ith token
|
||||
// llama_get_embeddings(ctx) + i*n_embd
|
||||
// shape: [n_embd] (1-dimensional)
|
||||
LLAMA_API float * llama_get_embeddings_ith(struct llama_context * ctx, int32_t i);
|
||||
|
||||
// Get the embeddings for a sequence id
|
||||
// Returns NULL if pooling_type is LLAMA_POOLING_TYPE_NONE
|
||||
// shape: [n_embd] (1-dimensional)
|
||||
LLAMA_API float * llama_get_embeddings_seq(struct llama_context * ctx, llama_seq_id seq_id);
|
||||
|
||||
//
|
||||
// Vocab
|
||||
//
|
||||
@@ -800,13 +787,6 @@ extern "C" {
|
||||
float * logits_guidance,
|
||||
float scale);
|
||||
|
||||
LLAMA_API DEPRECATED(void llama_sample_classifier_free_guidance(
|
||||
struct llama_context * ctx,
|
||||
llama_token_data_array * candidates,
|
||||
struct llama_context * guidance_ctx,
|
||||
float scale),
|
||||
"use llama_sample_apply_guidance() instead");
|
||||
|
||||
/// @details Sorts candidate tokens by their logits in descending order and calculate probabilities based on logits.
|
||||
LLAMA_API void llama_sample_softmax(
|
||||
struct llama_context * ctx,
|
||||
@@ -860,12 +840,6 @@ extern "C" {
|
||||
llama_token_data_array * candidates,
|
||||
float temp);
|
||||
|
||||
LLAMA_API DEPRECATED(void llama_sample_temperature(
|
||||
struct llama_context * ctx,
|
||||
llama_token_data_array * candidates,
|
||||
float temp),
|
||||
"use llama_sample_temp instead");
|
||||
|
||||
/// @details Apply constraints from grammar
|
||||
LLAMA_API void llama_sample_grammar(
|
||||
struct llama_context * ctx,
|
||||
|
||||
@@ -1,2 +1,3 @@
|
||||
-r ./requirements-convert.txt
|
||||
torch~=2.1.1
|
||||
einops~=0.7.0
|
||||
|
||||
@@ -18,7 +18,7 @@ except ImportError as e:
|
||||
KEY_PROPERTIES = [
|
||||
"cpu_info", "gpu_info", "n_gpu_layers", "main_gpu", "cuda", "opencl", "metal", "gpu_blas",
|
||||
"blas", "model_filename", "model_type", "model_size", "model_n_params", "n_batch", "n_threads",
|
||||
"type_k", "type_v", "no_kv_offload", "mul_mat_q", "tensor_split", "n_prompt", "n_gen"
|
||||
"type_k", "type_v", "no_kv_offload", "tensor_split", "n_prompt", "n_gen"
|
||||
]
|
||||
|
||||
# Properties that are boolean and are converted to Yes/No for the table:
|
||||
@@ -31,7 +31,7 @@ PRETTY_NAMES = {
|
||||
"model_size": "Model Size [GiB]", "model_n_params": "Num. of Parameters",
|
||||
"n_batch": "Batch size", "n_threads": "Threads", "type_k": "K type", "type_v": "V type",
|
||||
"n_gpu_layers": "GPU layers", "main_gpu": "Main GPU", "no_kv_offload": "NKVO",
|
||||
"mul_mat_q": "MMQ", "tensor_split": "Tensor split"
|
||||
"tensor_split": "Tensor split"
|
||||
}
|
||||
|
||||
DEFAULT_SHOW = ["model_type"] # Always show these properties by default.
|
||||
|
||||
213
scripts/pod-llama.sh
Normal file
213
scripts/pod-llama.sh
Normal file
@@ -0,0 +1,213 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Use this script only on fresh pods (runpod.io)!
|
||||
# Otherwise, it can break your environment!
|
||||
#
|
||||
|
||||
if [ -z "$1" ]; then
|
||||
echo "Usage: $0 <data>"
|
||||
echo " 0: no models"
|
||||
echo " 1: tinyllama-1b"
|
||||
echo " 2: codellama-7b"
|
||||
echo " 3: codellama-13b"
|
||||
echo " 4: codellama-34b"
|
||||
echo " 5: codellama-7b-instruct"
|
||||
echo " 6: codellama-13b-instruct"
|
||||
echo " 7: codellama-34b-instruct"
|
||||
|
||||
exit 1
|
||||
fi
|
||||
|
||||
set -x
|
||||
|
||||
# setup deps
|
||||
apt-get update
|
||||
apt-get install -y git-lfs cmake cmake-curses-gui vim ruby
|
||||
git-lfs install
|
||||
|
||||
if [ ! -d "/workspace" ]; then
|
||||
ln -sfn $(pwd) /workspace
|
||||
fi
|
||||
|
||||
# download data
|
||||
cd /workspace
|
||||
|
||||
# this is useful to git clone repos without doubling the disk size due to .git
|
||||
git clone https://github.com/iboB/git-lfs-download
|
||||
ln -sfn /workspace/git-lfs-download/git-lfs-download /usr/local/bin/git-lfs-download
|
||||
|
||||
# llama.cpp
|
||||
cd /workspace
|
||||
git clone https://github.com/ggerganov/llama.cpp
|
||||
|
||||
cd llama.cpp
|
||||
|
||||
LLAMA_CUBLAS=1 make -j
|
||||
|
||||
ln -sfn /workspace/TinyLlama-1.1B-Chat-v0.3 ./models/tinyllama-1b
|
||||
ln -sfn /workspace/CodeLlama-7b-hf ./models/codellama-7b
|
||||
ln -sfn /workspace/CodeLlama-13b-hf ./models/codellama-13b
|
||||
ln -sfn /workspace/CodeLlama-34b-hf ./models/codellama-34b
|
||||
ln -sfn /workspace/CodeLlama-7b-Instruct-hf ./models/codellama-7b-instruct
|
||||
ln -sfn /workspace/CodeLlama-13b-Instruct-hf ./models/codellama-13b-instruct
|
||||
ln -sfn /workspace/CodeLlama-34b-Instruct-hf ./models/codellama-34b-instruct
|
||||
|
||||
pip install -r requirements.txt
|
||||
|
||||
# cmake
|
||||
cd /workspace/llama.cpp
|
||||
|
||||
mkdir build-cublas
|
||||
cd build-cublas
|
||||
|
||||
cmake -DLLAMA_CUBLAS=1 ../
|
||||
make -j
|
||||
|
||||
if [ "$1" -eq "0" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# more models
|
||||
if [ "$1" -eq "1" ]; then
|
||||
cd /workspace
|
||||
|
||||
git-lfs-download https://huggingface.co/PY007/TinyLlama-1.1B-Chat-v0.3
|
||||
|
||||
cd /workspace/llama.cpp
|
||||
|
||||
python3 convert.py ./models/tinyllama-1b --outfile ./models/tinyllama-1b/ggml-model-f16.gguf --outtype f16
|
||||
|
||||
./quantize ./models/tinyllama-1b/ggml-model-f16.gguf ./models/tinyllama-1b/ggml-model-q4_0.gguf q4_0
|
||||
./quantize ./models/tinyllama-1b/ggml-model-f16.gguf ./models/tinyllama-1b/ggml-model-q4_k.gguf q4_k
|
||||
./quantize ./models/tinyllama-1b/ggml-model-f16.gguf ./models/tinyllama-1b/ggml-model-q8_0.gguf q8_0
|
||||
fi
|
||||
|
||||
if [ "$1" -eq "2" ]; then
|
||||
cd /workspace
|
||||
|
||||
git-lfs-download https://huggingface.co/codellama/CodeLlama-7b-hf --without *safetensors*
|
||||
rm -v ./CodeLlama-7b-hf/*safetensors*
|
||||
|
||||
cd /workspace/llama.cpp
|
||||
|
||||
python3 convert.py ./models/codellama-7b --outfile ./models/codellama-7b/ggml-model-f16.gguf --outtype f16
|
||||
|
||||
./quantize ./models/codellama-7b/ggml-model-f16.gguf ./models/codellama-7b/ggml-model-q4_0.gguf q4_0
|
||||
./quantize ./models/codellama-7b/ggml-model-f16.gguf ./models/codellama-7b/ggml-model-q4_k.gguf q4_k
|
||||
./quantize ./models/codellama-7b/ggml-model-f16.gguf ./models/codellama-7b/ggml-model-q8_0.gguf q8_0
|
||||
fi
|
||||
|
||||
if [ "$1" -eq "3" ]; then
|
||||
cd /workspace
|
||||
|
||||
git-lfs-download https://huggingface.co/codellama/CodeLlama-13b-hf --without *safetensors*
|
||||
rm -v ./CodeLlama-13b-hf/*safetensors*
|
||||
|
||||
cd /workspace/llama.cpp
|
||||
|
||||
python3 convert.py ./models/codellama-13b --outfile ./models/codellama-13b/ggml-model-f16.gguf --outtype f16
|
||||
|
||||
./quantize ./models/codellama-13b/ggml-model-f16.gguf ./models/codellama-13b/ggml-model-q4_0.gguf q4_0
|
||||
./quantize ./models/codellama-13b/ggml-model-f16.gguf ./models/codellama-13b/ggml-model-q4_k.gguf q4_k
|
||||
./quantize ./models/codellama-13b/ggml-model-f16.gguf ./models/codellama-13b/ggml-model-q8_0.gguf q8_0
|
||||
fi
|
||||
|
||||
if [ "$1" -eq "4" ]; then
|
||||
cd /workspace
|
||||
|
||||
git-lfs-download https://huggingface.co/codellama/CodeLlama-34b-hf --without *safetensors*
|
||||
rm -v ./CodeLlama-34b-hf/*safetensors*
|
||||
|
||||
cd /workspace/llama.cpp
|
||||
|
||||
python3 convert.py ./models/codellama-34b --outfile ./models/codellama-34b/ggml-model-f16.gguf --outtype f16
|
||||
|
||||
./quantize ./models/codellama-34b/ggml-model-f16.gguf ./models/codellama-34b/ggml-model-q4_0.gguf q4_0
|
||||
./quantize ./models/codellama-34b/ggml-model-f16.gguf ./models/codellama-34b/ggml-model-q4_k.gguf q4_k
|
||||
./quantize ./models/codellama-34b/ggml-model-f16.gguf ./models/codellama-34b/ggml-model-q8_0.gguf q8_0
|
||||
fi
|
||||
|
||||
if [ "$1" -eq "5" ]; then
|
||||
cd /workspace
|
||||
|
||||
git-lfs-download https://huggingface.co/codellama/CodeLlama-7b-Instruct-hf --without *safetensors*
|
||||
rm -v ./CodeLlama-7b-Instruct-hf/*safetensors*
|
||||
|
||||
cd /workspace/llama.cpp
|
||||
|
||||
python3 convert.py ./models/codellama-7b-instruct --outfile ./models/codellama-7b-instruct/ggml-model-f16.gguf --outtype f16
|
||||
|
||||
./quantize ./models/codellama-7b-instruct/ggml-model-f16.gguf ./models/codellama-7b-instruct/ggml-model-q4_0.gguf q4_0
|
||||
./quantize ./models/codellama-7b-instruct/ggml-model-f16.gguf ./models/codellama-7b-instruct/ggml-model-q4_k.gguf q4_k
|
||||
./quantize ./models/codellama-7b-instruct/ggml-model-f16.gguf ./models/codellama-7b-instruct/ggml-model-q8_0.gguf q8_0
|
||||
fi
|
||||
|
||||
if [ "$1" -eq "6" ]; then
|
||||
cd /workspace
|
||||
|
||||
git-lfs-download https://huggingface.co/codellama/CodeLlama-13b-Instruct-hf --without *safetensors*
|
||||
rm -v ./CodeLlama-13b-Instruct-hf/*safetensors*
|
||||
|
||||
cd /workspace/llama.cpp
|
||||
|
||||
python3 convert.py ./models/codellama-13b-instruct --outfile ./models/codellama-13b-instruct/ggml-model-f16.gguf --outtype f16
|
||||
|
||||
./quantize ./models/codellama-13b-instruct/ggml-model-f16.gguf ./models/codellama-13b-instruct/ggml-model-q4_0.gguf q4_0
|
||||
./quantize ./models/codellama-13b-instruct/ggml-model-f16.gguf ./models/codellama-13b-instruct/ggml-model-q4_k.gguf q4_k
|
||||
./quantize ./models/codellama-13b-instruct/ggml-model-f16.gguf ./models/codellama-13b-instruct/ggml-model-q8_0.gguf q8_0
|
||||
fi
|
||||
|
||||
if [ "$1" -eq "7" ]; then
|
||||
cd /workspace
|
||||
|
||||
git-lfs-download https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf --without *safetensors*
|
||||
rm -v ./CodeLlama-34b-Instruct-hf/*safetensors*
|
||||
|
||||
cd /workspace/llama.cpp
|
||||
|
||||
python3 convert.py ./models/codellama-34b-instruct --outfile ./models/codellama-34b-instruct/ggml-model-f16.gguf --outtype f16
|
||||
|
||||
./quantize ./models/codellama-34b-instruct/ggml-model-f16.gguf ./models/codellama-34b-instruct/ggml-model-q4_0.gguf q4_0
|
||||
./quantize ./models/codellama-34b-instruct/ggml-model-f16.gguf ./models/codellama-34b-instruct/ggml-model-q4_k.gguf q4_k
|
||||
./quantize ./models/codellama-34b-instruct/ggml-model-f16.gguf ./models/codellama-34b-instruct/ggml-model-q8_0.gguf q8_0
|
||||
fi
|
||||
|
||||
if [ "$1" -eq "1" ]; then
|
||||
# perf + perplexity
|
||||
cd /workspace/llama.cpp/build-cublas
|
||||
|
||||
make -j && ../scripts/run-all-perf.sh tinyllama-1b "f16" "-ngl 99 -t 1 -p 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,32,64,128,256,512,1024,2048 -n 128"
|
||||
|
||||
../scripts/get-wikitext-2.sh
|
||||
unzip wikitext-2-raw-v1.zip
|
||||
|
||||
make -j && ./bin/perplexity -m ../models/tinyllama-1b/ggml-model-f16.gguf -f ./wikitext-2-raw/wiki.test.raw -ngl 100 --chunks 32
|
||||
|
||||
# batched
|
||||
cd /workspace/llama.cpp
|
||||
|
||||
LLAMA_CUBLAS=1 make -j && ./batched ./models/tinyllama-1b/ggml-model-f16.gguf "Hello, my name is" 8 128 999
|
||||
|
||||
# batched-bench
|
||||
cd /workspace/llama.cpp
|
||||
|
||||
LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/tinyllama-1b/ggml-model-f16.gguf 4608 1 99 0 512 128 1,2,3,4,5,6,7,8,16,32
|
||||
|
||||
# parallel
|
||||
cd /workspace/llama.cpp
|
||||
|
||||
LLAMA_CUBLAS=1 make -j && ./parallel -m ./models/tinyllama-1b/ggml-model-f16.gguf -t 1 -ngl 100 -c 4096 -b 512 -s 1 -np 8 -ns 128 -n 100 -cb
|
||||
|
||||
fi
|
||||
|
||||
# speculative
|
||||
#if [ "$1" -eq "7" ]; then
|
||||
# cd /workspace/llama.cpp
|
||||
#
|
||||
# LLAMA_CUBLAS=1 make -j && ./speculative -m ./models/codellama-34b-instruct/ggml-model-f16.gguf -md ./models/codellama-7b-instruct/ggml-model-q4_0.gguf -p "# Dijkstra's shortest path algorithm in Python (4 spaces indentation) + complexity analysis:\n\n" -e -ngl 999 -ngld 999 -t 4 -n 512 -c 4096 -s 21 --draft 16 -np 1 --temp 0.0
|
||||
#fi
|
||||
|
||||
# more benches
|
||||
#LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/codellama-7b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
|
||||
#LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/codellama-13b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
|
||||
|
||||
@@ -94,6 +94,7 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
|
||||
# src/ggml-alloc.c -> ggml-alloc.c
|
||||
# src/ggml-backend-impl.h -> ggml-backend-impl.h
|
||||
# src/ggml-backend.c -> ggml-backend.c
|
||||
# src/ggml-common.h -> ggml-common.h
|
||||
# src/ggml-cuda.cu -> ggml-cuda.cu
|
||||
# src/ggml-cuda.h -> ggml-cuda.h
|
||||
# src/ggml-impl.h -> ggml-impl.h
|
||||
@@ -126,6 +127,7 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
|
||||
-e 's/src\/ggml-alloc\.c/ggml-alloc.c/g' \
|
||||
-e 's/src\/ggml-backend-impl\.h/ggml-backend-impl.h/g' \
|
||||
-e 's/src\/ggml-backend\.c/ggml-backend.c/g' \
|
||||
-e 's/src\/ggml-common\.h/ggml-common.h/g' \
|
||||
-e 's/src\/ggml-cuda\.cu/ggml-cuda.cu/g' \
|
||||
-e 's/src\/ggml-cuda\.h/ggml-cuda.h/g' \
|
||||
-e 's/src\/ggml-impl\.h/ggml-impl.h/g' \
|
||||
|
||||
@@ -1 +1 @@
|
||||
8cdf783f288a98eddf521b0ab1b4d405be9e18ba
|
||||
8695910a39102609073d0e099aa7c97d6bcb3bf9
|
||||
|
||||
@@ -4,6 +4,7 @@ cp -rpv ../ggml/src/ggml.c ./ggml.c
|
||||
cp -rpv ../ggml/src/ggml-alloc.c ./ggml-alloc.c
|
||||
cp -rpv ../ggml/src/ggml-backend-impl.h ./ggml-backend-impl.h
|
||||
cp -rpv ../ggml/src/ggml-backend.c ./ggml-backend.c
|
||||
cp -rpv ../ggml/src/ggml-common.h ./ggml-common.h
|
||||
cp -rpv ../ggml/src/ggml-cuda.cu ./ggml-cuda.cu
|
||||
cp -rpv ../ggml/src/ggml-cuda.h ./ggml-cuda.h
|
||||
cp -rpv ../ggml/src/ggml-impl.h ./ggml-impl.h
|
||||
|
||||
1
tests/.gitignore
vendored
1
tests/.gitignore
vendored
@@ -1,3 +1,4 @@
|
||||
*
|
||||
!*.*
|
||||
*.o
|
||||
ggml-common.h
|
||||
|
||||
@@ -53,7 +53,6 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
|
||||
} else if (ggml_is_quantized(tensor->type) || tensor->type == GGML_TYPE_F16) {
|
||||
GGML_ASSERT(size % ggml_blck_size(tensor->type) == 0);
|
||||
std::vector<uint8_t> dataq(ggml_row_size(tensor->type, size));
|
||||
int64_t hist[16];
|
||||
std::vector<float> imatrix(tensor->ne[0], 1.0f); // dummy importance matrix
|
||||
const float * im = imatrix.data();
|
||||
if (!ggml_quantize_requires_imatrix(tensor->type)) {
|
||||
@@ -63,7 +62,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
|
||||
im = nullptr;
|
||||
}
|
||||
}
|
||||
ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], hist, im);
|
||||
ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], im);
|
||||
ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
|
||||
} else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
|
||||
// This is going to create some weird integers though.
|
||||
@@ -1412,6 +1411,50 @@ struct test_pad : public test_case {
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_ARANGE
|
||||
struct test_arange : public test_case {
|
||||
const ggml_type type;
|
||||
const float start;
|
||||
const float stop;
|
||||
const float step;
|
||||
|
||||
std::string vars() override {
|
||||
return VARS_TO_STR4(type, start, stop, step);
|
||||
}
|
||||
|
||||
test_arange(ggml_type type = GGML_TYPE_F32,
|
||||
float start = 0.f, float stop = 10.f, float step = 1.f)
|
||||
: type(type), start(start), stop(stop), step(step) {}
|
||||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
ggml_tensor * out = ggml_arange(ctx, start, stop, step);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_TIMESTEP_EMBEDDING
|
||||
struct test_timestep_embedding : public test_case {
|
||||
const ggml_type type;
|
||||
const std::array<int64_t, 4> ne_a;
|
||||
const int dim;
|
||||
const int max_period;
|
||||
|
||||
std::string vars() override {
|
||||
return VARS_TO_STR4(type, ne_a, dim, max_period);
|
||||
}
|
||||
|
||||
test_timestep_embedding(ggml_type type = GGML_TYPE_F32,
|
||||
std::array<int64_t, 4> ne_a = {2, 1, 1, 1},
|
||||
int dim = 320, int max_period=10000)
|
||||
: type(type), ne_a(ne_a), dim(dim), max_period(max_period) {}
|
||||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
|
||||
ggml_tensor * out = ggml_timestep_embedding(ctx, a, dim, max_period);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_LEAKY_RELU
|
||||
struct test_leaky_relu : public test_case {
|
||||
const ggml_type type;
|
||||
@@ -2126,6 +2169,8 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
|
||||
test_cases.emplace_back(new test_group_norm());
|
||||
test_cases.emplace_back(new test_acc());
|
||||
test_cases.emplace_back(new test_pad());
|
||||
test_cases.emplace_back(new test_arange());
|
||||
test_cases.emplace_back(new test_timestep_embedding());
|
||||
test_cases.emplace_back(new test_leaky_relu());
|
||||
|
||||
// these tests are disabled to save execution time, but they can be handy for debugging
|
||||
|
||||
308
unicode.h
308
unicode.h
@@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
@@ -223,6 +224,313 @@ static const std::vector<std::pair<uint32_t, uint32_t>> control_ranges = {
|
||||
{0x2B81E, 0x2B81F}, {0x2CEA2, 0x2CEAF}, {0x2EBE1, 0x2F7FF}, {0x2FA1E, 0x2FFFF}, {0x3134B, 0xE00FF}, {0xE01F0, 0x10FFFF},
|
||||
};
|
||||
|
||||
static const std::multimap<uint32_t, uint32_t> nfd_map = {
|
||||
{0xC0, 0x41}, {0xC0, 0x300}, {0xC1, 0x41}, {0xC1, 0x301}, {0xC2, 0x41}, {0xC2, 0x302}, {0xC3, 0x41}, {0xC3, 0x303}, {0xC4, 0x41}, {0xC4, 0x308}, {0xC5, 0x41}, {0xC5, 0x30A}, {0xC7, 0x43},
|
||||
{0xC7, 0x327}, {0xC8, 0x45}, {0xC8, 0x300}, {0xC9, 0x45}, {0xC9, 0x301}, {0xCA, 0x45}, {0xCA, 0x302}, {0xCB, 0x45}, {0xCB, 0x308}, {0xCC, 0x49}, {0xCC, 0x300}, {0xCD, 0x49}, {0xCD, 0x301},
|
||||
{0xCE, 0x49}, {0xCE, 0x302}, {0xCF, 0x49}, {0xCF, 0x308}, {0xD1, 0x4E}, {0xD1, 0x303}, {0xD2, 0x4F}, {0xD2, 0x300}, {0xD3, 0x4F}, {0xD3, 0x301}, {0xD4, 0x4F}, {0xD4, 0x302}, {0xD5, 0x4F},
|
||||
{0xD5, 0x303}, {0xD6, 0x4F}, {0xD6, 0x308}, {0xD9, 0x55}, {0xD9, 0x300}, {0xDA, 0x55}, {0xDA, 0x301}, {0xDB, 0x55}, {0xDB, 0x302}, {0xDC, 0x55}, {0xDC, 0x308}, {0xDD, 0x59}, {0xDD, 0x301},
|
||||
{0xE0, 0x61}, {0xE0, 0x300}, {0xE1, 0x61}, {0xE1, 0x301}, {0xE2, 0x61}, {0xE2, 0x302}, {0xE3, 0x61}, {0xE3, 0x303}, {0xE4, 0x61}, {0xE4, 0x308}, {0xE5, 0x61}, {0xE5, 0x30A}, {0xE7, 0x63},
|
||||
{0xE7, 0x327}, {0xE8, 0x65}, {0xE8, 0x300}, {0xE9, 0x65}, {0xE9, 0x301}, {0xEA, 0x65}, {0xEA, 0x302}, {0xEB, 0x65}, {0xEB, 0x308}, {0xEC, 0x69}, {0xEC, 0x300}, {0xED, 0x69}, {0xED, 0x301},
|
||||
{0xEE, 0x69}, {0xEE, 0x302}, {0xEF, 0x69}, {0xEF, 0x308}, {0xF1, 0x6E}, {0xF1, 0x303}, {0xF2, 0x6F}, {0xF2, 0x300}, {0xF3, 0x6F}, {0xF3, 0x301}, {0xF4, 0x6F}, {0xF4, 0x302}, {0xF5, 0x6F},
|
||||
{0xF5, 0x303}, {0xF6, 0x6F}, {0xF6, 0x308}, {0xF9, 0x75}, {0xF9, 0x300}, {0xFA, 0x75}, {0xFA, 0x301}, {0xFB, 0x75}, {0xFB, 0x302}, {0xFC, 0x75}, {0xFC, 0x308}, {0xFD, 0x79}, {0xFD, 0x301},
|
||||
{0xFF, 0x79}, {0xFF, 0x308}, {0x100, 0x41}, {0x100, 0x304}, {0x101, 0x61}, {0x101, 0x304}, {0x102, 0x41}, {0x102, 0x306}, {0x103, 0x61}, {0x103, 0x306}, {0x104, 0x41}, {0x104, 0x328}, {0x105, 0x61},
|
||||
{0x105, 0x328}, {0x106, 0x43}, {0x106, 0x301}, {0x107, 0x63}, {0x107, 0x301}, {0x108, 0x43}, {0x108, 0x302}, {0x109, 0x63}, {0x109, 0x302}, {0x10A, 0x43}, {0x10A, 0x307}, {0x10B, 0x63},
|
||||
{0x10B, 0x307}, {0x10C, 0x43}, {0x10C, 0x30C}, {0x10D, 0x63}, {0x10D, 0x30C}, {0x10E, 0x44}, {0x10E, 0x30C}, {0x10F, 0x64}, {0x10F, 0x30C}, {0x112, 0x45}, {0x112, 0x304}, {0x113, 0x65},
|
||||
{0x113, 0x304}, {0x114, 0x45}, {0x114, 0x306}, {0x115, 0x65}, {0x115, 0x306}, {0x116, 0x45}, {0x116, 0x307}, {0x117, 0x65}, {0x117, 0x307}, {0x118, 0x45}, {0x118, 0x328}, {0x119, 0x65},
|
||||
{0x119, 0x328}, {0x11A, 0x45}, {0x11A, 0x30C}, {0x11B, 0x65}, {0x11B, 0x30C}, {0x11C, 0x47}, {0x11C, 0x302}, {0x11D, 0x67}, {0x11D, 0x302}, {0x11E, 0x47}, {0x11E, 0x306}, {0x11F, 0x67},
|
||||
{0x11F, 0x306}, {0x120, 0x47}, {0x120, 0x307}, {0x121, 0x67}, {0x121, 0x307}, {0x122, 0x47}, {0x122, 0x327}, {0x123, 0x67}, {0x123, 0x327}, {0x124, 0x48}, {0x124, 0x302}, {0x125, 0x68},
|
||||
{0x125, 0x302}, {0x128, 0x49}, {0x128, 0x303}, {0x129, 0x69}, {0x129, 0x303}, {0x12A, 0x49}, {0x12A, 0x304}, {0x12B, 0x69}, {0x12B, 0x304}, {0x12C, 0x49}, {0x12C, 0x306}, {0x12D, 0x69},
|
||||
{0x12D, 0x306}, {0x12E, 0x49}, {0x12E, 0x328}, {0x12F, 0x69}, {0x12F, 0x328}, {0x130, 0x49}, {0x130, 0x307}, {0x134, 0x4A}, {0x134, 0x302}, {0x135, 0x6A}, {0x135, 0x302}, {0x136, 0x4B},
|
||||
{0x136, 0x327}, {0x137, 0x6B}, {0x137, 0x327}, {0x139, 0x4C}, {0x139, 0x301}, {0x13A, 0x6C}, {0x13A, 0x301}, {0x13B, 0x4C}, {0x13B, 0x327}, {0x13C, 0x6C}, {0x13C, 0x327}, {0x13D, 0x4C},
|
||||
{0x13D, 0x30C}, {0x13E, 0x6C}, {0x13E, 0x30C}, {0x143, 0x4E}, {0x143, 0x301}, {0x144, 0x6E}, {0x144, 0x301}, {0x145, 0x4E}, {0x145, 0x327}, {0x146, 0x6E}, {0x146, 0x327}, {0x147, 0x4E},
|
||||
{0x147, 0x30C}, {0x148, 0x6E}, {0x148, 0x30C}, {0x14C, 0x4F}, {0x14C, 0x304}, {0x14D, 0x6F}, {0x14D, 0x304}, {0x14E, 0x4F}, {0x14E, 0x306}, {0x14F, 0x6F}, {0x14F, 0x306}, {0x150, 0x4F},
|
||||
{0x150, 0x30B}, {0x151, 0x6F}, {0x151, 0x30B}, {0x154, 0x52}, {0x154, 0x301}, {0x155, 0x72}, {0x155, 0x301}, {0x156, 0x52}, {0x156, 0x327}, {0x157, 0x72}, {0x157, 0x327}, {0x158, 0x52},
|
||||
{0x158, 0x30C}, {0x159, 0x72}, {0x159, 0x30C}, {0x15A, 0x53}, {0x15A, 0x301}, {0x15B, 0x73}, {0x15B, 0x301}, {0x15C, 0x53}, {0x15C, 0x302}, {0x15D, 0x73}, {0x15D, 0x302}, {0x15E, 0x53},
|
||||
{0x15E, 0x327}, {0x15F, 0x73}, {0x15F, 0x327}, {0x160, 0x53}, {0x160, 0x30C}, {0x161, 0x73}, {0x161, 0x30C}, {0x162, 0x54}, {0x162, 0x327}, {0x163, 0x74}, {0x163, 0x327}, {0x164, 0x54},
|
||||
{0x164, 0x30C}, {0x165, 0x74}, {0x165, 0x30C}, {0x168, 0x55}, {0x168, 0x303}, {0x169, 0x75}, {0x169, 0x303}, {0x16A, 0x55}, {0x16A, 0x304}, {0x16B, 0x75}, {0x16B, 0x304}, {0x16C, 0x55},
|
||||
{0x16C, 0x306}, {0x16D, 0x75}, {0x16D, 0x306}, {0x16E, 0x55}, {0x16E, 0x30A}, {0x16F, 0x75}, {0x16F, 0x30A}, {0x170, 0x55}, {0x170, 0x30B}, {0x171, 0x75}, {0x171, 0x30B}, {0x172, 0x55},
|
||||
{0x172, 0x328}, {0x173, 0x75}, {0x173, 0x328}, {0x174, 0x57}, {0x174, 0x302}, {0x175, 0x77}, {0x175, 0x302}, {0x176, 0x59}, {0x176, 0x302}, {0x177, 0x79}, {0x177, 0x302}, {0x178, 0x59},
|
||||
{0x178, 0x308}, {0x179, 0x5A}, {0x179, 0x301}, {0x17A, 0x7A}, {0x17A, 0x301}, {0x17B, 0x5A}, {0x17B, 0x307}, {0x17C, 0x7A}, {0x17C, 0x307}, {0x17D, 0x5A}, {0x17D, 0x30C}, {0x17E, 0x7A},
|
||||
{0x17E, 0x30C}, {0x1A0, 0x4F}, {0x1A0, 0x31B}, {0x1A1, 0x6F}, {0x1A1, 0x31B}, {0x1AF, 0x55}, {0x1AF, 0x31B}, {0x1B0, 0x75}, {0x1B0, 0x31B}, {0x1CD, 0x41}, {0x1CD, 0x30C}, {0x1CE, 0x61},
|
||||
{0x1CE, 0x30C}, {0x1CF, 0x49}, {0x1CF, 0x30C}, {0x1D0, 0x69}, {0x1D0, 0x30C}, {0x1D1, 0x4F}, {0x1D1, 0x30C}, {0x1D2, 0x6F}, {0x1D2, 0x30C}, {0x1D3, 0x55}, {0x1D3, 0x30C}, {0x1D4, 0x75},
|
||||
{0x1D4, 0x30C}, {0x1D5, 0x55}, {0x1D5, 0x308}, {0x1D5, 0x304}, {0x1D6, 0x75}, {0x1D6, 0x308}, {0x1D6, 0x304}, {0x1D7, 0x55}, {0x1D7, 0x308}, {0x1D7, 0x301}, {0x1D8, 0x75}, {0x1D8, 0x308},
|
||||
{0x1D8, 0x301}, {0x1D9, 0x55}, {0x1D9, 0x308}, {0x1D9, 0x30C}, {0x1DA, 0x75}, {0x1DA, 0x308}, {0x1DA, 0x30C}, {0x1DB, 0x55}, {0x1DB, 0x308}, {0x1DB, 0x300}, {0x1DC, 0x75}, {0x1DC, 0x308},
|
||||
{0x1DC, 0x300}, {0x1DE, 0x41}, {0x1DE, 0x308}, {0x1DE, 0x304}, {0x1DF, 0x61}, {0x1DF, 0x308}, {0x1DF, 0x304}, {0x1E0, 0x41}, {0x1E0, 0x307}, {0x1E0, 0x304}, {0x1E1, 0x61}, {0x1E1, 0x307},
|
||||
{0x1E1, 0x304}, {0x1E2, 0xC6}, {0x1E2, 0x304}, {0x1E3, 0xE6}, {0x1E3, 0x304}, {0x1E6, 0x47}, {0x1E6, 0x30C}, {0x1E7, 0x67}, {0x1E7, 0x30C}, {0x1E8, 0x4B}, {0x1E8, 0x30C}, {0x1E9, 0x6B},
|
||||
{0x1E9, 0x30C}, {0x1EA, 0x4F}, {0x1EA, 0x328}, {0x1EB, 0x6F}, {0x1EB, 0x328}, {0x1EC, 0x4F}, {0x1EC, 0x328}, {0x1EC, 0x304}, {0x1ED, 0x6F}, {0x1ED, 0x328}, {0x1ED, 0x304}, {0x1EE, 0x1B7},
|
||||
{0x1EE, 0x30C}, {0x1EF, 0x292}, {0x1EF, 0x30C}, {0x1F0, 0x6A}, {0x1F0, 0x30C}, {0x1F4, 0x47}, {0x1F4, 0x301}, {0x1F5, 0x67}, {0x1F5, 0x301}, {0x1F8, 0x4E}, {0x1F8, 0x300}, {0x1F9, 0x6E},
|
||||
{0x1F9, 0x300}, {0x1FA, 0x41}, {0x1FA, 0x30A}, {0x1FA, 0x301}, {0x1FB, 0x61}, {0x1FB, 0x30A}, {0x1FB, 0x301}, {0x1FC, 0xC6}, {0x1FC, 0x301}, {0x1FD, 0xE6}, {0x1FD, 0x301}, {0x1FE, 0xD8},
|
||||
{0x1FE, 0x301}, {0x1FF, 0xF8}, {0x1FF, 0x301}, {0x200, 0x41}, {0x200, 0x30F}, {0x201, 0x61}, {0x201, 0x30F}, {0x202, 0x41}, {0x202, 0x311}, {0x203, 0x61}, {0x203, 0x311}, {0x204, 0x45},
|
||||
{0x204, 0x30F}, {0x205, 0x65}, {0x205, 0x30F}, {0x206, 0x45}, {0x206, 0x311}, {0x207, 0x65}, {0x207, 0x311}, {0x208, 0x49}, {0x208, 0x30F}, {0x209, 0x69}, {0x209, 0x30F}, {0x20A, 0x49},
|
||||
{0x20A, 0x311}, {0x20B, 0x69}, {0x20B, 0x311}, {0x20C, 0x4F}, {0x20C, 0x30F}, {0x20D, 0x6F}, {0x20D, 0x30F}, {0x20E, 0x4F}, {0x20E, 0x311}, {0x20F, 0x6F}, {0x20F, 0x311}, {0x210, 0x52},
|
||||
{0x210, 0x30F}, {0x211, 0x72}, {0x211, 0x30F}, {0x212, 0x52}, {0x212, 0x311}, {0x213, 0x72}, {0x213, 0x311}, {0x214, 0x55}, {0x214, 0x30F}, {0x215, 0x75}, {0x215, 0x30F}, {0x216, 0x55},
|
||||
{0x216, 0x311}, {0x217, 0x75}, {0x217, 0x311}, {0x218, 0x53}, {0x218, 0x326}, {0x219, 0x73}, {0x219, 0x326}, {0x21A, 0x54}, {0x21A, 0x326}, {0x21B, 0x74}, {0x21B, 0x326}, {0x21E, 0x48},
|
||||
{0x21E, 0x30C}, {0x21F, 0x68}, {0x21F, 0x30C}, {0x226, 0x41}, {0x226, 0x307}, {0x227, 0x61}, {0x227, 0x307}, {0x228, 0x45}, {0x228, 0x327}, {0x229, 0x65}, {0x229, 0x327}, {0x22A, 0x4F},
|
||||
{0x22A, 0x308}, {0x22A, 0x304}, {0x22B, 0x6F}, {0x22B, 0x308}, {0x22B, 0x304}, {0x22C, 0x4F}, {0x22C, 0x303}, {0x22C, 0x304}, {0x22D, 0x6F}, {0x22D, 0x303}, {0x22D, 0x304}, {0x22E, 0x4F},
|
||||
{0x22E, 0x307}, {0x22F, 0x6F}, {0x22F, 0x307}, {0x230, 0x4F}, {0x230, 0x307}, {0x230, 0x304}, {0x231, 0x6F}, {0x231, 0x307}, {0x231, 0x304}, {0x232, 0x59}, {0x232, 0x304}, {0x233, 0x79},
|
||||
{0x233, 0x304}, {0x340, 0x300}, {0x341, 0x301}, {0x343, 0x313}, {0x344, 0x308}, {0x344, 0x301}, {0x374, 0x2B9}, {0x37E, 0x3B}, {0x385, 0xA8}, {0x385, 0x301}, {0x386, 0x391}, {0x386, 0x301},
|
||||
{0x387, 0xB7}, {0x388, 0x395}, {0x388, 0x301}, {0x389, 0x397}, {0x389, 0x301}, {0x38A, 0x399}, {0x38A, 0x301}, {0x38C, 0x39F}, {0x38C, 0x301}, {0x38E, 0x3A5}, {0x38E, 0x301}, {0x38F, 0x3A9},
|
||||
{0x38F, 0x301}, {0x390, 0x3B9}, {0x390, 0x308}, {0x390, 0x301}, {0x3AA, 0x399}, {0x3AA, 0x308}, {0x3AB, 0x3A5}, {0x3AB, 0x308}, {0x3AC, 0x3B1}, {0x3AC, 0x301}, {0x3AD, 0x3B5}, {0x3AD, 0x301},
|
||||
{0x3AE, 0x3B7}, {0x3AE, 0x301}, {0x3AF, 0x3B9}, {0x3AF, 0x301}, {0x3B0, 0x3C5}, {0x3B0, 0x308}, {0x3B0, 0x301}, {0x3CA, 0x3B9}, {0x3CA, 0x308}, {0x3CB, 0x3C5}, {0x3CB, 0x308}, {0x3CC, 0x3BF},
|
||||
{0x3CC, 0x301}, {0x3CD, 0x3C5}, {0x3CD, 0x301}, {0x3CE, 0x3C9}, {0x3CE, 0x301}, {0x3D3, 0x3D2}, {0x3D3, 0x301}, {0x3D4, 0x3D2}, {0x3D4, 0x308}, {0x400, 0x415}, {0x400, 0x300}, {0x401, 0x415},
|
||||
{0x401, 0x308}, {0x403, 0x413}, {0x403, 0x301}, {0x407, 0x406}, {0x407, 0x308}, {0x40C, 0x41A}, {0x40C, 0x301}, {0x40D, 0x418}, {0x40D, 0x300}, {0x40E, 0x423}, {0x40E, 0x306}, {0x419, 0x418},
|
||||
{0x419, 0x306}, {0x439, 0x438}, {0x439, 0x306}, {0x450, 0x435}, {0x450, 0x300}, {0x451, 0x435}, {0x451, 0x308}, {0x453, 0x433}, {0x453, 0x301}, {0x457, 0x456}, {0x457, 0x308}, {0x45C, 0x43A},
|
||||
{0x45C, 0x301}, {0x45D, 0x438}, {0x45D, 0x300}, {0x45E, 0x443}, {0x45E, 0x306}, {0x476, 0x474}, {0x476, 0x30F}, {0x477, 0x475}, {0x477, 0x30F}, {0x4C1, 0x416}, {0x4C1, 0x306}, {0x4C2, 0x436},
|
||||
{0x4C2, 0x306}, {0x4D0, 0x410}, {0x4D0, 0x306}, {0x4D1, 0x430}, {0x4D1, 0x306}, {0x4D2, 0x410}, {0x4D2, 0x308}, {0x4D3, 0x430}, {0x4D3, 0x308}, {0x4D6, 0x415}, {0x4D6, 0x306}, {0x4D7, 0x435},
|
||||
{0x4D7, 0x306}, {0x4DA, 0x4D8}, {0x4DA, 0x308}, {0x4DB, 0x4D9}, {0x4DB, 0x308}, {0x4DC, 0x416}, {0x4DC, 0x308}, {0x4DD, 0x436}, {0x4DD, 0x308}, {0x4DE, 0x417}, {0x4DE, 0x308}, {0x4DF, 0x437},
|
||||
{0x4DF, 0x308}, {0x4E2, 0x418}, {0x4E2, 0x304}, {0x4E3, 0x438}, {0x4E3, 0x304}, {0x4E4, 0x418}, {0x4E4, 0x308}, {0x4E5, 0x438}, {0x4E5, 0x308}, {0x4E6, 0x41E}, {0x4E6, 0x308}, {0x4E7, 0x43E},
|
||||
{0x4E7, 0x308}, {0x4EA, 0x4E8}, {0x4EA, 0x308}, {0x4EB, 0x4E9}, {0x4EB, 0x308}, {0x4EC, 0x42D}, {0x4EC, 0x308}, {0x4ED, 0x44D}, {0x4ED, 0x308}, {0x4EE, 0x423}, {0x4EE, 0x304}, {0x4EF, 0x443},
|
||||
{0x4EF, 0x304}, {0x4F0, 0x423}, {0x4F0, 0x308}, {0x4F1, 0x443}, {0x4F1, 0x308}, {0x4F2, 0x423}, {0x4F2, 0x30B}, {0x4F3, 0x443}, {0x4F3, 0x30B}, {0x4F4, 0x427}, {0x4F4, 0x308}, {0x4F5, 0x447},
|
||||
{0x4F5, 0x308}, {0x4F8, 0x42B}, {0x4F8, 0x308}, {0x4F9, 0x44B}, {0x4F9, 0x308}, {0x622, 0x627}, {0x622, 0x653}, {0x623, 0x627}, {0x623, 0x654}, {0x624, 0x648}, {0x624, 0x654}, {0x625, 0x627},
|
||||
{0x625, 0x655}, {0x626, 0x64A}, {0x626, 0x654}, {0x6C0, 0x6D5}, {0x6C0, 0x654}, {0x6C2, 0x6C1}, {0x6C2, 0x654}, {0x6D3, 0x6D2}, {0x6D3, 0x654}, {0x929, 0x928}, {0x929, 0x93C}, {0x931, 0x930},
|
||||
{0x931, 0x93C}, {0x934, 0x933}, {0x934, 0x93C}, {0x958, 0x915}, {0x958, 0x93C}, {0x959, 0x916}, {0x959, 0x93C}, {0x95A, 0x917}, {0x95A, 0x93C}, {0x95B, 0x91C}, {0x95B, 0x93C}, {0x95C, 0x921},
|
||||
{0x95C, 0x93C}, {0x95D, 0x922}, {0x95D, 0x93C}, {0x95E, 0x92B}, {0x95E, 0x93C}, {0x95F, 0x92F}, {0x95F, 0x93C}, {0x9CB, 0x9C7}, {0x9CB, 0x9BE}, {0x9CC, 0x9C7}, {0x9CC, 0x9D7}, {0x9DC, 0x9A1},
|
||||
{0x9DC, 0x9BC}, {0x9DD, 0x9A2}, {0x9DD, 0x9BC}, {0x9DF, 0x9AF}, {0x9DF, 0x9BC}, {0xA33, 0xA32}, {0xA33, 0xA3C}, {0xA36, 0xA38}, {0xA36, 0xA3C}, {0xA59, 0xA16}, {0xA59, 0xA3C}, {0xA5A, 0xA17},
|
||||
{0xA5A, 0xA3C}, {0xA5B, 0xA1C}, {0xA5B, 0xA3C}, {0xA5E, 0xA2B}, {0xA5E, 0xA3C}, {0xB48, 0xB47}, {0xB48, 0xB56}, {0xB4B, 0xB47}, {0xB4B, 0xB3E}, {0xB4C, 0xB47}, {0xB4C, 0xB57}, {0xB5C, 0xB21},
|
||||
{0xB5C, 0xB3C}, {0xB5D, 0xB22}, {0xB5D, 0xB3C}, {0xB94, 0xB92}, {0xB94, 0xBD7}, {0xBCA, 0xBC6}, {0xBCA, 0xBBE}, {0xBCB, 0xBC7}, {0xBCB, 0xBBE}, {0xBCC, 0xBC6}, {0xBCC, 0xBD7}, {0xC48, 0xC46},
|
||||
{0xC48, 0xC56}, {0xCC0, 0xCBF}, {0xCC0, 0xCD5}, {0xCC7, 0xCC6}, {0xCC7, 0xCD5}, {0xCC8, 0xCC6}, {0xCC8, 0xCD6}, {0xCCA, 0xCC6}, {0xCCA, 0xCC2}, {0xCCB, 0xCC6}, {0xCCB, 0xCC2}, {0xCCB, 0xCD5},
|
||||
{0xD4A, 0xD46}, {0xD4A, 0xD3E}, {0xD4B, 0xD47}, {0xD4B, 0xD3E}, {0xD4C, 0xD46}, {0xD4C, 0xD57}, {0xDDA, 0xDD9}, {0xDDA, 0xDCA}, {0xDDC, 0xDD9}, {0xDDC, 0xDCF}, {0xDDD, 0xDD9}, {0xDDD, 0xDCF},
|
||||
{0xDDD, 0xDCA}, {0xDDE, 0xDD9}, {0xDDE, 0xDDF}, {0xF43, 0xF42}, {0xF43, 0xFB7}, {0xF4D, 0xF4C}, {0xF4D, 0xFB7}, {0xF52, 0xF51}, {0xF52, 0xFB7}, {0xF57, 0xF56}, {0xF57, 0xFB7}, {0xF5C, 0xF5B},
|
||||
{0xF5C, 0xFB7}, {0xF69, 0xF40}, {0xF69, 0xFB5}, {0xF73, 0xF71}, {0xF73, 0xF72}, {0xF75, 0xF71}, {0xF75, 0xF74}, {0xF76, 0xFB2}, {0xF76, 0xF80}, {0xF78, 0xFB3}, {0xF78, 0xF80}, {0xF81, 0xF71},
|
||||
{0xF81, 0xF80}, {0xF93, 0xF92}, {0xF93, 0xFB7}, {0xF9D, 0xF9C}, {0xF9D, 0xFB7}, {0xFA2, 0xFA1}, {0xFA2, 0xFB7}, {0xFA7, 0xFA6}, {0xFA7, 0xFB7}, {0xFAC, 0xFAB}, {0xFAC, 0xFB7}, {0xFB9, 0xF90},
|
||||
{0xFB9, 0xFB5}, {0x1026, 0x1025}, {0x1026, 0x102E}, {0x1B06, 0x1B05}, {0x1B06, 0x1B35}, {0x1B08, 0x1B07}, {0x1B08, 0x1B35}, {0x1B0A, 0x1B09}, {0x1B0A, 0x1B35}, {0x1B0C, 0x1B0B}, {0x1B0C, 0x1B35},
|
||||
{0x1B0E, 0x1B0D}, {0x1B0E, 0x1B35}, {0x1B12, 0x1B11}, {0x1B12, 0x1B35}, {0x1B3B, 0x1B3A}, {0x1B3B, 0x1B35}, {0x1B3D, 0x1B3C}, {0x1B3D, 0x1B35}, {0x1B40, 0x1B3E}, {0x1B40, 0x1B35}, {0x1B41, 0x1B3F},
|
||||
{0x1B41, 0x1B35}, {0x1B43, 0x1B42}, {0x1B43, 0x1B35}, {0x1E00, 0x41}, {0x1E00, 0x325}, {0x1E01, 0x61}, {0x1E01, 0x325}, {0x1E02, 0x42}, {0x1E02, 0x307}, {0x1E03, 0x62}, {0x1E03, 0x307},
|
||||
{0x1E04, 0x42}, {0x1E04, 0x323}, {0x1E05, 0x62}, {0x1E05, 0x323}, {0x1E06, 0x42}, {0x1E06, 0x331}, {0x1E07, 0x62}, {0x1E07, 0x331}, {0x1E08, 0x43}, {0x1E08, 0x327}, {0x1E08, 0x301}, {0x1E09, 0x63},
|
||||
{0x1E09, 0x327}, {0x1E09, 0x301}, {0x1E0A, 0x44}, {0x1E0A, 0x307}, {0x1E0B, 0x64}, {0x1E0B, 0x307}, {0x1E0C, 0x44}, {0x1E0C, 0x323}, {0x1E0D, 0x64}, {0x1E0D, 0x323}, {0x1E0E, 0x44}, {0x1E0E, 0x331},
|
||||
{0x1E0F, 0x64}, {0x1E0F, 0x331}, {0x1E10, 0x44}, {0x1E10, 0x327}, {0x1E11, 0x64}, {0x1E11, 0x327}, {0x1E12, 0x44}, {0x1E12, 0x32D}, {0x1E13, 0x64}, {0x1E13, 0x32D}, {0x1E14, 0x45}, {0x1E14, 0x304},
|
||||
{0x1E14, 0x300}, {0x1E15, 0x65}, {0x1E15, 0x304}, {0x1E15, 0x300}, {0x1E16, 0x45}, {0x1E16, 0x304}, {0x1E16, 0x301}, {0x1E17, 0x65}, {0x1E17, 0x304}, {0x1E17, 0x301}, {0x1E18, 0x45}, {0x1E18, 0x32D},
|
||||
{0x1E19, 0x65}, {0x1E19, 0x32D}, {0x1E1A, 0x45}, {0x1E1A, 0x330}, {0x1E1B, 0x65}, {0x1E1B, 0x330}, {0x1E1C, 0x45}, {0x1E1C, 0x327}, {0x1E1C, 0x306}, {0x1E1D, 0x65}, {0x1E1D, 0x327}, {0x1E1D, 0x306},
|
||||
{0x1E1E, 0x46}, {0x1E1E, 0x307}, {0x1E1F, 0x66}, {0x1E1F, 0x307}, {0x1E20, 0x47}, {0x1E20, 0x304}, {0x1E21, 0x67}, {0x1E21, 0x304}, {0x1E22, 0x48}, {0x1E22, 0x307}, {0x1E23, 0x68}, {0x1E23, 0x307},
|
||||
{0x1E24, 0x48}, {0x1E24, 0x323}, {0x1E25, 0x68}, {0x1E25, 0x323}, {0x1E26, 0x48}, {0x1E26, 0x308}, {0x1E27, 0x68}, {0x1E27, 0x308}, {0x1E28, 0x48}, {0x1E28, 0x327}, {0x1E29, 0x68}, {0x1E29, 0x327},
|
||||
{0x1E2A, 0x48}, {0x1E2A, 0x32E}, {0x1E2B, 0x68}, {0x1E2B, 0x32E}, {0x1E2C, 0x49}, {0x1E2C, 0x330}, {0x1E2D, 0x69}, {0x1E2D, 0x330}, {0x1E2E, 0x49}, {0x1E2E, 0x308}, {0x1E2E, 0x301}, {0x1E2F, 0x69},
|
||||
{0x1E2F, 0x308}, {0x1E2F, 0x301}, {0x1E30, 0x4B}, {0x1E30, 0x301}, {0x1E31, 0x6B}, {0x1E31, 0x301}, {0x1E32, 0x4B}, {0x1E32, 0x323}, {0x1E33, 0x6B}, {0x1E33, 0x323}, {0x1E34, 0x4B}, {0x1E34, 0x331},
|
||||
{0x1E35, 0x6B}, {0x1E35, 0x331}, {0x1E36, 0x4C}, {0x1E36, 0x323}, {0x1E37, 0x6C}, {0x1E37, 0x323}, {0x1E38, 0x4C}, {0x1E38, 0x323}, {0x1E38, 0x304}, {0x1E39, 0x6C}, {0x1E39, 0x323}, {0x1E39, 0x304},
|
||||
{0x1E3A, 0x4C}, {0x1E3A, 0x331}, {0x1E3B, 0x6C}, {0x1E3B, 0x331}, {0x1E3C, 0x4C}, {0x1E3C, 0x32D}, {0x1E3D, 0x6C}, {0x1E3D, 0x32D}, {0x1E3E, 0x4D}, {0x1E3E, 0x301}, {0x1E3F, 0x6D}, {0x1E3F, 0x301},
|
||||
{0x1E40, 0x4D}, {0x1E40, 0x307}, {0x1E41, 0x6D}, {0x1E41, 0x307}, {0x1E42, 0x4D}, {0x1E42, 0x323}, {0x1E43, 0x6D}, {0x1E43, 0x323}, {0x1E44, 0x4E}, {0x1E44, 0x307}, {0x1E45, 0x6E}, {0x1E45, 0x307},
|
||||
{0x1E46, 0x4E}, {0x1E46, 0x323}, {0x1E47, 0x6E}, {0x1E47, 0x323}, {0x1E48, 0x4E}, {0x1E48, 0x331}, {0x1E49, 0x6E}, {0x1E49, 0x331}, {0x1E4A, 0x4E}, {0x1E4A, 0x32D}, {0x1E4B, 0x6E}, {0x1E4B, 0x32D},
|
||||
{0x1E4C, 0x4F}, {0x1E4C, 0x303}, {0x1E4C, 0x301}, {0x1E4D, 0x6F}, {0x1E4D, 0x303}, {0x1E4D, 0x301}, {0x1E4E, 0x4F}, {0x1E4E, 0x303}, {0x1E4E, 0x308}, {0x1E4F, 0x6F}, {0x1E4F, 0x303}, {0x1E4F, 0x308},
|
||||
{0x1E50, 0x4F}, {0x1E50, 0x304}, {0x1E50, 0x300}, {0x1E51, 0x6F}, {0x1E51, 0x304}, {0x1E51, 0x300}, {0x1E52, 0x4F}, {0x1E52, 0x304}, {0x1E52, 0x301}, {0x1E53, 0x6F}, {0x1E53, 0x304}, {0x1E53, 0x301},
|
||||
{0x1E54, 0x50}, {0x1E54, 0x301}, {0x1E55, 0x70}, {0x1E55, 0x301}, {0x1E56, 0x50}, {0x1E56, 0x307}, {0x1E57, 0x70}, {0x1E57, 0x307}, {0x1E58, 0x52}, {0x1E58, 0x307}, {0x1E59, 0x72}, {0x1E59, 0x307},
|
||||
{0x1E5A, 0x52}, {0x1E5A, 0x323}, {0x1E5B, 0x72}, {0x1E5B, 0x323}, {0x1E5C, 0x52}, {0x1E5C, 0x323}, {0x1E5C, 0x304}, {0x1E5D, 0x72}, {0x1E5D, 0x323}, {0x1E5D, 0x304}, {0x1E5E, 0x52}, {0x1E5E, 0x331},
|
||||
{0x1E5F, 0x72}, {0x1E5F, 0x331}, {0x1E60, 0x53}, {0x1E60, 0x307}, {0x1E61, 0x73}, {0x1E61, 0x307}, {0x1E62, 0x53}, {0x1E62, 0x323}, {0x1E63, 0x73}, {0x1E63, 0x323}, {0x1E64, 0x53}, {0x1E64, 0x301},
|
||||
{0x1E64, 0x307}, {0x1E65, 0x73}, {0x1E65, 0x301}, {0x1E65, 0x307}, {0x1E66, 0x53}, {0x1E66, 0x30C}, {0x1E66, 0x307}, {0x1E67, 0x73}, {0x1E67, 0x30C}, {0x1E67, 0x307}, {0x1E68, 0x53}, {0x1E68, 0x323},
|
||||
{0x1E68, 0x307}, {0x1E69, 0x73}, {0x1E69, 0x323}, {0x1E69, 0x307}, {0x1E6A, 0x54}, {0x1E6A, 0x307}, {0x1E6B, 0x74}, {0x1E6B, 0x307}, {0x1E6C, 0x54}, {0x1E6C, 0x323}, {0x1E6D, 0x74}, {0x1E6D, 0x323},
|
||||
{0x1E6E, 0x54}, {0x1E6E, 0x331}, {0x1E6F, 0x74}, {0x1E6F, 0x331}, {0x1E70, 0x54}, {0x1E70, 0x32D}, {0x1E71, 0x74}, {0x1E71, 0x32D}, {0x1E72, 0x55}, {0x1E72, 0x324}, {0x1E73, 0x75}, {0x1E73, 0x324},
|
||||
{0x1E74, 0x55}, {0x1E74, 0x330}, {0x1E75, 0x75}, {0x1E75, 0x330}, {0x1E76, 0x55}, {0x1E76, 0x32D}, {0x1E77, 0x75}, {0x1E77, 0x32D}, {0x1E78, 0x55}, {0x1E78, 0x303}, {0x1E78, 0x301}, {0x1E79, 0x75},
|
||||
{0x1E79, 0x303}, {0x1E79, 0x301}, {0x1E7A, 0x55}, {0x1E7A, 0x304}, {0x1E7A, 0x308}, {0x1E7B, 0x75}, {0x1E7B, 0x304}, {0x1E7B, 0x308}, {0x1E7C, 0x56}, {0x1E7C, 0x303}, {0x1E7D, 0x76}, {0x1E7D, 0x303},
|
||||
{0x1E7E, 0x56}, {0x1E7E, 0x323}, {0x1E7F, 0x76}, {0x1E7F, 0x323}, {0x1E80, 0x57}, {0x1E80, 0x300}, {0x1E81, 0x77}, {0x1E81, 0x300}, {0x1E82, 0x57}, {0x1E82, 0x301}, {0x1E83, 0x77}, {0x1E83, 0x301},
|
||||
{0x1E84, 0x57}, {0x1E84, 0x308}, {0x1E85, 0x77}, {0x1E85, 0x308}, {0x1E86, 0x57}, {0x1E86, 0x307}, {0x1E87, 0x77}, {0x1E87, 0x307}, {0x1E88, 0x57}, {0x1E88, 0x323}, {0x1E89, 0x77}, {0x1E89, 0x323},
|
||||
{0x1E8A, 0x58}, {0x1E8A, 0x307}, {0x1E8B, 0x78}, {0x1E8B, 0x307}, {0x1E8C, 0x58}, {0x1E8C, 0x308}, {0x1E8D, 0x78}, {0x1E8D, 0x308}, {0x1E8E, 0x59}, {0x1E8E, 0x307}, {0x1E8F, 0x79}, {0x1E8F, 0x307},
|
||||
{0x1E90, 0x5A}, {0x1E90, 0x302}, {0x1E91, 0x7A}, {0x1E91, 0x302}, {0x1E92, 0x5A}, {0x1E92, 0x323}, {0x1E93, 0x7A}, {0x1E93, 0x323}, {0x1E94, 0x5A}, {0x1E94, 0x331}, {0x1E95, 0x7A}, {0x1E95, 0x331},
|
||||
{0x1E96, 0x68}, {0x1E96, 0x331}, {0x1E97, 0x74}, {0x1E97, 0x308}, {0x1E98, 0x77}, {0x1E98, 0x30A}, {0x1E99, 0x79}, {0x1E99, 0x30A}, {0x1E9B, 0x17F}, {0x1E9B, 0x307}, {0x1EA0, 0x41}, {0x1EA0, 0x323},
|
||||
{0x1EA1, 0x61}, {0x1EA1, 0x323}, {0x1EA2, 0x41}, {0x1EA2, 0x309}, {0x1EA3, 0x61}, {0x1EA3, 0x309}, {0x1EA4, 0x41}, {0x1EA4, 0x302}, {0x1EA4, 0x301}, {0x1EA5, 0x61}, {0x1EA5, 0x302}, {0x1EA5, 0x301},
|
||||
{0x1EA6, 0x41}, {0x1EA6, 0x302}, {0x1EA6, 0x300}, {0x1EA7, 0x61}, {0x1EA7, 0x302}, {0x1EA7, 0x300}, {0x1EA8, 0x41}, {0x1EA8, 0x302}, {0x1EA8, 0x309}, {0x1EA9, 0x61}, {0x1EA9, 0x302}, {0x1EA9, 0x309},
|
||||
{0x1EAA, 0x41}, {0x1EAA, 0x302}, {0x1EAA, 0x303}, {0x1EAB, 0x61}, {0x1EAB, 0x302}, {0x1EAB, 0x303}, {0x1EAC, 0x41}, {0x1EAC, 0x323}, {0x1EAC, 0x302}, {0x1EAD, 0x61}, {0x1EAD, 0x323}, {0x1EAD, 0x302},
|
||||
{0x1EAE, 0x41}, {0x1EAE, 0x306}, {0x1EAE, 0x301}, {0x1EAF, 0x61}, {0x1EAF, 0x306}, {0x1EAF, 0x301}, {0x1EB0, 0x41}, {0x1EB0, 0x306}, {0x1EB0, 0x300}, {0x1EB1, 0x61}, {0x1EB1, 0x306}, {0x1EB1, 0x300},
|
||||
{0x1EB2, 0x41}, {0x1EB2, 0x306}, {0x1EB2, 0x309}, {0x1EB3, 0x61}, {0x1EB3, 0x306}, {0x1EB3, 0x309}, {0x1EB4, 0x41}, {0x1EB4, 0x306}, {0x1EB4, 0x303}, {0x1EB5, 0x61}, {0x1EB5, 0x306}, {0x1EB5, 0x303},
|
||||
{0x1EB6, 0x41}, {0x1EB6, 0x323}, {0x1EB6, 0x306}, {0x1EB7, 0x61}, {0x1EB7, 0x323}, {0x1EB7, 0x306}, {0x1EB8, 0x45}, {0x1EB8, 0x323}, {0x1EB9, 0x65}, {0x1EB9, 0x323}, {0x1EBA, 0x45}, {0x1EBA, 0x309},
|
||||
{0x1EBB, 0x65}, {0x1EBB, 0x309}, {0x1EBC, 0x45}, {0x1EBC, 0x303}, {0x1EBD, 0x65}, {0x1EBD, 0x303}, {0x1EBE, 0x45}, {0x1EBE, 0x302}, {0x1EBE, 0x301}, {0x1EBF, 0x65}, {0x1EBF, 0x302}, {0x1EBF, 0x301},
|
||||
{0x1EC0, 0x45}, {0x1EC0, 0x302}, {0x1EC0, 0x300}, {0x1EC1, 0x65}, {0x1EC1, 0x302}, {0x1EC1, 0x300}, {0x1EC2, 0x45}, {0x1EC2, 0x302}, {0x1EC2, 0x309}, {0x1EC3, 0x65}, {0x1EC3, 0x302}, {0x1EC3, 0x309},
|
||||
{0x1EC4, 0x45}, {0x1EC4, 0x302}, {0x1EC4, 0x303}, {0x1EC5, 0x65}, {0x1EC5, 0x302}, {0x1EC5, 0x303}, {0x1EC6, 0x45}, {0x1EC6, 0x323}, {0x1EC6, 0x302}, {0x1EC7, 0x65}, {0x1EC7, 0x323}, {0x1EC7, 0x302},
|
||||
{0x1EC8, 0x49}, {0x1EC8, 0x309}, {0x1EC9, 0x69}, {0x1EC9, 0x309}, {0x1ECA, 0x49}, {0x1ECA, 0x323}, {0x1ECB, 0x69}, {0x1ECB, 0x323}, {0x1ECC, 0x4F}, {0x1ECC, 0x323}, {0x1ECD, 0x6F}, {0x1ECD, 0x323},
|
||||
{0x1ECE, 0x4F}, {0x1ECE, 0x309}, {0x1ECF, 0x6F}, {0x1ECF, 0x309}, {0x1ED0, 0x4F}, {0x1ED0, 0x302}, {0x1ED0, 0x301}, {0x1ED1, 0x6F}, {0x1ED1, 0x302}, {0x1ED1, 0x301}, {0x1ED2, 0x4F}, {0x1ED2, 0x302},
|
||||
{0x1ED2, 0x300}, {0x1ED3, 0x6F}, {0x1ED3, 0x302}, {0x1ED3, 0x300}, {0x1ED4, 0x4F}, {0x1ED4, 0x302}, {0x1ED4, 0x309}, {0x1ED5, 0x6F}, {0x1ED5, 0x302}, {0x1ED5, 0x309}, {0x1ED6, 0x4F}, {0x1ED6, 0x302},
|
||||
{0x1ED6, 0x303}, {0x1ED7, 0x6F}, {0x1ED7, 0x302}, {0x1ED7, 0x303}, {0x1ED8, 0x4F}, {0x1ED8, 0x323}, {0x1ED8, 0x302}, {0x1ED9, 0x6F}, {0x1ED9, 0x323}, {0x1ED9, 0x302}, {0x1EDA, 0x4F}, {0x1EDA, 0x31B},
|
||||
{0x1EDA, 0x301}, {0x1EDB, 0x6F}, {0x1EDB, 0x31B}, {0x1EDB, 0x301}, {0x1EDC, 0x4F}, {0x1EDC, 0x31B}, {0x1EDC, 0x300}, {0x1EDD, 0x6F}, {0x1EDD, 0x31B}, {0x1EDD, 0x300}, {0x1EDE, 0x4F}, {0x1EDE, 0x31B},
|
||||
{0x1EDE, 0x309}, {0x1EDF, 0x6F}, {0x1EDF, 0x31B}, {0x1EDF, 0x309}, {0x1EE0, 0x4F}, {0x1EE0, 0x31B}, {0x1EE0, 0x303}, {0x1EE1, 0x6F}, {0x1EE1, 0x31B}, {0x1EE1, 0x303}, {0x1EE2, 0x4F}, {0x1EE2, 0x31B},
|
||||
{0x1EE2, 0x323}, {0x1EE3, 0x6F}, {0x1EE3, 0x31B}, {0x1EE3, 0x323}, {0x1EE4, 0x55}, {0x1EE4, 0x323}, {0x1EE5, 0x75}, {0x1EE5, 0x323}, {0x1EE6, 0x55}, {0x1EE6, 0x309}, {0x1EE7, 0x75}, {0x1EE7, 0x309},
|
||||
{0x1EE8, 0x55}, {0x1EE8, 0x31B}, {0x1EE8, 0x301}, {0x1EE9, 0x75}, {0x1EE9, 0x31B}, {0x1EE9, 0x301}, {0x1EEA, 0x55}, {0x1EEA, 0x31B}, {0x1EEA, 0x300}, {0x1EEB, 0x75}, {0x1EEB, 0x31B}, {0x1EEB, 0x300},
|
||||
{0x1EEC, 0x55}, {0x1EEC, 0x31B}, {0x1EEC, 0x309}, {0x1EED, 0x75}, {0x1EED, 0x31B}, {0x1EED, 0x309}, {0x1EEE, 0x55}, {0x1EEE, 0x31B}, {0x1EEE, 0x303}, {0x1EEF, 0x75}, {0x1EEF, 0x31B}, {0x1EEF, 0x303},
|
||||
{0x1EF0, 0x55}, {0x1EF0, 0x31B}, {0x1EF0, 0x323}, {0x1EF1, 0x75}, {0x1EF1, 0x31B}, {0x1EF1, 0x323}, {0x1EF2, 0x59}, {0x1EF2, 0x300}, {0x1EF3, 0x79}, {0x1EF3, 0x300}, {0x1EF4, 0x59}, {0x1EF4, 0x323},
|
||||
{0x1EF5, 0x79}, {0x1EF5, 0x323}, {0x1EF6, 0x59}, {0x1EF6, 0x309}, {0x1EF7, 0x79}, {0x1EF7, 0x309}, {0x1EF8, 0x59}, {0x1EF8, 0x303}, {0x1EF9, 0x79}, {0x1EF9, 0x303}, {0x1F00, 0x3B1}, {0x1F00, 0x313},
|
||||
{0x1F01, 0x3B1}, {0x1F01, 0x314}, {0x1F02, 0x3B1}, {0x1F02, 0x313}, {0x1F02, 0x300}, {0x1F03, 0x3B1}, {0x1F03, 0x314}, {0x1F03, 0x300}, {0x1F04, 0x3B1}, {0x1F04, 0x313}, {0x1F04, 0x301},
|
||||
{0x1F05, 0x3B1}, {0x1F05, 0x314}, {0x1F05, 0x301}, {0x1F06, 0x3B1}, {0x1F06, 0x313}, {0x1F06, 0x342}, {0x1F07, 0x3B1}, {0x1F07, 0x314}, {0x1F07, 0x342}, {0x1F08, 0x391}, {0x1F08, 0x313},
|
||||
{0x1F09, 0x391}, {0x1F09, 0x314}, {0x1F0A, 0x391}, {0x1F0A, 0x313}, {0x1F0A, 0x300}, {0x1F0B, 0x391}, {0x1F0B, 0x314}, {0x1F0B, 0x300}, {0x1F0C, 0x391}, {0x1F0C, 0x313}, {0x1F0C, 0x301},
|
||||
{0x1F0D, 0x391}, {0x1F0D, 0x314}, {0x1F0D, 0x301}, {0x1F0E, 0x391}, {0x1F0E, 0x313}, {0x1F0E, 0x342}, {0x1F0F, 0x391}, {0x1F0F, 0x314}, {0x1F0F, 0x342}, {0x1F10, 0x3B5}, {0x1F10, 0x313},
|
||||
{0x1F11, 0x3B5}, {0x1F11, 0x314}, {0x1F12, 0x3B5}, {0x1F12, 0x313}, {0x1F12, 0x300}, {0x1F13, 0x3B5}, {0x1F13, 0x314}, {0x1F13, 0x300}, {0x1F14, 0x3B5}, {0x1F14, 0x313}, {0x1F14, 0x301},
|
||||
{0x1F15, 0x3B5}, {0x1F15, 0x314}, {0x1F15, 0x301}, {0x1F18, 0x395}, {0x1F18, 0x313}, {0x1F19, 0x395}, {0x1F19, 0x314}, {0x1F1A, 0x395}, {0x1F1A, 0x313}, {0x1F1A, 0x300}, {0x1F1B, 0x395},
|
||||
{0x1F1B, 0x314}, {0x1F1B, 0x300}, {0x1F1C, 0x395}, {0x1F1C, 0x313}, {0x1F1C, 0x301}, {0x1F1D, 0x395}, {0x1F1D, 0x314}, {0x1F1D, 0x301}, {0x1F20, 0x3B7}, {0x1F20, 0x313}, {0x1F21, 0x3B7},
|
||||
{0x1F21, 0x314}, {0x1F22, 0x3B7}, {0x1F22, 0x313}, {0x1F22, 0x300}, {0x1F23, 0x3B7}, {0x1F23, 0x314}, {0x1F23, 0x300}, {0x1F24, 0x3B7}, {0x1F24, 0x313}, {0x1F24, 0x301}, {0x1F25, 0x3B7},
|
||||
{0x1F25, 0x314}, {0x1F25, 0x301}, {0x1F26, 0x3B7}, {0x1F26, 0x313}, {0x1F26, 0x342}, {0x1F27, 0x3B7}, {0x1F27, 0x314}, {0x1F27, 0x342}, {0x1F28, 0x397}, {0x1F28, 0x313}, {0x1F29, 0x397},
|
||||
{0x1F29, 0x314}, {0x1F2A, 0x397}, {0x1F2A, 0x313}, {0x1F2A, 0x300}, {0x1F2B, 0x397}, {0x1F2B, 0x314}, {0x1F2B, 0x300}, {0x1F2C, 0x397}, {0x1F2C, 0x313}, {0x1F2C, 0x301}, {0x1F2D, 0x397},
|
||||
{0x1F2D, 0x314}, {0x1F2D, 0x301}, {0x1F2E, 0x397}, {0x1F2E, 0x313}, {0x1F2E, 0x342}, {0x1F2F, 0x397}, {0x1F2F, 0x314}, {0x1F2F, 0x342}, {0x1F30, 0x3B9}, {0x1F30, 0x313}, {0x1F31, 0x3B9},
|
||||
{0x1F31, 0x314}, {0x1F32, 0x3B9}, {0x1F32, 0x313}, {0x1F32, 0x300}, {0x1F33, 0x3B9}, {0x1F33, 0x314}, {0x1F33, 0x300}, {0x1F34, 0x3B9}, {0x1F34, 0x313}, {0x1F34, 0x301}, {0x1F35, 0x3B9},
|
||||
{0x1F35, 0x314}, {0x1F35, 0x301}, {0x1F36, 0x3B9}, {0x1F36, 0x313}, {0x1F36, 0x342}, {0x1F37, 0x3B9}, {0x1F37, 0x314}, {0x1F37, 0x342}, {0x1F38, 0x399}, {0x1F38, 0x313}, {0x1F39, 0x399},
|
||||
{0x1F39, 0x314}, {0x1F3A, 0x399}, {0x1F3A, 0x313}, {0x1F3A, 0x300}, {0x1F3B, 0x399}, {0x1F3B, 0x314}, {0x1F3B, 0x300}, {0x1F3C, 0x399}, {0x1F3C, 0x313}, {0x1F3C, 0x301}, {0x1F3D, 0x399},
|
||||
{0x1F3D, 0x314}, {0x1F3D, 0x301}, {0x1F3E, 0x399}, {0x1F3E, 0x313}, {0x1F3E, 0x342}, {0x1F3F, 0x399}, {0x1F3F, 0x314}, {0x1F3F, 0x342}, {0x1F40, 0x3BF}, {0x1F40, 0x313}, {0x1F41, 0x3BF},
|
||||
{0x1F41, 0x314}, {0x1F42, 0x3BF}, {0x1F42, 0x313}, {0x1F42, 0x300}, {0x1F43, 0x3BF}, {0x1F43, 0x314}, {0x1F43, 0x300}, {0x1F44, 0x3BF}, {0x1F44, 0x313}, {0x1F44, 0x301}, {0x1F45, 0x3BF},
|
||||
{0x1F45, 0x314}, {0x1F45, 0x301}, {0x1F48, 0x39F}, {0x1F48, 0x313}, {0x1F49, 0x39F}, {0x1F49, 0x314}, {0x1F4A, 0x39F}, {0x1F4A, 0x313}, {0x1F4A, 0x300}, {0x1F4B, 0x39F}, {0x1F4B, 0x314},
|
||||
{0x1F4B, 0x300}, {0x1F4C, 0x39F}, {0x1F4C, 0x313}, {0x1F4C, 0x301}, {0x1F4D, 0x39F}, {0x1F4D, 0x314}, {0x1F4D, 0x301}, {0x1F50, 0x3C5}, {0x1F50, 0x313}, {0x1F51, 0x3C5}, {0x1F51, 0x314},
|
||||
{0x1F52, 0x3C5}, {0x1F52, 0x313}, {0x1F52, 0x300}, {0x1F53, 0x3C5}, {0x1F53, 0x314}, {0x1F53, 0x300}, {0x1F54, 0x3C5}, {0x1F54, 0x313}, {0x1F54, 0x301}, {0x1F55, 0x3C5}, {0x1F55, 0x314},
|
||||
{0x1F55, 0x301}, {0x1F56, 0x3C5}, {0x1F56, 0x313}, {0x1F56, 0x342}, {0x1F57, 0x3C5}, {0x1F57, 0x314}, {0x1F57, 0x342}, {0x1F59, 0x3A5}, {0x1F59, 0x314}, {0x1F5B, 0x3A5}, {0x1F5B, 0x314},
|
||||
{0x1F5B, 0x300}, {0x1F5D, 0x3A5}, {0x1F5D, 0x314}, {0x1F5D, 0x301}, {0x1F5F, 0x3A5}, {0x1F5F, 0x314}, {0x1F5F, 0x342}, {0x1F60, 0x3C9}, {0x1F60, 0x313}, {0x1F61, 0x3C9}, {0x1F61, 0x314},
|
||||
{0x1F62, 0x3C9}, {0x1F62, 0x313}, {0x1F62, 0x300}, {0x1F63, 0x3C9}, {0x1F63, 0x314}, {0x1F63, 0x300}, {0x1F64, 0x3C9}, {0x1F64, 0x313}, {0x1F64, 0x301}, {0x1F65, 0x3C9}, {0x1F65, 0x314},
|
||||
{0x1F65, 0x301}, {0x1F66, 0x3C9}, {0x1F66, 0x313}, {0x1F66, 0x342}, {0x1F67, 0x3C9}, {0x1F67, 0x314}, {0x1F67, 0x342}, {0x1F68, 0x3A9}, {0x1F68, 0x313}, {0x1F69, 0x3A9}, {0x1F69, 0x314},
|
||||
{0x1F6A, 0x3A9}, {0x1F6A, 0x313}, {0x1F6A, 0x300}, {0x1F6B, 0x3A9}, {0x1F6B, 0x314}, {0x1F6B, 0x300}, {0x1F6C, 0x3A9}, {0x1F6C, 0x313}, {0x1F6C, 0x301}, {0x1F6D, 0x3A9}, {0x1F6D, 0x314},
|
||||
{0x1F6D, 0x301}, {0x1F6E, 0x3A9}, {0x1F6E, 0x313}, {0x1F6E, 0x342}, {0x1F6F, 0x3A9}, {0x1F6F, 0x314}, {0x1F6F, 0x342}, {0x1F70, 0x3B1}, {0x1F70, 0x300}, {0x1F71, 0x3B1}, {0x1F71, 0x301},
|
||||
{0x1F72, 0x3B5}, {0x1F72, 0x300}, {0x1F73, 0x3B5}, {0x1F73, 0x301}, {0x1F74, 0x3B7}, {0x1F74, 0x300}, {0x1F75, 0x3B7}, {0x1F75, 0x301}, {0x1F76, 0x3B9}, {0x1F76, 0x300}, {0x1F77, 0x3B9},
|
||||
{0x1F77, 0x301}, {0x1F78, 0x3BF}, {0x1F78, 0x300}, {0x1F79, 0x3BF}, {0x1F79, 0x301}, {0x1F7A, 0x3C5}, {0x1F7A, 0x300}, {0x1F7B, 0x3C5}, {0x1F7B, 0x301}, {0x1F7C, 0x3C9}, {0x1F7C, 0x300},
|
||||
{0x1F7D, 0x3C9}, {0x1F7D, 0x301}, {0x1F80, 0x3B1}, {0x1F80, 0x313}, {0x1F80, 0x345}, {0x1F81, 0x3B1}, {0x1F81, 0x314}, {0x1F81, 0x345}, {0x1F82, 0x3B1}, {0x1F82, 0x313}, {0x1F82, 0x300},
|
||||
{0x1F82, 0x345}, {0x1F83, 0x3B1}, {0x1F83, 0x314}, {0x1F83, 0x300}, {0x1F83, 0x345}, {0x1F84, 0x3B1}, {0x1F84, 0x313}, {0x1F84, 0x301}, {0x1F84, 0x345}, {0x1F85, 0x3B1}, {0x1F85, 0x314},
|
||||
{0x1F85, 0x301}, {0x1F85, 0x345}, {0x1F86, 0x3B1}, {0x1F86, 0x313}, {0x1F86, 0x342}, {0x1F86, 0x345}, {0x1F87, 0x3B1}, {0x1F87, 0x314}, {0x1F87, 0x342}, {0x1F87, 0x345}, {0x1F88, 0x391},
|
||||
{0x1F88, 0x313}, {0x1F88, 0x345}, {0x1F89, 0x391}, {0x1F89, 0x314}, {0x1F89, 0x345}, {0x1F8A, 0x391}, {0x1F8A, 0x313}, {0x1F8A, 0x300}, {0x1F8A, 0x345}, {0x1F8B, 0x391}, {0x1F8B, 0x314},
|
||||
{0x1F8B, 0x300}, {0x1F8B, 0x345}, {0x1F8C, 0x391}, {0x1F8C, 0x313}, {0x1F8C, 0x301}, {0x1F8C, 0x345}, {0x1F8D, 0x391}, {0x1F8D, 0x314}, {0x1F8D, 0x301}, {0x1F8D, 0x345}, {0x1F8E, 0x391},
|
||||
{0x1F8E, 0x313}, {0x1F8E, 0x342}, {0x1F8E, 0x345}, {0x1F8F, 0x391}, {0x1F8F, 0x314}, {0x1F8F, 0x342}, {0x1F8F, 0x345}, {0x1F90, 0x3B7}, {0x1F90, 0x313}, {0x1F90, 0x345}, {0x1F91, 0x3B7},
|
||||
{0x1F91, 0x314}, {0x1F91, 0x345}, {0x1F92, 0x3B7}, {0x1F92, 0x313}, {0x1F92, 0x300}, {0x1F92, 0x345}, {0x1F93, 0x3B7}, {0x1F93, 0x314}, {0x1F93, 0x300}, {0x1F93, 0x345}, {0x1F94, 0x3B7},
|
||||
{0x1F94, 0x313}, {0x1F94, 0x301}, {0x1F94, 0x345}, {0x1F95, 0x3B7}, {0x1F95, 0x314}, {0x1F95, 0x301}, {0x1F95, 0x345}, {0x1F96, 0x3B7}, {0x1F96, 0x313}, {0x1F96, 0x342}, {0x1F96, 0x345},
|
||||
{0x1F97, 0x3B7}, {0x1F97, 0x314}, {0x1F97, 0x342}, {0x1F97, 0x345}, {0x1F98, 0x397}, {0x1F98, 0x313}, {0x1F98, 0x345}, {0x1F99, 0x397}, {0x1F99, 0x314}, {0x1F99, 0x345}, {0x1F9A, 0x397},
|
||||
{0x1F9A, 0x313}, {0x1F9A, 0x300}, {0x1F9A, 0x345}, {0x1F9B, 0x397}, {0x1F9B, 0x314}, {0x1F9B, 0x300}, {0x1F9B, 0x345}, {0x1F9C, 0x397}, {0x1F9C, 0x313}, {0x1F9C, 0x301}, {0x1F9C, 0x345},
|
||||
{0x1F9D, 0x397}, {0x1F9D, 0x314}, {0x1F9D, 0x301}, {0x1F9D, 0x345}, {0x1F9E, 0x397}, {0x1F9E, 0x313}, {0x1F9E, 0x342}, {0x1F9E, 0x345}, {0x1F9F, 0x397}, {0x1F9F, 0x314}, {0x1F9F, 0x342},
|
||||
{0x1F9F, 0x345}, {0x1FA0, 0x3C9}, {0x1FA0, 0x313}, {0x1FA0, 0x345}, {0x1FA1, 0x3C9}, {0x1FA1, 0x314}, {0x1FA1, 0x345}, {0x1FA2, 0x3C9}, {0x1FA2, 0x313}, {0x1FA2, 0x300}, {0x1FA2, 0x345},
|
||||
{0x1FA3, 0x3C9}, {0x1FA3, 0x314}, {0x1FA3, 0x300}, {0x1FA3, 0x345}, {0x1FA4, 0x3C9}, {0x1FA4, 0x313}, {0x1FA4, 0x301}, {0x1FA4, 0x345}, {0x1FA5, 0x3C9}, {0x1FA5, 0x314}, {0x1FA5, 0x301},
|
||||
{0x1FA5, 0x345}, {0x1FA6, 0x3C9}, {0x1FA6, 0x313}, {0x1FA6, 0x342}, {0x1FA6, 0x345}, {0x1FA7, 0x3C9}, {0x1FA7, 0x314}, {0x1FA7, 0x342}, {0x1FA7, 0x345}, {0x1FA8, 0x3A9}, {0x1FA8, 0x313},
|
||||
{0x1FA8, 0x345}, {0x1FA9, 0x3A9}, {0x1FA9, 0x314}, {0x1FA9, 0x345}, {0x1FAA, 0x3A9}, {0x1FAA, 0x313}, {0x1FAA, 0x300}, {0x1FAA, 0x345}, {0x1FAB, 0x3A9}, {0x1FAB, 0x314}, {0x1FAB, 0x300},
|
||||
{0x1FAB, 0x345}, {0x1FAC, 0x3A9}, {0x1FAC, 0x313}, {0x1FAC, 0x301}, {0x1FAC, 0x345}, {0x1FAD, 0x3A9}, {0x1FAD, 0x314}, {0x1FAD, 0x301}, {0x1FAD, 0x345}, {0x1FAE, 0x3A9}, {0x1FAE, 0x313},
|
||||
{0x1FAE, 0x342}, {0x1FAE, 0x345}, {0x1FAF, 0x3A9}, {0x1FAF, 0x314}, {0x1FAF, 0x342}, {0x1FAF, 0x345}, {0x1FB0, 0x3B1}, {0x1FB0, 0x306}, {0x1FB1, 0x3B1}, {0x1FB1, 0x304}, {0x1FB2, 0x3B1},
|
||||
{0x1FB2, 0x300}, {0x1FB2, 0x345}, {0x1FB3, 0x3B1}, {0x1FB3, 0x345}, {0x1FB4, 0x3B1}, {0x1FB4, 0x301}, {0x1FB4, 0x345}, {0x1FB6, 0x3B1}, {0x1FB6, 0x342}, {0x1FB7, 0x3B1}, {0x1FB7, 0x342},
|
||||
{0x1FB7, 0x345}, {0x1FB8, 0x391}, {0x1FB8, 0x306}, {0x1FB9, 0x391}, {0x1FB9, 0x304}, {0x1FBA, 0x391}, {0x1FBA, 0x300}, {0x1FBB, 0x391}, {0x1FBB, 0x301}, {0x1FBC, 0x391}, {0x1FBC, 0x345},
|
||||
{0x1FBE, 0x3B9}, {0x1FC1, 0xA8}, {0x1FC1, 0x342}, {0x1FC2, 0x3B7}, {0x1FC2, 0x300}, {0x1FC2, 0x345}, {0x1FC3, 0x3B7}, {0x1FC3, 0x345}, {0x1FC4, 0x3B7}, {0x1FC4, 0x301}, {0x1FC4, 0x345},
|
||||
{0x1FC6, 0x3B7}, {0x1FC6, 0x342}, {0x1FC7, 0x3B7}, {0x1FC7, 0x342}, {0x1FC7, 0x345}, {0x1FC8, 0x395}, {0x1FC8, 0x300}, {0x1FC9, 0x395}, {0x1FC9, 0x301}, {0x1FCA, 0x397}, {0x1FCA, 0x300},
|
||||
{0x1FCB, 0x397}, {0x1FCB, 0x301}, {0x1FCC, 0x397}, {0x1FCC, 0x345}, {0x1FCD, 0x1FBF}, {0x1FCD, 0x300}, {0x1FCE, 0x1FBF}, {0x1FCE, 0x301}, {0x1FCF, 0x1FBF}, {0x1FCF, 0x342}, {0x1FD0, 0x3B9},
|
||||
{0x1FD0, 0x306}, {0x1FD1, 0x3B9}, {0x1FD1, 0x304}, {0x1FD2, 0x3B9}, {0x1FD2, 0x308}, {0x1FD2, 0x300}, {0x1FD3, 0x3B9}, {0x1FD3, 0x308}, {0x1FD3, 0x301}, {0x1FD6, 0x3B9}, {0x1FD6, 0x342},
|
||||
{0x1FD7, 0x3B9}, {0x1FD7, 0x308}, {0x1FD7, 0x342}, {0x1FD8, 0x399}, {0x1FD8, 0x306}, {0x1FD9, 0x399}, {0x1FD9, 0x304}, {0x1FDA, 0x399}, {0x1FDA, 0x300}, {0x1FDB, 0x399}, {0x1FDB, 0x301},
|
||||
{0x1FDD, 0x1FFE}, {0x1FDD, 0x300}, {0x1FDE, 0x1FFE}, {0x1FDE, 0x301}, {0x1FDF, 0x1FFE}, {0x1FDF, 0x342}, {0x1FE0, 0x3C5}, {0x1FE0, 0x306}, {0x1FE1, 0x3C5}, {0x1FE1, 0x304}, {0x1FE2, 0x3C5},
|
||||
{0x1FE2, 0x308}, {0x1FE2, 0x300}, {0x1FE3, 0x3C5}, {0x1FE3, 0x308}, {0x1FE3, 0x301}, {0x1FE4, 0x3C1}, {0x1FE4, 0x313}, {0x1FE5, 0x3C1}, {0x1FE5, 0x314}, {0x1FE6, 0x3C5}, {0x1FE6, 0x342},
|
||||
{0x1FE7, 0x3C5}, {0x1FE7, 0x308}, {0x1FE7, 0x342}, {0x1FE8, 0x3A5}, {0x1FE8, 0x306}, {0x1FE9, 0x3A5}, {0x1FE9, 0x304}, {0x1FEA, 0x3A5}, {0x1FEA, 0x300}, {0x1FEB, 0x3A5}, {0x1FEB, 0x301},
|
||||
{0x1FEC, 0x3A1}, {0x1FEC, 0x314}, {0x1FED, 0xA8}, {0x1FED, 0x300}, {0x1FEE, 0xA8}, {0x1FEE, 0x301}, {0x1FEF, 0x60}, {0x1FF2, 0x3C9}, {0x1FF2, 0x300}, {0x1FF2, 0x345}, {0x1FF3, 0x3C9}, {0x1FF3, 0x345},
|
||||
{0x1FF4, 0x3C9}, {0x1FF4, 0x301}, {0x1FF4, 0x345}, {0x1FF6, 0x3C9}, {0x1FF6, 0x342}, {0x1FF7, 0x3C9}, {0x1FF7, 0x342}, {0x1FF7, 0x345}, {0x1FF8, 0x39F}, {0x1FF8, 0x300}, {0x1FF9, 0x39F},
|
||||
{0x1FF9, 0x301}, {0x1FFA, 0x3A9}, {0x1FFA, 0x300}, {0x1FFB, 0x3A9}, {0x1FFB, 0x301}, {0x1FFC, 0x3A9}, {0x1FFC, 0x345}, {0x1FFD, 0xB4}, {0x2000, 0x2002}, {0x2001, 0x2003}, {0x2126, 0x3A9},
|
||||
{0x212A, 0x4B}, {0x212B, 0x41}, {0x212B, 0x30A}, {0x219A, 0x2190}, {0x219A, 0x338}, {0x219B, 0x2192}, {0x219B, 0x338}, {0x21AE, 0x2194}, {0x21AE, 0x338}, {0x21CD, 0x21D0}, {0x21CD, 0x338},
|
||||
{0x21CE, 0x21D4}, {0x21CE, 0x338}, {0x21CF, 0x21D2}, {0x21CF, 0x338}, {0x2204, 0x2203}, {0x2204, 0x338}, {0x2209, 0x2208}, {0x2209, 0x338}, {0x220C, 0x220B}, {0x220C, 0x338}, {0x2224, 0x2223},
|
||||
{0x2224, 0x338}, {0x2226, 0x2225}, {0x2226, 0x338}, {0x2241, 0x223C}, {0x2241, 0x338}, {0x2244, 0x2243}, {0x2244, 0x338}, {0x2247, 0x2245}, {0x2247, 0x338}, {0x2249, 0x2248}, {0x2249, 0x338},
|
||||
{0x2260, 0x3D}, {0x2260, 0x338}, {0x2262, 0x2261}, {0x2262, 0x338}, {0x226D, 0x224D}, {0x226D, 0x338}, {0x226E, 0x3C}, {0x226E, 0x338}, {0x226F, 0x3E}, {0x226F, 0x338}, {0x2270, 0x2264},
|
||||
{0x2270, 0x338}, {0x2271, 0x2265}, {0x2271, 0x338}, {0x2274, 0x2272}, {0x2274, 0x338}, {0x2275, 0x2273}, {0x2275, 0x338}, {0x2278, 0x2276}, {0x2278, 0x338}, {0x2279, 0x2277}, {0x2279, 0x338},
|
||||
{0x2280, 0x227A}, {0x2280, 0x338}, {0x2281, 0x227B}, {0x2281, 0x338}, {0x2284, 0x2282}, {0x2284, 0x338}, {0x2285, 0x2283}, {0x2285, 0x338}, {0x2288, 0x2286}, {0x2288, 0x338}, {0x2289, 0x2287},
|
||||
{0x2289, 0x338}, {0x22AC, 0x22A2}, {0x22AC, 0x338}, {0x22AD, 0x22A8}, {0x22AD, 0x338}, {0x22AE, 0x22A9}, {0x22AE, 0x338}, {0x22AF, 0x22AB}, {0x22AF, 0x338}, {0x22E0, 0x227C}, {0x22E0, 0x338},
|
||||
{0x22E1, 0x227D}, {0x22E1, 0x338}, {0x22E2, 0x2291}, {0x22E2, 0x338}, {0x22E3, 0x2292}, {0x22E3, 0x338}, {0x22EA, 0x22B2}, {0x22EA, 0x338}, {0x22EB, 0x22B3}, {0x22EB, 0x338}, {0x22EC, 0x22B4},
|
||||
{0x22EC, 0x338}, {0x22ED, 0x22B5}, {0x22ED, 0x338}, {0x2329, 0x3008}, {0x232A, 0x3009}, {0x2ADC, 0x2ADD}, {0x2ADC, 0x338}, {0x304C, 0x304B}, {0x304C, 0x3099}, {0x304E, 0x304D}, {0x304E, 0x3099},
|
||||
{0x3050, 0x304F}, {0x3050, 0x3099}, {0x3052, 0x3051}, {0x3052, 0x3099}, {0x3054, 0x3053}, {0x3054, 0x3099}, {0x3056, 0x3055}, {0x3056, 0x3099}, {0x3058, 0x3057}, {0x3058, 0x3099}, {0x305A, 0x3059},
|
||||
{0x305A, 0x3099}, {0x305C, 0x305B}, {0x305C, 0x3099}, {0x305E, 0x305D}, {0x305E, 0x3099}, {0x3060, 0x305F}, {0x3060, 0x3099}, {0x3062, 0x3061}, {0x3062, 0x3099}, {0x3065, 0x3064}, {0x3065, 0x3099},
|
||||
{0x3067, 0x3066}, {0x3067, 0x3099}, {0x3069, 0x3068}, {0x3069, 0x3099}, {0x3070, 0x306F}, {0x3070, 0x3099}, {0x3071, 0x306F}, {0x3071, 0x309A}, {0x3073, 0x3072}, {0x3073, 0x3099}, {0x3074, 0x3072},
|
||||
{0x3074, 0x309A}, {0x3076, 0x3075}, {0x3076, 0x3099}, {0x3077, 0x3075}, {0x3077, 0x309A}, {0x3079, 0x3078}, {0x3079, 0x3099}, {0x307A, 0x3078}, {0x307A, 0x309A}, {0x307C, 0x307B}, {0x307C, 0x3099},
|
||||
{0x307D, 0x307B}, {0x307D, 0x309A}, {0x3094, 0x3046}, {0x3094, 0x3099}, {0x309E, 0x309D}, {0x309E, 0x3099}, {0x30AC, 0x30AB}, {0x30AC, 0x3099}, {0x30AE, 0x30AD}, {0x30AE, 0x3099}, {0x30B0, 0x30AF},
|
||||
{0x30B0, 0x3099}, {0x30B2, 0x30B1}, {0x30B2, 0x3099}, {0x30B4, 0x30B3}, {0x30B4, 0x3099}, {0x30B6, 0x30B5}, {0x30B6, 0x3099}, {0x30B8, 0x30B7}, {0x30B8, 0x3099}, {0x30BA, 0x30B9}, {0x30BA, 0x3099},
|
||||
{0x30BC, 0x30BB}, {0x30BC, 0x3099}, {0x30BE, 0x30BD}, {0x30BE, 0x3099}, {0x30C0, 0x30BF}, {0x30C0, 0x3099}, {0x30C2, 0x30C1}, {0x30C2, 0x3099}, {0x30C5, 0x30C4}, {0x30C5, 0x3099}, {0x30C7, 0x30C6},
|
||||
{0x30C7, 0x3099}, {0x30C9, 0x30C8}, {0x30C9, 0x3099}, {0x30D0, 0x30CF}, {0x30D0, 0x3099}, {0x30D1, 0x30CF}, {0x30D1, 0x309A}, {0x30D3, 0x30D2}, {0x30D3, 0x3099}, {0x30D4, 0x30D2}, {0x30D4, 0x309A},
|
||||
{0x30D6, 0x30D5}, {0x30D6, 0x3099}, {0x30D7, 0x30D5}, {0x30D7, 0x309A}, {0x30D9, 0x30D8}, {0x30D9, 0x3099}, {0x30DA, 0x30D8}, {0x30DA, 0x309A}, {0x30DC, 0x30DB}, {0x30DC, 0x3099}, {0x30DD, 0x30DB},
|
||||
{0x30DD, 0x309A}, {0x30F4, 0x30A6}, {0x30F4, 0x3099}, {0x30F7, 0x30EF}, {0x30F7, 0x3099}, {0x30F8, 0x30F0}, {0x30F8, 0x3099}, {0x30F9, 0x30F1}, {0x30F9, 0x3099}, {0x30FA, 0x30F2}, {0x30FA, 0x3099},
|
||||
{0x30FE, 0x30FD}, {0x30FE, 0x3099}, {0xF900, 0x8C48}, {0xF901, 0x66F4}, {0xF902, 0x8ECA}, {0xF903, 0x8CC8}, {0xF904, 0x6ED1}, {0xF905, 0x4E32}, {0xF906, 0x53E5}, {0xF907, 0x9F9C}, {0xF908, 0x9F9C},
|
||||
{0xF909, 0x5951}, {0xF90A, 0x91D1}, {0xF90B, 0x5587}, {0xF90C, 0x5948}, {0xF90D, 0x61F6}, {0xF90E, 0x7669}, {0xF90F, 0x7F85}, {0xF910, 0x863F}, {0xF911, 0x87BA}, {0xF912, 0x88F8}, {0xF913, 0x908F},
|
||||
{0xF914, 0x6A02}, {0xF915, 0x6D1B}, {0xF916, 0x70D9}, {0xF917, 0x73DE}, {0xF918, 0x843D}, {0xF919, 0x916A}, {0xF91A, 0x99F1}, {0xF91B, 0x4E82}, {0xF91C, 0x5375}, {0xF91D, 0x6B04}, {0xF91E, 0x721B},
|
||||
{0xF91F, 0x862D}, {0xF920, 0x9E1E}, {0xF921, 0x5D50}, {0xF922, 0x6FEB}, {0xF923, 0x85CD}, {0xF924, 0x8964}, {0xF925, 0x62C9}, {0xF926, 0x81D8}, {0xF927, 0x881F}, {0xF928, 0x5ECA}, {0xF929, 0x6717},
|
||||
{0xF92A, 0x6D6A}, {0xF92B, 0x72FC}, {0xF92C, 0x90CE}, {0xF92D, 0x4F86}, {0xF92E, 0x51B7}, {0xF92F, 0x52DE}, {0xF930, 0x64C4}, {0xF931, 0x6AD3}, {0xF932, 0x7210}, {0xF933, 0x76E7}, {0xF934, 0x8001},
|
||||
{0xF935, 0x8606}, {0xF936, 0x865C}, {0xF937, 0x8DEF}, {0xF938, 0x9732}, {0xF939, 0x9B6F}, {0xF93A, 0x9DFA}, {0xF93B, 0x788C}, {0xF93C, 0x797F}, {0xF93D, 0x7DA0}, {0xF93E, 0x83C9}, {0xF93F, 0x9304},
|
||||
{0xF940, 0x9E7F}, {0xF941, 0x8AD6}, {0xF942, 0x58DF}, {0xF943, 0x5F04}, {0xF944, 0x7C60}, {0xF945, 0x807E}, {0xF946, 0x7262}, {0xF947, 0x78CA}, {0xF948, 0x8CC2}, {0xF949, 0x96F7}, {0xF94A, 0x58D8},
|
||||
{0xF94B, 0x5C62}, {0xF94C, 0x6A13}, {0xF94D, 0x6DDA}, {0xF94E, 0x6F0F}, {0xF94F, 0x7D2F}, {0xF950, 0x7E37}, {0xF951, 0x964B}, {0xF952, 0x52D2}, {0xF953, 0x808B}, {0xF954, 0x51DC}, {0xF955, 0x51CC},
|
||||
{0xF956, 0x7A1C}, {0xF957, 0x7DBE}, {0xF958, 0x83F1}, {0xF959, 0x9675}, {0xF95A, 0x8B80}, {0xF95B, 0x62CF}, {0xF95C, 0x6A02}, {0xF95D, 0x8AFE}, {0xF95E, 0x4E39}, {0xF95F, 0x5BE7}, {0xF960, 0x6012},
|
||||
{0xF961, 0x7387}, {0xF962, 0x7570}, {0xF963, 0x5317}, {0xF964, 0x78FB}, {0xF965, 0x4FBF}, {0xF966, 0x5FA9}, {0xF967, 0x4E0D}, {0xF968, 0x6CCC}, {0xF969, 0x6578}, {0xF96A, 0x7D22}, {0xF96B, 0x53C3},
|
||||
{0xF96C, 0x585E}, {0xF96D, 0x7701}, {0xF96E, 0x8449}, {0xF96F, 0x8AAA}, {0xF970, 0x6BBA}, {0xF971, 0x8FB0}, {0xF972, 0x6C88}, {0xF973, 0x62FE}, {0xF974, 0x82E5}, {0xF975, 0x63A0}, {0xF976, 0x7565},
|
||||
{0xF977, 0x4EAE}, {0xF978, 0x5169}, {0xF979, 0x51C9}, {0xF97A, 0x6881}, {0xF97B, 0x7CE7}, {0xF97C, 0x826F}, {0xF97D, 0x8AD2}, {0xF97E, 0x91CF}, {0xF97F, 0x52F5}, {0xF980, 0x5442}, {0xF981, 0x5973},
|
||||
{0xF982, 0x5EEC}, {0xF983, 0x65C5}, {0xF984, 0x6FFE}, {0xF985, 0x792A}, {0xF986, 0x95AD}, {0xF987, 0x9A6A}, {0xF988, 0x9E97}, {0xF989, 0x9ECE}, {0xF98A, 0x529B}, {0xF98B, 0x66C6}, {0xF98C, 0x6B77},
|
||||
{0xF98D, 0x8F62}, {0xF98E, 0x5E74}, {0xF98F, 0x6190}, {0xF990, 0x6200}, {0xF991, 0x649A}, {0xF992, 0x6F23}, {0xF993, 0x7149}, {0xF994, 0x7489}, {0xF995, 0x79CA}, {0xF996, 0x7DF4}, {0xF997, 0x806F},
|
||||
{0xF998, 0x8F26}, {0xF999, 0x84EE}, {0xF99A, 0x9023}, {0xF99B, 0x934A}, {0xF99C, 0x5217}, {0xF99D, 0x52A3}, {0xF99E, 0x54BD}, {0xF99F, 0x70C8}, {0xF9A0, 0x88C2}, {0xF9A1, 0x8AAA}, {0xF9A2, 0x5EC9},
|
||||
{0xF9A3, 0x5FF5}, {0xF9A4, 0x637B}, {0xF9A5, 0x6BAE}, {0xF9A6, 0x7C3E}, {0xF9A7, 0x7375}, {0xF9A8, 0x4EE4}, {0xF9A9, 0x56F9}, {0xF9AA, 0x5BE7}, {0xF9AB, 0x5DBA}, {0xF9AC, 0x601C}, {0xF9AD, 0x73B2},
|
||||
{0xF9AE, 0x7469}, {0xF9AF, 0x7F9A}, {0xF9B0, 0x8046}, {0xF9B1, 0x9234}, {0xF9B2, 0x96F6}, {0xF9B3, 0x9748}, {0xF9B4, 0x9818}, {0xF9B5, 0x4F8B}, {0xF9B6, 0x79AE}, {0xF9B7, 0x91B4}, {0xF9B8, 0x96B8},
|
||||
{0xF9B9, 0x60E1}, {0xF9BA, 0x4E86}, {0xF9BB, 0x50DA}, {0xF9BC, 0x5BEE}, {0xF9BD, 0x5C3F}, {0xF9BE, 0x6599}, {0xF9BF, 0x6A02}, {0xF9C0, 0x71CE}, {0xF9C1, 0x7642}, {0xF9C2, 0x84FC}, {0xF9C3, 0x907C},
|
||||
{0xF9C4, 0x9F8D}, {0xF9C5, 0x6688}, {0xF9C6, 0x962E}, {0xF9C7, 0x5289}, {0xF9C8, 0x677B}, {0xF9C9, 0x67F3}, {0xF9CA, 0x6D41}, {0xF9CB, 0x6E9C}, {0xF9CC, 0x7409}, {0xF9CD, 0x7559}, {0xF9CE, 0x786B},
|
||||
{0xF9CF, 0x7D10}, {0xF9D0, 0x985E}, {0xF9D1, 0x516D}, {0xF9D2, 0x622E}, {0xF9D3, 0x9678}, {0xF9D4, 0x502B}, {0xF9D5, 0x5D19}, {0xF9D6, 0x6DEA}, {0xF9D7, 0x8F2A}, {0xF9D8, 0x5F8B}, {0xF9D9, 0x6144},
|
||||
{0xF9DA, 0x6817}, {0xF9DB, 0x7387}, {0xF9DC, 0x9686}, {0xF9DD, 0x5229}, {0xF9DE, 0x540F}, {0xF9DF, 0x5C65}, {0xF9E0, 0x6613}, {0xF9E1, 0x674E}, {0xF9E2, 0x68A8}, {0xF9E3, 0x6CE5}, {0xF9E4, 0x7406},
|
||||
{0xF9E5, 0x75E2}, {0xF9E6, 0x7F79}, {0xF9E7, 0x88CF}, {0xF9E8, 0x88E1}, {0xF9E9, 0x91CC}, {0xF9EA, 0x96E2}, {0xF9EB, 0x533F}, {0xF9EC, 0x6EBA}, {0xF9ED, 0x541D}, {0xF9EE, 0x71D0}, {0xF9EF, 0x7498},
|
||||
{0xF9F0, 0x85FA}, {0xF9F1, 0x96A3}, {0xF9F2, 0x9C57}, {0xF9F3, 0x9E9F}, {0xF9F4, 0x6797}, {0xF9F5, 0x6DCB}, {0xF9F6, 0x81E8}, {0xF9F7, 0x7ACB}, {0xF9F8, 0x7B20}, {0xF9F9, 0x7C92}, {0xF9FA, 0x72C0},
|
||||
{0xF9FB, 0x7099}, {0xF9FC, 0x8B58}, {0xF9FD, 0x4EC0}, {0xF9FE, 0x8336}, {0xF9FF, 0x523A}, {0xFA00, 0x5207}, {0xFA01, 0x5EA6}, {0xFA02, 0x62D3}, {0xFA03, 0x7CD6}, {0xFA04, 0x5B85}, {0xFA05, 0x6D1E},
|
||||
{0xFA06, 0x66B4}, {0xFA07, 0x8F3B}, {0xFA08, 0x884C}, {0xFA09, 0x964D}, {0xFA0A, 0x898B}, {0xFA0B, 0x5ED3}, {0xFA0C, 0x5140}, {0xFA0D, 0x55C0}, {0xFA10, 0x585A}, {0xFA12, 0x6674}, {0xFA15, 0x51DE},
|
||||
{0xFA16, 0x732A}, {0xFA17, 0x76CA}, {0xFA18, 0x793C}, {0xFA19, 0x795E}, {0xFA1A, 0x7965}, {0xFA1B, 0x798F}, {0xFA1C, 0x9756}, {0xFA1D, 0x7CBE}, {0xFA1E, 0x7FBD}, {0xFA20, 0x8612}, {0xFA22, 0x8AF8},
|
||||
{0xFA25, 0x9038}, {0xFA26, 0x90FD}, {0xFA2A, 0x98EF}, {0xFA2B, 0x98FC}, {0xFA2C, 0x9928}, {0xFA2D, 0x9DB4}, {0xFA2E, 0x90DE}, {0xFA2F, 0x96B7}, {0xFA30, 0x4FAE}, {0xFA31, 0x50E7}, {0xFA32, 0x514D},
|
||||
{0xFA33, 0x52C9}, {0xFA34, 0x52E4}, {0xFA35, 0x5351}, {0xFA36, 0x559D}, {0xFA37, 0x5606}, {0xFA38, 0x5668}, {0xFA39, 0x5840}, {0xFA3A, 0x58A8}, {0xFA3B, 0x5C64}, {0xFA3C, 0x5C6E}, {0xFA3D, 0x6094},
|
||||
{0xFA3E, 0x6168}, {0xFA3F, 0x618E}, {0xFA40, 0x61F2}, {0xFA41, 0x654F}, {0xFA42, 0x65E2}, {0xFA43, 0x6691}, {0xFA44, 0x6885}, {0xFA45, 0x6D77}, {0xFA46, 0x6E1A}, {0xFA47, 0x6F22}, {0xFA48, 0x716E},
|
||||
{0xFA49, 0x722B}, {0xFA4A, 0x7422}, {0xFA4B, 0x7891}, {0xFA4C, 0x793E}, {0xFA4D, 0x7949}, {0xFA4E, 0x7948}, {0xFA4F, 0x7950}, {0xFA50, 0x7956}, {0xFA51, 0x795D}, {0xFA52, 0x798D}, {0xFA53, 0x798E},
|
||||
{0xFA54, 0x7A40}, {0xFA55, 0x7A81}, {0xFA56, 0x7BC0}, {0xFA57, 0x7DF4}, {0xFA58, 0x7E09}, {0xFA59, 0x7E41}, {0xFA5A, 0x7F72}, {0xFA5B, 0x8005}, {0xFA5C, 0x81ED}, {0xFA5D, 0x8279}, {0xFA5E, 0x8279},
|
||||
{0xFA5F, 0x8457}, {0xFA60, 0x8910}, {0xFA61, 0x8996}, {0xFA62, 0x8B01}, {0xFA63, 0x8B39}, {0xFA64, 0x8CD3}, {0xFA65, 0x8D08}, {0xFA66, 0x8FB6}, {0xFA67, 0x9038}, {0xFA68, 0x96E3}, {0xFA69, 0x97FF},
|
||||
{0xFA6A, 0x983B}, {0xFA6B, 0x6075}, {0xFA6C, 0x242EE}, {0xFA6D, 0x8218}, {0xFA70, 0x4E26}, {0xFA71, 0x51B5}, {0xFA72, 0x5168}, {0xFA73, 0x4F80}, {0xFA74, 0x5145}, {0xFA75, 0x5180}, {0xFA76, 0x52C7},
|
||||
{0xFA77, 0x52FA}, {0xFA78, 0x559D}, {0xFA79, 0x5555}, {0xFA7A, 0x5599}, {0xFA7B, 0x55E2}, {0xFA7C, 0x585A}, {0xFA7D, 0x58B3}, {0xFA7E, 0x5944}, {0xFA7F, 0x5954}, {0xFA80, 0x5A62}, {0xFA81, 0x5B28},
|
||||
{0xFA82, 0x5ED2}, {0xFA83, 0x5ED9}, {0xFA84, 0x5F69}, {0xFA85, 0x5FAD}, {0xFA86, 0x60D8}, {0xFA87, 0x614E}, {0xFA88, 0x6108}, {0xFA89, 0x618E}, {0xFA8A, 0x6160}, {0xFA8B, 0x61F2}, {0xFA8C, 0x6234},
|
||||
{0xFA8D, 0x63C4}, {0xFA8E, 0x641C}, {0xFA8F, 0x6452}, {0xFA90, 0x6556}, {0xFA91, 0x6674}, {0xFA92, 0x6717}, {0xFA93, 0x671B}, {0xFA94, 0x6756}, {0xFA95, 0x6B79}, {0xFA96, 0x6BBA}, {0xFA97, 0x6D41},
|
||||
{0xFA98, 0x6EDB}, {0xFA99, 0x6ECB}, {0xFA9A, 0x6F22}, {0xFA9B, 0x701E}, {0xFA9C, 0x716E}, {0xFA9D, 0x77A7}, {0xFA9E, 0x7235}, {0xFA9F, 0x72AF}, {0xFAA0, 0x732A}, {0xFAA1, 0x7471}, {0xFAA2, 0x7506},
|
||||
{0xFAA3, 0x753B}, {0xFAA4, 0x761D}, {0xFAA5, 0x761F}, {0xFAA6, 0x76CA}, {0xFAA7, 0x76DB}, {0xFAA8, 0x76F4}, {0xFAA9, 0x774A}, {0xFAAA, 0x7740}, {0xFAAB, 0x78CC}, {0xFAAC, 0x7AB1}, {0xFAAD, 0x7BC0},
|
||||
{0xFAAE, 0x7C7B}, {0xFAAF, 0x7D5B}, {0xFAB0, 0x7DF4}, {0xFAB1, 0x7F3E}, {0xFAB2, 0x8005}, {0xFAB3, 0x8352}, {0xFAB4, 0x83EF}, {0xFAB5, 0x8779}, {0xFAB6, 0x8941}, {0xFAB7, 0x8986}, {0xFAB8, 0x8996},
|
||||
{0xFAB9, 0x8ABF}, {0xFABA, 0x8AF8}, {0xFABB, 0x8ACB}, {0xFABC, 0x8B01}, {0xFABD, 0x8AFE}, {0xFABE, 0x8AED}, {0xFABF, 0x8B39}, {0xFAC0, 0x8B8A}, {0xFAC1, 0x8D08}, {0xFAC2, 0x8F38}, {0xFAC3, 0x9072},
|
||||
{0xFAC4, 0x9199}, {0xFAC5, 0x9276}, {0xFAC6, 0x967C}, {0xFAC7, 0x96E3}, {0xFAC8, 0x9756}, {0xFAC9, 0x97DB}, {0xFACA, 0x97FF}, {0xFACB, 0x980B}, {0xFACC, 0x983B}, {0xFACD, 0x9B12}, {0xFACE, 0x9F9C},
|
||||
{0xFACF, 0x2284A}, {0xFAD0, 0x22844}, {0xFAD1, 0x233D5}, {0xFAD2, 0x3B9D}, {0xFAD3, 0x4018}, {0xFAD4, 0x4039}, {0xFAD5, 0x25249}, {0xFAD6, 0x25CD0}, {0xFAD7, 0x27ED3}, {0xFAD8, 0x9F43},
|
||||
{0xFAD9, 0x9F8E}, {0xFB1D, 0x5D9}, {0xFB1D, 0x5B4}, {0xFB1F, 0x5F2}, {0xFB1F, 0x5B7}, {0xFB2A, 0x5E9}, {0xFB2A, 0x5C1}, {0xFB2B, 0x5E9}, {0xFB2B, 0x5C2}, {0xFB2C, 0x5E9}, {0xFB2C, 0x5BC},
|
||||
{0xFB2C, 0x5C1}, {0xFB2D, 0x5E9}, {0xFB2D, 0x5BC}, {0xFB2D, 0x5C2}, {0xFB2E, 0x5D0}, {0xFB2E, 0x5B7}, {0xFB2F, 0x5D0}, {0xFB2F, 0x5B8}, {0xFB30, 0x5D0}, {0xFB30, 0x5BC}, {0xFB31, 0x5D1},
|
||||
{0xFB31, 0x5BC}, {0xFB32, 0x5D2}, {0xFB32, 0x5BC}, {0xFB33, 0x5D3}, {0xFB33, 0x5BC}, {0xFB34, 0x5D4}, {0xFB34, 0x5BC}, {0xFB35, 0x5D5}, {0xFB35, 0x5BC}, {0xFB36, 0x5D6}, {0xFB36, 0x5BC},
|
||||
{0xFB38, 0x5D8}, {0xFB38, 0x5BC}, {0xFB39, 0x5D9}, {0xFB39, 0x5BC}, {0xFB3A, 0x5DA}, {0xFB3A, 0x5BC}, {0xFB3B, 0x5DB}, {0xFB3B, 0x5BC}, {0xFB3C, 0x5DC}, {0xFB3C, 0x5BC}, {0xFB3E, 0x5DE},
|
||||
{0xFB3E, 0x5BC}, {0xFB40, 0x5E0}, {0xFB40, 0x5BC}, {0xFB41, 0x5E1}, {0xFB41, 0x5BC}, {0xFB43, 0x5E3}, {0xFB43, 0x5BC}, {0xFB44, 0x5E4}, {0xFB44, 0x5BC}, {0xFB46, 0x5E6}, {0xFB46, 0x5BC},
|
||||
{0xFB47, 0x5E7}, {0xFB47, 0x5BC}, {0xFB48, 0x5E8}, {0xFB48, 0x5BC}, {0xFB49, 0x5E9}, {0xFB49, 0x5BC}, {0xFB4A, 0x5EA}, {0xFB4A, 0x5BC}, {0xFB4B, 0x5D5}, {0xFB4B, 0x5B9}, {0xFB4C, 0x5D1},
|
||||
{0xFB4C, 0x5BF}, {0xFB4D, 0x5DB}, {0xFB4D, 0x5BF}, {0xFB4E, 0x5E4}, {0xFB4E, 0x5BF}, {0x1109A, 0x11099}, {0x1109A, 0x110BA}, {0x1109C, 0x1109B}, {0x1109C, 0x110BA}, {0x110AB, 0x110A5},
|
||||
{0x110AB, 0x110BA}, {0x1112E, 0x11131}, {0x1112E, 0x11127}, {0x1112F, 0x11132}, {0x1112F, 0x11127}, {0x1134B, 0x11347}, {0x1134B, 0x1133E}, {0x1134C, 0x11347}, {0x1134C, 0x11357}, {0x114BB, 0x114B9},
|
||||
{0x114BB, 0x114BA}, {0x114BC, 0x114B9}, {0x114BC, 0x114B0}, {0x114BE, 0x114B9}, {0x114BE, 0x114BD}, {0x115BA, 0x115B8}, {0x115BA, 0x115AF}, {0x115BB, 0x115B9}, {0x115BB, 0x115AF}, {0x1D15E, 0x1D157},
|
||||
{0x1D15E, 0x1D165}, {0x1D15F, 0x1D158}, {0x1D15F, 0x1D165}, {0x1D160, 0x1D158}, {0x1D160, 0x1D165}, {0x1D160, 0x1D16E}, {0x1D161, 0x1D158}, {0x1D161, 0x1D165}, {0x1D161, 0x1D16F}, {0x1D162, 0x1D158},
|
||||
{0x1D162, 0x1D165}, {0x1D162, 0x1D170}, {0x1D163, 0x1D158}, {0x1D163, 0x1D165}, {0x1D163, 0x1D171}, {0x1D164, 0x1D158}, {0x1D164, 0x1D165}, {0x1D164, 0x1D172}, {0x1D1BB, 0x1D1B9}, {0x1D1BB, 0x1D165},
|
||||
{0x1D1BC, 0x1D1BA}, {0x1D1BC, 0x1D165}, {0x1D1BD, 0x1D1B9}, {0x1D1BD, 0x1D165}, {0x1D1BD, 0x1D16E}, {0x1D1BE, 0x1D1BA}, {0x1D1BE, 0x1D165}, {0x1D1BE, 0x1D16E}, {0x1D1BF, 0x1D1B9}, {0x1D1BF, 0x1D165},
|
||||
{0x1D1BF, 0x1D16F}, {0x1D1C0, 0x1D1BA}, {0x1D1C0, 0x1D165}, {0x1D1C0, 0x1D16F}, {0x2F800, 0x4E3D}, {0x2F801, 0x4E38}, {0x2F802, 0x4E41}, {0x2F803, 0x20122}, {0x2F804, 0x4F60}, {0x2F805, 0x4FAE},
|
||||
{0x2F806, 0x4FBB}, {0x2F807, 0x5002}, {0x2F808, 0x507A}, {0x2F809, 0x5099}, {0x2F80A, 0x50E7}, {0x2F80B, 0x50CF}, {0x2F80C, 0x349E}, {0x2F80D, 0x2063A}, {0x2F80E, 0x514D}, {0x2F80F, 0x5154},
|
||||
{0x2F810, 0x5164}, {0x2F811, 0x5177}, {0x2F812, 0x2051C}, {0x2F813, 0x34B9}, {0x2F814, 0x5167}, {0x2F815, 0x518D}, {0x2F816, 0x2054B}, {0x2F817, 0x5197}, {0x2F818, 0x51A4}, {0x2F819, 0x4ECC},
|
||||
{0x2F81A, 0x51AC}, {0x2F81B, 0x51B5}, {0x2F81C, 0x291DF}, {0x2F81D, 0x51F5}, {0x2F81E, 0x5203}, {0x2F81F, 0x34DF}, {0x2F820, 0x523B}, {0x2F821, 0x5246}, {0x2F822, 0x5272}, {0x2F823, 0x5277},
|
||||
{0x2F824, 0x3515}, {0x2F825, 0x52C7}, {0x2F826, 0x52C9}, {0x2F827, 0x52E4}, {0x2F828, 0x52FA}, {0x2F829, 0x5305}, {0x2F82A, 0x5306}, {0x2F82B, 0x5317}, {0x2F82C, 0x5349}, {0x2F82D, 0x5351},
|
||||
{0x2F82E, 0x535A}, {0x2F82F, 0x5373}, {0x2F830, 0x537D}, {0x2F831, 0x537F}, {0x2F832, 0x537F}, {0x2F833, 0x537F}, {0x2F834, 0x20A2C}, {0x2F835, 0x7070}, {0x2F836, 0x53CA}, {0x2F837, 0x53DF},
|
||||
{0x2F838, 0x20B63}, {0x2F839, 0x53EB}, {0x2F83A, 0x53F1}, {0x2F83B, 0x5406}, {0x2F83C, 0x549E}, {0x2F83D, 0x5438}, {0x2F83E, 0x5448}, {0x2F83F, 0x5468}, {0x2F840, 0x54A2}, {0x2F841, 0x54F6},
|
||||
{0x2F842, 0x5510}, {0x2F843, 0x5553}, {0x2F844, 0x5563}, {0x2F845, 0x5584}, {0x2F846, 0x5584}, {0x2F847, 0x5599}, {0x2F848, 0x55AB}, {0x2F849, 0x55B3}, {0x2F84A, 0x55C2}, {0x2F84B, 0x5716},
|
||||
{0x2F84C, 0x5606}, {0x2F84D, 0x5717}, {0x2F84E, 0x5651}, {0x2F84F, 0x5674}, {0x2F850, 0x5207}, {0x2F851, 0x58EE}, {0x2F852, 0x57CE}, {0x2F853, 0x57F4}, {0x2F854, 0x580D}, {0x2F855, 0x578B},
|
||||
{0x2F856, 0x5832}, {0x2F857, 0x5831}, {0x2F858, 0x58AC}, {0x2F859, 0x214E4}, {0x2F85A, 0x58F2}, {0x2F85B, 0x58F7}, {0x2F85C, 0x5906}, {0x2F85D, 0x591A}, {0x2F85E, 0x5922}, {0x2F85F, 0x5962},
|
||||
{0x2F860, 0x216A8}, {0x2F861, 0x216EA}, {0x2F862, 0x59EC}, {0x2F863, 0x5A1B}, {0x2F864, 0x5A27}, {0x2F865, 0x59D8}, {0x2F866, 0x5A66}, {0x2F867, 0x36EE}, {0x2F868, 0x36FC}, {0x2F869, 0x5B08},
|
||||
{0x2F86A, 0x5B3E}, {0x2F86B, 0x5B3E}, {0x2F86C, 0x219C8}, {0x2F86D, 0x5BC3}, {0x2F86E, 0x5BD8}, {0x2F86F, 0x5BE7}, {0x2F870, 0x5BF3}, {0x2F871, 0x21B18}, {0x2F872, 0x5BFF}, {0x2F873, 0x5C06},
|
||||
{0x2F874, 0x5F53}, {0x2F875, 0x5C22}, {0x2F876, 0x3781}, {0x2F877, 0x5C60}, {0x2F878, 0x5C6E}, {0x2F879, 0x5CC0}, {0x2F87A, 0x5C8D}, {0x2F87B, 0x21DE4}, {0x2F87C, 0x5D43}, {0x2F87D, 0x21DE6},
|
||||
{0x2F87E, 0x5D6E}, {0x2F87F, 0x5D6B}, {0x2F880, 0x5D7C}, {0x2F881, 0x5DE1}, {0x2F882, 0x5DE2}, {0x2F883, 0x382F}, {0x2F884, 0x5DFD}, {0x2F885, 0x5E28}, {0x2F886, 0x5E3D}, {0x2F887, 0x5E69},
|
||||
{0x2F888, 0x3862}, {0x2F889, 0x22183}, {0x2F88A, 0x387C}, {0x2F88B, 0x5EB0}, {0x2F88C, 0x5EB3}, {0x2F88D, 0x5EB6}, {0x2F88E, 0x5ECA}, {0x2F88F, 0x2A392}, {0x2F890, 0x5EFE}, {0x2F891, 0x22331},
|
||||
{0x2F892, 0x22331}, {0x2F893, 0x8201}, {0x2F894, 0x5F22}, {0x2F895, 0x5F22}, {0x2F896, 0x38C7}, {0x2F897, 0x232B8}, {0x2F898, 0x261DA}, {0x2F899, 0x5F62}, {0x2F89A, 0x5F6B}, {0x2F89B, 0x38E3},
|
||||
{0x2F89C, 0x5F9A}, {0x2F89D, 0x5FCD}, {0x2F89E, 0x5FD7}, {0x2F89F, 0x5FF9}, {0x2F8A0, 0x6081}, {0x2F8A1, 0x393A}, {0x2F8A2, 0x391C}, {0x2F8A3, 0x6094}, {0x2F8A4, 0x226D4}, {0x2F8A5, 0x60C7},
|
||||
{0x2F8A6, 0x6148}, {0x2F8A7, 0x614C}, {0x2F8A8, 0x614E}, {0x2F8A9, 0x614C}, {0x2F8AA, 0x617A}, {0x2F8AB, 0x618E}, {0x2F8AC, 0x61B2}, {0x2F8AD, 0x61A4}, {0x2F8AE, 0x61AF}, {0x2F8AF, 0x61DE},
|
||||
{0x2F8B0, 0x61F2}, {0x2F8B1, 0x61F6}, {0x2F8B2, 0x6210}, {0x2F8B3, 0x621B}, {0x2F8B4, 0x625D}, {0x2F8B5, 0x62B1}, {0x2F8B6, 0x62D4}, {0x2F8B7, 0x6350}, {0x2F8B8, 0x22B0C}, {0x2F8B9, 0x633D},
|
||||
{0x2F8BA, 0x62FC}, {0x2F8BB, 0x6368}, {0x2F8BC, 0x6383}, {0x2F8BD, 0x63E4}, {0x2F8BE, 0x22BF1}, {0x2F8BF, 0x6422}, {0x2F8C0, 0x63C5}, {0x2F8C1, 0x63A9}, {0x2F8C2, 0x3A2E}, {0x2F8C3, 0x6469},
|
||||
{0x2F8C4, 0x647E}, {0x2F8C5, 0x649D}, {0x2F8C6, 0x6477}, {0x2F8C7, 0x3A6C}, {0x2F8C8, 0x654F}, {0x2F8C9, 0x656C}, {0x2F8CA, 0x2300A}, {0x2F8CB, 0x65E3}, {0x2F8CC, 0x66F8}, {0x2F8CD, 0x6649},
|
||||
{0x2F8CE, 0x3B19}, {0x2F8CF, 0x6691}, {0x2F8D0, 0x3B08}, {0x2F8D1, 0x3AE4}, {0x2F8D2, 0x5192}, {0x2F8D3, 0x5195}, {0x2F8D4, 0x6700}, {0x2F8D5, 0x669C}, {0x2F8D6, 0x80AD}, {0x2F8D7, 0x43D9},
|
||||
{0x2F8D8, 0x6717}, {0x2F8D9, 0x671B}, {0x2F8DA, 0x6721}, {0x2F8DB, 0x675E}, {0x2F8DC, 0x6753}, {0x2F8DD, 0x233C3}, {0x2F8DE, 0x3B49}, {0x2F8DF, 0x67FA}, {0x2F8E0, 0x6785}, {0x2F8E1, 0x6852},
|
||||
{0x2F8E2, 0x6885}, {0x2F8E3, 0x2346D}, {0x2F8E4, 0x688E}, {0x2F8E5, 0x681F}, {0x2F8E6, 0x6914}, {0x2F8E7, 0x3B9D}, {0x2F8E8, 0x6942}, {0x2F8E9, 0x69A3}, {0x2F8EA, 0x69EA}, {0x2F8EB, 0x6AA8},
|
||||
{0x2F8EC, 0x236A3}, {0x2F8ED, 0x6ADB}, {0x2F8EE, 0x3C18}, {0x2F8EF, 0x6B21}, {0x2F8F0, 0x238A7}, {0x2F8F1, 0x6B54}, {0x2F8F2, 0x3C4E}, {0x2F8F3, 0x6B72}, {0x2F8F4, 0x6B9F}, {0x2F8F5, 0x6BBA},
|
||||
{0x2F8F6, 0x6BBB}, {0x2F8F7, 0x23A8D}, {0x2F8F8, 0x21D0B}, {0x2F8F9, 0x23AFA}, {0x2F8FA, 0x6C4E}, {0x2F8FB, 0x23CBC}, {0x2F8FC, 0x6CBF}, {0x2F8FD, 0x6CCD}, {0x2F8FE, 0x6C67}, {0x2F8FF, 0x6D16},
|
||||
{0x2F900, 0x6D3E}, {0x2F901, 0x6D77}, {0x2F902, 0x6D41}, {0x2F903, 0x6D69}, {0x2F904, 0x6D78}, {0x2F905, 0x6D85}, {0x2F906, 0x23D1E}, {0x2F907, 0x6D34}, {0x2F908, 0x6E2F}, {0x2F909, 0x6E6E},
|
||||
{0x2F90A, 0x3D33}, {0x2F90B, 0x6ECB}, {0x2F90C, 0x6EC7}, {0x2F90D, 0x23ED1}, {0x2F90E, 0x6DF9}, {0x2F90F, 0x6F6E}, {0x2F910, 0x23F5E}, {0x2F911, 0x23F8E}, {0x2F912, 0x6FC6}, {0x2F913, 0x7039},
|
||||
{0x2F914, 0x701E}, {0x2F915, 0x701B}, {0x2F916, 0x3D96}, {0x2F917, 0x704A}, {0x2F918, 0x707D}, {0x2F919, 0x7077}, {0x2F91A, 0x70AD}, {0x2F91B, 0x20525}, {0x2F91C, 0x7145}, {0x2F91D, 0x24263},
|
||||
{0x2F91E, 0x719C}, {0x2F91F, 0x243AB}, {0x2F920, 0x7228}, {0x2F921, 0x7235}, {0x2F922, 0x7250}, {0x2F923, 0x24608}, {0x2F924, 0x7280}, {0x2F925, 0x7295}, {0x2F926, 0x24735}, {0x2F927, 0x24814},
|
||||
{0x2F928, 0x737A}, {0x2F929, 0x738B}, {0x2F92A, 0x3EAC}, {0x2F92B, 0x73A5}, {0x2F92C, 0x3EB8}, {0x2F92D, 0x3EB8}, {0x2F92E, 0x7447}, {0x2F92F, 0x745C}, {0x2F930, 0x7471}, {0x2F931, 0x7485},
|
||||
{0x2F932, 0x74CA}, {0x2F933, 0x3F1B}, {0x2F934, 0x7524}, {0x2F935, 0x24C36}, {0x2F936, 0x753E}, {0x2F937, 0x24C92}, {0x2F938, 0x7570}, {0x2F939, 0x2219F}, {0x2F93A, 0x7610}, {0x2F93B, 0x24FA1},
|
||||
{0x2F93C, 0x24FB8}, {0x2F93D, 0x25044}, {0x2F93E, 0x3FFC}, {0x2F93F, 0x4008}, {0x2F940, 0x76F4}, {0x2F941, 0x250F3}, {0x2F942, 0x250F2}, {0x2F943, 0x25119}, {0x2F944, 0x25133}, {0x2F945, 0x771E},
|
||||
{0x2F946, 0x771F}, {0x2F947, 0x771F}, {0x2F948, 0x774A}, {0x2F949, 0x4039}, {0x2F94A, 0x778B}, {0x2F94B, 0x4046}, {0x2F94C, 0x4096}, {0x2F94D, 0x2541D}, {0x2F94E, 0x784E}, {0x2F94F, 0x788C},
|
||||
{0x2F950, 0x78CC}, {0x2F951, 0x40E3}, {0x2F952, 0x25626}, {0x2F953, 0x7956}, {0x2F954, 0x2569A}, {0x2F955, 0x256C5}, {0x2F956, 0x798F}, {0x2F957, 0x79EB}, {0x2F958, 0x412F}, {0x2F959, 0x7A40},
|
||||
{0x2F95A, 0x7A4A}, {0x2F95B, 0x7A4F}, {0x2F95C, 0x2597C}, {0x2F95D, 0x25AA7}, {0x2F95E, 0x25AA7}, {0x2F95F, 0x7AEE}, {0x2F960, 0x4202}, {0x2F961, 0x25BAB}, {0x2F962, 0x7BC6}, {0x2F963, 0x7BC9},
|
||||
{0x2F964, 0x4227}, {0x2F965, 0x25C80}, {0x2F966, 0x7CD2}, {0x2F967, 0x42A0}, {0x2F968, 0x7CE8}, {0x2F969, 0x7CE3}, {0x2F96A, 0x7D00}, {0x2F96B, 0x25F86}, {0x2F96C, 0x7D63}, {0x2F96D, 0x4301},
|
||||
{0x2F96E, 0x7DC7}, {0x2F96F, 0x7E02}, {0x2F970, 0x7E45}, {0x2F971, 0x4334}, {0x2F972, 0x26228}, {0x2F973, 0x26247}, {0x2F974, 0x4359}, {0x2F975, 0x262D9}, {0x2F976, 0x7F7A}, {0x2F977, 0x2633E},
|
||||
{0x2F978, 0x7F95}, {0x2F979, 0x7FFA}, {0x2F97A, 0x8005}, {0x2F97B, 0x264DA}, {0x2F97C, 0x26523}, {0x2F97D, 0x8060}, {0x2F97E, 0x265A8}, {0x2F97F, 0x8070}, {0x2F980, 0x2335F}, {0x2F981, 0x43D5},
|
||||
{0x2F982, 0x80B2}, {0x2F983, 0x8103}, {0x2F984, 0x440B}, {0x2F985, 0x813E}, {0x2F986, 0x5AB5}, {0x2F987, 0x267A7}, {0x2F988, 0x267B5}, {0x2F989, 0x23393}, {0x2F98A, 0x2339C}, {0x2F98B, 0x8201},
|
||||
{0x2F98C, 0x8204}, {0x2F98D, 0x8F9E}, {0x2F98E, 0x446B}, {0x2F98F, 0x8291}, {0x2F990, 0x828B}, {0x2F991, 0x829D}, {0x2F992, 0x52B3}, {0x2F993, 0x82B1}, {0x2F994, 0x82B3}, {0x2F995, 0x82BD},
|
||||
{0x2F996, 0x82E6}, {0x2F997, 0x26B3C}, {0x2F998, 0x82E5}, {0x2F999, 0x831D}, {0x2F99A, 0x8363}, {0x2F99B, 0x83AD}, {0x2F99C, 0x8323}, {0x2F99D, 0x83BD}, {0x2F99E, 0x83E7}, {0x2F99F, 0x8457},
|
||||
{0x2F9A0, 0x8353}, {0x2F9A1, 0x83CA}, {0x2F9A2, 0x83CC}, {0x2F9A3, 0x83DC}, {0x2F9A4, 0x26C36}, {0x2F9A5, 0x26D6B}, {0x2F9A6, 0x26CD5}, {0x2F9A7, 0x452B}, {0x2F9A8, 0x84F1}, {0x2F9A9, 0x84F3},
|
||||
{0x2F9AA, 0x8516}, {0x2F9AB, 0x273CA}, {0x2F9AC, 0x8564}, {0x2F9AD, 0x26F2C}, {0x2F9AE, 0x455D}, {0x2F9AF, 0x4561}, {0x2F9B0, 0x26FB1}, {0x2F9B1, 0x270D2}, {0x2F9B2, 0x456B}, {0x2F9B3, 0x8650},
|
||||
{0x2F9B4, 0x865C}, {0x2F9B5, 0x8667}, {0x2F9B6, 0x8669}, {0x2F9B7, 0x86A9}, {0x2F9B8, 0x8688}, {0x2F9B9, 0x870E}, {0x2F9BA, 0x86E2}, {0x2F9BB, 0x8779}, {0x2F9BC, 0x8728}, {0x2F9BD, 0x876B},
|
||||
{0x2F9BE, 0x8786}, {0x2F9BF, 0x45D7}, {0x2F9C0, 0x87E1}, {0x2F9C1, 0x8801}, {0x2F9C2, 0x45F9}, {0x2F9C3, 0x8860}, {0x2F9C4, 0x8863}, {0x2F9C5, 0x27667}, {0x2F9C6, 0x88D7}, {0x2F9C7, 0x88DE},
|
||||
{0x2F9C8, 0x4635}, {0x2F9C9, 0x88FA}, {0x2F9CA, 0x34BB}, {0x2F9CB, 0x278AE}, {0x2F9CC, 0x27966}, {0x2F9CD, 0x46BE}, {0x2F9CE, 0x46C7}, {0x2F9CF, 0x8AA0}, {0x2F9D0, 0x8AED}, {0x2F9D1, 0x8B8A},
|
||||
{0x2F9D2, 0x8C55}, {0x2F9D3, 0x27CA8}, {0x2F9D4, 0x8CAB}, {0x2F9D5, 0x8CC1}, {0x2F9D6, 0x8D1B}, {0x2F9D7, 0x8D77}, {0x2F9D8, 0x27F2F}, {0x2F9D9, 0x20804}, {0x2F9DA, 0x8DCB}, {0x2F9DB, 0x8DBC},
|
||||
{0x2F9DC, 0x8DF0}, {0x2F9DD, 0x208DE}, {0x2F9DE, 0x8ED4}, {0x2F9DF, 0x8F38}, {0x2F9E0, 0x285D2}, {0x2F9E1, 0x285ED}, {0x2F9E2, 0x9094}, {0x2F9E3, 0x90F1}, {0x2F9E4, 0x9111}, {0x2F9E5, 0x2872E},
|
||||
{0x2F9E6, 0x911B}, {0x2F9E7, 0x9238}, {0x2F9E8, 0x92D7}, {0x2F9E9, 0x92D8}, {0x2F9EA, 0x927C}, {0x2F9EB, 0x93F9}, {0x2F9EC, 0x9415}, {0x2F9ED, 0x28BFA}, {0x2F9EE, 0x958B}, {0x2F9EF, 0x4995},
|
||||
{0x2F9F0, 0x95B7}, {0x2F9F1, 0x28D77}, {0x2F9F2, 0x49E6}, {0x2F9F3, 0x96C3}, {0x2F9F4, 0x5DB2}, {0x2F9F5, 0x9723}, {0x2F9F6, 0x29145}, {0x2F9F7, 0x2921A}, {0x2F9F8, 0x4A6E}, {0x2F9F9, 0x4A76},
|
||||
{0x2F9FA, 0x97E0}, {0x2F9FB, 0x2940A}, {0x2F9FC, 0x4AB2}, {0x2F9FD, 0x29496}, {0x2F9FE, 0x980B}, {0x2F9FF, 0x980B}, {0x2FA00, 0x9829}, {0x2FA01, 0x295B6}, {0x2FA02, 0x98E2}, {0x2FA03, 0x4B33},
|
||||
{0x2FA04, 0x9929}, {0x2FA05, 0x99A7}, {0x2FA06, 0x99C2}, {0x2FA07, 0x99FE}, {0x2FA08, 0x4BCE}, {0x2FA09, 0x29B30}, {0x2FA0A, 0x9B12}, {0x2FA0B, 0x9C40}, {0x2FA0C, 0x9CFD}, {0x2FA0D, 0x4CCE},
|
||||
{0x2FA0E, 0x4CED}, {0x2FA0F, 0x9D67}, {0x2FA10, 0x2A0CE}, {0x2FA11, 0x4CF8}, {0x2FA12, 0x2A105}, {0x2FA13, 0x2A20E}, {0x2FA14, 0x2A291}, {0x2FA15, 0x9EBB}, {0x2FA16, 0x4D56}, {0x2FA17, 0x9EF9},
|
||||
{0x2FA18, 0x9EFE}, {0x2FA19, 0x9F05}, {0x2FA1A, 0x9F0F}, {0x2FA1B, 0x9F16}, {0x2FA1D, 0x2A600},
|
||||
};
|
||||
|
||||
static std::string codepoint_to_utf8(uint32_t cp) {
|
||||
std::string result;
|
||||
if (/* 0x00 <= cp && */ cp <= 0x7f) {
|
||||
|
||||
Reference in New Issue
Block a user