mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-05-14 04:54:06 +00:00
unicode : clean-up
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
#include "unicode-data.h"
|
||||
#include "unicode-data.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
@@ -1649,7 +1649,3 @@ const std::map<char32_t, char32_t> unicode_map_lowercase = {
|
||||
{0x1E917, 0x1E939}, {0x1E918, 0x1E93A}, {0x1E919, 0x1E93B}, {0x1E91A, 0x1E93C}, {0x1E91B, 0x1E93D}, {0x1E91C, 0x1E93E},
|
||||
{0x1E91D, 0x1E93F}, {0x1E91E, 0x1E940}, {0x1E91F, 0x1E941}, {0x1E920, 0x1E942}, {0x1E921, 0x1E943},
|
||||
};
|
||||
|
||||
const std::set<std::string> unicode_regex_with_custom_preprocessor = {
|
||||
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)"
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user