webui: Move static build output from repo code to HF Bucket (#22937)

* ci: add workflow to publish webui to Hugging Face bucket

* ci: add webui release job to release workflow

* ci: test webui release job

* chore: Return to default minification strategy for build output files

* ci: extract webui build into separate workflow and job

* chore: Ignore webui static output + clean up references

* chore: Delete legacy webui static output

* chore: Ignore webui build static output

* fix: Workflow

* fix: Versioning naming

* chore: Update package name

* test: Test CI fix

* refactor: Naming

* server: implement webui build strategy with HF Bucket support

* chore: Remove test workflow

* chore: Use WebUI build workflow call in other workflows

* server: HF Buckets fallback for WebUI build

* refactor: App name variable

* refactor: Naming

* fix: Retrieve loading.html

* fix: workflow syntax

* fix: Rewrite malformed release.yml

* fix: Req param

* test: Re-add missing Playwright installation for CI tests

* refactor: Logic & security improvements

* refactor: Retrieve publishing jobs and DRY the workflows

* fix: Test workflow syntax

* fix: Upstream Release Tag for test workflow

* chore: Remove test workflow

* ci: Run WebUI jobs on `ubuntu-24.04-arm`

* refactor: Post-CR cleanup

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>

* refactor: CI cleanup

* refactor: Cleanup

* test: Test workflow

* refactor: use LLAMA_BUILD_NUMBER instead of LLAMA_BUILD_TAG for HF Bucket webui downloads

* server: add fallback mechanism for HF Bucket webui downloads from latest directory

* fix: Incorrect argument order in file(SHA256) calls for checksum verification

* refactor: Use cmake script for handling the HF Bucket download on build time

* feat: support local npm build for WebUI assets

* refactor: add `HF_ENABLED` flag to control WebUI build/download provisioning

* refactor: Cleanup

* chore: Remove test workflow

* fix: remove s390x from release workflow

* fix: add webui-build dependency to ubuntu-22-rocm and windows-hip

* Revert "fix: remove s390x from release workflow"

This reverts commit debcfffa9bc1e3112eae41f2d29741b682e4eb19.

* fix: Release workflow file

* fix: Proper release tag used for HF Bucket upload

* fix: Remove duplicate steps in release workflow

---------

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
Aleksander Grygier authored 2026-05-14 13:21:41 +02:00 (committed by GitHub)
parent 67b2b7f2f2, commit 253ba110bc
33 changed files with 684 additions and 13478 deletions


@@ -53,14 +53,6 @@ charset = unset
trim_trailing_whitespace = unset
insert_final_newline = unset
[tools/server/public/**]
indent_style = unset
indent_size = unset
end_of_line = unset
charset = unset
trim_trailing_whitespace = unset
insert_final_newline = unset
[benches/**]
indent_style = unset
indent_size = unset

.gitattributes

@@ -1,4 +0,0 @@
# Treat the generated single-file WebUI build as binary for diff purposes.
# Git's pack-file delta compression still works (byte-level), but this prevents
# git diff from printing the entire minified file on every change.
tools/server/public/index.html -diff

.github/labeler.yml

@@ -77,7 +77,6 @@ server/webui:
- changed-files:
- any-glob-to-any-file:
- tools/server/webui/**
- tools/server/public/**
server:
- changed-files:
- any-glob-to-any-file:


@@ -36,7 +36,14 @@ env:
CMAKE_ARGS: "-DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON"
jobs:
webui-build:
name: Build WebUI
uses: ./.github/workflows/webui-build.yml
macOS-cpu:
needs:
- webui-build
strategy:
matrix:
include:
@@ -64,6 +71,12 @@ jobs:
with:
fetch-depth: 0
- name: Download WebUI build artifact
uses: actions/download-artifact@v7
with:
name: webui-build
path: tools/server/public/
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
@@ -100,6 +113,9 @@ jobs:
name: llama-bin-macos-${{ matrix.build }}.tar.gz
ubuntu-cpu:
needs:
- webui-build
strategy:
matrix:
include:
@@ -119,6 +135,12 @@ jobs:
with:
fetch-depth: 0
- name: Download WebUI build artifact
uses: actions/download-artifact@v7
with:
name: webui-build
path: tools/server/public/
- name: ccache
if: ${{ matrix.build != 's390x' }}
uses: ggml-org/ccache-action@v1.2.21
@@ -169,6 +191,9 @@ jobs:
name: llama-bin-ubuntu-${{ matrix.build }}.tar.gz
ubuntu-vulkan:
needs:
- webui-build
strategy:
matrix:
include:
@@ -186,6 +211,12 @@ jobs:
with:
fetch-depth: 0
- name: Download WebUI build artifact
uses: actions/download-artifact@v7
with:
name: webui-build
path: tools/server/public/
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
@@ -237,6 +268,9 @@ jobs:
name: llama-bin-ubuntu-vulkan-${{ matrix.build }}.tar.gz
android-arm64:
needs:
- webui-build
runs-on: ubuntu-latest
env:
@@ -249,6 +283,12 @@ jobs:
with:
fetch-depth: 0
- name: Download WebUI build artifact
uses: actions/download-artifact@v7
with:
name: webui-build
path: tools/server/public/
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
@@ -306,6 +346,9 @@ jobs:
name: llama-bin-android-arm64.tar.gz
ubuntu-24-openvino:
needs:
- webui-build
runs-on: ubuntu-24.04
outputs:
@@ -327,6 +370,12 @@ jobs:
with:
fetch-depth: 0
- name: Download WebUI build artifact
uses: actions/download-artifact@v7
with:
name: webui-build
path: tools/server/public/
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
@@ -386,6 +435,9 @@ jobs:
name: llama-bin-ubuntu-openvino-${{ env.OPENVINO_VERSION_MAJOR }}-x64.tar.gz
windows-cpu:
needs:
- webui-build
runs-on: windows-2025
strategy:
@@ -400,6 +452,12 @@ jobs:
with:
fetch-depth: 0
- name: Download WebUI build artifact
uses: actions/download-artifact@v7
with:
name: webui-build
path: tools/server/public/
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
@@ -438,6 +496,9 @@ jobs:
name: llama-bin-win-cpu-${{ matrix.arch }}.zip
windows:
needs:
- webui-build
runs-on: windows-2025
env:
@@ -461,6 +522,12 @@ jobs:
id: checkout
uses: actions/checkout@v6
- name: Download WebUI build artifact
uses: actions/download-artifact@v7
with:
name: webui-build
path: tools/server/public/
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
@@ -520,6 +587,9 @@ jobs:
name: llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip
windows-cuda:
needs:
- webui-build
runs-on: windows-2022
strategy:
@@ -531,6 +601,12 @@ jobs:
id: checkout
uses: actions/checkout@v6
- name: Download WebUI build artifact
uses: actions/download-artifact@v7
with:
name: webui-build
path: tools/server/public/
- name: Install ccache
uses: ggml-org/ccache-action@v1.2.21
with:
@@ -591,6 +667,9 @@ jobs:
name: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
windows-sycl:
needs:
- webui-build
runs-on: windows-2022
defaults:
@@ -629,6 +708,12 @@ jobs:
Expand-Archive -Path "level-zero-win-sdk.zip" -DestinationPath "C:/level-zero-sdk" -Force
"LEVEL_ZERO_V1_SDK_PATH=C:/level-zero-sdk" | Out-File -FilePath $env:GITHUB_ENV -Append
- name: Download WebUI build artifact
uses: actions/download-artifact@v7
with:
name: webui-build
path: tools/server/public/
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
@@ -696,6 +781,9 @@ jobs:
name: llama-bin-win-sycl-x64.zip
ubuntu-24-sycl:
needs:
- webui-build
strategy:
matrix:
build: [fp32, fp16]
@@ -743,6 +831,12 @@ jobs:
wget -q "https://github.com/oneapi-src/level-zero/releases/download/v${LEVEL_ZERO_VERSION}/level-zero-devel_${LEVEL_ZERO_VERSION}%2B${LEVEL_ZERO_UBUNTU_VERSION}_amd64.deb" -O level-zero-devel.deb
sudo apt-get install -y ./level-zero.deb ./level-zero-devel.deb
- name: Download WebUI build artifact
uses: actions/download-artifact@v7
with:
name: webui-build
path: tools/server/public/
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
@@ -782,6 +876,9 @@ jobs:
name: llama-bin-ubuntu-sycl-${{ matrix.build }}-x64.tar.gz
ubuntu-22-rocm:
needs:
- webui-build
runs-on: ubuntu-22.04
strategy:
@@ -798,6 +895,12 @@ jobs:
with:
fetch-depth: 0
- name: Download WebUI build artifact
uses: actions/download-artifact@v7
with:
name: webui-build
path: tools/server/public/
- name: Free up disk space
uses: ggml-org/free-disk-space@v1.3.1
with:
@@ -885,6 +988,9 @@ jobs:
name: llama-bin-ubuntu-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.tar.gz
windows-hip:
needs:
- webui-build
runs-on: windows-2022
env:
@@ -901,6 +1007,12 @@ jobs:
id: checkout
uses: actions/checkout@v6
- name: Download WebUI build artifact
uses: actions/download-artifact@v7
with:
name: webui-build
path: tools/server/public/
- name: Grab rocWMMA package
id: grab_rocwmma
run: |
@@ -1147,6 +1259,7 @@ jobs:
runs-on: ubuntu-slim
needs:
- webui-build
- windows
- windows-cpu
- windows-cuda
@@ -1162,6 +1275,9 @@ jobs:
- ios-xcode-build
- openEuler-cann
outputs:
tag_name: ${{ steps.tag.outputs.name }}
steps:
- name: Clone
id: checkout
@@ -1287,3 +1403,15 @@ jobs:
});
}
}
webui-publish:
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
needs:
- release
uses: ./.github/workflows/webui-publish.yml
with:
version_tag: ${{ needs.release.outputs.tag_name }}
secrets:
hf_token: ${{ secrets.HF_TOKEN_WEBUI_STATIC_OUTPUT }}


@@ -39,7 +39,12 @@ concurrency:
cancel-in-progress: true
jobs:
webui-build:
name: Build WebUI
uses: ./.github/workflows/webui-build.yml
server-metal:
needs: webui-build
runs-on: [self-hosted, llama-server, macOS, ARM64]
name: server-metal (${{ matrix.wf_name }})
@@ -67,6 +72,12 @@ jobs:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Download WebUI build artifact
uses: actions/download-artifact@v7
with:
name: webui-build
path: tools/server/public/
- name: Build
id: cmake_build
run: |


@@ -1,7 +1,7 @@
name: Server WebUI
on:
workflow_dispatch: # allows manual triggering
workflow_dispatch:
inputs:
sha:
description: 'Commit SHA1 to build'
@@ -13,16 +13,14 @@ on:
paths: [
'.github/workflows/server-webui.yml',
'tools/server/webui/**.*',
'tools/server/tests/**.*',
'tools/server/public/**'
'tools/server/tests/**.*'
]
pull_request:
types: [opened, synchronize, reopened]
paths: [
'.github/workflows/server-webui.yml',
'tools/server/webui/**.*',
'tools/server/tests/**.*',
'tools/server/public/**'
'tools/server/tests/**.*'
]
env:
@@ -36,9 +34,14 @@ concurrency:
cancel-in-progress: true
jobs:
webui-check:
webui-build:
name: Build WebUI
uses: ./.github/workflows/webui-build.yml
webui-checks:
name: WebUI Checks
runs-on: ${{ 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
needs: webui-build
runs-on: ubuntu-24.04-arm
continue-on-error: true
steps:
- name: Checkout code
@@ -51,7 +54,7 @@ jobs:
id: node
uses: actions/setup-node@v6
with:
node-version: "22"
node-version: "24"
cache: "npm"
cache-dependency-path: "tools/server/webui/package-lock.json"
@@ -71,6 +74,47 @@ jobs:
run: npm run lint
working-directory: tools/server/webui
- name: Install Playwright browsers
id: playwright
if: ${{ always() && steps.setup.conclusion == 'success' }}
run: npx playwright install --with-deps
working-directory: tools/server/webui
- name: Run Client tests
if: ${{ always() && steps.playwright.conclusion == 'success' }}
run: npm run test:client
working-directory: tools/server/webui
- name: Run Unit tests
if: ${{ always() && steps.playwright.conclusion == 'success' }}
run: npm run test:unit
working-directory: tools/server/webui
e2e-tests:
name: E2E Tests
needs: webui-build
runs-on: ubuntu-24.04-arm
steps:
- name: Checkout code
uses: actions/checkout@v6
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Setup Node.js
id: node
uses: actions/setup-node@v6
with:
node-version: "24"
cache: "npm"
cache-dependency-path: "tools/server/webui/package-lock.json"
- name: Install dependencies
id: setup
if: ${{ steps.node.conclusion == 'success' }}
run: npm ci
working-directory: tools/server/webui
- name: Build application
if: ${{ always() && steps.setup.conclusion == 'success' }}
run: npm run build
@@ -87,16 +131,6 @@ jobs:
run: npm run build-storybook
working-directory: tools/server/webui
- name: Run Client tests
if: ${{ always() && steps.playwright.conclusion == 'success' }}
run: npm run test:client
working-directory: tools/server/webui
- name: Run Unit tests
if: ${{ always() && steps.playwright.conclusion == 'success' }}
run: npm run test:unit
working-directory: tools/server/webui
- name: Run UI tests
if: ${{ always() && steps.playwright.conclusion == 'success' }}
run: npm run test:ui -- --testTimeout=60000


@@ -54,7 +54,12 @@ concurrency:
cancel-in-progress: true
jobs:
webui-build:
name: Build WebUI
uses: ./.github/workflows/webui-build.yml
server:
needs: webui-build
runs-on: ubuntu-latest
name: server (${{ matrix.wf_name }})
@@ -93,6 +98,12 @@ jobs:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Download WebUI build artifact
uses: actions/download-artifact@v7
with:
name: webui-build
path: tools/server/public/
- name: Build
id: cmake_build
run: |
@@ -125,6 +136,7 @@ jobs:
SLOW_TESTS=1 pytest -v -x
server-windows:
needs: webui-build
runs-on: windows-2022
steps:
@@ -135,6 +147,12 @@ jobs:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Download WebUI build artifact
uses: actions/download-artifact@v7
with:
name: webui-build
path: tools/server/public/
- name: Build
id: cmake_build
run: |

.github/workflows/webui-build.yml (new file)

@@ -0,0 +1,44 @@
name: Build WebUI
on:
workflow_call:
jobs:
build:
name: Build WebUI
runs-on: ubuntu-slim
env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
steps:
- name: Checkout code
uses: actions/checkout@v6
- name: Setup Node.js
uses: actions/setup-node@v6
with:
node-version: "24"
cache: "npm"
cache-dependency-path: "tools/server/webui/package-lock.json"
- name: Install dependencies
run: npm ci
working-directory: tools/server/webui
- name: Build application
run: npm run build
working-directory: tools/server/webui
- name: Generate checksums
run: |
cd tools/server/public
for f in *; do
sha256sum "$f" | awk '{print $1, $2}' >> checksums.txt
done
- name: Upload built webui
uses: actions/upload-artifact@v6
with:
name: webui-build
path: tools/server/public/
retention-days: 1
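
These built assets plus `checksums.txt` travel together as the `webui-build` artifact. A minimal verification sketch for a downloaded copy, assuming GNU coreutils and the single-space `<hash> <filename>` lines generated above:

```sh
cd tools/server/public
# checksums.txt lines are "<hash> <filename>"; re-emit them in the
# two-space format GNU sha256sum --check expects, then verify each file
while read -r hash file; do
    printf '%s  %s\n' "$hash" "$file" | sha256sum --check --quiet - || exit 1
done < checksums.txt
```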

.github/workflows/webui-publish.yml (new file)

@@ -0,0 +1,65 @@
name: WebUI Publish
on:
workflow_call:
inputs:
version_tag:
description: 'Version tag to publish under (e.g., b1234)'
required: true
type: string
secrets:
hf_token:
description: 'Hugging Face token with write access'
required: true
jobs:
publish:
name: Publish WebUI Static Output
runs-on: ubuntu-24.04-arm
permissions:
contents: read
env:
HF_BUCKET_NAME: ${{ vars.HF_BUCKET_WEBUI_STATIC_OUTPUT }}
steps:
- name: Checkout code
uses: actions/checkout@v6
with:
fetch-depth: 1
- name: Download WebUI build artifact
uses: actions/download-artifact@v7
with:
name: webui-build
path: tools/server/public/
- name: Install Hugging Face Hub CLI
run: pip install -U huggingface_hub
- name: Authenticate with Hugging Face
run: hf auth login --token ${{ secrets.hf_token }}
- name: Sync built files to Hugging Face bucket (version tag)
run: |
# Upload the built files to the Hugging Face bucket under the release version
hf buckets sync tools/server/public hf://buckets/ggml-org/${{ env.HF_BUCKET_NAME }}/${{ inputs.version_tag }} --delete --quiet
- name: Sync built files to Hugging Face bucket (latest)
run: |
# Also upload to the 'latest' directory for fallback downloads
hf buckets sync tools/server/public hf://buckets/ggml-org/${{ env.HF_BUCKET_NAME }}/latest --delete --quiet
- name: Verify upload
run: |
# List the files in the bucket to verify the upload
hf buckets list hf://buckets/ggml-org/${{ env.HF_BUCKET_NAME }}/${{ inputs.version_tag }} -R -h
- name: Clean up root-level files
run: |
# Clean up any old root-level files from previous non-versioned deployments
hf buckets rm ggml-org/${{ env.HF_BUCKET_NAME }}/index.html --yes 2>/dev/null || true
hf buckets rm ggml-org/${{ env.HF_BUCKET_NAME }}/bundle.js --yes 2>/dev/null || true
hf buckets rm ggml-org/${{ env.HF_BUCKET_NAME }}/bundle.css --yes 2>/dev/null || true
hf buckets rm ggml-org/${{ env.HF_BUCKET_NAME }}/loading.html --yes 2>/dev/null || true
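
The synced files are what the build-time download script (later in this diff) consumes; fetching one asset manually over the same `resolve` URLs would look roughly like this (tag and bucket name illustrative):

```sh
# try the version-specific path first, then fall back to 'latest'
curl -fL "https://huggingface.co/buckets/ggml-org/llama-ui/resolve/b1234/index.html?download=true" -o index.html ||
curl -fL "https://huggingface.co/buckets/ggml-org/llama-ui/resolve/latest/index.html?download=true" -o index.html
```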

.gitignore

@@ -54,6 +54,7 @@
/tmp/
/autogen-*.md
/common/build-info.cpp
/tools/server/public
# Deprecated
@@ -96,8 +97,6 @@
/tools/server/webui/node_modules
/tools/server/webui/dist
# we no longer use gz for index.html
/tools/server/public/index.html.gz
# Python


@@ -104,13 +104,14 @@ option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
option(LLAMA_BUILD_COMMON "llama: build common utils library" ${LLAMA_STANDALONE})
# extra artifacts
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_TOOLS "llama: build tools" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_WEBUI "llama: build the embedded Web UI for server" ON)
option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_DEFAULT})
option(LLAMA_TESTS_INSTALL "llama: install tests" ON)
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_TOOLS "llama: build tools" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_WEBUI "llama: build the embedded Web UI for server" ON)
option(LLAMA_USE_PREBUILT_WEBUI "llama: use prebuilt WebUI from HF Bucket when available (requires LLAMA_BUILD_WEBUI=ON)" ON)
option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_DEFAULT})
option(LLAMA_TESTS_INSTALL "llama: install tests" ON)
# 3rd party libs
option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" ON)


@@ -605,7 +605,11 @@ struct common_params {
std::map<std::string, std::string> default_template_kwargs;
// webui configs
bool webui = true;
#ifdef LLAMA_WEBUI_DEFAULT_ENABLED
bool webui = LLAMA_WEBUI_DEFAULT_ENABLED != 0;
#else
bool webui = true; // default to enabled when not set
#endif
bool webui_mcp_proxy = false;
std::string webui_config_json;


@@ -148,7 +148,6 @@ You can use GBNF grammars:
- In [llama-cli](../tools/cli) and [llama-completion](../tools/completion), passed as the `--json` / `-j` flag
- To convert to a grammar ahead of time:
- in CLI, with [examples/json_schema_to_grammar.py](../examples/json_schema_to_grammar.py)
- in JavaScript with [json-schema-to-grammar.mjs](../tools/server/public_legacy/json-schema-to-grammar.mjs) (this is used by the [server](../tools/server)'s Web UI)
> [!NOTE]
> The JSON schema is only used to constrain the model output and is not injected into the prompt. The model has no visibility into the schema, so if you want it to understand the expected structure, describe it explicitly in your prompt. This does not apply to tool calling, where schemas are injected into the prompt.


@@ -0,0 +1,213 @@
# Download webui assets from Hugging Face Bucket at build time
# Usage: cmake -DPUBLIC_DIR=... -DHF_BUCKET=... -DHF_VERSION=... -DASSETS="a;b;c" -P scripts/webui-download.cmake
#
# Asset provisioning priority:
# 1. Pre-built assets already in PUBLIC_DIR (cached from a previous run)
# 2. Local npm build (if NPM_DIR is provided and has package.json)
# 3. Hugging Face Bucket download (version-specific, then 'latest' fallback)
cmake_minimum_required(VERSION 3.16)
set(PUBLIC_DIR "" CACHE STRING "Directory to store/download assets")
set(HF_BUCKET "" CACHE STRING "Hugging Face bucket name")
set(HF_VERSION "" CACHE STRING "Version to download (empty = resolve from git)")
set(ASSETS "" CACHE STRING "Semicolon-separated list of asset filenames")
set(STAMP_FILE "" CACHE STRING "Stamp file to create on success (optional)")
set(SOURCE_DIR "" CACHE STRING "Project source root (to resolve version from git)")
set(NPM_DIR "" CACHE STRING "WebUI source directory (to run npm build)")
set(HF_ENABLED "" CACHE STRING "Whether to allow HF Bucket download (ON/OFF)")
# ---------------------------------------------------------------------------
# 1. Resolve version from git if not provided at configure time
# ---------------------------------------------------------------------------
set(RESOLVED_VERSION "${HF_VERSION}")
if("${RESOLVED_VERSION}" STREQUAL "" AND NOT "${SOURCE_DIR}" STREQUAL "")
if(EXISTS "${SOURCE_DIR}/cmake/build-info.cmake")
include("${SOURCE_DIR}/cmake/build-info.cmake")
if(NOT "${BUILD_NUMBER}" STREQUAL "" AND NOT BUILD_NUMBER EQUAL 0)
set(RESOLVED_VERSION "${BUILD_NUMBER}")
message(STATUS "WebUI: resolved version from git: ${RESOLVED_VERSION}")
endif()
endif()
endif()
# ---------------------------------------------------------------------------
# 2. Check stamp freshness — re-download if resolved version changed
# ---------------------------------------------------------------------------
set(FORCE_REBUILD FALSE)
if(NOT "${STAMP_FILE}" STREQUAL "" AND EXISTS "${STAMP_FILE}")
file(READ "${STAMP_FILE}" STAMPED_VERSION)
string(STRIP "${STAMPED_VERSION}" STAMPED_VERSION)
if(NOT "${STAMPED_VERSION}" STREQUAL "${RESOLVED_VERSION}")
message(STATUS "WebUI: version changed (${STAMPED_VERSION} -> ${RESOLVED_VERSION}), re-building")
set(FORCE_REBUILD TRUE)
endif()
endif()
# ---------------------------------------------------------------------------
# 3. Check if assets already exist (cached from a previous run)
# ---------------------------------------------------------------------------
set(ALL_EXISTS TRUE)
foreach(asset ${ASSETS})
if(NOT EXISTS "${PUBLIC_DIR}/${asset}")
set(ALL_EXISTS FALSE)
break()
endif()
endforeach()
if(ALL_EXISTS AND NOT FORCE_REBUILD)
message(STATUS "WebUI: all assets already exist in ${PUBLIC_DIR}, skipping")
return()
endif()
file(MAKE_DIRECTORY "${PUBLIC_DIR}")
# ---------------------------------------------------------------------------
# 4. Priority 2: build from source via npm (fast path for developers)
# ---------------------------------------------------------------------------
set(PROVISION_SUCCESS FALSE)
if(NOT PROVISION_SUCCESS AND NOT "${NPM_DIR}" STREQUAL "")
if(EXISTS "${NPM_DIR}/package.json")
message(STATUS "WebUI: building from source in ${NPM_DIR}")
# Run npm install if node_modules is missing
if(NOT EXISTS "${NPM_DIR}/node_modules")
message(STATUS "WebUI: running npm install (first time)")
execute_process(
COMMAND npm install
WORKING_DIRECTORY "${NPM_DIR}"
RESULT_VARIABLE NPM_INSTALL_RESULT
OUTPUT_VARIABLE NPM_OUT
ERROR_VARIABLE NPM_ERR
)
if(NOT NPM_INSTALL_RESULT EQUAL 0)
message(STATUS "WebUI: npm install failed (${NPM_INSTALL_RESULT}), falling back to download")
message(STATUS " stderr: ${NPM_ERR}")
endif()
endif()
# Run the build
execute_process(
COMMAND npm run build
WORKING_DIRECTORY "${NPM_DIR}"
RESULT_VARIABLE NPM_BUILD_RESULT
OUTPUT_VARIABLE NPM_OUT
ERROR_VARIABLE NPM_ERR
)
if(NPM_BUILD_RESULT EQUAL 0)
# Verify that the expected assets were produced
set(ALL_BUILT TRUE)
foreach(asset ${ASSETS})
if(NOT EXISTS "${PUBLIC_DIR}/${asset}")
set(ALL_BUILT FALSE)
break()
endif()
endforeach()
if(ALL_BUILT)
message(STATUS "WebUI: local npm build succeeded")
set(PROVISION_SUCCESS TRUE)
else()
message(STATUS "WebUI: npm build completed but assets missing from ${PUBLIC_DIR}, falling back to download")
endif()
else()
message(STATUS "WebUI: npm build failed (${NPM_BUILD_RESULT}), falling back to download")
message(STATUS " stderr: ${NPM_ERR}")
endif()
else()
message(STATUS "WebUI: NPM_DIR (${NPM_DIR}) has no package.json, skipping npm build")
endif()
endif()
# ---------------------------------------------------------------------------
# 5. Priority 3: download from Hugging Face Bucket (if enabled)
# ---------------------------------------------------------------------------
if(NOT PROVISION_SUCCESS AND HF_ENABLED)
# Build list of URLs to try — version-specific first, then 'latest'
set(URL_ENTRIES "")
if(NOT "${RESOLVED_VERSION}" STREQUAL "")
list(APPEND URL_ENTRIES
"version:https://huggingface.co/buckets/ggml-org/${HF_BUCKET}/resolve/${RESOLVED_VERSION}")
endif()
list(APPEND URL_ENTRIES
"latest:https://huggingface.co/buckets/ggml-org/${HF_BUCKET}/resolve/latest")
foreach(entry ${URL_ENTRIES})
string(REGEX REPLACE "^([^:]+):.*$" "\\1" url_label "${entry}")
string(REGEX REPLACE "^[^:]+:(.*)$" "\\1" base_url "${entry}")
message(STATUS "WebUI: downloading assets from ${url_label}: ${base_url}")
# Download each asset
set(ALL_OK TRUE)
foreach(asset ${ASSETS})
set(download_url "${base_url}/${asset}?download=true")
set(download_path "${PUBLIC_DIR}/${asset}")
file(DOWNLOAD "${download_url}" "${download_path}"
STATUS download_status TIMEOUT 60
)
list(GET download_status 0 download_result)
if(NOT download_result EQUAL 0)
list(GET download_status 1 error_message)
message(STATUS "WebUI: failed to download ${asset} from ${url_label}: ${error_message}")
set(ALL_OK FALSE)
break()
endif()
message(STATUS "WebUI: downloaded ${asset}")
endforeach()
if(NOT ALL_OK)
continue()
endif()
# Verify checksums if the server provides them
file(DOWNLOAD "${base_url}/checksums.txt?download=true"
"${PUBLIC_DIR}/checksums.txt"
STATUS checksum_status TIMEOUT 30
)
list(GET checksum_status 0 checksum_result)
if(checksum_result EQUAL 0)
message(STATUS "WebUI: verifying checksums...")
file(STRINGS "${PUBLIC_DIR}/checksums.txt" CHECKSUMS_CONTENT)
foreach(asset ${ASSETS})
set(download_path "${PUBLIC_DIR}/${asset}")
file(SHA256 "${download_path}" asset_hash)
string(TOUPPER "${asset_hash}" EXPECTED_HASH_UPPER)
string(REGEX MATCH "${EXPECTED_HASH_UPPER}[ \\t]+${asset}" CHECKSUM_LINE "${CHECKSUMS_CONTENT}")
if(NOT CHECKSUM_LINE)
message(WARNING "WebUI: checksum verification failed for ${asset}")
set(ALL_OK FALSE)
break()
endif()
endforeach()
if(ALL_OK)
message(STATUS "WebUI: all checksums verified")
endif()
endif()
if(ALL_OK)
set(PROVISION_SUCCESS TRUE)
break()
endif()
endforeach()
if(PROVISION_SUCCESS)
message(STATUS "WebUI: provisioning complete")
else()
message(WARNING "WebUI: failed to download assets from HF Bucket (${HF_BUCKET})")
endif()
endif()
# ---------------------------------------------------------------------------
# 6. Write stamp file on success (stores resolved version for freshness check)
# ---------------------------------------------------------------------------
if(PROVISION_SUCCESS)
if(NOT "${STAMP_FILE}" STREQUAL "")
file(WRITE "${STAMP_FILE}" "${RESOLVED_VERSION}")
endif()
else()
message(WARNING "WebUI: no source available. Neither local build (${NPM_DIR}) nor HF Bucket download succeeded.")
message(WARNING "WebUI: building server without embedded WebUI. Set LLAMA_BUILD_WEBUI=OFF to suppress this warning.")
endif()
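
The script is also usable standalone, per the `Usage` comment at its top; a sketch with illustrative values (the real asset list comes from `PUBLIC_ASSETS` in `tools/server/CMakeLists.txt`):

```sh
cmake \
  -DPUBLIC_DIR=tools/server/public \
  -DHF_BUCKET=llama-ui \
  -DHF_VERSION=b1234 \
  -DHF_ENABLED=ON \
  -DNPM_DIR=tools/server/webui \
  -DSOURCE_DIR=. \
  -DASSETS="index.html;loading.html" \
  -DSTAMP_FILE=build/.webui.stamp \
  -P scripts/webui-download.cmake
```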


@@ -40,7 +40,9 @@ set(TARGET_SRCS
server-models.h
)
option(LLAMA_BUILD_WEBUI "Build the embedded Web UI" ON)
# Option to specify custom HF bucket for webui (defaults to llama-ui)
# Usage: cmake -B build -DLLAMA_WEBUI_HF_BUCKET=llama-ui
set(LLAMA_WEBUI_HF_BUCKET "llama-ui" CACHE STRING "Hugging Face bucket name for prebuilt webui assets")
if (LLAMA_BUILD_WEBUI)
set(PUBLIC_ASSETS
@@ -50,19 +52,108 @@ if (LLAMA_BUILD_WEBUI)
loading.html
)
# Determine source of webui assets (priority: local > HF Bucket)
set(WEBUI_SOURCE "")
set(WEBUI_SOURCE_DIR "")
# Priority 1: Check for local webui build output
set(LOCAL_WEBUI_DIR "${CMAKE_CURRENT_SOURCE_DIR}/public")
# Verify all required assets exist before declaring local source valid
set(ALL_ASSETS_PRESENT TRUE)
foreach(asset ${PUBLIC_ASSETS})
set(input "${CMAKE_CURRENT_SOURCE_DIR}/public/${asset}")
set(output "${CMAKE_CURRENT_BINARY_DIR}/${asset}.hpp")
list(APPEND TARGET_SRCS ${output})
add_custom_command(
DEPENDS "${input}"
OUTPUT "${output}"
COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
)
set_source_files_properties(${output} PROPERTIES GENERATED TRUE)
if(NOT EXISTS "${LOCAL_WEBUI_DIR}/${asset}")
set(ALL_ASSETS_PRESENT FALSE)
break()
endif()
endforeach()
add_definitions(-DLLAMA_BUILD_WEBUI)
if(ALL_ASSETS_PRESENT)
set(WEBUI_SOURCE "local")
set(WEBUI_SOURCE_DIR "${LOCAL_WEBUI_DIR}")
message(STATUS "WebUI: using local build from ${WEBUI_SOURCE_DIR}")
endif()
# Priority 2: Build-time asset provisioning (npm build → HF Bucket fallback)
if(NOT WEBUI_SOURCE_DIR)
if(DEFINED LLAMA_BUILD_NUMBER)
set(HF_WEBUI_VERSION "${LLAMA_BUILD_NUMBER}")
message(STATUS "WebUI: using LLAMA_BUILD_NUMBER=${HF_WEBUI_VERSION}")
else()
set(HF_WEBUI_VERSION "")
message(STATUS "WebUI: LLAMA_BUILD_NUMBER not defined")
endif()
# Stamp file embeds the version tag so a changed build number triggers
# a fresh provision run on the next `cmake --build` without reconfiguring.
if("${HF_WEBUI_VERSION}" STREQUAL "")
set(WEBUI_VERSION_TAG "provisioned")
else()
set(WEBUI_VERSION_TAG "${HF_WEBUI_VERSION}")
endif()
set(WEBUI_STAMP "${CMAKE_CURRENT_BINARY_DIR}/.webui-${WEBUI_VERSION_TAG}.stamp")
# Escape semicolons so the CMake list is passed as a single -D argument
string(REPLACE ";" "\\;" PUBLIC_ASSETS_ESC "${PUBLIC_ASSETS}")
add_custom_command(
OUTPUT ${WEBUI_STAMP}
COMMAND ${CMAKE_COMMAND}
"-DSOURCE_DIR=${PROJECT_SOURCE_DIR}"
"-DPUBLIC_DIR=${CMAKE_CURRENT_SOURCE_DIR}/public"
"-DHF_BUCKET=${LLAMA_WEBUI_HF_BUCKET}"
"-DHF_VERSION=${HF_WEBUI_VERSION}"
"-DHF_ENABLED=${LLAMA_USE_PREBUILT_WEBUI}"
"-DASSETS=${PUBLIC_ASSETS_ESC}"
"-DSTAMP_FILE=${WEBUI_STAMP}"
"-DNPM_DIR=${CMAKE_CURRENT_SOURCE_DIR}/webui"
-P ${PROJECT_SOURCE_DIR}/scripts/webui-download.cmake
COMMENT "Building/provisioning WebUI assets (npm build -> HF Bucket fallback)"
)
set(WEBUI_SOURCE "provisioned")
set(WEBUI_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/public")
endif()
# Process assets from the determined source
if(WEBUI_SOURCE_DIR)
foreach(asset ${PUBLIC_ASSETS})
set(input "${WEBUI_SOURCE_DIR}/${asset}")
set(output "${CMAKE_CURRENT_BINARY_DIR}/${asset}.hpp")
list(APPEND TARGET_SRCS ${output})
if(WEBUI_SOURCE STREQUAL "local")
# Local build: files exist at configure time
if(NOT EXISTS "${input}")
message(FATAL_ERROR "WebUI asset not found: ${input}")
endif()
set(dependency "${input}")
else()
# HF Bucket: files are downloaded at build time
set(dependency "${WEBUI_STAMP}")
endif()
add_custom_command(
DEPENDS ${dependency}
OUTPUT "${output}"
COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
)
set_source_files_properties(${output} PROPERTIES GENERATED TRUE)
endforeach()
add_definitions(-DLLAMA_BUILD_WEBUI)
add_definitions(-DLLAMA_WEBUI_DEFAULT_ENABLED=1)
message(STATUS "WebUI: embedded with source: ${WEBUI_SOURCE}")
else()
# WebUI source not found - issue warning but don't fail the build
# The server will still build but without webui embedded
message(WARNING "WebUI: no source available. Neither local build (tools/server/public/) nor HF Bucket download succeeded.")
message(WARNING "WebUI: building server without embedded WebUI. Set LLAMA_BUILD_WEBUI=OFF to suppress this warning.")
add_definitions(-DLLAMA_WEBUI_DEFAULT_ENABLED=0)
endif()
else()
# WebUI is disabled at build time
add_definitions(-DLLAMA_WEBUI_DEFAULT_ENABLED=0)
endif()
add_executable(${TARGET} ${TARGET_SRCS})
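
Typical configure invocations against the new options (a sketch; the custom bucket name is illustrative):

```sh
# default: embed the WebUI, preferring local assets, then an npm build, then the HF Bucket
cmake -B build -DLLAMA_BUILD_SERVER=ON

# keep the WebUI but forbid the HF Bucket fallback
cmake -B build -DLLAMA_BUILD_SERVER=ON -DLLAMA_USE_PREBUILT_WEBUI=OFF

# pull prebuilt assets from a different bucket
cmake -B build -DLLAMA_BUILD_SERVER=ON -DLLAMA_WEBUI_HF_BUCKET=my-webui-bucket
```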


@@ -1822,43 +1822,3 @@ You can specify default preferences for the web UI using `--webui-config <JSON c
```
You may find available preferences in [settings-config.ts](webui/src/lib/constants/settings-config.ts).
### Legacy completion web UI
A new chat-based UI has replaced the old completion-based since [this PR](https://github.com/ggml-org/llama.cpp/pull/10175). If you want to use the old completion, start the server with `--path ./tools/server/public_legacy`
For example:
```sh
./llama-server -m my_model.gguf -c 8192 --path ./tools/server/public_legacy
```
### Extending or building alternative Web Front End
You can extend the front end by running the server binary with `--path` set to `./your-directory` and importing `/completion.js` to get access to the llamaComplete() method.
Read the documentation in `/completion.js` to see convenient ways to access llama.
A simple example is below:
```html
<html>
<body>
<pre>
<script type="module">
import { llama } from '/completion.js'
const prompt = `### Instruction:
Write dad jokes, each one paragraph.
You can use html formatting if needed.
### Response:`
for await (const chunk of llama(prompt)) {
document.write(chunk.data.content)
}
</script>
</pre>
</body>
</html>
```

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long


@@ -1,12 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="refresh" content="5">
</head>
<body>
<div id="loading">
The model is loading. Please wait.<br/>
The user interface will appear soon.
</div>
</body>
</html>


@@ -238,30 +238,30 @@ bool server_http_context::init(const common_params & params) {
};
auto middleware_server_state = [this](const httplib::Request & req, httplib::Response & res) {
(void)req; // suppress unused parameter warning when LLAMA_BUILD_WEBUI is not defined
bool ready = is_ready.load();
if (!ready) {
#ifdef LLAMA_BUILD_WEBUI
auto tmp = string_split<std::string>(req.path, '.');
if (req.path == "/" || tmp.back() == "html") {
if (req.path == "/" || (tmp.size() > 0 && tmp.back() == "html")) {
res.status = 503;
res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
} else
#endif
{
// no endpoints is allowed to be accessed when the server is not ready
// this is to prevent any data races or inconsistent states
res.status = 503;
res.set_content(
safe_json_to_str(json {
{"error", {
{"message", "Loading model"},
{"type", "unavailable_error"},
{"code", 503}
}}
}),
"application/json; charset=utf-8"
);
return false;
}
#endif
// no endpoints are allowed to be accessed when the server is not ready
// this is to prevent any data races or inconsistent states
res.status = 503;
res.set_content(
safe_json_to_str(json {
{"error", {
{"message", "Loading model"},
{"type", "unavailable_error"},
{"code", 503}
}}
}),
"application/json; charset=utf-8"
);
return false;
}
return true;
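
In practice, while the model is still loading (port illustrative, server default shown):

```sh
# browser-style requests receive the embedded loading page
curl -i http://localhost:8080/            # 503, text/html (loading.html)
# every other endpoint gets a JSON "Loading model" error
curl -i http://localhost:8080/v1/models   # 503, application/json
```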


@@ -1,6 +1,6 @@
# llama.cpp Web UI
# llama-ui
A modern, feature-rich web interface for llama.cpp built with SvelteKit. This UI provides an intuitive chat interface with advanced file handling, conversation management, and comprehensive model interaction capabilities.
A modern, feature-rich web interface for llama-server built with SvelteKit. This UI provides an intuitive chat interface with advanced file handling, conversation management, and comprehensive model interaction capabilities.
The WebUI supports two server operation modes:


@@ -1,11 +1,11 @@
{
"name": "llama-server-webui",
"name": "llama-ui",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "llama-server-webui",
"name": "llama-ui",
"version": "1.0.0",
"dependencies": {
"@modelcontextprotocol/sdk": "^1.25.1",


@@ -1,5 +1,5 @@
{
"name": "llama-server-webui",
"name": "llama-ui",
"private": true,
"version": "1.0.0",
"type": "module",


@@ -1,7 +1,7 @@
#!/bin/bash
# Development script for llama.cpp webui
#
# Development script for llama-ui
#
# This script starts the webui development servers (Storybook and Vite).
# Note: You need to start llama-server separately.
#
@@ -14,12 +14,12 @@ cd ../../../
# Check and install git hooks if missing
check_and_install_hooks() {
local hooks_missing=false
# Check for required hooks
if [ ! -f ".git/hooks/pre-commit" ] || [ ! -f ".git/hooks/pre-push" ] || [ ! -f ".git/hooks/post-push" ]; then
hooks_missing=true
fi
if [ "$hooks_missing" = true ]; then
echo "🔧 Git hooks missing, installing them..."
cd tools/server/webui


@@ -1,7 +1,7 @@
#!/bin/bash
# Script to install pre-commit hook for webui
# Pre-commit: formats, checks, builds, and stages build output
# Pre-commit: formats, checks, and builds webui
REPO_ROOT=$(git rev-parse --show-toplevel)
PRE_COMMIT_HOOK="$REPO_ROOT/.git/hooks/pre-commit"
@@ -56,11 +56,7 @@ if git diff --cached --name-only | grep -q "^tools/server/webui/"; then
exit 1
fi
# Stage the build output alongside the source changes
cd "$REPO_ROOT"
git add tools/server/public/
echo "✅ Webui built and build output staged"
echo "✅ Webui built successfully"
fi
exit 0
@@ -75,7 +71,7 @@ if [ $? -eq 0 ]; then
echo ""
echo "The hook will automatically:"
echo " • Format, lint and check webui code before commits"
echo " • Build webui and stage tools/server/public/ into the same commit"
echo " • Build webui"
else
echo "❌ Failed to make hook executable"
exit 1


@@ -8,8 +8,7 @@
message?: string;
}
let { class: className = '', message = 'Initializing connection to llama.cpp server...' }: Props =
$props();
let { class: className = '', message = 'Initializing connection to server...' }: Props = $props();
</script>
<div class="flex h-full items-center justify-center {className}">


@@ -5,7 +5,7 @@ import { ROUTES } from './routes';
export const FORK_TREE_DEPTH_PADDING = 8;
export const SYSTEM_MESSAGE_PLACEHOLDER = 'System message';
export const APP_NAME = import.meta.env.VITE_PUBLIC_APP_NAME || 'llama.cpp';
export const APP_NAME = import.meta.env.VITE_PUBLIC_APP_NAME || 'llama-ui';
export const ICON_STRIP_TRANSITION_DURATION = 150;
export const ICON_STRIP_TRANSITION_DELAY_MULTIPLIER = 50;


@@ -76,7 +76,7 @@ export class ChatService {
*/
/**
* Sends a chat completion request to the llama.cpp server.
* Sends a chat completion request to the llama-server.
* Supports both streaming and non-streaming responses with comprehensive parameter configuration.
* Automatically converts database messages with attachments to the appropriate API format.
*


@@ -3,7 +3,7 @@
import { page } from '$app/state';
import { afterNavigate } from '$app/navigation';
import { DialogModelNotAvailable } from '$lib/components/app';
import { ROUTES } from '$lib/constants/routes';
import { APP_NAME, ROUTES } from '$lib/constants';
import { chatStore, isLoading } from '$lib/stores/chat.svelte';
import { conversationsStore, activeConversation } from '$lib/stores/conversations.svelte';
import { modelsStore, modelOptions } from '$lib/stores/models.svelte';
@@ -125,7 +125,7 @@
</script>
<svelte:head>
<title>{activeConversation()?.name || 'Chat'} - llama.cpp</title>
<title>{activeConversation()?.name || 'Chat'} - {APP_NAME}</title>
</svelte:head>
<DialogModelNotAvailable


@@ -27,7 +27,7 @@ const config = {
$styles: 'src/styles'
},
version: {
name: 'llama-server-webui'
name: 'llama-ui'
}
},


@@ -4,11 +4,11 @@ import { Meta } from '@storybook/addon-docs/blocks';
# llama.cpp Web UI
Welcome to the **llama.cpp Web UI** component library! This Storybook showcases the components used in the modern web interface for the llama.cpp server.
Welcome to the **llama-ui** component library! This Storybook showcases the components used in the modern web interface for the llama-server.
## 🚀 About This Project
WebUI is a modern web interface for the llama.cpp server, built with SvelteKit and ShadCN UI. Features include:
WebUI is a modern web interface for the llama-server, built with SvelteKit and ShadCN UI. Features include:
- **Real-time chat conversations** with AI assistants
- **Multi-conversation management** with persistent storage


@@ -23,11 +23,6 @@ export default defineConfig({
minify: true
},
esbuild: {
lineLimit: 500,
minifyIdentifiers: false
},
css: {
preprocessorOptions: {
scss: {