mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-05-15 21:44:05 +00:00
CI : support IOT device (IQ9) (#22987)
* update test scripts
* align CI behavior between Linux and Android
* remove the automatic cancellation after 15 min
* enable cancel-in-progress
* fix ty check issue
* update and fix pylint issue
* update the runner so that we are not restricted by the 15-min limit rule
* fix flake8 lint issue
* update runner according to review feedback
* update code according to review feedback
* switch from llama-cli to the llama-completion binary with the -no-cnv flag
This commit is contained in:
46
.github/workflows/build-and-test-snapdragon.yml
vendored
46
.github/workflows/build-and-test-snapdragon.yml
vendored
@@ -58,14 +58,45 @@ jobs:
|
||||
name: llama-cpp-android-arm64-snapdragon
|
||||
path: pkg-snapdragon/llama.cpp
|
||||
|
||||
linux-iot-snapdragon:
  # Cross-builds llama.cpp for Snapdragon Linux IoT inside the toolchain
  # container and publishes the installed package as a workflow artifact.
  runs-on: ubuntu-latest
  container:
    image: 'ghcr.io/snapdragon-toolchain/arm64-linux:v0.1'
  defaults:
    run:
      shell: bash

  steps:
    - name: Clone
      uses: actions/checkout@v6
      with:
        fetch-depth: 0
        lfs: false

    - name: Build Llama.CPP for Snapdragon Linux IoT
      id: build_llama_cpp_snapdragon_linux
      run: |
        cp docs/backend/snapdragon/CMakeUserPresets.json .
        cmake --preset arm64-linux-snapdragon-release -B build-snapdragon -DGGML_OPENCL=ON
        cmake --build build-snapdragon -j $(nproc)
        cmake --install build-snapdragon --prefix pkg-snapdragon/llama.cpp

    # Upload only when the build step itself succeeded; always() keeps the
    # condition evaluated even if another step in the job failed.
    - name: Upload Llama.CPP Snapdragon Linux IoT Build Artifact
      if: ${{ always() && steps.build_llama_cpp_snapdragon_linux.outcome == 'success' }}
      uses: actions/upload-artifact@v6
      with:
        # Must match the artifact name the QDC test job downloads for QCS* devices.
        name: llama-cpp-linux-arm64-snapdragon
        path: pkg-snapdragon/llama.cpp
|
||||
|
||||
test-snapdragon-qdc:
|
||||
name: Test on QDC Android Device (${{ matrix.device }})
|
||||
needs: [android-ndk-snapdragon]
|
||||
runs-on: ubuntu-slim
|
||||
name: Test on QDC Device (${{ matrix.device }})
|
||||
needs: [android-ndk-snapdragon, linux-iot-snapdragon]
|
||||
runs-on: ubuntu-24.04-arm
|
||||
timeout-minutes: 90
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
device: [SM8750, SM8650, SM8850]
|
||||
device: [SM8750, SM8850, QCS9075M]
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
@@ -74,11 +105,11 @@ jobs:
|
||||
- name: Download build artifact
|
||||
uses: actions/download-artifact@v7
|
||||
with:
|
||||
name: llama-cpp-android-arm64-snapdragon
|
||||
name: ${{ startsWith(matrix.device, 'QCS') && 'llama-cpp-linux-arm64-snapdragon' || 'llama-cpp-android-arm64-snapdragon' }}
|
||||
path: pkg-snapdragon/llama.cpp
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: '3.x'
|
||||
cache: pip
|
||||
@@ -107,7 +138,8 @@ jobs:
|
||||
--test all \
|
||||
--pkg-dir pkg-snapdragon/llama.cpp \
|
||||
--model-url "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_0.gguf" \
|
||||
--device ${{ matrix.device }}
|
||||
--device ${{ matrix.device }} \
|
||||
${{ startsWith(matrix.device, 'QCS') && '--retries 2 --retry-delay 300' || '' }}
|
||||
env:
|
||||
QDC_API_KEY: ${{ secrets.QDC_API_KEY }}
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Run llama.cpp Hexagon Android tests in a single QDC Appium job.
|
||||
"""Run llama.cpp Hexagon tests in a single QDC job.
|
||||
|
||||
Bundles test scripts into one artifact and submits a single QDC job:
|
||||
|
||||
@@ -10,6 +10,10 @@ Results are written to $GITHUB_STEP_SUMMARY when set (GitHub Actions).
|
||||
Prerequisites:
|
||||
pip install /path/to/qualcomm_device_cloud_sdk*.whl
|
||||
|
||||
Platform is inferred from --device:
|
||||
android Appium + pytest (Android phones: SM8750 / SM8650 / SM8850)
|
||||
linux BASH (Linux IoT: QCS9075M)
|
||||
|
||||
Required environment variables:
|
||||
QDC_API_KEY API key from QDC UI -> Users -> Settings -> API Keys
|
||||
|
||||
@@ -23,6 +27,7 @@ Usage:
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import enum
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
@@ -30,15 +35,35 @@ import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
import urllib.request
|
||||
import xml.etree.ElementTree as ET
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Callable
|
||||
|
||||
from qualcomm_device_cloud_sdk.api import qdc_api # ty: ignore[unresolved-import]
|
||||
from qualcomm_device_cloud_sdk.logging import configure_logging # ty: ignore[unresolved-import]
|
||||
from qualcomm_device_cloud_sdk.models import ArtifactType, JobMode, JobState, JobSubmissionParameter, JobType, TestFramework # ty: ignore[unresolved-import]
|
||||
from qualcomm_device_cloud_sdk.api import qdc_api
|
||||
from qualcomm_device_cloud_sdk.logging import configure_logging
|
||||
from qualcomm_device_cloud_sdk.models import (
|
||||
ArtifactType,
|
||||
JobMode,
|
||||
JobState,
|
||||
JobSubmissionParameter,
|
||||
JobType,
|
||||
TestFramework,
|
||||
)
|
||||
|
||||
# configure_logging only sets up the SDK logger; basicConfig is needed for
|
||||
# our own log.info to reach stdout.
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s %(name)s %(levelname)s - %(message)s",
|
||||
handlers=[logging.StreamHandler()],
|
||||
)
|
||||
configure_logging(level=logging.INFO, handlers=[logging.StreamHandler()])
|
||||
# Silence per-poll GET/status spam from the SDK and its HTTP client.
|
||||
logging.getLogger("qualcomm_device_cloud").setLevel(logging.WARNING)
|
||||
logging.getLogger("httpx").setLevel(logging.WARNING)
|
||||
logging.getLogger("httpcore").setLevel(logging.WARNING)
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
POLL_INTERVAL = 30
|
||||
@@ -47,23 +72,56 @@ LOG_UPLOAD_TIMEOUT = 600
|
||||
CAPACITY_TIMEOUT = 1800
|
||||
CAPACITY_POLL = 60
|
||||
MAX_CONCURRENT_JOBS = 5
|
||||
DEFAULT_RETRIES = 0
|
||||
RETRY_DELAY = 300
|
||||
TERMINAL_STATES = {JobState.COMPLETED, JobState.CANCELED}
|
||||
NON_TERMINAL_STATES = {JobState.DISPATCHED, JobState.RUNNING, JobState.SETUP, JobState.SUBMITTED}
|
||||
|
||||
_SCRIPTS_DIR = Path(__file__).parent
|
||||
_TESTS_DIR = _SCRIPTS_DIR / "tests"
|
||||
_RUN_BENCH = _TESTS_DIR / "run_bench_tests_posix.py"
|
||||
_RUN_BACKEND_OPS = _TESTS_DIR / "run_backend_ops_posix.py"
|
||||
_UTILS = _TESTS_DIR / "utils.py"
|
||||
_CONFTEST = _TESTS_DIR / "conftest.py"
|
||||
_REQUIREMENTS = _SCRIPTS_DIR / "requirements.txt"
|
||||
|
||||
class DeviceUnavailableError(Exception):
    """Raised when the QDC device resource is not available (retryable).

    main() catches this error and re-submits the job, up to ``--retries``
    additional times, sleeping ``--retry-delay`` seconds between attempts.
    """
|
||||
|
||||
|
||||
_SCRIPTS_DIR = Path(__file__).parent
|
||||
_TESTS_DIR = _SCRIPTS_DIR / "tests"
|
||||
|
||||
# --- Shared test assets -------------------------------------------------------
|
||||
_UTILS = _TESTS_DIR / "utils.py"
|
||||
_CONFTEST = _TESTS_DIR / "conftest.py"
|
||||
_PYTEST_LINE_RE = re.compile(
|
||||
r"(?:[\w/]+\.py::)?(?:\w+::)?([\w\[\].-]+)\s+(PASSED|FAILED|ERROR|SKIPPED)"
|
||||
)
|
||||
_EXCLUDED_LOGS = {"qdc_android_whole_host-000.log", "qdc_kernel_host-000.log"}
|
||||
_EXCLUDED_LOGS = {
|
||||
"qdc_android_whole_host-000.log",
|
||||
"qdc_kernel_host-000.log",
|
||||
"qdc_LE_whole_host-000.log",
|
||||
"qdc_LE_kernel_host-000.log",
|
||||
"script.log",
|
||||
}
|
||||
_NON_TERMINAL_STATE_VALUES = {s.value for s in NON_TERMINAL_STATES}
|
||||
|
||||
# --- Android (Appium + pytest) assets ----------------------------------------
|
||||
_RUN_BENCH = _TESTS_DIR / "run_bench_tests_posix.py"
|
||||
_RUN_BACKEND_OPS = _TESTS_DIR / "run_backend_ops_posix.py"
|
||||
_REQUIREMENTS = _SCRIPTS_DIR / "requirements.txt"
|
||||
_UPSTREAM_ADB_SCRIPTS = (
|
||||
"https://raw.githubusercontent.com/ggml-org/llama.cpp/master/scripts/snapdragon/adb"
|
||||
)
|
||||
_ADB_SCRIPT_NAMES = [
|
||||
"run-bench.sh",
|
||||
"run-cli.sh",
|
||||
"run-completion.sh",
|
||||
"run-tool.sh",
|
||||
]
|
||||
|
||||
# --- Linux (BASH) assets ------------------------------------------------------
|
||||
_RUN_LINUX_TEMPLATE = _TESTS_DIR / "linux" / "run_linux.sh"
|
||||
_LINUX_ENTRY_SCRIPT = "/bin/bash /data/local/tmp/TestContent/run_linux.sh"
|
||||
|
||||
# =============================================================================
|
||||
# Artifact builders (per platform)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@dataclass
|
||||
class JobResult:
|
||||
@@ -73,35 +131,58 @@ class JobResult:
|
||||
failure_details: dict[str, str] = field(default_factory=dict)
|
||||
|
||||
|
||||
def build_artifact_zip(
|
||||
def _write_lf(path: Path, content: str) -> None:
    """Write *content* to *path* as UTF-8 with LF line endings.

    Newline translation is disabled, so each newline in *content* is written
    as a single LF byte regardless of the host platform (required by
    /bin/bash on Linux). An existing file at *path* is overwritten.
    """
    with open(path, "w", encoding="utf-8", newline="\n") as f:
        f.write(content)
|
||||
|
||||
|
||||
def _build_android_artifact(
|
||||
pkg_dir: Path,
|
||||
stage_dir: Path,
|
||||
*,
|
||||
test_mode: str = "bench",
|
||||
model_url: str | None = None,
|
||||
test_mode: str,
|
||||
model_url: str | None,
|
||||
) -> Path:
|
||||
"""Bundle everything into a single QDC artifact zip.
|
||||
"""Android zip (Appium/pytest). Extracted by QDC under /qdc/appium/.
|
||||
|
||||
Zip structure (extracted by QDC to /qdc/appium/ on the runner):
|
||||
Zip structure:
|
||||
llama_cpp_bundle/ installed package (adb pushed to /data/local/tmp/)
|
||||
run-{bench,cli,completion,tool}.sh upstream adb wrappers (patched)
|
||||
tests/
|
||||
utils.py shared helpers (paths, run_adb_command, …)
|
||||
conftest.py shared pytest fixtures (driver)
|
||||
test_bench_posix.py bench + cli tests (<<MODEL_URL>> substituted)
|
||||
AND/OR
|
||||
test_backend_ops_posix.py test-backend-ops -b HTP0
|
||||
utils.py shared adb helpers
|
||||
conftest.py Appium pytest fixtures
|
||||
test_bench_posix.py bench + cli tests (for --test bench or all)
|
||||
test_backend_ops_posix.py test-backend-ops on HTP0
|
||||
requirements.txt
|
||||
pytest.ini addopts = --junitxml=results.xml
|
||||
"""
|
||||
shutil.copytree(pkg_dir, stage_dir / "llama_cpp_bundle")
|
||||
bundle_dir = stage_dir / "llama_cpp_bundle"
|
||||
shutil.copytree(pkg_dir, bundle_dir)
|
||||
|
||||
# Download upstream adb scripts so they land at /qdc/appium/ on the QDC
|
||||
# runner. They wrap `adb shell` internally. Patch in `chmod +x bin/* lib/*`
|
||||
# right after `cd $basedir` so device binaries are executable.
|
||||
for name in _ADB_SCRIPT_NAMES:
|
||||
url = f"{_UPSTREAM_ADB_SCRIPTS}/{name}"
|
||||
dest = stage_dir / name
|
||||
log.info("Downloading %s", url)
|
||||
urllib.request.urlretrieve(url, str(dest))
|
||||
content = dest.read_text()
|
||||
content = content.replace(
|
||||
"cd $basedir;",
|
||||
"cd $basedir; chmod +x bin/* lib/* 2>/dev/null;",
|
||||
)
|
||||
dest.write_text(content)
|
||||
dest.chmod(0o755)
|
||||
|
||||
tests_dir = stage_dir / "tests"
|
||||
tests_dir.mkdir()
|
||||
|
||||
shutil.copy(_UTILS, tests_dir / "utils.py")
|
||||
shutil.copy(_UTILS, tests_dir / "utils.py")
|
||||
shutil.copy(_CONFTEST, tests_dir / "conftest.py")
|
||||
|
||||
if test_mode in ("bench", "all"):
|
||||
assert model_url is not None, "--model-url is required for bench/all test modes"
|
||||
assert model_url is not None
|
||||
(tests_dir / "test_bench_posix.py").write_text(
|
||||
_RUN_BENCH.read_text().replace("<<MODEL_URL>>", model_url)
|
||||
)
|
||||
@@ -109,33 +190,140 @@ def build_artifact_zip(
|
||||
shutil.copy(_RUN_BACKEND_OPS, tests_dir / "test_backend_ops_posix.py")
|
||||
|
||||
shutil.copy(_REQUIREMENTS, stage_dir / "requirements.txt")
|
||||
(stage_dir / "pytest.ini").write_text("[pytest]\naddopts = --junitxml=results.xml\n")
|
||||
(stage_dir / "pytest.ini").write_text(
|
||||
"[pytest]\naddopts = --junitxml=results.xml\n"
|
||||
)
|
||||
|
||||
zip_base = str(stage_dir / "artifact")
|
||||
shutil.make_archive(zip_base, "zip", stage_dir)
|
||||
return Path(f"{zip_base}.zip")
|
||||
|
||||
|
||||
def _build_linux_artifact(
    pkg_dir: Path,
    stage_dir: Path,
    test_mode: str,
    model_url: str | None,
) -> Path:
    """Linux IoT zip (BASH framework). Extracted by QDC to /data/local/tmp/TestContent/.

    Zip structure:
      run_linux.sh       entry script (placeholder-substituted, LF line endings)
      llama_cpp_bundle/  installed package

    Args:
        pkg_dir: Installed llama.cpp package directory to bundle.
        stage_dir: Empty scratch directory; the bundle, the rendered script
            and the resulting ``artifact.zip`` are all created inside it.
        test_mode: Value substituted for ``{TEST_MODE}`` in the template.
        model_url: Value substituted for ``{MODEL_URL}``; ``None`` becomes an
            empty string, which the script treats as "skip the download".

    Returns:
        Path to the created zip file.
    """
    bundle_dir = stage_dir / "llama_cpp_bundle"
    shutil.copytree(pkg_dir, bundle_dir)

    template = _RUN_LINUX_TEMPLATE.read_text(encoding="utf-8")
    # NOTE(review): the URL is pasted verbatim into a double-quoted bash
    # string (MODEL_URL="{MODEL_URL}"); a value containing `"`, `$` or a
    # backtick would change the script. Fine for CI-controlled input.
    rendered = template.replace("{MODEL_URL}", model_url or "").replace(
        "{TEST_MODE}", test_mode
    )
    script_path = stage_dir / "run_linux.sh"
    _write_lf(script_path, rendered)
    script_path.chmod(0o755)

    zip_base = str(stage_dir / "artifact")
    shutil.make_archive(zip_base, "zip", stage_dir)
    return Path(f"{zip_base}.zip")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Platform enum + strategy table
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class Platform(enum.Enum):
    """Target platform of a QDC device; selects how the job is built and run."""

    ANDROID = "android"
    LINUX = "linux"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PlatformSpec:
    """Immutable per-platform settings for building and submitting a QDC job."""

    # Passed to qdc_api.submit_job(test_framework=...).
    test_framework: TestFramework
    # Passed to qdc_api.submit_job(entry_script=...); None when no entry
    # command is given (the Android spec uses None).
    entry_script: str | None
    # Artifact builder, called positionally as
    # (pkg_dir, stage_dir, test_mode, model_url); returns the zip path.
    build_artifact: Callable[[Path, Path, str, str | None], Path]
    # str.format template with a single {base} field, used for the job name.
    job_name_fmt: str
|
||||
|
||||
|
||||
# Strategy table: everything that differs between platforms when building
# the artifact and submitting the job lives here, keyed by Platform.
PLATFORM_SPECS: dict[Platform, PlatformSpec] = {
    Platform.ANDROID: PlatformSpec(
        test_framework=TestFramework.APPIUM,
        entry_script=None,
        build_artifact=_build_android_artifact,
        job_name_fmt="{base}",
    ),
    Platform.LINUX: PlatformSpec(
        test_framework=TestFramework.BASH,
        entry_script=_LINUX_ENTRY_SCRIPT,
        build_artifact=_build_linux_artifact,
        job_name_fmt="{base} (Linux)",
    ),
}
|
||||
|
||||
# Maps the --device chipset name to its platform. main() rejects any device
# that is not listed here with an "Unknown device" error.
DEVICE_PLATFORM: dict[str, Platform] = {
    "SM8750": Platform.ANDROID,
    "SM8650": Platform.ANDROID,
    "SM8850": Platform.ANDROID,
    "QCS9075M": Platform.LINUX,
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Shared QDC job plumbing
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def _abort_job_quietly(client, job_id: str, reason: str) -> None:
    """Best-effort abort of *job_id*; failures are logged, never raised."""
    try:
        qdc_api.abort_job(client, job_id)
        log.warning("Aborted job %s after %s to free concurrency slot", job_id, reason)
    except Exception as e:
        log.warning("Failed to abort job %s: %s", job_id, e)


def wait_for_job(client, job_id: str, timeout: int) -> str:
    """Poll a QDC job until it reaches a terminal state.

    The status is polled every POLL_INTERVAL seconds. Polling errors are
    tolerated as transient until 5 of them occur in a row. The elapsed time
    is the sum of the sleep intervals; time spent inside API calls is not
    counted.

    Args:
        client: QDC API client passed through to ``qdc_api``.
        job_id: Identifier of the submitted job.
        timeout: Maximum number of seconds to wait.

    Returns:
        The raw terminal status string, lower-cased.

    Raises:
        TimeoutError: The job did not finish within *timeout* seconds.
        Exception: The last polling error, after 5 consecutive failures.
    """
    elapsed = 0
    last_state = None
    consecutive_errors = 0
    max_consecutive_errors = 5
    while elapsed < timeout:
        try:
            raw = qdc_api.get_job_status(client, job_id)
            consecutive_errors = 0
        except Exception as e:
            consecutive_errors += 1
            log.warning(
                "Transient error polling job %s (%d/%d): %s",
                job_id,
                consecutive_errors,
                max_consecutive_errors,
                e,
            )
            if consecutive_errors >= max_consecutive_errors:
                # We are abandoning the job, so release its slot just like
                # the timeout path does instead of leaving it running.
                _abort_job_quietly(client, job_id, "repeated polling errors")
                raise
            time.sleep(POLL_INTERVAL)
            elapsed += POLL_INTERVAL
            continue
        try:
            status = JobState(raw)
        except ValueError:
            # Unknown status string: keep the raw value so it is simply
            # treated as non-terminal.
            status = raw
        if status in TERMINAL_STATES:
            return raw.lower()
        # Log only on state changes to avoid one line per poll.
        if raw != last_state:
            log.info("Job %s: %s", job_id, raw)
            last_state = raw
        time.sleep(POLL_INTERVAL)
        elapsed += POLL_INTERVAL
    # Abort to free the QDC concurrency slot instead of leaking it.
    _abort_job_quietly(client, job_id, "timeout")
    raise TimeoutError(f"Job {job_id} did not finish within {timeout}s")
|
||||
|
||||
|
||||
def wait_for_log_upload(client, job_id: str) -> None:
|
||||
elapsed = 0
|
||||
while elapsed <= LOG_UPLOAD_TIMEOUT:
|
||||
status = (qdc_api.get_job_log_upload_status(client, job_id) or "").lower()
|
||||
try:
|
||||
status = (qdc_api.get_job_log_upload_status(client, job_id) or "").lower()
|
||||
except Exception as e:
|
||||
log.warning("get_job_log_upload_status failed: %s — will retry", e)
|
||||
status = ""
|
||||
if status in {"completed", "failed"}:
|
||||
return
|
||||
log.info("Waiting for log upload (status=%s) ...", status)
|
||||
@@ -150,17 +338,33 @@ def wait_for_capacity(client, max_jobs: int = MAX_CONCURRENT_JOBS) -> None:
|
||||
while elapsed < CAPACITY_TIMEOUT:
|
||||
jobs_page = qdc_api.get_jobs_list(client, page_number=0, page_size=50)
|
||||
if jobs_page is None:
|
||||
log.warning("Could not retrieve job list; proceeding without capacity check")
|
||||
log.warning(
|
||||
"Could not retrieve job list; proceeding without capacity check"
|
||||
)
|
||||
return
|
||||
items = getattr(jobs_page, "data", []) or []
|
||||
active = sum(1 for j in items if getattr(j, "state", None) in _NON_TERMINAL_STATE_VALUES)
|
||||
active = sum(
|
||||
1 for j in items if getattr(j, "state", None) in _NON_TERMINAL_STATE_VALUES
|
||||
)
|
||||
if active < max_jobs:
|
||||
log.info("Active QDC jobs: %d / %d — proceeding", active, max_jobs)
|
||||
return
|
||||
log.info("Active QDC jobs: %d / %d — waiting %ds ...", active, max_jobs, CAPACITY_POLL)
|
||||
log.info(
|
||||
"Active QDC jobs: %d / %d — waiting %ds ...",
|
||||
active,
|
||||
max_jobs,
|
||||
CAPACITY_POLL,
|
||||
)
|
||||
time.sleep(CAPACITY_POLL)
|
||||
elapsed += CAPACITY_POLL
|
||||
log.warning("Capacity wait timed out after %ds; proceeding anyway", CAPACITY_TIMEOUT)
|
||||
raise TimeoutError(
|
||||
f"Capacity wait timed out after {CAPACITY_TIMEOUT}s"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Log parsing helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _parse_junit_xml(content: str) -> tuple[dict[str, bool], dict[str, str]]:
|
||||
@@ -192,10 +396,26 @@ def _parse_pytest_output(content: str) -> dict[str, bool]:
|
||||
|
||||
|
||||
def fetch_logs_and_parse_tests(
|
||||
client, job_id: str
|
||||
client, job_id: str, max_retries: int = 5, retry_delay: int = 30
|
||||
) -> tuple[dict[str, bool], dict[str, str], dict[str, str]]:
|
||||
"""Returns (test_results, raw_logs, failure_details)."""
|
||||
log_files = qdc_api.get_job_log_files(client, job_id)
|
||||
log_files = None
|
||||
for attempt in range(1, max_retries + 1):
|
||||
try:
|
||||
log_files = qdc_api.get_job_log_files(client, job_id)
|
||||
break
|
||||
except Exception as e:
|
||||
if attempt < max_retries:
|
||||
log.warning(
|
||||
"get_job_log_files failed (attempt %d/%d): %s — retrying in %ds",
|
||||
attempt, max_retries, e, retry_delay,
|
||||
)
|
||||
time.sleep(retry_delay)
|
||||
else:
|
||||
log.error(
|
||||
"get_job_log_files failed after %d attempts: %s", max_retries, e
|
||||
)
|
||||
return {}, {}, {}
|
||||
if not log_files:
|
||||
log.warning("No log files returned for job %s", job_id)
|
||||
return {}, {}, {}
|
||||
@@ -207,8 +427,8 @@ def fetch_logs_and_parse_tests(
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
for lf in log_files:
|
||||
log.info("Downloading log file: %s", lf.filename)
|
||||
zip_path = os.path.join(tmpdir, "log.zip")
|
||||
log.info("Downloading log file: %s", lf.filename)
|
||||
qdc_api.download_job_log_files(client, lf.filename, zip_path)
|
||||
try:
|
||||
shutil.unpack_archive(zip_path, tmpdir, "zip")
|
||||
@@ -226,12 +446,15 @@ def fetch_logs_and_parse_tests(
|
||||
elif fname.endswith(".log"):
|
||||
if fname in _EXCLUDED_LOGS:
|
||||
continue
|
||||
log.info("--- %s ---", fname)
|
||||
log.info("%s", content)
|
||||
log.info("--- %s ---\n%s", fname, content)
|
||||
raw_logs[fname] = content
|
||||
pytest_fallback.update(_parse_pytest_output(content))
|
||||
|
||||
return (test_results if test_results else pytest_fallback), raw_logs, failure_details
|
||||
return (
|
||||
(test_results if test_results else pytest_fallback),
|
||||
raw_logs,
|
||||
failure_details,
|
||||
)
|
||||
|
||||
|
||||
def write_summary(result: JobResult, title: str = "QDC Test Results") -> None:
|
||||
@@ -289,30 +512,106 @@ def write_summary(result: JobResult, title: str = "QDC Test Results") -> None:
|
||||
f.write("\n".join(lines) + "\n")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# CLI + main
|
||||
# =============================================================================
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse and validate the command-line arguments.

    Exits with a usage error (via ``ArgumentParser.error``) when --model-url
    is missing for the bench/all suites, or when --retries / --retry-delay
    is negative.

    Returns:
        The parsed argument namespace.
    """
    p = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    p.add_argument("--pkg-dir", required=True, type=Path,
                   help="Installed llama.cpp package directory (contains bin/ and lib/)")
    p.add_argument("--model-url",
                   help="Direct URL to the GGUF model file (required for --test bench or all)")
    p.add_argument("--device", required=True,
                   help="QDC chipset name, e.g. SM8750")
    p.add_argument("--test", choices=["bench", "backend-ops", "all"], default="bench",
                   help="Test suite to run (default: bench)")
    p.add_argument("--job-timeout", type=int, default=JOB_TIMEOUT, metavar="SECONDS",
                   help=f"Max seconds to wait for job completion (default: {JOB_TIMEOUT})")
    p.add_argument("--retries", type=int, default=DEFAULT_RETRIES, metavar="N",
                   help=f"Number of retries when device is unavailable (default: {DEFAULT_RETRIES})")
    p.add_argument("--retry-delay", type=int, default=RETRY_DELAY, metavar="SECONDS",
                   help=f"Seconds to wait between retries (default: {RETRY_DELAY})")
    args = p.parse_args()
    if args.test in ("bench", "all") and not args.model_url:
        p.error("--model-url is required when --test bench or --test all")
    # A negative retry count would make the attempt loop in main() run zero
    # times and exit 1 without any message.
    if args.retries < 0:
        p.error("--retries must be >= 0")
    # time.sleep() raises ValueError on a negative duration.
    if args.retry_delay < 0:
        p.error("--retry-delay must be >= 0")
    return args
|
||||
|
||||
|
||||
def _submit_and_run_job(client, args, spec, target_id, artifact_id) -> JobResult:
    """Submit a QDC job and wait for results.

    Raises DeviceUnavailableError for transient device/resource issues that
    are worth retrying. Returns JobResult for definitive outcomes (pass or
    test failure).

    Args:
        client: QDC API client passed through to ``qdc_api``.
        args: Parsed CLI namespace; ``job_timeout`` and ``device`` are read.
        spec: PlatformSpec supplying the test framework, entry script and
            job-name format.
        target_id: QDC target to run on.
        artifact_id: Id of the already-uploaded test artifact.
    """
    try:
        wait_for_capacity(client)
    except TimeoutError as e:
        raise DeviceUnavailableError("Capacity wait timed out — device busy") from e

    job_name = spec.job_name_fmt.format(base="llama.cpp Hexagon tests")

    job_id = qdc_api.submit_job(
        public_api_client=client,
        target_id=target_id,
        job_name=job_name,
        external_job_id=None,
        job_type=JobType.AUTOMATED,
        job_mode=JobMode.APPLICATION,
        # args.job_timeout is in seconds; convert to whole minutes, at least 1.
        timeout=max(1, args.job_timeout // 60),
        test_framework=spec.test_framework,
        entry_script=spec.entry_script,
        job_artifacts=[artifact_id],
        monkey_events=None,
        monkey_session_timeout=None,
        job_parameters=[JobSubmissionParameter.WIFIENABLED],
    )
    if job_id is None:
        raise DeviceUnavailableError("Job submission failed — device may be unavailable")
    log.info("Job submitted: %s (device=%s)", job_id, args.device)

    try:
        job_status = wait_for_job(client, job_id, timeout=args.job_timeout)
    except TimeoutError as e:
        raise DeviceUnavailableError(str(e)) from e
    log.info("Job %s finished: %s", job_id, job_status)

    wait_for_log_upload(client, job_id)
    tests, raw_logs, failure_details = fetch_logs_and_parse_tests(client, job_id)

    job_ok = job_status == JobState.COMPLETED.value.lower()

    # A job that neither completed nor reported any test is treated as an
    # infrastructure problem (retryable), not as a test failure.
    if not job_ok and not tests:
        raise DeviceUnavailableError(
            f"Job did not complete (status={job_status}) and produced no test results"
        )

    # all() of an empty mapping is True, so this also covers "no tests parsed".
    passed = job_ok and all(tests.values())
    if spec.test_framework == TestFramework.BASH and not tests:
        log.error("No test results recovered (state=%s). Script likely never ran.", job_status)
        passed = False
    if not passed:
        log.error("Job did not complete successfully or tests failed (status=%s)", job_status)

    return JobResult(passed=passed, tests=tests, raw_logs=raw_logs, failure_details=failure_details)
|
||||
|
||||
|
||||
def main() -> int:
|
||||
args = parse_args()
|
||||
|
||||
platform = DEVICE_PLATFORM.get(args.device)
|
||||
if platform is None:
|
||||
log.error(
|
||||
"Unknown device %r. Known: %s",
|
||||
args.device, ", ".join(sorted(DEVICE_PLATFORM.keys())),
|
||||
)
|
||||
return 1
|
||||
spec = PLATFORM_SPECS[platform]
|
||||
|
||||
api_key = os.environ.get("QDC_API_KEY")
|
||||
if not api_key:
|
||||
log.error("QDC_API_KEY environment variable must be set")
|
||||
@@ -334,10 +633,9 @@ def main() -> int:
|
||||
return 1
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
log.info("Building artifact ...")
|
||||
zip_path = build_artifact_zip(
|
||||
args.pkg_dir, Path(tmpdir),
|
||||
test_mode=args.test, model_url=args.model_url,
|
||||
log.info("Building %s artifact (test=%s) ...", platform.value, args.test)
|
||||
zip_path = spec.build_artifact(
|
||||
args.pkg_dir, Path(tmpdir), args.test, args.model_url
|
||||
)
|
||||
log.info("Uploading artifact (%d MB) ...", zip_path.stat().st_size // 1_000_000)
|
||||
artifact_id = qdc_api.upload_file(client, str(zip_path), ArtifactType.TESTSCRIPT)
|
||||
@@ -346,46 +644,31 @@ def main() -> int:
|
||||
log.error("Artifact upload failed")
|
||||
return 1
|
||||
|
||||
wait_for_capacity(client)
|
||||
|
||||
job_id = qdc_api.submit_job(
|
||||
public_api_client=client,
|
||||
target_id=target_id,
|
||||
job_name="llama.cpp Hexagon tests",
|
||||
external_job_id=None,
|
||||
job_type=JobType.AUTOMATED,
|
||||
job_mode=JobMode.APPLICATION,
|
||||
timeout=max(1, args.job_timeout // 60),
|
||||
test_framework=TestFramework.APPIUM,
|
||||
entry_script=None,
|
||||
job_artifacts=[artifact_id],
|
||||
monkey_events=None,
|
||||
monkey_session_timeout=None,
|
||||
job_parameters=[JobSubmissionParameter.WIFIENABLED],
|
||||
)
|
||||
if job_id is None:
|
||||
log.error("Job submission failed")
|
||||
max_attempts = 1 + args.retries
|
||||
for attempt in range(1, max_attempts + 1):
|
||||
try:
|
||||
result = _submit_and_run_job(client, args, spec, target_id, artifact_id)
|
||||
break
|
||||
except DeviceUnavailableError as e:
|
||||
if attempt < max_attempts:
|
||||
log.warning(
|
||||
"Attempt %d/%d failed (device unavailable): %s — retrying in %ds",
|
||||
attempt, max_attempts, e, args.retry_delay,
|
||||
)
|
||||
time.sleep(args.retry_delay)
|
||||
else:
|
||||
log.error(
|
||||
"Attempt %d/%d failed (device unavailable): %s — no retries left",
|
||||
attempt, max_attempts, e,
|
||||
)
|
||||
write_summary(
|
||||
JobResult(passed=False, tests={}),
|
||||
title=f"QDC Device Unavailable ({args.device})",
|
||||
)
|
||||
return 1
|
||||
else:
|
||||
return 1
|
||||
log.info("Job submitted: %s (device=%s)", job_id, args.device)
|
||||
|
||||
try:
|
||||
job_status = wait_for_job(client, job_id, timeout=args.job_timeout)
|
||||
except TimeoutError as e:
|
||||
log.error("%s", e)
|
||||
write_summary(JobResult(passed=False, tests={}), title=f"QDC Job Timed Out ({args.device})")
|
||||
return 1
|
||||
log.info("Job %s finished: %s", job_id, job_status)
|
||||
|
||||
wait_for_log_upload(client, job_id)
|
||||
tests, raw_logs, failure_details = fetch_logs_and_parse_tests(client, job_id)
|
||||
|
||||
passed = job_status == JobState.COMPLETED.value.lower()
|
||||
if tests:
|
||||
passed = passed and all(tests.values())
|
||||
if not passed:
|
||||
log.error("Job did not complete successfully or tests failed (status=%s)", job_status)
|
||||
|
||||
result = JobResult(passed=passed, tests=tests, raw_logs=raw_logs, failure_details=failure_details)
|
||||
if args.test == "backend-ops":
|
||||
title = f"Backend Ops — HTP0 ({args.device})"
|
||||
elif args.test == "all":
|
||||
@@ -394,7 +677,7 @@ def main() -> int:
|
||||
title = f"QDC Test Results ({args.device})"
|
||||
write_summary(result, title=title)
|
||||
|
||||
return 0 if passed else 1
|
||||
return 0 if result.passed else 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
232
scripts/snapdragon/qdc/tests/linux/run_linux.sh
Normal file
232
scripts/snapdragon/qdc/tests/linux/run_linux.sh
Normal file
@@ -0,0 +1,232 @@
|
||||
#!/bin/bash
|
||||
# llama.cpp Hexagon test entry script for QDC Linux IoT (BASH framework).
|
||||
#
|
||||
# Placeholders substituted by run_qdc_jobs.py (--platform linux) before upload:
|
||||
# {MODEL_URL} direct URL to a .gguf model file
|
||||
# {TEST_MODE} bench | backend-ops | all
|
||||
#
|
||||
# QDC extracts the artifact zip to /data/local/tmp/TestContent/ and invokes
|
||||
# this script via: /bin/bash /data/local/tmp/TestContent/run_linux.sh
|
||||
# Any files written under /data/local/tmp/QDC_logs/ are auto-uploaded.
|
||||
|
||||
# Keep going after a failing command: each test case records its own result
# and the host decides pass/fail from results.xml.
set +e
umask 022

# Fixed on-device locations used throughout the script.
LOG_DIR=/data/local/tmp/QDC_logs
BUNDLE_DIR=/data/local/tmp/TestContent/llama_cpp_bundle
MODEL_DIR=/data/local/tmp/gguf
MODEL_PATH="$MODEL_DIR/model.gguf"
RESULTS_XML="$LOG_DIR/results.xml"

mkdir -p "$LOG_DIR" "$MODEL_DIR"

# From here on, everything the parent shell prints goes to script.log (which
# lives under the auto-uploaded log directory). Individual test cases still
# redirect their own stdout/stderr into dedicated log files.
exec > "$LOG_DIR/script.log" 2>&1

# Record basic facts about the device for later debugging.
echo "=== env ==="
date -u
uname -a
pwd

# Best effort: failure to remount the root filesystem is ignored.
mount -o rw,remount / 2>/dev/null || true

# All binaries are invoked relative to the bundle directory.
if ! cd "$BUNDLE_DIR"; then
    echo "FATAL: bundle missing at $BUNDLE_DIR"
    exit 1
fi
chmod +x bin/* 2>/dev/null

# Library search paths point at the bundled lib directory.
export LD_LIBRARY_PATH="$BUNDLE_DIR/lib:$LD_LIBRARY_PATH"
export ADSP_LIBRARY_PATH="$BUNDLE_DIR/lib"
export GGML_HEXAGON_EXPERIMENTAL=1
|
||||
|
||||
# Fetch the model unless it is already cached on the device.
#
# The download goes to a temporary "<model>.part" file that is renamed only
# after curl succeeds. Writing directly to $MODEL_PATH would leave a truncated
# file behind when the transfer fails or the job is interrupted, and the
# "[ ! -f ]" cache check would then accept that broken file as a valid model.
echo "=== download model ==="
MODEL_URL="{MODEL_URL}"
if [ -z "$MODEL_URL" ]; then
    echo "No model URL provided, skipping download"
elif [ ! -f "$MODEL_PATH" ]; then
    model_tmp="$MODEL_PATH.part"
    curl -L -fS --retry 3 --retry-delay 5 -o "$model_tmp" "$MODEL_URL"
    curl_rc=$?
    if [ $curl_rc -ne 0 ]; then
        # Drop the partial download so it can never be mistaken for a model.
        rm -f "$model_tmp"
        echo "FATAL: model download failed (rc=$curl_rc)"
        exit 1
    fi
    if ! mv -f "$model_tmp" "$MODEL_PATH"; then
        rm -f "$model_tmp"
        echo "FATAL: could not move downloaded model to $MODEL_PATH"
        exit 1
    fi
    ls -la "$MODEL_PATH"
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# JUnit XML helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Start a fresh results.xml: truncate the file and write the XML declaration
# followed by the opening <testsuites> and <testsuite> elements.
xml_open() {
    {
        printf '%s\n' '<?xml version="1.0" encoding="utf-8"?>'
        printf '%s\n' "<testsuites>"
        printf '%s\n' "<testsuite name=\"llama_cpp_linux\">"
    } > "$RESULTS_XML"
}
|
||||
|
||||
# Finish results.xml by appending the closing tags for the elements that
# xml_open started.
xml_close() {
    {
        printf '%s\n' '</testsuite>'
        printf '%s\n' '</testsuites>'
    } >> "$RESULTS_XML"
}
|
||||
|
||||
# Append a passing (self-closing) <testcase> element to results.xml.
#   $1  value for the classname attribute
#   $2  value for the name attribute
xml_case_pass() {
    local suite_class=$1
    local case_name=$2
    printf '<testcase classname="%s" name="%s"/>\n' \
        "$suite_class" "$case_name" >> "$RESULTS_XML"
}
|
||||
|
||||
# Append a failing <testcase> element to results.xml, embedding the tail of
# the case's log file as the failure text.
#   $1  value for the classname attribute
#   $2  value for the name attribute
#   $3  exit code of the failed command (reported as "exit <rc>")
#   $4  log file whose last 4096 bytes are embedded (a missing file yields an
#       empty failure body)
xml_case_fail() {
    local classname=$1 name=$2 rc=$3 logfile=$4
    {
        printf '<testcase classname="%s" name="%s">\n' "$classname" "$name"
        printf '<failure message="exit %s"><![CDATA[\n' "$rc"
        # XML 1.0 forbids control characters other than TAB, LF and CR even
        # inside CDATA; a crashed binary can leave such bytes (NUL, ESC, ...)
        # in its log, which would make results.xml unparseable. Strip them,
        # then break up any "]]>" so the log cannot terminate the CDATA
        # section early.
        tail -c 4096 "$logfile" 2>/dev/null \
            | LC_ALL=C tr -d '\000-\010\013\014\016-\037' \
            | sed 's/]]>/]] >/g'
        printf '\n]]></failure>\n</testcase>\n'
    } >> "$RESULTS_XML"
}
|
||||
|
||||
# Translate a backend short name into "<NDEV> <device>": the value used for
# GGML_HEXAGON_NDEV and the value passed to --device. The device "none"
# means no offload (CPU). Unknown names print nothing.
backend_env() {
    local backend=$1
    if [ "$backend" = cpu ]; then
        echo "0 none"
    elif [ "$backend" = gpu ]; then
        echo "0 GPUOpenCL"
    elif [ "$backend" = npu ]; then
        echo "1 HTP0"
    fi
}
|
||||
|
||||
# Print the suffix used in llama-bench log file names for a backend short
# name: cpu -> cpu, gpu -> gpu, npu -> htp. Unknown names print nothing.
backend_log_name() {
    local suffix
    case "$1" in
        cpu | gpu) suffix=$1 ;;
        npu)       suffix=htp ;;
        *)         return 0 ;;
    esac
    echo "$suffix"
}
|
||||
|
||||
|
||||
# Print the device name for a backend short name: cpu -> none,
# gpu -> GPUOpenCL, npu -> HTP0. Unknown names print nothing.
backend_device_name() {
    local device
    case "$1" in
        cpu) device=none ;;
        gpu) device=GPUOpenCL ;;
        npu) device=HTP0 ;;
        *)   return 0 ;;
    esac
    printf '%s\n' "$device"
}
|
||||
|
||||
# When a per-case `timeout N` expired (exit code 124), append a short
# diagnostic block to the case log. Without it the log usually just stops
# mid-OpenCL-init with no stderr, which is hard to interpret in CI summaries.
#   $1  exit code of the timed command
#   $2  timeout budget in seconds (only used in the banner line)
#   $3  log file to append to
note_timeout_if_triggered() {
    local exit_code=$1 limit_s=$2 target_log=$3

    # Any exit code other than 124 means the timeout did not fire.
    if ! [ "$exit_code" -eq 124 ]; then
        return 0
    fi

    {
        printf '\n'
        printf '=== TIMEOUT after %ss ===\n' "$limit_s"
        printf 'uptime: '
        uptime 2>/dev/null
        printf 'free -m:\n'
        free -m 2>/dev/null
        printf 'loadavg: '
        cat /proc/loadavg 2>/dev/null
    } >> "$target_log"
}
|
||||
|
||||
# Print the llama-completion argument string for a backend short name. The
# backends share the context/generation/seed settings and differ only in
# --device and the batch-size flag. Unknown names print nothing.
completion_extra_args() {
    local shared="--ctx-size 128 -no-cnv -n 32 --seed 42"
    case "$1" in
        cpu) echo "--device none $shared --batch-size 128" ;;
        gpu) echo "--device GPUOpenCL $shared --ubatch-size 512" ;;
        npu) echo "--device HTP0 $shared --ubatch-size 1024" ;;
    esac
}
|
||||
|
||||
# Run one llama-completion case for the given backend (cpu | gpu | npu) and
# record the outcome in results.xml.
#
# The command runs under a 600 s timeout with stdin detached; its combined
# stdout/stderr goes to llama_completion_<device>.log in $LOG_DIR.
run_completion_case() {
    local name=$1

    # backend_env prints "<NDEV> <device>"; split it into its two fields.
    local -a env_pair
    env_pair=($(backend_env "$name"))
    local ndev=${env_pair[0]}
    local device=${env_pair[1]}

    local log prompt extra rc
    log="$LOG_DIR/llama_completion_$(backend_device_name "$name").log"
    prompt="$LOG_DIR/bench_prompt.txt"
    echo 'What is the capital of France?' > "$prompt"
    extra=$(completion_extra_args "$name")

    echo "=== [completion:$name] llama-completion --device $device (NDEV=$ndev) ==="
    # $extra is intentionally unquoted so it splits into separate arguments.
    timeout 600 env GGML_HEXAGON_NDEV=$ndev ./bin/llama-completion \
        -m "$MODEL_PATH" -f "$prompt" $extra \
        > "$log" 2>&1 < /dev/null
    rc=$?
    note_timeout_if_triggered "$rc" 600 "$log"

    local case_id="test_llama_completion[$name]"
    if [ $rc -ne 0 ]; then
        xml_case_fail "tests.test_bench_posix" "$case_id" "$rc" "$log"
    else
        xml_case_pass "tests.test_bench_posix" "$case_id"
    fi
}
|
||||
|
||||
# Run one llama-bench case for the given backend (cpu | gpu | npu) and record
# the outcome in results.xml.
#
# The command runs under a 600 s timeout; its combined stdout/stderr goes to
# llama_bench_<cpu|gpu|htp>.log in $LOG_DIR.
run_bench_case() {
    local name=$1

    # backend_env prints "<NDEV> <device>"; split it into its two fields.
    local -a env_pair
    env_pair=($(backend_env "$name"))
    local ndev=${env_pair[0]}
    local device=${env_pair[1]}

    local log rc
    log="$LOG_DIR/llama_bench_$(backend_log_name "$name").log"

    echo "=== [bench:$name] llama-bench --device $device (NDEV=$ndev) ==="
    timeout 600 env GGML_HEXAGON_NDEV=$ndev ./bin/llama-bench \
        -m "$MODEL_PATH" --device "$device" -ngl 99 \
        --batch-size 128 -t 4 -p 128 -n 32 \
        > "$log" 2>&1
    rc=$?
    note_timeout_if_triggered "$rc" 600 "$log"

    local case_id="test_llama_bench[$name]"
    if [ $rc -ne 0 ]; then
        xml_case_fail "tests.test_bench_posix" "$case_id" "$rc" "$log"
    else
        xml_case_pass "tests.test_bench_posix" "$case_id"
    fi
}
|
||||
|
||||
# Run test-backend-ops (MUL_MAT on the HTP0 backend) restricted to one
# type_a value and record the outcome in results.xml.
#   $1  type_a value to test (e.g. mxfp4, fp16, q4_0)
#
# The command runs under a 600 s timeout; its combined stdout/stderr goes to
# backend_ops_<dtype>.log in $LOG_DIR.
run_backend_ops_case() {
    local dtype=$1
    local log="$LOG_DIR/backend_ops_${dtype}.log"

    # By default select cases by their type_a value. For q4_0, match the
    # Android runner and exclude a known-broken shape on NPU.
    local pattern="type_a=${dtype}"
    if [ "$dtype" = q4_0 ]; then
        pattern='^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$'
    fi

    echo "=== [backend-ops:$dtype] test-backend-ops -b HTP0 -o MUL_MAT ==="
    timeout 600 env GGML_HEXAGON_NDEV=1 GGML_HEXAGON_HOSTBUF=0 \
        ./bin/test-backend-ops -b HTP0 -o MUL_MAT -p "$pattern" \
        > "$log" 2>&1
    local rc=$?
    note_timeout_if_triggered "$rc" 600 "$log"

    local case_id="test_backend_ops_htp0[$dtype]"
    if [ $rc -ne 0 ]; then
        xml_case_fail "tests.test_backend_ops_posix" "$case_id" "$rc" "$log"
    else
        xml_case_pass "tests.test_backend_ops_posix" "$case_id"
    fi
}
|
||||
|
||||
# Entry point: open results.xml, run the cases selected by the TEST_MODE
# placeholder, close results.xml.
xml_open

case "{TEST_MODE}" in
    bench)
        for b in cpu gpu npu; do run_completion_case "$b"; done
        for b in cpu gpu npu; do run_bench_case "$b"; done
        ;;
    backend-ops)
        for d in mxfp4 fp16 q4_0; do run_backend_ops_case "$d"; done
        ;;
    all)
        for b in cpu gpu npu; do run_completion_case "$b"; done
        for b in cpu gpu npu; do run_bench_case "$b"; done
        for d in mxfp4 fp16 q4_0; do run_backend_ops_case "$d"; done
        ;;
    *)
        echo "FATAL: unsupported TEST_MODE={TEST_MODE}"
        # Record the misconfiguration as a failing test case. Otherwise
        # results.xml would contain an empty suite and nothing in it would
        # indicate that no test was actually run.
        xml_case_fail "tests.run_linux" "test_mode" 2 "$LOG_DIR/script.log"
        ;;
esac

xml_close
echo "=== done ==="

# Host parses results.xml to decide pass/fail, so the script itself always
# exits successfully.
exit 0
|
||||
@@ -1,8 +1,9 @@
|
||||
"""
|
||||
On-device test-backend-ops runner for llama.cpp (HTP0 backend).
|
||||
|
||||
Executed by QDC's Appium test framework on the QDC runner.
|
||||
On Android: executed by QDC's Appium test framework on the QDC runner.
|
||||
The runner has ADB access to the allocated device.
|
||||
On Linux: runs test-backend-ops directly via run_linux.sh (BASH framework).
|
||||
"""
|
||||
|
||||
import os
|
||||
@@ -10,7 +11,12 @@ import sys
|
||||
|
||||
import pytest
|
||||
|
||||
from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
|
||||
from utils import (
|
||||
BIN_PATH,
|
||||
push_bundle_if_needed,
|
||||
run_script,
|
||||
write_qdc_log,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
@@ -20,17 +26,21 @@ def install(driver):
|
||||
|
||||
@pytest.mark.parametrize("type_a", ["mxfp4", "fp16", "q4_0"])
|
||||
def test_backend_ops_htp0(type_a):
|
||||
cmd = f"{CMD_PREFIX} GGML_HEXAGON_HOSTBUF=0 GGML_HEXAGON_EXPERIMENTAL=1 {BIN_PATH}/test-backend-ops -b HTP0 -o MUL_MAT"
|
||||
if type_a == "q4_0":
|
||||
cmd += r' -p "^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$"'
|
||||
pattern = r'^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$'
|
||||
else:
|
||||
cmd += f" -p type_a={type_a}"
|
||||
result = run_adb_command(
|
||||
cmd,
|
||||
check=False,
|
||||
pattern = f"type_a={type_a}"
|
||||
|
||||
quoted_pattern = f'"{pattern}"' if type_a == "q4_0" else pattern
|
||||
result = run_script(
|
||||
"run-tool.sh",
|
||||
extra_env={"HB": "0"},
|
||||
extra_args=["test-backend-ops", "-b", "HTP0", "-o", "MUL_MAT", "-p", quoted_pattern],
|
||||
)
|
||||
write_qdc_log(f"backend_ops_{type_a}.log", result.stdout or "")
|
||||
assert result.returncode == 0, f"test-backend-ops type_a={type_a} failed (exit {result.returncode})"
|
||||
assert result.returncode == 0, (
|
||||
f"test-backend-ops type_a={type_a} failed (exit {result.returncode})"
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
"""
|
||||
On-device bench and completion test runner for llama.cpp (CPU, GPU, NPU backends).
|
||||
|
||||
Executed by QDC's Appium test framework on the QDC runner.
|
||||
The runner has ADB access to the allocated device.
|
||||
On Android: calls upstream run-*.sh scripts from llama.cpp/scripts/snapdragon/adb/
|
||||
on the QDC runner host (scripts wrap commands in ``adb shell`` internally).
|
||||
|
||||
On Linux: runs llama-bench directly via run_linux.sh (BASH framework).
|
||||
|
||||
Placeholders replaced at artifact creation time by run_qdc_jobs.py:
|
||||
<<MODEL_URL>> Direct URL to the GGUF model file (downloaded on-device via curl)
|
||||
<<MODEL_URL>> Direct URL to the GGUF model file (downloaded on-device)
|
||||
"""
|
||||
|
||||
import os
|
||||
@@ -14,58 +16,75 @@ import sys
|
||||
|
||||
import pytest
|
||||
|
||||
from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
|
||||
from utils import (
|
||||
BIN_PATH,
|
||||
MODEL_DEVICE_PATH,
|
||||
MODEL_NAME,
|
||||
PROMPT_DIR,
|
||||
push_bundle_if_needed,
|
||||
run_adb_command,
|
||||
run_script,
|
||||
write_qdc_log,
|
||||
)
|
||||
|
||||
MODEL_PATH = "/data/local/tmp/model.gguf"
|
||||
PROMPT = "What is the capital of France?"
|
||||
CLI_OPTS = "--batch-size 128 -n 128 -no-cnv --seed 42"
|
||||
MODEL_URL = "<<MODEL_URL>>"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session", autouse=True)
|
||||
def install(driver):
|
||||
push_bundle_if_needed(f"{BIN_PATH}/llama-cli")
|
||||
|
||||
# Skip model download if already present
|
||||
run_adb_command(f"mkdir -p /data/local/tmp/gguf {PROMPT_DIR}")
|
||||
run_adb_command(f"echo 'What is the capital of France?' > {PROMPT_DIR}/bench_prompt.txt")
|
||||
check = subprocess.run(
|
||||
["adb", "shell", f"ls {MODEL_PATH}"],
|
||||
["adb", "shell", f"ls {MODEL_DEVICE_PATH}"],
|
||||
text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
)
|
||||
if check.returncode != 0:
|
||||
run_adb_command(f'curl -L -J --output {MODEL_PATH} "<<MODEL_URL>>"')
|
||||
run_adb_command(f'curl -L -J --output {MODEL_DEVICE_PATH} "{MODEL_URL}"')
|
||||
|
||||
|
||||
@pytest.mark.parametrize("device,extra_flags", [
|
||||
pytest.param("none", "-ctk q8_0 -ctv q8_0", id="cpu"),
|
||||
pytest.param("GPUOpenCL", "", id="gpu"),
|
||||
pytest.param("HTP0", "-ctk q8_0 -ctv q8_0", id="npu"),
|
||||
])
|
||||
def test_llama_completion(device, extra_flags):
|
||||
result = run_adb_command(
|
||||
f'{CMD_PREFIX} {BIN_PATH}/llama-completion'
|
||||
f' -m {MODEL_PATH} --device {device} -ngl 99 -t 4 {CLI_OPTS} {extra_flags} -fa on'
|
||||
f' -p "{PROMPT}"',
|
||||
check=False,
|
||||
@pytest.mark.parametrize(
|
||||
"device",
|
||||
[
|
||||
pytest.param("none", id="cpu"),
|
||||
pytest.param("GPUOpenCL", id="gpu"),
|
||||
pytest.param("HTP0", id="npu"),
|
||||
],
|
||||
)
|
||||
def test_llama_completion(device):
|
||||
result = run_script(
|
||||
"run-completion.sh",
|
||||
extra_env={"D": device, "M": MODEL_NAME},
|
||||
extra_args=["--batch-size", "128", "-n", "128", "--seed", "42",
|
||||
"-f", f"{PROMPT_DIR}/bench_prompt.txt"],
|
||||
)
|
||||
write_qdc_log(f"llama_completion_{device}.log", result.stdout or "")
|
||||
assert result.returncode == 0, f"llama-completion {device} failed (exit {result.returncode})"
|
||||
assert result.returncode == 0, (
|
||||
f"llama-completion {device} failed (exit {result.returncode})"
|
||||
)
|
||||
|
||||
|
||||
_DEVICE_LOG_NAME = {"none": "cpu", "GPUOpenCL": "gpu", "HTP0": "htp"}
|
||||
|
||||
|
||||
@pytest.mark.parametrize("device", [
|
||||
pytest.param("none", id="cpu"),
|
||||
pytest.param("GPUOpenCL", id="gpu"),
|
||||
pytest.param("HTP0", id="npu"),
|
||||
])
|
||||
@pytest.mark.parametrize(
|
||||
"device",
|
||||
[
|
||||
pytest.param("none", id="cpu"),
|
||||
pytest.param("GPUOpenCL", id="gpu"),
|
||||
pytest.param("HTP0", id="npu"),
|
||||
],
|
||||
)
|
||||
def test_llama_bench(device):
|
||||
result = run_adb_command(
|
||||
f"{CMD_PREFIX} {BIN_PATH}/llama-bench"
|
||||
f" -m {MODEL_PATH} --device {device} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32",
|
||||
check=False,
|
||||
result = run_script(
|
||||
"run-bench.sh",
|
||||
extra_env={"D": device, "M": MODEL_NAME},
|
||||
extra_args=["--batch-size", "128", "-p", "128", "-n", "32"],
|
||||
)
|
||||
write_qdc_log(f"llama_bench_{_DEVICE_LOG_NAME[device]}.log", result.stdout or "")
|
||||
assert result.returncode == 0, f"llama-bench {device} failed (exit {result.returncode})"
|
||||
assert result.returncode == 0, (
|
||||
f"llama-bench {device} failed (exit {result.returncode})"
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
"""Shared helpers for QDC on-device test runners."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
@@ -13,16 +15,14 @@ log = logging.getLogger(__name__)
|
||||
# On-device paths
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
BUNDLE_PATH = "/data/local/tmp/llama_cpp_bundle"
|
||||
BUNDLE_PATH = "/data/local/tmp/llama.cpp"
|
||||
BIN_PATH = f"{BUNDLE_PATH}/bin"
|
||||
LIB_PATH = f"{BUNDLE_PATH}/lib"
|
||||
QDC_LOGS_PATH = "/data/local/tmp/QDC_logs"
|
||||
LIB_PATH = f"{BUNDLE_PATH}/lib"
|
||||
BIN_PATH = f"{BUNDLE_PATH}/bin"
|
||||
ENV_PREFIX = (
|
||||
f"export LD_LIBRARY_PATH={LIB_PATH} && "
|
||||
f"export ADSP_LIBRARY_PATH={LIB_PATH} && "
|
||||
f"chmod +x {BIN_PATH}/* &&"
|
||||
)
|
||||
CMD_PREFIX = f"cd {BUNDLE_PATH} && {ENV_PREFIX}"
|
||||
SCRIPTS_DIR = "/qdc/appium"
|
||||
MODEL_NAME = "model.gguf"
|
||||
MODEL_DEVICE_PATH = "/data/local/tmp/gguf/model.gguf"
|
||||
PROMPT_DIR = "/data/local/tmp/scorecard_prompts"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Appium session options
|
||||
@@ -34,16 +34,47 @@ options.set_capability("platformName", "Android")
|
||||
options.set_capability("deviceName", os.getenv("ANDROID_DEVICE_VERSION"))
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ADB helpers
|
||||
# Shell / process helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def write_qdc_log(filename: str, content: str) -> None:
    """Store *content* as ``filename`` in the on-device QDC log directory.

    The text is written to a local temporary file, pushed to
    ``QDC_LOGS_PATH`` with ``adb push`` and the temporary file is removed
    afterwards. The exit codes of the adb commands are not checked.
    """
    # Make sure the destination directory exists on the device.
    subprocess.run(
        ["adb", "shell", f"mkdir -p {QDC_LOGS_PATH}"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )

    # delete=False: the file has to outlive the ``with`` block so adb can
    # read it; it is unlinked explicitly once the push has been attempted.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False) as handle:
        handle.write(content)
        local_copy = handle.name

    remote_path = f"{QDC_LOGS_PATH}/{filename}"
    try:
        subprocess.run(
            ["adb", "push", local_copy, remote_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
    finally:
        os.unlink(local_copy)
|
||||
|
||||
|
||||
def ensure_bundle(check_binary: str | None = None) -> None:
    """Push the llama_cpp_bundle to the device unless it is already there.

    ``check_binary`` is the on-device path whose presence marks the bundle as
    installed; when it is omitted (or empty) ``{BIN_PATH}/llama-cli`` is used.
    """
    if not check_binary:
        check_binary = f"{BIN_PATH}/llama-cli"
    push_bundle_if_needed(check_binary)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Android / Linux host helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def run_adb_command(cmd: str, *, check: bool = True) -> subprocess.CompletedProcess:
|
||||
# Append exit-code sentinel because `adb shell` doesn't reliably propagate
|
||||
# the on-device exit code (older ADB versions always return 0).
|
||||
"""Run a command on-device via ``adb shell`` with exit-code sentinel."""
|
||||
raw = subprocess.run(
|
||||
["adb", "shell", f"{cmd}; echo __RC__:$?"],
|
||||
text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
text=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
)
|
||||
stdout = raw.stdout
|
||||
returncode = raw.returncode
|
||||
@@ -55,39 +86,58 @@ def run_adb_command(cmd: str, *, check: bool = True) -> subprocess.CompletedProc
|
||||
stdout = "\n".join(lines[:-1]) + "\n"
|
||||
except ValueError:
|
||||
pass
|
||||
log.info("%s", stdout)
|
||||
log.info(stdout)
|
||||
result = subprocess.CompletedProcess(raw.args, returncode, stdout=stdout)
|
||||
if check:
|
||||
assert returncode == 0, f"Command failed (exit {returncode})"
|
||||
return result
|
||||
|
||||
|
||||
def write_qdc_log(filename: str, content: str) -> None:
|
||||
"""Push content as a log file to QDC_LOGS_PATH on the device for QDC log collection."""
|
||||
subprocess.run(
|
||||
["adb", "shell", f"mkdir -p {QDC_LOGS_PATH}"],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
def run_script(
    script: str,
    extra_env: dict[str, str] | None = None,
    extra_args: list[str] | None = None,
) -> subprocess.CompletedProcess:
    """Execute ``SCRIPTS_DIR/<script>`` on the QDC runner host.

    The child inherits the current environment with
    ``GGML_HEXAGON_EXPERIMENTAL=1`` added; entries in ``extra_env`` are
    applied last and therefore take precedence. ``extra_args`` are appended
    to the command line unchanged.

    Returns the ``CompletedProcess`` with stderr merged into ``stdout``. The
    exit code is not checked here; callers inspect ``returncode``.
    """
    child_env = os.environ.copy()
    child_env["GGML_HEXAGON_EXPERIMENTAL"] = "1"
    if extra_env:
        child_env.update(extra_env)

    argv = [f"{SCRIPTS_DIR}/{script}"]
    if extra_args:
        argv = argv + extra_args

    completed = subprocess.run(
        argv,
        env=child_env,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    log.info(completed.stdout)
    return completed
|
||||
|
||||
|
||||
def adb_shell(cmd: str) -> None:
    """Run a command via adb shell (fire-and-forget, no error check).

    ``cmd`` is handed to ``adb shell`` as one remote command string, the same
    way the other adb helpers in this module do it. Passing
    ``"sh", "-c", cmd`` as separate arguments does not work for multi-word
    commands: adb joins its arguments with spaces and the device shell splits
    them again, so ``sh -c`` would only receive the first word of ``cmd``.

    Output is captured and discarded; a non-zero exit code is ignored.
    """
    subprocess.run(
        ["adb", "shell", cmd],
        capture_output=True,
        encoding="utf-8",
        errors="replace",
        check=False,
    )
|
||||
with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False) as f:
|
||||
f.write(content)
|
||||
tmp_path = f.name
|
||||
try:
|
||||
subprocess.run(
|
||||
["adb", "push", tmp_path, f"{QDC_LOGS_PATH}/{filename}"],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
)
|
||||
finally:
|
||||
os.unlink(tmp_path)
|
||||
|
||||
|
||||
def push_bundle_if_needed(check_binary: str) -> None:
|
||||
"""Push llama_cpp_bundle to the device if check_binary is not already present."""
|
||||
result = subprocess.run(
|
||||
["adb", "shell", f"ls {check_binary}"],
|
||||
text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
text=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
subprocess.run(
|
||||
["adb", "push", "/qdc/appium/llama_cpp_bundle/", "/data/local/tmp"],
|
||||
text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
||||
["adb", "push", "/qdc/appium/llama_cpp_bundle/", BUNDLE_PATH],
|
||||
text=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
)
|
||||
subprocess.run(
|
||||
["adb", "shell", f"find {BUNDLE_PATH}/bin -type f -exec chmod 755 {{}} +"],
|
||||
text=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user