CI : support IOT device (IQ9) (#22987)

* update test scripts

* align CI behavior between linux and android

* remove automatically cancel in 15min

* enable cancel-in-progress

* fix ty check issue

* update and fix pylint issue

* update runner such that we are not restricted by the 15min limit rule

* fix flake8 lint issue

* update runner according to review feedback

* code update according to review feedback

* switch from llama-cli to llama-completion binary with -no-cnv flag
This commit is contained in:
Zack Li
2026-05-14 13:58:34 -07:00
committed by GitHub
parent 834a243664
commit d81e63dcfd
7 changed files with 793 additions and 167 deletions

View File

@@ -58,14 +58,45 @@ jobs:
name: llama-cpp-android-arm64-snapdragon
path: pkg-snapdragon/llama.cpp
linux-iot-snapdragon:
runs-on: ubuntu-latest
container:
image: 'ghcr.io/snapdragon-toolchain/arm64-linux:v0.1'
defaults:
run:
shell: bash
steps:
- name: Clone
uses: actions/checkout@v6
with:
fetch-depth: 0
lfs: false
- name: Build Llama.CPP for Snapdragon Linux IoT
id: build_llama_cpp_snapdragon_linux
run: |
cp docs/backend/snapdragon/CMakeUserPresets.json .
cmake --preset arm64-linux-snapdragon-release -B build-snapdragon -DGGML_OPENCL=ON
cmake --build build-snapdragon -j $(nproc)
cmake --install build-snapdragon --prefix pkg-snapdragon/llama.cpp
- name: Upload Llama.CPP Snapdragon Linux IoT Build Artifact
if: ${{ always() && steps.build_llama_cpp_snapdragon_linux.outcome == 'success' }}
uses: actions/upload-artifact@v6
with:
name: llama-cpp-linux-arm64-snapdragon
path: pkg-snapdragon/llama.cpp
test-snapdragon-qdc:
name: Test on QDC Android Device (${{ matrix.device }})
needs: [android-ndk-snapdragon]
runs-on: ubuntu-slim
name: Test on QDC Device (${{ matrix.device }})
needs: [android-ndk-snapdragon, linux-iot-snapdragon]
runs-on: ubuntu-24.04-arm
timeout-minutes: 90
strategy:
fail-fast: false
matrix:
device: [SM8750, SM8650, SM8850]
device: [SM8750, SM8850, QCS9075M]
steps:
- name: Checkout
@@ -74,11 +105,11 @@ jobs:
- name: Download build artifact
uses: actions/download-artifact@v7
with:
name: llama-cpp-android-arm64-snapdragon
name: ${{ startsWith(matrix.device, 'QCS') && 'llama-cpp-linux-arm64-snapdragon' || 'llama-cpp-android-arm64-snapdragon' }}
path: pkg-snapdragon/llama.cpp
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: '3.x'
cache: pip
@@ -107,7 +138,8 @@ jobs:
--test all \
--pkg-dir pkg-snapdragon/llama.cpp \
--model-url "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_0.gguf" \
--device ${{ matrix.device }}
--device ${{ matrix.device }} \
${{ startsWith(matrix.device, 'QCS') && '--retries 2 --retry-delay 300' || '' }}
env:
QDC_API_KEY: ${{ secrets.QDC_API_KEY }}

View File

@@ -1,4 +1,4 @@
"""Run llama.cpp Hexagon Android tests in a single QDC Appium job.
"""Run llama.cpp Hexagon tests in a single QDC job.
Bundles test scripts into one artifact and submits a single QDC job:
@@ -10,6 +10,10 @@ Results are written to $GITHUB_STEP_SUMMARY when set (GitHub Actions).
Prerequisites:
pip install /path/to/qualcomm_device_cloud_sdk*.whl
Platform is inferred from --device:
android Appium + pytest (Android phones: SM8750 / SM8650 / SM8850)
linux BASH (Linux IoT: QCS9075M)
Required environment variables:
QDC_API_KEY API key from QDC UI -> Users -> Settings -> API Keys
@@ -23,6 +27,7 @@ Usage:
from __future__ import annotations
import argparse
import enum
import logging
import os
import re
@@ -30,15 +35,35 @@ import shutil
import sys
import tempfile
import time
import urllib.request
import xml.etree.ElementTree as ET
from dataclasses import dataclass, field
from pathlib import Path
from typing import Callable
from qualcomm_device_cloud_sdk.api import qdc_api # ty: ignore[unresolved-import]
from qualcomm_device_cloud_sdk.logging import configure_logging # ty: ignore[unresolved-import]
from qualcomm_device_cloud_sdk.models import ArtifactType, JobMode, JobState, JobSubmissionParameter, JobType, TestFramework # ty: ignore[unresolved-import]
from qualcomm_device_cloud_sdk.api import qdc_api
from qualcomm_device_cloud_sdk.logging import configure_logging
from qualcomm_device_cloud_sdk.models import (
ArtifactType,
JobMode,
JobState,
JobSubmissionParameter,
JobType,
TestFramework,
)
# configure_logging only sets up the SDK logger; basicConfig is needed for
# our own log.info to reach stdout.
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(name)s %(levelname)s - %(message)s",
handlers=[logging.StreamHandler()],
)
configure_logging(level=logging.INFO, handlers=[logging.StreamHandler()])
# Silence per-poll GET/status spam from the SDK and its HTTP client.
logging.getLogger("qualcomm_device_cloud").setLevel(logging.WARNING)
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("httpcore").setLevel(logging.WARNING)
log = logging.getLogger(__name__)
POLL_INTERVAL = 30
@@ -47,23 +72,56 @@ LOG_UPLOAD_TIMEOUT = 600
CAPACITY_TIMEOUT = 1800
CAPACITY_POLL = 60
MAX_CONCURRENT_JOBS = 5
DEFAULT_RETRIES = 0
RETRY_DELAY = 300
TERMINAL_STATES = {JobState.COMPLETED, JobState.CANCELED}
NON_TERMINAL_STATES = {JobState.DISPATCHED, JobState.RUNNING, JobState.SETUP, JobState.SUBMITTED}
_SCRIPTS_DIR = Path(__file__).parent
_TESTS_DIR = _SCRIPTS_DIR / "tests"
_RUN_BENCH = _TESTS_DIR / "run_bench_tests_posix.py"
_RUN_BACKEND_OPS = _TESTS_DIR / "run_backend_ops_posix.py"
_UTILS = _TESTS_DIR / "utils.py"
_CONFTEST = _TESTS_DIR / "conftest.py"
_REQUIREMENTS = _SCRIPTS_DIR / "requirements.txt"
class DeviceUnavailableError(Exception):
    """Signal that the QDC device resource could not be used for this attempt.

    Callers treat this as a retryable condition, as opposed to a definitive
    test outcome.
    """
_SCRIPTS_DIR = Path(__file__).parent
_TESTS_DIR = _SCRIPTS_DIR / "tests"
# --- Shared test assets -------------------------------------------------------
_UTILS = _TESTS_DIR / "utils.py"
_CONFTEST = _TESTS_DIR / "conftest.py"
_PYTEST_LINE_RE = re.compile(
r"(?:[\w/]+\.py::)?(?:\w+::)?([\w\[\].-]+)\s+(PASSED|FAILED|ERROR|SKIPPED)"
)
_EXCLUDED_LOGS = {"qdc_android_whole_host-000.log", "qdc_kernel_host-000.log"}
_EXCLUDED_LOGS = {
"qdc_android_whole_host-000.log",
"qdc_kernel_host-000.log",
"qdc_LE_whole_host-000.log",
"qdc_LE_kernel_host-000.log",
"script.log",
}
_NON_TERMINAL_STATE_VALUES = {s.value for s in NON_TERMINAL_STATES}
# --- Android (Appium + pytest) assets ----------------------------------------
_RUN_BENCH = _TESTS_DIR / "run_bench_tests_posix.py"
_RUN_BACKEND_OPS = _TESTS_DIR / "run_backend_ops_posix.py"
_REQUIREMENTS = _SCRIPTS_DIR / "requirements.txt"
_UPSTREAM_ADB_SCRIPTS = (
"https://raw.githubusercontent.com/ggml-org/llama.cpp/master/scripts/snapdragon/adb"
)
_ADB_SCRIPT_NAMES = [
"run-bench.sh",
"run-cli.sh",
"run-completion.sh",
"run-tool.sh",
]
# --- Linux (BASH) assets ------------------------------------------------------
_RUN_LINUX_TEMPLATE = _TESTS_DIR / "linux" / "run_linux.sh"
_LINUX_ENTRY_SCRIPT = "/bin/bash /data/local/tmp/TestContent/run_linux.sh"
# =============================================================================
# Artifact builders (per platform)
# =============================================================================
@dataclass
class JobResult:
@@ -73,35 +131,58 @@ class JobResult:
failure_details: dict[str, str] = field(default_factory=dict)
def build_artifact_zip(
def _write_lf(path: Path, content: str) -> None:
"""Write text with LF line endings (required by /bin/bash on Linux)."""
with open(path, "w", encoding="utf-8", newline="\n") as f:
f.write(content)
def _build_android_artifact(
pkg_dir: Path,
stage_dir: Path,
*,
test_mode: str = "bench",
model_url: str | None = None,
test_mode: str,
model_url: str | None,
) -> Path:
"""Bundle everything into a single QDC artifact zip.
"""Android zip (Appium/pytest). Extracted by QDC under /qdc/appium/.
Zip structure (extracted by QDC to /qdc/appium/ on the runner):
Zip structure:
llama_cpp_bundle/ installed package (adb pushed to /data/local/tmp/)
run-{bench,cli,completion,tool}.sh upstream adb wrappers (patched)
tests/
utils.py shared helpers (paths, run_adb_command, …)
conftest.py shared pytest fixtures (driver)
test_bench_posix.py bench + cli tests (<<MODEL_URL>> substituted)
AND/OR
test_backend_ops_posix.py test-backend-ops -b HTP0
utils.py shared adb helpers
conftest.py Appium pytest fixtures
test_bench_posix.py bench + cli tests (for --test bench or all)
test_backend_ops_posix.py test-backend-ops on HTP0
requirements.txt
pytest.ini addopts = --junitxml=results.xml
"""
shutil.copytree(pkg_dir, stage_dir / "llama_cpp_bundle")
bundle_dir = stage_dir / "llama_cpp_bundle"
shutil.copytree(pkg_dir, bundle_dir)
# Download upstream adb scripts so they land at /qdc/appium/ on the QDC
# runner. They wrap `adb shell` internally. Patch in `chmod +x bin/* lib/*`
# right after `cd $basedir` so device binaries are executable.
for name in _ADB_SCRIPT_NAMES:
url = f"{_UPSTREAM_ADB_SCRIPTS}/{name}"
dest = stage_dir / name
log.info("Downloading %s", url)
urllib.request.urlretrieve(url, str(dest))
content = dest.read_text()
content = content.replace(
"cd $basedir;",
"cd $basedir; chmod +x bin/* lib/* 2>/dev/null;",
)
dest.write_text(content)
dest.chmod(0o755)
tests_dir = stage_dir / "tests"
tests_dir.mkdir()
shutil.copy(_UTILS, tests_dir / "utils.py")
shutil.copy(_UTILS, tests_dir / "utils.py")
shutil.copy(_CONFTEST, tests_dir / "conftest.py")
if test_mode in ("bench", "all"):
assert model_url is not None, "--model-url is required for bench/all test modes"
assert model_url is not None
(tests_dir / "test_bench_posix.py").write_text(
_RUN_BENCH.read_text().replace("<<MODEL_URL>>", model_url)
)
@@ -109,33 +190,140 @@ def build_artifact_zip(
shutil.copy(_RUN_BACKEND_OPS, tests_dir / "test_backend_ops_posix.py")
shutil.copy(_REQUIREMENTS, stage_dir / "requirements.txt")
(stage_dir / "pytest.ini").write_text("[pytest]\naddopts = --junitxml=results.xml\n")
(stage_dir / "pytest.ini").write_text(
"[pytest]\naddopts = --junitxml=results.xml\n"
)
zip_base = str(stage_dir / "artifact")
shutil.make_archive(zip_base, "zip", stage_dir)
return Path(f"{zip_base}.zip")
def _build_linux_artifact(
    pkg_dir: Path,
    stage_dir: Path,
    test_mode: str,
    model_url: str | None,
) -> Path:
    """Linux IoT zip (BASH framework). Extracted by QDC to /data/local/tmp/TestContent/.

    Zip structure:
      run_linux.sh         entry script (placeholder-substituted, LF line endings)
      llama_cpp_bundle/    installed package

    Args:
        pkg_dir: Installed llama.cpp package directory to bundle.
        stage_dir: Existing scratch directory; the bundle, the rendered
            script and the resulting ``artifact.zip`` are all created in it.
        test_mode: Value substituted for the ``{TEST_MODE}`` placeholder.
        model_url: Value substituted for the ``{MODEL_URL}`` placeholder;
            ``None`` is rendered as an empty string.

    Returns:
        Path to ``stage_dir / "artifact.zip"``.
    """
    # Local import: zipfile is only needed by this builder.
    import zipfile

    bundle_dir = stage_dir / "llama_cpp_bundle"
    shutil.copytree(pkg_dir, bundle_dir)

    # NOTE(review): the values are substituted verbatim into a bash script
    # (MODEL_URL lands inside double quotes); callers must pass trusted input.
    template = _RUN_LINUX_TEMPLATE.read_text(encoding="utf-8")
    rendered = template.replace("{MODEL_URL}", model_url or "").replace(
        "{TEST_MODE}", test_mode
    )
    script_path = stage_dir / "run_linux.sh"
    _write_lf(script_path, rendered)
    script_path.chmod(0o755)

    zip_path = stage_dir / "artifact.zip"
    # The archive lives inside the directory being archived. Snapshot the
    # entries *before* creating it (and skip any stale copy) so the zip never
    # contains a partial copy of itself, which shutil.make_archive would add.
    entries = [p for p in sorted(stage_dir.rglob("*")) if p != zip_path]
    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for entry in entries:
            # ZipFile.write records directory entries and file mode bits,
            # so the executable bit set above is preserved in the archive.
            zf.write(entry, entry.relative_to(stage_dir).as_posix())
    return zip_path
# =============================================================================
# Platform enum + strategy table
# =============================================================================
class Platform(enum.Enum):
    """Target platform families a QDC job can run on."""

    # Android phones, driven through the Appium test framework.
    ANDROID = "android"
    # Linux IoT boards, driven through the BASH test framework.
    LINUX = "linux"
@dataclass(frozen=True)
class PlatformSpec:
    """Immutable per-platform settings used when building and submitting a job."""

    # QDC test framework the job is submitted with.
    test_framework: TestFramework
    # Command QDC runs as the job entry point; None when the platform has none.
    entry_script: str | None
    # Artifact builder called as (pkg_dir, stage_dir, test_mode, model_url);
    # returns the path of the zip to upload.
    build_artifact: Callable[[Path, Path, str, str | None], Path]
    # str.format template for the job name; receives a ``base`` keyword.
    job_name_fmt: str
# Strategy table: everything that differs between platforms when building
# the artifact and submitting the job.
PLATFORM_SPECS: dict[Platform, PlatformSpec] = {
    # Android: Appium framework, no entry script, job name used as-is.
    Platform.ANDROID: PlatformSpec(
        test_framework=TestFramework.APPIUM,
        entry_script=None,
        build_artifact=_build_android_artifact,
        job_name_fmt="{base}",
    ),
    # Linux IoT: BASH framework running the bundled run_linux.sh.
    Platform.LINUX: PlatformSpec(
        test_framework=TestFramework.BASH,
        entry_script=_LINUX_ENTRY_SCRIPT,
        build_artifact=_build_linux_artifact,
        job_name_fmt="{base} (Linux)",
    ),
}

# Known QDC chipset names and the platform each one runs on; the platform
# is resolved from --device through this table.
DEVICE_PLATFORM: dict[str, Platform] = {
    "SM8750": Platform.ANDROID,
    "SM8650": Platform.ANDROID,
    "SM8850": Platform.ANDROID,
    "QCS9075M": Platform.LINUX,
}
# =============================================================================
# Shared QDC job plumbing
# =============================================================================
def wait_for_job(client, job_id: str, timeout: int) -> str:
elapsed = 0
last_state = None
consecutive_errors = 0
max_consecutive_errors = 5
while elapsed < timeout:
raw = qdc_api.get_job_status(client, job_id)
try:
raw = qdc_api.get_job_status(client, job_id)
consecutive_errors = 0
except Exception as e:
consecutive_errors += 1
log.warning(
"Transient error polling job %s (%d/%d): %s",
job_id,
consecutive_errors,
max_consecutive_errors,
e,
)
if consecutive_errors >= max_consecutive_errors:
raise
time.sleep(POLL_INTERVAL)
elapsed += POLL_INTERVAL
continue
try:
status = JobState(raw)
except ValueError:
status = raw
if status in TERMINAL_STATES:
return raw.lower()
log.info("Job %s: %s", job_id, raw)
if raw != last_state:
log.info("Job %s: %s", job_id, raw)
last_state = raw
time.sleep(POLL_INTERVAL)
elapsed += POLL_INTERVAL
# Abort to free the QDC concurrency slot instead of leaking it.
try:
qdc_api.abort_job(client, job_id)
log.warning("Aborted job %s after timeout to free concurrency slot", job_id)
except Exception as e:
log.warning("Failed to abort job %s: %s", job_id, e)
raise TimeoutError(f"Job {job_id} did not finish within {timeout}s")
def wait_for_log_upload(client, job_id: str) -> None:
elapsed = 0
while elapsed <= LOG_UPLOAD_TIMEOUT:
status = (qdc_api.get_job_log_upload_status(client, job_id) or "").lower()
try:
status = (qdc_api.get_job_log_upload_status(client, job_id) or "").lower()
except Exception as e:
log.warning("get_job_log_upload_status failed: %s — will retry", e)
status = ""
if status in {"completed", "failed"}:
return
log.info("Waiting for log upload (status=%s) ...", status)
@@ -150,17 +338,33 @@ def wait_for_capacity(client, max_jobs: int = MAX_CONCURRENT_JOBS) -> None:
while elapsed < CAPACITY_TIMEOUT:
jobs_page = qdc_api.get_jobs_list(client, page_number=0, page_size=50)
if jobs_page is None:
log.warning("Could not retrieve job list; proceeding without capacity check")
log.warning(
"Could not retrieve job list; proceeding without capacity check"
)
return
items = getattr(jobs_page, "data", []) or []
active = sum(1 for j in items if getattr(j, "state", None) in _NON_TERMINAL_STATE_VALUES)
active = sum(
1 for j in items if getattr(j, "state", None) in _NON_TERMINAL_STATE_VALUES
)
if active < max_jobs:
log.info("Active QDC jobs: %d / %d — proceeding", active, max_jobs)
return
log.info("Active QDC jobs: %d / %d — waiting %ds ...", active, max_jobs, CAPACITY_POLL)
log.info(
"Active QDC jobs: %d / %d — waiting %ds ...",
active,
max_jobs,
CAPACITY_POLL,
)
time.sleep(CAPACITY_POLL)
elapsed += CAPACITY_POLL
log.warning("Capacity wait timed out after %ds; proceeding anyway", CAPACITY_TIMEOUT)
raise TimeoutError(
f"Capacity wait timed out after {CAPACITY_TIMEOUT}s"
)
# ---------------------------------------------------------------------------
# Log parsing helpers
# ---------------------------------------------------------------------------
def _parse_junit_xml(content: str) -> tuple[dict[str, bool], dict[str, str]]:
@@ -192,10 +396,26 @@ def _parse_pytest_output(content: str) -> dict[str, bool]:
def fetch_logs_and_parse_tests(
client, job_id: str
client, job_id: str, max_retries: int = 5, retry_delay: int = 30
) -> tuple[dict[str, bool], dict[str, str], dict[str, str]]:
"""Returns (test_results, raw_logs, failure_details)."""
log_files = qdc_api.get_job_log_files(client, job_id)
log_files = None
for attempt in range(1, max_retries + 1):
try:
log_files = qdc_api.get_job_log_files(client, job_id)
break
except Exception as e:
if attempt < max_retries:
log.warning(
"get_job_log_files failed (attempt %d/%d): %s — retrying in %ds",
attempt, max_retries, e, retry_delay,
)
time.sleep(retry_delay)
else:
log.error(
"get_job_log_files failed after %d attempts: %s", max_retries, e
)
return {}, {}, {}
if not log_files:
log.warning("No log files returned for job %s", job_id)
return {}, {}, {}
@@ -207,8 +427,8 @@ def fetch_logs_and_parse_tests(
with tempfile.TemporaryDirectory() as tmpdir:
for lf in log_files:
log.info("Downloading log file: %s", lf.filename)
zip_path = os.path.join(tmpdir, "log.zip")
log.info("Downloading log file: %s", lf.filename)
qdc_api.download_job_log_files(client, lf.filename, zip_path)
try:
shutil.unpack_archive(zip_path, tmpdir, "zip")
@@ -226,12 +446,15 @@ def fetch_logs_and_parse_tests(
elif fname.endswith(".log"):
if fname in _EXCLUDED_LOGS:
continue
log.info("--- %s ---", fname)
log.info("%s", content)
log.info("--- %s ---\n%s", fname, content)
raw_logs[fname] = content
pytest_fallback.update(_parse_pytest_output(content))
return (test_results if test_results else pytest_fallback), raw_logs, failure_details
return (
(test_results if test_results else pytest_fallback),
raw_logs,
failure_details,
)
def write_summary(result: JobResult, title: str = "QDC Test Results") -> None:
@@ -289,30 +512,106 @@ def write_summary(result: JobResult, title: str = "QDC Test Results") -> None:
f.write("\n".join(lines) + "\n")
# =============================================================================
# CLI + main
# =============================================================================
def parse_args() -> argparse.Namespace:
p = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
p.add_argument("--pkg-dir", required=True, type=Path,
p.add_argument("--pkg-dir", required=True, type=Path,
help="Installed llama.cpp package directory (contains bin/ and lib/)")
p.add_argument("--model-url",
help="Direct URL to the GGUF model file (required for --test bench)")
p.add_argument("--device", required=True,
p.add_argument("--device", required=True,
help="QDC chipset name, e.g. SM8750")
p.add_argument("--test", choices=["bench", "backend-ops", "all"], default="bench",
help="Test suite to run (default: bench)")
p.add_argument("--job-timeout", type=int, default=JOB_TIMEOUT, metavar="SECONDS",
help=f"Max seconds to wait for job completion (default: {JOB_TIMEOUT})")
p.add_argument("--retries", type=int, default=DEFAULT_RETRIES, metavar="N",
help="Number of retries when device is unavailable (default: 0)")
p.add_argument("--retry-delay", type=int, default=RETRY_DELAY, metavar="SECONDS",
help=f"Seconds to wait between retries (default: {RETRY_DELAY})")
args = p.parse_args()
if args.test in ("bench", "all") and not args.model_url:
p.error("--model-url is required when --test bench or --test all")
return args
def _submit_and_run_job(client, args, spec, target_id, artifact_id) -> JobResult:
    """Submit a QDC job and wait for results.

    Raises DeviceUnavailableError for transient device/resource issues that
    are worth retrying. Returns JobResult for definitive outcomes (pass or
    test failure).

    Args:
        client: QDC API client passed through to every ``qdc_api`` call.
        args: Parsed CLI namespace; ``job_timeout`` and ``device`` are read.
        spec: Platform settings; ``job_name_fmt``, ``test_framework`` and
            ``entry_script`` are read.
        target_id: QDC target to submit the job against.
        artifact_id: Id of the already-uploaded test artifact.

    Raises:
        DeviceUnavailableError: capacity wait timed out, submission returned
            no job id, the job timed out, or the job did not complete and
            produced no test results.
    """
    try:
        wait_for_capacity(client)
    except TimeoutError as e:
        # Chain explicitly so the original timeout stays attached as the cause.
        raise DeviceUnavailableError("Capacity wait timed out — device busy") from e

    job_name = spec.job_name_fmt.format(base="llama.cpp Hexagon tests")
    job_id = qdc_api.submit_job(
        public_api_client=client,
        target_id=target_id,
        job_name=job_name,
        external_job_id=None,
        job_type=JobType.AUTOMATED,
        job_mode=JobMode.APPLICATION,
        # args.job_timeout is in seconds; the value passed here is that
        # number divided by 60, with a floor of 1.
        timeout=max(1, args.job_timeout // 60),
        test_framework=spec.test_framework,
        entry_script=spec.entry_script,
        job_artifacts=[artifact_id],
        monkey_events=None,
        monkey_session_timeout=None,
        job_parameters=[JobSubmissionParameter.WIFIENABLED],
    )
    if job_id is None:
        raise DeviceUnavailableError("Job submission failed — device may be unavailable")
    log.info("Job submitted: %s (device=%s)", job_id, args.device)

    try:
        job_status = wait_for_job(client, job_id, timeout=args.job_timeout)
    except TimeoutError as e:
        raise DeviceUnavailableError(str(e)) from e
    log.info("Job %s finished: %s", job_id, job_status)

    wait_for_log_upload(client, job_id)
    tests, raw_logs, failure_details = fetch_logs_and_parse_tests(client, job_id)

    job_ok = job_status == JobState.COMPLETED.value.lower()
    if not job_ok and not tests:
        # Nothing ran far enough to report a result: treat as retryable.
        raise DeviceUnavailableError(
            f"Job did not complete (status={job_status}) and produced no test results"
        )

    # all() of an empty mapping is True, so with no parsed tests this
    # reduces to job_ok.
    passed = job_ok and all(tests.values())
    if spec.test_framework == TestFramework.BASH and not tests:
        # For BASH jobs a run without any recovered results is a failure
        # even when the job itself reports completion.
        log.error("No test results recovered (state=%s). Script likely never ran.", job_status)
        passed = False
    if not passed:
        log.error("Job did not complete successfully or tests failed (status=%s)", job_status)
    return JobResult(passed=passed, tests=tests, raw_logs=raw_logs, failure_details=failure_details)
def main() -> int:
args = parse_args()
platform = DEVICE_PLATFORM.get(args.device)
if platform is None:
log.error(
"Unknown device %r. Known: %s",
args.device, ", ".join(sorted(DEVICE_PLATFORM.keys())),
)
return 1
spec = PLATFORM_SPECS[platform]
api_key = os.environ.get("QDC_API_KEY")
if not api_key:
log.error("QDC_API_KEY environment variable must be set")
@@ -334,10 +633,9 @@ def main() -> int:
return 1
with tempfile.TemporaryDirectory() as tmpdir:
log.info("Building artifact ...")
zip_path = build_artifact_zip(
args.pkg_dir, Path(tmpdir),
test_mode=args.test, model_url=args.model_url,
log.info("Building %s artifact (test=%s) ...", platform.value, args.test)
zip_path = spec.build_artifact(
args.pkg_dir, Path(tmpdir), args.test, args.model_url
)
log.info("Uploading artifact (%d MB) ...", zip_path.stat().st_size // 1_000_000)
artifact_id = qdc_api.upload_file(client, str(zip_path), ArtifactType.TESTSCRIPT)
@@ -346,46 +644,31 @@ def main() -> int:
log.error("Artifact upload failed")
return 1
wait_for_capacity(client)
job_id = qdc_api.submit_job(
public_api_client=client,
target_id=target_id,
job_name="llama.cpp Hexagon tests",
external_job_id=None,
job_type=JobType.AUTOMATED,
job_mode=JobMode.APPLICATION,
timeout=max(1, args.job_timeout // 60),
test_framework=TestFramework.APPIUM,
entry_script=None,
job_artifacts=[artifact_id],
monkey_events=None,
monkey_session_timeout=None,
job_parameters=[JobSubmissionParameter.WIFIENABLED],
)
if job_id is None:
log.error("Job submission failed")
max_attempts = 1 + args.retries
for attempt in range(1, max_attempts + 1):
try:
result = _submit_and_run_job(client, args, spec, target_id, artifact_id)
break
except DeviceUnavailableError as e:
if attempt < max_attempts:
log.warning(
"Attempt %d/%d failed (device unavailable): %s — retrying in %ds",
attempt, max_attempts, e, args.retry_delay,
)
time.sleep(args.retry_delay)
else:
log.error(
"Attempt %d/%d failed (device unavailable): %s — no retries left",
attempt, max_attempts, e,
)
write_summary(
JobResult(passed=False, tests={}),
title=f"QDC Device Unavailable ({args.device})",
)
return 1
else:
return 1
log.info("Job submitted: %s (device=%s)", job_id, args.device)
try:
job_status = wait_for_job(client, job_id, timeout=args.job_timeout)
except TimeoutError as e:
log.error("%s", e)
write_summary(JobResult(passed=False, tests={}), title=f"QDC Job Timed Out ({args.device})")
return 1
log.info("Job %s finished: %s", job_id, job_status)
wait_for_log_upload(client, job_id)
tests, raw_logs, failure_details = fetch_logs_and_parse_tests(client, job_id)
passed = job_status == JobState.COMPLETED.value.lower()
if tests:
passed = passed and all(tests.values())
if not passed:
log.error("Job did not complete successfully or tests failed (status=%s)", job_status)
result = JobResult(passed=passed, tests=tests, raw_logs=raw_logs, failure_details=failure_details)
if args.test == "backend-ops":
title = f"Backend Ops — HTP0 ({args.device})"
elif args.test == "all":
@@ -394,7 +677,7 @@ def main() -> int:
title = f"QDC Test Results ({args.device})"
write_summary(result, title=title)
return 0 if passed else 1
return 0 if result.passed else 1
if __name__ == "__main__":

View File

@@ -0,0 +1,232 @@
#!/bin/bash
# llama.cpp Hexagon test entry script for QDC Linux IoT (BASH framework).
#
# Placeholders substituted by run_qdc_jobs.py (Linux platform, inferred from --device) before upload:
# {MODEL_URL} direct URL to a .gguf model file
# {TEST_MODE} bench | backend-ops | all
#
# QDC extracts the artifact zip to /data/local/tmp/TestContent/ and invokes
# this script via: /bin/bash /data/local/tmp/TestContent/run_linux.sh
# Any files written under /data/local/tmp/QDC_logs/ are auto-uploaded.
# Keep going after individual command failures: every test case records its
# own pass/fail in results.xml instead of aborting the whole script.
set +e
umask 022

LOG_DIR=/data/local/tmp/QDC_logs
BUNDLE_DIR=/data/local/tmp/TestContent/llama_cpp_bundle
MODEL_DIR=/data/local/tmp/gguf
MODEL_PATH="$MODEL_DIR/model.gguf"
RESULTS_XML="$LOG_DIR/results.xml"
mkdir -p "$LOG_DIR" "$MODEL_DIR"

# Redirect all parent-shell output to script.log so QDC auto-uploads it;
# per-case runs still capture their own stdout/stderr into dedicated logs.
exec > "$LOG_DIR/script.log" 2>&1

echo "=== env ==="
date -u
uname -a
pwd

# Best effort; failure is ignored.
# NOTE(review): presumably needed so later steps can write to the root
# filesystem — confirm against the target image.
mount -o rw,remount / 2>/dev/null || true

cd "$BUNDLE_DIR" || { echo "FATAL: bundle missing at $BUNDLE_DIR"; exit 1; }
chmod +x bin/* 2>/dev/null

# Only append the previous value when it is non-empty: a trailing ':' is an
# empty path element, which the dynamic loader treats as the current directory.
export LD_LIBRARY_PATH="$BUNDLE_DIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export ADSP_LIBRARY_PATH="$BUNDLE_DIR/lib"
export GGML_HEXAGON_EXPERIMENTAL=1
echo "=== download model ==="
MODEL_URL="{MODEL_URL}"
if [ -z "$MODEL_URL" ]; then
    echo "No model URL provided, skipping download"
elif [ ! -f "$MODEL_PATH" ]; then
    # Download to a temporary name and rename only on success. Otherwise an
    # interrupted transfer leaves a truncated model.gguf behind, and the
    # "already present" check above would make later runs reuse it.
    MODEL_TMP="$MODEL_PATH.part"
    rm -f "$MODEL_TMP"
    curl -L -fS --retry 3 --retry-delay 5 -o "$MODEL_TMP" "$MODEL_URL"
    curl_rc=$?
    if [ $curl_rc -eq 0 ]; then
        mv -f "$MODEL_TMP" "$MODEL_PATH" || curl_rc=$?
    fi
    if [ $curl_rc -ne 0 ]; then
        rm -f "$MODEL_TMP"
        echo "FATAL: model download failed (rc=$curl_rc)"
        exit 1
    fi
    ls -la "$MODEL_PATH"
fi
# ---------------------------------------------------------------------------
# JUnit XML helpers
# ---------------------------------------------------------------------------
# Start a fresh results file: XML declaration plus the opening
# <testsuites>/<testsuite> tags. Any previous content is truncated.
xml_open() {
    cat > "$RESULTS_XML" <<'EOF'
<?xml version="1.0" encoding="utf-8"?>
<testsuites>
<testsuite name="llama_cpp_linux">
EOF
}
# Append the closing </testsuite> and </testsuites> tags to the results file.
xml_close() {
    printf '</testsuite>\n</testsuites>\n' >> "$RESULTS_XML"
}
# Append a passing (self-closed) <testcase> element.
#   $1  classname attribute
#   $2  name attribute
xml_case_pass() {
    local cls=$1
    local case_name=$2
    printf '<testcase classname="%s" name="%s"/>\n' "$cls" "$case_name" \
        >> "$RESULTS_XML"
}
# Append a failing <testcase> element whose <failure> body is the last
# 4096 bytes of the case's log, wrapped in CDATA.
#   $1  classname attribute
#   $2  name attribute
#   $3  exit code, reported as message="exit <rc>"
#   $4  path of the log file to quote (a missing file yields an empty body)
xml_case_fail() {
    local classname=$1 name=$2 rc=$3 logfile=$4
    {
        printf '<testcase classname="%s" name="%s">\n' "$classname" "$name"
        printf '<failure message="exit %s"><![CDATA[\n' "$rc"
        # XML 1.0 forbids control characters other than TAB/LF/CR even inside
        # CDATA; one stray ESC (e.g. an ANSI colour code) would make the whole
        # results file unparseable, so drop them. Then break up any "]]>" so
        # log content cannot terminate the CDATA section early.
        tail -c 4096 "$logfile" 2>/dev/null \
            | LC_ALL=C tr -d '\000-\010\013\014\016-\037' \
            | sed 's/]]>/]] >/g'
        printf '\n]]></failure>\n</testcase>\n'
    } >> "$RESULTS_XML"
}
# Translate a backend name into the pair "<NDEV> <--device value>" on stdout.
# A device of "none" means no offload (CPU). Unknown names print nothing.
backend_env() {
    local backend=$1
    case "$backend" in
        cpu) printf '%s\n' "0 none" ;;
        gpu) printf '%s\n' "0 GPUOpenCL" ;;
        npu) printf '%s\n' "1 HTP0" ;;
    esac
}
# Print the log-file suffix used for a backend's llama-bench log
# (cpu -> cpu, gpu -> gpu, npu -> htp). Unknown names print nothing.
backend_log_name() {
    local backend=$1
    case "$backend" in
        cpu | gpu) printf '%s\n' "$backend" ;;
        npu) printf '%s\n' "htp" ;;
    esac
}
# Print the --device value for a backend (cpu -> none, gpu -> GPUOpenCL,
# npu -> HTP0). Unknown names print nothing.
backend_device_name() {
    local backend=$1
    case "$backend" in
        cpu) printf '%s\n' "none" ;;
        gpu) printf '%s\n' "GPUOpenCL" ;;
        npu) printf '%s\n' "HTP0" ;;
    esac
}
# When a per-case `timeout N` fired (exit code 124), append a diagnostic
# block to the case log. Without it the log usually just stops mid-init with
# no stderr, which is hard to read in CI summaries.
#   $1  exit code of the case
#   $2  timeout budget in seconds (only used in the message)
#   $3  log file to append to
note_timeout_if_triggered() {
    local exit_code=$1 limit=$2 target=$3
    if ! [ "$exit_code" -eq 124 ]; then
        return 0
    fi
    {
        printf '\n'
        printf '=== TIMEOUT after %ss ===\n' "$limit"
        printf 'uptime: '
        uptime 2>/dev/null
        printf 'free -m:\n'
        free -m 2>/dev/null
        printf 'loadavg: '
        cat /proc/loadavg 2>/dev/null
    } >> "$target"
}
# Print the llama-completion argument string for a backend. All backends
# share the context/seed/length flags; only the device and the batching
# flag differ. Unknown names print nothing.
completion_extra_args() {
    local shared="--ctx-size 128 -no-cnv -n 32 --seed 42"
    case "$1" in
        cpu) echo "--device none $shared --batch-size 128" ;;
        gpu) echo "--device GPUOpenCL $shared --ubatch-size 512" ;;
        npu) echo "--device HTP0 $shared --ubatch-size 1024" ;;
    esac
}
# Run one llama-completion case for backend $1 (cpu | gpu | npu) under a
# 600 s timeout, capture its output in a per-device log, and record the
# outcome in results.xml.
run_completion_case() {
    local backend=$1
    local ndev device
    # backend_env prints "<NDEV> <device>"; split it into two variables.
    read -r ndev device <<< "$(backend_env "$backend")"

    local log_file
    log_file="$LOG_DIR/llama_completion_$(backend_device_name "$backend").log"

    local prompt_file="$LOG_DIR/bench_prompt.txt"
    echo 'What is the capital of France?' > "$prompt_file"

    local cli_args
    cli_args=$(completion_extra_args "$backend")

    echo "=== [completion:$backend] llama-completion --device $device (NDEV=$ndev) ==="
    # $cli_args is intentionally unquoted so it splits into separate
    # arguments. stdin comes from /dev/null.
    timeout 600 env GGML_HEXAGON_NDEV=$ndev ./bin/llama-completion \
        -m "$MODEL_PATH" \
        -f "$prompt_file" \
        $cli_args \
        > "$log_file" 2>&1 < /dev/null
    local status=$?

    note_timeout_if_triggered "$status" 600 "$log_file"
    if [ $status -ne 0 ]; then
        xml_case_fail "tests.test_bench_posix" "test_llama_completion[$backend]" "$status" "$log_file"
    else
        xml_case_pass "tests.test_bench_posix" "test_llama_completion[$backend]"
    fi
}
# Run one llama-bench case for backend $1 (cpu | gpu | npu) under a 600 s
# timeout, capture its output in llama_bench_<suffix>.log, and record the
# outcome in results.xml.
run_bench_case() {
    local backend=$1
    local ndev device
    # backend_env prints "<NDEV> <device>"; split it into two variables.
    read -r ndev device <<< "$(backend_env "$backend")"

    local log_file
    log_file="$LOG_DIR/llama_bench_$(backend_log_name "$backend").log"

    local -a bench_args=(
        -m "$MODEL_PATH"
        --device "$device"
        -ngl 99
        --batch-size 128
        -t 4
        -p 128
        -n 32
    )

    echo "=== [bench:$backend] llama-bench --device $device (NDEV=$ndev) ==="
    timeout 600 env GGML_HEXAGON_NDEV=$ndev ./bin/llama-bench "${bench_args[@]}" \
        > "$log_file" 2>&1
    local status=$?

    note_timeout_if_triggered "$status" 600 "$log_file"
    if [ $status -ne 0 ]; then
        xml_case_fail "tests.test_bench_posix" "test_llama_bench[$backend]" "$status" "$log_file"
    else
        xml_case_pass "tests.test_bench_posix" "test_llama_bench[$backend]"
    fi
}
# Run test-backend-ops (MUL_MAT on HTP0) restricted to the data type in $1,
# under a 600 s timeout, and record the outcome in results.xml.
run_backend_ops_case() {
    local dtype=$1
    local log_file="$LOG_DIR/backend_ops_${dtype}.log"

    local filter
    if [ "$dtype" = "q4_0" ]; then
        # Matches Android: exclude a known-broken shape on NPU.
        filter='^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$'
    else
        filter="type_a=${dtype}"
    fi

    echo "=== [backend-ops:$dtype] test-backend-ops -b HTP0 -o MUL_MAT ==="
    timeout 600 env GGML_HEXAGON_NDEV=1 GGML_HEXAGON_HOSTBUF=0 ./bin/test-backend-ops \
        -b HTP0 -o MUL_MAT -p "$filter" \
        > "$log_file" 2>&1
    local status=$?

    note_timeout_if_triggered "$status" 600 "$log_file"
    if [ $status -ne 0 ]; then
        xml_case_fail "tests.test_backend_ops_posix" "test_backend_ops_htp0[$dtype]" "$status" "$log_file"
    else
        xml_case_pass "tests.test_backend_ops_posix" "test_backend_ops_htp0[$dtype]"
    fi
}
# --- Dispatch ----------------------------------------------------------------
# "bench" runs the completion and bench cases, "backend-ops" runs the
# backend-ops cases, "all" runs both groups in that order.
xml_open

# NOTE: use the plain $TEST_MODE form below; the brace form would contain the
# literal placeholder text and be rewritten by the host-side substitution.
TEST_MODE="{TEST_MODE}"
case "$TEST_MODE" in
    bench | backend-ops | all)
        if [ "$TEST_MODE" != "backend-ops" ]; then
            for b in cpu gpu npu; do run_completion_case "$b"; done
            for b in cpu gpu npu; do run_bench_case "$b"; done
        fi
        if [ "$TEST_MODE" != "bench" ]; then
            for d in mxfp4 fp16 q4_0; do run_backend_ops_case "$d"; done
        fi
        ;;
    *)
        echo "FATAL: unsupported TEST_MODE={TEST_MODE}"
        ;;
esac

xml_close
echo "=== done ==="

# Host parses results.xml to decide pass/fail.
exit 0

View File

@@ -1,8 +1,9 @@
"""
On-device test-backend-ops runner for llama.cpp (HTP0 backend).
Executed by QDC's Appium test framework on the QDC runner.
On Android: executed by QDC's Appium test framework on the QDC runner.
The runner has ADB access to the allocated device.
On Linux: runs test-backend-ops directly via run_linux.sh (BASH framework).
"""
import os
@@ -10,7 +11,12 @@ import sys
import pytest
from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
from utils import (
BIN_PATH,
push_bundle_if_needed,
run_script,
write_qdc_log,
)
@pytest.fixture(scope="session", autouse=True)
@@ -20,17 +26,21 @@ def install(driver):
@pytest.mark.parametrize("type_a", ["mxfp4", "fp16", "q4_0"])
def test_backend_ops_htp0(type_a):
cmd = f"{CMD_PREFIX} GGML_HEXAGON_HOSTBUF=0 GGML_HEXAGON_EXPERIMENTAL=1 {BIN_PATH}/test-backend-ops -b HTP0 -o MUL_MAT"
if type_a == "q4_0":
cmd += r' -p "^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$"'
pattern = r'^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$'
else:
cmd += f" -p type_a={type_a}"
result = run_adb_command(
cmd,
check=False,
pattern = f"type_a={type_a}"
quoted_pattern = f'"{pattern}"' if type_a == "q4_0" else pattern
result = run_script(
"run-tool.sh",
extra_env={"HB": "0"},
extra_args=["test-backend-ops", "-b", "HTP0", "-o", "MUL_MAT", "-p", quoted_pattern],
)
write_qdc_log(f"backend_ops_{type_a}.log", result.stdout or "")
assert result.returncode == 0, f"test-backend-ops type_a={type_a} failed (exit {result.returncode})"
assert result.returncode == 0, (
f"test-backend-ops type_a={type_a} failed (exit {result.returncode})"
)
if __name__ == "__main__":

View File

@@ -1,11 +1,13 @@
"""
On-device bench and completion test runner for llama.cpp (CPU, GPU, NPU backends).
Executed by QDC's Appium test framework on the QDC runner.
The runner has ADB access to the allocated device.
On Android: calls upstream run-*.sh scripts from llama.cpp/scripts/snapdragon/adb/
on the QDC runner host (scripts wrap commands in ``adb shell`` internally).
On Linux: runs llama-bench directly via run_linux.sh (BASH framework).
Placeholders replaced at artifact creation time by run_qdc_jobs.py:
<<MODEL_URL>> Direct URL to the GGUF model file (downloaded on-device via curl)
<<MODEL_URL>> Direct URL to the GGUF model file (downloaded on-device)
"""
import os
@@ -14,58 +16,75 @@ import sys
import pytest
from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
from utils import (
BIN_PATH,
MODEL_DEVICE_PATH,
MODEL_NAME,
PROMPT_DIR,
push_bundle_if_needed,
run_adb_command,
run_script,
write_qdc_log,
)
MODEL_PATH = "/data/local/tmp/model.gguf"
PROMPT = "What is the capital of France?"
CLI_OPTS = "--batch-size 128 -n 128 -no-cnv --seed 42"
MODEL_URL = "<<MODEL_URL>>"
@pytest.fixture(scope="session", autouse=True)
def install(driver):
push_bundle_if_needed(f"{BIN_PATH}/llama-cli")
# Skip model download if already present
run_adb_command(f"mkdir -p /data/local/tmp/gguf {PROMPT_DIR}")
run_adb_command(f"echo 'What is the capital of France?' > {PROMPT_DIR}/bench_prompt.txt")
check = subprocess.run(
["adb", "shell", f"ls {MODEL_PATH}"],
["adb", "shell", f"ls {MODEL_DEVICE_PATH}"],
text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
)
if check.returncode != 0:
run_adb_command(f'curl -L -J --output {MODEL_PATH} "<<MODEL_URL>>"')
run_adb_command(f'curl -L -J --output {MODEL_DEVICE_PATH} "{MODEL_URL}"')
@pytest.mark.parametrize("device,extra_flags", [
pytest.param("none", "-ctk q8_0 -ctv q8_0", id="cpu"),
pytest.param("GPUOpenCL", "", id="gpu"),
pytest.param("HTP0", "-ctk q8_0 -ctv q8_0", id="npu"),
])
def test_llama_completion(device, extra_flags):
result = run_adb_command(
f'{CMD_PREFIX} {BIN_PATH}/llama-completion'
f' -m {MODEL_PATH} --device {device} -ngl 99 -t 4 {CLI_OPTS} {extra_flags} -fa on'
f' -p "{PROMPT}"',
check=False,
@pytest.mark.parametrize(
"device",
[
pytest.param("none", id="cpu"),
pytest.param("GPUOpenCL", id="gpu"),
pytest.param("HTP0", id="npu"),
],
)
def test_llama_completion(device):
result = run_script(
"run-completion.sh",
extra_env={"D": device, "M": MODEL_NAME},
extra_args=["--batch-size", "128", "-n", "128", "--seed", "42",
"-f", f"{PROMPT_DIR}/bench_prompt.txt"],
)
write_qdc_log(f"llama_completion_{device}.log", result.stdout or "")
assert result.returncode == 0, f"llama-completion {device} failed (exit {result.returncode})"
assert result.returncode == 0, (
f"llama-completion {device} failed (exit {result.returncode})"
)
_DEVICE_LOG_NAME = {"none": "cpu", "GPUOpenCL": "gpu", "HTP0": "htp"}
@pytest.mark.parametrize("device", [
pytest.param("none", id="cpu"),
pytest.param("GPUOpenCL", id="gpu"),
pytest.param("HTP0", id="npu"),
])
@pytest.mark.parametrize(
"device",
[
pytest.param("none", id="cpu"),
pytest.param("GPUOpenCL", id="gpu"),
pytest.param("HTP0", id="npu"),
],
)
def test_llama_bench(device):
result = run_adb_command(
f"{CMD_PREFIX} {BIN_PATH}/llama-bench"
f" -m {MODEL_PATH} --device {device} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32",
check=False,
result = run_script(
"run-bench.sh",
extra_env={"D": device, "M": MODEL_NAME},
extra_args=["--batch-size", "128", "-p", "128", "-n", "32"],
)
write_qdc_log(f"llama_bench_{_DEVICE_LOG_NAME[device]}.log", result.stdout or "")
assert result.returncode == 0, f"llama-bench {device} failed (exit {result.returncode})"
assert result.returncode == 0, (
f"llama-bench {device} failed (exit {result.returncode})"
)
if __name__ == "__main__":

View File

@@ -1,5 +1,7 @@
"""Shared helpers for QDC on-device test runners."""
from __future__ import annotations
import logging
import os
import subprocess
@@ -13,16 +15,14 @@ log = logging.getLogger(__name__)
# On-device paths
# ---------------------------------------------------------------------------
BUNDLE_PATH = "/data/local/tmp/llama_cpp_bundle"
BUNDLE_PATH = "/data/local/tmp/llama.cpp"
BIN_PATH = f"{BUNDLE_PATH}/bin"
LIB_PATH = f"{BUNDLE_PATH}/lib"
QDC_LOGS_PATH = "/data/local/tmp/QDC_logs"
LIB_PATH = f"{BUNDLE_PATH}/lib"
BIN_PATH = f"{BUNDLE_PATH}/bin"
ENV_PREFIX = (
f"export LD_LIBRARY_PATH={LIB_PATH} && "
f"export ADSP_LIBRARY_PATH={LIB_PATH} && "
f"chmod +x {BIN_PATH}/* &&"
)
CMD_PREFIX = f"cd {BUNDLE_PATH} && {ENV_PREFIX}"
SCRIPTS_DIR = "/qdc/appium"
MODEL_NAME = "model.gguf"
MODEL_DEVICE_PATH = "/data/local/tmp/gguf/model.gguf"
PROMPT_DIR = "/data/local/tmp/scorecard_prompts"
# ---------------------------------------------------------------------------
# Appium session options
@@ -34,16 +34,47 @@ options.set_capability("platformName", "Android")
options.set_capability("deviceName", os.getenv("ANDROID_DEVICE_VERSION"))
# ---------------------------------------------------------------------------
# ADB helpers
# Shell / process helpers
# ---------------------------------------------------------------------------
def write_qdc_log(filename: str, content: str) -> None:
    """Store ``content`` as ``filename`` under QDC_LOGS_PATH for QDC log collection.

    The text is first written to a temporary ``.log`` file on the machine
    running this code and then transferred with ``adb push``. The exit codes
    of both adb invocations are ignored, so a failed upload never raises
    from here.

    Args:
        filename: Name of the log file to create inside QDC_LOGS_PATH.
        content: Text to store in the log file.
    """
    remote_file = f"{QDC_LOGS_PATH}/{filename}"

    # Create the destination directory first; output is captured and dropped.
    subprocess.run(
        ["adb", "shell", f"mkdir -p {QDC_LOGS_PATH}"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )

    # delete=False keeps the file on disk after the handle is closed so that
    # adb can read it; it is removed explicitly once the push has been tried.
    staging = tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False)
    with staging:
        staging.write(content)
    local_file = staging.name

    try:
        subprocess.run(
            ["adb", "push", local_file, remote_file],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
    finally:
        os.unlink(local_file)
def ensure_bundle(check_binary: str | None = None) -> None:
    """Make sure the llama_cpp_bundle is present on the target device.

    Args:
        check_binary: Path whose presence is probed by
            ``push_bundle_if_needed``. When omitted (or empty), the
            ``llama-cli`` binary under BIN_PATH is used as the probe.
    """
    probe = check_binary if check_binary else f"{BIN_PATH}/llama-cli"
    push_bundle_if_needed(probe)
# ---------------------------------------------------------------------------
# Android / Linux host helpers
# ---------------------------------------------------------------------------
def run_adb_command(cmd: str, *, check: bool = True) -> subprocess.CompletedProcess:
# Append exit-code sentinel because `adb shell` doesn't reliably propagate
# the on-device exit code (older ADB versions always return 0).
"""Run a command on-device via ``adb shell`` with exit-code sentinel."""
raw = subprocess.run(
["adb", "shell", f"{cmd}; echo __RC__:$?"],
text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
text=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
)
stdout = raw.stdout
returncode = raw.returncode
@@ -55,39 +86,58 @@ def run_adb_command(cmd: str, *, check: bool = True) -> subprocess.CompletedProc
stdout = "\n".join(lines[:-1]) + "\n"
except ValueError:
pass
log.info("%s", stdout)
log.info(stdout)
result = subprocess.CompletedProcess(raw.args, returncode, stdout=stdout)
if check:
assert returncode == 0, f"Command failed (exit {returncode})"
return result
def write_qdc_log(filename: str, content: str) -> None:
"""Push content as a log file to QDC_LOGS_PATH on the device for QDC log collection."""
subprocess.run(
["adb", "shell", f"mkdir -p {QDC_LOGS_PATH}"],
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
def run_script(
    script: str,
    extra_env: dict[str, str] | None = None,
    extra_args: list[str] | None = None,
) -> subprocess.CompletedProcess:
    """Run an upstream shell script from /qdc/appium/ on the QDC runner host.

    Args:
        script: File name of the script, resolved relative to SCRIPTS_DIR.
        extra_env: Variables layered on top of the inherited environment;
            they win over both the inherited values and the
            ``GGML_HEXAGON_EXPERIMENTAL=1`` default set here.
        extra_args: Arguments appended after the script path.

    Returns:
        The finished process, with stderr folded into ``stdout`` as text.
        The exit code is not checked; callers inspect ``returncode``.
    """
    # Layering order matters: inherited environment, then the default, then
    # whatever the caller supplied.
    child_env = os.environ.copy()
    child_env["GGML_HEXAGON_EXPERIMENTAL"] = "1"
    if extra_env:
        child_env.update(extra_env)

    script_path = f"{SCRIPTS_DIR}/{script}"
    argv = [script_path] + (extra_args or [])

    completed = subprocess.run(
        argv,
        env=child_env,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    log.info(completed.stdout)
    return completed
def adb_shell(cmd: str) -> None:
    """Run a command via adb shell (fire-and-forget, no error check).

    Output is captured and discarded, and a non-zero exit status is ignored.

    Args:
        cmd: Command line to be interpreted by the device shell.
    """
    # adb joins everything after "shell" with spaces and lets the device
    # shell parse the result. Passing ["sh", "-c", cmd] therefore made
    # ``sh -c`` see only the first word of ``cmd`` as its command string
    # (the remaining words became positional parameters). Handing ``cmd``
    # over as the single shell argument keeps the whole command line intact.
    subprocess.run(
        ["adb", "shell", cmd],
        capture_output=True,
        encoding="utf-8",
        errors="replace",
        check=False,
    )
with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False) as f:
f.write(content)
tmp_path = f.name
try:
subprocess.run(
["adb", "push", tmp_path, f"{QDC_LOGS_PATH}/{filename}"],
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
)
finally:
os.unlink(tmp_path)
def push_bundle_if_needed(check_binary: str) -> None:
"""Push llama_cpp_bundle to the device if check_binary is not already present."""
result = subprocess.run(
["adb", "shell", f"ls {check_binary}"],
text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
text=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
)
if result.returncode != 0:
subprocess.run(
["adb", "push", "/qdc/appium/llama_cpp_bundle/", "/data/local/tmp"],
text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
["adb", "push", "/qdc/appium/llama_cpp_bundle/", BUNDLE_PATH],
text=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
)
subprocess.run(
["adb", "shell", f"find {BUNDLE_PATH}/bin -type f -exec chmod 755 {{}} +"],
text=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
)

View File

@@ -13,7 +13,7 @@ exclude = [
[[overrides]]
include = [
"./tools/server/tests/**",
"./scripts/snapdragon/qdc/tests/**",
"./scripts/snapdragon/qdc/**",
]
[overrides.rules]