From d81e63dcfd104cbfd7409a2bc6c09706d4b313be Mon Sep 17 00:00:00 2001
From: Zack Li <39573601+zhiyuan8@users.noreply.github.com>
Date: Thu, 14 May 2026 13:58:34 -0700
Subject: [PATCH] CI : support IOT device (IQ9) (#22987)

* update test scripts

* align CI behavior between linux and android

* remove automatic cancellation after 15min

* enable cancel-in-progress

* fix ty check issue

* update and fix pylint issue

* update runner such that we are not restricted by the 15min limit rule

* fix flake8 lint issue

* update runner according to review feedback

* code update according to review feedback

* switch from llama-cli to llama-completion binary with -no-cnv flag
---
 .../workflows/build-and-test-snapdragon.yml   |  46 +-
 scripts/snapdragon/qdc/run_qdc_jobs.py        | 453 ++++++++++++++----
 .../snapdragon/qdc/tests/linux/run_linux.sh   | 232 +++++++++
 .../qdc/tests/run_backend_ops_posix.py        |  28 +-
 .../qdc/tests/run_bench_tests_posix.py        |  85 ++--
 scripts/snapdragon/qdc/tests/utils.py         | 114 +++--
 ty.toml                                       |   2 +-
 7 files changed, 793 insertions(+), 167 deletions(-)
 create mode 100644 scripts/snapdragon/qdc/tests/linux/run_linux.sh

diff --git a/.github/workflows/build-and-test-snapdragon.yml b/.github/workflows/build-and-test-snapdragon.yml
index deed8e808b..ef3fe502fa 100644
--- a/.github/workflows/build-and-test-snapdragon.yml
+++ b/.github/workflows/build-and-test-snapdragon.yml
@@ -58,14 +58,45 @@ jobs:
           name: llama-cpp-android-arm64-snapdragon
           path: pkg-snapdragon/llama.cpp
 
+  linux-iot-snapdragon:
+    runs-on: ubuntu-latest
+    container:
+      image: 'ghcr.io/snapdragon-toolchain/arm64-linux:v0.1'
+    defaults:
+      run:
+        shell: bash
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+          lfs: false
+
+      - name: Build Llama.CPP for Snapdragon Linux IoT
+        id: build_llama_cpp_snapdragon_linux
+        run: |
+          cp docs/backend/snapdragon/CMakeUserPresets.json .
+          cmake --preset arm64-linux-snapdragon-release -B build-snapdragon -DGGML_OPENCL=ON
+          cmake --build build-snapdragon -j $(nproc)
+          cmake --install build-snapdragon --prefix pkg-snapdragon/llama.cpp
+
+      - name: Upload Llama.CPP Snapdragon Linux IoT Build Artifact
+        if: ${{ always() && steps.build_llama_cpp_snapdragon_linux.outcome == 'success' }}
+        uses: actions/upload-artifact@v6
+        with:
+          name: llama-cpp-linux-arm64-snapdragon
+          path: pkg-snapdragon/llama.cpp
+
   test-snapdragon-qdc:
-    name: Test on QDC Android Device (${{ matrix.device }})
-    needs: [android-ndk-snapdragon]
-    runs-on: ubuntu-slim
+    name: Test on QDC Device (${{ matrix.device }})
+    needs: [android-ndk-snapdragon, linux-iot-snapdragon]
+    runs-on: ubuntu-24.04-arm
+    timeout-minutes: 90
     strategy:
       fail-fast: false
       matrix:
-        device: [SM8750, SM8650, SM8850]
+        device: [SM8750, SM8850, QCS9075M]
 
     steps:
       - name: Checkout
@@ -74,11 +105,11 @@ jobs:
       - name: Download build artifact
         uses: actions/download-artifact@v7
         with:
-          name: llama-cpp-android-arm64-snapdragon
+          name: ${{ startsWith(matrix.device, 'QCS') && 'llama-cpp-linux-arm64-snapdragon' || 'llama-cpp-android-arm64-snapdragon' }}
           path: pkg-snapdragon/llama.cpp
 
       - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
           python-version: '3.x'
           cache: pip
@@ -107,7 +138,8 @@ jobs:
               --test       all \
               --pkg-dir    pkg-snapdragon/llama.cpp \
               --model-url  "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_0.gguf" \
-              --device     ${{ matrix.device }}
+              --device     ${{ matrix.device }} \
+              ${{ startsWith(matrix.device, 'QCS') && '--retries 2 --retry-delay 300' || '' }}
         env:
           QDC_API_KEY: ${{ secrets.QDC_API_KEY }}
 
diff --git a/scripts/snapdragon/qdc/run_qdc_jobs.py b/scripts/snapdragon/qdc/run_qdc_jobs.py
index b4eede3d01..f1b0453eec 100644
--- a/scripts/snapdragon/qdc/run_qdc_jobs.py
+++ b/scripts/snapdragon/qdc/run_qdc_jobs.py
@@ -1,4 +1,4 @@
-"""Run llama.cpp Hexagon Android tests in a single QDC Appium job.
+"""Run llama.cpp Hexagon tests in a single QDC job.
 
 Bundles test scripts into one artifact and submits a single QDC job:
 
@@ -10,6 +10,10 @@ Results are written to $GITHUB_STEP_SUMMARY when set (GitHub Actions).
 Prerequisites:
   pip install /path/to/qualcomm_device_cloud_sdk*.whl
 
+Platform is inferred from --device:
+  android  Appium + pytest (Android phones: SM8750 / SM8650 / SM8850)
+  linux    BASH (Linux IoT: QCS9075M)
+
 Required environment variables:
   QDC_API_KEY   API key from QDC UI -> Users -> Settings -> API Keys
 
@@ -23,6 +27,7 @@ Usage:
 from __future__ import annotations
 
 import argparse
+import enum
 import logging
 import os
 import re
@@ -30,15 +35,35 @@ import shutil
 import sys
 import tempfile
 import time
+import urllib.request
 import xml.etree.ElementTree as ET
 from dataclasses import dataclass, field
 from pathlib import Path
+from typing import Callable
 
-from qualcomm_device_cloud_sdk.api import qdc_api  # ty: ignore[unresolved-import]
-from qualcomm_device_cloud_sdk.logging import configure_logging  # ty: ignore[unresolved-import]
-from qualcomm_device_cloud_sdk.models import ArtifactType, JobMode, JobState, JobSubmissionParameter, JobType, TestFramework  # ty: ignore[unresolved-import]
+from qualcomm_device_cloud_sdk.api import qdc_api
+from qualcomm_device_cloud_sdk.logging import configure_logging
+from qualcomm_device_cloud_sdk.models import (
+    ArtifactType,
+    JobMode,
+    JobState,
+    JobSubmissionParameter,
+    JobType,
+    TestFramework,
+)
 
+# configure_logging only sets up the SDK logger; basicConfig is needed for
+# our own log.info output to be emitted (StreamHandler() writes to stderr).
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s %(name)s %(levelname)s - %(message)s",
+    handlers=[logging.StreamHandler()],
+)
 configure_logging(level=logging.INFO, handlers=[logging.StreamHandler()])
+# Silence per-poll GET/status spam from the SDK and its HTTP client.
+logging.getLogger("qualcomm_device_cloud").setLevel(logging.WARNING)
+logging.getLogger("httpx").setLevel(logging.WARNING)
+logging.getLogger("httpcore").setLevel(logging.WARNING)
 log = logging.getLogger(__name__)
 
 POLL_INTERVAL        = 30
@@ -47,23 +72,56 @@ LOG_UPLOAD_TIMEOUT   = 600
 CAPACITY_TIMEOUT     = 1800
 CAPACITY_POLL        = 60
 MAX_CONCURRENT_JOBS  = 5
+DEFAULT_RETRIES      = 0
+RETRY_DELAY          = 300
 TERMINAL_STATES     = {JobState.COMPLETED, JobState.CANCELED}
 NON_TERMINAL_STATES = {JobState.DISPATCHED, JobState.RUNNING, JobState.SETUP, JobState.SUBMITTED}
 
-_SCRIPTS_DIR      = Path(__file__).parent
-_TESTS_DIR        = _SCRIPTS_DIR / "tests"
-_RUN_BENCH        = _TESTS_DIR / "run_bench_tests_posix.py"
-_RUN_BACKEND_OPS  = _TESTS_DIR / "run_backend_ops_posix.py"
-_UTILS            = _TESTS_DIR / "utils.py"
-_CONFTEST         = _TESTS_DIR / "conftest.py"
-_REQUIREMENTS     = _SCRIPTS_DIR / "requirements.txt"
 
+class DeviceUnavailableError(Exception):
+    """Raised when the QDC device resource is not available (retryable)."""
+
+
+_SCRIPTS_DIR = Path(__file__).parent
+_TESTS_DIR = _SCRIPTS_DIR / "tests"
+
+# --- Shared test assets -------------------------------------------------------
+_UTILS = _TESTS_DIR / "utils.py"
+_CONFTEST = _TESTS_DIR / "conftest.py"
 _PYTEST_LINE_RE = re.compile(
     r"(?:[\w/]+\.py::)?(?:\w+::)?([\w\[\].-]+)\s+(PASSED|FAILED|ERROR|SKIPPED)"
 )
-_EXCLUDED_LOGS = {"qdc_android_whole_host-000.log", "qdc_kernel_host-000.log"}
+_EXCLUDED_LOGS = {
+    "qdc_android_whole_host-000.log",
+    "qdc_kernel_host-000.log",
+    "qdc_LE_whole_host-000.log",
+    "qdc_LE_kernel_host-000.log",
+    "script.log",
+}
 _NON_TERMINAL_STATE_VALUES = {s.value for s in NON_TERMINAL_STATES}
 
+# --- Android (Appium + pytest) assets ----------------------------------------
+_RUN_BENCH = _TESTS_DIR / "run_bench_tests_posix.py"
+_RUN_BACKEND_OPS = _TESTS_DIR / "run_backend_ops_posix.py"
+_REQUIREMENTS = _SCRIPTS_DIR / "requirements.txt"
+_UPSTREAM_ADB_SCRIPTS = (
+    "https://raw.githubusercontent.com/ggml-org/llama.cpp/master/scripts/snapdragon/adb"
+)
+_ADB_SCRIPT_NAMES = [
+    "run-bench.sh",
+    "run-cli.sh",
+    "run-completion.sh",
+    "run-tool.sh",
+]
+
+# --- Linux (BASH) assets ------------------------------------------------------
+_RUN_LINUX_TEMPLATE = _TESTS_DIR / "linux" / "run_linux.sh"
+_LINUX_ENTRY_SCRIPT = "/bin/bash /data/local/tmp/TestContent/run_linux.sh"
+
+# =============================================================================
+# Artifact builders (per platform)
+# =============================================================================
+
 
 @dataclass
 class JobResult:
@@ -73,35 +131,58 @@ class JobResult:
     failure_details: dict[str, str] = field(default_factory=dict)
 
 
-def build_artifact_zip(
+def _write_lf(path: Path, content: str) -> None:
+    """Write text with LF line endings (required by /bin/bash on Linux)."""
+    with open(path, "w", encoding="utf-8", newline="\n") as f:
+        f.write(content)
+
+
+def _build_android_artifact(
     pkg_dir: Path,
     stage_dir: Path,
-    *,
-    test_mode: str = "bench",
-    model_url: str | None = None,
+    test_mode: str,
+    model_url: str | None,
 ) -> Path:
-    """Bundle everything into a single QDC artifact zip.
+    """Android zip (Appium/pytest). Extracted by QDC under /qdc/appium/.
 
-    Zip structure (extracted by QDC to /qdc/appium/ on the runner):
+    Zip structure:
       llama_cpp_bundle/            installed package (adb pushed to /data/local/tmp/)
+      run-{bench,cli,completion,tool}.sh  upstream adb wrappers (patched)
       tests/
-        utils.py                   shared helpers (paths, run_adb_command, …)
-        conftest.py                shared pytest fixtures (driver)
-        test_bench_posix.py        bench + cli tests (<<MODEL_URL>> substituted)
-          AND/OR
-        test_backend_ops_posix.py  test-backend-ops -b HTP0
+        utils.py                   shared adb helpers
+        conftest.py                Appium pytest fixtures
+        test_bench_posix.py        bench + cli tests (for --test bench or all)
+        test_backend_ops_posix.py  test-backend-ops on HTP0
       requirements.txt
+      pytest.ini                   addopts = --junitxml=results.xml
     """
-    shutil.copytree(pkg_dir, stage_dir / "llama_cpp_bundle")
+    bundle_dir = stage_dir / "llama_cpp_bundle"
+    shutil.copytree(pkg_dir, bundle_dir)
+
+    # Download upstream adb scripts so they land at /qdc/appium/ on the QDC
+    # runner. They wrap `adb shell` internally. Patch in `chmod +x bin/* lib/*`
+    # right after `cd $basedir` so device binaries are executable.
+    for name in _ADB_SCRIPT_NAMES:
+        url = f"{_UPSTREAM_ADB_SCRIPTS}/{name}"
+        dest = stage_dir / name
+        log.info("Downloading %s", url)
+        urllib.request.urlretrieve(url, str(dest))
+        content = dest.read_text()
+        content = content.replace(
+            "cd $basedir;",
+            "cd $basedir; chmod +x bin/* lib/* 2>/dev/null;",
+        )
+        dest.write_text(content)
+        dest.chmod(0o755)
 
     tests_dir = stage_dir / "tests"
     tests_dir.mkdir()
 
-    shutil.copy(_UTILS,    tests_dir / "utils.py")
+    shutil.copy(_UTILS, tests_dir / "utils.py")
     shutil.copy(_CONFTEST, tests_dir / "conftest.py")
 
     if test_mode in ("bench", "all"):
-        assert model_url is not None, "--model-url is required for bench/all test modes"
+        assert model_url is not None
         (tests_dir / "test_bench_posix.py").write_text(
             _RUN_BENCH.read_text().replace("<<MODEL_URL>>", model_url)
         )
@@ -109,33 +190,140 @@ def build_artifact_zip(
         shutil.copy(_RUN_BACKEND_OPS, tests_dir / "test_backend_ops_posix.py")
 
     shutil.copy(_REQUIREMENTS, stage_dir / "requirements.txt")
-    (stage_dir / "pytest.ini").write_text("[pytest]\naddopts = --junitxml=results.xml\n")
+    (stage_dir / "pytest.ini").write_text(
+        "[pytest]\naddopts = --junitxml=results.xml\n"
+    )
 
     zip_base = str(stage_dir / "artifact")
     shutil.make_archive(zip_base, "zip", stage_dir)
     return Path(f"{zip_base}.zip")
 
 
+def _build_linux_artifact(
+    pkg_dir: Path,
+    stage_dir: Path,
+    test_mode: str,
+    model_url: str | None,
+) -> Path:
+    """Linux IoT zip (BASH framework). Extracted by QDC to /data/local/tmp/TestContent/.
+
+    Zip structure:
+      run_linux.sh               entry script (placeholder-substituted, LF line endings)
+      llama_cpp_bundle/          installed package
+    """
+    bundle_dir = stage_dir / "llama_cpp_bundle"
+    shutil.copytree(pkg_dir, bundle_dir)
+
+    template = _RUN_LINUX_TEMPLATE.read_text(encoding="utf-8")
+    rendered = template.replace("{MODEL_URL}", model_url or "").replace(
+        "{TEST_MODE}", test_mode
+    )
+    script_path = stage_dir / "run_linux.sh"
+    _write_lf(script_path, rendered)
+    script_path.chmod(0o755)
+
+    zip_base = str(stage_dir / "artifact")
+    shutil.make_archive(zip_base, "zip", stage_dir)
+    return Path(f"{zip_base}.zip")
+
+
+# =============================================================================
+# Platform enum + strategy table
+# =============================================================================
+
+
+class Platform(enum.Enum):
+    ANDROID = "android"
+    LINUX = "linux"
+
+
+@dataclass(frozen=True)
+class PlatformSpec:
+    test_framework: TestFramework
+    entry_script: str | None
+    build_artifact: Callable[[Path, Path, str, str | None], Path]
+    job_name_fmt: str
+
+
+PLATFORM_SPECS: dict[Platform, PlatformSpec] = {
+    Platform.ANDROID: PlatformSpec(
+        test_framework=TestFramework.APPIUM,
+        entry_script=None,
+        build_artifact=_build_android_artifact,
+        job_name_fmt="{base}",
+    ),
+    Platform.LINUX: PlatformSpec(
+        test_framework=TestFramework.BASH,
+        entry_script=_LINUX_ENTRY_SCRIPT,
+        build_artifact=_build_linux_artifact,
+        job_name_fmt="{base} (Linux)",
+    ),
+}
+
+DEVICE_PLATFORM: dict[str, Platform] = {
+    "SM8750": Platform.ANDROID,
+    "SM8650": Platform.ANDROID,
+    "SM8850": Platform.ANDROID,
+    "QCS9075M": Platform.LINUX,
+}
+
+
+# =============================================================================
+# Shared QDC job plumbing
+# =============================================================================
+
+
 def wait_for_job(client, job_id: str, timeout: int) -> str:
     elapsed = 0
+    last_state = None
+    consecutive_errors = 0
+    max_consecutive_errors = 5
     while elapsed < timeout:
-        raw = qdc_api.get_job_status(client, job_id)
+        try:
+            raw = qdc_api.get_job_status(client, job_id)
+            consecutive_errors = 0
+        except Exception as e:
+            consecutive_errors += 1
+            log.warning(
+                "Transient error polling job %s (%d/%d): %s",
+                job_id,
+                consecutive_errors,
+                max_consecutive_errors,
+                e,
+            )
+            if consecutive_errors >= max_consecutive_errors:
+                raise
+            time.sleep(POLL_INTERVAL)
+            elapsed += POLL_INTERVAL
+            continue
         try:
             status = JobState(raw)
         except ValueError:
             status = raw
         if status in TERMINAL_STATES:
             return raw.lower()
-        log.info("Job %s: %s", job_id, raw)
+        if raw != last_state:
+            log.info("Job %s: %s", job_id, raw)
+            last_state = raw
         time.sleep(POLL_INTERVAL)
         elapsed += POLL_INTERVAL
+    # Abort to free the QDC concurrency slot instead of leaking it.
+    try:
+        qdc_api.abort_job(client, job_id)
+        log.warning("Aborted job %s after timeout to free concurrency slot", job_id)
+    except Exception as e:
+        log.warning("Failed to abort job %s: %s", job_id, e)
     raise TimeoutError(f"Job {job_id} did not finish within {timeout}s")
 
 
 def wait_for_log_upload(client, job_id: str) -> None:
     elapsed = 0
     while elapsed <= LOG_UPLOAD_TIMEOUT:
-        status = (qdc_api.get_job_log_upload_status(client, job_id) or "").lower()
+        try:
+            status = (qdc_api.get_job_log_upload_status(client, job_id) or "").lower()
+        except Exception as e:
+            log.warning("get_job_log_upload_status failed: %s — will retry", e)
+            status = ""
         if status in {"completed", "failed"}:
             return
         log.info("Waiting for log upload (status=%s) ...", status)
@@ -150,17 +338,33 @@ def wait_for_capacity(client, max_jobs: int = MAX_CONCURRENT_JOBS) -> None:
     while elapsed < CAPACITY_TIMEOUT:
         jobs_page = qdc_api.get_jobs_list(client, page_number=0, page_size=50)
         if jobs_page is None:
-            log.warning("Could not retrieve job list; proceeding without capacity check")
+            log.warning(
+                "Could not retrieve job list; proceeding without capacity check"
+            )
             return
         items = getattr(jobs_page, "data", []) or []
-        active = sum(1 for j in items if getattr(j, "state", None) in _NON_TERMINAL_STATE_VALUES)
+        active = sum(
+            1 for j in items if getattr(j, "state", None) in _NON_TERMINAL_STATE_VALUES
+        )
         if active < max_jobs:
             log.info("Active QDC jobs: %d / %d — proceeding", active, max_jobs)
             return
-        log.info("Active QDC jobs: %d / %d — waiting %ds ...", active, max_jobs, CAPACITY_POLL)
+        log.info(
+            "Active QDC jobs: %d / %d — waiting %ds ...",
+            active,
+            max_jobs,
+            CAPACITY_POLL,
+        )
         time.sleep(CAPACITY_POLL)
         elapsed += CAPACITY_POLL
-    log.warning("Capacity wait timed out after %ds; proceeding anyway", CAPACITY_TIMEOUT)
+    raise TimeoutError(
+        f"Capacity wait timed out after {CAPACITY_TIMEOUT}s"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Log parsing helpers
+# ---------------------------------------------------------------------------
 
 
 def _parse_junit_xml(content: str) -> tuple[dict[str, bool], dict[str, str]]:
@@ -192,10 +396,26 @@ def _parse_pytest_output(content: str) -> dict[str, bool]:
 
 
 def fetch_logs_and_parse_tests(
-    client, job_id: str
+    client, job_id: str, max_retries: int = 5, retry_delay: int = 30
 ) -> tuple[dict[str, bool], dict[str, str], dict[str, str]]:
     """Returns (test_results, raw_logs, failure_details)."""
-    log_files = qdc_api.get_job_log_files(client, job_id)
+    log_files = None
+    for attempt in range(1, max_retries + 1):
+        try:
+            log_files = qdc_api.get_job_log_files(client, job_id)
+            break
+        except Exception as e:
+            if attempt < max_retries:
+                log.warning(
+                    "get_job_log_files failed (attempt %d/%d): %s — retrying in %ds",
+                    attempt, max_retries, e, retry_delay,
+                )
+                time.sleep(retry_delay)
+            else:
+                log.error(
+                    "get_job_log_files failed after %d attempts: %s", max_retries, e
+                )
+                return {}, {}, {}
     if not log_files:
         log.warning("No log files returned for job %s", job_id)
         return {}, {}, {}
@@ -207,8 +427,8 @@ def fetch_logs_and_parse_tests(
 
     with tempfile.TemporaryDirectory() as tmpdir:
         for lf in log_files:
-            log.info("Downloading log file: %s", lf.filename)
             zip_path = os.path.join(tmpdir, "log.zip")
+            log.info("Downloading log file: %s", lf.filename)
             qdc_api.download_job_log_files(client, lf.filename, zip_path)
             try:
                 shutil.unpack_archive(zip_path, tmpdir, "zip")
@@ -226,12 +446,15 @@ def fetch_logs_and_parse_tests(
                 elif fname.endswith(".log"):
                     if fname in _EXCLUDED_LOGS:
                         continue
-                    log.info("--- %s ---", fname)
-                    log.info("%s", content)
+                    log.info("--- %s ---\n%s", fname, content)
                     raw_logs[fname] = content
                     pytest_fallback.update(_parse_pytest_output(content))
 
-    return (test_results if test_results else pytest_fallback), raw_logs, failure_details
+    return (
+        (test_results if test_results else pytest_fallback),
+        raw_logs,
+        failure_details,
+    )
 
 
 def write_summary(result: JobResult, title: str = "QDC Test Results") -> None:
@@ -289,30 +512,106 @@ def write_summary(result: JobResult, title: str = "QDC Test Results") -> None:
         f.write("\n".join(lines) + "\n")
 
 
+# =============================================================================
+# CLI + main
+# =============================================================================
+
 def parse_args() -> argparse.Namespace:
     p = argparse.ArgumentParser(
         description=__doc__,
         formatter_class=argparse.RawDescriptionHelpFormatter,
     )
-    p.add_argument("--pkg-dir",   required=True, type=Path,
+    p.add_argument("--pkg-dir", required=True, type=Path,
                    help="Installed llama.cpp package directory (contains bin/ and lib/)")
     p.add_argument("--model-url",
                    help="Direct URL to the GGUF model file (required for --test bench)")
-    p.add_argument("--device",    required=True,
+    p.add_argument("--device", required=True,
                    help="QDC chipset name, e.g. SM8750")
     p.add_argument("--test", choices=["bench", "backend-ops", "all"], default="bench",
                    help="Test suite to run (default: bench)")
     p.add_argument("--job-timeout", type=int, default=JOB_TIMEOUT, metavar="SECONDS",
                    help=f"Max seconds to wait for job completion (default: {JOB_TIMEOUT})")
+    p.add_argument("--retries", type=int, default=DEFAULT_RETRIES, metavar="N",
+                   help="Number of retries when device is unavailable (default: 0)")
+    p.add_argument("--retry-delay", type=int, default=RETRY_DELAY, metavar="SECONDS",
+                   help=f"Seconds to wait between retries (default: {RETRY_DELAY})")
     args = p.parse_args()
     if args.test in ("bench", "all") and not args.model_url:
         p.error("--model-url is required when --test bench or --test all")
     return args
 
 
+def _submit_and_run_job(client, args, spec, target_id, artifact_id) -> JobResult:
+    """Submit a QDC job and wait for results.
+
+    Raises DeviceUnavailableError for transient device/resource issues that
+    are worth retrying (chained to the underlying TimeoutError, if any).
+    Returns JobResult for definitive outcomes (pass or test failure).
+    """
+    try:
+        wait_for_capacity(client)
+    except TimeoutError as e:
+        raise DeviceUnavailableError("Capacity wait timed out — device busy") from e
+
+    job_name = spec.job_name_fmt.format(base="llama.cpp Hexagon tests")
+
+    job_id = qdc_api.submit_job(
+        public_api_client=client,
+        target_id=target_id,
+        job_name=job_name,
+        external_job_id=None,
+        job_type=JobType.AUTOMATED,
+        job_mode=JobMode.APPLICATION,
+        timeout=max(1, args.job_timeout // 60),  # seconds -> whole minutes, at least 1
+        test_framework=spec.test_framework,
+        entry_script=spec.entry_script,
+        job_artifacts=[artifact_id],
+        monkey_events=None,
+        monkey_session_timeout=None,
+        job_parameters=[JobSubmissionParameter.WIFIENABLED],
+    )
+    if job_id is None:
+        raise DeviceUnavailableError("Job submission failed — device may be unavailable")
+    log.info("Job submitted: %s  (device=%s)", job_id, args.device)
+
+    try:
+        job_status = wait_for_job(client, job_id, timeout=args.job_timeout)
+    except TimeoutError as e:
+        raise DeviceUnavailableError(str(e)) from e  # keep the original timeout as __cause__
+    log.info("Job %s finished: %s", job_id, job_status)
+
+    wait_for_log_upload(client, job_id)
+    tests, raw_logs, failure_details = fetch_logs_and_parse_tests(client, job_id)
+
+    job_ok = job_status == JobState.COMPLETED.value.lower()
+
+    if not job_ok and not tests:
+        raise DeviceUnavailableError(
+            f"Job did not complete (status={job_status}) and produced no test results"
+        )
+
+    passed = job_ok and all(tests.values()) if tests else job_ok
+    if spec.test_framework == TestFramework.BASH and not tests:  # BASH jobs must report at least one test
+        log.error("No test results recovered (state=%s). Script likely never ran.", job_status)
+        passed = False
+    if not passed:
+        log.error("Job did not complete successfully or tests failed (status=%s)", job_status)
+
+    return JobResult(passed=passed, tests=tests, raw_logs=raw_logs, failure_details=failure_details)
+
+
 def main() -> int:
     args = parse_args()
 
+    platform = DEVICE_PLATFORM.get(args.device)
+    if platform is None:
+        log.error(
+            "Unknown device %r. Known: %s",
+            args.device, ", ".join(sorted(DEVICE_PLATFORM.keys())),
+        )
+        return 1
+    spec = PLATFORM_SPECS[platform]
+
     api_key = os.environ.get("QDC_API_KEY")
     if not api_key:
         log.error("QDC_API_KEY environment variable must be set")
@@ -334,10 +633,9 @@ def main() -> int:
         return 1
 
     with tempfile.TemporaryDirectory() as tmpdir:
-        log.info("Building artifact ...")
-        zip_path = build_artifact_zip(
-            args.pkg_dir, Path(tmpdir),
-            test_mode=args.test, model_url=args.model_url,
+        log.info("Building %s artifact (test=%s) ...", platform.value, args.test)
+        zip_path = spec.build_artifact(
+            args.pkg_dir, Path(tmpdir), args.test, args.model_url
         )
         log.info("Uploading artifact (%d MB) ...", zip_path.stat().st_size // 1_000_000)
         artifact_id = qdc_api.upload_file(client, str(zip_path), ArtifactType.TESTSCRIPT)
@@ -346,46 +644,31 @@ def main() -> int:
         log.error("Artifact upload failed")
         return 1
 
-    wait_for_capacity(client)
-
-    job_id = qdc_api.submit_job(
-        public_api_client=client,
-        target_id=target_id,
-        job_name="llama.cpp Hexagon tests",
-        external_job_id=None,
-        job_type=JobType.AUTOMATED,
-        job_mode=JobMode.APPLICATION,
-        timeout=max(1, args.job_timeout // 60),
-        test_framework=TestFramework.APPIUM,
-        entry_script=None,
-        job_artifacts=[artifact_id],
-        monkey_events=None,
-        monkey_session_timeout=None,
-        job_parameters=[JobSubmissionParameter.WIFIENABLED],
-    )
-    if job_id is None:
-        log.error("Job submission failed")
+    max_attempts = 1 + args.retries
+    for attempt in range(1, max_attempts + 1):
+        try:
+            result = _submit_and_run_job(client, args, spec, target_id, artifact_id)
+            break
+        except DeviceUnavailableError as e:
+            if attempt < max_attempts:
+                log.warning(
+                    "Attempt %d/%d failed (device unavailable): %s — retrying in %ds",
+                    attempt, max_attempts, e, args.retry_delay,
+                )
+                time.sleep(args.retry_delay)
+            else:
+                log.error(
+                    "Attempt %d/%d failed (device unavailable): %s — no retries left",
+                    attempt, max_attempts, e,
+                )
+                write_summary(
+                    JobResult(passed=False, tests={}),
+                    title=f"QDC Device Unavailable ({args.device})",
+                )
+                return 1
+    else:
         return 1
-    log.info("Job submitted: %s  (device=%s)", job_id, args.device)
 
-    try:
-        job_status = wait_for_job(client, job_id, timeout=args.job_timeout)
-    except TimeoutError as e:
-        log.error("%s", e)
-        write_summary(JobResult(passed=False, tests={}), title=f"QDC Job Timed Out ({args.device})")
-        return 1
-    log.info("Job %s finished: %s", job_id, job_status)
-
-    wait_for_log_upload(client, job_id)
-    tests, raw_logs, failure_details = fetch_logs_and_parse_tests(client, job_id)
-
-    passed = job_status == JobState.COMPLETED.value.lower()
-    if tests:
-        passed = passed and all(tests.values())
-    if not passed:
-        log.error("Job did not complete successfully or tests failed (status=%s)", job_status)
-
-    result = JobResult(passed=passed, tests=tests, raw_logs=raw_logs, failure_details=failure_details)
     if args.test == "backend-ops":
         title = f"Backend Ops — HTP0 ({args.device})"
     elif args.test == "all":
@@ -394,7 +677,7 @@ def main() -> int:
         title = f"QDC Test Results ({args.device})"
     write_summary(result, title=title)
 
-    return 0 if passed else 1
+    return 0 if result.passed else 1
 
 
 if __name__ == "__main__":
diff --git a/scripts/snapdragon/qdc/tests/linux/run_linux.sh b/scripts/snapdragon/qdc/tests/linux/run_linux.sh
new file mode 100644
index 0000000000..a6abf8ec30
--- /dev/null
+++ b/scripts/snapdragon/qdc/tests/linux/run_linux.sh
@@ -0,0 +1,232 @@
+#!/bin/bash
+# llama.cpp Hexagon test entry script for QDC Linux IoT (BASH framework).
+#
+# Placeholders substituted by run_qdc_jobs.py (--platform linux) before upload:
+#   {MODEL_URL}   direct URL to a .gguf model file
+#   {TEST_MODE}   bench | backend-ops | all
+#
+# QDC extracts the artifact zip to /data/local/tmp/TestContent/ and invokes
+# this script via: /bin/bash /data/local/tmp/TestContent/run_linux.sh
+# Any files written under /data/local/tmp/QDC_logs/ are auto-uploaded.
+
+set +e
+umask 022
+
+LOG_DIR=/data/local/tmp/QDC_logs
+BUNDLE_DIR=/data/local/tmp/TestContent/llama_cpp_bundle
+MODEL_DIR=/data/local/tmp/gguf
+MODEL_PATH="$MODEL_DIR/model.gguf"
+RESULTS_XML="$LOG_DIR/results.xml"
+
+mkdir -p "$LOG_DIR" "$MODEL_DIR"
+# Redirect all parent-shell output to script.log so QDC auto-uploads it;
+# per-case runs still capture their own stdout/stderr into dedicated logs.
+exec > "$LOG_DIR/script.log" 2>&1
+
+echo "=== env ==="
+date -u
+uname -a
+pwd
+
+mount -o rw,remount / 2>/dev/null || true
+
+cd "$BUNDLE_DIR" || { echo "FATAL: bundle missing at $BUNDLE_DIR"; exit 1; }
+chmod +x bin/* 2>/dev/null
+export LD_LIBRARY_PATH="$BUNDLE_DIR/lib:$LD_LIBRARY_PATH"
+export ADSP_LIBRARY_PATH="$BUNDLE_DIR/lib"
+export GGML_HEXAGON_EXPERIMENTAL=1
+
+echo "=== download model ==="
+MODEL_URL="{MODEL_URL}"
+if [ -z "$MODEL_URL" ]; then
+  echo "No model URL provided, skipping download"
+elif [ ! -f "$MODEL_PATH" ]; then
+  curl -L -fS --retry 3 --retry-delay 5 -o "$MODEL_PATH" "$MODEL_URL"
+  curl_rc=$?
+  if [ $curl_rc -ne 0 ]; then
+    echo "FATAL: model download failed (rc=$curl_rc)"
+    exit 1
+  fi
+  ls -la "$MODEL_PATH"
+fi
+
+# ---------------------------------------------------------------------------
+# JUnit XML helpers
+# ---------------------------------------------------------------------------
+
+xml_open() {
+  printf '%s\n' \
+    '<?xml version="1.0" encoding="utf-8"?>' \
+    "<testsuites>" \
+    "<testsuite name=\"llama_cpp_linux\">" \
+    > "$RESULTS_XML"
+}
+
+xml_close() {
+  printf '%s\n' '</testsuite>' '</testsuites>' >> "$RESULTS_XML"
+}
+
+xml_case_pass() {
+  local classname=$1 name=$2
+  printf '<testcase classname="%s" name="%s"/>\n' "$classname" "$name" >> "$RESULTS_XML"
+}
+
+xml_case_fail() {
+  local classname=$1 name=$2 rc=$3 logfile=$4
+  {
+    printf '<testcase classname="%s" name="%s">\n<failure message="exit %s"><![CDATA[\n' "$classname" "$name" "$rc"
+    # XML 1.0 forbids most control bytes, even inside CDATA; strip them so the host can still parse results.xml.
+    tail -c 4096 "$logfile" 2>/dev/null | tr -d '\000-\010\013\014\016-\037' | sed 's/]]>/]] >/g'
+    printf '\n]]></failure>\n</testcase>\n'
+  } >> "$RESULTS_XML"
+}
+
+# Map backend name -> "NDEV --device" pair. "none" means no offload (CPU).
+backend_env() {
+  case "$1" in
+    cpu) echo "0 none" ;;
+    gpu) echo "0 GPUOpenCL" ;;
+    npu) echo "1 HTP0" ;;
+  esac
+}
+
+backend_log_name() {
+  case "$1" in
+    cpu) echo "cpu" ;;
+    gpu) echo "gpu" ;;
+    npu) echo "htp" ;;
+  esac
+}
+
+
+backend_device_name() {
+  case "$1" in
+    cpu) echo "none" ;;
+    gpu) echo "GPUOpenCL" ;;
+    npu) echo "HTP0" ;;
+  esac
+}
+
+# Append a diagnostic block when a per-case `timeout N` fires (rc=124). The
+# naked log file at that point usually just ends mid-OpenCL-init with no
+# stderr, which is hard to read in CI summaries.
+note_timeout_if_triggered() {
+  local rc=$1 budget=$2 log=$3
+  [ "$rc" -eq 124 ] || return 0
+  {
+    printf '\n'
+    printf '=== TIMEOUT after %ss ===\n' "$budget"
+    printf 'uptime: '; uptime 2>/dev/null
+    printf 'free -m:\n'; free -m 2>/dev/null
+    printf 'loadavg: '; cat /proc/loadavg 2>/dev/null
+  } >> "$log"
+}
+
+completion_extra_args() {
+  case "$1" in
+    cpu) echo "--device none --ctx-size 128 -no-cnv -n 32 --seed 42 --batch-size 128" ;;
+    gpu) echo "--device GPUOpenCL --ctx-size 128 -no-cnv -n 32 --seed 42 --ubatch-size 512" ;;
+    npu) echo "--device HTP0 --ctx-size 128 -no-cnv -n 32 --seed 42 --ubatch-size 1024" ;;
+  esac
+}
+
+run_completion_case() {
+  local name=$1
+  local parts=($(backend_env "$name"))
+  local ndev=${parts[0]} device=${parts[1]}
+  local device_log_name=$(backend_device_name "$name")
+  local log="$LOG_DIR/llama_completion_${device_log_name}.log"
+  local prompt="$LOG_DIR/bench_prompt.txt"
+  echo 'What is the capital of France?' > "$prompt"
+  local extra
+  extra=$(completion_extra_args "$name")
+  echo "=== [completion:$name] llama-completion --device $device (NDEV=$ndev) ==="
+  timeout 600 env GGML_HEXAGON_NDEV=$ndev ./bin/llama-completion \
+      -m "$MODEL_PATH" \
+      -f "$prompt" \
+      $extra \
+      > "$log" 2>&1 < /dev/null
+  local rc=$?
+  note_timeout_if_triggered "$rc" 600 "$log"
+  if [ $rc -eq 0 ]; then
+    xml_case_pass "tests.test_bench_posix" "test_llama_completion[$name]"
+  else
+    xml_case_fail "tests.test_bench_posix" "test_llama_completion[$name]" "$rc" "$log"
+  fi
+}
+
+run_bench_case() {
+  local name=$1
+  local parts=($(backend_env "$name"))
+  local ndev=${parts[0]} device=${parts[1]}
+  local log_suffix=$(backend_log_name "$name")
+  local log="$LOG_DIR/llama_bench_${log_suffix}.log"
+  echo "=== [bench:$name] llama-bench --device $device (NDEV=$ndev) ==="
+  timeout 600 env GGML_HEXAGON_NDEV=$ndev ./bin/llama-bench \
+      -m "$MODEL_PATH" \
+      --device "$device" \
+      -ngl 99 \
+      --batch-size 128 \
+      -t 4 \
+      -p 128 \
+      -n 32 \
+      > "$log" 2>&1
+  local rc=$?
+  note_timeout_if_triggered "$rc" 600 "$log"
+  if [ $rc -eq 0 ]; then
+    xml_case_pass "tests.test_bench_posix" "test_llama_bench[$name]"
+  else
+    xml_case_fail "tests.test_bench_posix" "test_llama_bench[$name]" "$rc" "$log"
+  fi
+}
+
+run_backend_ops_case() {
+  local dtype=$1
+  local log="$LOG_DIR/backend_ops_${dtype}.log"
+  local pattern
+  case "$dtype" in
+    q4_0)
+      # Matches Android: exclude a known-broken shape on NPU.
+      pattern='^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$'
+      ;;
+    *)
+      pattern="type_a=${dtype}"
+      ;;
+  esac
+  echo "=== [backend-ops:$dtype] test-backend-ops -b HTP0 -o MUL_MAT ==="
+  timeout 600 env GGML_HEXAGON_NDEV=1 GGML_HEXAGON_HOSTBUF=0 ./bin/test-backend-ops \
+      -b HTP0 -o MUL_MAT -p "$pattern" \
+      > "$log" 2>&1
+  local rc=$?
+  note_timeout_if_triggered "$rc" 600 "$log"
+  if [ $rc -eq 0 ]; then
+    xml_case_pass "tests.test_backend_ops_posix" "test_backend_ops_htp0[$dtype]"
+  else
+    xml_case_fail "tests.test_backend_ops_posix" "test_backend_ops_htp0[$dtype]" "$rc" "$log"
+  fi
+}
+
+xml_open
+
+case "{TEST_MODE}" in
+  bench)
+    for b in cpu gpu npu; do run_completion_case "$b"; done
+    for b in cpu gpu npu; do run_bench_case "$b"; done
+    ;;
+  backend-ops)
+    for d in mxfp4 fp16 q4_0; do run_backend_ops_case "$d"; done
+    ;;
+  all)
+    for b in cpu gpu npu; do run_completion_case "$b"; done
+    for b in cpu gpu npu; do run_bench_case "$b"; done
+    for d in mxfp4 fp16 q4_0; do run_backend_ops_case "$d"; done
+    ;;
+  *)
+    echo "FATAL: unsupported TEST_MODE={TEST_MODE}"
+    xml_case_fail "tests.run_linux" "test_mode" 2 "$LOG_DIR/script.log" ;;
+esac
+
+xml_close
+echo "=== done ==="
+# Host parses results.xml to decide pass/fail, so every failure (even a bad TEST_MODE) must be recorded there.
+exit 0
diff --git a/scripts/snapdragon/qdc/tests/run_backend_ops_posix.py b/scripts/snapdragon/qdc/tests/run_backend_ops_posix.py
index 958fc07476..355bf6c6a5 100644
--- a/scripts/snapdragon/qdc/tests/run_backend_ops_posix.py
+++ b/scripts/snapdragon/qdc/tests/run_backend_ops_posix.py
@@ -1,8 +1,9 @@
 """
 On-device test-backend-ops runner for llama.cpp (HTP0 backend).
 
-Executed by QDC's Appium test framework on the QDC runner.
+On Android: executed by QDC's Appium test framework on the QDC runner.
 The runner has ADB access to the allocated device.
+On Linux: runs test-backend-ops directly via run_linux.sh (BASH framework).
 """
 
 import os
@@ -10,7 +11,12 @@ import sys
 
 import pytest
 
-from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
+from utils import (
+    BIN_PATH,
+    push_bundle_if_needed,
+    run_script,
+    write_qdc_log,
+)
 
 
 @pytest.fixture(scope="session", autouse=True)
@@ -20,17 +26,21 @@ def install(driver):
 
 @pytest.mark.parametrize("type_a", ["mxfp4", "fp16", "q4_0"])
 def test_backend_ops_htp0(type_a):
-    cmd = f"{CMD_PREFIX} GGML_HEXAGON_HOSTBUF=0 GGML_HEXAGON_EXPERIMENTAL=1 {BIN_PATH}/test-backend-ops -b HTP0 -o MUL_MAT"
     if type_a == "q4_0":
-        cmd += r' -p "^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$"'
+        pattern = r'^(?=.*type_a=q4_0)(?!.*type_b=f32,m=576,n=512,k=576).*$'
     else:
-        cmd += f" -p type_a={type_a}"
-    result = run_adb_command(
-        cmd,
-        check=False,
+        pattern = f"type_a={type_a}"
+
+    quoted_pattern = f'"{pattern}"' if type_a == "q4_0" else pattern
+    result = run_script(
+        "run-tool.sh",
+        extra_env={"HB": "0"},
+        extra_args=["test-backend-ops", "-b", "HTP0", "-o", "MUL_MAT", "-p", quoted_pattern],
     )
     write_qdc_log(f"backend_ops_{type_a}.log", result.stdout or "")
-    assert result.returncode == 0, f"test-backend-ops type_a={type_a} failed (exit {result.returncode})"
+    assert result.returncode == 0, (
+        f"test-backend-ops type_a={type_a} failed (exit {result.returncode})"
+    )
 
 
 if __name__ == "__main__":
diff --git a/scripts/snapdragon/qdc/tests/run_bench_tests_posix.py b/scripts/snapdragon/qdc/tests/run_bench_tests_posix.py
index 44802c3136..f42227c9f6 100644
--- a/scripts/snapdragon/qdc/tests/run_bench_tests_posix.py
+++ b/scripts/snapdragon/qdc/tests/run_bench_tests_posix.py
@@ -1,11 +1,13 @@
 """
 On-device bench and completion test runner for llama.cpp (CPU, GPU, NPU backends).
 
-Executed by QDC's Appium test framework on the QDC runner.
-The runner has ADB access to the allocated device.
+On Android: calls upstream run-*.sh scripts from llama.cpp/scripts/snapdragon/adb/
+on the QDC runner host (scripts wrap commands in ``adb shell`` internally).
+
+On Linux: runs llama-completion and llama-bench directly via run_linux.sh (BASH framework).
 
 Placeholders replaced at artifact creation time by run_qdc_jobs.py:
-  <<MODEL_URL>>  Direct URL to the GGUF model file (downloaded on-device via curl)
+  <<MODEL_URL>>  Direct URL to the GGUF model file (downloaded on-device)
 """
 
 import os
@@ -14,58 +16,75 @@ import sys
 
 import pytest
 
-from utils import BIN_PATH, CMD_PREFIX, push_bundle_if_needed, run_adb_command, write_qdc_log
+from utils import (
+    BIN_PATH,
+    MODEL_DEVICE_PATH,
+    MODEL_NAME,
+    PROMPT_DIR,
+    push_bundle_if_needed,
+    run_adb_command,
+    run_script,
+    write_qdc_log,
+)
 
-MODEL_PATH = "/data/local/tmp/model.gguf"
-PROMPT     = "What is the capital of France?"
-CLI_OPTS   = "--batch-size 128 -n 128 -no-cnv --seed 42"
+MODEL_URL = "<<MODEL_URL>>"
 
 
 @pytest.fixture(scope="session", autouse=True)
 def install(driver):
     push_bundle_if_needed(f"{BIN_PATH}/llama-cli")
-
-    # Skip model download if already present
+    run_adb_command(f"mkdir -p /data/local/tmp/gguf {PROMPT_DIR}")
+    run_adb_command(f"echo 'What is the capital of France?' > {PROMPT_DIR}/bench_prompt.txt")
     check = subprocess.run(
-        ["adb", "shell", f"ls {MODEL_PATH}"],
+        ["adb", "shell", f"ls {MODEL_DEVICE_PATH}"],
         text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
     )
     if check.returncode != 0:
-        run_adb_command(f'curl -L -J --output {MODEL_PATH} "<<MODEL_URL>>"')
+        run_adb_command(f'curl -L -fS --retry 3 --retry-delay 5 -J --output {MODEL_DEVICE_PATH} "{MODEL_URL}"')
 
 
-@pytest.mark.parametrize("device,extra_flags", [
-    pytest.param("none",      "-ctk q8_0 -ctv q8_0", id="cpu"),
-    pytest.param("GPUOpenCL", "",                     id="gpu"),
-    pytest.param("HTP0",      "-ctk q8_0 -ctv q8_0", id="npu"),
-])
-def test_llama_completion(device, extra_flags):
-    result = run_adb_command(
-        f'{CMD_PREFIX} {BIN_PATH}/llama-completion'
-        f' -m {MODEL_PATH} --device {device} -ngl 99 -t 4 {CLI_OPTS} {extra_flags} -fa on'
-        f' -p "{PROMPT}"',
-        check=False,
+@pytest.mark.parametrize(
+    "device",
+    [
+        pytest.param("none", id="cpu"),
+        pytest.param("GPUOpenCL", id="gpu"),
+        pytest.param("HTP0", id="npu"),
+    ],
+)
+def test_llama_completion(device):
+    result = run_script(
+        "run-completion.sh",
+        extra_env={"D": device, "M": MODEL_NAME},
+        extra_args=["--batch-size", "128", "-n", "128", "--seed", "42",
+                    "-f", f"{PROMPT_DIR}/bench_prompt.txt"],
     )
     write_qdc_log(f"llama_completion_{device}.log", result.stdout or "")
-    assert result.returncode == 0, f"llama-completion {device} failed (exit {result.returncode})"
+    assert result.returncode == 0, (
+        f"llama-completion {device} failed (exit {result.returncode})"
+    )
 
 
 _DEVICE_LOG_NAME = {"none": "cpu", "GPUOpenCL": "gpu", "HTP0": "htp"}
 
 
-@pytest.mark.parametrize("device", [
-    pytest.param("none",      id="cpu"),
-    pytest.param("GPUOpenCL", id="gpu"),
-    pytest.param("HTP0",      id="npu"),
-])
+@pytest.mark.parametrize(
+    "device",
+    [
+        pytest.param("none", id="cpu"),
+        pytest.param("GPUOpenCL", id="gpu"),
+        pytest.param("HTP0", id="npu"),
+    ],
+)
 def test_llama_bench(device):
-    result = run_adb_command(
-        f"{CMD_PREFIX} {BIN_PATH}/llama-bench"
-        f" -m {MODEL_PATH} --device {device} -ngl 99 --batch-size 128 -t 4 -p 128 -n 32",
-        check=False,
+    result = run_script(
+        "run-bench.sh",
+        extra_env={"D": device, "M": MODEL_NAME},
+        extra_args=["--batch-size", "128", "-p", "128", "-n", "32"],
     )
     write_qdc_log(f"llama_bench_{_DEVICE_LOG_NAME[device]}.log", result.stdout or "")
-    assert result.returncode == 0, f"llama-bench {device} failed (exit {result.returncode})"
+    assert result.returncode == 0, (
+        f"llama-bench {device} failed (exit {result.returncode})"
+    )
 
 
 if __name__ == "__main__":
diff --git a/scripts/snapdragon/qdc/tests/utils.py b/scripts/snapdragon/qdc/tests/utils.py
index 00f0f1b2f9..fad6a92329 100644
--- a/scripts/snapdragon/qdc/tests/utils.py
+++ b/scripts/snapdragon/qdc/tests/utils.py
@@ -1,5 +1,7 @@
 """Shared helpers for QDC on-device test runners."""
 
+from __future__ import annotations
+
 import logging
 import os
 import subprocess
@@ -13,16 +15,14 @@ log = logging.getLogger(__name__)
 # On-device paths
 # ---------------------------------------------------------------------------
 
-BUNDLE_PATH  = "/data/local/tmp/llama_cpp_bundle"
+BUNDLE_PATH = "/data/local/tmp/llama.cpp"
+BIN_PATH = f"{BUNDLE_PATH}/bin"
+LIB_PATH = f"{BUNDLE_PATH}/lib"
 QDC_LOGS_PATH = "/data/local/tmp/QDC_logs"
-LIB_PATH    = f"{BUNDLE_PATH}/lib"
-BIN_PATH    = f"{BUNDLE_PATH}/bin"
-ENV_PREFIX  = (
-    f"export LD_LIBRARY_PATH={LIB_PATH} && "
-    f"export ADSP_LIBRARY_PATH={LIB_PATH} && "
-    f"chmod +x {BIN_PATH}/* &&"
-)
-CMD_PREFIX  = f"cd {BUNDLE_PATH} && {ENV_PREFIX}"
+SCRIPTS_DIR = "/qdc/appium"
+MODEL_NAME = "model.gguf"
+MODEL_DEVICE_PATH = "/data/local/tmp/gguf/model.gguf"
+PROMPT_DIR = "/data/local/tmp/scorecard_prompts"
 
 # ---------------------------------------------------------------------------
 # Appium session options
@@ -34,16 +34,47 @@ options.set_capability("platformName", "Android")
 options.set_capability("deviceName", os.getenv("ANDROID_DEVICE_VERSION"))
 
 # ---------------------------------------------------------------------------
-# ADB helpers
+# Shell / process helpers
+# ---------------------------------------------------------------------------
+
+
+def write_qdc_log(filename: str, content: str) -> None:
+    """Write content as a log file for QDC log collection."""
+    subprocess.run(
+        ["adb", "shell", f"mkdir -p {QDC_LOGS_PATH}"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+    )
+    with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False) as f:
+        f.write(content)
+        tmp_path = f.name
+    try:
+        subprocess.run(
+            ["adb", "push", tmp_path, f"{QDC_LOGS_PATH}/{filename}"],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+        )
+    finally:
+        os.unlink(tmp_path)
+
+
+def ensure_bundle(check_binary: str | None = None) -> None:
+    """Ensure the llama_cpp_bundle is available on the target device."""
+    push_bundle_if_needed(check_binary or f"{BIN_PATH}/llama-cli")
+
+
+# ---------------------------------------------------------------------------
+# Android / Linux host helpers
 # ---------------------------------------------------------------------------
 
 
 def run_adb_command(cmd: str, *, check: bool = True) -> subprocess.CompletedProcess:
-    # Append exit-code sentinel because `adb shell` doesn't reliably propagate
-    # the on-device exit code (older ADB versions always return 0).
+    """Run a command on-device via ``adb shell`` with exit-code sentinel."""
     raw = subprocess.run(
         ["adb", "shell", f"{cmd}; echo __RC__:$?"],
-        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+        text=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
     )
     stdout = raw.stdout
     returncode = raw.returncode
@@ -55,39 +86,58 @@ def run_adb_command(cmd: str, *, check: bool = True) -> subprocess.CompletedProc
                 stdout = "\n".join(lines[:-1]) + "\n"
             except ValueError:
                 pass
-    log.info("%s", stdout)
+    log.info(stdout)
     result = subprocess.CompletedProcess(raw.args, returncode, stdout=stdout)
     if check:
         assert returncode == 0, f"Command failed (exit {returncode})"
     return result
 
 
-def write_qdc_log(filename: str, content: str) -> None:
-    """Push content as a log file to QDC_LOGS_PATH on the device for QDC log collection."""
-    subprocess.run(
-        ["adb", "shell", f"mkdir -p {QDC_LOGS_PATH}"],
-        stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+def run_script(
+    script: str,
+    extra_env: dict[str, str] | None = None,
+    extra_args: list[str] | None = None,
+) -> subprocess.CompletedProcess:
+    """Run an upstream shell script from /qdc/appium/ on the QDC runner host."""
+    env = os.environ.copy()
+    env["GGML_HEXAGON_EXPERIMENTAL"] = "1"
+    if extra_env:
+        env.update(extra_env)
+    cmd = [f"{SCRIPTS_DIR}/{script}"] + (extra_args or [])
+    result = subprocess.run(
+        cmd, env=env,
+        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+    )
+    log.info(result.stdout)
+    return result
+
+
+def adb_shell(cmd: str) -> None:
+    """Run a command via adb shell (fire-and-forget, no error check)."""
+    # adb joins its arguments into one remote command line, so cmd must be passed as a single argument.
+    subprocess.run(
+        ["adb", "shell", cmd], capture_output=True, encoding="utf-8", errors="replace", check=False,
+    )
-    with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False) as f:
-        f.write(content)
-        tmp_path = f.name
-    try:
-        subprocess.run(
-            ["adb", "push", tmp_path, f"{QDC_LOGS_PATH}/{filename}"],
-            stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-        )
-    finally:
-        os.unlink(tmp_path)
 
 
 def push_bundle_if_needed(check_binary: str) -> None:
     """Push llama_cpp_bundle to the device if check_binary is not already present."""
     result = subprocess.run(
         ["adb", "shell", f"ls {check_binary}"],
-        text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+        text=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
     )
     if result.returncode != 0:
         subprocess.run(
-            ["adb", "push", "/qdc/appium/llama_cpp_bundle/", "/data/local/tmp"],
-            text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+            ["adb", "push", "/qdc/appium/llama_cpp_bundle/", BUNDLE_PATH],
+            text=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+        )
+        subprocess.run(
+            ["adb", "shell", f"find {BUNDLE_PATH}/bin -type f -exec chmod 755 {{}} +"],
+            text=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
         )
diff --git a/ty.toml b/ty.toml
index a07d7485d4..ad88ac7bda 100644
--- a/ty.toml
+++ b/ty.toml
@@ -13,7 +13,7 @@ exclude = [
 [[overrides]]
 include = [
     "./tools/server/tests/**",
-    "./scripts/snapdragon/qdc/tests/**",
+    "./scripts/snapdragon/qdc/**",
 ]
 
 [overrides.rules]