From 72108ba08ba4fc3f1ca470cd8e50ec139492f1db Mon Sep 17 00:00:00 2001 From: "bob_Pierce.Cai" Date: Tue, 2 Jun 2026 12:04:52 +0800 Subject: [PATCH 1/2] =?UTF-8?q?[doc]=20=E6=96=B0=E5=A2=9E=20AIOS=201.3.0.0?= =?UTF-8?q?03=20=E5=9F=BA=E4=BA=8E=E9=BA=92=E9=BA=9F=E7=B3=BB=E7=BB=9F=20G?= =?UTF-8?q?PU/NPU/VPU=20=E6=8E=A8=E7=90=86=E6=96=87=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../AIBOX-kylin-docker-vllm.md | 736 ++++++++++++++++++ .../T035-Kylin-Docker-GPU.md | 701 +++++++++++++++++ .../T035-Kylin-Docker-NPU-VPN.md | 564 ++++++++++++++ ...41\345\236\213\346\216\250\347\220\206.md" | 691 ++++++++++++++++ 4 files changed, 2692 insertions(+) create mode 100644 thirdparty/skynoon/user_cases/AI-BOX-MTSDK-1.3.0/AIBOX-kylin-docker-vllm.md create mode 100644 thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.3.0/T035-Kylin-Docker-GPU.md create mode 100644 thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.3.0/T035-Kylin-Docker-NPU-VPN.md create mode 100644 "thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.3.0/T035-Kylin-Docker-vllm-musa\346\250\241\345\236\213\346\216\250\347\220\206.md" diff --git a/thirdparty/skynoon/user_cases/AI-BOX-MTSDK-1.3.0/AIBOX-kylin-docker-vllm.md b/thirdparty/skynoon/user_cases/AI-BOX-MTSDK-1.3.0/AIBOX-kylin-docker-vllm.md new file mode 100644 index 0000000..91ad750 --- /dev/null +++ b/thirdparty/skynoon/user_cases/AI-BOX-MTSDK-1.3.0/AIBOX-kylin-docker-vllm.md @@ -0,0 +1,736 @@ +

+ logo +

+

摩尔线程-AIBOX

+

Kylin 基于 Docker 的 vllm-musa 模型安装指引

+ +
+

lastAuthor:Sandy.xu

+

lastDate:2026-05-27

+
+ + + + + + + +在Kylin系统上,使用Docker容器环境,基于**PyTorch MUSA**和**MTNN**技术栈,部署和运行AI模型。 + +# 准备工作 + +## 工作目录 + +为了方便后续操作,我们先约定一个工作目录。在接下来的指引中,我们将用 **`${WorkDir}`** 表示这个目录的路径。请您在开始前先**创建好您的工作目录**,并确保后续所有操作都在此目录下进行。 + +## 系统环境要求 + +| 组件 | 版本 | +| ----------- | ----------- | +| MUSA Driver | 3.1.3-AB100 | +| MUSA SDK | 4.1.4 | +| U-Boot SDK | 1.3.0.003 | + +## 查看musa环境 + +```shell +# 查看驱动 +dpkg -l|grep musa + +# 输出 +ii musa 3.1.3-AB100 arm64 Moore Threads MUSA driver [74ceff2b9] +ii musa-sdk 4.1.4 arm64 Moore Threads MTGPU Software Development Kit + +# 查看环境 +ll /usr/local/ |grep musa* + +# 输出 +lrwxrwxrwx 1 root root 10 Dec 24 15:08 musa -> musa-4.1.2/ +drwxr-xr-x 13 root root 4096 Feb 4 16:27 musa-4.1.2/ +``` + +## 安装musa(若不满足) + +> **请先解锁桌面!** +> +> 为以后方便,请在桌面右上角点击:电源->设置->用户->自动登录 + +百度网盘: https://pan.baidu.com/s/1IODTOkvPXAhhKDwTNpHOJA?pwd=ngwm + +**安装包下载和解压** + +```shell +wget -c https://mt-ai-data.tos-cn-shanghai.volces.com/vllm_musa/v1.3/release_M1000_1.3.0.003/musa_m1000_release_1.3.0.003_20251223.tar.gz + +tar zxvf musa_m1000_release_1.3.0.003_20251223.tar.gz +cd musa_m1000_release_1.3.0.003_20251223 +``` + +**安装解压** + +```shell +##################### 首先卸载当前驱动 ###################### +sudo dpkg -P musa-sdk +sudo dpkg -P musa # 输出 cryptsetup 的一些报错是正常的 +sudo rm -rf /usr/local/musa* + +##################### 安装新的驱动 ###################### +sudo dpkg -i musa_3.1.3-AB100_arm64.deb +sudo dpkg -i musa-sdk_4.1.4-20251223_arm64.deb + +tar zxvf muDNN_2.9.4-AB100.tar.gz +cd mudnn/ +sudo bash install_mudnn.sh + +##################### 重启电脑 ###################### +sudo reboot +``` + +## 系统版本 + +```shell +kylin@kylinos:~$ uname -a +Linux kylinos 6.6.10 #1 SMP PREEMPT Wed Jan 7 01:30:30 UTC 2026 aarch64 aarch64 aarch64 GNU/Linux + +kylin@kylinos:~$ cat /etc/os-release +NAME="Kylin" +VERSION="银河麒麟桌面操作系统 (工业版) V10 SP1 2503" +VERSION_US="Kylin Linux Desktop Industry V10 SP1 2503" +ID=kylin +ID_LIKE=debian +PRETTY_NAME="Kylin Industry V10 SP1" +VERSION_ID="v10" 
+HOME_URL="http://www.kylinos.cn/" +SUPPORT_URL="http://www.kylinos.cn/support/technology.html" +BUG_REPORT_URL="http://www.kylinos.cn/" +PRIVACY_POLICY_URL="http://www.kylinos.cn" +VERSION_CODENAME=kylin +UBUNTU_CODENAME=kylin +PROJECT_CODENAME=V10SP1 +KYLIN_RELEASE_ID="2503" + +``` + +# 安装docker + +```shell +# 更新 apt 缓存 +sudo apt update + +# 下载并添加 Docker 官方 GPG 密钥 +sudo mkdir -p /etc/apt/keyrings +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg + +# 添加适配 arm64 架构的 Docker 软件源 +echo "deb [arch=arm64 signed-by=/etc/apt/keyrings/docker.gpg] http://mirrors.aliyun.com/docker-ce/linux/ubuntu focal stable" | sudo tee /etc/apt/sources.list.d/docker.list + +# 再次更新软件包列表,让刚才添加的源生效 +sudo apt update + + +# 安装 Docker 核心组件 +sudo apt install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + +# 将当前用户加入 docker 用户组 +sudo usermod -aG docker $USER + +# 刷新用户组权限,让设置在当前终端立即生效 +newgrp docker + +# 全局生效 +sudo reboot +``` + +# 编辑Docker脚本 +```bash +# 安装nano编辑器 +sudo apt install -y nano +# 编辑并创建Dockerfile,将后面代码粘贴进来 ctrl+s保存; ctrl+x退出 +nano Dockerfile +``` + +把以下代码粘贴到Dockerfile中: +```dockerfile +ARG IMAGE_BASE=ubuntu:22.04 +FROM $IMAGE_BASE + +# 设置Ubuntu APT源为国内源 +ARG APT_URI=http://mirrors.tuna.tsinghua.edu.cn/ubuntu-ports +RUN mv /etc/apt/sources.list /etc/apt/sources.list.bak 2>/dev/null || true && \ + echo "deb ${APT_URI}/ jammy main restricted universe multiverse" > /etc/apt/sources.list && \ + echo "deb ${APT_URI}/ jammy-updates main restricted universe multiverse" >> /etc/apt/sources.list && \ + echo "deb ${APT_URI}/ jammy-backports main restricted universe multiverse" >> /etc/apt/sources.list && \ + echo "deb ${APT_URI}/ jammy-security main restricted universe multiverse" >> /etc/apt/sources.list && \ + cat /etc/apt/sources.list + + +# 创建.pip/pip.conf文件以设置pip使用国内源 +ARG PIP_URI +RUN mkdir -p /root/.pip && \ + cat <<EOL > /root/.pip/pip.conf +[global] +index-url = $PIP_URI +EOL + +# 安装必要的系统依赖项(最小化安装) 
+RUN apt-get update && apt-get install -y --fix-missing \ + ca-certificates \ + net-tools software-properties-common \ + vim curl bzip2 wget tmux python3 python3-pip \ + git gdb build-essential \ + libopenmpi-dev libelf-dev libopenblas-dev \ + && ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \ + && echo "Asia/Shanghai" > /etc/timezone \ + && rm -rf /var/lib/apt/lists/* + +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 + +# 安装基础库 +RUN pip install ahocorapy ipykernel numpy==1.26 + +# 安装PyTorch +ARG PYTORCH_URI +ARG TORCH_MUSA_URI +ARG TORCHVISION_URI +ARG TORCHAUDIO_URI +ARG TRITON_URI +RUN pip install --no-cache-dir $PYTORCH_URI +RUN pip install --no-cache-dir $TORCH_MUSA_URI +RUN pip install --no-cache-dir $TORCHVISION_URI +RUN pip install --no-cache-dir $TORCHAUDIO_URI +RUN pip install --no-cache-dir $TRITON_URI + +# 创建playground目录并在其中创建test_musa.py文件 +RUN mkdir -p /playground && \ + cat <<EOL > /playground/test_musa.py +#!/usr/bin/env python + +import torch +import torch_musa + +def main(): + if torch.musa.is_available(): + device_count = torch.musa.device_count() + print(f"Found {device_count} MUSA devices") + + for i in range(device_count): + device_props = torch.musa.get_device_properties(i) + print(f"Device {i}: {device_props}") + else: + print("MUSA is not available.") + +if __name__ == "__main__": + main() +EOL + +# 修改.bashrc文件,配置MUSA相关环境变量(PATH 与 LD_LIBRARY_PATH) +RUN echo "export PATH=/usr/local/musa/bin:/usr/local/musa/tools:$PATH" >> /root/.bashrc +RUN echo "export LD_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu:/usr/lib/aarch64-linux-gnu/musa:/usr/local/musa/lib:$LD_LIBRARY_PATH" >> /root/.bashrc + +# 进入playground目录, 设置默认命令,启动bash并激活环境, +WORKDIR /playground + +# 设置默认命令:启动bash +CMD ["/bin/bash", "-l"] +``` + + + +# 编辑运行脚本 + +```shell +# 编辑并创建,将后面代码粘贴进来 ctrl+s保存; ctrl+x退出 +nano run_docker_musa.sh +``` + + +把以下代码粘贴到run_docker_musa.sh中: +```shell +#!/bin/bash + +IMAGE_BASE="ubuntu:22.04" +IMAGE_NAME=${IMAGE_NAME:-"docker/e300gpu"} 
+IMAGE_TAG=${IMAGE_TAG:-"v1.3.0.003"} +DOCKERFILE=${DOCKERFILE:-"Dockerfile"} +PLATFORM=${PLATFORM:-"linux/aarch64"} +APT_URI=${APT_URI:-"http://mirrors.tuna.tsinghua.edu.cn/ubuntu-ports"} +PIP_URI=${PIP_URI:-"https://pypi.tuna.tsinghua.edu.cn/simple"} +PYTORCH_URI=${PYTORCH_URI:-"https://apollo-appstore-pre.tos-cn-beijing.volces.com/appstore/release/project/AIModule/v1.3.0/package/torch_musa/torch-2.5.0-cp310-cp310-linux_aarch64.whl"} +TORCH_MUSA_URI=${TORCH_MUSA_URI:-"https://apollo-appstore-pre.tos-cn-beijing.volces.com/appstore/release/project/AIModule/v1.3.0/package/torch_musa/torch_musa-2.1.1-cp310-cp310-linux_aarch64.whl"} +TORCHVISION_URI=${TORCHVISION_URI:-"https://apollo-appstore-pre.tos-cn-beijing.volces.com/appstore/release/project/AIModule/v1.3.0/package/torch_musa/torchvision-0.20.0a0+afc54f7-cp310-cp310-linux_aarch64.whl"} +TORCHAUDIO_URI=${TORCHAUDIO_URI:-"https://apollo-appstore-pre.tos-cn-beijing.volces.com/appstore/release/project/AIModule/v1.3.0/package/torch_musa/torchaudio-2.5.0a0+56bc006-cp310-cp310-linux_aarch64.whl"} +TRITON_URI=${TRITON_URI:-"https://apollo-appstore-pre.tos-cn-beijing.volces.com/appstore/release/pip/pytorch/2.1.0/triton-3.1.0-cp310-cp310-linux_aarch64.whl"} + + +ACTION="start" +DEAMON_MODE=false + +while [[ $# -gt 0 ]]; do + case $1 in + --start) + ACTION="start" + shift + ;; + --stop) + ACTION="stop" + shift + ;; + --build) + ACTION="build" + shift + ;; + --daemon) + DEAMON_MODE=true + shift + ;; + --help) + echo "Usage: $0 [ --build | --start [ --daemon ] | --stop ]" + echo " --build: 构建镜像" + echo " --start: 启动并进入容器" + echo " --start --daemon: 后台运行容器(不会进入容器)" + echo " --stop: 停止并删除容器" + exit 0 + ;; + *) + echo "Unknown argument: $1" + echo "Usage: $0 [ --build | --start [ --daemon ] | --stop ]" + exit 1 + ;; + esac +done + +function check_image() { + if [ -z "$IMAGE_NAME" ] || [ -z "$IMAGE_TAG" ]; then + echo "镜像名称或标签为无效,无法检查。" + return 1 + fi + + if docker inspect "${IMAGE_NAME}:${IMAGE_TAG}" > /dev/null 2>&1; then + 
echo "镜像存在: ${IMAGE_NAME}:${IMAGE_TAG}" + return 0 + else + echo "错误:镜像 ${IMAGE_NAME}:${IMAGE_TAG} 不存在。" + return 1 + fi +} + +function get_running_container() { + container=$(docker ps -a -f ancestor="${IMAGE_NAME}:${IMAGE_TAG}" --format "{{.ID}}" | head -n 1) + echo "$container" +} + +function start_container() { + if ! check_image; then + echo "请先构建镜像:bash run_docker_musa.sh --build" + exit 1 + fi + + container_id=$(get_running_container) + if [ -z "$container_id" ]; then + echo "开始启动镜像后台运行..." + echo "镜像名称: $IMAGE_NAME" + echo "镜像标签: $IMAGE_TAG" + + CUR_DIR=$(pwd) + DIR_NAME=$(basename $CUR_DIR) + + docker run -it -d \ + --privileged \ + -v /dev:/dev \ + -e DISPLAY=unix$DISPLAY \ + --net=host \ + -v /usr/local/musa:/usr/local/musa \ + -v /usr/lib/aarch64-linux-gnu/musa:/usr/lib/aarch64-linux-gnu/musa \ + -v /usr/lib/aarch64-linux-gnu/dri:/usr/lib/aarch64-linux-gnu/dri \ + -v /usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0 \ + -v /usr/lib/aarch64-linux-gnu/libGL.so.1:/usr/lib/aarch64-linux-gnu/libGL.so.1 \ + -v /usr/lib/aarch64-linux-gnu/libGLX.so.0:/usr/lib/aarch64-linux-gnu/libGLX.so.0 \ + -v /home/$USER:/home/$USER \ + -v /tmp/.X11-unix:/tmp/.X11-unix \ + -v ${CUR_DIR}:/playground/${DIR_NAME} \ + ${IMAGE_NAME}:${IMAGE_TAG} + fi + + container_id=$(get_running_container) + echo "正在进入容器: $container_id 镜像: ${IMAGE_NAME}:${IMAGE_TAG}" + if [ "$DEAMON_MODE" = true ]; then + echo "守护模式下不进入容器,退出命令行。" + else + echo "正在进入容器终端..." + docker exec -it "$container_id" bash + fi +} + +function stop_container() { + container_id=$(get_running_container) + if [ -n "$container_id" ]; then + echo "停止容器 $container_id..." + docker stop "$container_id" > /dev/null + echo "删除容器 $container_id..." + docker rm "$container_id" > /dev/null + echo "操作完成。" + else + echo "当前没有运行中的容器。" + fi +} + +function build_image() { + echo "正在构建镜像..." 
+ echo "镜像名: ${IMAGE_NAME}:${IMAGE_TAG}" + echo "平台: ${PLATFORM}" + + # 构建镜像 + docker build \ + --platform $PLATFORM \ + --build-arg IMAGE_BASE=${IMAGE_BASE} \ + --build-arg APT_URI=${APT_URI} \ + --build-arg PIP_URI=${PIP_URI} \ + --build-arg PYTORCH_URI=${PYTORCH_URI} \ + --build-arg TORCH_MUSA_URI=${TORCH_MUSA_URI} \ + --build-arg TORCHVISION_URI=${TORCHVISION_URI} \ + --build-arg TORCHAUDIO_URI=${TORCHAUDIO_URI} \ + --build-arg TRITON_URI=${TRITON_URI} \ + -f $DOCKERFILE \ + -t $IMAGE_NAME:$IMAGE_TAG . + + # 检查构建是否成功 + if [ $? -eq 0 ]; then + echo "镜像构建成功: ${IMAGE_NAME}:${IMAGE_TAG}" + else + echo "镜像构建失败" + exit 1 + fi +} + +case "$ACTION" in + "build") + build_image + ;; + "start") + start_container + ;; + "stop") + stop_container + ;; + *) + echo "Unknown action: $ACTION" + echo "Usage: $0 [ --build | --start [ --daemon ] | --stop ]" + exit 1 + ;; +esac +``` + +# 构建Docker + +```bash +bash run_docker_musa.sh --build +``` + +# 运行Docker容器 + +```shell +bash run_docker_musa.sh +``` + + +## 容器操作示例 + +```bash +# 进入容器终端 +bash run_docker_musa.sh +# ======================= 预期输出如下内容:======================= +镜像存在: docker/e300gpu:v1.3.0.003 +开始启动镜像后台运行... +镜像名称: docker/e300gpu +镜像标签: v1.3.0.003 +fc989bb666ce6c2c05b52ee80852d76a4a4adb7d5e41d1edbb6419000e3fef10 +正在进入容器: fc989bb666ce 镜像: docker/e300gpu:v1.3.0.003 +正在进入容器终端... 
+root@kylinos:/playground# + + +``` + +# 验证torch_musa环境 + +```bash +# 在容器内执行以下命令 +# 验证torch_musa环境 输出 True 证明torch_musa环境安装正确 +python3 -c "import torch;import torch_musa;print(torch.musa.is_available())" + +# 期望结果 +Error in cpuinfo: prctl(PR_SVE_GET_VL) failed # 此处正常 +True +``` + + + + + +# VLLM-MUSA 安装与验证 + +## 安装 VLLM-MUSA + +```bash +# 下载vllm-musa +wget -c https://mt-ai-data.tos-cn-shanghai.volces.com/vllm_musa/v1.2/release_1.3.0/20251011/release_1.3.0%2Bvllm_musa%2Bm1000_v1.2%2Btorch2.1.0.tar.gz + +# 解压 +tar zxvf release_1.3.0+vllm_musa+m1000_v1.2+torch2.1.0.tar.gz + +# 进入目录 +cd release_1.3.0+vllm_musa+m1000_v1.2+torch2.1.0 + +# 安装依赖和vllm-musa包 +pip3 install -r requirements.txt +pip3 install triton-3.1.0-cp310-cp310-linux_aarch64.whl flash_attn-2.6.3-py3-none-any.whl +pip3 install vllm-0.9.2*.whl vllm_musa-1.2*.whl + + +``` + + +## 验证 VLLM-MUSA + +```shell +python3 -c "from vllm_musa import _musa_custom_ops; _musa_custom_ops.decode_mla" + +# 预期输出包含 "loading torch_musa into torch.musa...",表示成功。 +``` + + + +# 模型下载 + +> 📌 提示:已下载模型权重文件(如GPTQ量化版) + +```bash +apt update +apt install git-lfs +# 克隆 +git clone https://www.modelscope.cn/hiruyun/gptq-Qwen3-8B.git +``` + + + +# 清理缓存 + +建议启动前在【宿主机】的命令行清除缓存: + +```bash +export TRITON_CACHE_DIR="/tmp/triton" && sudo sh -c "echo 3 > /proc/sys/vm/drop_caches" +``` + + + +# 模型推理演示 + +> 请在docker容器里面运行 + +## 启动模型服务 + +```shell +vllm serve ./gptq-Qwen3-8B/ \ + --served_model_name gptq-Qwen3-8B \ + --port 8000 \ + -tp 1 \ + --gpu_memory_utilization 0.7 \ + --num-gpu-blocks-override 1024 \ + --max-model-len 16384 \ + --swap_space 0 \ + --enforce-eager +``` + +服务启动后,可通过API( http://localhost:8000 )进行模型推理。 + + + +## 查看模型列表 + +另开一个窗口调用 + +### 执行命令 + +```shell +curl http://localhost:8000/v1/models +``` + +### 输出 + +```json +{ + "object": "list", + "data": [ + { + "id": "gptq-Qwen3-8B", + "object": "model", + "created": 1779879199, + "owned_by": "vllm", + "root": "./gptq-Qwen3-8B/", + "parent": null, + "max_model_len": 16384, + 
"permission": [ + { + "id": "modelperm-87f386891fb74084a93c387b3ee3790e", + "object": "model_permission", + "created": 1779879199, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + ] +} +``` + +## 发起对话请求 + +另开⼀个窗⼝调⽤,需要替换为本地的模型路径 + +### 执行命令 + +```shell +curl http://localhost:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gptq-Qwen3-8B", + "temperature": 0.7, + "top_p": 0.8, + "top_k": 20, + "repetition_penalty":1.05, + "max_tokens": 1000, + "messages": [{"role": "user", "content": "介绍一下北京"}] + }' + +# 若运行时qwen3模型,该模型默认会进行思考,可在请求体中添加 "chat_template_kwargs": {"enable_thinking": false} 来取消思考 +# 结构如下:----------------------------------------------- +curl http://localhost:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "某个Qwen3模型", + "temperature": 0.7, + "top_p": 0.8, + "top_k": 20, + "repetition_penalty":1.05, + "max_tokens": 1000, + "messages": [{"role": "user", "content": "介绍一下北京"}], + "chat_template_kwargs": {"enable_thinking": false} + }' +# ------------------------------------------------------- +``` + +### 输出 + +```json +{ + "id": "chatcmpl-5b396afee7f547b99ba4b61e633758db", + "object": "chat.completion", + "created": 1779879235, + "model": "gptq-Qwen3-8B", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "reasoning_content": null, + "content": 
"\n嗯,用户让我介绍一下北京。首先,我需要确定他们对北京的了解程度。可能是一个旅游者、学生,或者只是好奇的人。我应该涵盖北京的基本信息,比如历史、文化、著名景点,还有现代发展。不过得注意别太冗长,保持简洁明了。\n\n用户可能想知道北京作为首都的重要性,比如政治、经济、文化中心。还有历史背景,比如古代王朝的首都,像元明清等。著名景点如故宫、长城、颐和园这些必须提到。另外,现代北京的科技和教育方面,比如中关村、高校资源也很重要。\n\n还要考虑用户是否想了解北京的地理或交通情况。比如位于华北,有河流、山脉,或者地铁系统发达。另外,气候可能也是个点,但不确定是否需要,所以暂时先不提。可能还需要提到北京的美食,比如京味小吃,但用户没有特别问,可能不需要深入。\n\n有没有可能用户是计划去旅游,所以需要推荐景点?或者他们正在做作业,需要简要介绍?如果是学生的话,可能需要更结构化的回答,分点列出。不过用户没有说明,所以保持一般性的介绍比较安全。\n\n另外,要注意北京的别称,比如“京”、“北平”、“燕京”这些历史名称,可以稍微提一下。还有语言方面,北京话作为标准普通话,这点也很重要。还有体育方面的成就,比如奥运会主办城市,2008年奥运会,可能也是一个亮点。\n\n需要确保信息准确,比如历史部分,确保提到的朝代正确,比如元、明、清都建都于此。同时避免错误,比如不要混淆其他城市的历史。还要注意时间线,比如明清时期的建筑,以及现代的发展。\n\n可能用户还想知道北京的现状,比如国际地位、经济数据,但如果没有具体要求,保持基础信息即可。总结起来,应该包括:地理位置、历史地位、著名景点、文化特色、现代发展、语言与语言影响、教育科研资源、体育成就等。这样全面又不冗余,满足一般性介绍的需求。\n\n\n北京,简称“京”,是中国的首都和直辖市,位于华北平原的西北部,背靠燕山,东临渤海湾,是全国的政治、经济、文化、科技和国际交往中心,也是世界著名古都之一。以下从几个方面简要介绍:\n\n---\n\n### **1. 历史与地位**\n- **历史底蕴**:北京是中国历史上最重要的都城之一,先后有**元、明、清**三个王朝在此建都,拥有超过**800年的建都史**。周朝时曾称“燕国”,辽代称“南京”,金代称“中都”,元明清时期成为统一王朝的首都。\n- **文化象征**:作为中华文明的重要发源地之一,北京见证了无数历史事件,如**五四运动**、**抗日战争**、**新中国成立**等,承载着深厚的文化积淀。\n\n---\n\n### **2. 知名景点与文化遗产**\n- **世界遗产**:\n - **故宫**(明、清皇家宫殿,世界最大宫殿群)\n **颐和园**(中国现存规模最大的皇家园林)\n - **天坛**(明清皇帝祭天的场所,世界文化遗产)\n - **明十三陵**(明成祖至崇祯13位皇帝陵墓)\n- **标志性建筑**:\n - **天安门广场**(世界上最大的城市广场,国家象征)\n - **国家大剧院**(现代建筑艺术代表)\n - **鸟巢体育馆**(2008年奥运会主场馆)\n- **长城与山水**:**八达岭长城**(明长城重要关隘)、**香山、玉泉山**等自然景观。\n\n---\n\n### **3. 文化与生活方式**\n- **京味文化**:以**京韵大鼓**、**京剧**(“国粹”)、**胡同文化**(传统街巷格局)为代表,饮食以**炸酱面、烤鸭、豆汁儿**等特色小吃闻名。\n- **语言与语言影响**:**北京话**是普通话的标准音,对全国语言传播有深远影响。\n\n---\n\n### **4. 
现代发展与国际化**\n- **科技与教育**:**中关村**(中国最大的科技企业聚集地)、**清华大学**(世界顶尖大学)、**北京大学**(中国最高学府)均位于北京。\n- **交通与基建**:地铁网络覆盖广泛,**北京大兴国际机场**(全球最繁忙机场之一)连接全球。\n- **国际影响力**:举办过**2008年夏季奥运会**(首个获得奥运会主办权的亚洲城市)、**2019年世园", + "tool_calls": [] + }, + "logprobs": null, + "finish_reason": "length", + "stop_reason": null + } + ], + "usage": { + "prompt_tokens": 10, + "total_tokens": 1010, + "completion_tokens": 1000, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null +} +``` + + + +## VLLM参数说明 + +- **`model`**:模型路径 +- **`served_model_name`**:设置启动后模型的名称,默认使用model的路径命名 +- **`device`**:仅支持设置为`musa` +- **`tensor-parallel-size`**:目前仅支持tp=1 +- **`dtype`**: 支持默认值`auto,float16,bfloat16` +- **`kv-cache-dtype`**:仅支持默认值`auto` +- **`pipeline-parallel-size`**:仅支持默认值`1` +- **`max_num_batched_tokens`**,**`max_model_len`** :需要根据运行的序列长度进行配置,如果出现OOM可减小这两个参数值 +- **`enforce-eager`** : 表示立即执行,不启用musaGraph + + + + + + + +# 常见问题 + +## Q1:docker拉取超时/失败 + +**A1:请在 `/etc/docker/daemon.json` 中配置国内镜像加速器(如阿里云、DaoCloud 等),并重启 docker 服务。步骤如下:** + +```shell +mkdir -p /etc/docker + +tee /etc/docker/daemon.json <<-'EOF' +{ + "registry-mirrors": ["https://xxxxxx" ] +} +EOF + +# 查看是否创建了daemon.json 若无请用sudo权限重新执行一次 +cat /etc/docker/daemon.json + + +systemctl daemon-reload +systemctl restart docker +``` + +## Q2:docker 启动失败 + +容器启动失败 Error response from daemon: container xxxxis not running + +如果启动容器失败的报错如下时,请停止并清理容器后重新启动容器 + +```bash +bash run_docker_musa.sh + +# 镜像存在: docker/e300gpu:v1.3.0.003 +# 正在进入容器: dd6cb1d2ca43 镜像: docker/e300gpu:v1.3.0.003 +# 正在进入容器终端... +# Error response from daemon: container dd6cb1d2ca43a6eaafc8f25f9331fe41a6a60ed8748be9fbc3a5542e96ff446b is not running + +# 停止并且清理容器 +bash run_docker_musa.sh --stop + +# 启动进入容器终端 +bash run_docker_musa.sh + +# 如下输出成功进入================================ +# 镜像存在: docker/e300gpu:v1.3.0.003 +# 开始启动镜像后台运行... 
+# 镜像名称: docker/e300gpu +# 镜像标签: v1.3.0.003 +# d85cf094673b51916dd520b53c8918384143a97d095bfb940bf8ace1d400c3ad +# 正在进入容器: d85cf094673b 镜像: docker/e300gpu:v1.3.0.003 +# 正在进入容器终端... +# root@kylinos:/playground# + +``` + diff --git a/thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.3.0/T035-Kylin-Docker-GPU.md b/thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.3.0/T035-Kylin-Docker-GPU.md new file mode 100644 index 0000000..a249e07 --- /dev/null +++ b/thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.3.0/T035-Kylin-Docker-GPU.md @@ -0,0 +1,701 @@ +

+ logo +

+

摩尔线程-T035-Kylin

+

麒麟系统docker下调用GPU

+ +
+

lastAuthor:Pierce.Cai

+

lastDate:2026-05-27

+
+ + +# 环境要求 + +| 组件 | 版本 | +| ----------- | ----------- | +| MUSA Driver | 3.1.3-AB100 | +| MUSA SDK | 4.1.4 | +| U-Boot SDK | 1.3.0.003 | + +# 麒麟版本 + +```shell +kylin@kylinos:~$ uname -a +Linux kylinos 6.6.10 #1 SMP PREEMPT Wed Jan 7 01:30:30 UTC 2026 aarch64 aarch64 aarch64 GNU/Linux + +kylin@kylinos:~$ cat /etc/os-release +NAME="Kylin" +VERSION="银河麒麟桌面操作系统 (工业版) V10 SP1 2503" +VERSION_US="Kylin Linux Desktop Industry V10 SP1 2503" +ID=kylin +ID_LIKE=debian +PRETTY_NAME="Kylin Industry V10 SP1" +VERSION_ID="v10" +HOME_URL="http://www.kylinos.cn/" +SUPPORT_URL="http://www.kylinos.cn/support/technology.html" +BUG_REPORT_URL="http://www.kylinos.cn/" +PRIVACY_POLICY_URL="http://www.kylinos.cn" +VERSION_CODENAME=kylin +UBUNTU_CODENAME=kylin +PROJECT_CODENAME=V10SP1 +KYLIN_RELEASE_ID="2503" + +``` + +# 安装docker + +```shell + +# 更新 apt 缓存 +sudo apt update + +# 下载并添加 Docker 官方 GPG 密钥 +sudo mkdir -p /etc/apt/keyrings +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg + +# 添加适配 arm64 架构的 Docker 软件源 +echo "deb [arch=arm64 signed-by=/etc/apt/keyrings/docker.gpg] http://mirrors.aliyun.com/docker-ce/linux/ubuntu focal stable" | sudo tee /etc/apt/sources.list.d/docker.list + +# 再次更新软件包列表,让刚才添加的源生效 +sudo apt update + + +# 安装 Docker 核心组件 +sudo apt install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + +# 将当前用户加入 docker 用户组 +sudo usermod -aG docker $USER + +# 刷新用户组权限,让设置在当前终端立即生效 +newgrp docker + +# 全局生效 +sudo reboot +``` + +# 编辑Docker脚本 + +```shell +mkdir dockerData + +cd dockerData +``` + +## 编写Dockfile + +```shell +# 安装编辑器 +sudo apt update && sudo apt install nano -y +# 编进并创建Dockerfile,将后面代码粘贴进来 ctrl+s保存; ctrl+x退出 +nano Dockerfile +``` + +```shell +ARG IMAGE_BASE=ubuntu:22.04 +FROM $IMAGE_BASE + +# 设置Ubuntu APT源为国内源 +ARG APT_URI=http://mirrors.tuna.tsinghua.edu.cn/ubuntu-ports +RUN mv /etc/apt/sources.list /etc/apt/sources.list.bak 2>/dev/null || true && \ + echo "deb ${APT_URI}/ 
jammy main restricted universe multiverse" > /etc/apt/sources.list && \ + echo "deb ${APT_URI}/ jammy-updates main restricted universe multiverse" >> /etc/apt/sources.list && \ + echo "deb ${APT_URI}/ jammy-backports main restricted universe multiverse" >> /etc/apt/sources.list && \ + echo "deb ${APT_URI}/ jammy-security main restricted universe multiverse" >> /etc/apt/sources.list && \ + cat /etc/apt/sources.list + + +# 创建.pip/pip.conf文件以设置pip使用国内源 +ARG PIP_URI +RUN mkdir -p /root/.pip && \ + cat <<EOL > /root/.pip/pip.conf +[global] +index-url = $PIP_URI +EOL + +# 安装必要的系统依赖项(最小化安装) +RUN apt-get update && apt-get install -y --fix-missing \ + ca-certificates \ + net-tools software-properties-common \ + vim curl bzip2 wget tmux python3 python3-pip \ + git gdb build-essential \ + libopenmpi-dev libelf-dev libopenblas-dev \ + && ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \ + && echo "Asia/Shanghai" > /etc/timezone \ + && rm -rf /var/lib/apt/lists/* + +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 + +# 安装基础库 +RUN pip install ahocorapy ipykernel numpy==1.26 + +# 安装PyTorch +ARG PYTORCH_URI +ARG TORCH_MUSA_URI +ARG TORCHVISION_URI +ARG TORCHAUDIO_URI +ARG TRITON_URI +RUN pip install --no-cache-dir $PYTORCH_URI +RUN pip install --no-cache-dir $TORCH_MUSA_URI +RUN pip install --no-cache-dir $TORCHVISION_URI +RUN pip install --no-cache-dir $TORCHAUDIO_URI +RUN pip install --no-cache-dir $TRITON_URI + +# 创建playground目录并在其中创建test_musa.py文件 +RUN mkdir -p /playground && \ + cat <<EOL > /playground/test_musa.py +#!/usr/bin/env python + +import torch +import torch_musa + +def main(): + if torch.musa.is_available(): + device_count = torch.musa.device_count() + print(f"Found {device_count} MUSA devices") + + for i in range(device_count): + device_props = torch.musa.get_device_properties(i) + print(f"Device {i}: {device_props}") + else: + print("MUSA is not available.") + +if __name__ == "__main__": + main() +EOL + +# 修改.bashrc文件,配置MUSA相关环境变量(PATH 与 LD_LIBRARY_PATH) +RUN 
echo "export PATH=/usr/local/musa/bin:/usr/local/musa/tools:$PATH" >> /root/.bashrc +RUN echo "export LD_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu:/usr/lib/aarch64-linux-gnu/musa:/usr/local/musa/lib:$LD_LIBRARY_PATH" >> /root/.bashrc + +# 进入playground目录, 设置默认命令,启动bash并激活环境, +WORKDIR /playground + +# 设置默认命令:启动bash +CMD ["/bin/bash", "-l"] +``` + +## 编辑运行脚本 + +```shell +# 编进并创建,将后面代码粘贴进来 ctrl+s保存; ctrl+x退出 +nano run_docker_musa.sh +``` + +```shell +#!/bin/bash + +IMAGE_BASE="ubuntu:22.04" +IMAGE_NAME=${IMAGE_NAME:-"docker/e300gpu"} +IMAGE_TAG=${IMAGE_TAG:-"v1.3.0.003"} +DOCKERFILE=${DOCKERFILE:-"Dockerfile"} +PLATFORM=${PLATFORM:-"linux/aarch64"} +APT_URI=${APT_URI:-"http://mirrors.tuna.tsinghua.edu.cn/ubuntu-ports"} +PIP_URI=${PIP_URI:-"https://pypi.tuna.tsinghua.edu.cn/simple"} +PYTORCH_URI=${PYTORCH_URI:-"https://apollo-appstore-pre.tos-cn-beijing.volces.com/appstore/release/project/AIModule/v1.3.0/package/torch_musa/torch-2.5.0-cp310-cp310-linux_aarch64.whl"} +TORCH_MUSA_URI=${TORCH_MUSA_URI:-"https://apollo-appstore-pre.tos-cn-beijing.volces.com/appstore/release/project/AIModule/v1.3.0/package/torch_musa/torch_musa-2.1.1-cp310-cp310-linux_aarch64.whl"} +TORCHVISION_URI=${TORCHVISION_URI:-"https://apollo-appstore-pre.tos-cn-beijing.volces.com/appstore/release/project/AIModule/v1.3.0/package/torch_musa/torchvision-0.20.0a0+afc54f7-cp310-cp310-linux_aarch64.whl"} +TORCHAUDIO_URI=${TORCHAUDIO_URI:-"https://apollo-appstore-pre.tos-cn-beijing.volces.com/appstore/release/project/AIModule/v1.3.0/package/torch_musa/torchaudio-2.5.0a0+56bc006-cp310-cp310-linux_aarch64.whl"} +TRITON_URI=${TRITON_URI:-"https://apollo-appstore-pre.tos-cn-beijing.volces.com/appstore/release/pip/pytorch/2.1.0/triton-3.1.0-cp310-cp310-linux_aarch64.whl"} + + +ACTION="start" +DEAMON_MODE=false + +while [[ $# -gt 0 ]]; do + case $1 in + --start) + ACTION="start" + shift + ;; + --stop) + ACTION="stop" + shift + ;; + --build) + ACTION="build" + shift + ;; + --daemon) + DEAMON_MODE=true + shift + ;; + 
--help) + echo "Usage: $0 [ --build | --start [ --daemon ] | --stop ]" + echo " --build: 构建镜像" + echo " --start: 启动并进入容器" + echo " --start --daemon: 后台运行容器(不会进入容器)" + echo " --stop: 停止并删除容器" + exit 0 + ;; + *) + echo "Unknown argument: $1" + echo "Usage: $0 [ --build | --start [ --daemon ] | --stop ]" + exit 1 + ;; + esac +done + +function check_image() { + if [ -z "$IMAGE_NAME" ] || [ -z "$IMAGE_TAG" ]; then + echo "镜像名称或标签为无效,无法检查。" + return 1 + fi + + if docker inspect "${IMAGE_NAME}:${IMAGE_TAG}" > /dev/null 2>&1; then + echo "镜像存在: ${IMAGE_NAME}:${IMAGE_TAG}" + return 0 + else + echo "错误:镜像 ${IMAGE_NAME}:${IMAGE_TAG} 不存在。" + return 1 + fi +} + +function get_running_container() { + container=$(docker ps -a -f ancestor="${IMAGE_NAME}:${IMAGE_TAG}" --format "{{.ID}}" | head -n 1) + echo "$container" +} + +function start_container() { + if ! check_image; then + echo "请先构建镜像:bash run_docker_musa.sh --build" + exit 1 + fi + + container_id=$(get_running_container) + if [ -z "$container_id" ]; then + echo "开始启动镜像后台运行..." 
+ echo "镜像名称: $IMAGE_NAME" + echo "镜像标签: $IMAGE_TAG" + + CUR_DIR=$(pwd) + # Quoted so a working directory containing spaces is handled correctly. + DIR_NAME=$(basename "$CUR_DIR") + + docker run -it -d \ + --privileged \ + -v /dev:/dev \ + -e DISPLAY=unix$DISPLAY \ + --net=host \ + -v /usr/local/musa:/usr/local/musa \ + -v /usr/lib/aarch64-linux-gnu/musa:/usr/lib/aarch64-linux-gnu/musa \ + -v /usr/lib/aarch64-linux-gnu/dri:/usr/lib/aarch64-linux-gnu/dri \ + -v /usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0 \ + -v /usr/lib/aarch64-linux-gnu/libGL.so.1:/usr/lib/aarch64-linux-gnu/libGL.so.1 \ + -v /usr/lib/aarch64-linux-gnu/libGLX.so.0:/usr/lib/aarch64-linux-gnu/libGLX.so.0 \ + -v /home/$USER:/home/$USER \ + -v /tmp/.X11-unix:/tmp/.X11-unix \ + -v "${CUR_DIR}:/playground/${DIR_NAME}" \ + ${IMAGE_NAME}:${IMAGE_TAG} + fi + + container_id=$(get_running_container) + + # get_running_container lists with `docker ps -a`, so the container found may be stopped (e.g. after a host reboot). + # Start it first, otherwise `docker exec` below fails with "container ... is not running". + if [ -n "$container_id" ] && [ "$(docker inspect -f '{{.State.Running}}' "$container_id" 2>/dev/null)" != "true" ]; then + docker start "$container_id" > /dev/null + fi + + echo "正在进入容器: $container_id 镜像: ${IMAGE_NAME}:${IMAGE_TAG}" + if [ "$DEAMON_MODE" = true ]; then + echo "守护模式下不进入容器,退出命令行。" + else + echo "正在进入容器终端..." + docker exec -it "$container_id" bash + fi +} + +# Stop and remove the container created from ${IMAGE_NAME}:${IMAGE_TAG}, if one exists. +function stop_container() { + container_id=$(get_running_container) + if [ -n "$container_id" ]; then + echo "停止容器 $container_id..." + docker stop "$container_id" > /dev/null + echo "删除容器 $container_id..." + docker rm "$container_id" > /dev/null + echo "操作完成。" + else + echo "当前没有运行中的容器。" + fi +} + +# Build ${IMAGE_NAME}:${IMAGE_TAG} from $DOCKERFILE, forwarding the mirror and wheel URLs as build args. +function build_image() { + echo "正在构建镜像..." + echo "镜像名: ${IMAGE_NAME}:${IMAGE_TAG}" + echo "平台: ${PLATFORM}" + + # Build the image + docker build \ + --platform $PLATFORM \ + --build-arg IMAGE_BASE=${IMAGE_BASE} \ + --build-arg APT_URI=${APT_URI} \ + --build-arg PIP_URI=${PIP_URI} \ + --build-arg PYTORCH_URI=${PYTORCH_URI} \ + --build-arg TORCH_MUSA_URI=${TORCH_MUSA_URI} \ + --build-arg TORCHVISION_URI=${TORCHVISION_URI} \ + --build-arg TORCHAUDIO_URI=${TORCHAUDIO_URI} \ + --build-arg TRITON_URI=${TRITON_URI} \ + -f $DOCKERFILE \ + -t $IMAGE_NAME:$IMAGE_TAG . + + # Check whether the build succeeded + if [ $? 
-eq 0 ]; then + echo "镜像构建成功: ${IMAGE_NAME}:${IMAGE_TAG}" + else + echo "镜像构建失败" + exit 1 + fi +} + +case "$ACTION" in + "build") + build_image + ;; + "start") + start_container + ;; + "stop") + stop_container + ;; + *) + echo "Unknown action: $ACTION" + echo "Usage: $0 [ --build | --start [ --daemon ] | --stop ]" + exit 1 + ;; +esac +``` + +# 构建Docker + +```bash +bash run_docker_musa.sh --build + +# 输出以下代表 镜像构建完成 ✅ +镜像构建成功: docker/e300gpu:v1.3.0.003 + +``` + + + +# 运行Docker容器 + +```shell +bash run_docker_musa.sh +``` + +# 验证驱动 + +```shell +# 启动容器 +bash run_docker_musa.sh + +# ================= 预期输出 进入容器 ================= +镜像存在: docker/e300gpu:v1.3.0.003 +开始启动镜像后台运行... +镜像名称: docker/e300gpu +镜像标签: v1.3.0.003 +dd6cb1d2ca43a6eaafc8f25f9331fe41a6a60ed8748be9fbc3a5542e96ff446b +正在进入容器: dd6cb1d2ca43 镜像: docker/e300gpu:v1.3.0.003 +正在进入容器终端... +root@kylinos:/playground# + +# ================= 在容器内执以下命令 ================= +# 命令 +python3 -c "import torch;import torch_musa;print(torch.musa.is_available())" + +# 期望结果 +Error in cpuinfo: prctl(PR_SVE_GET_VL) failed # 此处正常 +True +``` + +# YOLO训练与推理 + +## 安装依赖 + +```shell +pip install https://apollo-appstore-pre.tos-cn-beijing.volces.com/appstore/release/pip/torch_compat/torch_compat-1.0.0-cp310-cp310-linux_aarch64.whl + +pip install ultralytics + +pip install pandas + +pip install numpy==1.26 +``` + +## YOLO训推一体 + +```shell +# 创建脚本 +touch yolo_train_infer.py + +# 编辑脚本 +vim yolo_train_infer.py +# 把以下代码粘贴到脚本中 + +# 保存并退出 +esc # 退出编辑模式 +:wq # 保存并退出 + +# 运行 +python yolo_train_infer.py +``` + +**代码** + +``` +#!/usr/bin/env python3 +import torch_compat as torch +from ultralytics import YOLO + +# Load a model +model = YOLO("yolo11n.pt") + +# Train the model +train_results = model.train( + data="coco8.yaml", # path to dataset YAML + epochs=100, # number of training epochs + imgsz=640, # training image size + device="0", # device to run on, i.e. 
device=0 or device=0,1,2,3 or device=cpu + amp=False, # MUST +) + + +# Evaluate model performance on the validation set +metrics = model.val() + +# Perform object detection on an image +results = model("./bus.jpg") +# 此结果需要连接显示器可看 +# results[0].show() + +# 存储结果 +results[0].save(filename="bus_detected.jpg") +``` + +## YOLO on GPU推理性能测试 + +```shell +# 创建脚本 +touch yolo_infer_perf.py + +# 编辑脚本 +vim yolo_infer_perf.py +# 把以下代码粘贴到脚本中 + +# 保存并退出 +esc # 退出编辑模式 +:wq # 保存并退出 + +# 运行 +python yolo_infer_perf.py +``` + +**代码** + +```python +#!/usr/bin/env python3 + +import torch_compat as torch; +from ultralytics import YOLO +from ultralytics.cfg import TASK2DATA, TASK2METRIC +from ultralytics.engine.exporter import export_formats +from ultralytics.utils import ASSETS, LOGGER +from ultralytics.utils.checks import check_imgsz, check_yolo +from ultralytics.utils.files import file_size +from ultralytics.utils.torch_utils import select_device + +import argparse +import time +from itertools import product +from typing import List +from pathlib import Path + +DEFAULT_MODELS = [ + "yolov5n.pt", + "yolov5s.pt", + "yolov5m.pt", + "yolov5l.pt", + + "yolov8n.pt", + "yolov8s.pt", + "yolov8m.pt", + "yolov8l.pt", + + "yolov10n.pt", + "yolov10s.pt", + "yolov10m.pt", + "yolov10l.pt", + + "yolo11n.pt", + "yolo11s.pt", + "yolo11m.pt", + "yolo11l.pt", + + "yolo12n.pt", + "yolo12s.pt", + "yolo12m.pt", + "yolo12l.pt", + ] +DEFAULT_BATCHES = [1, 2, 4, 8, 16, 32] +DEFAULT_DTYPES = [False, True] # False: fp32, True: fp16 + +def benchmark( + model, + data=None, + imgsz=160, + batch=1, + half=False, + int8=False, + device="cpu", + verbose=False, + eps=1e-3, + format="-", +): + imgsz = check_imgsz(imgsz) + assert imgsz[0] == imgsz[1] if isinstance(imgsz, list) else True, "benchmark() only supports square imgsz." 
+ + import pandas as pd # scope for faster 'import ultralytics' + + pd.options.display.max_columns = 10 + pd.options.display.width = 120 + device = select_device(device, verbose=False) + if isinstance(model, (str, Path)): + model = YOLO(model) + is_end2end = getattr(model.model.model[-1], "end2end", False) + data = data or TASK2DATA[model.task] # task to dataset, i.e. coco8.yaml for task=detect + key = TASK2METRIC[model.task] # task to metric, i.e. metrics/mAP50-95(B) for task=detect + + y = [] + t0 = time.time() + + format_arg = format.lower() + if format_arg: + formats = frozenset(export_formats()["Argument"]) + assert format in formats, f"Expected format to be one of {formats}, but got '{format_arg}'." + for i, (name, format, suffix, cpu, gpu, _) in enumerate(zip(*export_formats().values())): + emoji, filename = "❌", None # export defaults + try: + if format_arg and format_arg != format: + continue + + # Export + if format == "-": + filename = model.pt_path or model.ckpt_path or model.model_name + exported_model = model # PyTorch format + else: + filename = model.export( + imgsz=imgsz, format=format, half=half, int8=int8, data=data, device=device, verbose=False + ) + exported_model = YOLO(filename, task=model.task) + assert suffix in str(filename), "export failed" + emoji = "❎" # indicates export succeeded + + # Predict + assert model.task != "pose" or i != 7, "GraphDef Pose inference is not supported" + assert i not in {9, 10}, "inference not supported" # Edge TPU and TF.js are unsupported + assert i != 5 or platform.system() == "Darwin", "inference only supported on macOS>=10.13" # CoreML + if i in {13}: + assert not is_end2end, "End-to-end torch.topk operation is not supported for NCNN prediction yet" + exported_model.predict(ASSETS / "bus.jpg", imgsz=imgsz, device=device, half=half, verbose=False) + + # Validate + results = exported_model.val( + data=data, batch=batch, imgsz=imgsz, plots=False, device=device, half=half, int8=int8, verbose=False + ) + metric, 
speed = results.results_dict[key], results.speed["inference"] + fps = round(1000 / (speed + eps), 2) # frames per second + y.append([name, "✅", round(file_size(filename), 1), round(metric, 4), round(speed, 2), fps]) + except Exception as e: + if verbose: + assert type(e) is AssertionError, f"Benchmark failure for {name}: {e}" + LOGGER.warning(f"ERROR ❌️ Benchmark failure for {name}: {e}") + y.append([name, emoji, round(file_size(filename), 1), None, None, None]) # mAP, t_inference + + # Print results + check_yolo(device=device) # print system info + df = pd.DataFrame(y, columns=["Format", "Status❔", "Size (MB)", key, "Inference time (ms/im)", "FPS"]) + + name = model.model_name + dt = time.time() - t0 + legend = "Benchmarks legend: - ✅ Success - ❎ Export passed but validation failed - ❌️ Export failed" + s = f"\nBenchmarks complete for {name} on {data} at imgsz={imgsz} ({dt:.2f}s)\n{legend}\n{df.fillna('-')}\n" + LOGGER.info(s) + with open("benchmarks.log", "a", errors="ignore", encoding="utf-8") as f: + f.write(s) + + if verbose and isinstance(verbose, float): + metrics = df[key].array # values to compare to floor + floor = verbose # minimum metric floor to pass, i.e. 
= 0.29 mAP for YOLOv5n + assert all(x > floor for x in metrics if pd.notna(x)), f"Benchmark failure: metric(s) < floor {floor}" + + return df + +def main(models: List[str], batches: List[int], dtypes: List[bool], dataset: str = "coco128.yaml", imgsz: int = 640, device: str = "cuda:0"): + for half, model, batch in product(dtypes, models, batches): + print(f"Benchmarking model: {model} with half:{half} batch:{batch} dataset:{dataset} imgsz:{imgsz}") + benchmark(model=model, data=dataset, imgsz=imgsz, batch=batch, half=half, int8=True, device=device) + time.sleep(5) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Run YOLO benchmarks.") + parser.add_argument("--models", nargs="+", default=DEFAULT_MODELS, help="List of models to benchmark.") + parser.add_argument("--batches", nargs="+", type=int, default=DEFAULT_BATCHES, help="List of batch to test.") + parser.add_argument("--dtypes", nargs="+", type=lambda x: x.lower() == 'true', default=DEFAULT_DTYPES, help="List of dtypes to test (False for fp32, True for fp16).") + parser.add_argument("--dataset", default="coco128.yaml", help="Dataset configuration file (e.g., coco128.yaml).") + parser.add_argument("--imgsz", type=int, default=640, help="Image size.") + parser.add_argument("--device", default="cuda:0", help="Device to run on.") + + args = parser.parse_args() + + main(args.models, args.batches, args.dtypes, args.dataset, args.imgsz, args.device) +``` + +如若需要保存推理图片可修改以下代码 + +```python +# 源代码 +exported_model.predict(ASSETS / "bus.jpg", imgsz=imgsz, device=device, half=half, verbose=False) +# 保存图片代码 +exported_model.predict(ASSETS / "bus.jpg", imgsz=imgsz, device=device, half=half, verbose=False, save=True) +``` + +# 常见问题 + +## Q1:docker拉取超时/失败 + +**A1:请在 `/etc/docker/daemon.json` 中配置国内镜像加速器(如阿里云、DaoCloud 等),并重启 docker 服务。步骤如下:** + +```shell +mkdir -p /etc/docker + +tee /etc/docker/daemon.json <<-'EOF' +{ + "registry-mirrors": ["https://xxxxxx" ] +} +EOF + +# 查看是否创建了daemon.json 
若无请用sudo权限重新执行一次 +cat /etc/docker/daemon.json + + +systemctl daemon-reload +systemctl restart docker +``` + +## Q2:docker 启动失败,提示 Error response from daemon: container xxxx is not running +**A2:旧容器已存在但未处于运行状态,脚本会直接尝试进入该容器而失败;请先用 `--stop` 停止并删除旧容器,再重新启动:** +```bash +bash run_docker_musa.sh + +# 镜像存在: docker/e300gpu:v1.3.0.003 +# 正在进入容器: dd6cb1d2ca43 镜像: docker/e300gpu:v1.3.0.003 +# 正在进入容器终端... +# Error response from daemon: container dd6cb1d2ca43a6eaafc8f25f9331fe41a6a60ed8748be9fbc3a5542e96ff446b is not running + +# 停止容器 +bash run_docker_musa.sh --stop + +# 重新执行启动容器 +bash run_docker_musa.sh +# 镜像存在: docker/e300gpu:v1.3.0.003 +# 开始启动镜像后台运行... +# 镜像名称: docker/e300gpu +# 镜像标签: v1.3.0.003 +# d85cf094673b51916dd520b53c8918384143a97d095bfb940bf8ace1d400c3ad +# 正在进入容器: d85cf094673b 镜像: docker/e300gpu:v1.3.0.003 +# 正在进入容器终端... +# root@kylinos:/playground# +``` + diff --git a/thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.3.0/T035-Kylin-Docker-NPU-VPN.md b/thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.3.0/T035-Kylin-Docker-NPU-VPN.md new file mode 100644 index 0000000..3be95da --- /dev/null +++ b/thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.3.0/T035-Kylin-Docker-NPU-VPN.md @@ -0,0 +1,564 @@ +

+ logo +

+

摩尔线程-T035-Kylin

+

麒麟系统docker下调用NPU与VPU

+ +
+

lastAuthor:Pierce.Cai

+

lastDate:2026-05-27

+
+ + +# 环境要求 + +| 组件 | 版本 | +| ----------- | ----------- | +| MUSA Driver | 3.1.3-AB100 | +| MUSA SDK | 4.1.4 | +| U-Boot SDK | 1.3.0.003 | + +# 系统版本 + +```shell +kylin@kylinos:~$ uname -a +Linux kylinos 6.6.10 #1 SMP PREEMPT Wed Jan 7 01:30:30 UTC 2026 aarch64 aarch64 aarch64 GNU/Linux + +kylin@kylinos:~$ cat /etc/os-release +NAME="Kylin" +VERSION="银河麒麟桌面操作系统 (工业版) V10 SP1 2503" +VERSION_US="Kylin Linux Desktop Industry V10 SP1 2503" +ID=kylin +ID_LIKE=debian +PRETTY_NAME="Kylin Industry V10 SP1" +VERSION_ID="v10" +HOME_URL="http://www.kylinos.cn/" +SUPPORT_URL="http://www.kylinos.cn/support/technology.html" +BUG_REPORT_URL="http://www.kylinos.cn/" +PRIVACY_POLICY_URL="http://www.kylinos.cn" +VERSION_CODENAME=kylin +UBUNTU_CODENAME=kylin +PROJECT_CODENAME=V10SP1 +KYLIN_RELEASE_ID="2503" + +``` + +# 安装docker + +```shell + +# 更新 apt 缓存 +sudo apt update + +# 下载并添加 Docker 官方 GPG 密钥 +sudo mkdir -p /etc/apt/keyrings +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg + +# 添加适配 arm64 架构的 Docker 软件源 +echo "deb [arch=arm64 signed-by=/etc/apt/keyrings/docker.gpg] http://mirrors.aliyun.com/docker-ce/linux/ubuntu focal stable" | sudo tee /etc/apt/sources.list.d/docker.list + +# 再次更新软件包列表,让刚才添加的源生效 +sudo apt update + + +# 安装 Docker 核心组件 +sudo apt install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + +# 将当前用户加入 docker 用户组 +sudo usermod -aG docker $USER + +# 刷新用户组权限,让设置在当前终端立即生效 +newgrp docker + +# 全局生效 +sudo reboot +``` + +# 确认NPU设备节点 + +```shell +# 确认NPU设备节点 是否存在 +ls -l /dev/npucore + +# 输出正确示例 +# crw-rw-rw- 1 root root 199, 0 3月 5 19:50 /dev/npucore + +# 不存在示例 (需要进行NPU设备节点的加载) +# ls: 无法访问'/dev/npucore': 没有那个文件或目录 +``` + +# 加载NPU设备节点(若无) + +```shell +# 查找内核NPU设备节点 +find /lib/modules/$(uname -r) -name "*npu*.ko" 2>/dev/null +# 输出示例 +# /lib/modules/6.6.10/extra/npucore.ko + +# 加载设备节点 +sudo modprobe npucore + +# 设置开机自动加载 +echo "npucore" | sudo tee /etc/modules-load.d/npu.conf +``` + 
+# 构建docker + +**创建/运行docker构建脚本** + +```shell +mkdir dockerData + +cd dockerData + +# 编辑并创建docker脚本,将后面的脚本代码粘贴进来;ctrl+s保存,ctrl+x退出 +nano npu_vpu_docker.sh + +# 运行脚本 +bash npu_vpu_docker.sh +``` + +**构建脚本代码展示** + +```shell +#!/bin/bash + +set -e + +# 配置参数 +IMAGE_NAME="npu-vpu-env:v1.0" +CONTAINER_NAME="my_dev_container" +APT_URI="http://mirrors.tuna.tsinghua.edu.cn/ubuntu-ports" + +# 定义下载链接 +BASE_URL="https://apollo-appstore-pre.tos-cn-beijing.volces.com/appstore/release" +VIDEO_URL="${BASE_URL}/clips/demo_720p.mp4" + +# NPU SDK 相关 DEB 包 +# 注意:如果其他组件名有变动,请手动调整文件名 +DEB_NPU_KERNEL="${BASE_URL}/mtnpu/npu_sdk_v1.5/kernel-module-m1000-npu_1.5-r20251223061609-8cc96d8_all.deb" +DEB_MTNN_UNIFY="${BASE_URL}/mtnpu/npu_sdk_v1.5/m1000-mtnn-unify-lib_1.5-r20251211101745-cca46b8_all.deb" +DEB_MTNNRT="${BASE_URL}/mtnpu/npu_sdk_v1.5/m1000-mtnnrt_1.5-r20251211101900-f5a4ba5_all.deb" +DEB_MTC_TOOLKIT="${BASE_URL}/mtnpu/npu_sdk_v1.5/m1000-mtc-toolkit_1.5-r20251211102012-5aaa237ba_arm64.deb" + +echo "==== 1. 开始下载依赖包和测试视频 ====" +wget -nc $DEB_NPU_KERNEL +wget -nc $DEB_MTNN_UNIFY +wget -nc $DEB_MTNNRT +wget -nc $DEB_MTC_TOOLKIT +wget -nc $VIDEO_URL + +echo "==== 2.
准备 Dockerfile ====" +cat <<'EOF' > Dockerfile +ARG IMAGE_BASE=ubuntu:22.04 +FROM $IMAGE_BASE + +# 设置 APT 源 +ARG APT_URI +RUN mv /etc/apt/sources.list /etc/apt/sources.list.bak && \ + echo "deb $APT_URI jammy main restricted universe multiverse" > /etc/apt/sources.list && \ + echo "deb $APT_URI jammy-updates main restricted universe multiverse" >> /etc/apt/sources.list && \ + echo "deb $APT_URI jammy-backports main restricted universe multiverse" >> /etc/apt/sources.list && \ + echo "deb $APT_URI jammy-security main restricted universe multiverse" >> /etc/apt/sources.list + +# 安装基础依赖 +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates net-tools vim curl wget tmux \ + python3 python3-pip build-essential libopenblas-dev \ + libelf1 kmod vainfo ffmpeg \ + && ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \ + && echo "Asia/Shanghai" > /etc/timezone \ + && rm -rf /var/lib/apt/lists/* + +# 拷贝依赖包和测试视频 +COPY *.deb /tmp/ +COPY demo_720p.mp4 /playground/ + +# 安装 NPU 包 (处理 systemctl 报错) +RUN ln -s /bin/true /usr/bin/systemctl && \ + dpkg -i /tmp/kernel-module-m1000-npu_*.deb && \ + dpkg -i /tmp/m1000-mtnn-unify-lib_*.deb && \ + dpkg -i /tmp/m1000-mtnnrt_*.deb && \ + dpkg -i /tmp/m1000-mtc-toolkit_*.deb && \ + apt-get install -f -y && \ + rm -rf /tmp/*.deb && \ + rm /usr/bin/systemctl + +# 配置环境变量 +ENV LIBVA_DRIVER_NAME=mtgpu +ENV PATH="/usr/local/musa/bin:/usr/local/musa/tools:${PATH}" +ENV LD_LIBRARY_PATH="/usr/lib/aarch64-linux-gnu/dri:/usr/lib/aarch64-linux-gnu/musa:/usr/local/musa/lib:/usr/local/musa/lib64" +RUN echo "export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:$LD_LIBRARY_PATH" >> /root/.bashrc + +# 创建验证脚本 +RUN mkdir -p /playground && \ + printf '#!/bin/bash\n\ +echo "--- 1. Checking VPU (VA-API) ---"\n\ +vainfo || echo "VPU check failed"\n\ +ffmpeg -hwaccels | grep vaapi && echo "VA-API is supported in ffmpeg"\n\ +\n\ +echo -e "\\n--- 2. 
Checking NPU ---"\n\ +if [ -e /dev/npucore ]; then\n\ + echo "==== NPU Device node (/dev/npucore) is OK. ===="\n\ +else\n\ + echo "==== NPU Device node NOT found. ===="\n\ +fi\n\ +ls /usr/lib/libmtnnrt.so &> /dev/null && echo "==== MTNN Libraries found. ====" || echo "==== MTNN Libraries NOT found. ===="\n'\ + > /playground/check_hw.sh && \ + chmod +x /playground/check_hw.sh + +WORKDIR /playground +CMD ["/bin/bash", "-l"] +EOF + +echo "==== 3. 开始构建 Docker 镜像 ====" +docker build --build-arg APT_URI=$APT_URI -t $IMAGE_NAME . + +echo "==== 4. 启动容器 ====" +# 如果容器已存在,先删除 +docker rm -f $CONTAINER_NAME || true + +docker run -it -d \ + --name $CONTAINER_NAME \ + --privileged \ + --net=host \ + --ipc=host \ + -e DISPLAY=unix$DISPLAY \ + -v /dev:/dev \ + -v /usr/local/musa:/usr/local/musa \ + -v /lib/modules:/lib/modules:ro \ + -v /usr/lib/aarch64-linux-gnu/musa:/usr/lib/aarch64-linux-gnu/musa \ + -v /usr/lib/aarch64-linux-gnu/dri:/usr/lib/aarch64-linux-gnu/dri \ + $IMAGE_NAME + +echo "==== 部署完成! ====" +echo "执行以下命令进入容器:" +echo "docker exec -it $CONTAINER_NAME /bin/bash" +echo "进入容器后,运行 ./check_hw.sh 验证硬件。" +``` + +# 验证驱动 + +```shell +docker exec -it my_dev_container /bin/bash + +./check_hw.sh +``` + +**输出示例** + +```shell +--- 1. Checking VPU (VA-API) --- +error: XDG_RUNTIME_DIR not set in the environment. +error: can't connect to X server! 
+libva info: VA-API version 1.14.0 +libva info: User environment variable requested driver 'mtgpu' +libva info: Trying to open /usr/lib/aarch64-linux-gnu/dri/mtgpu_drv_video.so +libva info: Found init function __vaDriverInit_1_12 +trace path : /sys/kernel/debug/tracing/trace_marker_raw +support abi version:2 +libva info: va_openDriver() returns 0 +vainfo: VA-API version: 1.14 (libva 2.12.0) +vainfo: Driver version: Mthreads VPU driver for MT(R) MTT M1000 Graphics - 0.0.1.614219b (614219b) +vainfo: Supported profile and entrypoints + VAProfileNone : VAEntrypointVideoProc + VAProfileNone : VAEntrypointStats + VAProfileMPEG2Simple : VAEntrypointVLD + VAProfileMPEG2Main : VAEntrypointVLD + VAProfileMPEG4Simple : VAEntrypointVLD + VAProfileMPEG4AdvancedSimple : VAEntrypointVLD + VAProfileMPEG4Main : VAEntrypointVLD + VAProfileH264Main : VAEntrypointVLD + VAProfileH264Main : VAEntrypointEncSlice + VAProfileH264Main : VAEntrypointEncSliceLP + VAProfileH264High : VAEntrypointVLD + VAProfileH264High : VAEntrypointEncSlice + VAProfileH264High : VAEntrypointEncSliceLP + VAProfileVC1Simple : VAEntrypointVLD + VAProfileVC1Main : VAEntrypointVLD + VAProfileVC1Advanced : VAEntrypointVLD + VAProfileJPEGBaseline : VAEntrypointVLD + VAProfileJPEGBaseline : VAEntrypointEncPicture + VAProfileH264ConstrainedBaseline: VAEntrypointVLD + VAProfileH264ConstrainedBaseline: VAEntrypointEncSlice + VAProfileH264ConstrainedBaseline: VAEntrypointEncSliceLP + VAProfileVP8Version0_3 : VAEntrypointVLD + VAProfileHEVCMain : VAEntrypointVLD + VAProfileHEVCMain : VAEntrypointEncSlice + VAProfileHEVCMain : VAEntrypointEncSliceLP + VAProfileHEVCMain10 : VAEntrypointVLD + VAProfileHEVCMain10 : VAEntrypointEncSlice + VAProfileHEVCMain10 : VAEntrypointEncSliceLP + VAProfileVP9Profile0 : VAEntrypointVLD + VAProfileVP9Profile2 : VAEntrypointVLD + VAProfileAV1Profile0 : VAEntrypointVLD + VAProfileAV1Profile1 : VAEntrypointVLD + : VAEntrypointVLD + : VAEntrypointEncSlice + : VAEntrypointEncSliceLP + : 
VAEntrypointVLD + : VAEntrypointVLD + : VAEntrypointVLD + : VAEntrypointVLD +ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers + built with gcc 11 (Ubuntu 11.2.0-19ubuntu1) + configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/aarch64-linux-gnu --incdir=/usr/include/aarch64-linux-gnu --arch=arm64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-pocketsphinx --enable-librsvg --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared + libavutil 56. 70.100 / 56. 70.100 + libavcodec 58.134.100 / 58.134.100 + libavformat 58. 76.100 / 58. 76.100 + libavdevice 58. 13.100 / 58. 13.100 + libavfilter 7.110.100 / 7.110.100 + libswscale 5. 9.100 / 5. 9.100 + libswresample 3. 9.100 / 3. 9.100 + libpostproc 55. 9.100 / 55. 9.100 +vaapi +VA-API is supported in ffmpeg + +--- 2. Checking NPU --- +==== NPU Device node (/dev/npucore) is OK. ==== +==== MTNN Libraries found. 
==== +``` + +# 验证VPU + +```shell +ffmpeg -init_hw_device vaapi=va:/dev/dri/renderD128 \ + -i demo_720p.mp4 -vf 'format=nv12,hwupload' \ + -c:v h264_vaapi -b:v 5M vaapi_output.mp4 +``` + +**输出示例** + +```shell +root@kylinos:/playground# ffmpeg -init_hw_device vaapi=va:/dev/dri/renderD128 -i demo_720p.mp4 -vf 'format=nv12,hwupload' -c:v h264_vaapi -b:v 5M vaapi_output.mp4 + +ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers + built with gcc 11 (Ubuntu 11.2.0-19ubuntu1) + configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/aarch64-linux-gnu --incdir=/usr/include/aarch64-linux-gnu --arch=arm64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-pocketsphinx --enable-librsvg --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared + libavutil 56. 70.100 / 56. 70.100 + libavcodec 58.134.100 / 58.134.100 + libavformat 58. 76.100 / 58. 76.100 + libavdevice 58. 13.100 / 58. 13.100 + libavfilter 7.110.100 / 7.110.100 + libswscale 5. 9.100 / 5. 9.100 + libswresample 3. 9.100 / 3. 
9.100 + libpostproc 55. 9.100 / 55. 9.100 +trace path : /sys/kernel/debug/tracing/trace_marker_raw +support abi version:2 +Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'demo_720p.mp4': + Metadata: + major_brand : dash + minor_version : 0 + compatible_brands: iso6avc1mp41 + creation_time : 2025-01-29T08:45:19.000000Z + Duration: 00:02:40.48, start: 0.000000, bitrate: 1174 kb/s + Stream #0:0(und): Video: h264 (Main) (avc1 / 0x31637661), yuv420p(tv, bt709, progressive), 1280x720 [SAR 1:1 DAR 16:9], 12 kb/s, 25 fps, 25 tbr, 12800 tbn, 50 tbc (default) + Metadata: + creation_time : 2025-01-29T08:45:19.000000Z + handler_name : ISO Media file produced by Google Inc. + vendor_id : [0][0][0][0] +File 'vaapi_output.mp4' already exists. Overwrite? [y/N] y +Stream mapping: + Stream #0:0 -> #0:0 (h264 (native) -> h264 (h264_vaapi)) +Press [q] to stop, [?] for help +Output #0, mp4, to 'vaapi_output.mp4': + Metadata: + major_brand : dash + minor_version : 0 + compatible_brands: iso6avc1mp41 + encoder : Lavf58.76.100 + Stream #0:0(und): Video: h264 (High) (avc1 / 0x31637661), vaapi_vld(tv, bt709, progressive), 1280x720 [SAR 1:1 DAR 16:9], q=2-31, 5000 kb/s, 25 fps, 12800 tbn (default) + Metadata: + creation_time : 2025-01-29T08:45:19.000000Z + handler_name : ISO Media file produced by Google Inc. + vendor_id : [0][0][0][0] + encoder : Lavc58.134.100 h264_vaapi +frame= 978 fps=388 q=-0.0 size= 24576kB time=00:00:38.96 bitrate=5167.5kbits/s speed=15.5x +``` + +# 验证NPU + +```shell +demo_npu_resnet50.sh +``` + +**输出示例** + +```shell +root@kylinos:/playground# demo_npu_resnet50.sh + +Running npu renet50 test +Thread 0: Enter +[t0] mtnn file=/m1000/data/resnet50/resnet50.mtnn +[t0][0] Test Passed. 
use time 11.45ms, 87.32fps +Thread 0: Exited +``` + +# NPU应用:yolo推理 + +**此操作所有过程都处于容器中** + +## 安装依赖 + +> 为Docker“无头”环境安装一个“虚拟屏幕” + +```shell +apt update +apt install -y xvfb python-is-python3 +``` + +## 安装YOLO推理样本 + +```shell +# 下载 +wget https://apollo-appstore-pre.tos-cn-beijing.volces.com/appstore/release/mtnpu/mtnn_samples_for_npu_sdk_v1.1_r5_20250423.tar.gz + +# 解压 +tar zxvf mtnn_samples_for_npu_sdk_v1.1_r5_20250423.tar.gz + +# 进入目录 +cd mtnn_samples_for_npu_sdk_v1.1_r5/ + +# 安装依赖包 +bash setup.sh +``` + +## 查看样本信息 + +```shell +# 确保此时处于样本目录 +cd mtnn_samples_for_npu_sdk_v1.1_r5/ + +bash demo.sh +``` + +**输出示例** + +``` +Error: Mode not specified. +Usage: demo.sh <MODE> [CORES] + +Runs YOLO inference with specified models and NPU configurations. + +Arguments: + MODE The execution mode. See options below. + CORES Number of cores/NPUs to use (default: 1). + Affects modes like yolov5m, yolov8s, etc., distributing the model across npu0 and npu1 if CORES > 1. + +Options: + -h, --help Show this help message and exit.
+ +Available OPTIONS: + yolov5m - npu0 (yolov5m) # 单NPU核心运行yolov5m + yolov5m 2 - npu0 (yolov5m), npu1 (yolov5m) # 双NPU核心各运行yolov5m + yolov8n - npu0 (yolov8n) # 单NPU核心运行yolov8n + yolov8n 2 - npu0 (yolov8n), npu1 (yolov8n) # 双NPU核心各运行yolov8n + yolov8n-pose - npu0 (yolov8n-pose) # 单NPU核心运行yolov8n-pose + yolov8n-pose 2 - npu0 (yolov8n-pose), npu1 (yolov8n-pose) # 双NPU核心各运行yolov8n-pose + yolov8s - npu0 (yolov8s) # 单NPU核心运行yolov8s + yolov8s 2 - npu0 (yolov8s), npu1 (yolov8s) # 双NPU核心各运行yolov8s + yolov8s-pose - npu0 (yolov8s-pose) # 单NPU核心运行yolov8s-pose + yolov8s-pose 2 - npu0 (yolov8s-pose), npu1 (yolov8s-pose) # 双NPU核心各运行yolov8s-pose + yolov8m - npu0 (yolov8m) # 单NPU核心运行yolov8m + yolov8m 2 - npu0 (yolov8m), npu1 (yolov8m) # 双NPU核心各运行yolov8m + yolov8m-pose - npu0 (yolov8m-pose) # 单NPU核心运行yolov8m-pose + yolov8m-pose 2 - npu0 (yolov8m-pose), npu1 (yolov8m-pose) # 双NPU核心各运行yolov8m-pose + npu_1c_2m - npu0 (yolov5m, yolov8m) # 单NPU核心运行2个模型 + npu_2c_2m - npu0 (yolov5m), npu1(yolov8m) # 双NPU核心各运行1个不同模型 + npu_2c_3m - npu0 (yolov5m, yolov8m-pose), npu1 (yolov8m) # 双NPU核心运行3个模型 + npu_2c_4m_01 - npu0 (yolov5m, yolov5m), npu1 (yolov8m, yolov8m) # 双NPU核心各运行2个相同模型 + npu_2c_4m_02 - npu0 (yolov5m, yolov8m), npu1 (yolov5m, yolov8m) # 双NPU核心各运行2个不同模型 +``` + +## 推理 + +> 使用虚拟屏幕运行 + +```shell +xvfb-run -a bash demo.sh yolov5m +``` + +**输出示例** + +```shell +root@kylinos:/playground/mtnn_samples_for_npu_sdk_v1.1_r5# xvfb-run -a bash demo.sh yolov5m +--- Running Configuration --- +Mode: yolov5m +Models: /playground/mtnn_samples_for_npu_sdk_v1.1_r5/models/yolov5m.nb +Accelerators: npu0 +Thread Num: 1 +Threads: 0 +FPS: -1 +------------------------------------------------------------------------------------------------------------ +[ WARN:0@0.087] global loadsave.cpp:278 findDecoder imread_(''): can't open/read file: check file path/integrity +model_size: 13772864 +model name: yolov5m +infer core: npu0 +input source: /playground/mtnn_samples_for_npu_sdk_v1.1_r5/assets +frame fps: unlimited 
+QFontDatabase: Cannot find font directory /usr/local/lib/python3.10/dist-packages/cv2/qt/fonts. +Note that Qt no longer ships fonts. Deploy some (from https://dejavu-fonts.github.io/ for example) or switch to fontconfig. +QFontDatabase: Cannot find font directory /usr/local/lib/python3.10/dist-packages/cv2/qt/fonts. +Note that Qt no longer ships fonts. Deploy some (from https://dejavu-fonts.github.io/ for example) or switch to fontconfig. +QFontDatabase: Cannot find font directory /usr/local/lib/python3.10/dist-packages/cv2/qt/fonts. +Note that Qt no longer ships fonts. Deploy some (from https://dejavu-fonts.github.io/ for example) or switch to fontconfig. +QFontDatabase: Cannot find font directory /usr/local/lib/python3.10/dist-packages/cv2/qt/fonts. +Note that Qt no longer ships fonts. Deploy some (from https://dejavu-fonts.github.io/ for example) or switch to fontconfig. +QFontDatabase: Cannot find font directory /usr/local/lib/python3.10/dist-packages/cv2/qt/fonts. +Note that Qt no longer ships fonts. Deploy some (from https://dejavu-fonts.github.io/ for example) or switch to fontconfig. 
+[tid-281472273604896][yolov5m-npu0]: preproc: 2.26ms infer: 24.34ms postproc: 0.84ms total: 27.44ms avg_infer_fps: 41.30 +[tid-281472273604896][yolov5m-npu0]: preproc: 2.25ms infer: 24.37ms postproc: 0.81ms total: 27.43ms avg_infer_fps: 41.14 +[tid-281472273604896][yolov5m-npu0]: preproc: 2.26ms infer: 24.72ms postproc: 0.88ms total: 27.86ms avg_infer_fps: 41.12 +[tid-281472273604896][yolov5m-npu0]: preproc: 2.25ms infer: 24.73ms postproc: 0.94ms total: 27.92ms avg_infer_fps: 41.07 +[tid-281472273604896][yolov5m-npu0]: preproc: 2.27ms infer: 24.42ms postproc: 0.90ms total: 27.59ms avg_infer_fps: 41.20 +[tid-281472273604896][yolov5m-npu0]: preproc: 2.23ms infer: 24.66ms postproc: 0.86ms total: 27.75ms avg_infer_fps: 41.09 +[tid-281472273604896][yolov5m-npu0]: preproc: 2.24ms infer: 24.38ms postproc: 0.97ms total: 27.60ms avg_infer_fps: 41.20 +[tid-281472273604896][yolov5m-npu0]: preproc: 2.25ms infer: 24.37ms postproc: 0.80ms total: 27.43ms avg_infer_fps: 41.17 +[tid-281472273604896][yolov5m-npu0]: preproc: 2.27ms infer: 24.29ms postproc: 0.80ms total: 27.37ms avg_infer_fps: 41.20 +[tid-281472273604896][yolov5m-npu0]: preproc: 2.28ms infer: 24.64ms postproc: 0.97ms total: 27.89ms avg_infer_fps: 41.10 +``` + +## 查看NPU占用 + +> 宿主机 任意终端运行 + +```shell +watch -n1 sudo cat /sys/kernel/debug/gc/load + +# =================== 输出示例 start =================== +Every 1.0s: sudo cat /sys/kernel/debug/gc/load kylinos: Thu May 28 12:39:27 2026 + +device : 0 + core : 0 + load : 70% + + core : 1 + load : 0% +# =================== 输出示例 end =================== +``` + + + +# 常见问题 + +## Q1:docker拉取超时/失败 + +**A1:请在 `/etc/docker/daemon.json` 中配置国内镜像加速器(如阿里云、DaoCloud 等),并重启 docker 服务。步骤如下:** + +```shell +mkdir -p /etc/docker + +tee /etc/docker/daemon.json <<-'EOF' +{ +"registry-mirrors": ["https://xxxxxxx" ] +} +EOF + +# 查看是否创建了daemon.json 若无请用sudo权限重新执行一次 +cat /etc/docker/daemon.json + + +systemctl daemon-reload +systemctl restart docker +``` + diff --git 
"a/thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.3.0/T035-Kylin-Docker-vllm-musa\346\250\241\345\236\213\346\216\250\347\220\206.md" "b/thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.3.0/T035-Kylin-Docker-vllm-musa\346\250\241\345\236\213\346\216\250\347\220\206.md" new file mode 100644 index 0000000..ab66056 --- /dev/null +++ "b/thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.3.0/T035-Kylin-Docker-vllm-musa\346\250\241\345\236\213\346\216\250\347\220\206.md" @@ -0,0 +1,691 @@ +

+ logo +

+

摩尔线程-T035-Kylin

+

麒麟系统docker下使用vllm_musa推理模型

+ +
+

lastAuthor:Pierce.Cai

+

lastDate:2026-05-27

+
+ + +# 环境要求 + +| 组件 | 版本 | +| ----------- | ----------- | +| MUSA Driver | 3.1.3-AB100 | +| MUSA SDK | 4.1.4 | +| U-Boot SDK | 1.3.0.003 | + +# 系统版本 + +```shell +kylin@kylinos:~$ uname -a +Linux kylinos 6.6.10 #1 SMP PREEMPT Wed Jan 7 01:30:30 UTC 2026 aarch64 aarch64 aarch64 GNU/Linux + +kylin@kylinos:~$ cat /etc/os-release +NAME="Kylin" +VERSION="银河麒麟桌面操作系统 (工业版) V10 SP1 2503" +VERSION_US="Kylin Linux Desktop Industry V10 SP1 2503" +ID=kylin +ID_LIKE=debian +PRETTY_NAME="Kylin Industry V10 SP1" +VERSION_ID="v10" +HOME_URL="http://www.kylinos.cn/" +SUPPORT_URL="http://www.kylinos.cn/support/technology.html" +BUG_REPORT_URL="http://www.kylinos.cn/" +PRIVACY_POLICY_URL="http://www.kylinos.cn" +VERSION_CODENAME=kylin +UBUNTU_CODENAME=kylin +PROJECT_CODENAME=V10SP1 +KYLIN_RELEASE_ID="2503" + +``` + +# 安装docker + +```shell + +# 更新 apt 缓存 +sudo apt update + +# 下载并添加 Docker 官方 GPG 密钥 +sudo mkdir -p /etc/apt/keyrings +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg + +# 添加适配 arm64 架构的 Docker 软件源 +echo "deb [arch=arm64 signed-by=/etc/apt/keyrings/docker.gpg] http://mirrors.aliyun.com/docker-ce/linux/ubuntu focal stable" | sudo tee /etc/apt/sources.list.d/docker.list + +# 再次更新软件包列表,让刚才添加的源生效 +sudo apt update + + +# 安装 Docker 核心组件 +sudo apt install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + +# 将当前用户加入 docker 用户组 +sudo usermod -aG docker $USER + +# 刷新用户组权限,让设置在当前终端立即生效 +newgrp docker + +# 全局生效 +sudo reboot +``` + +# 编辑Docker脚本 + +```shell +mkdir dockerData + +cd dockerData +``` + +## 编写Dockerfile + +```shell +# 编辑并创建Dockerfile,将后面的代码粘贴进来;ctrl+s保存,ctrl+x退出 +nano Dockerfile +``` + +```dockerfile +ARG IMAGE_BASE=ubuntu:22.04 +FROM $IMAGE_BASE + +# 设置Ubuntu APT源为国内源 +ARG APT_URI=http://mirrors.tuna.tsinghua.edu.cn/ubuntu-ports +RUN mv /etc/apt/sources.list /etc/apt/sources.list.bak 2>/dev/null || true && \ + echo "deb ${APT_URI}/ jammy main restricted universe multiverse" >
/etc/apt/sources.list && \ + echo "deb ${APT_URI}/ jammy-updates main restricted universe multiverse" >> /etc/apt/sources.list && \ + echo "deb ${APT_URI}/ jammy-backports main restricted universe multiverse" >> /etc/apt/sources.list && \ + echo "deb ${APT_URI}/ jammy-security main restricted universe multiverse" >> /etc/apt/sources.list && \ + cat /etc/apt/sources.list + + +# 创建.pip/pip.conf文件以设置pip使用国内源 +ARG PIP_URI +RUN mkdir -p /root/.pip && \ + cat <<EOL > /root/.pip/pip.conf +[global] +index-url = $PIP_URI +EOL + +# 安装必要的系统依赖项(最小化安装) +RUN apt-get update && apt-get install -y --fix-missing \ + ca-certificates \ + net-tools software-properties-common \ + vim curl bzip2 wget tmux python3 python3-pip \ + git gdb build-essential \ + libopenmpi-dev libelf-dev libopenblas-dev \ + && ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \ + && echo "Asia/Shanghai" > /etc/timezone \ + && rm -rf /var/lib/apt/lists/* + +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 + +# 安装基础库 +RUN pip install ahocorapy ipykernel numpy==1.26 + +# 安装PyTorch +ARG PYTORCH_URI +ARG TORCH_MUSA_URI +ARG TORCHVISION_URI +ARG TORCHAUDIO_URI +ARG TRITON_URI +RUN pip install --no-cache-dir $PYTORCH_URI +RUN pip install --no-cache-dir $TORCH_MUSA_URI +RUN pip install --no-cache-dir $TORCHVISION_URI +RUN pip install --no-cache-dir $TORCHAUDIO_URI +RUN pip install --no-cache-dir $TRITON_URI + +# 创建playground目录并在其中创建test_musa.py文件 +RUN mkdir -p /playground && \ + cat <<EOL > /playground/test_musa.py +#!/usr/bin/env python + +import torch +import torch_musa + +def main(): + if torch.musa.is_available(): + device_count = torch.musa.device_count() + print(f"Found {device_count} MUSA devices") + + for i in range(device_count): + device_props = torch.musa.get_device_properties(i) + print(f"Device {i}: {device_props}") + else: + print("MUSA is not available.") + +if __name__ == "__main__": + main() +EOL + +# 修改.bashrc文件,配置MUSA相关环境变量(PATH、LD_LIBRARY_PATH) +RUN echo "export
PATH=/usr/local/musa/bin:/usr/local/musa/tools:$PATH" >> /root/.bashrc +RUN echo "export LD_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu:/usr/lib/aarch64-linux-gnu/musa:/usr/local/musa/lib:$LD_LIBRARY_PATH" >> /root/.bashrc + +# 将工作目录设置为playground目录 +WORKDIR /playground + +# 设置默认命令:启动bash +CMD ["/bin/bash", "-l"] +``` + +## 编辑运行脚本 + +```shell +# 编辑并创建脚本,将后面的代码粘贴进来;ctrl+s保存,ctrl+x退出 +nano run_docker_musa.sh +``` + +```shell +#!/bin/bash + +IMAGE_BASE="ubuntu:22.04" +IMAGE_NAME=${IMAGE_NAME:-"docker/e300gpu"} +IMAGE_TAG=${IMAGE_TAG:-"v1.3.0.003"} +DOCKERFILE=${DOCKERFILE:-"Dockerfile"} +PLATFORM=${PLATFORM:-"linux/aarch64"} +APT_URI=${APT_URI:-"http://mirrors.tuna.tsinghua.edu.cn/ubuntu-ports"} +PIP_URI=${PIP_URI:-"https://pypi.tuna.tsinghua.edu.cn/simple"} +PYTORCH_URI=${PYTORCH_URI:-"https://apollo-appstore-pre.tos-cn-beijing.volces.com/appstore/release/project/AIModule/v1.3.0/package/torch_musa/torch-2.5.0-cp310-cp310-linux_aarch64.whl"} +TORCH_MUSA_URI=${TORCH_MUSA_URI:-"https://apollo-appstore-pre.tos-cn-beijing.volces.com/appstore/release/project/AIModule/v1.3.0/package/torch_musa/torch_musa-2.1.1-cp310-cp310-linux_aarch64.whl"} +TORCHVISION_URI=${TORCHVISION_URI:-"https://apollo-appstore-pre.tos-cn-beijing.volces.com/appstore/release/project/AIModule/v1.3.0/package/torch_musa/torchvision-0.20.0a0+afc54f7-cp310-cp310-linux_aarch64.whl"} +TORCHAUDIO_URI=${TORCHAUDIO_URI:-"https://apollo-appstore-pre.tos-cn-beijing.volces.com/appstore/release/project/AIModule/v1.3.0/package/torch_musa/torchaudio-2.5.0a0+56bc006-cp310-cp310-linux_aarch64.whl"} +TRITON_URI=${TRITON_URI:-"https://apollo-appstore-pre.tos-cn-beijing.volces.com/appstore/release/pip/pytorch/2.1.0/triton-3.1.0-cp310-cp310-linux_aarch64.whl"} + + +ACTION="start" +DEAMON_MODE=false + +while [[ $# -gt 0 ]]; do + case $1 in + --start) + ACTION="start" + shift + ;; + --stop) + ACTION="stop" + shift + ;; + --build) + ACTION="build" + shift + ;; + --daemon) + DEAMON_MODE=true + shift + ;; + --help) +
echo "Usage: $0 [ --build | --start [ --daemon ] | --stop ]"
+      echo " --build: 构建镜像"
+      echo " --start: 启动并进入容器"
+      echo " --start --daemon: 后台运行容器(不会进入容器)"
+      echo " --stop: 停止并删除容器"
+      exit 0
+      ;;
+    *)
+      echo "Unknown argument: $1"
+      echo "Usage: $0 [ --build | --start [ --daemon ] | --stop ]"
+      exit 1
+      ;;
+  esac
+done
+
+# Check that the image ${IMAGE_NAME}:${IMAGE_TAG} is known to the local Docker daemon.
+# Returns 0 when `docker inspect` succeeds; returns 1 when the name or tag is
+# empty or when the image cannot be inspected.
+function check_image() {
+  if [ -z "$IMAGE_NAME" ] || [ -z "$IMAGE_TAG" ]; then
+    echo "镜像名称或标签为无效,无法检查。"
+    return 1
+  fi
+
+  if docker inspect "${IMAGE_NAME}:${IMAGE_TAG}" > /dev/null 2>&1; then
+    echo "镜像存在: ${IMAGE_NAME}:${IMAGE_TAG}"
+    return 0
+  else
+    echo "错误:镜像 ${IMAGE_NAME}:${IMAGE_TAG} 不存在。"
+    return 1
+  fi
+}
+
+# Print the ID of the first container created from ${IMAGE_NAME}:${IMAGE_TAG}.
+# Because the listing uses `docker ps -a`, the container may be running OR
+# stopped; callers that need a running container must check its state.
+# Prints an empty line when no such container exists.
+function get_running_container() {
+  local container
+  container=$(docker ps -a -f ancestor="${IMAGE_NAME}:${IMAGE_TAG}" --format "{{.ID}}" | head -n 1)
+  echo "$container"
+}
+
+# Create the container if none exists, restart it if it exists but is stopped,
+# then open an interactive shell in it unless DEAMON_MODE is true.
+# Exits with status 1 when the image is missing or the container cannot be
+# created or started.
+function start_container() {
+  local container_id
+
+  if ! check_image; then
+    echo "请先构建镜像:bash run_docker_musa.sh --build"
+    exit 1
+  fi
+
+  container_id=$(get_running_container)
+  if [ -z "$container_id" ]; then
+    echo "开始启动镜像后台运行..."
+    echo "镜像名称: $IMAGE_NAME"
+    echo "镜像标签: $IMAGE_TAG"
+
+    CUR_DIR=$(pwd)
+    # Quote the path so a working directory containing spaces still works.
+    DIR_NAME=$(basename "$CUR_DIR")
+
+    docker run -it -d \
+      --privileged \
+      -v /dev:/dev \
+      -e "DISPLAY=unix${DISPLAY}" \
+      --net=host \
+      -v /usr/local/musa:/usr/local/musa \
+      -v /usr/lib/aarch64-linux-gnu/musa:/usr/lib/aarch64-linux-gnu/musa \
+      -v /usr/lib/aarch64-linux-gnu/dri:/usr/lib/aarch64-linux-gnu/dri \
+      -v /usr/lib/aarch64-linux-gnu/libGLdispatch.so.0:/usr/lib/aarch64-linux-gnu/libGLdispatch.so.0 \
+      -v /usr/lib/aarch64-linux-gnu/libGL.so.1:/usr/lib/aarch64-linux-gnu/libGL.so.1 \
+      -v /usr/lib/aarch64-linux-gnu/libGLX.so.0:/usr/lib/aarch64-linux-gnu/libGLX.so.0 \
+      -v "/home/${USER}:/home/${USER}" \
+      -v /tmp/.X11-unix:/tmp/.X11-unix \
+      -v "${CUR_DIR}:/playground/${DIR_NAME}" \
+      "${IMAGE_NAME}:${IMAGE_TAG}"
+
+    # Look the container up again; an empty ID means `docker run` failed.
+    container_id=$(get_running_container)
+    if [ -z "$container_id" ]; then
+      echo "错误:容器创建失败。"
+      exit 1
+    fi
+  elif [ "$(docker inspect -f '{{.State.Running}}' "$container_id" 2> /dev/null)" != "true" ]; then
+    # The lookup also returns stopped containers (e.g. after a host reboot);
+    # `docker exec` only works on a running one, so start it first.
+    echo "容器 $container_id 已停止,正在重新启动..."
+    if ! docker start "$container_id" > /dev/null; then
+      echo "错误:容器 $container_id 启动失败。"
+      exit 1
+    fi
+  fi
+
+  echo "正在进入容器: $container_id 镜像: ${IMAGE_NAME}:${IMAGE_TAG}"
+  if [ "$DEAMON_MODE" = true ]; then
+    echo "守护模式下不进入容器,退出命令行。"
+  else
+    echo "正在进入容器终端..."
+    docker exec -it "$container_id" bash
+  fi
+}
+
+# Stop and remove the first container created from the image, if there is one.
+function stop_container() {
+  local container_id
+  container_id=$(get_running_container)
+  if [ -n "$container_id" ]; then
+    echo "停止容器 $container_id..."
+    docker stop "$container_id" > /dev/null
+    echo "删除容器 $container_id..."
+    docker rm "$container_id" > /dev/null
+    echo "操作完成。"
+  else
+    echo "当前没有运行中的容器。"
+  fi
+}
+
+# Build ${IMAGE_NAME}:${IMAGE_TAG} from ${DOCKERFILE}, forwarding the mirror and
+# wheel URLs as build arguments.
+function build_image() {
+  echo "正在构建镜像..."
+  echo "镜像名: ${IMAGE_NAME}:${IMAGE_TAG}"
+  echo "平台: ${PLATFORM}"
+
+  # Build the image; every value is quoted so URLs or paths with special
+  # characters reach docker as a single argument.
+  docker build \
+    --platform "$PLATFORM" \
+    --build-arg "IMAGE_BASE=${IMAGE_BASE}" \
+    --build-arg "APT_URI=${APT_URI}" \
+    --build-arg "PIP_URI=${PIP_URI}" \
+    --build-arg "PYTORCH_URI=${PYTORCH_URI}" \
+    --build-arg "TORCH_MUSA_URI=${TORCH_MUSA_URI}" \
+    --build-arg "TORCHVISION_URI=${TORCHVISION_URI}" \
+    --build-arg "TORCHAUDIO_URI=${TORCHAUDIO_URI}" \
+    --build-arg "TRITON_URI=${TRITON_URI}" \
+    -f "$DOCKERFILE" \
+    -t "$IMAGE_NAME:$IMAGE_TAG" .
+
+  # Check whether the build succeeded ($? still holds docker build's status).
+  if [ $? 
-eq 0 ]; then + echo "镜像构建成功: ${IMAGE_NAME}:${IMAGE_TAG}" + else + echo "镜像构建失败" + exit 1 + fi +} + +case "$ACTION" in + "build") + build_image + ;; + "start") + start_container + ;; + "stop") + stop_container + ;; + *) + echo "Unknown action: $ACTION" + echo "Usage: $0 [ --build | --start [ --daemon ] | --stop ]" + exit 1 + ;; +esac +``` + +# 构建Docker + +```bash +bash run_docker_musa.sh --build +``` + +# 运行Docker容器 + +```shell +bash run_docker_musa.sh +``` + +# 验证驱动 + +```shell +bash run_docker_musa.sh + +# 命令 +python3 -c "import torch;import torch_musa;print(torch.musa.is_available())" + +# 期望结果 +Error in cpuinfo: prctl(PR_SVE_GET_VL) failed # 此处正常 +True +``` + +# 下载VLLM_MUSA依赖包 + +> 请在docker容器里面运行 + +```shell +wget https://mt-ai-data.tos-cn-shanghai.volces.com/vllm_musa/v1.3.2/release_M1000_1.3.0.003/20260414/release_1.3.0.003-vllm_musa_1.3.2-torch_2.1.1.tar.gz + +tar zxvf release_1.3.0.003-vllm_musa_1.3.2-torch_2.1.1.tar.gz + +cd release_1.3.0.003-vllm_musa_1.3.2-torch_2.1.1 +``` + +# 安装 VLLM 与 VLLM-MUSA + +> 请在docker容器里面运行 + +```shell +pip3 install -r requirements.txt + +pip3 install triton-3.1.0-cp310-cp310-linux_aarch64.whl --force-reinstall + +pip3 install vllm-0.9.2.dev260+g7264557d6.empty-py3-none-any.whl +pip3 install vllm_musa-1.3.2+m1000-cp310-cp310-linux_aarch64.whl +``` + +# 验证 VLLM-MUSA + +> 请在docker容器里面运行 + +```shell +python3 -c "from vllm_musa import _musa_custom_ops;_musa_custom_ops.decode_mla" + +# 正常输出如下: +Error in cpuinfo: prctl(PR_SVE_GET_VL) failed # 此处正常 +``` + +# 模型下载 + +> 📌提示:模型较大(7B约11GB,30B约32GB),请预留足够磁盘空间。 + +```shell +sudo apt update + +sudo apt install git-lfs + +cd ${WorkDir} + +# 克隆 +git clone https://www.modelscope.cn/Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4.git +``` + +# 清理缓存 + +**建议启动前在【宿主机】的命令行清除缓存:** + +``` +export TRITON_CACHE_DIR="/tmp/triton" && sudo sh -c "echo 3 > /proc/sys/vm/drop_caches" +``` + +# 启动模型 + +> 请在docker容器里面运行 + +```shell +ENABLE_MUSA_MMA=1 vllm serve Qwen2.5-1.5B-Instruct-GPTQ-Int4 \ + --served_model_name 
Qwen2.5-1.5B-Instruct-GPTQ-Int4 \ + -tp 1 \ + --gpu-memory-utilization 0.7 \ + --quantization gptq \ + --num-gpu-blocks-override 1024 \ + --max-model-len 2048 \ + --swap-space 0 \ + --block-size 32 + +# vllm serve Qwen2.5-1.5B-Instruct-GPTQ-Int4 +# - Qwen2.5-1.5B-Instruct-GPTQ-Int4 为模型的路径,需要在模型上级目录运行 +# - ${modelDir}/Qwen2.5-1.5B-Instruct-GPTQ-Int4 可以在任意目录下执行 +# 不使用served_model_name指定模型id,那么模型id将依据【模型路径】来命名 +``` + +# 模型服务调用与测试 + +> 可在另一个命令行进入容器内部进行测试 + +## 查看模型列表 + +命令: + +``` +curl http://localhost:8000/v1/models +``` + +输出: + +```shell +{ + "object": "list", + "data": [ + { + "id": "Qwen2.5-1.5B-Instruct-GPTQ-Int4", + "object": "model", + "created": 1779872341, + "owned_by": "vllm", + "root": "Qwen2.5-1.5B-Instruct-GPTQ-Int4", + "parent": null, + "max_model_len": 2048, + "permission": [ + { + "id": "modelperm-d44348e1bf4e4d51b0f13e01cc415abc", + "object": "model_permission", + "created": 1779872341, + "allow_create_engine": false, + "allow_sampling": true, + "allow_logprobs": true, + "allow_search_indices": false, + "allow_view": true, + "allow_fine_tuning": false, + "organization": "*", + "group": null, + "is_blocking": false + } + ] + } + ] +} +``` + +## 发起对话请求 + +另开一个窗口调用,需要替换为本地的模型路径 + +```shell +curl http://localhost:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "Qwen2.5-1.5B-Instruct-GPTQ-Int4", + "temperature": 0.7, + "top_p": 0.8, + "top_k": 20, + "repetition_penalty":1.05, + "max_tokens": 1000, + "messages": [{"role": "user", "content": "介绍一下北京"}] + }' + +# 若运行的是Qwen3模型,该模型默认会进行思考,可在请求体中添加 "chat_template_kwargs": {"enable_thinking": false} 来取消思考 +# 结构如下: +curl http://localhost:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "某个Qwen3模型", + "temperature": 0.7, + "top_p": 0.8, + "top_k": 20, + "repetition_penalty":1.05, + "max_tokens": 1000, + "messages": [{"role": "user", "content": "介绍一下北京"}], + "chat_template_kwargs": {"enable_thinking": false} + }' +``` + +输出: + +```python +{ + 
"id": "chatcmpl-746cf161037d476986f26f65a823ed59", + "object": "chat.completion", + "created": 1779872754, + "model": "Qwen2.5-1.5B-Instruct-GPTQ-Int4", + "choices": [{ + "index": 0, + "message": { + "role": "assistant", + "reasoning_content": null, + "content": "北京是中国 waived是中华人民的首都,是中国的的首都之一。北京位於华北,北倚太行山,西界燕山,南依太行山,东与天津相望。北京自古以来就是中国的的重要城市,也是中国政治、经济、文化和业、文化中心之一。", + "tool_calls": [] + }, + "logprobs": null, + "finish_reason": "stop", + "stop_reason": null + }], + "usage": { + "prompt_tokens": 31, + "total_tokens": 95, + "completion_tokens": 64, + "prompt_tokens_details": null + }, + "prompt_logprobs": null, + "kv_transfer_params": null +} +``` + +## python调用 + +### 流式输出 + +```python +from openai import OpenAI + +# Modify OpenAI's API key and API base to use vLLM's API server. +openai_api_key = "EMPTY" +openai_api_base = "http://localhost:8000/v1" + +client = OpenAI( + # defaults to os.environ.get("OPENAI_API_KEY") + api_key=openai_api_key, + base_url=openai_api_base, +) + +models = client.models.list() +model = models.data[0].id + +chat_completion = client.chat.completions.create( + messages=[{ + "role": "system", + "content": "You are a helpful assistant." + }, { + "role": "user", + "content": "北京有哪些名胜古迹?" + }], + model=model, + temperature=0.7, + top_p=0.8, + extra_body={ + 'top_k':20, + 'repetition_penalty':1.05, # 惩罚重复,vllm默认没有加载需要添加,参考:https://modelscope.cn/models/Qwen/Qwen2.5-7B-Instruct/file/view/master?fileName=generation_config.json&status=1#L9 + # "chat_template_kwargs": {"enable_thinking": False}, # 对于Qwen3模型取消思考 + }, + max_tokens=512, + stream=True, # 启用流式输出 +) + +# 处理流式响应 +print("Chat response (streaming):") +for chunk in chat_completion: + if chunk.choices: + delta = chunk.choices[0].delta + content = delta.content + if content: + print(content, end='', flush=True) +print("\n - Chat response (end) -\n") +``` + +### 非流式输出 + +```python +from openai import OpenAI + +# Modify OpenAI's API key and API base to use vLLM's API server. 
+openai_api_key = "EMPTY" +openai_api_base = "http://localhost:8000/v1" + +client = OpenAI( + # defaults to os.environ.get("OPENAI_API_KEY") + api_key=openai_api_key, + base_url=openai_api_base, +) + +models = client.models.list() +model = models.data[0].id + +chat_completion = client.chat.completions.create( + messages=[{ + "role": "system", + "content": "You are a helpful assistant." + }, { + "role": "user", + "content": "北京有哪些名胜古迹?" + }], + model=model, + temperature=0.7, + top_p=0.8, + extra_body={ + 'top_k':20, + 'repetition_penalty':1.05, + # "chat_template_kwargs": {"enable_thinking": False}, # 对于Qwen3模型取消思考 + }, + max_tokens=512, + stream=False, +) + +print("Chat completion results:") +print(chat_completion) +``` + + + +# 常见问题 + +## Q1:docker拉取超时/失败 + +**A1:请在 `/etc/docker/daemon.json` 中配置国内镜像加速器(如阿里云、DaoCloud 等),并重启 docker 服务。步骤如下:** + +```shell +mkdir -p /etc/docker + +tee /etc/docker/daemon.json <<-'EOF' +{ +"registry-mirrors": ["https://xxxxxxx" ] +} +EOF + +# 查看是否创建了daemon.json 若无请用sudo权限重新执行一次 +cat /etc/docker/daemon.json + + +systemctl daemon-reload +systemctl restart docker +``` + -- Gitee From 5e569f815c8201fdd5604bd1f284f6c465388a88 Mon Sep 17 00:00:00 2001 From: "bob_Pierce.Cai" Date: Tue, 2 Jun 2026 12:06:08 +0800 Subject: [PATCH 2/2] =?UTF-8?q?[doc]=20=E6=96=B0=E5=A2=9E1.4.0=E7=9A=84NPU?= =?UTF-8?q?=20zoo=20=E4=B8=8E=20=E6=95=B0=E5=AD=97=E4=BA=BA=E6=96=87?= =?UTF-8?q?=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../T035- m1000_npu_model_zoo.md | 1478 +++++++++++++++++ ...24\346\225\260\345\255\227\344\272\272.md" | 185 +++ 2 files changed, 1663 insertions(+) create mode 100644 thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.4.0/T035- m1000_npu_model_zoo.md create mode 100644 "thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.4.0/T035-\346\221\251\345\260\224\346\225\260\345\255\227\344\272\272.md" diff --git a/thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.4.0/T035- m1000_npu_model_zoo.md 
b/thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.4.0/T035- m1000_npu_model_zoo.md new file mode 100644 index 0000000..c820f1f --- /dev/null +++ b/thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.4.0/T035- m1000_npu_model_zoo.md @@ -0,0 +1,1478 @@ +

+ logo +

+

摩尔线程-AIBOX

+

AI-SOC M1000-NPU-MODEL-ZOO 使用指引

+ + + +
+

lastAuthor:Pierce.Cai

+

lastDate:2026-05-28

+
+ + + + + + +> gitee地址:[MooreThreads-AI-SOC/m1000_npu_model_zoo](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo) + +# 准备工作 + +## 工作目录 + +为了方便后续操作,我们先约定一个工作目录。在接下来的指引中,我们将用 **`${WorkDir}`** 表示这个目录的路径。请您在开始前先**创建好您的工作目录**,并确保后续所有操作都在此目录下进行。 + +## 系统环境要求 + +| 组件 | 版本 | +| ----------- | ----------- | +| MUSA Driver | 5.1.0-M1000 | +| MUSA SDK | 5.1.0 | +| U-BootSDK | 1.4.0 | +| NPU SDK | 1.5.1 | + +## 查看musa环境 + +```shell +# 查看驱动 +dpkg -l|grep musa + +# 输出 +ii musa 5.1.0-M1000 arm64 Moore Threads MUSA driver [452f33695] +ii musa-sdk 5.1.0 arm64 Moore Threads MTGPU Software Development Kit + +# 查看环境 +ll /usr/local/ |grep musa + +# 输出 +lrwxrwxrwx 1 root root 10 Apr 24 01:22 musa -> musa-5.1.0/ +drwxr-xr-x 13 root root 4096 May 13 22:33 musa-5.1.0/ +``` + +## 安装musa(若不满足) + +> **请先解锁桌面!** +> +> 为以后方便,请在桌面右上角点击:电源->设置->用户->自动登录 + +百度网盘: https://pan.baidu.com/s/103Qz-ddCDxpkQFEhAc6Nxw?pwd=acje + +```shell +##################### 首先卸载当前驱动 ###################### +sudo dpkg -P musa-sdk +sudo dpkg -P musa # 输出 cryptsetup 的一些报错是正常的 +sudo rm -rf /usr/local/musa + +##################### 安装新的驱动 ###################### +sudo dpkg -i musa_5.1.0-M1000_arm64.deb +sudo dpkg -i musa-sdk_5.1.0-M1000_arm64.deb + +##################### 重启电脑 ###################### +sudo reboot +``` + +## 确认NPU驱动基础依赖 + +```shell +# 确认项1:查看NPU设备节点 +ls -l /dev/npucore + +# 输出: +crw-rw-rw- 1 root root 199, 0 May 28 14:27 /dev/npucore + +# 确认项2:查看NPU驱动版本 (三项为ii状态:kernel-module-m1000-npu, m1000-mtnn-unify-lib, m1000-mtnnrt) +dpkg -l | grep mtnn + +# 输出: +ii kernel-module-m1000-npu 1.5.1-r20260317034235-d701809 all mtnn unify driver +ii m1000-mtnn-unify-lib 1.5.1-r20260120174449-cca46b8 all mtnn unify lib +ii m1000-mtnnrt 1.5.1-r20260120174605-e56cea8 all mtnn runtime lib + +# 确认项3:查看NPU模块是否已加载 +lsmod | grep npu + +# 输出 +npucore 614400 0 +``` + +## 安装NPU_SDK(若不满足) + +```shell +wget https://moorethreads-ai-soc.tos-cn-beijing.volces.com/npu_sdk_release/npu_sdk_v1.5.1.tar.gz + +tar zxvf npu_sdk_v1.5.1.tar.gz 
+ +# 进入目录 +cd npu_sdk_v1.5.1/ + +# 以下是目录结构 +npu_sdk_v1.5.1 + ├── mtnnrt + ├── mtc_tool + │ + ├── doc + │ + └── debs + +# 进入存放驱动的debs目录 +cd debs/ + +sudo dpkg -i *deb + +# 开发者如需使用 Python-API ,请在对应的Python环境下进行安装 +cd ../mtnnrt/ +pip install libmtnn_api_4py-1.5.1-py3-none-any.whl +``` + +# 安装依赖 + +```shell +sudo apt install -y git cmake +``` + +# 安装miniconda + +```shell +curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh +chmod +x Miniconda3-latest-Linux-aarch64.sh && ./Miniconda3-latest-Linux-aarch64.sh +# 安装过程中一路回车到底 +``` + +# 配置conda环境变量 + +```bash +export PATH=$HOME/miniconda3/bin:$PATH +source ~/.bashrc +conda init +source ~/.bashrc +``` + +# 创建conda环境 + +```bash +conda create -n m1000_npu_mode_zoo python=3.10 +``` + +# 激活conda环境 + +```bash +# 激活虚拟环境 +conda activate m1000_npu_mode_zoo +``` + +# 配置PIP国内镜像源 + +```bash +pip config set global.index-url https://pypi.mirrors.ustc.edu.cn/simple/ +``` + +# 安装mtnn_api + +```shell +# 进入工作目录 +cd ${WorkDir} +# 确保虚拟环境 +conda activate m1000_npu_mode_zoo + +wget https://moorethreads-ai-soc.tos-cn-beijing.volces.com/npu_sdk_release/npu_sdk_v1.5.1.tar.gz +tar zxvf npu_sdk_v1.5.1.tar.gz +cd npu_sdk_v1.5.1/mtnnrt/ + +#安装mtnn_api +pip install ./libmtnn_api_4py-1.5.1-py3-none-any.whl +``` + +# 克隆npu_model_zoo项目 + +```bash +# 进入工作目录 +cd ${WorkDir} +# 确保虚拟环境 +conda activate m1000_npu_mode_zoo + +git clone --recursive https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo.git +``` + +# 安装项目依赖 + +```shell +# 进入工作目录 +cd ${WorkDir}/m1000_npu_model_zoo/ + +# 确保虚拟环境 +conda activate m1000_npu_mode_zoo + +pip3 install -r requirements.txt +``` + +# 模型转换与部署 + +## mobilenetv1 模型示例 + +### step1:导出model.onnx和input.npy + +```bash +# 进入001mobilenet_v1目录 +cd ${WorkDir}/m1000_npu_model_zoo/Image_Classification/001mobilenet_v1 + +#请确保已在conda环境中 +conda activate m1000_npu_mode_zoo + +python3 ./export.py +``` + +**超时问题:脚本默认从huggingface拉取mobilenet_v1_1.0_224,可以参考以下方式解决** + +```shell +# 进入001mobilenet_v1目录 +cd 
${WorkDir}/m1000_npu_model_zoo/Image_Classification/001mobilenet_v1 + +# 创建多级目录 +mkdir -p models/google/ + +# 安装lfs +sudo apt update +sudo apt install git-lfs + +cd models/google/ +# 克隆 +git clone https://www.modelscope.cn/google/mobilenet_v1_1.0_224.git + +# 打开export.py脚本 + +# 将以下两行的模型路径更改一下 +# 修改前 +preprocessor = AutoImageProcessor.from_pretrained("google/mobilenet_v1_1.0_224") +model = AutoModelForImageClassification.from_pretrained("google/mobilenet_v1_1.0_224") + +# 修改后 +preprocessor = AutoImageProcessor.from_pretrained("./models/google/mobilenet_v1_1.0_224") +model = AutoModelForImageClassification.from_pretrained("./models/google/mobilenet_v1_1.0_224") +``` + +### step2:模型转换设置 + +项目已提供可直接使用的mtc配置文件`mtc_config.yaml`,如需更改,常见关注如下。完整的配置选项,请参考 mtnn_guide 文档。 + +- `input_file_list` : ./input.npy(⚠️使用相对mtc_config.yaml的路径) +- `ModelPath` : ./model.onnx (⚠️使用相对mtc_config.yaml的路径) +- `quant_type` : 量化类型(与`quantizer`联合使用) +- `quantizer` : 量化方法 + +### step3:利用mtc工具进行模型转换 + +```shell +mtc --config ./mtc_config.yaml +``` + +运行后在`/opt/m1000-mtc-toolkit/bin/baselib/data/net_data/onnx/`路径下,生成名为 mtc_config.yaml 中 Name 的文件夹,在该文件夹的`./mtc/dump_npu_files/*_export_data/`中获得转换后的 *.mtnn 模型 + +### step4:验证mtnn模型的正确性和部署 + +除了cos值(>0.99),项目还提供了可运行 mtnn 和 onnx 模型预测的脚本sample.py + +**预测onnx模型** + +```shell +# 进入001mobilenet_v1目录 +cd ${WorkDir}/m1000_npu_model_zoo/Image_Classification/001mobilenet_v1 + +#请确保已在conda环境中 +conda activate m1000_npu_mode_zoo + +# onnx模型预测 CPU执行 +python3 ./sample.py --model_path ./model.onnx + +# 运行示例 +(m1000_npu_mode_zoo) dev@localhost:~/03-npu_mode_zoo/m1000_npu_model_zoo/Image_Classification/001mobilenet_v1$ python3 ./sample.py --model_path ./model.onnx +Top 5 Predictions: +1: Class Index 282 - Probability 88.01% +2: Class Index 283 - Probability 5.44% +3: Class Index 286 - Probability 4.38% +4: Class Index 762 - Probability 0.68% +5: Class Index 623 - Probability 0.45% +``` + +**预测mtnn模型** + +```shell +# 进入001mobilenet_v1目录 +cd 
${WorkDir}/m1000_npu_model_zoo/Image_Classification/001mobilenet_v1 + +#请确保已在conda环境中 +conda activate m1000_npu_mode_zoo + +# 先拷贝生成的mtnn模型到当前目录下 +cp /opt/m1000-mtc-toolkit/bin/baselib/data/net_data/onnx/mobilenetv1/mtc/dump_npu_files/mobilenetv1_export_data/mobilenetv1.mtnn ./ + +# mtnn模型预测 NPU执行 +python3 ./sample.py --model_path ./mobilenetv1.mtnn + +# 示例输出 +(m1000_npu_mode_zoo) dev@localhost:~/03-npu_mode_zoo/m1000_npu_model_zoo/Image_Classification/001mobilenet_v1$ python3 ./sample.py --model_path ./mobilenetv1.mtnn +model_size: 7202589 +Top 5 Predictions: +1: Class Index 282 - Probability 87.98% +2: Class Index 283 - Probability 5.50% +3: Class Index 286 - Probability 4.36% +4: Class Index 762 - Probability 0.68% +5: Class Index 623 - Probability 0.45% +``` + +通过比较 mtnn模型和onnx模型的预测结果,进一步验证NPU模型的正确性 + +之后可进一步进行数据集验证和嵌入式部署 + +## yolov8m模型示例 + +``` +001yolov8m/ +├── export.py 提取出onnx模型和输入的npy +├── prepare_dataset_yolo.py 配置量化数据集的脚本,受export.py 调用 +├── onnx_static_quantize.py onnx static 量化脚本,受export.py 调用 +├── mtc_config.yaml mtc的config文件 +├── sample.py 用于部署onnx或mtnn以验证正确性 +└── README.md 该模型说明和教程 +``` + +### step1: 导出model.onnx + +通过 `export.py` 导出onnx模型,并下载coco128数据集,配置为量化数据集,最后使用 onnx static 进行量化 + +```bash +# 进入001yolov8m目录 +cd ${WorkDir}/m1000_npu_model_zoo/Object_Detection/001yolov8m + +#请确保已在conda环境中 +conda activate m1000_npu_mode_zoo + +python3 ./export.py + +#--当直接 python3 ./export.py 运行很慢时,可以先下载资源再运行------------ +# 使用Proxy下载模型 +# 如果无法建立SSL连接,说明该proxy已被封,可在https://ghproxy.link/ 寻找新的proxy +wget https://ghfast.top/https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8m.pt + +# 然后运行 +python3 ./export.py +``` + +导出后,文件夹中增加如下文件: + +``` +001yolov8m/ +├── model.onnx 导出的onnx原始模型 +├── model_static_quantized.onnx onnx static 量化后的模型 +├── coco128 下载的coco128数据集 +├── yolo_input_dataset 导出的量化数据集 +``` + +### step2:模型转换设置 + +项目已提供可直接使用的mtc配置文件`mtc_config.yaml`,如需更改,常见关注如下。完整的配置选项,请参考 mtnn_guide 文档。 + +- `input_file_list` : ./input.npy(⚠️使用相对mtc_config.yaml的路径) +- 
`ModelPath` : ./model.onnx (⚠️使用相对mtc_config.yaml的路径) +- `static_quantization`: 使用静态量化设置为 `true` + +### step3:利用mtc工具进行模型转换 + +```shell +mtc --config ./mtc_config.yaml +``` + +运行后在`/opt/m1000-mtc-toolkit/bin/baselib/data/net_data/onnx/`路径下,生成名为 mtc_config.yaml 中 Name 的文件夹,在该文件夹的`./mtc/dump_npu_files/*_export_data/`中获得转换后的 *.mtnn 模型 + +### step4:验证mtnn模型的正确性 + +除了cos值(>0.99),项目还提供了可运行 mtnn 和 onnx 模型预测的脚本`sample.py` + +**预测onnx模型** + +```bash +# 进入001yolov8m目录 +cd ${WorkDir}/m1000_npu_model_zoo/Object_Detection/001yolov8m + +#请确保已在conda环境中 +conda activate m1000_npu_mode_zoo + +# onnx模型预测 CPU执行 +python3 ./sample.py --model_path ./model.onnx --input ../../data/bus.jpg --output result_onnx.jpg + +# 示例输出 +Inference time: 267.87 ms +Detection result saved to result_onnx.jpg # 测试图片结果在当前目录 +``` + +**预测mtnn模型** + +```shell +# 进入001yolov8m目录 +cd ${WorkDir}/m1000_npu_model_zoo/Object_Detection/001yolov8m + +#请确保已在conda环境中 +conda activate m1000_npu_mode_zoo + +# 先拷贝生成的mtnn模型到当前目录下 +cp /opt/m1000-mtc-toolkit/bin/baselib/data/net_data/onnx/yolov8m/mtc/dump_npu_files/yolov8m_export_data/yolov8m.mtnn ./ + +# mtnn模型预测 NPU执行 +python3 ./sample.py --model_path ./yolov8m.mtnn --input ../../data/bus.jpg --output result_mtnn.jpg + +# 示例输出 +model_size: 18224371 +Inference time: 33.78 ms +Detection result saved to result_mtnn.jpg # 测试图片结果在当前目录 +``` + +通过比较 mtnn模型和onnx模型的预测结果,进一步验证NPU模型的正确性 + +进一步进行数据集验证和嵌入式部署 + +## whisper-tiny模型示例 + +⚠️注意:此模型需要独立的conda环境 + +Whisper 是一个通用的语音识别模型,它使用了大量的多语言和多任务的监督数据来训练, 能够在英语语音识别上达到接近人类水平的鲁棒性和准确性,Whisper还可以进行多语言语音识别、语音翻译和语言识别等任务 + +``` +006whisper-tiny/ +├── export.py 提取出onnx模型和输入的npy +├── model/ whisper 模型需要的相关文件 +├── mtc_config_encoder.yaml encoder部分的 mtc 配置 +├── mtc_config_decoder.yaml decoder部分的 mtc 配置 +├── sample.py 用于部署onnx和mtnn模型以验证正确性 +└── README.md 该模型说明和教程 +``` + +### 创建conda环境 + +```bash +# 进入项目目录 +cd ${WorkDir}/m1000_npu_model_zoo/ + +# whisper 环境安装 +conda create -n whisper python=3.10 +conda activate whisper + +# 安装依赖 +pip3 install -r 
requirements.txt +pip3 install soundfile openai-whisper==20231117 + +# 安装mtnn_api +# 若您在前面章节已经下载过,直接修改pip安装路径即可 +wget https://moorethreads-ai-soc.tos-cn-beijing.volces.com/npu_sdk_release/npu_sdk_v1.4.0.tar.gz +tar zxvf npu_sdk_v1.4.0.tar.gz +cd npu_sdk_v1.4.0/mtnnrt + +pip3 install ./libmtnn_api_4py-1.4-py3-none-any.whl +``` + +### 步骤1:导出model.onnx和input.npy + +```shell +# 进入006whisper-tiny目录 +cd ${WorkDir}/m1000_npu_model_zoo/Other/006whisper-tiny/ + +python3 ./export.py +``` + +默认导出的onnx模型为tiny模型,可根据需要通过`--model_type`指定模型类型。 + +### 步骤2:模型转换设置 + +项目已提供可直接使用的mtc配置文件`mtc_config_xx.yaml`,如需更改,常见关注如下。完整的配置选项,请参考 mtnn_guide 文档。 + +- `input_file_list` : ./input.npy(⚠️使用相对mtc_config.yaml的路径) +- `ModelPath` : ./model.onnx (⚠️使用相对mtc_config.yaml的路径) +- `quant_type` : 量化类型(与`quantizer`联合使用) +- `quantizer` : 量化方法 + +本项目中encoder部分采用int16量化,decoder部分采用fp16量化 + +### 步骤3:利用mtc工具进行模型转换 + +```shell +mtc --config ./mtc_config_encoder.yaml +mtc --config ./mtc_config_decoder.yaml +``` + +运行后在`/opt/m1000-mtc-toolkit/bin/baselib/data/net_data/onnx/`路径下,生成名为 mtc_config.yaml 中 Name 的文件夹,在该文件夹的`./mtc/dump_npu_files/*_export_data/`中获得转换后的 *.mtnn 模型 + +### 步骤4:验证mtnn模型的正确性 + +**预测onnx模型** + +```shell +# 进入006whisper-tiny目录 +cd ${WorkDir}/m1000_npu_model_zoo/Other/006whisper-tiny/ + +#请确保已在conda环境中 +conda activate whisper + +# 使用onnx模型 +python3 ./sample.py --encoder_model_path ./whisper_encoder_tiny.onnx --decoder_model_path ./whisper_decoder_tiny.onnx --audio_path ./test_en.wav --task en + +# 示例输出 +Whisper output: Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel. 
+``` + +**预测mtnn模型** + +```shell +# 进入006whisper-tiny目录 +cd ${WorkDir}/m1000_npu_model_zoo/Other/006whisper-tiny/ + +#请确保已在conda环境中 +conda activate whisper + +# 先拷贝生成的mtnn模型到当前目录下 +cp /opt/m1000-mtc-toolkit/bin/baselib/data/net_data/onnx/whisper_encoder_tiny/mtc/dump_npu_files/whisper_encoder_tiny_export_data/whisper_encoder_tiny.mtnn ./ + +cp /opt/m1000-mtc-toolkit/bin/baselib/data/net_data/onnx/whisper_decoder_tiny/mtc/dump_npu_files/whisper_decoder_tiny_export_data/whisper_decoder_tiny.mtnn ./ + +# 使用mtnn模型 +python3 ./sample.py --encoder_model_path ./whisper_encoder_tiny.mtnn --decoder_model_path ./whisper_decoder_tiny.mtnn --audio_path ./test_en.wav --task en + +# 示例输出 +model_size: 20930403 +model_size: 59459467 + +Whisper output: Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel. +``` + +## wespeaker模型示例 + +⚠️注意:此模型需要独立的conda环境 + +说话人识别 + +``` +004wespeaker/ +├── export.py 提取出onnx模型和输入的npy +├── export_input.py 用于导出input.npy,受export.py调用 +├── mtc_config.yaml mtc的config文件 +├── sample.py 用于部署onnx和mtnn模型以验证正确性 +├── official_demo.py 官方demo,快速使用 +└── README.md 该模型说明和教程 +``` + +### 创建conda环境 + +```bash +# 进入项目目录 +cd ${WorkDir}/m1000_npu_model_zoo/ + +# wespeaker 环境安装 +conda create -n wespeaker python=3.10 +conda activate wespeaker + +# 安装依赖 +pip install git+https://github.com/wenet-e2e/wespeaker.git +pip3 install onnx onnxsim soundfile librosa numpy==2.2.6 + +# 安装mtnn_api +# 若您在前面章节已经下载过,直接修改pip安装路径即可 +wget https://moorethreads-ai-soc.tos-cn-beijing.volces.com/npu_sdk_release/npu_sdk_v1.4.0.tar.gz +tar zxvf npu_sdk_v1.4.0.tar.gz +cd npu_sdk_v1.4.0/mtnnrt + +pip3 install ./libmtnn_api_4py-1.4-py3-none-any.whl +``` + +### 步骤1:导出model.onnx和input.npy + +```shell +# 进入004wespeaker目录 +cd ${WorkDir}/m1000_npu_model_zoo/Other/004wespeaker/ + +# 确保安装环境 +conda activate wespeaker + +python3 ./export.py +``` + +**超时问题:脚本默认从huggingface拉取voxceleb_resnet34_LM.onnx,可以参考以下方式解决** + +```shell +# 进入004wespeaker目录 +cd 
${WorkDir}/m1000_npu_model_zoo/Other/004wespeaker/ + +#打开export.py脚本,将模型地址修改为镜像地址 +https://hf-mirror.com/Wespeaker/wespeaker-voxceleb-resnet34-LM/resolve/main/voxceleb_resnet34_LM.onnx?download=true +``` + +### 步骤2:模型转换设置 + +项目已提供可直接使用的mtc配置文件`mtc_config.yaml`,如需更改,常见关注如下。完整的配置选项,请参考 mtnn_guide 文档。 + +- `input_file_list` : ./input.npy(⚠️使用相对mtc_config.yaml的路径) +- `ModelPath` : ./model.onnx (⚠️使用相对mtc_config.yaml的路径) +- `quant_type` : 量化类型(与`quantizer`联合使用) +- `quantizer` : 量化方法 + +### 步骤3:利用mtc工具进行模型转换 + +``` +mtc --config ./mtc_config.yaml +``` + +运行后在`/opt/m1000-mtc-toolkit/bin/baselib/data/net_data/onnx/`路径下,生成名为 mtc_config.yaml 中 Name 的文件夹,在该文件夹的`./mtc/dump_npu_files/*_export_data/`中获得转换后的 *.mtnn 模型 + +### 步骤4:验证mtnn模型的正确性 + +``` +# 使用onnx模型 +python3 ./sample.py --model_path model_static.onnx --input input1024.npy +python3 ./sample.py --model_path model_static.onnx --input speaker1.wav + + +# 使用mtnn模型 + +cp /opt/m1000-mtc-toolkit/bin/baselib/data/net_data/onnx/wespeaker_int16/mtc/dump_npu_files/wespeaker_int16_export_data/wespeaker_int16.mtnn ${WorkDir}/m1000_npu_model_zoo/Other/004wespeaker/ + +python3 ./sample.py --model_path wespeaker_int16.mtnn --input input1024.npy +python3 ./sample.py --model_path wespeaker_int16.mtnn --input speaker1.wav +``` + +#### **预测onnx模型** + +```bash +#使用 ONNX 模型 + 预处理好的特征 -------------------------------- + +# 进入004wespeaker目录 +cd ${WorkDir}/m1000_npu_model_zoo/Other/004wespeaker/ + +#请确保已在conda环境中 +conda activate wespeaker + +python3 ./sample.py --model_path model_static.onnx --input input1024.npy + +# 示例输出 +2025-10-22 14:49:57.022478030 [W:onnxruntime:Default, device_discovery.cc:164 DiscoverDevicesForPlatform] GPU device discovery failed: device_discovery.cc:89 ReadFileContents Failed to open file: "/sys/class/drm/card0/device/vendor" + +(1, 256) +[[ 0.06175416 0.07394763 0.04809486 -0.05183442 0.01597237 -0.02811271 + 0.04524091 -0.01230831 0.10666951 -0.05512402 0.08321162 0.1096753 + -0.11125638 -0.08400543 0.06438035 0.02226631 0.09000795 0.01578397 + 
0.08567274 -0.02510518 -0.02617739 -0.10976101 0.03808949 0.07730354 + -0.02440264 -0.03208135 -0.07692465 -0.18743041 0.03890049 0.0153964 + 0.05757389 -0.02957565 0.06008356 0.01121876 -0.00027497 0.01191995 + -0.04189787 0.07760945 -0.01407159 -0.01586531 -0.00499368 0.01458262 + -0.07744788 0.00154183 -0.15083885 0.06262071 -0.02906791 -0.07830677 + 0.05199993 0.16714086 0.01486572 -0.06892414 0.04920255 -0.03063077 + -0.02250358 0.0027305 0.08748937 0.09160757 0.00762661 0.03286281 + 0.02641286 -0.07692845 0.00864477 -0.0218116 -0.11323612 -0.04476905 + -0.08575711 0.00925667 0.09966253 -0.05250091 -0.01152905 -0.09645098 + 0.06116279 0.06157736 0.07201838 0.03264837 -0.01572622 -0.11307874 + -0.03555959 0.04793519 0.06642839 0.02420875 -0.06102857 -0.02571664 + 0.04500219 0.11132073 -0.1599907 -0.0649804 -0.05712454 0.08970842 + -0.0031838 -0.01774945 0.02353931 0.08766222 0.03835266 -0.01001078 + -0.00639139 0.10128035 -0.03599345 0.05842532 0.02709731 -0.11345038 + 0.00208663 -0.02082464 -0.05825977 0.0019403 -0.02250865 0.01108355 + -0.09815197 -0.00994065 0.0042666 0.01243292 0.00326086 -0.02225921 + -0.05838213 -0.09494069 -0.09464682 0.10259155 -0.01862558 0.0581187 + -0.06467782 0.01200155 0.11880414 -0.13108125 -0.07807926 0.08279649 + -0.11124056 -0.01018865 0.01434857 0.02660723 -0.04494607 -0.06030759 + 0.0524314 -0.09953389 0.00190039 0.08170175 0.09722901 -0.01895785 + 0.08381603 0.04060753 0.06008651 0.02762383 0.05190575 0.03864 + -0.1339342 -0.02057567 -0.02704739 -0.05959381 -0.11657952 0.02740997 + -0.07905328 -0.08205444 -0.07708324 -0.034995 0.00223826 0.01364383 + 0.02042603 -0.04720423 0.01960236 0.01407438 -0.04776831 -0.00104137 + -0.03311884 -0.06710211 -0.0087723 -0.07440954 0.01878704 0.04359739 + 0.06329601 0.0125811 -0.03781127 -0.03523497 0.02751255 0.07952646 + -0.02880951 -0.10674644 0.06021541 0.04536697 -0.17881067 -0.05328718 + -0.20605311 0.05549764 0.01307757 -0.12401468 -0.04688993 0.08295709 + 0.01732995 0.12230302 
0.05575857 -0.21639442 -0.01922343 -0.1400938 + -0.09013721 -0.06639876 -0.04179816 0.05697892 0.00248353 -0.06945466 + 0.04887656 -0.00764221 -0.02769776 0.08311836 -0.03761084 0.03284037 + 0.08137957 -0.03580143 -0.01724376 -0.08913023 0.13767387 -0.12194921 + -0.02009844 0.06134125 0.10527842 0.10186529 -0.05977145 -0.02828394 + 0.01243341 0.03440234 -0.04703771 -0.02288068 -0.01796648 0.01017149 + 0.05394196 0.04313685 -0.04883212 -0.04891015 0.00321223 -0.03219022 + 0.09586394 -0.02632406 -0.04350443 0.05346872 -0.08295195 -0.00092889 + 0.12150446 -0.04073324 -0.10709976 -0.02376817 -0.03166633 0.09590879 + 0.00577981 0.19183604 0.00952161 -0.02747929 -0.00375382 0.01881048 + -0.12816192 -0.02935448 -0.04590284 0.02288579 0.09208582 -0.15548415 + 0.039862 0.16954209 0.11649276 0.0173209 ]] +``` + +```bash +# 使用 ONNX 模型 + 原始音频,脚本内部会自动提取特征 --------------------- + + # 进入004wespeaker目录 +cd ${WorkDir}/m1000_npu_model_zoo/Other/004wespeaker/ + +#请确保已在conda环境中 +conda activate wespeaker + +python3 ./sample.py --model_path model_static.onnx --input speaker1.wav + +# 使用 ONNX 模型 + 原始音频,脚本内部会自动提取特征 输出--------------------- +2025-10-22 14:50:18.928471790 [W:onnxruntime:Default, device_discovery.cc:164 DiscoverDevicesForPlatform] GPU device discovery failed: device_discovery.cc:89 ReadFileContents Failed to open file: "/sys/class/drm/card0/device/vendor" +(80, 421) +当前数组长度小于1024 +填充后形状: (1, 1024, 80) +填充后数组: [[[-12.932206 -14.97774 -16.327011 ... -35.383686 -35.365635 -35.642418] + [-18.236984 -23.004082 -24.822948 ... -40.818928 -39.8542 -40.626637] + [-18.46315 -23.545086 -28.120045 ... -42.06042 -40.921894 -41.310253] + ... + [ 0. 0. 0. ... 0. 0. 0. ] + [ 0. 0. 0. ... 0. 0. 0. ] + [ 0. 0. 0. ... 0. 0. 0. 
]]] +(1, 256) +[[ 0.06175416 0.07394763 0.04809486 -0.05183442 0.01597237 -0.02811271 + 0.04524091 -0.01230831 0.10666951 -0.05512402 0.08321162 0.1096753 + -0.11125638 -0.08400543 0.06438035 0.02226631 0.09000795 0.01578397 + 0.08567274 -0.02510518 -0.02617739 -0.10976101 0.03808949 0.07730354 + -0.02440264 -0.03208135 -0.07692465 -0.18743041 0.03890049 0.0153964 + 0.05757389 -0.02957565 0.06008356 0.01121876 -0.00027497 0.01191995 + -0.04189787 0.07760945 -0.01407159 -0.01586531 -0.00499368 0.01458262 + -0.07744788 0.00154183 -0.15083885 0.06262071 -0.02906791 -0.07830677 + 0.05199993 0.16714086 0.01486572 -0.06892414 0.04920255 -0.03063077 + -0.02250358 0.0027305 0.08748937 0.09160757 0.00762661 0.03286281 + 0.02641286 -0.07692845 0.00864477 -0.0218116 -0.11323612 -0.04476905 + -0.08575711 0.00925667 0.09966253 -0.05250091 -0.01152905 -0.09645098 + 0.06116279 0.06157736 0.07201838 0.03264837 -0.01572622 -0.11307874 + -0.03555959 0.04793519 0.06642839 0.02420875 -0.06102857 -0.02571664 + 0.04500219 0.11132073 -0.1599907 -0.0649804 -0.05712454 0.08970842 + -0.0031838 -0.01774945 0.02353931 0.08766222 0.03835266 -0.01001078 + -0.00639139 0.10128035 -0.03599345 0.05842532 0.02709731 -0.11345038 + 0.00208663 -0.02082464 -0.05825977 0.0019403 -0.02250865 0.01108355 + -0.09815197 -0.00994065 0.0042666 0.01243292 0.00326086 -0.02225921 + -0.05838213 -0.09494069 -0.09464682 0.10259155 -0.01862558 0.0581187 + -0.06467782 0.01200155 0.11880414 -0.13108125 -0.07807926 0.08279649 + -0.11124056 -0.01018865 0.01434857 0.02660723 -0.04494607 -0.06030759 + 0.0524314 -0.09953389 0.00190039 0.08170175 0.09722901 -0.01895785 + 0.08381603 0.04060753 0.06008651 0.02762383 0.05190575 0.03864 + -0.1339342 -0.02057567 -0.02704739 -0.05959381 -0.11657952 0.02740997 + -0.07905328 -0.08205444 -0.07708324 -0.034995 0.00223826 0.01364383 + 0.02042603 -0.04720423 0.01960236 0.01407438 -0.04776831 -0.00104137 + -0.03311884 -0.06710211 -0.0087723 -0.07440954 0.01878704 0.04359739 + 0.06329601 
0.0125811 -0.03781127 -0.03523497 0.02751255 0.07952646 + -0.02880951 -0.10674644 0.06021541 0.04536697 -0.17881067 -0.05328718 + -0.20605311 0.05549764 0.01307757 -0.12401468 -0.04688993 0.08295709 + 0.01732995 0.12230302 0.05575857 -0.21639442 -0.01922343 -0.1400938 + -0.09013721 -0.06639876 -0.04179816 0.05697892 0.00248353 -0.06945466 + 0.04887656 -0.00764221 -0.02769776 0.08311836 -0.03761084 0.03284037 + 0.08137957 -0.03580143 -0.01724376 -0.08913023 0.13767387 -0.12194921 + -0.02009844 0.06134125 0.10527842 0.10186529 -0.05977145 -0.02828394 + 0.01243341 0.03440234 -0.04703771 -0.02288068 -0.01796648 0.01017149 + 0.05394196 0.04313685 -0.04883212 -0.04891015 0.00321223 -0.03219022 + 0.09586394 -0.02632406 -0.04350443 0.05346872 -0.08295195 -0.00092889 + 0.12150446 -0.04073324 -0.10709976 -0.02376817 -0.03166633 0.09590879 + 0.00577981 0.19183604 0.00952161 -0.02747929 -0.00375382 0.01881048 + -0.12816192 -0.02935448 -0.04590284 0.02288579 0.09208582 -0.15548415 + 0.039862 0.16954209 0.11649276 0.0173209 ]] +``` + +#### 预测mtnn模型 + +```shell + # 进入004wespeaker目录 +cd ${WorkDir}/m1000_npu_model_zoo/004wespeaker/ + +#请确保已在conda环境中 +conda activate whisper + + +# 先拷贝生成的mtnn模型到当前目录下 +cp /opt/m1000-mtc-toolkit/bin/baselib/data/net_data/onnx/wespeaker_int16/mtc/dump_npu_files/wespeaker_int16_export_data/wespeaker_int16.mtnn ./ +``` + +```bash +# --------使用NPU模型+预处理特征 执行--------------------------------------------------------- + +python3 ./sample.py --model_path wespeaker_int16.mtnn --input input1024.npy +# --------使用NPU模型+预处理特征 输出--------------------------------------------------------- +model_size: 10819998 +(1, 256) +[[ 6.35390133e-02 7.54294544e-02 4.87283766e-02 -5.30631132e-02 + 1.32457064e-02 -2.68053710e-02 4.40619178e-02 -9.06872377e-03 + 1.06419683e-01 -5.43792769e-02 8.38833749e-02 1.10741749e-01 + -1.14003301e-01 -8.39497000e-02 6.51917756e-02 2.21296139e-02 + 9.20686573e-02 1.80202276e-02 8.49836618e-02 -2.58431509e-02 + -2.99551412e-02 -1.10340573e-01 
3.90334539e-02 7.86285996e-02 + -2.10140590e-02 -3.23608704e-02 -7.43520409e-02 -1.90061092e-01 + 3.85843739e-02 1.01726102e-02 5.67235574e-02 -3.00517790e-02 + 6.11887500e-02 9.03564878e-03 6.34358265e-04 5.95028885e-03 + -3.78782675e-02 7.77435750e-02 -1.53685007e-02 -1.72950793e-02 + -6.64254744e-03 1.93168446e-02 -7.92794824e-02 2.10580043e-03 + -1.50929481e-01 6.05858490e-02 -2.89188661e-02 -7.61246830e-02 + 5.43759540e-02 1.65105790e-01 1.33057432e-02 -6.99397177e-02 + 4.75038514e-02 -2.79215761e-02 -2.09760237e-02 2.63712741e-03 + 9.09502953e-02 9.41711068e-02 6.91031665e-03 3.16628516e-02 + 2.52119936e-02 -7.50168413e-02 9.09210090e-03 -2.17699893e-02 + -1.11601949e-01 -4.30273712e-02 -8.56325552e-02 7.18964171e-03 + 1.00557745e-01 -5.67442402e-02 -1.39248185e-02 -9.49113071e-02 + 6.28116503e-02 5.94371185e-02 7.46599287e-02 3.26657444e-02 + -1.31822741e-02 -1.12483129e-01 -3.52626964e-02 4.91229743e-02 + 6.67244643e-02 2.52043791e-02 -6.23475015e-02 -2.78964788e-02 + 4.07626629e-02 1.13333091e-01 -1.61951631e-01 -6.38021827e-02 + -6.09120503e-02 9.09712315e-02 -4.95913252e-03 -1.70412213e-02 + 2.15854384e-02 9.15492028e-02 3.89233790e-02 -6.18290808e-03 + -5.46803977e-03 1.05055019e-01 -3.39893773e-02 5.53997681e-02 + 2.97934525e-02 -1.12014636e-01 9.05949622e-04 -2.34513190e-02 + -5.93972802e-02 2.53418833e-03 -2.21363083e-02 4.82331123e-03 + -1.00372046e-01 -1.03408033e-02 6.42758794e-03 1.41478563e-02 + 6.32498506e-03 -2.24746875e-02 -5.93354329e-02 -9.70462114e-02 + -9.71140563e-02 1.06525764e-01 -2.05935966e-02 5.58415428e-02 + -6.63573444e-02 1.15147317e-02 1.20677844e-01 -1.27888054e-01 + -7.79951066e-02 8.22000653e-02 -1.10020205e-01 -1.30894985e-02 + 1.43670626e-02 2.90294848e-02 -4.83903512e-02 -5.98414913e-02 + 5.56014776e-02 -1.00059777e-01 1.86858047e-03 8.10700208e-02 + 9.55588669e-02 -1.61933228e-02 8.83429348e-02 4.04014364e-02 + 6.02706447e-02 2.68122926e-02 5.38519174e-02 3.87054384e-02 + -1.35651469e-01 -2.44396534e-02 -2.85179801e-02 
-6.40430897e-02 + -1.13664463e-01 2.65602022e-02 -7.50265270e-02 -7.88931698e-02 + -7.54901916e-02 -3.41845527e-02 3.56097519e-03 1.41606340e-02 + 2.10895874e-02 -4.62137461e-02 1.99580491e-02 1.73978172e-02 + -4.99253124e-02 -1.83948874e-03 -3.28275934e-02 -6.58996552e-02 + -9.15534515e-03 -7.42015541e-02 1.79575942e-02 4.65632901e-02 + 6.01159409e-02 1.36187105e-02 -3.72452214e-02 -3.52387242e-02 + 3.11431661e-02 7.86825418e-02 -3.04030459e-02 -1.09615833e-01 + 6.07877597e-02 4.61805761e-02 -1.83432430e-01 -5.23700491e-02 + -2.08961636e-01 5.84638566e-02 1.60003826e-02 -1.26453370e-01 + -4.87544015e-02 8.45020860e-02 1.97359845e-02 1.23068616e-01 + 5.70334904e-02 -2.18617439e-01 -2.03295238e-02 -1.35983795e-01 + -9.40278918e-02 -6.55276626e-02 -4.41493168e-02 5.64567745e-02 + 1.05132814e-03 -7.20095485e-02 4.60826978e-02 -7.15618674e-03 + -2.76792739e-02 8.37774351e-02 -3.42478305e-02 3.71700525e-02 + 8.51254612e-02 -3.77198085e-02 -1.63756162e-02 -9.10716653e-02 + 1.35600120e-01 -1.21854901e-01 -1.97206885e-02 6.11645430e-02 + 1.04395375e-01 1.05042726e-01 -5.88230006e-02 -2.85237804e-02 + 1.13043888e-02 3.33578475e-02 -4.75166142e-02 -2.47857869e-02 + -1.70153789e-02 1.29126860e-02 5.73450252e-02 4.15682346e-02 + -4.86380160e-02 -4.71688509e-02 1.24443322e-05 -2.90980507e-02 + 9.59894806e-02 -2.55213827e-02 -4.59580645e-02 5.23667708e-02 + -8.18857998e-02 -9.11689363e-04 1.21836901e-01 -4.10389528e-02 + -1.07598647e-01 -2.76031606e-02 -3.02426293e-02 9.36545134e-02 + 5.53290453e-03 1.90723538e-01 7.70752504e-03 -3.17686722e-02 + -7.90456776e-03 1.93169266e-02 -1.28455639e-01 -3.16730589e-02 + -4.82683107e-02 2.14633159e-02 9.26998854e-02 -1.52898490e-01 + 4.02583741e-02 1.67243451e-01 1.15860477e-01 1.74919311e-02]] +``` + +```shell + +# -------- 使用 NPU 模型 + 原始音频,完整端到端验证 --------------------------------------------------------- +python3 ./sample.py --model_path wespeaker_int16.mtnn --input speaker1.wav +# -------- 使用 NPU 模型 + 原始音频,完整端到端验证 
输出--------------------------------------------------------- + +model_size: 10819998 +(80, 421) +当前数组长度小于1024 +填充后形状: (1, 1024, 80) +填充后数组: [[[-12.932206 -14.97774 -16.327011 ... -35.383686 -35.365635 -35.642418] + [-18.236984 -23.004082 -24.822948 ... -40.818928 -39.8542 -40.626637] + [-18.46315 -23.545086 -28.120045 ... -42.06042 -40.921894 -41.310253] + ... + [ 0. 0. 0. ... 0. 0. 0. ] + [ 0. 0. 0. ... 0. 0. 0. ] + [ 0. 0. 0. ... 0. 0. 0. ]]] +(1, 256) +[[ 6.35390133e-02 7.54294544e-02 4.87283766e-02 -5.30631132e-02 + 1.32457064e-02 -2.68053710e-02 4.40619178e-02 -9.06872377e-03 + 1.06419683e-01 -5.43792769e-02 8.38833749e-02 1.10741749e-01 + -1.14003301e-01 -8.39497000e-02 6.51917756e-02 2.21296139e-02 + 9.20686573e-02 1.80202276e-02 8.49836618e-02 -2.58431509e-02 + -2.99551412e-02 -1.10340573e-01 3.90334539e-02 7.86285996e-02 + -2.10140590e-02 -3.23608704e-02 -7.43520409e-02 -1.90061092e-01 + 3.85843739e-02 1.01726102e-02 5.67235574e-02 -3.00517790e-02 + 6.11887500e-02 9.03564878e-03 6.34358265e-04 5.95028885e-03 + -3.78782675e-02 7.77435750e-02 -1.53685007e-02 -1.72950793e-02 + -6.64254744e-03 1.93168446e-02 -7.92794824e-02 2.10580043e-03 + -1.50929481e-01 6.05858490e-02 -2.89188661e-02 -7.61246830e-02 + 5.43759540e-02 1.65105790e-01 1.33057432e-02 -6.99397177e-02 + 4.75038514e-02 -2.79215761e-02 -2.09760237e-02 2.63712741e-03 + 9.09502953e-02 9.41711068e-02 6.91031665e-03 3.16628516e-02 + 2.52119936e-02 -7.50168413e-02 9.09210090e-03 -2.17699893e-02 + -1.11601949e-01 -4.30273712e-02 -8.56325552e-02 7.18964171e-03 + 1.00557745e-01 -5.67442402e-02 -1.39248185e-02 -9.49113071e-02 + 6.28116503e-02 5.94371185e-02 7.46599287e-02 3.26657444e-02 + -1.31822741e-02 -1.12483129e-01 -3.52626964e-02 4.91229743e-02 + 6.67244643e-02 2.52043791e-02 -6.23475015e-02 -2.78964788e-02 + 4.07626629e-02 1.13333091e-01 -1.61951631e-01 -6.38021827e-02 + -6.09120503e-02 9.09712315e-02 -4.95913252e-03 -1.70412213e-02 + 2.15854384e-02 9.15492028e-02 3.89233790e-02 -6.18290808e-03 + 
-5.46803977e-03 1.05055019e-01 -3.39893773e-02 5.53997681e-02 + 2.97934525e-02 -1.12014636e-01 9.05949622e-04 -2.34513190e-02 + -5.93972802e-02 2.53418833e-03 -2.21363083e-02 4.82331123e-03 + -1.00372046e-01 -1.03408033e-02 6.42758794e-03 1.41478563e-02 + 6.32498506e-03 -2.24746875e-02 -5.93354329e-02 -9.70462114e-02 + -9.71140563e-02 1.06525764e-01 -2.05935966e-02 5.58415428e-02 + -6.63573444e-02 1.15147317e-02 1.20677844e-01 -1.27888054e-01 + -7.79951066e-02 8.22000653e-02 -1.10020205e-01 -1.30894985e-02 + 1.43670626e-02 2.90294848e-02 -4.83903512e-02 -5.98414913e-02 + 5.56014776e-02 -1.00059777e-01 1.86858047e-03 8.10700208e-02 + 9.55588669e-02 -1.61933228e-02 8.83429348e-02 4.04014364e-02 + 6.02706447e-02 2.68122926e-02 5.38519174e-02 3.87054384e-02 + -1.35651469e-01 -2.44396534e-02 -2.85179801e-02 -6.40430897e-02 + -1.13664463e-01 2.65602022e-02 -7.50265270e-02 -7.88931698e-02 + -7.54901916e-02 -3.41845527e-02 3.56097519e-03 1.41606340e-02 + 2.10895874e-02 -4.62137461e-02 1.99580491e-02 1.73978172e-02 + -4.99253124e-02 -1.83948874e-03 -3.28275934e-02 -6.58996552e-02 + -9.15534515e-03 -7.42015541e-02 1.79575942e-02 4.65632901e-02 + 6.01159409e-02 1.36187105e-02 -3.72452214e-02 -3.52387242e-02 + 3.11431661e-02 7.86825418e-02 -3.04030459e-02 -1.09615833e-01 + 6.07877597e-02 4.61805761e-02 -1.83432430e-01 -5.23700491e-02 + -2.08961636e-01 5.84638566e-02 1.60003826e-02 -1.26453370e-01 + -4.87544015e-02 8.45020860e-02 1.97359845e-02 1.23068616e-01 + 5.70334904e-02 -2.18617439e-01 -2.03295238e-02 -1.35983795e-01 + -9.40278918e-02 -6.55276626e-02 -4.41493168e-02 5.64567745e-02 + 1.05132814e-03 -7.20095485e-02 4.60826978e-02 -7.15618674e-03 + -2.76792739e-02 8.37774351e-02 -3.42478305e-02 3.71700525e-02 + 8.51254612e-02 -3.77198085e-02 -1.63756162e-02 -9.10716653e-02 + 1.35600120e-01 -1.21854901e-01 -1.97206885e-02 6.11645430e-02 + 1.04395375e-01 1.05042726e-01 -5.88230006e-02 -2.85237804e-02 + 1.13043888e-02 3.33578475e-02 -4.75166142e-02 -2.47857869e-02 + 
-1.70153789e-02 1.29126860e-02 5.73450252e-02 4.15682346e-02 + -4.86380160e-02 -4.71688509e-02 1.24443322e-05 -2.90980507e-02 + 9.59894806e-02 -2.55213827e-02 -4.59580645e-02 5.23667708e-02 + -8.18857998e-02 -9.11689363e-04 1.21836901e-01 -4.10389528e-02 + -1.07598647e-01 -2.76031606e-02 -3.02426293e-02 9.36545134e-02 + 5.53290453e-03 1.90723538e-01 7.70752504e-03 -3.17686722e-02 + -7.90456776e-03 1.93169266e-02 -1.28455639e-01 -3.16730589e-02 + -4.82683107e-02 2.14633159e-02 9.26998854e-02 -1.52898490e-01 + 4.02583741e-02 1.67243451e-01 1.15860477e-01 1.74919311e-02]] +``` + + + +# 进阶处理 + +## MTC工具的常用设置 + +MTC工具的常用设置, 包含uint8量化、int16量化、混合量化、static量化、fp16量化等。 + +使用 MTC 进行模型转换,首先模型转换需要获得onnx模型,并使用 MTC 工具将onnx模型转换为mtnn模型。 + +此处对MTC工具的使用进行说明,并以列举了不同量化策略下的*.yaml配置文件。 + +## MTC 工具机器参数的使用说明 + +``` +mtc [-h] \ + --config +``` + +| 参数 | 说明 | +| -------- | --------------------------------------- | +| [-h] | 显示参数信息 | +| --config | 需要一个表示mtc配置的 *.yaml 的文件路径 | + +*.yaml 文件参数说明如下(以mobilenet_v1为例): + +```yaml +FuncConfig: + layer_data_analyze: false #按照Layername逐层调试选项,暂不支持 + layer_data_compare: false #按照Layername逐层对比选项,暂不支持 + mtnn_test_enable: false #是否支持使用mtnn模型进行软件仿真推理 +ModelConfig: + force_fp32input_list: true #设置量化模型的输入为float32,若有多个输入则用#隔开,如 true#false#true 表示模型的第一个和第三个输入为fp32,第二输入对应为量化类型 + force_fp32output_list: true #设置量化模型的输出为float32,若有多个输出则用#隔开,如 true#false#true 表示模型的第一个和第三个输出为fp32,第二输出对应为量化类型 + input_file_list: #指定输入的npy文件 + - ./input.npy +ModelPath: ./model.onnx #onnx模型的路径(Required) +Name: mobilenetv1 #模型名称(Required) +PerfConfig: + perf_collect_enable: true #是否将perf结果汇总输出 +QuantConfig: + quant_type: int16 #指定量化的数据类型,可选 int8 uint8 int16 + quantizer: dynamic_fixed_point #指定量化方式,可选 asymmetric_affine dynamic_fixed_point +TestEnable: true #暂未使用 +UseSingleCore: true #| 是否编译单核使用的模型,默认为true +``` + +示例: + +> ⚠️在模型文件夹内执行 + +``` +mtc --config ./mtc_config.yaml +``` + +运行后在`/opt/m1000-mtc-toolkit/bin/baselib/data/net_data/onnx/`路径下,生成名为 mtc_config.yaml 中 Name 
的文件夹,在该文件夹的`./mtc/dump_npu_files/*_export_data/`中获得转换后的 *.mtnn 模型 + +### 常用量化配置 + +常用的uint8量化、int16量化、混合量化、static 量化参考 + +#### uint8 量化 + +参考 [nsfw](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Classification/004nsfw/mtc_config.yaml) + +``` +FuncConfig: + layer_data_analyze: false + layer_data_compare: false + mtnnlite_test_enable: false +ModelConfig: + force_fp32input_list: true + force_fp32output_list: true + input_file_list: + - ./input.npy +ModelPath: ./model.onnx +Name: nsfw +PerfConfig: + perf_collect_enable: true +QuantConfig: + quant_type: uint8 + quantizer: asymmetric_affine +TestEnable: true +UseSingleCore: true +``` + +#### int16 量化 + +参考[mobilenet_v1](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Classification/001mobilenet_v1/mtc_config.yaml) + +``` +FuncConfig: + layer_data_analyze: false + layer_data_compare: false + mtnn_test_enable: false +ModelConfig: + force_fp32input_list: true + force_fp32output_list: true + input_file_list: + - ./input.npy +ModelPath: ./model.onnx +Name: mobilenetv1 +PerfConfig: + perf_collect_enable: true +QuantConfig: + quant_type: int16 + quantizer: dynamic_fixed_point +TestEnable: true +UseSingleCore: true +``` + +#### 混合量化 + +参考[yolov8s-seg](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Segmentation/003yolov8s-seg/mtc_config.yaml) + +```yaml +FuncConfig: + layer_data_analyze: false + layer_data_compare: false + mtnnlite_test_enable: false +ModelConfig: + force_fp32input_list: true + force_fp32output_list: true#true + input_dataset_list: + - ./yolo_input_dataset/data_list.txt +ModelPath: ./model.onnx +Name: yolov8s-seg +PerfConfig: + perf_collect_enable: false +QuantConfig: + iterations: 128 + algorithm: normal + quant_type: uint8 + quantizer: asymmetric_affine + quantize_op_list: Mul#MatMul#Conv#MaxPool#Add#Sigmoid#Split#ConvTranspose +TestEnable: true +UseSingleCore: true +``` + +#### static 量化 + 
+static量化前,需要使用onnx自带的量化工具,对onnx模型进行量化。具体过程参考[yolov8m](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Object_Detection/001yolov8m/mtc_config.yaml) + +```yaml +FuncConfig: + layer_data_analyze: false + layer_data_compare: false + mtnnlite_test_enable: false +ModelConfig: + input_file_list: + - ./yolo_input_dataset/000000000009.npy + force_fp32input_list: true + force_fp32output_list: true +ModelPath: ./model_static_quantized.onnx +Name: yolov8m +PerfConfig: + perf_collect_enable: true +QuantConfig: + static_quantization: true +TestEnable: true +UseSingleCore: true +``` + +#### fp16量化 + +参考[whisper_decoder_tiny的 decoder 模型](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Speech_Recognition/001whisper_decoder_tiny/mtc_config.yaml) + +```yaml +FuncConfig: + layer_data_analyze: false + layer_data_compare: false + mtnnlite_test_enable: false +ModelConfig: + input_file_list: + - ./x_tokens.npy + - ./encoder_output.npy + force_fp32input_list: false#true + force_fp32output_list: true +ModelPath: ./whisper_decoder_tiny.onnx +Name: whisper_decoder_tiny +PerfConfig: + perf_collect_enable: true +TestEnable: true +UseSingleCore: true +``` + +## 配置量化数据集 + +为使得NPU模型的精度更高,可在量化阶段配置量化数据集。参考 mtnn_guide文档 第十章 配置数据集。 + +在量化/校准过程中,input_dataset_list 提供了待送入模型的样本,用于: + +- 收集各层激活分布以计算量化参数; +- 验证量化前后模型的输出精度与性能。 + +### YOLO COCO128 量化数据集配置 + +#### 生成量化数据集 + +``` +python3 ./prepare_dataset.py +``` + +#### 配置mtc_config.yaml文件, 指定输入的图片路径 + +``` +ModelConfig: + input_dataset_list: + - ./yolo_input_dataset/data_list.txt +``` + +下面是数据集配置示例,直接写到.yaml 文件中: + +```yaml +ModelConfig: + #用于校准(calibration)或推理测试的输入数据集列表 + #每一行表示一个样本文件的绝对或相对路径(如图片、npy数据等) + #校准数据按照PreprocConfig中配置的mean_values、 std_values 以及reverse_channel来预处理 + input_dataset_list: + - /home/dev/zxk_test/from_zhujian/test/datasets.
txt +PreprocConfig: + mean_values: [0,0,0] + std_values: 254 + reverse_channel: false +QuantConfig: + #量化过程使用的迭代次数 + #框架会依次读取input_dataset_list中列出的样本,并且根据 + # PreprocConfig中配置的mean_values、 std_values 以及reverse_channel来预处理 + #并在前N次(iterations)样本上采集激活分布或执行量化误差估计 + # exclude_op_list 会排除list中的算子类型,其它算子类型按照量化配置来量化,如果全量化可以不配置 + # quantize_op_list会只量化list中的算子类型,其它算子不量化,如果全量化可以不配置 + iterations: 100 + algorithm: normal + quant_type: int8 #指定量化的数据类型,可选 int8 uint8 int16 + quantizer: asymmetric_affine #指定量化方式,可选 asymmetric_affine dynamic_fixed_point + exclude_op_list: Concat#Softmax +``` + +## 前处理功能 + +为追求更强的性能,mtnn提供图像前处理接口,可将前处理部分置于NPU执行。 + +在模型转换/导出环节,把原本需要在外部做的图像格式转换、尺寸调整、通道重排和数值规范等步骤,通过配置(如 preproc_type、mean、scale、preproc_crop、preproc_perm 等)一并“烘焙”到模型中,这样推理时只要给模型传入原始输入 buffer,就能自动完成整个前处理+推理流程,简化部署、保证一致性并最大化硬件性能。 + +示例说明:下面是 **`PreprocConfig`** 配置示例,直接写到**`.yaml`** 文件中: + +```yaml +PreprocConfig: + # 每个通道减去的均值 + mean_values: [127.5, 127.5, 127.5] + + # 每个通道除以的标准差(若是单个值,所有通道都用这个) + std_values: 128 + + # 是否反转通道(BGR←→RGB):当设置为 'false' 时,不会做通道顺序调换;当设置为 'true' 时,会把三通道图像的顺序从默认的(如BGR)翻转成RGB,或反之。 + reverse_channel: false + + # 是否在模型图里插入一个预处理节点 + add_preproc_node: true + + # 输入像素格式(IMAGE_RGB / IMAGE_YUYV422 / NV12 ...),即预处理节点的类型,支持 "IMAGE_RGB, IMAGE_RGB888_PLANAR, IMAGE_RGB888_PLANAR_SEP, IMAGE_I420, IMAGE_NV12, IMAGE_NV21, IMAGE_YUV444, IMAGE_YUYV422, IMAGE_UYVY422, IMAGE_GRAY, IMAGE_BGRA, TENSOR" + preproc_type: IMAGE_YUYV422 + + # resize到目标尺寸 格式为:[width,height],定义预处理节点里的“重采样/缩放”目标尺寸 + preproc_image_size: [1920,1080] + # 是否启用裁剪功能。'false' 时会跳过裁剪;'true' 时才会读取下面的 crop_rect。 + enable_preproc_crop: false + # 裁剪区域 格式:[x, y, width, height] + crop_rect: [0,0,1920,1080] + # tensor 维度置换,默认[0,1,2,3]表示不变。若想把NHWC → NCHW,应写成`[0,3,1,2]`。 + preproc_perm: [0,1,2,3] + + +``` + + + +# 模型支持 + +## 图像分类 Image Classification + +| | Name | Quant Type | Input Shape | Model Link | +| ---- | ------------------------------------------------------------ | ---------- | ------------- |
------------------------------------------------------------ | +| 001 | [mobilenet_v1](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Classification/001mobilenet_v1/README.md) | INT16 | [1,3,224,224] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2Fgoogle%2Fmobilenet_v1_1.0_224) | +| 002 | [mobilenet_v3](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Classification/002mobilenet_v3/README.md) | INT16 | [1,3,224,224] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2Ftimm%2Fmobilenetv3_small_100.lamb_in1k) | +| 003 | [vit](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Classification/003vit/README.md) | INT16 | [1,3,224,224] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2Fgoogle%2Fvit-base-patch16-224) | +| 004 | [nsfw](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Classification/004nsfw/README.md) | UINT8 | [1,3,224,224] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2FFalconsai%2Fnsfw_image_detection) | +| 005 | [gender-cls](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Classification/005gender-cls/README.md) | INT16 | [1,3,224,224] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2Frizvandwiki%2Fgender-classification) | +| 006 | [swin_vit](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Classification/006swin_vit/README.md) | INT16 | [1,3,224,224] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2Ftimm%2Fswin_s3_tiny_224.ms_in1k) | +| 007 | [efficientnet](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Classification/007efficientnet/README.md) | UINT8 | [1,3,260,260] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2Fgoogle%2Fefficientnet-b2) | +| 008 | 
[deit](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Classification/008deit/README.md) | INT16 | [1,3,224,224] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2Ffacebook%2Fdeit-base-patch16-224) | +| 009 | [dino_vit](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Classification/009dino_vit/README.md) | INT16 | [1,3,224,224] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2Ffacebook%2Fdinov2-base-imagenet1k-1-layer) | +| 010 | [lenet](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Classification/010lenet/README.md) | UINT8 | [1,1,28,28] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2FEclipseAidge%2FLeNet) | +| 011 | [Alexnet](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Classification/011Alexnet/README.md) | UINT8 | [1,3,224,224] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2FKalray%2Falexnet) | +| 012 | [VGG](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Classification/012VGG/README.md) | UINT8 | [1,3,224,224] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2Ftimm%2Fvgg16.tv_in1k) | + +## 目标检测 Object_Detection + +| | Name | Quant Type | Input Shape | Model Link | +| ---- | ------------------------------------------------------------ | ----------- | -------------- | ------------------------------------------------------------ | +| 001 | [yolov8m](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Object_Detection/001yolov8m/README.md) | static onnx | [1,3,640,640] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2Fultralytics%2Fultralytics) | +| 002 | [yolov8s](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Object_Detection/002yolov8s/README.md) | static onnx | [1,3,640,640] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2Fultralytics%2Fultralytics) | +| 003 | 
[yolov11m](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Object_Detection/003yolov11m/README.md) | static onnx | [1,3,640,640] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2Fultralytics%2Fultralytics) | +| 004 | [yolov8m-pose](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Object_Detection/004yolov8m-pose/README.md) | static onnx | [1,3,640,640] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2Fultralytics%2Fultralytics) | +| 005 | [yolov8s-pose](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Object_Detection/005yolov8s-pose/README.md) | static onnx | [1,3,640,640] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2Fultralytics%2Fultralytics) | +| 006 | [yolov11m-pose](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Object_Detection/006yolov11m-pose/README.md) | static onnx | [1,3,640,640] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2Fultralytics%2Fultralytics) | +| 007 | [retinaface(Resnet50)](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Object_Detection/007retinaface_Resnet50/README.md) | INT16 | [1,3,640,640] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2Fbiubug6%2FPytorch_Retinaface) | +| 008 | [adaface(R100-WebFace12M)](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Object_Detection/008adaface/README.md) | INT16 | [1,3,112,112] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2Fminchul%2Fcvlface_adaface_ir101_webface12m) | +| 009 | [yolov5s](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Object_Detection/009yolov5s/README.md) | static onnx | [1,3,640,640] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2Fultralytics%2Fultralytics) | +| 010 | [efficientdet-d0](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Object_Detection/010efficientDet-D0/README.md) | INT16 | [1,3,512,512] | 
[🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2FKalray%2Fefficientdet-d0) | +| 011 | [faster-rcnn](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Object_Detection/011faster-rcnn/README.md) | UINT8 | [1,3,512,512] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2FKalray%2Ffaster-rcnn-rn50) | +| 012 | [ssd](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Object_Detection/012ssd/README.md) | UINT8 | [1,3,300,300] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2FKalray%2Fssd-mobilenet-v2) | +| 013 | [detr](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Object_Detection/013detr/README.md) | UINT8 | [1,3,800,1066] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2Ffacebook%2Fdetr-resnet-50) | + +## 图像分割 Image_Segmentation + +| | Name | Quant Type | Input Shape | Model Link | +| ---- | ------------------------------------------------------------ | -------------- | -------------- | ------------------------------------------------------------ | +| 001 | [deeplab_v3](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Segmentation/001deeplab_v3/README.md) | INT16 | [1,3,513,513] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2Fgoogle%2Fdeeplabv3_mobilenet_v2_1.0_513) | +| 002 | [Unet](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Segmentation/002Unet/README.md) | UINT8 | [1,3,640,1280] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2Fqualcomm%2FUnet-Segmentation%2Ftree%2Fmain) | +| 003 | [yolov8s-seg](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Segmentation/003yolov8s-seg/README.md) | UINT8 混合量化 | [1,3,640,640] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2Fultralytics%2Fultralytics) | +| 004 | 
[yolov8m-seg](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Segmentation/004yolov8m-seg/README.md) | UINT8 混合量化 | [1,3,640,640] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2Fultralytics%2Fultralytics) | +| 005 | [yolov11m-seg](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Segmentation/005yolov11m-seg/README.md) | UINT8 混合量化 | [1,3,640,640] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2Fultralytics%2Fultralytics) | +| 006 | [yolov5s-seg](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Segmentation/006yolov5s-seg/README.md) | UINT8 混合量化 | [1,3,640,640] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2Fultralytics%2Fyolov5) | +| 007 | [FastSAM](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Image_Segmentation/007FastSAM/README.md) | UINT8 混合量化 | [1,3,640,640] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2Fcqu20160901%2FFastSAM_onnx_rknn) | + +## 其他模型 Other + +| | Name | Quant Type | Categrory | Input Shape | Model Link | +| ---- | ------------------------------------------------------------ | ---------- | ------------ | ------------- | ------------------------------------------------------------ | +| 001 | [mediapipe-hand](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Other/001mediapipe-hand/README.md) | UINT8 | 手势识别 | [1,3,256,256] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2Fqualcomm%2FMediaPipe-Hand-Detection) | +| 002 | [PP-OCRv5](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Other/002PP-OCRv5/README.md) | INT16 | 文字识别 | [1,3,320,320] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2FPaddlePaddle%2FPP-OCRv5_server_det) | +| 003 | [Superpoint](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Other/003Superpoint/README.md) | UINT8 | 关键点检测 | [1,1,480,640] | 
[🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2Fqualcomm%2FUnet-Segmentation) | +| 004 | [wespeaker](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Other/004wespeaker/README.md) | INT16 | 说话人识别 | [1,1024,80] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2FWespeaker%2Fwespeaker-voxceleb-resnet34-LM) | +| 005 | [GTE](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Other/005GTE/README.md) | INT16 | 语言文本表示 | int64[4,11] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2Fhkchengrex%2FCutie) | +| 006 | [whisper-tiny](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Other/006whisper-tiny/README.md) | INT16/fp16 | 语音识别 | [1,80,3000] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fhuggingface.co%2Fopenai%2Fwhisper-tiny) | +| 008 | [InsightFace](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Other/008InsightFace/README.md) | UINT8 | 人脸检测 | [1,3,640,640] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2Fdeepinsight%2Finsightface) | +| 009 | [cutie](https://gitee.com/MooreThreads-AI-SOC/m1000_npu_model_zoo/blob/master/Other/009cutie/README.md) | UINT8 | 视频图像分割 | [1,3,480,864] | [🔗](https://gitee.com/link?target=https%3A%2F%2Fgithub.com%2Fhkchengrex%2FCutie) | + +# 常见问题 + +## 问题1: 运行NPU模型部署代码 sample.py 时,找不到 *.mtnn 文件 + +针对常用的模型,提供部署NPU模型的 sample.py 脚本,方便开发者开箱即用。运行 sample.py 需要指定 *.mtnn 模型的路径。 + +运行`mtc --config ./mtc_config.yaml`后,会在`/opt/m1000-mtc-toolkit/bin/baselib/data/net_data/onnx/`路径下,生成名为 mtc_config.yaml 中 Name 的文件夹,在该文件夹下的`./mtc/dump_npu_files/*_export_data/`中的 *.mtnn 文件,即为可在NPU上运行的模型。 + +方便起见,可将该文件复制到 sample.py 的同级路径下,运行 sample.py 。 + +## 问题2: 如何查看 *.mtnn 模型的网络结构 + +*.mtnn 文件是最终在NPU上运行的模型,并不能直接查看网络结构。可以通过 **定制化的netron** 打开查看在同一路径生成的 中间格式 *.mtnnir 文件,查看网络结构。 + +> 例如:yolov8m +> +> /opt/m1000-mtc-toolkit/bin/baselib/data/net_data/onnx/yolov8m/mtc/dump_npu_files/yolov8m_export_data/ +> +> 中的*.mtnnir + +- linux_arm64 
版本下载地址:(AIBook 和 AIModule 的 ubuntu 系统下可直接安装使用) [Linux_arm64版安装包](https://moorethreads-ai-soc.tos-cn-beijing.volces.com/npu_tools/netron-mtnn/linux_arm64/netron-mtnn_8.2.2-r20250930152403+d17c14b7_arm64.deb) + + +- windows_x64 版本下载地址: [windows_x64版安装包](https://moorethreads-ai-soc.tos-cn-beijing.volces.com/npu_tools/netron-mtnn/windows_x64/Netron-MTNN_Setup_8.2.2.exe) + +```bash +# 在 Linux 系统中安装 +sudo dpkg -i netron-mtnn_8.2.2-r20250930152403+d17c14b7_arm64.deb +# 在桌面运行 +# 报错 Cannot mount AppImage, please check your FUSE setup. You might still be able to extract the contents of this AppImage if you run it with the --appimage-extract option.See https://github.com/AppImage/AppImageKit/wiki/FUSE for more information + +# 方法一:安装或重新安装 FUSE 支持 +sudo apt update +sudo apt install fuse libfuse2 libfuse3-3 + +# 方法二:临时绕过 FUSE(直接提取运行)netron-mtnn +./netron-mtnn.AppImage --appimage-extract +# 这会创建一个名为 squashfs-root 的文件夹,包含所有程序文件。 + +# 然后您可以直接运行其中的可执行文件: + + + +./squashfs-root/AppRun +``` + +## 问题3: 如何快速测试 NPU 模型的推理性能 + +系统中已预先安装了 mtnn_test 性能测试工具。也可通过编译[mtnn_test源码](https://gitee.com/MooreThreads-AI-SOC/m1000_mtnn/blob/master/sample/mtnn_test/main.c)获得。 + + + +### 测试步骤 + +#### Step1:查看NPU模型网络的输入,生成二进制的输入文件。 + +在 `npu_model_zoo/doc/performace-test` 文件夹中提供了一个快速生成输入文件的脚本`generate_input_dat.py`,这里以mobilenetv1为例,输入为[1,3,224,224]的float32类型。 + +```bash +# 进入目标目录 +cd npu_model_zoo/doc/performace-test/ + +``` + +通过修改`generate_input_dat.py` 脚本中的参数,生成输入文件。 + +```python +_data_type = DATA_TYPE.M_FLOAT32 #float32数据类型 +_Height = 224 # 输入高度 +_Width = 224 # 输入宽度 +``` + +修改完成后,运行脚本:`python3 ./generate_input_dat.py`,生成`input_float32_224_224.dat`。 + +#### Step2:运行mtnn_test + +```bash +mtnn_test -l 100 -i 1 input_float32_224_224.dat -n mobilenetv1.mtnn -o 1 +``` + +| 参数 | 说明 | +| ---- | ----------------------- | +| -l | 循环次数 | +| -i | 输入张量个数 & 输入张量 | +| -n | mtnn文件 | +| -o | 输出张量个数 | + +更多参数可使用
`mtnn_test -h` 查看 + +输出结果如下: + +```shell + mtnn_test -l 100 -i 1 input_float32_224_224.dat -n mobilenetv1.mtnn -o 1 +Thread 0: Enter +[t0] mtnn file=mobilenetv1.mtnn +[t0][99] Test Passed. use time 2.37ms, 422.83fps +[t0][98] Test Passed. use time 1.47ms, 679.81fps +[t0][97] Test Passed. use time 1.45ms, 687.76fps +[t0][96] Test Passed. use time 1.45ms, 692.04fps +[t0][95] Test Passed. use time 1.44ms, 693.48fps +[t0][94] Test Passed. use time 1.44ms, 693.48fps +[t0][93] Test Passed. use time 1.44ms, 694.44fps +[t0][92] Test Passed. use time 1.44ms, 693.96fps +[t0][91] Test Passed. use time 1.44ms, 695.41fps +[t0][90] Test Passed. use time 1.44ms, 694.44fps +[t0][89] Test Passed. use time 1.44ms, 693.48fps +[t0][88] Test Passed. use time 1.44ms, 694.93fps +[t0][87] Test Passed. use time 1.44ms, 695.89fps +[t0][86] Test Passed. use time 1.44ms, 694.93fps +[t0][85] Test Passed. use time 1.44ms, 695.89fps +[t0][84] Test Passed. use time 1.44ms, 694.44fps +[t0][83] Test Passed. use time 1.44ms, 692.52fps +[t0][82] Test Passed. use time 1.44ms, 694.93fps +[t0][81] Test Passed. use time 1.44ms, 696.38fps +[t0][80] Test Passed. use time 1.44ms, 694.44fps +[t0][79] Test Passed. use time 1.44ms, 693.48fps +[t0][78] Test Passed. use time 1.44ms, 695.41fps +[t0][77] Test Passed. use time 1.44ms, 693.96fps +[t0][76] Test Passed. use time 1.44ms, 695.89fps +[t0][75] Test Passed. use time 1.43ms, 697.35fps +[t0][74] Test Passed. use time 1.44ms, 694.93fps +[t0][73] Test Passed. use time 1.44ms, 694.44fps +[t0][72] Test Passed. use time 1.44ms, 695.41fps +[t0][71] Test Passed. use time 1.44ms, 694.44fps +[t0][70] Test Passed. use time 1.44ms, 696.38fps +[t0][69] Test Passed. use time 1.44ms, 696.86fps +[t0][68] Test Passed. use time 1.44ms, 696.86fps +[t0][67] Test Passed. use time 1.44ms, 695.41fps +[t0][66] Test Passed. use time 1.44ms, 695.41fps +[t0][65] Test Passed. use time 1.44ms, 696.38fps +[t0][64] Test Passed. use time 1.45ms, 692.04fps +[t0][63] Test Passed. 
use time 1.45ms, 691.56fps +[t0][62] Test Passed. use time 1.44ms, 693.96fps +[t0][61] Test Passed. use time 1.44ms, 695.41fps +[t0][60] Test Passed. use time 1.44ms, 695.41fps +[t0][59] Test Passed. use time 1.44ms, 692.52fps +[t0][58] Test Passed. use time 1.44ms, 695.41fps +[t0][57] Test Passed. use time 1.44ms, 696.86fps +[t0][56] Test Passed. use time 1.44ms, 693.48fps +[t0][55] Test Passed. use time 1.44ms, 694.93fps +[t0][54] Test Passed. use time 1.44ms, 696.38fps +[t0][53] Test Passed. use time 1.44ms, 693.96fps +[t0][52] Test Passed. use time 1.44ms, 696.86fps +[t0][51] Test Passed. use time 1.44ms, 696.86fps +[t0][50] Test Passed. use time 1.44ms, 696.38fps +[t0][49] Test Passed. use time 1.44ms, 696.86fps +[t0][48] Test Passed. use time 1.44ms, 696.86fps +[t0][47] Test Passed. use time 1.44ms, 695.89fps +[t0][46] Test Passed. use time 1.43ms, 697.35fps +[t0][45] Test Passed. use time 1.44ms, 694.44fps +[t0][44] Test Passed. use time 1.43ms, 698.32fps +[t0][43] Test Passed. use time 1.44ms, 694.93fps +[t0][42] Test Passed. use time 1.44ms, 695.89fps +[t0][41] Test Passed. use time 1.44ms, 695.41fps +[t0][40] Test Passed. use time 1.44ms, 696.86fps +[t0][39] Test Passed. use time 1.44ms, 696.38fps +[t0][38] Test Passed. use time 1.44ms, 694.93fps +[t0][37] Test Passed. use time 1.44ms, 696.86fps +[t0][36] Test Passed. use time 1.44ms, 696.38fps +[t0][35] Test Passed. use time 1.43ms, 697.84fps +[t0][34] Test Passed. use time 1.43ms, 697.35fps +[t0][33] Test Passed. use time 1.44ms, 694.93fps +[t0][32] Test Passed. use time 1.44ms, 695.89fps +[t0][31] Test Passed. use time 1.43ms, 697.84fps +[t0][30] Test Passed. use time 1.44ms, 695.41fps +[t0][29] Test Passed. use time 1.45ms, 689.66fps +[t0][28] Test Passed. use time 1.45ms, 692.04fps +[t0][27] Test Passed. use time 1.44ms, 695.89fps +[t0][26] Test Passed. use time 1.44ms, 696.86fps +[t0][25] Test Passed. use time 1.44ms, 695.89fps +[t0][24] Test Passed. use time 1.44ms, 695.89fps +[t0][23] Test Passed. 
use time 1.44ms, 696.86fps +[t0][22] Test Passed. use time 1.44ms, 694.44fps +[t0][21] Test Passed. use time 1.43ms, 697.35fps +[t0][20] Test Passed. use time 1.44ms, 696.38fps +[t0][19] Test Passed. use time 1.44ms, 696.86fps +[t0][18] Test Passed. use time 1.43ms, 697.35fps +[t0][17] Test Passed. use time 1.44ms, 696.86fps +[t0][16] Test Passed. use time 1.44ms, 695.89fps +[t0][15] Test Passed. use time 1.44ms, 696.38fps +[t0][14] Test Passed. use time 1.44ms, 696.38fps +[t0][13] Test Passed. use time 1.43ms, 697.35fps +[t0][12] Test Passed. use time 1.43ms, 698.32fps +[t0][11] Test Passed. use time 1.44ms, 693.48fps +[t0][10] Test Passed. use time 1.44ms, 694.93fps +[t0][9] Test Passed. use time 1.44ms, 693.00fps +[t0][8] Test Passed. use time 1.44ms, 695.41fps +[t0][7] Test Passed. use time 1.44ms, 696.38fps +[t0][6] Test Passed. use time 1.43ms, 698.32fps +[t0][5] Test Passed. use time 1.44ms, 695.41fps +[t0][4] Test Passed. use time 1.43ms, 697.35fps +[t0][3] Test Passed. use time 1.44ms, 694.93fps +[t0][2] Test Passed. use time 1.44ms, 694.93fps +[t0][1] Test Passed. use time 1.43ms, 697.84fps +[t0][0] Test Passed. use time 1.44ms, 696.38fps +Thread 0: Exited + +``` \ No newline at end of file diff --git "a/thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.4.0/T035-\346\221\251\345\260\224\346\225\260\345\255\227\344\272\272.md" "b/thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.4.0/T035-\346\221\251\345\260\224\346\225\260\345\255\227\344\272\272.md" new file mode 100644 index 0000000..9151e43 --- /dev/null +++ "b/thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.4.0/T035-\346\221\251\345\260\224\346\225\260\345\255\227\344\272\272.md" @@ -0,0 +1,185 @@ +

+ logo +

+

摩尔线程-T035

+

摩尔数字人安装指引

+ +
+

lastAuthor:Pierce.cai

+

lastDate:2026-06-02

+
+ + + + +# 系统环境要求 + +> **⚠️ 重要提示** +> +> - **FAQ 模式**:若需使用本地大模型推理(FAQ模式),必须确保物理内存不低于 32GB。 +> - **Conda 环境**:除定制版外,请在安装vllm_musa环境前退出 Conda 环境,以免干扰模型的运行。 + +| 组件 | 版本 | +| ----------- | ----------- | +| MUSA Driver | 5.1.0-M1000 | +| MUSA SDK | 5.1.0 | +| U-Boot SDK | 1.4.0 | + +# 查看musa环境 + +```shell +# 查看驱动 +dpkg -l|grep musa + +# 输出 +ii musa 5.1.0-M1000 arm64 Moore Threads MUSA driver [452f33695] +ii musa-sdk 5.1.0 arm64 Moore Threads MTGPU Software Development Kit + +# 查看环境 +ll /usr/local/ |grep musa* + +# 输出 +lrwxrwxrwx 1 root root 10 Apr 24 01:22 musa -> musa-5.1.0/ +drwxr-xr-x 13 root root 4096 May 19 14:39 musa-5.1.0/ +``` + +# 安装musa(若不满足) + +> 软件包于TOS上:tos://ab100-sw/AI MODULE/AI MODULE UBOOT (平台化版本)/V1.4.0/SourceCodes/prebuild/ + +```shell +##################### 首先卸载当前驱动 ###################### +sudo dpkg -P musa-sdk +sudo dpkg -P musa # 输出 cryptsetup 的一些报错是正常的 +sudo rm -rf /usr/local/musa* + +##################### 安装新的驱动和 SDK ###################### +sudo dpkg -i musa_5.1.0-M1000_arm64.deb +sudo dpkg -i musa-sdk_5.1.0-M1000_arm64.deb + +##################### 重启电脑 ###################### +sudo reboot +``` + +# 安装依赖 + +```shell +sudo apt update +sudo apt install -y portaudio19-dev libopenmpi-dev g++-12 xdotool ffmpeg libgl1-mesa-dev libglfw3-dev xvfb libopenblas-dev +``` + +# 安装前后端 + +```shell +# 下载前端安装包 +wget -c https://mt-maliang-saas.tos-cn-beijing.volces.com/dh2d/release/V2.0.1/2D-DigitalME_v2.0.1-normal.beta.3_arm64.deb + +# 下载后端安装包 +wget -c https://mt-maliang-saas.tos-cn-beijing.volces.com/dh2d/release/V2.0.1/mt-ai-service_release_arm64_v2.0.1-alpha.24-license-aibox_20260527_191705_e51b663.deb + +# 安装应用 +sudo dpkg -i 2D-DigitalME_v2.0.1-normal.beta.3_arm64.deb +sudo dpkg -i mt-ai-service_release_arm64_v2.0.1-alpha.24-license-aibox_20260527_191705_e51b663.deb +``` + +# 初始化配置信息 + +初始化脚本需申请(按公司维度发放,同一公司可复用): + +```shell +# 需要申请初始化脚本:init_script_address +wget -qO- init_script_address | sudo python3 + +# 重启设备 +sudo reboot +``` + +# 配置license + +- 
License需要申请 +- License与设备绑定,**不可跨设备复用**。 +- 重启后系统将自动启动“魔影元像”应用。 +- 在弹出的 **SN 验证窗口** 中输入您申请的 License。 + +![image-20260429182502556](https://bob-markdown-picture.oss-cn-shenzhen.aliyuncs.com/markdown-img/image-20260429182502556.png) + + + + + +# 配置大模型 + +1. 打开“魔影元像”应用。 + +2. 点击右下角 **设置 → 切换模型 → 添加模型**。 + +3. 填写以下参数: + + | 字段 | 说明 | + | -------- | --------------------------------------------------------- | + | 模型名称 | 自定义名称(如:Qwen2.5) | + | 模型 ID | 模型请求时使用的ID(如:Qwen2.5-0.5B-Instruct-GPTQ-Int4) | + | endpoint | 模型 API 地址(如:`http://xxxxx/chat/completions`) | + | token | API 密钥(若无,可随意填写) | + +4. 点击 **测试连接**,确认模型状态为“连通”。 + +5. 勾选该模型以启用。 + + ![_home_dev_Pictures_Screenshots_Screenshot from 2026-04-29 18-36-57](https://bob-markdown-picture.oss-cn-shenzhen.aliyuncs.com/markdown-img/_home_dev_Pictures_Screenshots_Screenshot%20from%202026-04-29%2018-36-57.png) + +6. 若无法编辑,请将屏幕设置为**竖屏**。 + +# 配置本地TTS与ASR + +本版本已内置本地语音合成(TTS)与语音识别(ASR)服务。 + +1. 打开“魔影元像”。 +2. 进入 **设置 → 通用**,**取消勾选“云端语音识别”**。 +3. 进入 **设置 → 切换音色**,**务必选择带有 `lite` 后缀的音色**。 + +![Screenshot from 2026-04-29 18-46-58](https://bob-markdown-picture.oss-cn-shenzhen.aliyuncs.com/markdown-img/Screenshot%20from%202026-04-29%2018-46-58.png) + +# FAQ模式 + +该模式需要使用特定的模型进行推理。 + +使用该模式前,请确认本机至少有 32GB 内存。 + +搭建环境前,请确保已经退出conda环境(定制版除外)。 + +## 环境搭建 + +请参考该文档进行vllm_musa的搭建:https://gitee.com/MooreThreads-AI-SOC/M1000_Developer_Document/blob/master/thirdparty/skynoon/user_cases/T035EVB-MTSDK-1.4.0/T035-vllm-musa%E8%BF%9B%E8%A1%8C%E5%A4%A7%E6%A8%A1%E5%9E%8B%E6%8E%A8%E7%90%86.md + +## 模型下载与安装 + +```shell +wget -c https://mt-maliang-saas.tos-cn-beijing.volces.com/dh2d/aimodule/mt-ai-emb_1.2.deb +sudo dpkg -i mt-ai-emb_1.2.deb +``` + +在数字人界面点击 **设置 → 切换模型 → FAQ模式**,系统将自动拉起本地模型服务。 + +# **常见问题(Q&A)** + +## 模型已连通但无法调用 + +1. 编辑启动脚本 `runAI.sh`(可通过 `sudo nano /usr/local/bin/runAI.sh` 修改)。 + +2. 
在启动命令中添加参数 `--no-tools-in-conversations`。 + + **修改前:** + + ```shell + ./mt_digitalme_sdk_server --use-local-mic --llm=musachat --sentence-end-delay=1.0 --component-server-addr https://dh-admin-api.bjg2.mthreads.com/api/v1 --tts-ws-addr wss://dh-admin-api.bjg2.mthreads.com/api/v1/tts/generate/ws $FLAVOR_STARTUP_ARGS --role-config-file=$cwd/roles.json --logger-output="$LOG_FILE" "$@" & + ``` + + **修改后:** + + ```shell + # 注意:下面命令中的反引号(``)仅用于标出新增的参数;实际修改脚本时请勿输入反引号,否则 shell 会将其中内容当作命令替换来执行 + ./mt_digitalme_sdk_server --use-local-mic --llm=musachat --sentence-end-delay=1.0 `--no-tools-in-conversations` --component-server-addr https://dh-admin-api.bjg2.mthreads.com/api/v1 --tts-ws-addr wss://dh-admin-api.bjg2.mthreads.com/api/v1/tts/generate/ws $FLAVOR_STARTUP_ARGS --role-config-file=$cwd/roles.json --logger-output="$LOG_FILE" "$@" & + ``` + +3. **保存文件并重启设备**以使更改生效。 \ No newline at end of file -- Gitee