Qualcomm Gen AI Inference Extensions (GENIE) (6): Genie Profile and Pipeline Tutorial
The Genie Profile API provides a way to profile the performance metrics of GenAI models. Profiling can be enabled on genie-t2t-run and genie-t2e-run by passing the "--profile FILENAME" argument, which dumps the collected profiling data into a JSON file with the provided name.
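For example, a profiled text-to-text run might look like the sketch below. Only the --profile option comes from the description above; the config file name, the prompt placeholder, and the short -c/-p option forms are assumptions based on typical genie-t2t-run usage.

# Hypothetical invocation on the target device; genie_config.json and the prompt are placeholders.
./genie-t2t-run -c genie_config.json -p "<prompt>" --profile profile_output.json
# The collected profiling data is written to profile_output.json.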
Profiling JSON Output Schema
{
  "header": {
    "type": "object",
    "properties": {
      "header_version": {
        "type": "object",
        "properties": {
          "major": {"type": "integer"},
          "minor": {"type": "integer"},
          "patch": {"type": "integer"}
        },
        "required": [
          "major",
          "minor",
          "patch"
        ]
      },
      "version": {
        "type": "object",
        "properties": {
          "major": {"type": "integer"},
          "minor": {"type": "integer"},
          "patch": {"type": "integer"}
        },
        "required": [
          "major",
          "minor",
          "patch"
        ]
      },
      "artifact_type": { "type": "string" }
    },
    "required": [
      "header_version",
      "version",
      "artifact_type"
    ]
  },
  "metadata": {
    "type": "object",
    "properties": {
      "timestamp": {"type": "integer"}
    },
    "required": [
      "timestamp"
    ]
  },
  "components": {
    "type": "array",
    "items": {
      "type": "object",
      "properties": {
        "name": {"type": "string"},
        "type": {"type": "string", "enum": ["dialog", "embedding"]},
        "events": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "type": {
                "type": "string",
                "enum": [
                  "GenieDialog_create", "GenieDialog_query", "GenieDialog_free",
                  "GenieEmbedding_create", "GenieEmbedding_generate", "GenieEmbedding_free"
                ]
              },
              "duration": {"type": "integer"},
              "start": {"type": "integer"},
              "stop": {"type": "integer"},
              "init-time": {
                "type": "object",
                "properties": {
                  "value": {"type": "integer"},
                  "unit": {"type": "string"}
                },
                "required": [
                  "value",
                  "unit"
                ]
              },
              "num-prompt-tokens": {
                "type": "object",
                "properties": {
                  "value": {"type": "integer"},
                  "unit": {"type": "string"}
                },
                "required": [
                  "value",
                  "unit"
                ]
              },
              "prompt-processing-rate": {
                "type": "object",
                "properties": {
                  "value": {"type": "integer"},
                  "unit": {"type": "string"}
                },
                "required": [
                  "value",
                  "unit"
                ]
              },
              "time-to-first-token": {
                "type": "object",
                "properties": {
                  "value": {"type": "integer"},
                  "unit": {"type": "string"}
                },
                "required": [
                  "value",
                  "unit"
                ]
              },
              "num-generated-tokens": {
                "type": "object",
                "properties": {
                  "value": {"type": "integer"},
                  "unit": {"type": "string"}
                },
                "required": [
                  "value",
                  "unit"
                ]
              },
              "token-generation-rate": {
                "type": "object",
                "properties": {
                  "value": {"type": "integer"},
                  "unit": {"type": "string"}
                },
                "required": [
                  "value",
                  "unit"
                ]
              },
              "token-generation-time": {
                "type": "object",
                "properties": {
                  "value": {"type": "integer"},
                  "unit": {"type": "string"}
                },
                "required": [
                  "value",
                  "unit"
                ]
              }
            }
          },
          "required": [
            "type",
            "duration",
            "start",
            "stop"
          ]
        }
      }
    },
    "required": [
      "name",
      "type",
      "events"
    ]
  }
}
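For reference, a profiling dump conforming to this schema might look roughly like the sample below. This is an illustrative sketch only: every numeric value, the unit strings, and the "artifact_type" string are made-up placeholders, and a real dump may contain more or fewer fields per event.

{
  "header": {
    "header_version": {"major": 1, "minor": 0, "patch": 0},
    "version": {"major": 1, "minor": 0, "patch": 0},
    "artifact_type": "profile"
  },
  "metadata": {"timestamp": 1714000000},
  "components": [
    {
      "name": "dialog-0",
      "type": "dialog",
      "events": [
        {
          "type": "GenieDialog_create",
          "duration": 2100000, "start": 0, "stop": 2100000,
          "init-time": {"value": 2100000, "unit": "us"}
        },
        {
          "type": "GenieDialog_query",
          "duration": 5300000, "start": 2200000, "stop": 7500000,
          "num-prompt-tokens": {"value": 42, "unit": "tokens"},
          "prompt-processing-rate": {"value": 550, "unit": "toks/sec"},
          "time-to-first-token": {"value": 180000, "unit": "us"},
          "num-generated-tokens": {"value": 128, "unit": "tokens"},
          "token-generation-rate": {"value": 25, "unit": "toks/sec"},
          "token-generation-time": {"value": 5100000, "unit": "us"}
        }
      ]
    }
  ]
}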
Example of how to use the Profiling API

// Standard headers used by this snippet; the Genie API headers from the QNN SDK
// (dialog and profiling APIs) are assumed to be included as well.
#include <cstdlib>
#include <fstream>

// Create Profile Handle
GenieProfile_Handle_t profileHandle = NULL;
GenieProfile_create(nullptr, &profileHandle);

// Create Dialog Config
GenieDialogConfig_Handle_t dialogConfigHandle = NULL;
GenieDialogConfig_createFromJson(dialogConfigStr, &dialogConfigHandle);

// Bind Profile Handle to Dialog Config
GenieDialogConfig_bindProfiler(dialogConfigHandle, profileHandle);

// Create Dialog
GenieDialog_Handle_t dialogHandle = NULL;
GenieDialog_create(dialogConfigHandle, &dialogHandle);

// Run Dialog Query API
GenieDialog_query(dialogHandle, promptStr, GenieDialog_SentenceCode_t::GENIE_DIALOG_SENTENCE_COMPLETE, queryCallback);

// Get Profiling Data (the callback allocates the buffer that Genie fills with the JSON string)
const char* jsonData = nullptr;
const Genie_AllocCallback_t callback([](size_t size, const char** data) {
  *data = (char*)malloc(size);
});
GenieProfile_getJsonData(profileHandle, callback, &jsonData);

// Dump to JSON File and Free the Allocated Data
std::ofstream outFile;
outFile.open(profilePath);
outFile << jsonData;
outFile.close();
free((char*)jsonData);

// Free Profile Handle
GenieProfile_free(profileHandle);
Pipeline Tutorial
Target models / tutorial topics: GLM-4v, facebook/wmt19-en-de
GLM-4v
Sample model configs and backend extension config for the HTP backend
Refer to the Genie Node JSON Config String documentation for details on the fields and what they mean. Sample model_configs and a script tying the nodes and the pipeline invocation together can be found at ${QNN_SDK_ROOT}/examples/Genie/configs/lmm/glm-4v/. Note that the tokenizer path and the context-binary fields need to be updated to match your actual preparation steps.
GLM-4V model inference on Android
To run on the QNN HTP backend, open a command shell on the Android device and run the commands below. This assumes the HTP architecture variable has already been set (e.g., ARCH=75).
adb shell mkdir -p /data/local/tmp/
adb push ${QNN_SDK_ROOT}/bin/aarch64-android/genie-app /data/local/tmp/
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libGenie.so /data/local/tmp/
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtp.so /data/local/tmp/
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnSystem.so /data/local/tmp/
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpPrepare.so /data/local/tmp/
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpNetRunExtensions.so /data/local/tmp/
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV${ARCH}Stub.so /data/local/tmp/
adb push ${QNN_SDK_ROOT}/lib/hexagon-v${ARCH}/unsigned/libQnnHtpV${ARCH}Skel.so /data/local/tmp/
adb push <path to htp_backend_ext_config.json> /data/local/tmp/
adb push <path to siglip.json> /data/local/tmp/
adb push <path to text-encoder.json> /data/local/tmp/
adb push <path to glm-4v.json> /data/local/tmp/
adb push <path to tokenizer.json> /data/local/tmp/
adb push <path to model bin files> /data/local/tmp/
adb push <path to LMMScript> /data/local/tmp/
# open adb shell
adb shell
export LD_LIBRARY_PATH=/data/local/tmp/
export PATH=$LD_LIBRARY_PATH:$PATH
cd $LD_LIBRARY_PATH
./genie-app -s <path to LMMScript>
facebook/wmt19-en-de
Refer to the Genie Node JSON Config String documentation for details on the fields and what they mean. Sample model_configs and a script tying the nodes and the pipeline invocation together can be found at ${QNN_SDK_ROOT}/examples/Genie/configs/wt19-en-de/.
Model Download
Download the model from: https://huggingface.co/facebook/wmt19-en-de
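One convenient way to fetch the checkpoint on the Linux host is a git-lfs clone, sketched below; downloading through the Hugging Face web UI works just as well. The local directory name is arbitrary and is chosen here only to match the example further down.

# Requires git-lfs; clones the full checkpoint into wt19_en_de_model/
git lfs install
git clone https://huggingface.co/facebook/wmt19-en-de wt19_en_de_model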
Model Preparation
Open a command shell on the Linux host and run:
# Make sure the environment is set up as per the instructions, or cd into the bin folder on the Linux host
cd ${QNN_SDK_ROOT}/bin/x86_64-linux-clang/
./qnn-genai-transformer-composer --model <path to model> --outfile <output filename with complete path> --export_tokenizer_json --dump_lut
For example:
qnn-genai-transformer-composer --model wt19_en_de_model --outfile wt19_en_de_model_artifacts/model.bin --export_tokenizer_json --dump_lut
This generates model_encoder.bin, model_decoder.bin, LUT.bin, and tokenizer.json inside wt19_en_de_model_artifacts.
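As a quick sanity check, listing the output directory from the example above should show the composed artifacts, roughly as follows.

ls wt19_en_de_model_artifacts/
# LUT.bin  model_decoder.bin  model_encoder.bin  tokenizer.json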
Model Inference on Android
To run on the QNN GenAiTransformer backend, open a command shell on the Android device and run the commands below.
adb shell mkdir -p /data/local/tmp/
adb push ${QNN_SDK_ROOT}/bin/aarch64-android/genie-app /data/local/tmp/
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libGenie.so /data/local/tmp/
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnGenAiTransformer.so /data/local/tmp/
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnGenAiTransformerModel.so /data/local/tmp/
adb push ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnGenAiTransformerCpuOpPkg.so /data/local/tmp/
adb push <path to text-encoder.json> /data/local/tmp/
adb push <path to text-generator.json> /data/local/tmp/
adb push <path to LUT.bin> /data/local/tmp/
adb push <path to tokenizer.json> /data/local/tmp/
adb push <path to model bin files> /data/local/tmp/
adb push <path to LMMScript> /data/local/tmp/
# open adb shell
adb shell
export LD_LIBRARY_PATH=/data/local/tmp/
export PATH=$LD_LIBRARY_PATH:$PATH
cd $LD_LIBRARY_PATH
./genie-app -s <path to LMMScript>