1.下载ncnn源码
项目地址:https://github.com/Tencent/ncnn
git clone https://github.com/Tencent/ncnn.git
cd ncnn
git submodule update --init
2.安装依赖
2.1 通用依赖
- git
- g++
- cmake
- protocol buffer (protobuf) header files and protobuf compiler
- glslang
- opencv(用于编译案例)
sudo apt install build-essential git cmake libprotobuf-dev protobuf-compiler libvulkan-dev vulkan-utils libopencv-dev
2.2 vulkan header files and loader library (用于调用GPU,只用CPU的可以不用安装)
2.2.1 X86版本安装
# 为GPU安装Vulkan驱动
sudo apt install mesa-vulkan-drivers
# 安装vulkansdk
wget "https://sdk.lunarg.com/sdk/download/1.2.189.0/linux/vulkansdk-linux-x86_64-1.2.189.0.tar.gz?Human=true" -O vulkansdk-linux-x86_64-1.2.189.0.tar.gz
tar -xvf vulkansdk-linux-x86_64-1.2.189.0.tar.gz
export VULKAN_SDK=$(pwd)/1.2.189.0/x86_64
2.2.2 Jetson Nano安装
确认vulkan驱动是否安装正常
nvidia@xavier:/$ vulkaninfo
Xlib: extension "NV-GLX" missing on display "localhost:10.0".
Xlib: extension "NV-GLX" missing on display "localhost:10.0".
Xlib: extension "NV-GLX" missing on display "localhost:10.0".
/build/vulkan-tools-WR7ZBj/vulkan-tools-1.1.126.0+dfsg1/vulkaninfo/vulkaninfo.h:399: failed with ERROR_INITIALIZATION_FAILED
- 异常原因查找
- 通过vnc远程连接到图形界面后运行
vulkaninfo
nano@nano:~$ vulkaninfo
===========
VULKAN INFO
===========
Vulkan Instance Version: 1.2.70
Instance Extensions:
====================
Instance Extensions count = 16
VK_KHR_device_group_creation : extension revision 1
······
=========================
minImageCount = 2
maxImageCount = 8
currentExtent:
width = 256
height = 256
minImageExtent:
width = 256
height = 256
maxImageExtent:
width = 256
height = 256
maxImageArrayLayers = 1
······
安装vulkansdk
# 编译安装vulkansdk
sudo apt-get update && sudo apt-get install git build-essential libx11-xcb-dev libxkbcommon-dev libwayland-dev libxrandr-dev cmake
git clone https://github.com/KhronosGroup/Vulkan-Loader.git
cd Vulkan-Loader && mkdir build && cd build
../scripts/update_deps.py
cmake -DCMAKE_BUILD_TYPE=Release -DVULKAN_HEADERS_INSTALL_DIR=$(pwd)/Vulkan-Headers/build/install ..
make -j$(nproc)
export LD_LIBRARY_PATH=$(pwd)/loader
cd Vulkan-Headers
ln -s ../loader lib
export VULKAN_SDK=$(pwd)
3. 开始编译
CPU 版
# 没安VULKAN运行这个
cd ncnn
mkdir -p build
cd build
cmake -DCMAKE_BUILD_TYPE=Release -DNCNN_VULKAN=OFF -DNCNN_SYSTEM_GLSLANG=ON -DNCNN_BUILD_EXAMPLES=ON ..
make -j$(nproc)
GPU-X86
# 有GPU安了VULKAN运行这个
cd ncnn
mkdir -p build
cd build
cmake -DCMAKE_BUILD_TYPE=Release -DNCNN_VULKAN=ON -DNCNN_SYSTEM_GLSLANG=ON -DNCNN_BUILD_EXAMPLES=ON ..
make -j$(nproc)
GPU-Jetson Nano
# Jetson Nano用这个
cd ncnn
mkdir -p build
cd build
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/jetson.toolchain.cmake -DNCNN_VULKAN=ON -DCMAKE_BUILD_TYPE=Release -DNCNN_BUILD_EXAMPLES=ON ..
make -j$(nproc)
4.验证安装
4.1 验证squeezenet
cd ../examples
../build/examples/squeezenet ../images/256-ncnn.png
nano@nano:/software/ncnn/examples$ ../build/examples/squeezenet ../images/256-ncnn.png
[0 NVIDIA Tegra X1 (nvgpu)] queueC=0[16] queueG=0[16] queueT=0[16]
[0 NVIDIA Tegra X1 (nvgpu)] bugsbn1=0 bugbilz=0 bugcopc=0 bugihfa=0
[0 NVIDIA Tegra X1 (nvgpu)] fp16-p/s/a=1/1/1 int8-p/s/a=1/1/1
[0 NVIDIA Tegra X1 (nvgpu)] subgroup=32 basic=1 vote=1 ballot=1 shuffle=1
532 = 0.168945
920 = 0.093323
716 = 0.063110
nvdc: start nvdcEventThread
nvdc: exit nvdcEventThread
4.2 验证benchncnn
cd ../benchmark
../build/benchmark/benchncnn 10 $(nproc) 0 0
nano@nano:/software/ncnn/benchmark$ ../build/benchmark/benchncnn 10 $(nproc) 0 0
[0 NVIDIA Tegra X1 (nvgpu)] queueC=0[16] queueG=0[16] queueT=0[16]
[0 NVIDIA Tegra X1 (nvgpu)] bugsbn1=0 bugbilz=0 bugcopc=0 bugihfa=0
[0 NVIDIA Tegra X1 (nvgpu)] fp16-p/s/a=1/1/1 int8-p/s/a=1/1/1
[0 NVIDIA Tegra X1 (nvgpu)] subgroup=32 basic=1 vote=1 ballot=1 shuffle=1
loop_count = 10
num_threads = 4
powersave = 0
gpu_device = 0
cooling_down = 1
squeezenet min = 19.90 max = 22.82 avg = 20.82
squeezenet_int8 min = 36.58 max = 236.35 avg = 66.89
mobilenet min = 24.75 max = 41.05 avg = 28.83
mobilenet_int8 min = 42.95 max = 70.39 avg = 52.08
mobilenet_v2 min = 31.84 max = 38.09 avg = 35.59
mobilenet_v3 min = 29.77 max = 38.48 avg = 33.56
shufflenet min = 25.98 max = 36.90 avg = 30.86
shufflenet_v2 min = 18.46 max = 27.65 avg = 20.49
mnasnet min = 22.63 max = 35.37 avg = 24.88
proxylessnasnet min = 27.85 max = 33.44 avg = 30.52
efficientnet_b0 min = 34.85 max = 48.31 avg = 38.46
efficientnetv2_b0 min = 56.62 max = 76.70 avg = 61.99
regnety_400m min = 28.31 max = 35.59 avg = 31.92
blazeface min = 14.40 max = 34.70 avg = 23.63
googlenet min = 55.01 max = 75.36 avg = 60.89
googlenet_int8 min = 111.53 max = 315.94 avg = 167.58
resnet18 min = 51.45 max = 77.21 avg = 59.26
resnet18_int8 min = 81.99 max = 207.09 avg = 117.43
alexnet min = 69.98 max = 102.26 avg = 83.27
vgg16 min = 302.14 max = 337.56 avg = 320.55
vgg16_int8 min = 464.06 max = 601.92 avg = 540.28
resnet50 min = 140.36 max = 176.66 avg = 159.53
resnet50_int8 min = 299.16 max = 554.05 avg = 453.26
squeezenet_ssd min = 53.43 max = 78.75 avg = 63.67
squeezenet_ssd_int8 min = 91.45 max = 215.14 avg = 123.13
mobilenet_ssd min = 66.30 max = 90.77 avg = 76.86
mobilenet_ssd_int8 min = 89.05 max = 261.33 avg = 119.18
mobilenet_yolo min = 142.24 max = 182.72 avg = 154.48
mobilenetv2_yolov3 min = 81.96 max = 107.17 avg = 91.93
yolov4-tiny min = 103.76 max = 138.15 avg = 115.43
nanodet_m min = 27.15 max = 36.88 avg = 32.00
yolo-fastest-1.1 min = 33.21 max = 40.95 avg = 35.84
yolo-fastestv2 min = 17.51 max = 29.54 avg = 21.32
vision_transformer min = 4981.82 max = 5576.98 avg = 5198.79
nvdc: start nvdcEventThread
nvdc: exit nvdcEventThread
评论 (0)