Docker
sudo docker run --rm --gpus all docker.1ms.run/nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04 find /usr -name "*cudnn*" -type f 2>/dev/null #找cudnn_ver
环境选择
| ubuntu | 显卡类型 | Driver Version | CUDA Version | python Version | cuda |
| 22.04 | NVIDIA RTX A6000 | 560.35.03 | 12.6 | 3.9.21 | cuda_11.5.r11.5/compiler.30672275_0 |
| 22.04 | NVIDIA RTX A6000 | 555.42.06 | 12.5 | 3.12.4 | cuda_12.5.r12.5/compiler.34385749_0 |
| 18.04 | NVIDIA A40 | 535.104.12 | 12.2 | 3.9.13 | cuda_12.1.r12.1/compiler.32688072_0 |
| 24.04 | NVIDIA GeForce RTX 3090 | 580.95.05 | 13 | 3.12.3 | cuda_13.0.r13.0/compiler.36424714_0 |
| 22.04 | NVIDIA RTX A6000 | 550.54.14 | 12.4 | 3.10.12 | cuda_11.5.r11.5/compiler.30672275_0 |
docker安装
# Install Docker Engine from Docker's official apt repository.
sudo apt update
sudo apt-get install ca-certificates curl gnupg lsb-release apt-transport-https software-properties-common
sudo mkdir -p /etc/apt/keyrings
# Fetch Docker's GPG key and de-armor it for apt's signed-by verification.
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get update
# Fix: Docker's repo ships the runtime as "containerd.io" — the bare
# "containerd" name is the Ubuntu-archive package and conflicts with docker-ce.
sudo apt-get install docker-ce docker-ce-cli containerd.io docker-compose-plugin
# Fix: the original used a Unicode en-dash ("–version"), which docker rejects
# as an unknown flag; the correct flag is ASCII "--version".
sudo docker --version  # a version string here confirms the install succeeded
# Write /etc/docker/daemon.json with registry mirrors (faster pulls from CN).
# The quoted 'EOF' delimiter keeps the heredoc literal (no expansion).
# NOTE(review): tee OVERWRITES any existing daemon.json — merge by hand if
# other settings (e.g. the nvidia runtime configured later) already exist.
sudo tee /etc/docker/daemon.json <<-'EOF'
{
"registry-mirrors": [
"https://docker.mirrors.ustc.edu.cn",
"https://hub-mirror.c.163.com",
"https://mirror.baidubce.com"
]
}
EOF
sudo systemctl status docker.service  # check the daemon state before reloading
sudo systemctl daemon-reload          # pick up the new /etc/docker/daemon.json
sudo systemctl restart docker
sudo systemctl status docker.service  # verify the daemon came back up
sudo systemctl is-enabled docker      # "enabled" means start-on-boot is set
enabled 表示已设置开机自启
Docker 镜像下载
# Pull the CUDA 12.1.1 + cuDNN 8 devel base image from the mirror registry.
sudo docker pull docker.1ms.run/nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
sudo docker image ls #the image appearing in the list confirms the pull succeeded
dpkg -l | grep nvidia-container-toolkit #check whether the toolkit is already installed
没有则
sudo apt-get update
sudo apt-get install -y nvidia-container-toolkit
# Register the nvidia runtime with dockerd, then restart to apply it.
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
sudo docker run --rm --gpus all docker.1ms.run/nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04 nvidia-smi #prints the driver/GPU table if GPU passthrough works
sudo docker run --rm --gpus all docker.1ms.run/nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04 nvcc --version
sudo docker run --rm --gpus all docker.1ms.run/nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04 find /usr -name "*cudnn*" -type f 2>/dev/null #locate the cudnn_version*.h header
# Note: cat runs inside the container; the pipe and grep run on the host.
sudo docker run --rm --gpus all docker.1ms.run/nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04 cat /usr/include/x86_64-linux-gnu/cudnn_version_v8.h | grep CUDNN_MAJOR -A 2
# Build the application image (expects Dockerfile + requirements.txt in cwd).
sudo docker build -t python-3.9.13-cuda12.1-cudnn8.9.7 .
sudo docker image ls #the image appearing in the list confirms the build succeeded
# Run the inline version-check snippet inside the freshly built image.
sudo docker run --rm --gpus all python-3.9.13-cuda12.1-cudnn8.9.7 python3 -c "
import torch
print(f'PyTorch版本: {torch.__version__}')
print(f'CUDA版本: {torch.version.cuda}')
print(f'cuDNN版本: {torch.backends.cudnn.version()}')
print(f'CUDA可用: {torch.cuda.is_available()}')
print(f'GPU数量: {torch.cuda.device_count()}')
"
输出版本证明python3.9安装成功
# NOTE(review): host path /home/pyapp differs from the /home/pyap/... path
# bind-mounted in docker-compose.yml — confirm which spelling is correct.
sudo docker run --gpus all -v /home/pyapp:/workspace -w /workspace --rm python-3.9.13-cuda12.1-cudnn8.9.7:latest python3 -u test-gpu.py
Docker 容器创建
# Rebuild the image tag, then create/start the container via docker compose.
sudo docker build -t python-3.9.13-cuda12.1-cudnn8.9.7 .
sudo docker ps -a
# Create the host-side directory that compose bind-mounts into /workspace.
mkdir -p workspace
sudo docker compose up -d
# Attach an interactive shell to the running container.
sudo docker exec -it python3.9-cuda12.1 bash
# Same version check as before, this time through the compose-managed container.
sudo docker compose exec python3.9-cuda12.1 python3 -c "
import torch
print(f'PyTorch版本: {torch.__version__}')
print(f'CUDA版本: {torch.version.cuda}')
print(f'cuDNN版本: {torch.backends.cudnn.version()}')
print(f'CUDA可用: {torch.cuda.is_available()}')
print(f'GPU数量: {torch.cuda.device_count()}')
"
sudo docker exec -it python3.9-cuda12.1 bash
exit
Docker 镜像转移
# Export the image to a gzipped tarball for transfer to another host.
sudo docker save python-3.9.13-cuda12.1-cudnn8.9.7:latest | gzip > python-3.9.13-cuda12.1-cudnn8.9.7.tar.gz
# On the target host: decompress and load (sh -c keeps the whole pipe under sudo).
sudo sh -c "gunzip -c python-3.9.13-cuda12.1-cudnn8.9.7.tar.gz | docker load"
Docker退出
Docker 安装python3.9的新镜像
sudo docker build -t python-3.9.13-cuda12.1-cudnn8.9.7 .
# Stop and remove the compose-managed container(s)
sudo docker compose down
# Remove the image
sudo docker rmi python-3.9.13-cuda12.1-cudnn8.9.7
# Prune unused resources (stopped containers, dangling images, networks)
sudo docker system prune -f
Dockerfile
# Dockerfile
# Development image: CPython 3.9.13 built from source on top of the
# CUDA 12.1.1 + cuDNN 8 devel base image.
FROM docker.1ms.run/nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
# Environment variables. DEBIAN_FRONTEND silences interactive apt prompts;
# the *_VERSION vars are informational labels — nothing below reads them.
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHON_VERSION=3.9.13
ENV CUDA_VERSION=12.1.1
ENV CUDNN_VERSION=8.9.7
# Update the package index and install build dependencies for compiling CPython
RUN apt-get update && apt-get install -y \
build-essential \
curl \
wget \
git \
screen \
pkg-config \
software-properties-common \
libssl-dev \
zlib1g-dev \
libncurses5-dev \
libncursesw5-dev \
libreadline-dev \
libsqlite3-dev \
libgdbm-dev \
libdb5.3-dev \
libbz2-dev \
libexpat1-dev \
libffi-dev \
liblzma-dev \
tk-dev \
&& rm -rf /var/lib/apt/lists/*
# Build and install Python 3.9.13 from source; "make altinstall" installs as
# python3.9 without overwriting the distro's python3 binary.
RUN wget https://www.python.org/ftp/python/3.9.13/Python-3.9.13.tgz \
&& tar xzf Python-3.9.13.tgz \
&& cd Python-3.9.13 \
&& ./configure --enable-optimizations \
&& make -j$(nproc) \
&& make altinstall \
&& cd .. \
&& rm -rf Python-3.9.13*
# Symlink python3/pip3 to the freshly built 3.9 interpreter
RUN ln -s /usr/local/bin/python3.9 /usr/local/bin/python3
RUN ln -s /usr/local/bin/pip3.9 /usr/local/bin/pip3
# Upgrade pip and point it at the Tsinghua PyPI mirror
RUN python3 -m pip install --upgrade pip
RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
RUN pip config set global.trusted-host pypi.tuna.tsinghua.edu.cn
# Copy requirements.txt into the image and install the pinned Python packages
COPY requirements.txt /tmp/requirements.txt
RUN python3 -m pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
# COPY requirements-torch.txt /tmp/requirements-torch.txt
# RUN python3 -m pip install --no-cache-dir -r /tmp/requirements-torch.txt && rm /tmp/requirements-torch.txt
# Set the working directory
WORKDIR /workspace
# Verify the installation at build time.
# NOTE(review): torch.cuda.is_available() is presumably False during "docker
# build" (no GPU attached); this step only needs the import to succeed.
RUN python3 -c "import torch; print(f'PyTorch版本: {torch.__version__}'); print(f'CUDA可用: {torch.cuda.is_available()}'); print(f'cuDNN版本: {torch.backends.cudnn.version()}')"
# Expose port 8888.
# NOTE(review): docker-compose.yml publishes 8887 and 8080, not 8888 — confirm
# which port is actually intended.
EXPOSE 8888
# Default to an interactive bash shell for development
CMD ["/bin/bash"]
docker-compose.yml
# docker-compose.yml for the python-3.9.13-cuda12.1-cudnn8.9.7 dev container.
# Fix: the scraped copy had lost all YAML indentation, which makes the file
# invalid — nesting restored per the Compose file structure.
services:
  python3.9-cuda12.1:
    build: .
    image: python-3.9.13-cuda12.1-cudnn8.9.7
    container_name: python3.9-cuda12.1
    runtime: nvidia                 # requires nvidia-container-toolkit on the host
    environment:
      - NVIDIA_VISIBLE_DEVICES=all  # expose all GPUs to the container
    volumes:
      - /home/pyap/Project/DockerWorkSpace/workspace:/workspace
    ports:
      - "8887:8887"
      - "8080:8080"
    stdin_open: true                # keep STDIN open (docker run -i)
    tty: true                       # allocate a pseudo-TTY (docker run -t)
    working_dir: /workspace         # working directory inside the container
    command: /bin/bash              # default command
requirements.txt
# Pinned dependencies for the python-3.9.13-cuda12.1-cudnn8.9.7 image.
# NOTE(review): the nvidia_*_cu11 pins (incl. nvidia_cudnn_cu11==8.5.0.96)
# target CUDA 11, while the base image provides CUDA 12.1 / cuDNN 8.9.7 —
# confirm these are the wheels torch==2.0.1 actually resolves against.
sqlalchemy==1.4.47
fastapi==0.111.0
pydantic==2.12.5
uvicorn==0.21.1
redis==7.0.1
aiohttp==3.11.13
aioredis==2.0.1
APScheduler==3.10.4
boto3==1.24.28
botocore==1.27.28
imagesize==1.4.1
loguru==0.7.2
matplotlib==3.5.2
numpy==1.23.0
nvidia_cuda_nvrtc_cu11==11.7.99
nvidia_cudnn_cu11==8.5.0.96
nvidia_cufft_cu11==10.9.0.58
nvidia_cusolver_cu11==11.4.0.1
nvidia_dali_cuda120==1.30.0
onnxruntime_gpu==1.19.2
opencv_python==4.7.0.68
pandas==1.4.4
pika==1.3.2
Pillow==9.2.0
py7zr==0.22.0
pytest==7.1.2
PyYAML==6.0
scikit_learn==1.0.2
scipy==1.9.1
torch==2.0.1
torchsummary==1.5.1
torchvision==0.15.2
tqdm==4.64.1
ultralytics==8.3.119
tensorboard==2.19.0
pymysql==1.1.1
cx_Oracle==8.3.0
test-gpu.py
"""GPU environment smoke test.

Imports every package pinned in requirements.txt (most imports are
deliberately unused — a failed import means the package is missing from the
image) and prints the PyTorch / CUDA / cuDNN / GPU information.
"""
import torch
import random
import numpy as np
import torch.nn as nn
import cv2 as cv
from typing import Tuple, Union, List
import json
import os
import shutil
from datetime import datetime, timedelta
from functools import wraps, lru_cache
import sys
import time
from loguru import logger
import pandas as pd
from torchvision import transforms as tvt
import tqdm
import pika
import uuid
import socket
import math
from sklearn.cluster import KMeans
import base64
import filecmp
import io, re, py7zr
import redis
from enum import Enum, unique
from pydantic import BaseModel
from configparser import ConfigParser
from urllib.parse import quote
import yaml
import zipfile
from multiprocessing import Pool
from PIL import Image
from email.header import Header
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import tarfile
from matplotlib import pyplot as plt
from sklearn.metrics import roc_auc_score
import boto3
from botocore.exceptions import ClientError
import sqlalchemy
import fastapi
import pydantic
import uvicorn
# Fix: removed a duplicate "import redis" that appeared here (already
# imported above).
from ultralytics import YOLO
from scipy.spatial import KDTree
from torchvision import models

print(f'PyTorch版本: {torch.__version__}')
print(f'CUDA版本: {torch.version.cuda}')
print(f'cuDNN版本: {torch.backends.cudnn.version()}')
print(f'CUDA可用: {torch.cuda.is_available()}')
print(f'GPU数量: {torch.cuda.device_count()}')
更多推荐

所有评论(0)