环境选择

ubuntu 显卡类型 Driver Version CUDA Version python Version cuda
22.04 NVIDIA RTX A6000 560.35.03 12.6 3.9.21 cuda_11.5.r11.5/compiler.30672275_0
22.04 NVIDIA RTX A6000 555.42.06 12.5 3.12.4 cuda_12.5.r12.5/compiler.34385749_0
18.04 NVIDIA A40 535.104.12 12.2 3.9.13 cuda_12.1.r12.1/compiler.32688072_0
24.04 NVIDIA GeForce RTX 3090 580.95.05 13 3.12.3 cuda_13.0.r13.0/compiler.36424714_0
22.04 NVIDIA RTX A6000 550.54.14 12.4 3.10.12 cuda_11.5.r11.5/compiler.30672275_0

docker安装

# Install Docker Engine from Docker's official APT repository.
sudo apt update
sudo apt-get install ca-certificates curl gnupg lsb-release apt-transport-https software-properties-common
sudo mkdir -p /etc/apt/keyrings
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg
echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get update
# Fix: the package shipped in Docker's repository is "containerd.io", not "containerd".
sudo apt-get install docker-ce docker-ce-cli containerd.io docker-compose-plugin
# Fix: use ASCII "--version" (the original contained a Unicode en dash, which fails).
sudo docker --version  # printing a version number confirms the install succeeded

# Configure registry mirrors for faster image pulls (mainland-China mirrors).
# daemon.json must be strict JSON, so no comments can go inside the heredoc.
# The quoted 'EOF' delimiter prevents any shell expansion of the body.
# NOTE(review): public mirror availability changes often — verify these
# endpoints are still reachable before relying on them.
sudo tee /etc/docker/daemon.json <<-'EOF'
{
"registry-mirrors": [
"https://docker.mirrors.ustc.edu.cn",
"https://hub-mirror.c.163.com",
"https://mirror.baidubce.com"
]
}
EOF

# Reload systemd and restart Docker so the new daemon.json takes effect.
sudo systemctl status docker.service
sudo systemctl daemon-reload
sudo systemctl restart docker
sudo systemctl status docker.service
sudo systemctl is-enabled docker  # "enabled" means Docker starts on boot
enabled 表示已设置开机自启

Docker 镜像下载

# Pull the CUDA devel image (via the 1ms.run mirror of Docker Hub).
sudo docker pull docker.1ms.run/nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
sudo docker image ls  # the image appearing here confirms the pull succeeded
dpkg -l | grep nvidia-container-toolkit  # check whether the toolkit is installed
# If it is missing, add NVIDIA's APT repository FIRST — the toolkit is not in
# Ubuntu's default repositories, so a bare "apt-get install" would fail.
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | \
  sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
  sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
  sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list > /dev/null
sudo apt-get update
sudo apt-get install -y nvidia-container-toolkit
sudo nvidia-ctk runtime configure --runtime=docker  # writes the "nvidia" runtime into daemon.json
sudo systemctl restart docker
# Smoke tests: each should print driver / compiler / cuDNN info from inside the container.
sudo docker run --rm --gpus all docker.1ms.run/nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04 nvidia-smi
sudo docker run --rm --gpus all docker.1ms.run/nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04 nvcc --version
sudo docker run --rm --gpus all docker.1ms.run/nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04 find /usr -name "*cudnn*" -type f 2>/dev/null  # locate cudnn_version*.h
sudo docker run --rm --gpus all docker.1ms.run/nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04 cat /usr/include/x86_64-linux-gnu/cudnn_version_v8.h | grep CUDNN_MAJOR -A 2

# Build the application image from the Dockerfile in the current directory.
sudo docker build -t python-3.9.13-cuda12.1-cudnn8.9.7 .
sudo docker image ls  # the image appearing here confirms the build succeeded
# Verify the GPU stack inside the new image; the quoted script is passed to
# python3 verbatim. "CUDA可用" should print True when --gpus works.
sudo docker run --rm --gpus all python-3.9.13-cuda12.1-cudnn8.9.7 python3 -c "
import torch
print(f'PyTorch版本: {torch.__version__}')
print(f'CUDA版本: {torch.version.cuda}')
print(f'cuDNN版本: {torch.backends.cudnn.version()}')
print(f'CUDA可用: {torch.cuda.is_available()}')
print(f'GPU数量: {torch.cuda.device_count()}')
"

输出版本证明python3.9安装成功

# Run the host-mounted smoke-test script in the container (-u = unbuffered output).
sudo docker run --gpus all -v /home/pyapp:/workspace -w /workspace --rm python-3.9.13-cuda12.1-cudnn8.9.7:latest python3 -u test-gpu.py

Docker 容器创建

# Build (or rebuild) the image used by docker-compose.
sudo docker build -t python-3.9.13-cuda12.1-cudnn8.9.7 .

# List all containers (running and stopped).
sudo docker ps -a
# Create the host directory that docker-compose mounts as /workspace.
mkdir -p workspace
# Start the service defined in docker-compose.yml in the background.
sudo docker compose up -d
# Open an interactive shell in the running container.
sudo docker exec -it python3.9-cuda12.1 bash
# Verify the GPU stack through "docker compose exec".
sudo docker compose exec python3.9-cuda12.1 python3 -c "
import torch
print(f'PyTorch版本: {torch.__version__}')
print(f'CUDA版本: {torch.version.cuda}')
print(f'cuDNN版本: {torch.backends.cudnn.version()}')
print(f'CUDA可用: {torch.cuda.is_available()}')
print(f'GPU数量: {torch.cuda.device_count()}')
"

# Attach a shell again; "exit" only leaves the shell — compose keeps the
# container running because of stdin_open/tty in docker-compose.yml.
sudo docker exec -it python3.9-cuda12.1 bash
exit

Docker 镜像转移

# Export the image to a compressed tarball (for transfer to another host)...
sudo docker save python-3.9.13-cuda12.1-cudnn8.9.7:latest | gzip > python-3.9.13-cuda12.1-cudnn8.9.7.tar.gz
# ...then re-import it on the target machine. The whole pipeline runs under
# "sh -c" so that "docker load" (not just gunzip) gets root privileges.
sudo sh -c "gunzip -c python-3.9.13-cuda12.1-cudnn8.9.7.tar.gz | docker load"
 

Docker退出

Docker 安装python3.9的新镜像
# NOTE(review): this build line looks out of place in the teardown section —
# presumably it rebuilds the image after cleanup; confirm the intent.
sudo docker build -t python-3.9.13-cuda12.1-cudnn8.9.7 .
# Stop and remove the compose containers
sudo docker compose down

# Remove the image
sudo docker rmi python-3.9.13-cuda12.1-cudnn8.9.7

# Clean up unused resources (dangling images, stopped containers, networks)
sudo docker system prune -f

Dockerfile

# Dockerfile
# Builds a Python 3.9.13 development image on top of NVIDIA's
# CUDA 12.1.1 + cuDNN 8 devel image (Ubuntu 22.04).

FROM docker.1ms.run/nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04

# Non-interactive apt during the build; version labels kept as env vars
# so they are inspectable inside the running container.
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHON_VERSION=3.9.13
ENV CUDA_VERSION=12.1.1
ENV CUDNN_VERSION=8.9.7

# Build dependencies for compiling CPython from source; the apt lists are
# removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    wget \
    git \
    screen \
    pkg-config \
    software-properties-common \
    libssl-dev \
    zlib1g-dev \
    libncurses5-dev \
    libncursesw5-dev \
    libreadline-dev \
    libsqlite3-dev \
    libgdbm-dev \
    libdb5.3-dev \
    libbz2-dev \
    libexpat1-dev \
    libffi-dev \
    liblzma-dev \
    tk-dev \
    && rm -rf /var/lib/apt/lists/*

# Build and install Python 3.9.13 from source. "make altinstall" installs
# python3.9/pip3.9 without clobbering the distribution's python3.
RUN wget https://www.python.org/ftp/python/3.9.13/Python-3.9.13.tgz \
    && tar xzf Python-3.9.13.tgz \
    && cd Python-3.9.13 \
    && ./configure --enable-optimizations \
    && make -j$(nproc) \
    && make altinstall \
    && cd .. \
    && rm -rf Python-3.9.13*

# Symlinks so "python3" / "pip3" resolve to the freshly built 3.9.
# Fix: -sf makes the links idempotent — plain "ln -s" aborts the build
# if the link target already exists (e.g. on a rebuild).
RUN ln -sf /usr/local/bin/python3.9 /usr/local/bin/python3
RUN ln -sf /usr/local/bin/pip3.9 /usr/local/bin/pip3

# Upgrade pip and point it at the Tsinghua PyPI mirror.
RUN python3 -m pip install --upgrade pip
RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
RUN pip config set global.trusted-host pypi.tuna.tsinghua.edu.cn

# Copy requirements.txt and install the Python dependencies; the file is
# removed afterwards so it does not linger in the image.
COPY requirements.txt /tmp/requirements.txt
RUN python3 -m pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
# COPY requirements-torch.txt /tmp/requirements-torch.txt
# RUN python3 -m pip install --no-cache-dir -r /tmp/requirements-torch.txt && rm /tmp/requirements-torch.txt

# Default working directory for interactive sessions and compose.
WORKDIR /workspace

# Sanity check: torch imports and reports versions. NOTE: no GPU is visible
# during "docker build", so is_available() prints False here; it only becomes
# True when the container runs with --gpus / the nvidia runtime.
RUN python3 -c "import torch; print(f'PyTorch版本: {torch.__version__}'); print(f'CUDA可用: {torch.cuda.is_available()}'); print(f'cuDNN版本: {torch.backends.cudnn.version()}')"

# Jupyter / web-service port.
EXPOSE 8888

# Development image: drop into a shell by default.
CMD ["/bin/bash"]

docker-compose.yml

# docker-compose service definition for the CUDA 12.1 + Python 3.9 dev container.
services:

  python3.9-cuda12.1:

    build: .

    image: python-3.9.13-cuda12.1-cudnn8.9.7

    container_name: python3.9-cuda12.1

    # Use the runtime registered by "nvidia-ctk runtime configure".
    runtime: nvidia

    environment:

      - NVIDIA_VISIBLE_DEVICES=all

    volumes:

      # NOTE(review): elsewhere these notes mount /home/pyapp — confirm
      # whether "pyap" here is a typo for "pyapp".
      - /home/pyap/Project/DockerWorkSpace/workspace:/workspace

    ports:

      # NOTE(review): the Dockerfile EXPOSEs 8888, but only 8887/8080 are
      # published here — confirm which port the service actually listens on.
      - "8887:8887"

      - "8080:8080"

    # Keep STDIN open and allocate a TTY so the bash command stays alive.
    stdin_open: true

    tty: true

    working_dir: /workspace  # default working directory

    command: /bin/bash  # default command

requirements.txt

# Dependency pins for the Python 3.9 / CUDA 12.1 image (installed via the
# Tsinghua PyPI mirror configured in the Dockerfile).
sqlalchemy==1.4.47
fastapi==0.111.0
pydantic==2.12.5
uvicorn==0.21.1
# NOTE(review): confirm a redis-py 7.0.1 release exists on the mirror.
redis==7.0.1
aiohttp==3.11.13
# NOTE(review): aioredis is archived upstream; redis>=4.2 ships redis.asyncio.
aioredis==2.0.1
APScheduler==3.10.4
boto3==1.24.28
botocore==1.27.28
imagesize==1.4.1
loguru==0.7.2
matplotlib==3.5.2
numpy==1.23.0
# NOTE(review): the nvidia_*_cu11 pins target CUDA 11 while the image ships
# CUDA 12.1 — presumably these satisfy torch==2.0.1's cu11 wheel deps; verify.
nvidia_cuda_nvrtc_cu11==11.7.99
nvidia_cudnn_cu11==8.5.0.96
nvidia_cufft_cu11==10.9.0.58
nvidia_cusolver_cu11==11.4.0.1
# NOTE(review): DALI wheels are hosted on NVIDIA's own index, not PyPI — this
# pin may not resolve through the Tsinghua mirror; verify the install source.
nvidia_dali_cuda120==1.30.0
onnxruntime_gpu==1.19.2
opencv_python==4.7.0.68
pandas==1.4.4
pika==1.3.2
Pillow==9.2.0
py7zr==0.22.0
pytest==7.1.2
PyYAML==6.0
scikit_learn==1.0.2
scipy==1.9.1
torch==2.0.1
torchsummary==1.5.1
torchvision==0.15.2
tqdm==4.64.1
ultralytics==8.3.119
tensorboard==2.19.0
pymysql==1.1.1
cx_Oracle==8.3.0

test-gpu.py

"""GPU / dependency smoke test for the python-3.9.13-cuda12.1 image.

Importing every package pinned in requirements.txt verifies the image build
(an ImportError here means a dependency is missing); the prints at the bottom
verify that PyTorch sees the CUDA driver and the GPUs.

Run inside the container:  python3 -u test-gpu.py
"""

# --- standard library ---
import base64
import filecmp
import io
import json
import math
import os
import random
import re
import shutil
import socket
import sys
import tarfile
import time
import uuid
import zipfile
from configparser import ConfigParser
from datetime import datetime, timedelta
from email.header import Header
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from enum import Enum, unique
from functools import wraps, lru_cache
from multiprocessing import Pool
from typing import Tuple, Union, List
from urllib.parse import quote

# --- third-party (see requirements.txt) ---
import boto3
import cv2 as cv
import fastapi
import numpy as np
import pandas as pd
import pika
import py7zr
import pydantic
import redis  # fix: the original imported redis twice
import sqlalchemy
import torch
import torch.nn as nn
import tqdm
import uvicorn
import yaml
from botocore.exceptions import ClientError
from loguru import logger
from matplotlib import pyplot as plt
from PIL import Image
from pydantic import BaseModel
from scipy.spatial import KDTree
from sklearn.cluster import KMeans
from sklearn.metrics import roc_auc_score
from torchvision import models
from torchvision import transforms as tvt
from ultralytics import YOLO

# Report the GPU stack as seen by PyTorch. torch.cuda.is_available() is only
# True when the container was started with --gpus / runtime: nvidia.
print(f'PyTorch版本: {torch.__version__}')
print(f'CUDA版本: {torch.version.cuda}')
print(f'cuDNN版本: {torch.backends.cudnn.version()}')
print(f'CUDA可用: {torch.cuda.is_available()}')
print(f'GPU数量: {torch.cuda.device_count()}')

Logo

有“AI”的1024 = 2048,欢迎大家加入2048 AI社区

更多推荐