1、有线(联网在线安装)

下面的命令如果执行失败,多试几次:
sudo dnf config-manager --add-repo=https://download.docker.com/linux/centos/docker-ce.repo
sudo dnf install -y docker-ce docker-ce-cli containerd.io

dnf install -y net-tools
dnf install -y vim
dnf install -y wget
sudo dnf install epel-release -y
sudo dnf install dkms -y

sudo dnf install -y dnf-utils device-mapper-persistent-data lvm2
sudo dnf config-manager --add-repo=https://download.docker.com/linux/centos/docker-ce.repo
sudo dnf install -y docker-ce docker-ce-cli containerd.io
sudo systemctl start docker
sudo systemctl enable docker

安装 NVIDIA 驱动前需要先禁用 Nouveau:

echo -e "blacklist nouveau\noptions nouveau modeset=0" | sudo tee /etc/modprobe.d/disable-nouveau.conf > /dev/null
sudo mv /boot/initramfs-$(uname -r).img /boot/initramfs-$(uname -r).img.bak
sudo dracut --force
sudo dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
sudo dnf clean all
sudo dnf -y module install nvidia-driver:latest-dkms
sudo dnf -y install cuda
dnf -y install "kernel-devel-uname-r == $(uname -r)" 

执行 dnf -y install "kernel-devel-uname-r == $(uname -r)" 这一步时,如果提示找不到与当前内核版本(uname -r 的输出)对应的 kernel-devel 包,先执行 dnf update,然后 reboot,重启后再重新执行这条命令即可。

dkms install -m nvidia -v 460.32.03
sudo dnf config-manager --add-repo https://nvidia.github.io/nvidia-docker/centos8/nvidia-docker.repo
sudo dnf install -y nvidia-docker2
sudo systemctl restart docker

如果想安装指定版本的docker

sudo dnf install docker-ce-<VERSION_STRING> docker-ce-cli-<VERSION_STRING> containerd.io

2)系统时间

# 查看当前系统时区
timedatectl status
sudo timedatectl set-timezone Asia/Shanghai

设置NTP同步

sudo dnf install chrony
sudo systemctl start chronyd
sudo systemctl enable chronyd
# 查看硬件时钟
hwclock --show
# 如果硬件时钟准确而系统时间不准,用硬件时钟的时间来设置系统时钟
hwclock --hctosys

2、离线安装(offline)

驱动

sudo dnf install epel-release -y
sudo dnf install dkms -y
dnf -y install "kernel-devel-uname-r == $(uname -r)"
sh cuda_12.2.1_535.86.10_linux.run

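问题解决

执行 dnf install 时,如果报错信息中的仓库名以 cuda-rhel8.repodnf 结尾(如下面的记录),说明添加 CUDA 仓库时传入的 URL 末尾多了 dnf(很可能是粘贴时把下一条命令 dnf clean all 的开头连到了 URL 后面),从而生成了一个地址错误的 repo 文件。需要删除的是 /etc/yum.repos.d/ 下文件名带 repodnf 的那个文件;从记录可以看到,只删除 cuda-rhel8.repo 并不能解决问题。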

[root@localhost ~]# sudo dnf install -y nvidia-docker2
created by dnf config-manager from https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repodnf                                                  21  B/s | 433  B     00:20
Errors during downloading metadata for repository 'developer.download.nvidia.com_compute_cuda_repos_rhel8_x86_64_cuda-rhel8.repodnf':
  - Status code: 404 for https://developer.download.nvidia.cn/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repodnf/repodata/repomd.xml (IP: 42.237.113.75)
Error: Failed to download metadata for repo 'developer.download.nvidia.com_compute_cuda_repos_rhel8_x86_64_cuda-rhel8.repodnf': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried
[root@localhost ~]# ls
anaconda-ks.cfg
[root@localhost ~]# cd /etc/yum.repos.d/
[root@localhost yum.repos.d]# ls
cuda-rhel8.repo                                                                        epel-testing-modular.repo      Rocky-BaseOS.repo            Rocky-Media.repo             Rocky-RT.repo
developer.download.nvidia.com_compute_cuda_repos_rhel8_x86_64_cuda-rhel8.repodnf.repo  epel-testing.repo              Rocky-Debuginfo.repo         Rocky-NFV.repo               Rocky-Sources.repo
docker-ce.repo                                                                         nvidia-container-toolkit.repo  Rocky-Devel.repo             Rocky-Plus.repo
epel-modular.repo                                                                      nvidia-docker.repo             Rocky-Extras.repo            Rocky-PowerTools.repo
epel.repo                                                                              Rocky-AppStream.repo           Rocky-HighAvailability.repo  Rocky-ResilientStorage.repo
[root@localhost yum.repos.d]# rm cuda-rhel8.repo
rm: remove regular file 'cuda-rhel8.repo'? y
[root@localhost yum.repos.d]# sudo dnf install -y nvidia-docker2
created by dnf config-manager from https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repodnf                                                  16  B/s | 433  B     00:27
Errors during downloading metadata for repository 'developer.download.nvidia.com_compute_cuda_repos_rhel8_x86_64_cuda-rhel8.repodnf':
  - Status code: 404 for https://developer.download.nvidia.cn/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repodnf/repodata/repomd.xml (IP: 61.133.50.154)
Error: Failed to download metadata for repo 'developer.download.nvidia.com_compute_cuda_repos_rhel8_x86_64_cuda-rhel8.repodnf': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried
[root@localhost yum.repos.d]# rm developer.download.nvidia.com_compute_cuda_repos_rhel8_x86_64_cuda-rhel8.repodnf.repo
rm: remove regular file 'developer.download.nvidia.com_compute_cuda_repos_rhel8_x86_64_cuda-rhel8.repodnf.repo'? y
[root@localhost yum.repos.d]# sudo dnf install -y nvidia-docker2

Logo

魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。

更多推荐