Mellanox RDMA网卡驱动安装

1 RDMA驱动安装

# 1. get OFED
# https://cn.mellanox.com/products/infiniband-drivers/linux/mlnx_ofed

tar -xvf xxx.tar.gz

cd MLNX_OFED_LINUX-xxxx-x86_64

sudo ./mlnxofedinstall --add-kernel-support

# after a successful installation, restart the driver service
sudo /etc/init.d/openibd restart

# show RNIC status
sudo hca_self_test.ofed # results should be "PASS"

# other commands for showing the status of the RNIC
ibstat
ibstatus
ibv_devinfo
ibv_devices
ibnodes

# change the link type (mode) of the RNIC (Ethernet or InfiniBand)
systemctl start mst
mst status # we will get MST devices: e.g., /dev/mst/mt4119_pciconf0 & domain:bus:dev.fn=0000:d8:00.0 addr.reg=88....
# show the current RNIC link type
mlxconfig -d /dev/mst/mt4119_pciconf0 query # we get the result like this : LINK_TYPE_P1 ETH(2)

# now change the link type of the RNIC, method 1 (mlxconfig)
ETH mode : mlxconfig -d /dev/mst/mt4119_pciconf0 set LINK_TYPE_P1=2
IB mode : mlxconfig -d /dev/mst/mt4119_pciconf0 set LINK_TYPE_P1=1

# method 2
ETH: mstconfig -d d8:00.0 set LINK_TYPE_P1=2
IB: mstconfig -d d8:00.0 set LINK_TYPE_P1=1
# note: we can get "d8:00.0" using command: lspci|grep Mellanox

sudo reboot # the host must be rebooted for the new link type to take effect

# configure the IP address
ifconfig -a # we need to know the RNIC name 
sudo netplan generate && sudo vim /etc/netplan/00-installer-config.yaml
# add content like this
network:
    ethernets:
        ens7f1:
            dhcp4: no
            # netplan requires CIDR notation (address/prefix length);
            # /24 is only an example - use the prefix length of your subnet
            addresses: [10.100.1.5/24]
    version: 2


# apply the netplan configuration so the new IP settings take effect
netplan apply

# RNIC bandwidth & latency test

# Install the LLDP agent (lldpad), start it in daemon mode, then enable LLDP
# RX/TX plus a fixed set of TLVs on every network interface whose name
# contains "eth", "ens", "eno" or "enp".
apt install lldpad -y
lldpad -d
# Glob /sys/class/net instead of parsing `ls` output, so interface names are
# never subject to word splitting or pathname expansion.
for path in /sys/class/net/*; do
    [ -e "$path" ] || continue # the glob matched nothing
    iface=${path##*/}
    # Same filter as grep 'eth\|ens\|eno\|enp': substring match on the name.
    case "$iface" in
        *eth* | *ens* | *eno* | *enp*) ;;
        *) continue ;;
    esac
    echo "enabling lldp for interfacce: $iface"
    lldptool set-lldp -i "$iface" adminStatus=rxtx
    # Enable transmission of each TLV, in the same order as before.
    for tlv in sysName portDesc sysDesc sysCap mngAddr; do
        lldptool -T -i "$iface" -V "$tlv" enableTx=yes
    done
done

2 时延与带宽测试

#测试RDMA网卡带宽
#server
ib_read_bw -a -c RC -F -d mlx5_1 --report_gbits
#client
ib_read_bw -a -c RC -F -d mlx5_1 --report_gbits <server ip>
#ib_read_bw,ib_write_bw,ib_send_bw分别测试读、写、send的带宽


#测试RDMA网卡时延
#server
ib_read_lat -a -c RC -F -d mlx5_1 
#client
ib_read_lat -a -c RC -F -d mlx5_1 <server ip>
#ib_read_lat,ib_write_lat,ib_send_lat分别测试读、写、send的时延

相关参考:
Infiniband常用命令

Logo

魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。

更多推荐