# Fetch the nvidia_smi_exporter sources and launch the exporter in the
# background, passing 9101 as its argument (the same port the Prometheus
# scrape target points at).
git clone https://github.com/kevin7674/nvidia_smi_exporter.git
cd nvidia_smi_exporter
# Start exactly one instance: a second launch with the same port argument
# cannot bind the already-used address and only leaves a failed job behind.
./nvidia_smi_exporter 9101 &
# Create the directory that will be shared over NFS (no error if it exists).
mkdir -p /nfs/prometheus
# Append the export entry. all_squash together with anonuid=0/anongid=0 maps
# every client user to uid/gid 0 on the server.
# NOTE(review): NFS servers normally read /etc/exports — confirm that
# /nfs/exports is really the file intended here.
printf '%s\n' "/nfs/prometheus *(rw,no_subtree_check,sync,all_squash,anonuid=0,anongid=0)" >> /nfs/exports
# Check out the Prometheus deployment repository into ./prometheus (the
# directory git would pick by default) and make it the working directory.
git clone https://github.com/kevin7674/prometheus.git prometheus
cd prometheus
# NFS volume source: `server` and `path` must be nested under `nfs`.
# Replace <NFS_IP> with the address of the NFS server that exports
# /nfs/prometheus.
nfs:
  server: <NFS_IP>
  path: "/nfs/prometheus"
# Scrape job for the nvidia_smi_exporter instance listening on port 9101.
# Replace <GPU_NODE_IP> with the GPU node's address; list each node once
# (add further 'host:9101' entries to the same list for additional nodes).
- job_name: 'nvidia_smi_exporter'
  static_configs:
    - targets: ['<GPU_NODE_IP>:9101']
# Create the Kubernetes resources from the manifests in the current
# directory, in this order: node exporter, RBAC setup, Prometheus config map,
# Prometheus deployment.
kubectl create -f node-exporter.yaml
# The manifest name had a doubled extension ("rbac-setup.yaml.yaml").
kubectl create -f rbac-setup.yaml
kubectl create -f prometheus-config-map.yaml
kubectl create -f prometheus-deploy.yaml