#! /bin/bash

## This script installs the NVIDIA CUDA driver on a standalone CentOS Linux machine via a local rpm file
## Assumes the rpm is already downloaded in the same directory
## Run as su

## Preliminary check: print the lspci entries that mention NVIDIA
## (matched case-insensitively).
lspci | grep --ignore-case nvidia


## Download the latest rpm version suited for your system at https://developer.nvidia.com/cuda-downloads, e.g., Linux -> x86_64 -> CentOS -> 7 -> rpm(local)

## e.g., for cuda 9.2,
## wget https://developer.nvidia.com/compute/cuda/9.2/Prod2/local_installers/cuda-repo-rhel7-9-2-local-9.2.148-1.x86_64
# mv cuda-repo-rhel7-9-2-local-9.2.148-1.x86_64 cuda-repo-rhel7-9-2-local-9.2.148-1.x86_64.rpm

# Install the local CUDA repository rpm (expected in the current directory),
# clear yum's caches, then install the `cuda` package without prompting.
cuda_repo_rpm='cuda-repo-rhel7-9-2-local-9.2.148-1.x86_64.rpm'
rpm --install "${cuda_repo_rpm}"
yum clean all
yum --assumeyes install cuda

## Follow and accept all prompts. The installation will take a while: around 10 minutes or longer.

## After installation, make sure that the CUDA paths are linked in ~/.bashrc
 
#######################################
# Append the CUDA environment exports to ~/.bashrc.
# Export lines that are already present (exact whole-line match) are skipped,
# so running the script again does not pile up duplicate entries. When at
# least one line is missing, the missing lines are written inside the same
# "Added by CUDA installation" block the script has always produced.
# Globals:   HOME (read, through ~ expansion)
# Arguments: none
# Outputs:   appends to ~/.bashrc
# Returns:   0 when nothing had to be added, otherwise the append's status
#######################################
append_cuda_env_to_bashrc() {
  local rc_file line
  local -a wanted=() missing=()
  rc_file=~/.bashrc
  # Single quotes are deliberate: $PATH and $LD_LIBRARY_PATH must be expanded
  # when ~/.bashrc is read, not while this script runs.
  wanted=(
    'export PATH=/usr/local/cuda/bin:$PATH'
    'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH'
    'export CUDA_HOME=/usr/local/cuda'
  )

  for line in "${wanted[@]}"; do
    # -x -F: literal whole-line match. A missing or unreadable file simply
    # counts as "line not present", hence the silenced stderr.
    grep -qxF -- "${line}" "${rc_file}" 2>/dev/null || missing+=("${line}")
  done

  if (( ${#missing[@]} == 0 )); then
    return 0
  fi

  {
    echo ' '
    echo '# Added by CUDA installation '
    printf '%s\n' "${missing[@]}"
    echo ' '
  } >> "${rc_file}"
}

append_cuda_env_to_bashrc

# Verify the installation: read ~/.bashrc into the current shell so the
# settings appended to it take effect, then print the nvcc version.
. ~/.bashrc
nvcc --version

#######################################
# Run a more comprehensive test: build the CUDA samples, then run the
# deviceQuery and bandwidthTest binaries from the samples' release directory.
#
# The function body is a subshell, so the directory changes made here do not
# leak into the rest of the script (later steps use paths relative to the
# directory the script was started from). Each `cd` is checked so that `make`
# is never run in an unintended directory.
#
# Arguments: $1 - samples directory (optional). Defaults to
#                 /usr/local/cuda/samples; pass the directory chosen during
#                 the CUDA installation if it differs.
# Returns:   non-zero if a directory cannot be entered, otherwise the exit
#            status of ./bandwidthTest
#######################################
run_cuda_samples() (
  local samples_dir="${1:-/usr/local/cuda/samples}"

  if ! cd "${samples_dir}"; then
    printf 'CUDA samples directory not found: %s\n' "${samples_dir}" >&2
    exit 1
  fi

  # The build result is deliberately not checked (as before): the binaries
  # that did get built are still exercised below.
  make

  # After the build, the test binaries live under bin/x86_64/linux/release.
  if ! cd bin/x86_64/linux/release; then
    printf 'CUDA samples release directory not found under: %s\n' \
      "${samples_dir}" >&2
    exit 1
  fi
  ./deviceQuery
  ./bandwidthTest
)

run_cuda_samples

## If ./deviceQuery and ./bandwidthTest report failures, you may simply reboot the system and run the tests again. The tests should pass after rebooting.


#### In case the installation procedure described above fails, install the NVIDIA CUDA driver by using a local run file
# As an example, on a newly installed CentOS 7, the CUDA 10 installation via rpm failed, presumably because I had updated CentOS 7 before installing CUDA 10.
# Follow the instructions as provided in 
# http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#abstract
# Read the section under "4. Runfile Installation"
# Download the latest nvidia cuda installation run file appropriate for your version of Linux at https://developer.nvidia.com/cuda-downloads
# For example, for Linux -> x86_64 -> CentOS -> 7 -> run file (local). Then download 
# https://developer.nvidia.com/compute/cuda/10.0/Prod/local_installers/cuda-repo-rhel7-10-0-local-10.0.130-410.48-1.0-1.x86_64 to download the file cuda_10.0.130_410.48_linux.run

# Check that the NVIDIA GPU is seen by the system:
#   lspci | grep -i nvidia
# If that shows nothing, run update-pciids and look again.
update-pciids
lspci | grep --ignore-case nvidia

## Check whether nouveau is on.
lsmod | grep nouveau
## Any output here means nouveau is on, and it needs to be disabled.

## Run the run file.
# The installer is started through `sh` so that it also works when the
# downloaded file does not have its execute bit set. A missing file is
# reported explicitly instead of surfacing as a "command not found" from sudo.
cuda_runfile='./cuda_10.0.130_410.48_linux.run'
if [[ -f "${cuda_runfile}" ]]; then
  sudo sh "${cuda_runfile}"
else
  printf 'CUDA run file %s not found in %s\n' "${cuda_runfile}" "${PWD}" >&2
fi

# Nouveau is disabled by creating the file
# /etc/modprobe.d/blacklist-nouveau.conf with these two lines:
#
#    blacklist nouveau
#    options nouveau modeset=0
#
# Note: with CUDA version 10 the nouveau blacklist is created automatically
# when the run file is executed.
#
# Regenerate the kernel initramfs:
sudo dracut -f

#######################################
# Make sure the CUDA environment exports are present in ~/.bashrc.
# Each export line is appended only when an identical line is not already in
# the file, so earlier parts of this script (or earlier runs) do not lead to
# duplicated entries.
# Globals:   HOME (read, through ~ expansion)
# Arguments: none
# Outputs:   appends the missing lines to ~/.bashrc
#######################################
ensure_cuda_exports_in_bashrc() {
  local rc_file line
  rc_file=~/.bashrc
  # Single quotes are deliberate: $PATH and $LD_LIBRARY_PATH must be expanded
  # when ~/.bashrc is read, not while this script runs.
  for line in \
    'export PATH=/usr/local/cuda/bin:$PATH' \
    'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' \
    'export CUDA_HOME=/usr/local/cuda'; do
    # -x -F: literal whole-line match. A missing file counts as "not present".
    grep -qxF -- "${line}" "${rc_file}" 2>/dev/null \
      || printf '%s\n' "${line}" >> "${rc_file}"
  done
}

ensure_cuda_exports_in_bashrc

## Check whether nouveau is still on.
# lsmod | grep nouveau
## If something is printed, nouveau is still on; in that case a reboot is needed.

# `now` is a time argument of shutdown(8), not of reboot, so the immediate
# reboot is requested through shutdown.
shutdown -r now

# Execute ./cuda_10.0.130_410.48_linux.run again after rebooting. If needed, run `sudo init 3` first and then execute ./cuda_10.0.130_410.48_linux.run