#! /bin/bash

## This script installs the NVIDIA CUDA driver on a standalone CentOS Linux machine via a local runfile. You may have to use a local runfile for the CUDA installation in case the installation
## using a local rpm fails.
# As an example, on a newly installed CentOS 7, the CUDA 9.2 installation via rpm failed, presumably because I had updated CentOS 7 before installing CUDA 9.2.
# The following installation instructions are found at
# http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#abstract
# Read the section under "4. Runfile Installation".
# Download the latest NVIDIA CUDA installation runfile appropriate for your version of Linux at https://developer.nvidia.com/cuda-downloads
# Linux -> x86_64 -> CentOS -> 7 -> runfile (local). Then download the runfile as shown below.

# Download the CUDA 9.2 runfile installer and its patch into the shared
# configuration repository, then mark the two files executable.
repo_dir=/share/apps/configrepo
base_url=https://developer.nvidia.com/compute/cuda/9.2/Prod2
cuda_run=cuda_9.2.148_396.37_linux.run
patch_run=cuda_9.2.148.1_linux.run

# Stop here if the repository directory is missing; otherwise the downloads
# would silently land in whatever the current directory happens to be.
cd "$repo_dir" || {
  echo "error: cannot cd to $repo_dir" >&2
  exit 1
}

# wget -O creates the output file even when the transfer fails, so remove the
# partial file and abort rather than running a truncated installer later.
wget "$base_url/local_installers/cuda_9.2.148_396.37_linux" -O "$cuda_run" || {
  rm -f -- "$cuda_run"
  echo "error: failed to download $cuda_run" >&2
  exit 1
}

#Also download the patch file
wget "$base_url/patches/1/cuda_9.2.148.1_linux" -O "$patch_run" || {
  rm -f -- "$patch_run"
  echo "error: failed to download $patch_run" >&2
  exit 1
}

# Only touch the two files fetched above, not every *.run in the directory.
chmod +x -- "$cuda_run" "$patch_run"

# Confirm that the system can see the NVIDIA GPU.
# The PCI ID database is refreshed first so that the card is listed under its
# vendor name; without a current database the grep below may print nothing.
update-pciids
# Any output here means an NVIDIA device is present on the PCI bus.
lspci | grep --ignore-case nvidia

## Check whether the nouveau driver is loaded. Any output from this command
## means nouveau is on and has to be disabled before installing CUDA.
lsmod | grep nouveau

#######################################
# Disable nouveau by making sure the modprobe configuration contains:
#   blacklist nouveau
#   options nouveau modeset=0
# Directives that are already present are not appended again, so running
# this more than once does not duplicate entries.
# Arguments: $1 - configuration file to update
#                 (default: /etc/modprobe.d/blacklist-nouveau.conf)
# Returns:   0 when nothing needed adding or the append succeeded,
#            non-zero when the file could not be written
#######################################
blacklist_nouveau() {
  local conf=${1:-/etc/modprobe.d/blacklist-nouveau.conf}
  local directive
  local -a missing=()

  for directive in 'blacklist nouveau' 'options nouveau modeset=0'; do
    # -x: whole-line match, -F: literal string, -s: quiet if file is absent
    grep -qsxF -- "$directive" "$conf" || missing+=("$directive")
  done

  # Nothing to add: leave the file untouched.
  (( ${#missing[@]} )) || return 0

  {
    printf '%s\n' '## added when installing cuda_9.2.148_396.37_linux-run '
    printf '%s\n' "${missing[@]}"
  } >> "$conf"
}

blacklist_nouveau \
  || echo 'warning: could not update the nouveau blacklist file' >&2

#  Note that with CUDA version 10, the nouveau blacklist is created automatically when the runfile is executed.

# Switch to the text-mode runlevel before touching the graphics driver setup.
init 3

#    Regenerate the kernel initramfs so the nouveau blacklist takes effect at
#    boot. Do not reboot if the rebuild fails.
dracut --force || {
  echo 'error: dracut --force failed; not rebooting' >&2
  exit 1
}

## Check whether nouveau is still loaded.
lsmod | grep nouveau
## If something is printed, nouveau is still on and a reboot is needed to
## unload it.

reboot
# On systemd hosts `reboot` only queues the shutdown and returns at once.
# Stop here so the post-reboot steps below are not executed while the machine
# is going down; they are meant to be run manually after the reboot.
exit

######### Manually continue the following lines after rebooting #################
#######################################
# Append the CUDA environment settings (PATH, LD_LIBRARY_PATH, CUDA_HOME)
# to a shell startup file, unless they have already been added.
# The ${VAR:+:${VAR}} form avoids a trailing ':' when the variable is unset;
# an empty entry in these search paths means "current directory".
# Arguments: $1 - startup file to update (default: ~/.bashrc)
# Returns:   0 when the settings are already present or were appended,
#            non-zero when the file could not be written
#######################################
add_cuda_env() {
  local rc=${1:-}
  [[ -n "$rc" ]] || rc=~/.bashrc

  # The CUDA_HOME line serves as the marker for "already configured".
  if grep -qsxF -- 'export CUDA_HOME=/usr/local/cuda' "$rc"; then
    return 0
  fi

  # shellcheck disable=SC2016 # expansions must be written literally to the file
  printf '%s\n' \
    ' ' \
    '## added when installing cuda ' \
    'export PATH=/usr/local/cuda/bin${PATH:+:${PATH}}' \
    'export LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}' \
    'export CUDA_HOME=/usr/local/cuda' \
    >> "$rc"
}

add_cuda_env

# After rebooting, run the CUDA runfile installer and then its patch from
# text mode (runlevel 3), and return to the graphical runlevel afterwards.
init 3
# Apply the patch only when the base installer succeeded; patching a failed
# or aborted installation makes no sense. No `exit` is used here because
# these post-reboot lines may be pasted into an interactive shell.
if sh /share/apps/configrepo/cuda_9.2.148_396.37_linux.run; then
  sh /share/apps/configrepo/cuda_9.2.148.1_linux.run
else
  echo 'error: CUDA runfile installer failed; patch not applied' >&2
fi
init 5

## Once the installer has finished, ~/.bashrc must contain the CUDA paths.

# Quick check that the installation works: reload the shell startup file so
# the CUDA paths are active in this shell, then print the compiler version.
. ~/.bashrc
nvcc --version

# Run a more comprehensive test: build the CUDA samples and run two of them.
# Adjust samples_dir if the samples were installed elsewhere during the CUDA
# installation process.
samples_dir=/usr/local/cuda/samples
# The build places a whole lot of executables in this directory.
release_dir=$samples_dir/bin/x86_64/linux/release

# Only run make inside the samples directory, and only run the tests after a
# successful build. No `exit` is used because these post-reboot lines may be
# pasted into an interactive shell.
if ! cd "$samples_dir"; then
  echo "error: CUDA samples directory not found: $samples_dir" >&2
elif ! make; then
  echo 'error: building the CUDA samples failed' >&2
elif cd "$release_dir"; then
  ## To test the cuda installation,
  ./deviceQuery
  ./bandwidthTest
fi

## If ./deviceQuery and ./bandwidthTest report failures, try rebooting the system and running the tests again. The tests should pass after rebooting.