Setup Python workbench
Here is a short guide to how I set up my home directory on the NYGC cluster. I started by cloning my dotfiles from GitHub.
Directory structure
- `opt`: for installing software
- `software`: for downloading and/or developing software
- `work`: for experiments / simulations / analysis
Setup modules
- Add
export MODULEPATH="${MODULEPATH}:${HOME}/modulefiles"
to include personal modulefiles.
Setup Miniconda
- Download and install latest version of Miniconda (23.3.1).
- Copy the conda init script from `.bashrc` to `opt/miniconda3/conda_bash_init.sh`.
- Create a modulefile to initialize conda on demand. (See also: a similar issue for LMOD modulefiles.)
#%Module2.1.10#####################################################################
##
## modulefiles conda/23.3.1
##
## Makes a per-user Miniconda installation available on demand by having the
## shell source a saved conda init script when the module is loaded.
##
# Version string; interpolated into the help message below.
set version 23.3.1
# Root directory of the Miniconda installation; the init script is read from here.
set modroot /gpfs/commons/home/sbanerjee/opt/miniconda3
# Help text for this module; the message is written to stderr.
proc ModulesHelp { } {
# Both variables are set at file level, so they are declared global here.
# Only version is used in the message; modroot is declared but not referenced.
global version modroot
puts stderr "conda/$version - adds Miniconda to PATH"
}
module-whatis "Sets the environment for using Miniconda"
# Only one version at a time
conflict conda
# In load mode, print a shell command on stdout rather than setting variables
# directly; the command sources the saved conda init script.
# NOTE(review): this relies on the module command passing modulefile stdout to
# the calling shell for evaluation -- confirm for the installed Modules version.
if [module-info mode load] {
puts stdout "source $modroot/conda_bash_init.sh ;"
}
if [module-info mode remove] {
puts stdout "conda deactivate ;"
remove-path PATH $modroot/bin
remove-path PATH $modroot/condabin
## unset variable names.
## env | grep "CONDA"
unsetenv CONDA_SHLVL
unsetenv CONDA_EXE
unsetenv _CE_CONDA
unsetenv CONDA_PYTHON_EXE
## unset function names
## declare -F
unsetenv __conda_activate
unsetenv __conda_exe
unsetenv __conda_hashr
unsetenv __conda_reactivate
unsetenv conda
}
- Set up `.condarc` (see dotfiles).
Install Python
module load gcc/11.2.0 lapack/3.10 openmpi/4.1.4
conda create --name py311 python=3.11 -c conda-forge
conda activate py311
conda config --env --add channels conda-forge
conda install numpy scipy pandas scikit-learn matplotlib
conda install jupyterlab
Connect to JupyterLab
mkdir ~/.jupyter
cp /nfs/sw/jupyter3/jupyter3-4.1/.jupyter/jupyter_notebook_config.py ~/.jupyter/jupyter_notebook_config.py
jupyter lab
There is no need for local port forwarding. (I would still like to know how this is set up; ask NYGC Cluster Linux help.)
Load R libraries with Python
- Set CXX17 path for compiling R packages
echo 'CXX17 = g++ -std=gnu++17 -fPIC' > ~/.R/Makevars
- Create a modulefile that sets the environment variables which are not set by NYGC (required for loading R from Python).
#%Module2.1.10#####################################################################
##
## modulefiles Renv/4.2.3
##
## Companion to the R/4.2.3 module: exports additional variables that point
## into the R installation.
##
# Declare R/4.2.3 as a prerequisite of this module.
prereq R/4.2.3
# Version string; interpolated into the help and whatis messages below.
set version 4.2.3
# R installation prefix, read from the environment variable named "rhome".
# NOTE(review): presumably exported by the R/4.2.3 module; reading it raises a
# Tcl error if the variable is not set -- confirm.
set rhome $::env(rhome)
# Help text for this module; the message is written to stderr.
proc ModulesHelp { } {
# version is set at file level, so it is declared global here.
global version
puts stderr "Renv/$version - adds extra environment variables not provided by NYGC"
}
module-whatis "Sets the environment for using R/$version"
# Only one version at a time
conflict Renv
# Put R's library and header directories at the front of the search paths.
# NOTE(review): LIBRARY_PATH and CPATH are the variables GCC consults when
# linking and compiling -- confirm this matches the toolchain in use.
prepend-path LIBRARY_PATH $rhome/lib64/R/lib
prepend-path CPATH $rhome/lib64/R/include
# Add the R prefix to CMAKE_PREFIX_PATH.
prepend-path CMAKE_PREFIX_PATH $rhome/
# Export the installation prefix itself as R_ROOT.
setenv R_ROOT $rhome
setenv R_HOME $rhome/lib64/R
- Load the R module and the environment variables
module load R/4.2.3
module load Renv/4.2.3
- Install some R packages (also checks that gcc is working)
parallel::detectCores()
options(Ncpus = 48)
install.packages(c("devtools", "ggplot2"))
- Install and verify rpy2
conda install tzlocal # other dependencies are already installed while installing the previous packages
pip install rpy2
python -m rpy2.situation
python
>>> import rpy2.robjects as robj