diff --git a/.github/ISSUE_TEMPLATE/---bug-report.md b/.github/ISSUE_TEMPLATE/---bug-report.md
index acf0d58d6..c4c71233f 100644
--- a/.github/ISSUE_TEMPLATE/---bug-report.md
+++ b/.github/ISSUE_TEMPLATE/---bug-report.md
@@ -23,7 +23,7 @@ A clear and concise description of what you expected to happen.
 **Screenshots**
 If applicable, add screenshots to help explain your problem.
 
-**GANDLF Version**
+**GaNDLF Version**
 Version information is found on Help > About
 
 **Desktop (please complete the following information):**
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 3d66357d7..a7f23884b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -15,7 +15,7 @@ Depending on our bandwidth, one of the team members will get around to it (we wi
 
 ## Pull Requests
 
-Thank you for your interest in contributing to **GANDLF**! To make the process as smooth as posssible, please follow these guidelines for speedy reviews and merges:
+Thank you for your interest in contributing to **GaNDLF**! To make the process as smooth as possible, please follow these guidelines for speedy reviews and merges:
 
 - Ensure your PR addresses an existing issue (if none exists, please open one so that it can be triaged by the admins).
 - Your PR should be from a branch that either contains the issue number it fixes and/or contains as much top-level information as possible to help moderators review and merge it.
diff --git a/GANDLF/models/MSDNet.py b/GANDLF/models/MSDNet.py
index 6edcd7a7c..d2c556f30 100644
--- a/GANDLF/models/MSDNet.py
+++ b/GANDLF/models/MSDNet.py
@@ -67,7 +67,7 @@ def forward(self, x):
             x = f(x)
 
             if (i + 1) % 2 == 0 and not i == (len(self.layers) - 1):
-                x = nn.ReLU(x) #F.relu(x)
+                x = F.relu(x)
                 # Append output into previous features
                 prev_features.append(x)
                 x = torch.cat(prev_features, 1)
diff --git a/GANDLF/parameterParsing.py b/GANDLF/parameterParsing.py
index 584c06591..8c1ee84e0 100644
--- a/GANDLF/parameterParsing.py
+++ b/GANDLF/parameterParsing.py
@@ -94,7 +94,8 @@ def get_model(
             base_filters,
             final_convolution_layer=final_convolution_layer,
         )
-
+        amp = False # this is not yet implemented for msdnet
+
     elif 'imagenet' in modelname:
         # these are generic imagenet-trained models and should be customized
         if num_dimensions != 2:
@@ -154,6 +155,12 @@ def get_model(
             model = torchvision.models.resnet101(pretrained=True)
         elif 'resnet152' in modelname:
             model = torchvision.models.resnet152(pretrained=True)
+        else:
+            sys.exit(
+                "Could not find the requested model '"
+                + modelname
+                + "' in the implementation"
+            )
     elif 'densenet' in modelname:
         if modelname == 'densenet121': # regressor/classifier network
diff --git a/GANDLF/parseConfig.py b/GANDLF/parseConfig.py
index 6594d191c..2e2366f58 100644
--- a/GANDLF/parseConfig.py
+++ b/GANDLF/parseConfig.py
@@ -43,11 +43,15 @@ def parseConfig(config_file_path, version_check = True):
     max = parse_version(params['version']['maximum'])
     if (min > gandlf_version_int) or (max < gandlf_version_int):
         sys.exit('Incompatible version of GANDLF detected (' + gandlf_version + ')')
-
+
+    if ('psize' in params):
+        print('WARNING: \'psize\' has been deprecated in favor of \'patch_size\'', file = sys.stderr)
+        if not('patch_size' in params):
+            params['patch_size'] = params['psize']
+
     if 'patch_size' in params:
-        params['psize'] = params['patch_size']
-        if len(params['psize']) == 2: # 2d check
-            params['psize'].append(1) # ensuring same size during torchio processing
+        if len(params['patch_size']) == 2: # 2d check
+            params['patch_size'].append(1) # ensuring same size during torchio processing
     else:
         sys.exit('The \'patch_size\' parameter needs to be present in the configuration file')
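The `parseConfig.py` hunk above replaces the old `patch_size` → `psize` aliasing with a deprecation shim. A minimal, self-contained sketch of the intended behavior (the helper name `normalize_patch_size` and the bare `params` dict are illustrative, not GaNDLF's actual API):

```python
import sys

def normalize_patch_size(params):
    # Accept the deprecated 'psize' key, warn on stderr, and fall back to it
    # only when 'patch_size' is absent (mirrors the hunk above).
    if 'psize' in params:
        print("WARNING: 'psize' has been deprecated in favor of 'patch_size'", file=sys.stderr)
        if 'patch_size' not in params:
            params['patch_size'] = params['psize']

    if 'patch_size' in params:
        if len(params['patch_size']) == 2:  # 2D check
            params['patch_size'].append(1)  # torchio expects a third dimension
    else:
        sys.exit("The 'patch_size' parameter needs to be present in the configuration file")
    return params

# An old 2D config still loads, and gains a singleton third dimension:
print(normalize_patch_size({'psize': [128, 128]})['patch_size'])  # [128, 128, 1]
```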
diff --git a/GANDLF/preprocessing.py b/GANDLF/preprocessing.py
index 384621a04..518be90d6 100644
--- a/GANDLF/preprocessing.py
+++ b/GANDLF/preprocessing.py
@@ -133,7 +133,7 @@ def znorm(tensor: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
     return tensor
 
 # adapted from https://codereview.stackexchange.com/questions/132914/crop-black-border-of-image-using-numpy/132933#132933
-def crop_image_outside_zeros(array, psize):
+def crop_image_outside_zeros(array, patch_size):
     dimensions = len(array.shape)
     if dimensions != 4:
         raise ValueError("Array expected to be 4D but got {} dimensions.".format(dimensions))
@@ -156,13 +156,13 @@ def crop_image_outside_zeros(array, psize):
     # for each axis
     for i in range(3):
         # if less than patch size, extend the small corner out
-        if large[i] - small[i] < psize[i]:
-            small[i] = large[i] - psize[i]
+        if large[i] - small[i] < patch_size[i]:
+            small[i] = large[i] - patch_size[i]
 
         # if bottom fell off array, extend the large corner and set small to 0
         if small[i] < 0:
             small[i] = 0
-            large[i] = psize[i]
+            large[i] = patch_size[i]
 
     # calculate pixel location of new bounding box corner (will use to update the reference of the image to physical space)
     new_corner_idxs = np.array([small[0], small[1], small[2]])
@@ -181,15 +181,15 @@ class CropExternalZeroplanes(SpatialTransform):
     Transformation class to enable taking the whole image stack (including segmentation) and removing (starting from edges) physical-coordinate planes with all zero voxels until you reach a non-zero voxel.
 
     Args:
-        psize: patch size (used to ensure we do not crop to smaller size than this)
+        patch_size: patch size (used to ensure we do not crop to smaller size than this)
         **kwargs: See :class:`~torchio.transforms.Transform` for additional keyword arguments.
     """
 
-    def __init__(self, psize, **kwargs):
+    def __init__(self, patch_size, **kwargs):
         super().__init__(**kwargs)
-        self.psize = psize
-        self.args_names = ('psize',)
+        self.patch_size = patch_size
+        self.args_names = ('patch_size',)
 
     def apply_transform(self, subject):
@@ -209,7 +209,7 @@ def apply_transform(self, subject):
         numpy_stack = np.concatenate(numpy_stack_list, axis=0)
 
         # crop away the external zero-planes on the whole stack
-        new_corner_idxs, new_stack = crop_image_outside_zeros(array=numpy_stack, psize=self.psize)
+        new_corner_idxs, new_stack = crop_image_outside_zeros(array=numpy_stack, patch_size=self.patch_size)
 
         # recompute origin of affine matrix using initial image shape
         new_origin = nib.affines.apply_affine(example_image_affine, new_corner_idxs)
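The rename above is mechanical, but the corner-adjustment loop in `crop_image_outside_zeros` is easy to misread. A self-contained sketch of that logic with hypothetical bounding-box corners (the `small`/`large` values are made up for illustration):

```python
import numpy as np

patch_size = [32, 32, 32]
small = np.array([40, 40, 5])   # hypothetical lower corners of the non-zero bounding box
large = np.array([60, 90, 20])  # hypothetical upper corners

for i in range(3):
    # if the box is smaller than the patch size, extend the small corner out
    if large[i] - small[i] < patch_size[i]:
        small[i] = large[i] - patch_size[i]

    # if the bottom fell off the array, clamp to 0 and extend the large corner
    if small[i] < 0:
        small[i] = 0
        large[i] = patch_size[i]

print(small, large)  # [28 40  0] [60 90 32] -- every axis now spans at least patch_size
```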
diff --git a/HISTORY.md b/HISTORY.md
index 1ffa61fd6..5a75f8b04 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -58,7 +58,7 @@
 
 ## 0.0.1 (2020/08/25)
 
-- First tag of GANDLF
+- First tag of GaNDLF
 - Initial feature list:
   - Supports multiple
     - Deep Learning model architectures
diff --git a/docs/README.md b/docs/README.md
index aac5bf9c2..b5d181091 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,4 +1,4 @@
-The website for GANDLF; start with [index.md](./index.md).
+The website for GaNDLF; start with [index.md](./index.md).
 
 ## Flowchart
diff --git a/docs/extending.md b/docs/extending.md
index 6bfca00f6..45d808e48 100644
--- a/docs/extending.md
+++ b/docs/extending.md
@@ -1,4 +1,4 @@
-# Extending GANDLF
+# Extending GaNDLF
 
 For any new feature, please ensure the corresponding option in the [sample configuration](https://github.com/CBICA/GaNDLF/blob/master/samples/sample_training.yaml) is added, so that others can review/use/extend it as needed.
diff --git a/docs/index.md b/docs/index.md
index ff82f2706..6d2c4e31a 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,4 +1,4 @@
-# GANDLF
+# GaNDLF
 
 A **G**ener**a**lly **N**uanced **D**eep **L**earning **F**ramework for segmentation and classification.
@@ -19,4 +19,4 @@ A **G**ener**a**lly **N**uanced **D**eep **L**earning **F**ramework for segmenta
 
 - [Application Setup](./setup.md)
 - [Usage](./usage.md)
-- [Extending GANDLF](./extending.md)
+- [Extending GaNDLF](./extending.md)
diff --git a/docs/usage.md b/docs/usage.md
index a96dc7a3d..6bf88032d 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -1,5 +1,20 @@
 # Usage
 
+For any DL pipeline, the following flow needs to be performed:
+
+1. Data preparation
+2. Split data into training, validation, and testing
+3. Customize the training parameters
+
+GaNDLF tackles all of these steps, and the details are organized as explained in [the following section](#table-of-contents).
+## Table of Contents
+- [Preparing the Data](#preparing-the-data)
+- [Constructing the Data CSV](#constructing-the-data-csv)
+- [Customize the Training](#customize-the-training)
+- [Running GaNDLF](#running-gandlf-traininginference)
+- [Plot the final results](#plot-the-final-results)
+- [Multi-GPU systems](#multi-gpu-systems)
+
 ## Preparing the Data
 
 It is **highly** recommended that the dataset you want to train/infer on has been harmonized:
@@ -7,13 +22,16 @@ It is **highly** recommended that the dataset you want to train/infer on has bee
 
 - Registration
   - Within-modality co-registration [[1](https://doi.org/10.1109/TMI.2014.2377694), [2](https://doi.org/10.1038/sdata.2017.117), [3](https://arxiv.org/abs/1811.02629)]
   - **OPTIONAL**: Registration of all datasets to patient atlas, if applicable [[1](https://doi.org/10.1109/TMI.2014.2377694), [2](https://doi.org/10.1038/sdata.2017.117), [3](https://arxiv.org/abs/1811.02629)]
-- Size harmonization: Same physical definition of all images (see https://upenn.box.com/v/spacingsIssue for a presentation on how voxel resolutions affects downstream analyses). This is available via [GANDLF's preprocessing module](#customize-the-training).
-- Intensity harmonization: Same intensity profile, i.e., normalization [[4](https://doi.org/10.1016/j.nicl.2014.08.008), [5](https://visualstudiomagazine.com/articles/2020/08/04/ml-data-prep-normalization.aspx), [6](https://developers.google.com/machine-learning/data-prep/transform/normalization), [7](https://towardsdatascience.com/understand-data-normalization-in-machine-learning-8ff3062101f0)]. Z-scoring is available via [GANDLF's preprocessing module](#customize-the-training).
+- Size harmonization: Same physical definition of all images (see https://upenn.box.com/v/spacingsIssue for a presentation on how voxel resolutions affect downstream analyses). This is available via [GaNDLF's preprocessing module](#customize-the-training).
+- Intensity harmonization: Same intensity profile, i.e., normalization [[4](https://doi.org/10.1016/j.nicl.2014.08.008), [5](https://visualstudiomagazine.com/articles/2020/08/04/ml-data-prep-normalization.aspx), [6](https://developers.google.com/machine-learning/data-prep/transform/normalization), [7](https://towardsdatascience.com/understand-data-normalization-in-machine-learning-8ff3062101f0)]. Z-scoring is available via [GaNDLF's preprocessing module](#customize-the-training).
 
 Recommended tool for tackling all aforementioned preprocessing tasks: https://github.com/CBICA/CaPTk
 
 **For Histopathology Only:**
 
 - Convert WSI/label map to patches with OPM: [See using OPM](./GANDLF/OPM/README.md)
+
+[Back To Top ↑](#table-of-contents)
+
 ## Constructing the Data CSV
@@ -44,9 +62,11 @@ Notes:
 - For classification/regression, add a column called `ValueToPredict`. Currently, we are supporting only a single value prediction per model.
 - `SubjectID` or `PatientName` is used to ensure that the randomized split is done per-subject rather than per-image.
 
+[Back To Top ↑](#table-of-contents)
+
 ## Customize the Training
 
-GANDLF requires a YAML-based configuration that controls various aspects of the training/inference process, such as:
+GaNDLF requires a YAML-based configuration that controls various aspects of the training/inference process, such as:
 
 - Model
   - Architecture
@@ -79,7 +99,9 @@ Please see a [sample](https://github.com/CBICA/GaNDLF/blob/master/samples/config
 **Note**: Ensure that the configuration has valid syntax by checking the file using any YAML validator such as https://yamlchecker.com/ or https://yamlvalidator.com/ **before** trying to train.
 
-## Running GANDLF (Training/Inference)
+[Back To Top ↑](#table-of-contents)
+
+## Running GaNDLF (Training/Inference)
 
 ```bash
 # continue from previous shell
 python gandlf_run \
@@ -92,9 +114,11 @@ python gandlf_run \
 #   -modelDir /path/to/model/weights # used in inference mode
 ```
 
+[Back To Top ↑](#table-of-contents)
+
 ## Plot the final results
 
-After the testing/validation training is finished, GANDLF makes it possible to collect all the statistics from the final models for testing and validation datasets and plot them. The [gandlf_collectStats](https://github.com/CBICA/GaNDLF/blob/master/gandlf_collectStats) can be used for this:
+After the testing/validation training is finished, GaNDLF makes it possible to collect all the statistics from the final models for testing and validation datasets and plot them. The [gandlf_collectStats](https://github.com/CBICA/GaNDLF/blob/master/gandlf_collectStats) script can be used for this:
 
 ```bash
 # continue from previous shell
@@ -105,8 +129,12 @@ python gandlf_collectStats \
   -output ./experiment_0/output_dir_stats/ \ # output directory
 ```
 
+[Back To Top ↑](#table-of-contents)
+
 ### Multi-GPU systems
 
 Please ensure that the environment variable `CUDA_VISIBLE_DEVICES` is set [[ref](https://developer.nvidia.com/blog/cuda-pro-tip-control-gpu-visibility-cuda_visible_devices/)]. For an example how this is set, see [sge_wrapper](https://github.com/CBICA/GaNDLF/blob/master/samples/sge_wrapper).
+
+[Back To Top ↑](#table-of-contents)
diff --git a/samples/config_all_options.yaml b/samples/config_all_options.yaml
index 0d3f457f7..66d52ebc0 100644
--- a/samples/config_all_options.yaml
+++ b/samples/config_all_options.yaml
@@ -18,6 +18,13 @@ model:
     amp: False, # Set if you want to use Automatic Mixed Precision for your operations or not - options: True, False
     # n_channels: 3, # set the input channels - useful when reading RGB or images that have vectored pixel types
   }
+# metrics to evaluate the validation performance
+metrics:
+  {
+    dice, # segmentation
+    # accuracy # regression/classification
+    # mse # regression/classification
+  }
 # this is to enable or disable lazy loading - setting to true reads all data once during data loading, resulting in improvements
 # in I/O at the expense of memory consumption
 in_memory: False
diff --git a/samples/config_classification.yaml b/samples/config_classification.yaml
index c2a11357c..d7d6972d4 100644
--- a/samples/config_classification.yaml
+++ b/samples/config_classification.yaml
@@ -15,6 +15,11 @@ model:
     amp: False, # Set if you want to use Automatic Mixed Precision for your operations or not - options: True, False
     n_channels: 3, # set the input channels - useful when reading RGB or images that have vectored pixel types
   }
+# metrics to evaluate the validation performance
+metrics:
+  {
+    mse # regression/classification
+  }
 # this is to enable or disable lazy loading - setting to true reads all data once during data loading, resulting in improvements
 # in I/O at the expense of memory consumption
 in_memory: False
diff --git a/samples/config_regression.yaml b/samples/config_regression.yaml
index c2a11357c..d7d6972d4 100644
--- a/samples/config_regression.yaml
+++ b/samples/config_regression.yaml
@@ -15,6 +15,11 @@ model:
     amp: False, # Set if you want to use Automatic Mixed Precision for your operations or not - options: True, False
     n_channels: 3, # set the input channels - useful when reading RGB or images that have vectored pixel types
   }
+# metrics to evaluate the validation performance
+metrics:
+  {
+    mse # regression/classification
+  }
 # this is to enable or disable lazy loading - setting to true reads all data once during data loading, resulting in improvements
 # in I/O at the expense of memory consumption
 in_memory: False
diff --git a/samples/config_segmentation_brats.yaml b/samples/config_segmentation_brats.yaml
index 45ed27dc3..8e13f0cc2 100644
--- a/samples/config_segmentation_brats.yaml
+++ b/samples/config_segmentation_brats.yaml
@@ -15,6 +15,11 @@ model:
     amp: True, # Set if you want to use Automatic Mixed Precision for your operations or not - options: True, False
     # n_channels: 3, # set the input channels - useful when reading RGB or images that have vectored pixel types
   }
+# metrics to evaluate the validation performance
+metrics:
+  {
+    dice
+  }
 # Set the Modality : rad for radiology, path for histopathology
 modality: rad
 weighted_loss: True
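The four sample-config hunks above add a `metrics` section written as a YAML flow mapping with bare keys, matching the style of the existing `model` block. A quick sketch of how such a block parses, assuming a standard PyYAML `safe_load` (the exact loader GaNDLF uses is not shown in this diff):

```python
import yaml  # PyYAML

snippet = """
metrics:
  {
    dice, # segmentation
    # mse # regression/classification
  }
"""

# A bare entry such as `dice` parses as a mapping key with a null value;
# the commented-out entries are ignored and the trailing comma is tolerated.
config = yaml.safe_load(snippet)
print(config["metrics"])  # {'dice': None}
assert "dice" in config["metrics"]
```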
diff --git a/testing/test_full.py b/testing/test_full.py
index af39a6413..cb71f9993 100644
--- a/testing/test_full.py
+++ b/testing/test_full.py
@@ -107,7 +107,6 @@ def test_train_segmentation_rad_2d(device):
     training_data, headers = parseTrainingCSV(inputDir + '/train_2d_rad_segmentation.csv')
     parameters = parseConfig(testingDir + '/config_segmentation.yaml', version_check = False)
     parameters['patch_size'] = patch_size['2D']
-    parameters['psize'] = patch_size['2D']
     parameters['model']['dimension'] = 2
     parameters['model']['class_list'] = [0,255]
     parameters['model']['amp'] = True
@@ -128,7 +127,6 @@ def test_train_segmentation_rad_3d(device):
     # read and initialize parameters for specific data dimension
     parameters = parseConfig(testingDir + '/config_segmentation.yaml', version_check = False)
     parameters['patch_size'] = patch_size['3D']
-    parameters['psize'] = patch_size['3D']
     parameters['model']['dimension'] = 3
     parameters['model']['class_list'] = [0,1]
     parameters['model']['amp'] = True
@@ -146,7 +144,6 @@ def test_train_regression_rad_2d(device):
     # read and initialize parameters for specific data dimension
     parameters = parseConfig(testingDir + '/config_regression.yaml', version_check = False)
     parameters['patch_size'] = patch_size['2D']
-    parameters['psize'] = patch_size['2D']
     parameters['model']['dimension'] = 2
     parameters['model']['amp'] = True
     # read and parse csv
@@ -167,7 +164,6 @@ def test_train_regression_rad_3d(device):
     # read and initialize parameters for specific data dimension
     parameters = parseConfig(testingDir + '/config_regression.yaml', version_check = False)
     parameters['patch_size'] = patch_size['3D']
-    parameters['psize'] = patch_size['3D']
     parameters['model']['dimension'] = 3
     parameters['model']['amp'] = True
     # read and parse csv
@@ -188,7 +184,6 @@ def test_train_classification_rad_2d(device):
     parameters = parseConfig(testingDir + '/config_classification.yaml', version_check = False)
     parameters['modality'] = 'rad'
     parameters['patch_size'] = patch_size['2D']
-    parameters['psize'] = patch_size['2D']
     parameters['model']['dimension'] = 2
     parameters['model']['amp'] = True
     # read and parse csv
@@ -208,7 +203,6 @@ def test_train_classification_rad_3d(device):
     # read and initialize parameters for specific data dimension
     parameters = parseConfig(testingDir + '/config_classification.yaml', version_check = False)
     parameters['patch_size'] = patch_size['3D']
-    parameters['psize'] = patch_size['3D']
     parameters['model']['dimension'] = 3
     parameters['model']['amp'] = True
     # read and parse csv
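One last note, on the `MSDNet.py` one-liner near the top of this patch: `nn.ReLU` is a module class, so the old `x = nn.ReLU(x)` never applied the activation. It constructed a `ReLU` module, silently binding the tensor to the constructor's `inplace` flag, and rebound `x` to that module. A quick illustration in plain PyTorch, independent of GaNDLF:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.tensor([-1.0, 2.0])

wrong = nn.ReLU(x)  # a torch.nn.ReLU *module* (x was taken as `inplace`), not a tensor
right = F.relu(x)   # tensor([0., 2.]) -- the functional form actually applies the activation

print(type(wrong), right)
assert torch.equal(right, nn.ReLU()(x))  # module-style equivalent: instantiate, then call
```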