diff --git a/.github/ISSUE_TEMPLATE/---bug-report.md b/.github/ISSUE_TEMPLATE/---bug-report.md
index acf0d58d6..c4c71233f 100644
--- a/.github/ISSUE_TEMPLATE/---bug-report.md
+++ b/.github/ISSUE_TEMPLATE/---bug-report.md
@@ -23,7 +23,7 @@ A clear and concise description of what you expected to happen.
 **Screenshots**
 If applicable, add screenshots to help explain your problem.
 
-**GANDLF Version**
+**GaNDLF Version**
 Version information is found on Help > About
 
 **Desktop (please complete the following information):**
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 3d66357d7..a7f23884b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -15,7 +15,7 @@ Depending on our bandwidth, one of the team members will get around to it (we wi
 
 ## Pull Requests
 
-Thank you for your interest in contributing to **GANDLF**! To make the process as smooth as posssible, please follow these guidelines for speedy reviews and merges:
+Thank you for your interest in contributing to **GaNDLF**! To make the process as smooth as possible, please follow these guidelines for speedy reviews and merges:
 
 - Ensure your PR addresses an existing issue (if none exists, please open one so that it can be triaged by the admins).
 - Your PR should be from a branch that either contains the issue number it fixes and/or contains as much top-level information as possible to help moderators review and merge it.
diff --git a/GANDLF/models/MSDNet.py b/GANDLF/models/MSDNet.py
index 6edcd7a7c..d2c556f30 100644
--- a/GANDLF/models/MSDNet.py
+++ b/GANDLF/models/MSDNet.py
@@ -67,7 +67,7 @@ def forward(self, x):
             x = f(x)
 
             if (i + 1) % 2 == 0 and not i == (len(self.layers) - 1):
-                x = nn.ReLU(x) #F.relu(x)
+                x = F.relu(x)
                 # Append output into previous features
                 prev_features.append(x)
                 x = torch.cat(prev_features, 1)
diff --git a/GANDLF/parameterParsing.py b/GANDLF/parameterParsing.py
index 584c06591..8c1ee84e0 100644
--- a/GANDLF/parameterParsing.py
+++ b/GANDLF/parameterParsing.py
@@ -94,7 +94,8 @@ def get_model(
             base_filters,
             final_convolution_layer=final_convolution_layer,
         )
-
+        amp = False # this is not yet implemented for msdnet
+
     elif 'imagenet' in modelname:
         # these are generic imagenet-trained models and should be customized
         if num_dimensions != 2:
@@ -154,6 +155,12 @@ def get_model(
             model = torchvision.models.resnet101(pretrained=True)
         elif 'resnet152' in modelname:
             model = torchvision.models.resnet152(pretrained=True)
+        else:
+            sys.exit(
+                "Could not find the requested model '"
+                + modelname
+                + "' in the implementation"
+            )
     elif 'densenet' in modelname:
         if modelname == 'densenet121': # regressor/classifier network
diff --git a/GANDLF/parseConfig.py b/GANDLF/parseConfig.py
index 6594d191c..2e2366f58 100644
--- a/GANDLF/parseConfig.py
+++ b/GANDLF/parseConfig.py
@@ -43,11 +43,15 @@ def parseConfig(config_file_path, version_check = True):
     max = parse_version(params['version']['maximum'])
     if (min > gandlf_version_int) or (max < gandlf_version_int):
         sys.exit('Incompatible version of GANDLF detected (' + gandlf_version + ')')
-
+
+    if ('psize' in params):
+        print('WARNING: \'psize\' has been deprecated in favor of \'patch_size\'', file = sys.stderr)
+        if not('patch_size' in params):
+            params['patch_size'] = params['psize']
+
     if 'patch_size' in params:
-        params['psize'] = params['patch_size']
-        if len(params['psize']) == 2: # 2d check
-            params['psize'].append(1) # ensuring same size during torchio processing
+        if len(params['patch_size']) == 2: # 2d check
+            params['patch_size'].append(1) # ensuring same size during torchio processing
     else:
         sys.exit('The \'patch_size\' parameter needs to be present in the configuration file')
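The `parseConfig.py` hunk above replaces the old `patch_size` → `psize` aliasing with a deprecation shim. A minimal, self-contained sketch of the intended behavior (the helper name `normalize_patch_size` and the bare `params` dict are illustrative, not GaNDLF's actual API):

```python
import sys

def normalize_patch_size(params):
    # Accept the deprecated 'psize' key, warn on stderr, and fall back to it
    # only when 'patch_size' is absent (mirrors the hunk above).
    if 'psize' in params:
        print("WARNING: 'psize' has been deprecated in favor of 'patch_size'", file=sys.stderr)
        if 'patch_size' not in params:
            params['patch_size'] = params['psize']

    if 'patch_size' in params:
        if len(params['patch_size']) == 2:  # 2D check
            params['patch_size'].append(1)  # torchio expects a third dimension
    else:
        sys.exit("The 'patch_size' parameter needs to be present in the configuration file")
    return params

# An old 2D config still loads, and gains a singleton third dimension:
print(normalize_patch_size({'psize': [128, 128]})['patch_size'])  # [128, 128, 1]
```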
diff --git a/GANDLF/preprocessing.py b/GANDLF/preprocessing.py
index 384621a04..518be90d6 100644
--- a/GANDLF/preprocessing.py
+++ b/GANDLF/preprocessing.py
@@ -133,7 +133,7 @@ def znorm(tensor: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
     return tensor
 
 # adapted from https://codereview.stackexchange.com/questions/132914/crop-black-border-of-image-using-numpy/132933#132933
-def crop_image_outside_zeros(array, psize):
+def crop_image_outside_zeros(array, patch_size):
     dimensions = len(array.shape)
     if dimensions != 4:
         raise ValueError("Array expected to be 4D but got {} dimensions.".format(dimensions))
@@ -156,13 +156,13 @@ def crop_image_outside_zeros(array, psize):
     # for each axis
     for i in range(3):
         # if less than patch size, extend the small corner out
-        if large[i] - small[i] < psize[i]:
-            small[i] = large[i] - psize[i]
+        if large[i] - small[i] < patch_size[i]:
+            small[i] = large[i] - patch_size[i]
 
         # if bottom fell off array, extend the large corner and set small to 0
         if small[i] < 0:
             small[i] = 0
-            large[i] = psize[i]
+            large[i] = patch_size[i]
 
     # calculate pixel location of new bounding box corner (will use to update the reference of the image to physical space)
     new_corner_idxs = np.array([small[0], small[1], small[2]])
@@ -181,15 +181,15 @@ class CropExternalZeroplanes(SpatialTransform):
     Transformation class to enable taking the whole image stack (including segmentation) and removing (starting from edges) physical-coordinate planes with all zero voxels until you reach a non-zero voxel.
 
     Args:
-        psize: patch size (used to ensure we do not crop to smaller size than this)
+        patch_size: patch size (used to ensure we do not crop to smaller size than this)
         **kwargs: See :class:`~torchio.transforms.Transform` for additional keyword arguments.
     """
 
-    def __init__(self, psize, **kwargs):
+    def __init__(self, patch_size, **kwargs):
         super().__init__(**kwargs)
-        self.psize = psize
-        self.args_names = ('psize',)
+        self.patch_size = patch_size
+        self.args_names = ('patch_size',)
 
     def apply_transform(self, subject):
@@ -209,7 +209,7 @@ def apply_transform(self, subject):
         numpy_stack = np.concatenate(numpy_stack_list, axis=0)
 
         # crop away the external zero-planes on the whole stack
-        new_corner_idxs, new_stack = crop_image_outside_zeros(array=numpy_stack, psize=self.psize)
+        new_corner_idxs, new_stack = crop_image_outside_zeros(array=numpy_stack, patch_size=self.patch_size)
 
         # recompute origin of affine matrix using initial image shape
         new_origin = nib.affines.apply_affine(example_image_affine, new_corner_idxs)
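The rename above is mechanical, but the corner-adjustment loop in `crop_image_outside_zeros` is easy to misread. A self-contained sketch of that logic with hypothetical bounding-box corners (the `small`/`large` values are made up for illustration):

```python
import numpy as np

patch_size = [32, 32, 32]
small = np.array([40, 40, 5])   # hypothetical lower corners of the non-zero bounding box
large = np.array([60, 90, 20])  # hypothetical upper corners

for i in range(3):
    # if the box is smaller than the patch size, extend the small corner out
    if large[i] - small[i] < patch_size[i]:
        small[i] = large[i] - patch_size[i]

    # if the bottom fell off the array, clamp to 0 and extend the large corner
    if small[i] < 0:
        small[i] = 0
        large[i] = patch_size[i]

print(small, large)  # [28 40  0] [60 90 32] -- every axis now spans at least patch_size
```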
diff --git a/HISTORY.md b/HISTORY.md
index 1ffa61fd6..5a75f8b04 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -58,7 +58,7 @@
 
 ## 0.0.1 (2020/08/25)
 
-- First tag of GANDLF
+- First tag of GaNDLF
 - Initial feature list:
   - Supports multiple
     - Deep Learning model architectures
diff --git a/docs/README.md b/docs/README.md
index aac5bf9c2..b5d181091 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,4 +1,4 @@
-The website for GANDLF; start with [index.md](./index.md).
+The website for GaNDLF; start with [index.md](./index.md).
 
 ## Flowchart
diff --git a/docs/extending.md b/docs/extending.md
index 6bfca00f6..45d808e48 100644
--- a/docs/extending.md
+++ b/docs/extending.md
@@ -1,4 +1,4 @@
-# Extending GANDLF
+# Extending GaNDLF
 
 For any new feature, please ensure the corresponding option in the [sample configuration](https://github.com/CBICA/GaNDLF/blob/master/samples/sample_training.yaml) is added, so that others can review/use/extend it as needed.
diff --git a/docs/index.md b/docs/index.md
index ff82f2706..6d2c4e31a 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,4 +1,4 @@
-# GANDLF
+# GaNDLF
 
 A **G**ener**a**lly **N**uanced **D**eep **L**earning **F**ramework for segmentation and classification.
@@ -19,4 +19,4 @@ A **G**ener**a**lly **N**uanced **D**eep **L**earning **F**ramework for segmenta
 
 - [Application Setup](./setup.md)
 - [Usage](./usage.md)
-- [Extending GANDLF](./extending.md)
+- [Extending GaNDLF](./extending.md)
diff --git a/docs/usage.md b/docs/usage.md
index a96dc7a3d..6bf88032d 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -1,5 +1,20 @@
 # Usage
 
+For any DL pipeline, the following flow needs to be performed:
+
+1. Data preparation
+2. Split data into training, validation, and testing
+3. Customize the training parameters
+
+GaNDLF tackles all of these steps, and the details are organized as explained in [the following section](#table-of-contents).
+## Table of Contents
+- [Preparing the Data](#preparing-the-data)
+- [Constructing the Data CSV](#constructing-the-data-csv)
+- [Customize the Training](#customize-the-training)
+- [Running GaNDLF](#running-gandlf-traininginference)
+- [Plot the final results](#plot-the-final-results)
+- [Multi-GPU systems](#multi-gpu-systems)
+
 ## Preparing the Data
 
 It is **highly** recommended that the dataset you want to train/infer on has been harmonized:
@@ -7,13 +22,16 @@ It is **highly** recommended that the dataset you want to train/infer on has bee
 
 - Registration
   - Within-modality co-registration [[1](https://doi.org/10.1109/TMI.2014.2377694), [2](https://doi.org/10.1038/sdata.2017.117), [3](https://arxiv.org/abs/1811.02629)]
   - **OPTIONAL**: Registration of all datasets to patient atlas, if applicable [[1](https://doi.org/10.1109/TMI.2014.2377694), [2](https://doi.org/10.1038/sdata.2017.117), [3](https://arxiv.org/abs/1811.02629)]
-- Size harmonization: Same physical definition of all images (see https://upenn.box.com/v/spacingsIssue for a presentation on how voxel resolutions affects downstream analyses). This is available via [GANDLF's preprocessing module](#customize-the-training).
-- Intensity harmonization: Same intensity profile, i.e., normalization [[4](https://doi.org/10.1016/j.nicl.2014.08.008), [5](https://visualstudiomagazine.com/articles/2020/08/04/ml-data-prep-normalization.aspx), [6](https://developers.google.com/machine-learning/data-prep/transform/normalization), [7](https://towardsdatascience.com/understand-data-normalization-in-machine-learning-8ff3062101f0)]. Z-scoring is available via [GANDLF's preprocessing module](#customize-the-training).
+- Size harmonization: Same physical definition of all images (see https://upenn.box.com/v/spacingsIssue for a presentation on how voxel resolutions affect downstream analyses). This is available via [GaNDLF's preprocessing module](#customize-the-training).
+- Intensity harmonization: Same intensity profile, i.e., normalization [[4](https://doi.org/10.1016/j.nicl.2014.08.008), [5](https://visualstudiomagazine.com/articles/2020/08/04/ml-data-prep-normalization.aspx), [6](https://developers.google.com/machine-learning/data-prep/transform/normalization), [7](https://towardsdatascience.com/understand-data-normalization-in-machine-learning-8ff3062101f0)]. Z-scoring is available via [GaNDLF's preprocessing module](#customize-the-training).
 
 Recommended tool for tackling all aforementioned preprocessing tasks: https://github.com/CBICA/CaPTk
 
 **For Histopathology Only:**
 
 - Convert WSI/label map to patches with OPM: [See using OPM](./GANDLF/OPM/README.md)
+
+[Back To Top ↑](#table-of-contents)
+
 ## Constructing the Data CSV
@@ -44,9 +62,11 @@ Notes:
 - For classification/regression, add a column called `ValueToPredict`. Currently, we are supporting only a single value prediction per model.
 - `SubjectID` or `PatientName` is used to ensure that the randomized split is done per-subject rather than per-image.
 
+[Back To Top ↑](#table-of-contents)
+
 ## Customize the Training
 
-GANDLF requires a YAML-based configuration that controls various aspects of the training/inference process, such as:
+GaNDLF requires a YAML-based configuration that controls various aspects of the training/inference process, such as:
 
 - Model
   - Architecture
@@ -79,7 +99,9 @@ Please see a [sample](https://github.com/CBICA/GaNDLF/blob/master/samples/config
 **Note**: Ensure that the configuration has valid syntax by checking the file using any YAML validator such as https://yamlchecker.com/ or https://yamlvalidator.com/ **before** trying to train.
 
-## Running GANDLF (Training/Inference)
+[Back To Top ↑](#table-of-contents)
+
+## Running GaNDLF (Training/Inference)
 
 ```bash
 # continue from previous shell
 python gandlf_run \
@@ -92,9 +114,11 @@ python gandlf_run \
 #   -modelDir /path/to/model/weights # used in inference mode
 ```
 
+[Back To Top ↑](#table-of-contents)
+
 ## Plot the final results
 
-After the testing/validation training is finished, GANDLF makes it possible to collect all the statistics from the final models for testing and validation datasets and plot them. The [gandlf_collectStats](https://github.com/CBICA/GaNDLF/blob/master/gandlf_collectStats) can be used for this:
+After the testing/validation training is finished, GaNDLF makes it possible to collect all the statistics from the final models for testing and validation datasets and plot them. The [gandlf_collectStats](https://github.com/CBICA/GaNDLF/blob/master/gandlf_collectStats) script can be used for this:
 
 ```bash
 # continue from previous shell
@@ -105,8 +129,12 @@ python gandlf_collectStats \
   -output ./experiment_0/output_dir_stats/ \ # output directory
 ```
 
+[Back To Top ↑](#table-of-contents)
+
 ### Multi-GPU systems
 
 Please ensure that the environment variable `CUDA_VISIBLE_DEVICES` is set [[ref](https://developer.nvidia.com/blog/cuda-pro-tip-control-gpu-visibility-cuda_visible_devices/)]. For an example how this is set, see [sge_wrapper](https://github.com/CBICA/GaNDLF/blob/master/samples/sge_wrapper).
+
+[Back To Top ↑](#table-of-contents)
diff --git a/samples/config_all_options.yaml b/samples/config_all_options.yaml
index 0d3f457f7..66d52ebc0 100644
--- a/samples/config_all_options.yaml
+++ b/samples/config_all_options.yaml
@@ -18,6 +18,13 @@ model:
     amp: False, # Set if you want to use Automatic Mixed Precision for your operations or not - options: True, False
     # n_channels: 3, # set the input channels - useful when reading RGB or images that have vectored pixel types
   }
+# metrics to evaluate the validation performance
+metrics:
+  {
+    dice, # segmentation
+    # accuracy # regression/classification
+    # mse # regression/classification
+  }
 # this is to enable or disable lazy loading - setting to true reads all data once during data loading, resulting in improvements
 # in I/O at the expense of memory consumption
 in_memory: False
diff --git a/samples/config_classification.yaml b/samples/config_classification.yaml
index c2a11357c..d7d6972d4 100644
--- a/samples/config_classification.yaml
+++ b/samples/config_classification.yaml
@@ -15,6 +15,11 @@ model:
     amp: False, # Set if you want to use Automatic Mixed Precision for your operations or not - options: True, False
     n_channels: 3, # set the input channels - useful when reading RGB or images that have vectored pixel types
   }
+# metrics to evaluate the validation performance
+metrics:
+  {
+    mse # regression/classification
+  }
 # this is to enable or disable lazy loading - setting to true reads all data once during data loading, resulting in improvements
 # in I/O at the expense of memory consumption
 in_memory: False
diff --git a/samples/config_regression.yaml b/samples/config_regression.yaml
index c2a11357c..d7d6972d4 100644
--- a/samples/config_regression.yaml
+++ b/samples/config_regression.yaml
@@ -15,6 +15,11 @@ model:
     amp: False, # Set if you want to use Automatic Mixed Precision for your operations or not - options: True, False
     n_channels: 3, # set the input channels - useful when reading RGB or images that have vectored pixel types
   }
+# metrics to evaluate the validation performance
+metrics:
+  {
+    mse # regression/classification
+  }
 # this is to enable or disable lazy loading - setting to true reads all data once during data loading, resulting in improvements
 # in I/O at the expense of memory consumption
 in_memory: False
diff --git a/samples/config_segmentation_brats.yaml b/samples/config_segmentation_brats.yaml
index 45ed27dc3..8e13f0cc2 100644
--- a/samples/config_segmentation_brats.yaml
+++ b/samples/config_segmentation_brats.yaml
@@ -15,6 +15,11 @@ model:
     amp: True, # Set if you want to use Automatic Mixed Precision for your operations or not - options: True, False
     # n_channels: 3, # set the input channels - useful when reading RGB or images that have vectored pixel types
   }
+# metrics to evaluate the validation performance
+metrics:
+  {
+    dice
+  }
 # Set the Modality : rad for radiology, path for histopathology
 modality: rad
 weighted_loss: True
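The four sample-config hunks above add a `metrics` section written as a YAML flow mapping with bare keys, matching the style of the existing `model` block. A quick sketch of how such a block parses, assuming a standard PyYAML `safe_load` (the exact loader GaNDLF uses is not shown in this diff):

```python
import yaml  # PyYAML

snippet = """
metrics:
  {
    dice, # segmentation
    # mse # regression/classification
  }
"""

# A bare entry such as `dice` parses as a mapping key with a null value;
# the commented-out entries are ignored and the trailing comma is tolerated.
config = yaml.safe_load(snippet)
print(config["metrics"])  # {'dice': None}
assert "dice" in config["metrics"]
```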
diff --git a/testing/test_full.py b/testing/test_full.py
index af39a6413..cb71f9993 100644
--- a/testing/test_full.py
+++ b/testing/test_full.py
@@ -107,7 +107,6 @@ def test_train_segmentation_rad_2d(device):
     training_data, headers = parseTrainingCSV(inputDir + '/train_2d_rad_segmentation.csv')
     parameters = parseConfig(testingDir + '/config_segmentation.yaml', version_check = False)
     parameters['patch_size'] = patch_size['2D']
-    parameters['psize'] = patch_size['2D']
     parameters['model']['dimension'] = 2
     parameters['model']['class_list'] = [0,255]
     parameters['model']['amp'] = True
@@ -128,7 +127,6 @@ def test_train_segmentation_rad_3d(device):
     # read and initialize parameters for specific data dimension
     parameters = parseConfig(testingDir + '/config_segmentation.yaml', version_check = False)
     parameters['patch_size'] = patch_size['3D']
-    parameters['psize'] = patch_size['3D']
     parameters['model']['dimension'] = 3
     parameters['model']['class_list'] = [0,1]
     parameters['model']['amp'] = True
@@ -146,7 +144,6 @@ def test_train_regression_rad_2d(device):
     # read and initialize parameters for specific data dimension
     parameters = parseConfig(testingDir + '/config_regression.yaml', version_check = False)
     parameters['patch_size'] = patch_size['2D']
-    parameters['psize'] = patch_size['2D']
     parameters['model']['dimension'] = 2
     parameters['model']['amp'] = True
     # read and parse csv
@@ -167,7 +164,6 @@ def test_train_regression_rad_3d(device):
     # read and initialize parameters for specific data dimension
     parameters = parseConfig(testingDir + '/config_regression.yaml', version_check = False)
     parameters['patch_size'] = patch_size['3D']
-    parameters['psize'] = patch_size['3D']
     parameters['model']['dimension'] = 3
     parameters['model']['amp'] = True
     # read and parse csv
@@ -188,7 +184,6 @@ def test_train_classification_rad_2d(device):
     parameters = parseConfig(testingDir + '/config_classification.yaml', version_check = False)
     parameters['modality'] = 'rad'
     parameters['patch_size'] = patch_size['2D']
-    parameters['psize'] = patch_size['2D']
     parameters['model']['dimension'] = 2
     parameters['model']['amp'] = True
     # read and parse csv
@@ -208,7 +203,6 @@ def test_train_classification_rad_3d(device):
     # read and initialize parameters for specific data dimension
     parameters = parseConfig(testingDir + '/config_classification.yaml', version_check = False)
     parameters['patch_size'] = patch_size['3D']
-    parameters['psize'] = patch_size['3D']
     parameters['model']['dimension'] = 3
     parameters['model']['amp'] = True
     # read and parse csv
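One last note, on the `MSDNet.py` one-liner near the top of this patch: `nn.ReLU` is a module class, so the old `x = nn.ReLU(x)` never applied the activation. It constructed a `ReLU` module, silently binding the tensor to the constructor's `inplace` flag, and rebound `x` to that module. A quick illustration in plain PyTorch, independent of GaNDLF:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.tensor([-1.0, 2.0])

wrong = nn.ReLU(x)  # a torch.nn.ReLU *module* (x was taken as `inplace`), not a tensor
right = F.relu(x)   # tensor([0., 2.]) -- the functional form actually applies the activation

print(type(wrong), right)
assert torch.equal(right, nn.ReLU()(x))  # module-style equivalent: instantiate, then call
```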