Refactor: psize changed to patch_size and other minor fixes by sarthakpati · Pull Request #30 · mlcommons/GaNDLF

Merged · 10 commits · Mar 27, 2021
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/---bug-report.md
@@ -23,7 +23,7 @@ A clear and concise description of what you expected to happen.
**Screenshots**
If applicable, add screenshots to help explain your problem.

**GANDLF Version**
**GaNDLF Version**
Version information is found on Help > About

**Desktop (please complete the following information):**
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -15,7 +15,7 @@ Depending on our bandwidth, one of the team members will get around to it (we wi

## Pull Requests

Thank you for your interest in contributing to **GANDLF**! To make the process as smooth as possible, please follow these guidelines for speedy reviews and merges:
Thank you for your interest in contributing to **GaNDLF**! To make the process as smooth as possible, please follow these guidelines for speedy reviews and merges:

- Ensure your PR addresses an existing issue (if none exists, please open one so that it can be triaged by the admins).
- Your PR should be from a branch that either contains the issue number it fixes and/or contains as much top-level information as possible to help moderators review and merge it.
2 changes: 1 addition & 1 deletion GANDLF/models/MSDNet.py
@@ -67,7 +67,7 @@ def forward(self, x):
x = f(x)

if (i + 1) % 2 == 0 and not i == (len(self.layers) - 1):
x = nn.ReLU(x) #F.relu(x)
x = F.relu(x)
# Append output into previous features
prev_features.append(x)
x = torch.cat(prev_features, 1)
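A note on the fix above: `nn.ReLU` is a module class, so the old `nn.ReLU(x)` constructed a ReLU module (silently treating the tensor as the `inplace` constructor argument) instead of applying the activation; `F.relu(x)` is the functional call that actually transforms the tensor. A minimal standalone sketch of the difference (illustrative, not part of the PR):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.randn(2, 3)

# Bug pattern: this builds a ReLU *module*, with x misused as the
# `inplace` constructor flag; it does not return an activated tensor.
module_not_tensor = nn.ReLU(x)

# Fixed pattern: the functional form applies ReLU to the tensor directly.
activated = F.relu(x)

# Equivalent module-based usage, for comparison:
relu = nn.ReLU()
also_activated = relu(x)
```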
9 changes: 8 additions & 1 deletion GANDLF/parameterParsing.py
@@ -94,7 +94,8 @@ def get_model(
base_filters,
final_convolution_layer=final_convolution_layer,
)

amp = False # this is not yet implemented for msdnet

elif 'imagenet' in modelname: # these are generic imagenet-trained models and should be customized

if num_dimensions != 2:
@@ -154,6 +155,12 @@
model = torchvision.models.resnet101(pretrained=True)
elif 'resnet152' in modelname:
model = torchvision.models.resnet152(pretrained=True)
else:
sys.exit(
"Could not find the requested model '"
+ modelname
+ "' in the implementation"
)

elif 'densenet' in modelname:
if modelname == 'densenet121': # regressor/classifier network
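The second hunk above adds a fail-fast guard so that an unrecognized architecture name exits with a message instead of silently falling through with no model. A trimmed, hypothetical reduction of that dispatch pattern (the real `get_model` covers many more architectures):

```python
import sys
import torchvision

def get_imagenet_model(modelname):
    # Match known architecture names; otherwise exit with a clear
    # message rather than fall through and return None.
    if 'resnet18' in modelname:
        return torchvision.models.resnet18(pretrained=True)
    elif 'resnet50' in modelname:
        return torchvision.models.resnet50(pretrained=True)
    else:
        sys.exit(
            "Could not find the requested model '"
            + modelname
            + "' in the implementation"
        )
```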
12 changes: 8 additions & 4 deletions GANDLF/parseConfig.py
@@ -43,11 +43,15 @@ def parseConfig(config_file_path, version_check = True):
max = parse_version(params['version']['maximum'])
if (min > gandlf_version_int) or (max < gandlf_version_int):
sys.exit('Incompatible version of GANDLF detected (' + gandlf_version + ')')


if ('psize' in params):
print('WARNING: \'psize\' has been deprecated in favor of \'patch_size\'', file = sys.stderr)
if not('patch_size' in params):
params['patch_size'] = params['psize']

if 'patch_size' in params:
params['psize'] = params['patch_size']
if len(params['psize']) == 2: # 2d check
params['psize'].append(1) # ensuring same size during torchio processing
if len(params['patch_size']) == 2: # 2d check
params['patch_size'].append(1) # ensuring same size during torchio processing
else:
sys.exit('The \'patch_size\' parameter needs to be present in the configuration file')

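Taken out of context, the deprecation shim above follows a standard pattern: accept the old key with a warning, promote it to the new key only when the new key is absent, then validate. A minimal standalone sketch (hypothetical function name):

```python
import sys

def normalize_patch_size(params):
    # Accept the deprecated 'psize' key, warn, and map it to 'patch_size'
    # unless the caller already supplied the new key.
    if 'psize' in params:
        print("WARNING: 'psize' has been deprecated in favor of 'patch_size'", file=sys.stderr)
        if 'patch_size' not in params:
            params['patch_size'] = params['psize']

    if 'patch_size' in params:
        if len(params['patch_size']) == 2:  # 2D configuration
            params['patch_size'].append(1)  # pad to 3D for torchio processing
    else:
        sys.exit("The 'patch_size' parameter needs to be present in the configuration file")
    return params

# e.g. normalize_patch_size({'psize': [128, 128]})['patch_size'] == [128, 128, 1]
```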
18 changes: 9 additions & 9 deletions GANDLF/preprocessing.py
@@ -133,7 +133,7 @@ def znorm(tensor: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
return tensor

# adapted from https://codereview.stackexchange.com/questions/132914/crop-black-border-of-image-using-numpy/132933#132933
def crop_image_outside_zeros(array, psize):
def crop_image_outside_zeros(array, patch_size):
dimensions = len(array.shape)
if dimensions != 4:
raise ValueError("Array expected to be 4D but got {} dimensions.".format(dimensions))
@@ -156,13 +156,13 @@ def crop_image_outside_zeros(array, psize):
# for each axis
for i in range(3):
# if less than patch size, extend the small corner out
if large[i] - small[i] < psize[i]:
small[i] = large[i] - psize[i]
if large[i] - small[i] < patch_size[i]:
small[i] = large[i] - patch_size[i]

# if bottom fell off array, extend the large corner and set small to 0
if small[i] < 0:
small[i] = 0
large[i] = psize[i]
large[i] = patch_size[i]

# calculate pixel location of new bounding box corner (will use to update the reference of the image to physical space)
new_corner_idxs = np.array([small[0], small[1], small[2]])
@@ -181,15 +181,15 @@ class CropExternalZeroplanes(SpatialTransform):
Transformation class to enable taking the whole image stack (including segmentation) and removing
(starting from edges) physical-coordinate planes with all zero voxels until you reach a non-zero voxel.
Args:
psize: patch size (used to ensure we do not crop to smaller size than this)
patch_size: patch size (used to ensure we do not crop to smaller size than this)
**kwargs: See :class:`~torchio.transforms.Transform` for additional
keyword arguments.
"""

def __init__(self, psize, **kwargs):
def __init__(self, patch_size, **kwargs):
super().__init__(**kwargs)
self.psize = psize
self.args_names = ('psize',)
self.patch_size = patch_size
self.args_names = ('patch_size',)

def apply_transform(self, subject):

@@ -209,7 +209,7 @@ def apply_transform(self, subject):
numpy_stack = np.concatenate(numpy_stack_list, axis=0)

# crop away the external zero-planes on the whole stack
new_corner_idxs, new_stack = crop_image_outside_zeros(array=numpy_stack, psize=self.psize)
new_corner_idxs, new_stack = crop_image_outside_zeros(array=numpy_stack, patch_size=self.patch_size)

# recompute origin of affine matrix using initial image shape
new_origin = nib.affines.apply_affine(example_image_affine, new_corner_idxs)
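The corner-adjustment loop in `crop_image_outside_zeros` guarantees that the cropped region is never smaller than the patch size along any axis. A standalone sketch of just that logic with a worked example (hypothetical helper, not GaNDLF API):

```python
def enforce_min_extent(small, large, patch_size):
    # For each spatial axis, widen the bounding box so that it spans at
    # least patch_size voxels, clamping at the lower image boundary.
    small, large = list(small), list(large)
    for i in range(3):
        if large[i] - small[i] < patch_size[i]:
            small[i] = large[i] - patch_size[i]
        if small[i] < 0:  # bottom fell off the array
            small[i] = 0
            large[i] = patch_size[i]
    return small, large

# A nonzero region spanning voxels 10..20 on the first axis, with a
# 32-voxel patch, is widened to 0..32:
print(enforce_min_extent([10, 0, 0], [20, 64, 64], [32, 32, 32]))
# -> ([0, 0, 0], [32, 64, 64])
```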
2 changes: 1 addition & 1 deletion HISTORY.md
@@ -58,7 +58,7 @@

## 0.0.1 (2020/08/25)

- First tag of GANDLF
- First tag of GaNDLF
- Initial feature list:
- Supports multiple
- Deep Learning model architectures
2 changes: 1 addition & 1 deletion docs/README.md
@@ -1,4 +1,4 @@
The website for GANDLF; start with [index.md](./index.md).
The website for GaNDLF; start with [index.md](./index.md).

## Flowchart

2 changes: 1 addition & 1 deletion docs/extending.md
@@ -1,4 +1,4 @@
# Extending GANDLF
# Extending GaNDLF

For any new feature, please ensure the corresponding option in the [sample configuration](https://github.com/CBICA/GaNDLF/blob/master/samples/sample_training.yaml) is added, so that others can review/use/extend it as needed.

4 changes: 2 additions & 2 deletions docs/index.md
@@ -1,4 +1,4 @@
# GANDLF
# GaNDLF

A **G**ener**a**lly **N**uanced **D**eep **L**earning **F**ramework for segmentation and classification.

@@ -19,4 +19,4 @@ A **G**ener**a**lly **N**uanced **D**eep **L**earning **F**ramework for segmenta

- [Application Setup](./setup.md)
- [Usage](./usage.md)
- [Extending GANDLF](./extending.md)
- [Extending GaNDLF](./extending.md)
40 changes: 34 additions & 6 deletions docs/usage.md
@@ -1,19 +1,37 @@
# Usage

For any DL pipeline, the following flow needs to be performed:

1. Data preparation
2. Split data into training, validation, and testing
3. Customize the training parameters

GaNDLF tackles all of these steps, and the details are organized as explained in [the following section](#table-of-contents).

## Table of Contents
- [Preparing the Data](#preparing-the-data)
- [Constructing the Data CSV](#constructing-the-data-csv)
- [Customize the Training](#customize-the-training)
- [Running GaNDLF](#running-gandlf-traininginference)
- [Plot the final results](#plot-the-final-results)
- [Multi-GPU systems](#multi-gpu-systems)

## Preparing the Data

It is **highly** recommended that the dataset you want to train/infer on has been harmonized:

- Registration
- Within-modality co-registration [[1](https://doi.org/10.1109/TMI.2014.2377694), [2](https://doi.org/10.1038/sdata.2017.117), [3](https://arxiv.org/abs/1811.02629)]
- **OPTIONAL**: Registration of all datasets to patient atlas, if applicable [[1](https://doi.org/10.1109/TMI.2014.2377694), [2](https://doi.org/10.1038/sdata.2017.117), [3](https://arxiv.org/abs/1811.02629)]
- Size harmonization: Same physical definition of all images (see https://upenn.box.com/v/spacingsIssue for a presentation on how voxel resolutions affect downstream analyses). This is available via [GANDLF's preprocessing module](#customize-the-training).
- Intensity harmonization: Same intensity profile, i.e., normalization [[4](https://doi.org/10.1016/j.nicl.2014.08.008), [5](https://visualstudiomagazine.com/articles/2020/08/04/ml-data-prep-normalization.aspx), [6](https://developers.google.com/machine-learning/data-prep/transform/normalization), [7](https://towardsdatascience.com/understand-data-normalization-in-machine-learning-8ff3062101f0)]. Z-scoring is available via [GANDLF's preprocessing module](#customize-the-training).
- Size harmonization: Same physical definition of all images (see https://upenn.box.com/v/spacingsIssue for a presentation on how voxel resolutions affect downstream analyses). This is available via [GaNDLF's preprocessing module](#customize-the-training).
- Intensity harmonization: Same intensity profile, i.e., normalization [[4](https://doi.org/10.1016/j.nicl.2014.08.008), [5](https://visualstudiomagazine.com/articles/2020/08/04/ml-data-prep-normalization.aspx), [6](https://developers.google.com/machine-learning/data-prep/transform/normalization), [7](https://towardsdatascience.com/understand-data-normalization-in-machine-learning-8ff3062101f0)]. Z-scoring is available via [GaNDLF's preprocessing module](#customize-the-training).

Recommended tool for tackling all aforementioned preprocessing tasks: https://github.com/CBICA/CaPTk

**For Histopathology Only:**
- Convert WSI/label map to patches with OPM: [See using OPM](./GANDLF/OPM/README.md)
- Convert WSI/label map to patches with OPM: [See using OPM](./GaNDLF/OPM/README.md)

[Back To Top &uarr;](#table-of-contents)


## Constructing the Data CSV

@@ -44,9 +62,11 @@ Notes:
- For classification/regression, add a column called `ValueToPredict`. Currently, we are supporting only a single value prediction per model.
- `SubjectID` or `PatientName` is used to ensure that the randomized split is done per-subject rather than per-image.

[Back To Top &uarr;](#table-of-contents)
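
For illustration, a sketch of constructing such a data CSV (the `Channel_0` column name and values here are assumptions for a single-channel regression setup, not taken from this diff):

```python
import pandas as pd

# Hypothetical two-subject data CSV; 'SubjectID' drives the per-subject
# split and 'ValueToPredict' carries the regression target.
df = pd.DataFrame({
    'SubjectID': ['001', '002'],
    'Channel_0': ['data/001/image.nii.gz', 'data/002/image.nii.gz'],
    'ValueToPredict': [0.73, 0.41],
})
df.to_csv('train.csv', index=False)
```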

## Customize the Training

GANDLF requires a YAML-based configuration that controls various aspects of the training/inference process, such as:
GaNDLF requires a YAML-based configuration that controls various aspects of the training/inference process, such as:

- Model
- Architecture
@@ -79,7 +99,9 @@ Please see a [sample](https://github.com/CBICA/GaNDLF/blob/master/samples/config

**Note**: Ensure that the configuration has valid syntax by checking the file using any YAML validator such as https://yamlchecker.com/ or https://yamlvalidator.com/ **before** trying to train.

## Running GANDLF (Training/Inference)
[Back To Top &uarr;](#table-of-contents)

## Running GaNDLF (Training/Inference)

```bash
# continue from previous shell
@@ -92,9 +114,11 @@ python gandlf_run \
# -modelDir /path/to/model/weights # used in inference mode
```

[Back To Top &uarr;](#table-of-contents)

## Plot the final results

After the testing/validation training is finished, GANDLF makes it possible to collect all the statistics from the final models for testing and validation datasets and plot them. The [gandlf_collectStats](https://github.com/CBICA/GaNDLF/blob/master/gandlf_collectStats) can be used for this:
After the testing/validation training is finished, GaNDLF makes it possible to collect all the statistics from the final models for testing and validation datasets and plot them. The [gandlf_collectStats](https://github.com/CBICA/GaNDLF/blob/master/gandlf_collectStats) can be used for this:

```bash
# continue from previous shell
@@ -105,8 +129,12 @@ python gandlf_collectStats \
-output ./experiment_0/output_dir_stats/ \ # output directory
```

[Back To Top &uarr;](#table-of-contents)

### Multi-GPU systems

Please ensure that the environment variable `CUDA_VISIBLE_DEVICES` is set [[ref](https://developer.nvidia.com/blog/cuda-pro-tip-control-gpu-visibility-cuda_visible_devices/)].

For an example of how this is set, see [sge_wrapper](https://github.com/CBICA/GaNDLF/blob/master/samples/sge_wrapper).

[Back To Top &uarr;](#table-of-contents)
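
As a hedged illustration of the environment-variable approach (GPU indices here are examples):

```python
import os

# Restrict the process to GPUs 0 and 1; this must be set before any CUDA
# context is created (e.g., before importing torch in a fresh process).
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

import torch
print(torch.cuda.device_count())  # at most 2 devices are visible under the mask above
```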
7 changes: 7 additions & 0 deletions samples/config_all_options.yaml
@@ -18,6 +18,13 @@ model:
amp: False, # Set if you want to use Automatic Mixed Precision for your operations or not - options: True, False
# n_channels: 3, # set the input channels - useful when reading RGB or images that have vectored pixel types
}
# metrics to evaluate the validation performance
metrics:
{
dice, # segmentation
# accuracy # regression/classification
# mse # regression/classification
}
# this is to enable or disable lazy loading - setting to true reads all data once during data loading, resulting in improvements
# in I/O at the expense of memory consumption
in_memory: False
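For reference, `dice` in the new `metrics` block refers to the Sørensen–Dice overlap; a minimal sketch of the standard formulation (not necessarily GaNDLF's exact implementation):

```python
import torch

def dice_score(pred: torch.Tensor, target: torch.Tensor, eps: float = 1e-7) -> float:
    # Standard Sørensen–Dice on binarized masks: 2|A∩B| / (|A| + |B|).
    pred = (pred > 0.5).float()
    target = (target > 0.5).float()
    intersection = (pred * target).sum()
    return (2.0 * intersection / (pred.sum() + target.sum() + eps)).item()
```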
5 changes: 5 additions & 0 deletions samples/config_classification.yaml
@@ -15,6 +15,11 @@ model:
amp: False, # Set if you want to use Automatic Mixed Precision for your operations or not - options: True, False
n_channels: 3, # set the input channels - useful when reading RGB or images that have vectored pixel types
}
# metrics to evaluate the validation performance
metrics:
{
mse # regression/classification
}
# this is to enable or disable lazy loading - setting to true reads all data once during data loading, resulting in improvements
# in I/O at the expense of memory consumption
in_memory: False
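Similarly, the `mse` metric denotes mean squared error; a one-liner via PyTorch's functional API (illustrative values):

```python
import torch
import torch.nn.functional as F

pred = torch.tensor([2.5, 0.0, 2.0])
target = torch.tensor([3.0, -0.5, 2.0])
print(F.mse_loss(pred, target))  # tensor(0.1667)
```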
5 changes: 5 additions & 0 deletions samples/config_regression.yaml
@@ -15,6 +15,11 @@ model:
amp: False, # Set if you want to use Automatic Mixed Precision for your operations or not - options: True, False
n_channels: 3, # set the input channels - useful when reading RGB or images that have vectored pixel types
}
# metrics to evaluate the validation performance
metrics:
{
mse # regression/classification
}
# this is to enable or disable lazy loading - setting to true reads all data once during data loading, resulting in improvements
# in I/O at the expense of memory consumption
in_memory: False
5 changes: 5 additions & 0 deletions samples/config_segmentation_brats.yaml
@@ -15,6 +15,11 @@ model:
amp: True, # Set if you want to use Automatic Mixed Precision for your operations or not - options: True, False
# n_channels: 3, # set the input channels - useful when reading RGB or images that have vectored pixel types
}
# metrics to evaluate the validation performance
metrics:
{
dice
}
# Set the Modality : rad for radiology, path for histopathology
modality: rad
weighted_loss: True
6 changes: 0 additions & 6 deletions testing/test_full.py
@@ -107,7 +107,6 @@ def test_train_segmentation_rad_2d(device):
training_data, headers = parseTrainingCSV(inputDir + '/train_2d_rad_segmentation.csv')
parameters = parseConfig(testingDir + '/config_segmentation.yaml', version_check = False)
parameters['patch_size'] = patch_size['2D']
parameters['psize'] = patch_size['2D']
parameters['model']['dimension'] = 2
parameters['model']['class_list'] = [0,255]
parameters['model']['amp'] = True
@@ -128,7 +127,6 @@ def test_train_segmentation_rad_3d(device):
# read and initialize parameters for specific data dimension
parameters = parseConfig(testingDir + '/config_segmentation.yaml', version_check = False)
parameters['patch_size'] = patch_size['3D']
parameters['psize'] = patch_size['3D']
parameters['model']['dimension'] = 3
parameters['model']['class_list'] = [0,1]
parameters['model']['amp'] = True
@@ -146,7 +144,6 @@ def test_train_regression_rad_2d(device):
# read and initialize parameters for specific data dimension
parameters = parseConfig(testingDir + '/config_regression.yaml', version_check = False)
parameters['patch_size'] = patch_size['2D']
parameters['psize'] = patch_size['2D']
parameters['model']['dimension'] = 2
parameters['model']['amp'] = True
# read and parse csv
@@ -167,7 +164,6 @@ def test_train_regression_rad_3d(device):
# read and initialize parameters for specific data dimension
parameters = parseConfig(testingDir + '/config_regression.yaml', version_check = False)
parameters['patch_size'] = patch_size['3D']
parameters['psize'] = patch_size['3D']
parameters['model']['dimension'] = 3
parameters['model']['amp'] = True
# read and parse csv
@@ -188,7 +184,6 @@ def test_train_classification_rad_2d(device):
parameters = parseConfig(testingDir + '/config_classification.yaml', version_check = False)
parameters['modality'] = 'rad'
parameters['patch_size'] = patch_size['2D']
parameters['psize'] = patch_size['2D']
parameters['model']['dimension'] = 2
parameters['model']['amp'] = True
# read and parse csv
@@ -208,7 +203,6 @@ def test_train_classification_rad_3d(device):
# read and initialize parameters for specific data dimension
parameters = parseConfig(testingDir + '/config_classification.yaml', version_check = False)
parameters['patch_size'] = patch_size['3D']
parameters['psize'] = patch_size['3D']
parameters['model']['dimension'] = 3
parameters['model']['amp'] = True
# read and parse csv