Description
CONTEXT
Greetings. I'm working on a project where I want to segment four classes: Background, Myocardium, Blood-pool, and Remote Myocardium. The images in my dataset are grayscale MRI images (LGE PSIR, short-axis view) stored in .npy files. According to the documentation, I need to convert the files from .npy to .nii.gz using NibabelIO. To do this, I use the First Python Module below, which also builds the directory tree, concatenates my validation images/masks with the training set, and renames my files to a format compatible with nnU-Net.
First Python Module
import os
import numpy as np
import nibabel as nib


def get_last_case_number(directory):
    numbers = []
    for filename in os.listdir(directory):
        if filename.endswith('.nii.gz'):
            parts = filename.split('_')
            if len(parts) > 1:
                number = int(parts[1].split('.')[0])
                numbers.append(number)
    return max(numbers) if numbers else 0


def convert_and_rename(input_dir, output_dir, prefix, start_number=None):
    os.makedirs(output_dir, exist_ok=True)
    pixel_spacing = (1.40625, 1.40625)  # PSIR pixel spacing value
    for filename in os.listdir(input_dir):
        if filename.endswith('.npy'):
            npy_path = os.path.join(input_dir, filename)
            data = np.load(npy_path)
            if len(data.shape) != 2:
                print(f"Skipping {filename}: Not a 2D image")
                continue
            # Add a leading singleton axis: (384, 384) -> (1, 384, 384)
            data = data[np.newaxis, :, :]
            data = data.astype(np.float32)
            affine = np.array([
                [pixel_spacing[0], 0, 0, 0],
                [0, pixel_spacing[1], 0, 0],
                [0, 0, 1, 0],
                [0, 0, 0, 1]
            ])
            nifti_img = nib.Nifti1Image(data, affine)
            # Extract the case number from the filename (expected format: caseNN_*.npy)
            base_name = filename.split('.')[0]
            parts = base_name.split('_')
            if len(parts) > 1 and 'case' in parts[0]:
                if start_number is not None:
                    number = str(start_number).zfill(4)
                    start_number += 1
                else:
                    number = parts[0].split("case")[1].zfill(4)
            else:
                print(f"Filename {filename} does not match expected format.")
                continue
            new_filename = f'{prefix}{number}.nii.gz'
            output_path = os.path.join(output_dir, new_filename)
            nib.save(nifti_img, output_path)
            print(f"Converted and renamed {filename} to {new_filename}")


def create_directories(base_path, structure):
    for dir_name, sub_dirs in structure.items():
        path = os.path.join(base_path, dir_name)
        os.makedirs(path, exist_ok=True)
        create_directories(path, sub_dirs)


def main():
    # Directory structure
    dir_structure = {
        "nnUNetFrame": {
            "dataset": {
                "nnUNet_preprocessed": {},
                "nnUNet_raw": {
                    "nnUNet_cropped_data": {},
                    "nnUNet_raw_data": {
                        "Training": {
                            "img": {},
                            "label": {}
                        },
                        "Test": {
                            "img": {},
                            "label": {}
                        }
                    }
                },
                "nnUNet_trained_models": {}
            },
            "nnUNet": {}
        }
    }
    # Base directory path
    base_dir = "D:/csuarezgurruchaga/"
    # Create directory tree
    create_directories(base_dir, dir_structure)

    # Input directories
    train_input_img_dir = "../0000_data_segmentation/train_lge_psir/images/"
    train_input_mask_dir = "../0000_data_segmentation/train_lge_psir/masks/"
    val_input_img_dir = "../0000_data_segmentation/val_lge_psir/images/"
    val_input_mask_dir = "../0000_data_segmentation/val_lge_psir/masks/"
    test_input_img_dir = "../0000_data_segmentation/test_lge_psir/images/"
    test_input_mask_dir = "../0000_data_segmentation/test_lge_psir/masks/"

    # Output directories
    train_output_img_dir = 'D:/csuarezgurruchaga/nnUNetFrame/dataset/nnUNet_raw/nnUNet_raw_data/Training/img'
    train_output_mask_dir = 'D:/csuarezgurruchaga/nnUNetFrame/dataset/nnUNet_raw/nnUNet_raw_data/Training/label'
    test_output_img_dir = 'D:/csuarezgurruchaga/nnUNetFrame/dataset/nnUNet_raw/nnUNet_raw_data/Test/img'
    test_output_mask_dir = 'D:/csuarezgurruchaga/nnUNetFrame/dataset/nnUNet_raw/nnUNet_raw_data/Test/label'

    # Convert and rename train files
    print("Converting and renaming train images...")
    convert_and_rename(train_input_img_dir, train_output_img_dir, 'SCA_')
    print("Converting and renaming train masks...")
    convert_and_rename(train_input_mask_dir, train_output_mask_dir, 'label_')

    # Convert and rename test files
    print("Converting and renaming test images...")
    convert_and_rename(test_input_img_dir, test_output_img_dir, 'SCA_')
    print("Converting and renaming test masks...")
    convert_and_rename(test_input_mask_dir, test_output_mask_dir, 'label_')

    last_train_number = max(get_last_case_number(train_output_img_dir),
                            get_last_case_number(train_output_mask_dir))

    # Convert, rename and append validation files after train files
    print("Converting, renaming and appending validation images...")
    convert_and_rename(val_input_img_dir, train_output_img_dir, 'SCA_', last_train_number + 1)
    print("Converting, renaming and appending validation masks...")
    convert_and_rename(val_input_mask_dir, train_output_mask_dir, 'label_', last_train_number + 1)
    print("Done!!")


if __name__ == "__main__":
    main()
Note: at this point all the images and masks have shape (1, 384, 384), i.e. they were converted from .npy (384, 384) -> .nii.gz (1, 384, 384).
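A quick sanity check on one converted file (the file name here is just an example from my training folder; the expected values follow from the affine above):
import nibabel as nib

# Illustrative path; any converted file from Training/img works
img = nib.load('D:/csuarezgurruchaga/nnUNetFrame/dataset/nnUNet_raw/nnUNet_raw_data/Training/img/SCA_0001.nii.gz')
print(img.shape)               # expected: (1, 384, 384)
print(img.header.get_zooms())  # expected: (1.40625, 1.40625, 1.0)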
After that, I need to perform the dataset conversion and create the JSON file. For this, I use the Second Python Module.
Second Python Module
import os
import shutil
from pathlib import Path
from collections import OrderedDict
from batchgenerators.utilities.file_and_folder_operations import subfiles, join, save_json, load_json
from nnunet_utils import generate_dataset_json


def make_out_dirs(nnUNet_raw_path: str, dataset_id: int, task_name="ScarSegmentation"):
    dataset_name = f"Dataset{dataset_id:03d}_{task_name}"
    out_dir = Path(nnUNet_raw_path.replace('"', "")) / dataset_name
    out_train_dir = out_dir / "imagesTr"
    out_labels_dir = out_dir / "labelsTr"
    out_test_dir = out_dir / "imagesTs"
    os.makedirs(out_dir, exist_ok=True)
    os.makedirs(out_train_dir, exist_ok=True)
    os.makedirs(out_labels_dir, exist_ok=True)
    os.makedirs(out_test_dir, exist_ok=True)
    return out_dir, out_train_dir, out_labels_dir, out_test_dir


def copy_files(src_data_folder: Path, train_dir: Path, labels_dir: Path, test_dir: Path, prefix: str):
    train_folder = src_data_folder / "Training" / "img"
    label_folder = src_data_folder / "Training" / "label"
    test_folder = src_data_folder / "Test" / "img"
    train_patient_names = []
    test_patient_names = []

    # Copy training files and corresponding labels
    train_patients = subfiles(str(train_folder), join=False, suffix='nii.gz')
    for p in train_patients:
        serial_number = int(p[4:8])  # e.g. 'SCA_0012.nii.gz' -> 12
        train_patient_name = f'{prefix}_{serial_number:04d}.nii.gz'
        label_file = join(str(label_folder), f'label{p[3:]}')  # e.g. 'label_0012.nii.gz'
        image_file = join(str(train_folder), p)
        # strip '.nii.gz' ([:-7]) before appending the channel suffix
        shutil.copy(image_file, join(str(train_dir), f'{train_patient_name[:-7]}_0000.nii.gz'))
        shutil.copy(label_file, join(str(labels_dir), train_patient_name))
        train_patient_names.append(train_patient_name)

    # Copy test files
    test_patients = subfiles(str(test_folder), join=False, suffix=".nii.gz")
    for p in test_patients:
        p = p[:-7]  # strip '.nii.gz'
        image_file = join(str(test_folder), p + ".nii.gz")
        serial_number = int(p[4:8])
        test_patient_name = f'{prefix}_{serial_number:04d}.nii.gz'
        shutil.copy(image_file, join(str(test_dir), f'{test_patient_name[:-7]}_0000.nii.gz'))
        test_patient_names.append(test_patient_name)

    return len(train_patient_names), train_patient_names, test_patient_names


def convert_scar_segmentation(src_data_folder: str, dataset_id: int, task_name: str, prefix: str):
    out_dir, train_dir, labels_dir, test_dir = make_out_dirs(nnUNet_raw_path=src_data_folder, dataset_id=dataset_id, task_name=task_name)
    num_training_cases, train_patient_names, test_patient_names = copy_files(Path(src_data_folder), train_dir, labels_dir, test_dir, prefix)

    generate_dataset_json(
        str(out_dir),
        channel_names={
            0: "MRI",
        },
        labels={
            "background": 0,
            "myocardium": 1,
            "blood_pool": 2,
            "remote_myocardium": 3,
        },
        file_ending=".nii.gz",
        num_training_cases=num_training_cases,
        dataset_name="LGE_PSIR",
        description="Segmentation of the LV with HCM cardiac condition",
        reference="none",
        release="0.0",
        license='Dataset for scar segmentation with HCM as cardiac condition',
    )

    # Additional dataset-specific information
    json_dict = OrderedDict()
    json_dict['training'] = [{'image': f"./imagesTr/{name[:-7]}_0000.nii.gz", "label": f"./labelsTr/{name}"} for name in train_patient_names]
    json_dict['test'] = [f"./imagesTs/{name[:-7]}_0000.nii.gz" for name in test_patient_names]

    # Update the existing dataset.json with the additional information
    dataset_json_path = join(str(out_dir), "dataset.json")
    existing_json = OrderedDict(load_json(dataset_json_path))
    existing_json.update(json_dict)
    save_json(existing_json, dataset_json_path, sort_keys=False)


if __name__ == "__main__":
    base = 'D:/csuarezgurruchaga/nnUNetFrame/dataset/nnUNet_raw/nnUNet_raw_data/'
    task_id = 577
    task_name = "ScarSegmentation"
    prefix = "SCA"
    convert_scar_segmentation(base, task_id, task_name, prefix)
    print("Done!")
Then I ran plan and preprocess, which works fine:
PLAN AND PREPROCESS
(nnUnet) D:\csuarezgurruchaga\nnUNetFrame\dataset\nnUNet_raw>nnUNetv2_plan_and_preprocess -d 577 --verify_dataset_integrity
Fingerprint extraction...
Dataset577_ScarSegmentation
Using <class 'nnunetv2.imageio.simpleitk_reader_writer.SimpleITKIO'> as reader/writer
####################
verify_dataset_integrity Done.
If you didn't see any error messages then your dataset is most likely OK!
####################
Experiment planning...
############################
INFO: You are using the old nnU-Net default planner. We have updated our recommendations. Please consider using those instead! Read more here: https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/resenc_presets.md
############################
Dropping 3d_lowres config because the image size difference to 3d_fullres is too small. 3d_fullres: [208. 1. 256.], 3d_lowres: [208, 1, 256]
2D U-Net configuration:
{'data_identifier': 'nnUNetPlans_2d', 'preprocessor_name': 'DefaultPreprocessor', 'batch_size': 4766, 'patch_size': (1, 256), 'median_image_size_in_voxels': array([ 1., 256.]), 'spacing': array([1. , 1.40625]), 'normalization_schemes': ['ZScoreNormalization'], 'use_mask_for_norm': [True], 'resampling_fn_data': 'resample_data_or_seg_to_shape', 'resampling_fn_seg': 'resample_data_or_seg_to_shape', 'resampling_fn_data_kwargs': {'is_seg': False, 'order': 3, 'order_z': 0, 'force_separate_z': None}, 'resampling_fn_seg_kwargs': {'is_seg': True, 'order': 1, 'order_z': 0, 'force_separate_z': None}, 'resampling_fn_probabilities': 'resample_data_or_seg_to_shape', 'resampling_fn_probabilities_kwargs': {'is_seg': False, 'order': 1, 'order_z': 0, 'force_separate_z': None}, 'architecture': {'network_class_name': 'dynamic_network_architectures.architectures.unet.PlainConvUNet', 'arch_kwargs': {'n_stages': 6, 'features_per_stage': (32, 64, 128, 256, 512, 512), 'conv_op': 'torch.nn.modules.conv.Conv2d', 'kernel_sizes': ((3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3)), 'strides': ((1, 1), (1, 2), (1, 2), (1, 2), (1, 2), (1, 2)), 'n_conv_per_stage': (2, 2, 2, 2, 2, 2), 'n_conv_per_stage_decoder': (2, 2, 2, 2, 2), 'conv_bias': True, 'norm_op': 'torch.nn.modules.instancenorm.InstanceNorm2d', 'norm_op_kwargs': {'eps': 1e-05, 'affine': True}, 'dropout_op': None, 'dropout_op_kwargs': None, 'nonlin': 'torch.nn.LeakyReLU', 'nonlin_kwargs': {'inplace': True}}, '_kw_requires_import': ('conv_op', 'norm_op', 'dropout_op', 'nonlin')}, 'batch_dice': True}
Using <class 'nnunetv2.imageio.simpleitk_reader_writer.SimpleITKIO'> as reader/writer
3D fullres U-Net configuration:
{'data_identifier': 'nnUNetPlans_3d_fullres', 'preprocessor_name': 'DefaultPreprocessor', 'batch_size': 63, 'patch_size': (224, 1, 256), 'median_image_size_in_voxels': array([208., 1., 256.]), 'spacing': array([1.40625, 1. , 1.40625]), 'normalization_schemes': ['ZScoreNormalization'], 'use_mask_for_norm': [True], 'resampling_fn_data': 'resample_data_or_seg_to_shape', 'resampling_fn_seg': 'resample_data_or_seg_to_shape', 'resampling_fn_data_kwargs': {'is_seg': False, 'order': 3, 'order_z': 0, 'force_separate_z': None}, 'resampling_fn_seg_kwargs': {'is_seg': True, 'order': 1, 'order_z': 0, 'force_separate_z': None}, 'resampling_fn_probabilities': 'resample_data_or_seg_to_shape', 'resampling_fn_probabilities_kwargs': {'is_seg': False, 'order': 1, 'order_z': 0, 'force_separate_z': None}, 'architecture': {'network_class_name': 'dynamic_network_architectures.architectures.unet.PlainConvUNet', 'arch_kwargs': {'n_stages': 6, 'features_per_stage': (32, 64, 128, 256, 320, 320), 'conv_op': 'torch.nn.modules.conv.Conv3d', 'kernel_sizes': ((3, 3, 3), (3, 3, 3), (3, 3, 3), (3, 3, 3), (3, 3, 3), (3, 3, 3)), 'strides': ((1, 1, 1), (2, 1, 2), (2, 1, 2), (2, 1, 2), (2, 1, 2), (2, 1, 2)), 'n_conv_per_stage': (2, 2, 2, 2, 2, 2), 'n_conv_per_stage_decoder': (2, 2, 2, 2, 2), 'conv_bias': True, 'norm_op': 'torch.nn.modules.instancenorm.InstanceNorm3d', 'norm_op_kwargs': {'eps': 1e-05, 'affine': True}, 'dropout_op': None, 'dropout_op_kwargs': None, 'nonlin': 'torch.nn.LeakyReLU', 'nonlin_kwargs': {'inplace': True}}, '_kw_requires_import': ('conv_op', 'norm_op', 'dropout_op', 'nonlin')}, 'batch_dice': False}
Plans were saved to C:\Users\csuarezgurruchaga\Desktop\back_up_lge_seg\0000_suarez_gurruchaga\nnUNetFrame\dataset\nnUNet_preprocessed\Dataset577_ScarSegmentation\nnUNetPlans.json
Preprocessing...
Preprocessing dataset Dataset577_ScarSegmentation
Configuration: 2d...
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2464/2464 [01:18<00:00, 31.47it/s]
Configuration: 3d_fullres...
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2464/2464 [01:11<00:00, 34.48it/s]
Configuration: 3d_lowres...
INFO: Configuration 3d_lowres not found in plans file nnUNetPlans.json of dataset Dataset577_ScarSegmentation. Skipping.
Then I ran the training, and this is where it breaks. It seems to be related to the padding in the data augmentation step.
ERROR: STARTING THE TRAINING WITHOUT CV (fold 'all')
(nnUnet) D:\csuarezgurruchaga\nnUNetFrame\dataset\nnUNet_raw>nnUNetv2_train 577 2d all
############################
INFO: You are using the old nnU-Net default plans. We have updated our recommendations. Please consider using those instead! Read more here: https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/resenc_presets.md
############################
Using device: cuda:0
#######################################################################
Please cite the following paper when using nnU-Net:
Isensee, F., Jaeger, P. F., Kohl, S. A., Petersen, J., & Maier-Hein, K. H. (2021). nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation. Nature methods, 18(2), 203-211.
#######################################################################
2024-08-02 23:00:41.125705: do_dummy_2d_data_aug: False
using pin_memory on device 0
Exception in background worker 0:
Argument #6: Padding size should be less than the corresponding input dimension, but got: padding (2, 2) at dimension 1 of input [1, 1, 256]
Traceback (most recent call last):
File "C:\Users\csuarezgurruchaga\.conda\envs\nnUnet\lib\site-packages\batchgenerators\dataloading\nondet_multi_threaded_augmenter.py", line 53, in producer
item = next(data_loader)
File "C:\Users\csuarezgurruchaga\.conda\envs\nnUnet\lib\site-packages\batchgenerators\dataloading\data_loader.py", line 126, in __next__
return self.generate_train_batch()
File "D:\csuarezgurruchaga\nnUNetFrame\nnUNet\nnunetv2\training\dataloading\data_loader_2d.py", line 99, in generate_train_batch
tmp = self.transforms(**{'image': data_all[b], 'segmentation': seg_all[b]})
File "C:\Users\csuarezgurruchaga\.conda\envs\nnUnet\lib\site-packages\batchgeneratorsv2\transforms\base\basic_transform.py", line 18, in __call__
return self.apply(data_dict, **params)
File "C:\Users\csuarezgurruchaga\.conda\envs\nnUnet\lib\site-packages\batchgeneratorsv2\transforms\utils\compose.py", line 13, in apply
data_dict = t(**data_dict)
File "C:\Users\csuarezgurruchaga\.conda\envs\nnUnet\lib\site-packages\batchgeneratorsv2\transforms\base\basic_transform.py", line 18, in __call__
return self.apply(data_dict, **params)
File "C:\Users\csuarezgurruchaga\.conda\envs\nnUnet\lib\site-packages\batchgeneratorsv2\transforms\utils\random.py", line 17, in apply
return self.transform(**data_dict)
File "C:\Users\csuarezgurruchaga\.conda\envs\nnUnet\lib\site-packages\batchgeneratorsv2\transforms\base\basic_transform.py", line 18, in __call__
return self.apply(data_dict, **params)
File "C:\Users\csuarezgurruchaga\.conda\envs\nnUnet\lib\site-packages\batchgeneratorsv2\transforms\base\basic_transform.py", line 65, in apply
data_dict['image'] = self._apply_to_image(data_dict['image'], **params)
File "C:\Users\csuarezgurruchaga\.conda\envs\nnUnet\lib\site-packages\batchgeneratorsv2\transforms\noise\gaussian_blur.py", line 140, in _apply_to_image
img[i:i+1] = self._benchmark_wrapper(img[i:i+1], params['sigmas'][j][d], d)
File "C:\Users\csuarezgurruchaga\.conda\envs\nnUnet\lib\site-packages\batchgeneratorsv2\transforms\noise\gaussian_blur.py", line 157, in _benchmark_wrapper
blur_dimension(dummy_img, sigma, dim_to_blur, force_use_fft=False)
File "C:\Users\csuarezgurruchaga\.conda\envs\nnUnet\lib\site-packages\batchgeneratorsv2\transforms\noise\gaussian_blur.py", line 63, in blur_dimension
img_padded = pad(img, padding, mode="reflect")
File "C:\Users\csuarezgurruchaga\.conda\envs\nnUnet\lib\site-packages\torch\nn\functional.py", line 4522, in pad
return torch._C._nn.pad(input, pad, mode, value)
RuntimeError: Argument #6: Padding size should be less than the corresponding input dimension, but got: padding (2, 2) at dimension 1 of input [1, 1, 256]
Traceback (most recent call last):
File "C:\Users\csuarezgurruchaga\.conda\envs\nnUnet\lib\runpy.py", line 197, in _run_module_as_main
return _run_code(code, main_globals, None,
File "C:\Users\csuarezgurruchaga\.conda\envs\nnUnet\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "C:\Users\csuarezgurruchaga\.conda\envs\nnUnet\Scripts\nnUNetv2_train.exe\__main__.py", line 7, in <module>
File "D:\csuarezgurruchaga\nnUNetFrame\nnUNet\nnunetv2\run\run_training.py", line 275, in run_training_entry
run_training(args.dataset_name_or_id, args.configuration, args.fold, args.tr, args.p, args.pretrained_weights,
File "D:\csuarezgurruchaga\nnUNetFrame\nnUNet\nnunetv2\run\run_training.py", line 211, in run_training
nnunet_trainer.run_training()
File "D:\csuarezgurruchaga\nnUNetFrame\nnUNet\nnunetv2\training\nnUNetTrainer\nnUNetTrainer.py", line 1362, in run_training
self.on_train_start()
File "D:\csuarezgurruchaga\nnUNetFrame\nnUNet\nnunetv2\training\nnUNetTrainer\nnUNetTrainer.py", line 903, in on_train_start
self.dataloader_train, self.dataloader_val = self.get_dataloaders()
File "D:\csuarezgurruchaga\nnUNetFrame\nnUNet\nnunetv2\training\nnUNetTrainer\nnUNetTrainer.py", line 696, in get_dataloaders
_ = next(mt_gen_train)
File "C:\Users\csuarezgurruchaga\.conda\envs\nnUnet\lib\site-packages\batchgenerators\dataloading\nondet_multi_threaded_augmenter.py", line 196, in __next__
item = self.__get_next_item()
File "C:\Users\csuarezgurruchaga\.conda\envs\nnUnet\lib\site-packages\batchgenerators\dataloading\nondet_multi_threaded_augmenter.py", line 181, in __get_next_item
raise RuntimeError("One or more background workers are no longer alive. Exiting. Please check the "
RuntimeError: One or more background workers are no longer alive. Exiting. Please check the print statements above for the actual error message
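As far as I can tell, the failing pad call can be reproduced in isolation (a minimal sketch; the padding of 2 stands in for whatever gaussian_blur derives from sigma):
import torch
import torch.nn.functional as F

# A 2d patch sampled from one of my (1, 384, 384) volumes has shape [C, 1, 256]
img = torch.zeros(1, 1, 256)
# Reflect padding requires the padding to be smaller than the padded dimension,
# which can never hold for an axis of size 1
F.pad(img, (0, 0, 2, 2), mode='reflect')
# -> RuntimeError: Padding size should be less than the corresponding input dimension ...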
QUESTIONS
- Any idea how I can solve this issue?
- With my 2D data, can I only use the 2d model, or can I also use 3d_fullres?
Thank you in advance for your time,
Carlos.