
nnU-Net v2 (Part 1): configuration files and the nnUNetv2_convert_MSD_dataset command

Contents

    • setup.py
    • pyproject.toml
    • configuration.py
    • nnUNetv2_convert_MSD_dataset

Corrections are welcome in the comments.

setup.py

nnU-Net v2 can be installed by running pip install . from the repository root.
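For reference, the usual sequence to get the source and install it (an editable install with -e is convenient during development; commands assume git and pip are available, as also described in the nnU-Net README):

git clone https://github.com/MIC-DKFZ/nnUNet.git
cd nnUNet
pip install -e .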

pyproject.toml

The project's configuration file, annotated line by line:

[project]
name = "nnunetv2"  # project name
version = "2.5"  # version
requires-python = ">=3.9"  # requires Python 3.9 or newer
description = "nnU-Net is a framework for out-of-the box image segmentation."  # description: works out of the box
readme = "readme.md"  # README file
license = { file = "LICENSE" }  # license
authors = [  # author information
    { name = "Fabian Isensee", email = "f.isensee@dkfz-heidelberg.de" },
    { name = "Helmholtz Imaging Applied Computer Vision Lab" }
]
classifiers = [  # trove classifiers: project status, audience, language, license and topics
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "Intended Audience :: Healthcare Industry",
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: Apache Software License",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Scientific/Engineering :: Image Recognition",
    "Topic :: Scientific/Engineering :: Medical Science Apps.",
]
keywords = [  # project keywords
    'deep learning',
    'image segmentation',
    'semantic segmentation',
    'medical image analysis',
    'medical image segmentation',
    'nnU-Net',
    'nnunet'
]
dependencies = [  # required packages
    "torch>=2.1.2",
    "acvl-utils>=0.2,<0.3",  # 0.3 may bring breaking changes. Careful!
    "dynamic-network-architectures>=0.3.1,<0.4",  # 0.3.1 and lower are supported, 0.4 may have breaking changes. Let's be careful here
    "tqdm",
    "dicom2nifti",
    "scipy",
    "batchgenerators>=0.25",
    "numpy",
    "scikit-learn",
    "scikit-image>=0.19.3",
    "SimpleITK>=2.2.1",
    "pandas",
    "graphviz",
    'tifffile',
    'requests',
    "nibabel",
    "matplotlib",
    "seaborn",
    "imagecodecs",
    "yacs",
    "batchgeneratorsv2",
    "einops"
]

[project.urls]  # project URLs
homepage = "https://github.com/MIC-DKFZ/nnUNet"
repository = "https://github.com/MIC-DKFZ/nnUNet"

[project.scripts]  # console script entry points
# dataset preparation (fingerprint extraction, planning and preprocessing in one go)
nnUNetv2_plan_and_preprocess = "nnunetv2.experiment_planning.plan_and_preprocess_entrypoints:plan_and_preprocess_entry"
# extract the dataset fingerprint
nnUNetv2_extract_fingerprint = "nnunetv2.experiment_planning.plan_and_preprocess_entrypoints:extract_fingerprint_entry"
# experiment planning only, no preprocessing yet
nnUNetv2_plan_experiment = "nnunetv2.experiment_planning.plan_and_preprocess_entrypoints:plan_experiment_entry"
# data preprocessing
nnUNetv2_preprocess = "nnunetv2.experiment_planning.plan_and_preprocess_entrypoints:preprocess_entry"
# model training
nnUNetv2_train = "nnunetv2.run.run_training:run_training_entry"
# load a model from a model folder and predict
nnUNetv2_predict_from_modelfolder = "nnunetv2.inference.predict_from_raw_data:predict_entry_point_modelfolder"
# prediction
nnUNetv2_predict = "nnunetv2.inference.predict_from_raw_data:predict_entry_point"
# convert an old nnU-Net (v1) dataset to the new format
nnUNetv2_convert_old_nnUNet_dataset = "nnunetv2.dataset_conversion.convert_raw_dataset_from_old_nnunet_format:convert_entry_point"
# find the best model configuration from the cross-validation results
nnUNetv2_find_best_configuration = "nnunetv2.evaluation.find_best_configuration:find_best_configuration_entry_point"
# postprocessing
nnUNetv2_determine_postprocessing = "nnunetv2.postprocessing.remove_connected_components:entry_point_determine_postprocessing_folder"
nnUNetv2_apply_postprocessing = "nnunetv2.postprocessing.remove_connected_components:entry_point_apply_postprocessing"
# ensemble multiple models
nnUNetv2_ensemble = "nnunetv2.ensembling.ensemble:entry_point_ensemble_folders"
# accumulate cross-validation results
nnUNetv2_accumulate_crossval_results = "nnunetv2.evaluation.find_best_configuration:accumulate_crossval_results_entry_point"
# generate overlay PNGs of images and segmentations
nnUNetv2_plot_overlay_pngs = "nnunetv2.utilities.overlay_plots:entry_point_generate_overlay"
# download a pretrained model from a URL
nnUNetv2_download_pretrained_model_by_url = "nnunetv2.model_sharing.entry_points:download_by_url"
nnUNetv2_install_pretrained_model_from_zip = "nnunetv2.model_sharing.entry_points:install_from_zip_entry_point"
# export a pretrained model as a zip file
nnUNetv2_export_model_to_zip = "nnunetv2.model_sharing.entry_points:export_pretrained_model_entry"
# move plans between datasets (for pretraining)
nnUNetv2_move_plans_between_datasets = "nnunetv2.experiment_planning.plans_for_pretraining.move_plans_between_datasets:entry_point_move_plans_between_datasets"
# model evaluation
nnUNetv2_evaluate_folder = "nnunetv2.evaluation.evaluate_predictions:evaluate_folder_entry_point"
nnUNetv2_evaluate_simple = "nnunetv2.evaluation.evaluate_predictions:evaluate_simple_entry_point"
# convert an MSD Task into an nnU-Net v2 Dataset
nnUNetv2_convert_MSD_dataset = "nnunetv2.dataset_conversion.convert_MSD_dataset:entry_point"

[project.optional-dependencies]  # optional dependencies
dev = [
    "black",
    "ruff",
    "pre-commit"
]

[build-system]  # build system requirements
requires = ["setuptools>=67.8.0"]
build-backend = "setuptools.build_meta"  # build backend

[tool.codespell]  # skip files with these suffixes
skip = '.git,*.pdf,*.svg'
#
# ignore-words-list = ''
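Each entry under [project.scripts] maps a console command to a module:function pair; installing the package generates the corresponding executables. As a minimal sketch (assuming nnunetv2 is installed, and reusing the Hippocampus path from the example further below), the console command and a direct Python call do the same thing:

# running: nnUNetv2_convert_MSD_dataset -i nnUNetFrame/DATASET/nnUNet_raw/Task04_Hippocampus
# is equivalent to:
import sys
from nnunetv2.dataset_conversion.convert_MSD_dataset import entry_point

sys.argv = ['nnUNetv2_convert_MSD_dataset', '-i', 'nnUNetFrame/DATASET/nnUNet_raw/Task04_Hippocampus']
entry_point()  # argparse reads sys.argv[1:], then convert_msd_dataset() is called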

configuration.py

nnunetv2.configuration

import os

from nnunetv2.utilities.default_n_proc_DA import get_allowed_n_proc_DA

# read nnUNet_def_n_proc from the environment; if it is not set, default to 8 processes
default_num_processes = 8 if 'nnUNet_def_n_proc' not in os.environ else int(os.environ['nnUNet_def_n_proc'])

# a sample is considered anisotropic if the spacing along its lowest-resolution axis is
# at least 3x the next-largest spacing
ANISO_THRESHOLD = 3  # determines when a sample is considered anisotropic (3 means that the spacing in the low
# resolution axis must be 3x as large as the next largest spacing)

# number of processes used during data augmentation, chosen based on the current environment
default_n_proc_DA = get_allowed_n_proc_DA()
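A quick way to check which values are in effect (a minimal sketch, assuming nnunetv2 is installed; nnUNet_def_n_proc must be set before nnunetv2.configuration is first imported, because default_num_processes is computed at import time):

import os
os.environ['nnUNet_def_n_proc'] = '4'  # set before the first import

from nnunetv2.configuration import default_num_processes, default_n_proc_DA, ANISO_THRESHOLD
print(default_num_processes)  # 4 (would be 8 without the env var)
print(ANISO_THRESHOLD)        # 3
print(default_n_proc_DA)      # depends on the machine / environment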

nnUNetv2_convert_MSD_dataset

nnunetv2.dataset_conversion.convert_MSD_dataset

entry_point(): the command-line entry point. It parses the command-line arguments and then calls convert_msd_dataset(). Example invocation:

nnUNetv2_convert_MSD_dataset -i nnUNetFrame/DATASET/nnUNet_raw/Task04_Hippocampus

def entry_point():
    parser = argparse.ArgumentParser()
    # path to the downloaded and extracted MSD dataset folder; required
    parser.add_argument('-i', type=str, required=True,
                        help='Downloaded and extracted MSD dataset folder. CANNOT be nnUNetv1 dataset! Example: '
                             '/home/fabian/Downloads/Task05_Prostate')
    # optionally override the dataset id with the given integer
    parser.add_argument('-overwrite_id', type=int, required=False, default=None,
                        help='Overwrite the dataset id. If not set we use the id of the MSD task (inferred from '
                             'folder name). Only use this if you already have an equivalently numbered dataset!')
    # number of worker processes, defaults to default_num_processes
    parser.add_argument('-np', type=int, required=False, default=default_num_processes,
                        help=f'Number of processes used. Default: {default_num_processes}')
    args = parser.parse_args()
    convert_msd_dataset(args.i, args.overwrite_id, args.np)

split_4d_nifti: does the actual per-file conversion from Task format to Dataset format.

def split_4d_nifti(filename, output_folder):
    """Split a 4D image (the fourth dimension is typically time or modality) into 3D images;
    a 3D image is kept as-is and simply copied to the target folder."""
    # read the image file
    img_itk = sitk.ReadImage(filename)
    # image dimensionality
    dim = img_itk.GetDimension()
    # file name without the directory
    file_base = os.path.basename(filename)
    if dim == 3:
        # strip the trailing ".nii.gz" and append the channel suffix "_0000"
        shutil.copy(filename, join(output_folder, file_base[:-7] + "_0000.nii.gz"))
        return
    elif dim != 4:
        raise RuntimeError("Unexpected dimensionality: %d of file %s, cannot split" % (dim, filename))
    else:
        # convert to numpy; axis 0 is the fourth dimension
        img_npy = sitk.GetArrayFromImage(img_itk)
        # spacing, origin and direction matrix of the 4D image
        spacing = img_itk.GetSpacing()
        origin = img_itk.GetOrigin()
        direction = np.array(img_itk.GetDirection()).reshape(4, 4)
        # now modify these to remove the fourth dimension
        spacing = tuple(list(spacing[:-1]))
        origin = tuple(list(origin[:-1]))
        direction = tuple(direction[:-1, :-1].reshape(-1))
        # iterate over the time points / channels
        for i, t in enumerate(range(img_npy.shape[0])):
            # extract the 3D volume at the current time point
            img = img_npy[t]
            img_itk_new = sitk.GetImageFromArray(img)  # array -> image
            img_itk_new.SetSpacing(spacing)  # set spacing
            img_itk_new.SetOrigin(origin)  # set origin
            img_itk_new.SetDirection(direction)  # set direction
            # save with a zero-padded channel suffix, e.g. _0000, _0001, ...
            sitk.WriteImage(img_itk_new, join(output_folder, file_base[:-7] + "_%04.0d.nii.gz" % i))
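A minimal sanity check of the split on a synthetic two-channel 4D NIfTI (file and folder names here are hypothetical; assumes nnunetv2, SimpleITK and numpy are installed, with split_4d_nifti imported from the module above):

import os
import numpy as np
import SimpleITK as sitk
from nnunetv2.dataset_conversion.convert_MSD_dataset import split_4d_nifti

os.makedirs('demo_out', exist_ok=True)
# numpy axis order is (t, z, y, x), so this becomes a 4D image with two time points
arr = np.random.rand(2, 8, 16, 16).astype(np.float32)
sitk.WriteImage(sitk.GetImageFromArray(arr), 'demo_case.nii.gz')

split_4d_nifti('demo_case.nii.gz', 'demo_out')
print(sorted(os.listdir('demo_out')))  # ['demo_case_0000.nii.gz', 'demo_case_0001.nii.gz']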

convert_msd_dataset: the main function that converts a Task into a Dataset.

def convert_msd_dataset(source_folder: str, overwrite_target_id: Optional[int] = None,
                        num_processes: int = default_num_processes) -> None:
    """Main function for converting an MSD Task into an nnU-Net v2 Dataset."""
    if source_folder.endswith('/') or source_folder.endswith('\\'):
        source_folder = source_folder[:-1]

    # the source folder must contain three subfolders (labelsTr, imagesTs, imagesTr)
    # and a dataset.json file
    labelsTr = join(source_folder, 'labelsTr')
    imagesTs = join(source_folder, 'imagesTs')
    imagesTr = join(source_folder, 'imagesTr')
    assert isdir(labelsTr), f"labelsTr subfolder missing in source folder"
    assert isdir(imagesTs), f"imagesTs subfolder missing in source folder"
    assert isdir(imagesTr), f"imagesTr subfolder missing in source folder"
    dataset_json = join(source_folder, 'dataset.json')
    assert isfile(dataset_json), f"dataset.json missing in source_folder"

    # infer source dataset id and name from the folder name
    task, dataset_name = os.path.basename(source_folder).split('_')
    task_id = int(task[4:])

    # check if the target dataset id is already taken
    target_id = task_id if overwrite_target_id is None else overwrite_target_id
    # look for existing datasets with this id; raise an error if any are found
    existing_datasets = find_candidate_datasets(target_id)
    assert len(existing_datasets) == 0, f"Target dataset id {target_id} is already taken, please consider changing " \
                                        f"it using overwrite_target_id. Conflicting dataset: {existing_datasets} (check nnUNet_results, nnUNet_preprocessed and nnUNet_raw!)"

    # folder structure of the converted dataset
    target_dataset_name = f"Dataset{target_id:03d}_{dataset_name}"
    target_folder = join(nnUNet_raw, target_dataset_name)
    target_imagesTr = join(target_folder, 'imagesTr')
    target_imagesTs = join(target_folder, 'imagesTs')
    target_labelsTr = join(target_folder, 'labelsTr')
    # create the target directories
    maybe_mkdir_p(target_imagesTr)
    maybe_mkdir_p(target_imagesTs)
    maybe_mkdir_p(target_labelsTr)

    with multiprocessing.get_context("spawn").Pool(num_processes) as p:
        results = []

        # convert 4d train images
        # files ending in .nii.gz that start with neither '.' nor '_'
        source_images = [i for i in subfiles(imagesTr, suffix='.nii.gz', join=False) if
                         not i.startswith('.') and not i.startswith('_')]
        source_images = [join(imagesTr, i) for i in source_images]
        # asynchronously split 4D images into 3D images (3D images are just copied)
        results.append(p.starmap_async(
            split_4d_nifti, zip(source_images, [target_imagesTr] * len(source_images))
        ))

        # convert 4d test images
        source_images = [i for i in subfiles(imagesTs, suffix='.nii.gz', join=False) if
                         not i.startswith('.') and not i.startswith('_')]
        source_images = [join(imagesTs, i) for i in source_images]
        results.append(p.starmap_async(
            split_4d_nifti, zip(source_images, [target_imagesTs] * len(source_images))
        ))

        # copy segmentations
        source_images = [i for i in subfiles(labelsTr, suffix='.nii.gz', join=False) if
                         not i.startswith('.') and not i.startswith('_')]
        for s in source_images:
            shutil.copy(join(labelsTr, s), join(target_labelsTr, s))

        # wait for the asynchronous jobs to finish
        [i.get() for i in results]

    dataset_json = load_json(dataset_json)
    # MSD stores labels as {value: name}; nnU-Net v2 expects {name: value}
    dataset_json['labels'] = {j: int(i) for i, j in dataset_json['labels'].items()}
    dataset_json['file_ending'] = ".nii.gz"
    dataset_json["channel_names"] = dataset_json["modality"]
    del dataset_json["modality"]
    del dataset_json["training"]
    del dataset_json["test"]
    save_json(dataset_json, join(nnUNet_raw, target_dataset_name, 'dataset.json'), sort_keys=False)
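The dataset.json rewrite at the end is easiest to see on a concrete pair of values (label names here follow Task04_Hippocampus and are meant as an illustration):

# MSD stores labels as {value: name} ...
labels_msd = {"0": "background", "1": "Anterior", "2": "Posterior"}
# ... nnU-Net v2 expects {name: value}, which the dict comprehension above produces:
labels_v2 = {j: int(i) for i, j in labels_msd.items()}
print(labels_v2)  # {'background': 0, 'Anterior': 1, 'Posterior': 2}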
