BAMF Liver and Tumor segmentation #84

Open · wants to merge 28 commits into base: main
36 changes: 36 additions & 0 deletions models/bamf_ct_liver_tumor/config/default.yml
@@ -0,0 +1,36 @@
general:
data_base_dir: /app/data
version: 1.0
description: default configuration for BAMF CT Liver and Tumor segmentation (dicom to dicom)

execute:
- DicomImporter
- NiftiConverter
- NNUnetRunnerV2
- PostProcessor
- DsegConverter
- DataOrganizer

modules:
DicomImporter:
source_dir: input_data
import_dir: sorted_data
sort_data: true
meta:
mod: '%Modality'

NiftiConverter:
engine: dcm2niix

NNUnetRunnerV2:
in_data: nifti:mod=ct

DsegConverter:
model_name: BAMF Liver and Tumor AI Segmentation
target_dicom: dicom:mod=ct
source_segs: nifti:mod=seg:processor=bamf
skip_empty_slices: True

DataOrganizer:
targets:
- dicomseg-->[i:sid]/bamf_ct_liver_tumor.seg.dcm
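# NOTE: one DICOM SEG file is written per instance, under a folder named by the
# instance's sid attribute (assumed to be the series identifier assigned during import/sorting)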
31 changes: 31 additions & 0 deletions models/bamf_ct_liver_tumor/dockerfiles/Dockerfile
@@ -0,0 +1,31 @@
FROM mhubai/base:latest

# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build
# by pulling sklearn instead of scikit-learn
# N.B. this is a known issue:
# https://github.com/MIC-DKFZ/nnUNet/issues/1281
# https://github.com/MIC-DKFZ/nnUNet/pull/1209
ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True

# Install nnunet
RUN pip3 install --no-cache-dir nnunetv2==2.0

# Clone the main branch of MHubAI/models
ARG MHUB_MODELS_REPO
RUN buildutils/import_mhub_model.sh bamf_ct_liver_tumor ${MHUB_MODELS_REPO}

# Pull nnUNet model weights into the container for Dataset006_Liver
ENV WEIGHTS_DIR=/root/.nnunet/nnUNet_models/
RUN mkdir -p $WEIGHTS_DIR
ENV WEIGHTS_FN=Dataset006_Liver.zip
ENV WEIGHTS_URL=https://zenodo.org/records/11582728/files/$WEIGHTS_FN
RUN wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL}
RUN unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR}
RUN rm ${WEIGHTS_DIR}${WEIGHTS_FN}

# specify the weights folder environment variable consumed by the NNUnetRunnerV2 module
ENV WEIGHTS_FOLDER=$WEIGHTS_DIR

# Default run script
ENTRYPOINT ["mhub.run"]
CMD ["--config", "/app/models/bamf_ct_liver_tumor/config/default.yml"]
145 changes: 145 additions & 0 deletions models/bamf_ct_liver_tumor/meta.json
@@ -0,0 +1,145 @@
{
"id": "",
"name": "bamf_ct_liver_tumor",
"title": "BAMF CT Liver and Tumor Seg (nnU-Net)",
"summary": {
"description": "An nnU-Net based model to segment liver and tumor from CT scans",
"inputs": [
{
"label": "Input Image",
"description": "The CT scan of a patient.",
"format": "DICOM",
"modality": "CT",
"bodypartexamined": "LIVER",
"slicethickness": "2.5mm",
"non-contrast": false,
"contrast": true
}
],
"outputs": [
{
"label": "Segmentation",
"type": "Segmentation",
"description": "Segmentation liver",
"classes": [
"LIVER",
"LIVER+NEOPLASM"
]
}
],
"model": {
"architecture": "U-net",
"training": "supervised",
"cmpapproach": "3D"
},
"data": {
"training": {
"vol_samples": 262
},
"evaluation": {
"vol_samples": 52
},
"public": true,
"external": true
}
},
"details": {
"name": "AIMI CT Liver",
"version": "1.0.0",
"devteam": "BAMF Health",
"authors": [
"Soni, Rahul",
"McCrumb, Diana",
"Murugesan, Gowtham Krishnan",
"Van Oss, Jeff",
"Kumar, Jithendra"
],
"type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)",
"date": {
"code": "28.09.2024",
"weights": "11.06.2024",
"pub": "30.09.2024"
},
"cite": "Gowtham Krishnan Murugesan, Diana McCrumb, Rahul Soni, Jithendra Kumar, Leonard Nuernberg, Linmin Pei, Ulrike Wagner, Sutton Granger, Andrey Y. Fedorov, Stephen Moore, Jeff Van Oss. AI generated annotations for Breast, Brain, Liver, Lungs and Prostate cancer collections in National Cancer Institute Imaging Data Commons. arXiv:2409.20342 (2024).",
"license": {
"code": "MIT",
"weights": "CC BY-NC 4.0"
},
"publications": [
{
"title": "AI generated annotations for Breast, Brain, Liver, Lungs and Prostate cancer collections in National Cancer Institute Imaging Data Commons",
"uri": "https://arxiv.org/abs/2409.20342"
}
],
"github": "https://github.com/bamf-health/aimi-liver-tumor-ct"
},
"info": {
"use": {
"title": "Intended Use",
"text": "This model is intended to perform liver and tumor segmentation in CT scans. The liver is a common site of primary (i.e. originating in the liver like hepatocellular carcinoma, HCC) or secondary (i.e. spreading to the liver like colorectal cancer) tumor development."
},
"analyses": {
"title": "Quantitative Analyses",
"text": "The model's performance was assessed using the Dice Coefficient, Hausdorff distance and NSD"
},
"evaluation": {
"title": "Evaluation Data",
"text": "The model was used to segment cases 509 from the Colorectal-Liver-Metastases [1] collection HCC-TACE-Seg [2]. 52 of those cases were randomly selected to be reviewed and corrected by a board-certified radiologist.",
"tables": [
{
"label": "Dice Score",
"entries": {
"Liver": "0.99±0.02",
"Tumor": "0.80±0.35"
}
},
{
"label": "95% Hausdorff Distance",
"entries": {
"Liver": "2.33±7.70",
"Tumor": "19.73±38.35"
}
},
{
"label": "Normalized surface distance ",
"entries": {
"Liver": "0.29±0.95",
"Tumor": "4.38±8.70"
}
}
],
"references": [
{
"label": "Colorectal-Liver-Metastases",
"uri": "https://doi.org/10.7937/QXK2-QG03"
},
{
"label": "HCC-TACE-Seg",
"uri": "https://doi.org/10.7937/TCIA.5FNA-0924"
}
]
},
"training": {
"title": "Training Data",
"text": "131 CT images from the LiTS 2017 and 131 CT images from the Medical Segmentation Decathlon datasets were used to train the nnU-Net model for liver and tumor segmentation. We utilized selected totalsegmentator outputs to develop anatomically informed model. The liver-CT model was trained to predict liver and liver tumors, as well as other abdominal organs including the duodenum, gallbladder, intestines, kidneys, lungs, pancreas, and spleen.",
"references": [
{
"label": "LiTS - Liver Tumor Segmentation Challenge",
"uri": "https://competitions.codalab.org/competitions/17094"
},
{
"label": "Medical Segmentation Decathlon",
"uri": "https://doi.org/10.1038/s41467-022-30695-9"
},
{
"label": "TotalSegmentator",
"uri": "https://mhub.ai/models/totalsegmentator"
}
]
},
"limitations": {
"title": "Limitations",
"text": "The model has been trained and tested on scans acquired during clinical care of patients, so it might not be suited for a healthy population. The generalization capabilities of the model on a range of ages, genders, and ethnicities are unknown."
}
}
}
2 changes: 2 additions & 0 deletions models/bamf_ct_liver_tumor/mhub.toml
@@ -0,0 +1,2 @@
[model.deployment]
test = "https://zenodo.org/records/13859260/files/test.zip?download=1"
80 changes: 80 additions & 0 deletions models/bamf_ct_liver_tumor/utils/NNUnetRunnerV2.py
@@ -0,0 +1,80 @@
"""
-------------------------------------------------
MHub - nnU-Net Runner v2
Custom Runner for pre-trained nnunet v2 models.
-------------------------------------------------

-------------------------------------------------
Author: Jithendra Kumar
Email: jithendra.kumar@bamfhealth.com
-------------------------------------------------
"""

import os, shutil
from mhubio.core import Module, Instance, InstanceData, DataType, FileType, IO


@IO.ConfigInput('in_data', 'nifti', the="input data to run nnunet on")
class NNUnetRunnerV2(Module):

nnunet_dataset: str = 'Dataset006_Liver'
nnunet_config: str = '3d_fullres'
input_data_type: DataType

@IO.Instance()
@IO.Input("in_data", the="input data to run nnunet on")
@IO.Output("out_data", 'VOLUME_001.nii.gz',
'nifti:mod=seg:model=nnunet:nnunet_dataset=Dataset006_Liver:nnunet_config=3d_fullres:'
'roi=SPLEEN,KIDNEY,GALLBLADDER,DUODENUM,PANCREAS,SMALL_INTESTINE,LUNG,LIVER,LIVER+NEOPLASM',
data='in_data', the="output data from nnunet")
def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None:

# get the nnunet model to run
self.v("Running nnUNetv2_predict.")
self.v(f" > dataset: {self.nnunet_dataset}")
self.v(f" > config: {self.nnunet_config}")
self.v(f" > input data: {in_data.abspath}")
self.v(f" > output data: {out_data.abspath}")

# download weights if not found
# NOTE: only for testing / debugging. For production, always provide the weights in the Docker container.
if not os.path.isdir(os.path.join(os.environ["WEIGHTS_FOLDER"], '')):
print("Downloading nnUNet model weights...")
bash_command = ["nnUNet_download_pretrained_model", self.nnunet_dataset]
self.subprocess(bash_command, text=True)

# bring input data in nnunet specific format
# NOTE: only for nifti data as we hardcode the nnunet-formatted-filename (and extension) for now.
assert in_data.type.ftype == FileType.NIFTI
assert in_data.abspath.endswith('.nii.gz')
inp_dir = self.config.data.requestTempDir(label="nnunet-model-inp")
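# nnU-Net v2 expects input files named <CASE_ID>_<CHANNEL>.nii.gz, so the single
# CT channel is written with the _0000 suffix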
inp_file = f'VOLUME_001_0000.nii.gz'
shutil.copyfile(in_data.abspath, os.path.join(inp_dir, inp_file))

# define output folder (temp dir) and also override environment variable for nnunet
out_dir = self.config.data.requestTempDir(label="nnunet-model-out")
os.environ['nnUNet_results'] = out_dir

# create symlink in python
# NOTE: nnUNetv2_predict looks up the trained model under the folder referenced by
# the nnUNet_results environment variable. Since MHub stores the weights under
# WEIGHTS_FOLDER, we point nnUNet_results at the temporary output folder above and
# symlink the dataset's weights folder into it.
os.symlink(os.path.join(os.environ['WEIGHTS_FOLDER'], self.nnunet_dataset), os.path.join(out_dir, self.nnunet_dataset))

# construct nnunet inference command
bash_command = ["nnUNetv2_predict"]
bash_command += ["-i", str(inp_dir)]
bash_command += ["-o", str(out_dir)]
bash_command += ["-d", self.nnunet_dataset]
bash_command += ["-c", self.nnunet_config]

self.v(f" > bash_command: {bash_command}")
# run command
self.subprocess(bash_command, text=True)

# get output data
out_file = f'VOLUME_001.nii.gz'
out_path = os.path.join(out_dir, out_file)

# copy output data to instance
shutil.copyfile(out_path, out_data.abspath)
67 changes: 67 additions & 0 deletions models/bamf_ct_liver_tumor/utils/PostProcessor.py
@@ -0,0 +1,67 @@
"""
-------------------------------------------------
MHub - Run Module for post-processing segmentations
-------------------------------------------------
-------------------------------------------------
Author: Jithendra Kumar
Email: jithendra.kumar@bamfhealth.com
-------------------------------------------------
"""

from mhubio.core import IO
from mhubio.core import Module, Instance, InstanceData
import SimpleITK as sitk
import numpy as np
from skimage import measure

class PostProcessor(Module):

def n_connected(self, img_data: np.ndarray) -> np.ndarray:
"""
Filters the connected components in the image, retaining only the largest components.

Parameters:
- img_data (np.ndarray): The input binary image.

Returns:
- np.ndarray: The filtered binary image.
"""
img_data_mask = np.zeros(img_data.shape)
img_data_mask[img_data > 0] = 1
img_filtered = np.zeros(img_data_mask.shape)
blobs_labels = measure.label(img_data_mask, background=0)
lbl, counts = np.unique(blobs_labels, return_counts=True)
lbl_dict = {}
for i, j in zip(lbl, counts):
lbl_dict[i] = j
sorted_dict = dict(sorted(lbl_dict.items(), key=lambda x: x[1], reverse=True))
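# components sorted by voxel count in descending order: index 0 is typically the
# background (label 0), index 1 the largest foreground component, which is the only
# one kept below (count == 1)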
count = 0

for key, value in sorted_dict.items():
if count == 1:
print(key, value)
img_filtered[blobs_labels == key] = 1
count += 1

img_data[img_filtered != 1] = 0
return img_data

@IO.Instance()
@IO.Input('in_data', 'nifti:mod=seg:model=nnunet', the='input segmentations')
@IO.Output('out_data', 'bamf_processed.nii.gz', 'nifti:mod=seg:processor=bamf:roi=LIVER,LIVER+NEOPLASM', data='in_data', the="filtered Liver and tumor segmentation")
def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None:

# Log bamf runner info
self.log("Running Segmentation Post Processing on....")
self.log(f" > input data: {in_data.abspath}")
self.log(f" > output data: {out_data.abspath}")

label_img = sitk.ReadImage(in_data.abspath)
seg_data = sitk.GetArrayFromImage(label_img)
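# the nnU-Net output is a multi-organ label map; keep only the liver (label 8) and
# the liver tumor (label 9) and remap them to 1 and 2 for the DICOM SEG export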
seg_data[seg_data < 8] = 0
seg_data[seg_data == 8] = 1
seg_data[seg_data == 9] = 2
seg_data = self.n_connected(seg_data)
filtered_label_img = sitk.GetImageFromArray(seg_data)
filtered_label_img.CopyInformation(label_img)
sitk.WriteImage(filtered_label_img, out_data.abspath)
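
A minimal, self-contained sketch of the largest-component filtering that n_connected performs, shown on a toy mask (illustrative only, not part of the PR; assumes numpy and scikit-image as already used above):

import numpy as np
from skimage import measure

# toy 3D binary mask with one large and one small blob
mask = np.zeros((1, 10, 10), dtype=np.uint8)
mask[0, 1:5, 1:5] = 1   # 16-voxel blob
mask[0, 7:9, 7:9] = 1   # 4-voxel blob

# label connected components and keep only the largest foreground component,
# mirroring what PostProcessor.n_connected does on the liver segmentation
labels = measure.label(mask, background=0)
sizes = np.bincount(labels.ravel())
sizes[0] = 0                      # ignore the background label
filtered = (labels == sizes.argmax()).astype(np.uint8)

print(int(filtered.sum()))        # 16 -> only the large blob survives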