BAMF Liver and Tumor segmentation #84

Open · wants to merge 28 commits into base: main
36 changes: 36 additions & 0 deletions models/bamf_ct_liver_tumor/config/default.yml
@@ -0,0 +1,36 @@
general:
data_base_dir: /app/data
version: 1.0
description: default configuration for BAMF CT Liver and Tumor segmentation (dicom to dicom)

execute:
- DicomImporter
- NiftiConverter
- NNUnetRunnerV2
- PostProcessor
- DsegConverter
- DataOrganizer

modules:
DicomImporter:
source_dir: input_data
import_dir: sorted_data
sort_data: true
meta:
mod: '%Modality'

NiftiConverter:
engine: dcm2niix

NNUnetRunnerV2:
in_data: nifti:mod=ct

DsegConverter:
model_name: BAMF Liver and Tumor AI Segmentation
target_dicom: dicom:mod=ct
source_segs: nifti:mod=seg:processor=bamf
skip_empty_slices: True

DataOrganizer:
targets:
- dicomseg-->[i:sid]/bamf_ct_liver_tumor.seg.dcm
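# NOTE: one DICOM SEG file is written per instance, under a folder named by the
# instance's sid attribute (assumed to be the series identifier assigned during import/sorting)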
31 changes: 31 additions & 0 deletions models/bamf_ct_liver_tumor/dockerfiles/Dockerfile
@@ -0,0 +1,31 @@
FROM mhubai/base:latest

# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build
# by pulling sklearn instead of scikit-learn
# N.B. this is a known issue:
# https://github.com/MIC-DKFZ/nnUNet/issues/1281
# https://github.com/MIC-DKFZ/nnUNet/pull/1209
ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True

# Install nnunet
RUN pip3 install --no-cache-dir nnunetv2==2.0

# Clone the main branch of MHubAI/models
ARG MHUB_MODELS_REPO
RUN buildutils/import_mhub_model.sh bamf_ct_liver_tumor ${MHUB_MODELS_REPO}

# Pull nnUNet model weights into the container for Dataset006_Liver
ENV WEIGHTS_DIR=/root/.nnunet/nnUNet_models/
RUN mkdir -p $WEIGHTS_DIR
ENV WEIGHTS_FN=Dataset006_Liver.zip
ENV WEIGHTS_URL=https://zenodo.org/records/11582728/files/$WEIGHTS_FN
RUN wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL}
RUN unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR}
RUN rm ${WEIGHTS_DIR}${WEIGHTS_FN}

# specify the weights folder environment variable consumed by the NNUnetRunnerV2 module
ENV WEIGHTS_FOLDER=$WEIGHTS_DIR

# Default run script
ENTRYPOINT ["mhub.run"]
CMD ["--config", "/app/models/bamf_ct_liver_tumor/config/default.yml"]
145 changes: 145 additions & 0 deletions models/bamf_ct_liver_tumor/meta.json
@@ -0,0 +1,145 @@
{
"id": "",
"name": "bamf_ct_liver_tumor",
"title": "BAMF CT Liver and Tumor Seg (nnU-Net)",
"summary": {
"description": "An nnU-Net based model to segment liver and tumor from CT scans",
"inputs": [
{
"label": "Input Image",
"description": "The CT scan of a patient.",
"format": "DICOM",
"modality": "CT",
"bodypartexamined": "LIVER",
"slicethickness": "2.5mm",
"non-contrast": false,
"contrast": true
}
],
"outputs": [
{
"label": "Segmentation",
"type": "Segmentation",
"description": "Segmentation liver",
"classes": [
"LIVER",
"LIVER+NEOPLASM"
]
}
],
"model": {
"architecture": "U-net",
"training": "supervised",
"cmpapproach": "3D"
},
"data": {
"training": {
"vol_samples": 262
},
"evaluation": {
"vol_samples": 52
},
"public": true,
"external": true
}
},
"details": {
"name": "AIMI CT Liver",
"version": "1.0.0",
"devteam": "BAMF Health",
"authors": [
"Soni, Rahul",
"McCrumb, Diana",
"Murugesan, Gowtham Krishnan",
"Van Oss, Jeff",
"Kumar, Jithendra"
],
"type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)",
"date": {
"code": "28.09.2024",
"weights": "11.06.2024",
"pub": "30.09.2024"
},
"cite": "Gowtham Krishnan Murugesan, Diana McCrumb, Rahul Soni, Jithendra Kumar, Leonard Nuernberg, Linmin Pei, Ulrike Wagner, Sutton Granger, Andrey Y. Fedorov, Stephen Moore, Jeff Van Oss. AI generated annotations for Breast, Brain, Liver, Lungs and Prostate cancer collections in National Cancer Institute Imaging Data Commons. arXiv:2409.20342 (2024).",
"license": {
"code": "MIT",
"weights": "CC BY-NC 4.0"
},
"publications": [
{
"title": "AI generated annotations for Breast, Brain, Liver, Lungs and Prostate cancer collections in National Cancer Institute Imaging Data Commons",
"uri": "https://arxiv.org/abs/2409.20342"
}
],
"github": "https://github.com/bamf-health/aimi-liver-tumor-ct"
},
"info": {
"use": {
"title": "Intended Use",
"text": "This model is intended to perform liver and tumor segmentation in CT scans. The liver is a common site of primary (i.e. originating in the liver like hepatocellular carcinoma, HCC) or secondary (i.e. spreading to the liver like colorectal cancer) tumor development."
},
"analyses": {
"title": "Quantitative Analyses",
"text": "The model's performance was assessed using the Dice Coefficient, Hausdorff distance and NSD"
},
"evaluation": {
"title": "Evaluation Data",
"text": "The model was used to segment cases 509 from the Colorectal-Liver-Metastases [1] collection HCC-TACE-Seg [2]. 52 of those cases were randomly selected to be reviewed and corrected by a board-certified radiologist.",
"tables": [
{
"label": "Dice Score",
"entries": {
"Liver": "0.99±0.02",
"Tumor": "0.80±0.35"
}
},
{
"label": "95% Hausdorff Distance",
"entries": {
"Liver": "2.33±7.70",
"Tumor": "19.73±38.35"
}
},
{
"label": "Normalized surface distance ",
"entries": {
"Liver": "0.29±0.95",
"Tumor": "4.38±8.70"
}
}
],
"references": [
{
"label": "Colorectal-Liver-Metastases",
"uri": "https://doi.org/10.7937/QXK2-QG03"
},
{
"label": "HCC-TACE-Seg",
"uri": "https://doi.org/10.7937/TCIA.5FNA-0924"
}
]
},
"training": {
"title": "Training Data",
"text": "131 CT images from the LiTS 2017 and 131 CT images from the Medical Segmentation Decathlon datasets were used to train the nnU-Net model for liver and tumor segmentation. We utilized selected totalsegmentator outputs to develop anatomically informed model. The liver-CT model was trained to predict liver and liver tumors, as well as other abdominal organs including the duodenum, gallbladder, intestines, kidneys, lungs, pancreas, and spleen.",
"references": [
{
"label": "LiTS - Liver Tumor Segmentation Challenge",
"uri": "https://competitions.codalab.org/competitions/17094"
},
{
"label": "Medical Segmentation Decathlon",
"uri": "https://doi.org/10.1038/s41467-022-30695-9"
},
{
"label": "TotalSegmentator",
"uri": "https://mhub.ai/models/totalsegmentator"
}
]
},
"limitations": {
"title": "Limitations",
"text": "The model has been trained and tested on scans acquired during clinical care of patients, so it might not be suited for a healthy population. The generalization capabilities of the model on a range of ages, genders, and ethnicities are unknown."
}
}
}
2 changes: 2 additions & 0 deletions models/bamf_ct_liver_tumor/mhub.toml
@@ -0,0 +1,2 @@
[model.deployment]
test = "https://zenodo.org/records/13859260/files/test.zip?download=1"
80 changes: 80 additions & 0 deletions models/bamf_ct_liver_tumor/utils/NNUnetRunnerV2.py
@@ -0,0 +1,80 @@
"""
-------------------------------------------------
MHub - nnU-Net Runner v2
Custom Runner for pre-trained nnunet v2 models.
-------------------------------------------------

-------------------------------------------------
Author: Jithendra Kumar
Email: jithendra.kumar@bamfhealth.com
-------------------------------------------------
"""

import os, shutil
from mhubio.core import Module, Instance, InstanceData, DataType, FileType, IO


@IO.ConfigInput('in_data', 'nifti', the="input data to run nnunet on")
class NNUnetRunnerV2(Module):

nnunet_dataset: str = 'Dataset006_Liver'
nnunet_config: str = '3d_fullres'
input_data_type: DataType

@IO.Instance()
@IO.Input("in_data", the="input data to run nnunet on")
@IO.Output("out_data", 'VOLUME_001.nii.gz',
'nifti:mod=seg:model=nnunet:nnunet_dataset=Dataset006_Liver:nnunet_config=3d_fullres:'
'roi=SPLEEN,KIDNEY,GALLBLADDER,DUODENUM,PANCREAS,SMALL_INTESTINE,LUNG,LIVER,LIVER+NEOPLASM',
data='in_data', the="output data from nnunet")
def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None:

# get the nnunet model to run
self.v("Running nnUNetv2_predict.")
self.v(f" > dataset: {self.nnunet_dataset}")
self.v(f" > config: {self.nnunet_config}")
self.v(f" > input data: {in_data.abspath}")
self.v(f" > output data: {out_data.abspath}")

# download weights if not found
# NOTE: only for testing / debugging. For production, always provide the weights in the Docker container.
if not os.path.isdir(os.path.join(os.environ["WEIGHTS_FOLDER"], '')):
print("Downloading nnUNet model weights...")
bash_command = ["nnUNet_download_pretrained_model", self.nnunet_dataset]
self.subprocess(bash_command, text=True)

# bring input data in nnunet specific format
# NOTE: only for nifti data as we hardcode the nnunet-formatted-filename (and extension) for now.
assert in_data.type.ftype == FileType.NIFTI
assert in_data.abspath.endswith('.nii.gz')
inp_dir = self.config.data.requestTempDir(label="nnunet-model-inp")
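# nnU-Net v2 expects input files named <CASE_ID>_<CHANNEL>.nii.gz, so the single
# CT channel is written with the _0000 suffix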
inp_file = f'VOLUME_001_0000.nii.gz'
shutil.copyfile(in_data.abspath, os.path.join(inp_dir, inp_file))

# define output folder (temp dir) and also override environment variable for nnunet
out_dir = self.config.data.requestTempDir(label="nnunet-model-out")
os.environ['nnUNet_results'] = out_dir

# create symlink in python
# NOTE: nnUNetv2_predict looks up the trained model under the folder referenced by
# the nnUNet_results environment variable. Since MHub stores the weights under
# WEIGHTS_FOLDER, we point nnUNet_results at the temporary output folder above and
# symlink the dataset's weights folder into it.
os.symlink(os.path.join(os.environ['WEIGHTS_FOLDER'], self.nnunet_dataset), os.path.join(out_dir, self.nnunet_dataset))

# construct nnunet inference command
bash_command = ["nnUNetv2_predict"]
bash_command += ["-i", str(inp_dir)]
bash_command += ["-o", str(out_dir)]
bash_command += ["-d", self.nnunet_dataset]
bash_command += ["-c", self.nnunet_config]

self.v(f" > bash_command: {bash_command}")
# run command
self.subprocess(bash_command, text=True)

# get output data
out_file = f'VOLUME_001.nii.gz'
out_path = os.path.join(out_dir, out_file)

# copy output data to instance
shutil.copyfile(out_path, out_data.abspath)
67 changes: 67 additions & 0 deletions models/bamf_ct_liver_tumor/utils/PostProcessor.py
@@ -0,0 +1,67 @@
"""
-------------------------------------------------
MHub - Run Module for post-processing segmentations
-------------------------------------------------
-------------------------------------------------
Author: Jithendra Kumar
Email: jithendra.kumar@bamfhealth.com
-------------------------------------------------
"""

from mhubio.core import IO
from mhubio.core import Module, Instance, InstanceData
import SimpleITK as sitk
import numpy as np
from skimage import measure

class PostProcessor(Module):

def n_connected(self, img_data: np.ndarray) -> np.ndarray:
"""
Filters the connected components in the image, retaining only the largest components.

Parameters:
- img_data (np.ndarray): The input binary image.

Returns:
- np.ndarray: The filtered binary image.
"""
img_data_mask = np.zeros(img_data.shape)
img_data_mask[img_data > 0] = 1
img_filtered = np.zeros(img_data_mask.shape)
blobs_labels = measure.label(img_data_mask, background=0)
lbl, counts = np.unique(blobs_labels, return_counts=True)
lbl_dict = {}
for i, j in zip(lbl, counts):
lbl_dict[i] = j
sorted_dict = dict(sorted(lbl_dict.items(), key=lambda x: x[1], reverse=True))
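# components sorted by voxel count in descending order: index 0 is typically the
# background (label 0), index 1 the largest foreground component, which is the only
# one kept below (count == 1)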
count = 0

for key, value in sorted_dict.items():
if count == 1:
print(key, value)
img_filtered[blobs_labels == key] = 1
count += 1

img_data[img_filtered != 1] = 0
return img_data

@IO.Instance()
@IO.Input('in_data', 'nifti:mod=seg:model=nnunet', the='input segmentations')
@IO.Output('out_data', 'bamf_processed.nii.gz', 'nifti:mod=seg:processor=bamf:roi=LIVER,LIVER+NEOPLASM', data='in_data', the="filtered Liver and tumor segmentation")
def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None:

# Log bamf runner info
self.log("Running Segmentation Post Processing on....")
self.log(f" > input data: {in_data.abspath}")
self.log(f" > output data: {out_data.abspath}")

label_img = sitk.ReadImage(in_data.abspath)
seg_data = sitk.GetArrayFromImage(label_img)
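# the nnU-Net output is a multi-organ label map; keep only the liver (label 8) and
# the liver tumor (label 9) and remap them to 1 and 2 for the DICOM SEG export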
seg_data[seg_data < 8] = 0
seg_data[seg_data == 8] = 1
seg_data[seg_data == 9] = 2
seg_data = self.n_connected(seg_data)
filtered_label_img = sitk.GetImageFromArray(seg_data)
filtered_label_img.CopyInformation(label_img)
sitk.WriteImage(filtered_label_img, out_data.abspath)
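
A minimal, self-contained sketch of the largest-component filtering that n_connected performs, shown on a toy mask (illustrative only, not part of the PR; assumes numpy and scikit-image as already used above):

import numpy as np
from skimage import measure

# toy 3D binary mask with one large and one small blob
mask = np.zeros((1, 10, 10), dtype=np.uint8)
mask[0, 1:5, 1:5] = 1   # 16-voxel blob
mask[0, 7:9, 7:9] = 1   # 4-voxel blob

# label connected components and keep only the largest foreground component,
# mirroring what PostProcessor.n_connected does on the liver segmentation
labels = measure.label(mask, background=0)
sizes = np.bincount(labels.ravel())
sizes[0] = 0                      # ignore the background label
filtered = (labels == sizes.argmax()).astype(np.uint8)

print(int(filtered.sum()))        # 16 -> only the large blob survives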