From 34ca480873ed22a4b38cf504c042457b506f50ce Mon Sep 17 00:00:00 2001
From: Lars Widmer <lars.widmer2@uzh.ch>
Date: Fri, 13 Jan 2023 01:08:02 +0100
Subject: [PATCH 01/35] test

---
 lymph/midline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lymph/midline.py b/lymph/midline.py
index c9c7f1a..b1c7220 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -4,7 +4,7 @@
 import pandas as pd
 
 from .bilateral import Bilateral
-from .timemarg import MarginalizorDict
+from .timemarg import MarginalizorDictjd
 
 
 class MidlineBilateral:

From e2d76c3c8de2cb46a82225967bbcabb85f2afe77 Mon Sep 17 00:00:00 2001
From: Lars Widmer <lars.widmer2@uzh.ch>
Date: Fri, 13 Jan 2023 01:09:11 +0100
Subject: [PATCH 02/35] test

---
 lymph/midline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lymph/midline.py b/lymph/midline.py
index b1c7220..c9c7f1a 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -4,7 +4,7 @@
 import pandas as pd
 
 from .bilateral import Bilateral
-from .timemarg import MarginalizorDictjd
+from .timemarg import MarginalizorDict
 
 
 class MidlineBilateral:

From d05da344f4595eb7797209cb58bbf24ac3e8d636 Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Wed, 19 Apr 2023 16:37:04 +0200
Subject: [PATCH 03/35] added Bilateral instances and diag_time_dists for
 unknown patients

---
 lymph/midline.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/lymph/midline.py b/lymph/midline.py
index c9c7f1a..51e5a57 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -64,12 +64,19 @@ def __init__(
         self.noext = Bilateral(
             graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
         )
+        self.ext_unknown = Bilateral(
+            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
+        )
+        self.noext_unknown = Bilateral(
+            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
+        )
         self.use_mixing = use_mixing
         if self.use_mixing:
             self.alpha_mix = 0.
 
         self.noext.diag_time_dists = self.ext.diag_time_dists
-
+        self.ext_unknown.diag_time_dists = self.ext.diag_time_dists
+        self.noext_unknown.diag_time_dists = self.ext.diag_time_dists
 
     @property
     def graph(self) -> Dict[Tuple[str], List[str]]:

From 584c07e8c057f7b628b9c8a7fb99664cc55587ba Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Fri, 21 Apr 2023 14:37:46 +0200
Subject: [PATCH 04/35] added sampler for midline extension and likelihood for
 unknown midline extension

---
 lymph/bilateral.py      |  45 ++++
 lymph/midline.py        |  92 ++++++-
 lymph/midline_timeev.py | 548 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 676 insertions(+), 9 deletions(-)
 create mode 100644 lymph/midline_timeev.py

diff --git a/lymph/bilateral.py b/lymph/bilateral.py
index b6576d7..f3b1151 100644
--- a/lymph/bilateral.py
+++ b/lymph/bilateral.py
@@ -427,6 +427,26 @@ def check_and_assign(self, new_params: np.ndarray):
 
         self.spread_probs = new_spread_probs
 
+    def t_stages(self):
+        stored_t_stages = set(self.ipsi.diagnose_matrices.keys())
+        provided_t_stages = set(self.ipsi.diag_time_dists.keys())
+        t_stages = list(stored_t_stages.intersection(provided_t_stages))
+
+        return t_stages
+    
+    def state_probs_ipsi(self):
+        max_t = self.diag_time_dists.max_t
+        state_probs = {}
+        state_probs["ipsi"] = self.ipsi._evolve(t_last=max_t)
+
+        return state_probs["ipsi"]
+
+    def state_probs_contra(self):
+        max_t = self.diag_time_dists.max_t
+        state_probs = {}
+        state_probs["contra"] = self.contra._evolve(t_last=max_t)
+
+        return state_probs["contra"]
 
     def _likelihood(
         self,
@@ -467,6 +487,31 @@ def _likelihood(
 
         return llh
 
+    def _likelihood2(
+        self,
+        stage,
+        state_probs,
+    ) -> float:
+        """Compute the (log-)likelihood of data, using the stored spread probs and
+        fixed distributions for marginalizing over diagnose times.
+
+        This method mainly exists so that the checking and assigning of the
+        spread probs can be skipped.
+        """
+        
+        joint_state_probs = (
+            state_probs["ipsi"].T
+             @ np.diag(self.ipsi.diag_time_dists[stage].pmf)
+             @ state_probs["contra"]
+        )
+        p = np.sum(
+            self.ipsi.diagnose_matrices[stage]
+            * (joint_state_probs
+                @ self.contra.diagnose_matrices[stage]),
+            axis=0
+        )
+
+        return p
 
     def likelihood(
         self,
diff --git a/lymph/midline.py b/lymph/midline.py
index 51e5a57..8a3da16 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -126,21 +126,32 @@ def base_probs(self, new_params: np.ndarray):
 
         self.ext.ipsi.base_probs = new_params[:k]
         self.noext.ipsi.base_probs = new_params[:k]
+        self.ext_unknown.ipsi.base_probs = new_params[:k]
+        self.noext_unknown.ipsi.base_probs = new_params[:k]
 
         if self.use_mixing:
             self.noext.contra.base_probs = new_params[k:2*k]
+            self.noext_unknown.contra.base_probs = new_params[k:2*k]
             self.alpha_mix = new_params[-1]
             # compute linear combination
             self.ext.contra.base_probs = (
                 self.alpha_mix * self.ext.ipsi.base_probs
                 + (1. - self.alpha_mix) * self.noext.contra.base_probs
             )
+            self.ext_unknown.contra.base_probs = (
+                self.alpha_mix * self.ext_unknown.ipsi.base_probs
+                + (1. - self.alpha_mix) * self.noext_unknown.contra.base_probs
+            )
         else:
             self.ext.contra.base_probs = new_params[k:2*k]
             self.noext.contra.base_probs = new_params[2*k:3*k]
+            self.ext_unknown.contra.base_probs = new_params[k:2*k]
+            self.noext_unknown.contra.base_probs = new_params[2*k:3*k]
 
         # avoid unnecessary double computation of ipsilateral transition matrix
         self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
+        self.noext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
+        self.ext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
 
 
     @property
@@ -157,9 +168,13 @@ def trans_probs(self, new_params: np.ndarray):
         """
         self.noext.trans_probs = new_params
         self.ext.trans_probs = new_params
+        self.noext_unknown.trans_probs = new_params
+        self.ext_unknown.trans_probs = new_params
 
         # avoid unnecessary double computation of ipsilateral transition matrix
         self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
+        self.noext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
+        self.ext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
 
 
     @property
@@ -210,6 +225,8 @@ def diag_time_dists(self, new_dists: Union[dict, MarginalizorDict]):
         """
         self.ext.diag_time_dists = new_dists
         self.noext.diag_time_dists = self.ext.diag_time_dists
+        self.noext_unknown.diag_time_dists = self.ext.diag_time_dists
+        self.ext_unknown.diag_time_dists = self.ext.diag_time_dists
 
 
     @property
@@ -236,7 +253,25 @@ def modalities(self, modality_spsn: Dict[str, List[float]]):
         """
         self.noext.modalities = modality_spsn
         self.ext.modalities = modality_spsn
+        self.noext_unknown.modalities = modality_spsn
+        self.ext_unknown.modalities = modality_spsn
+
+    @property
+    def midext_prob(self):
+        """Assign the last of the new_params to the midline extension probability
+        """
+        try:
+            return self._midext_prob
+        except AttributeError as attr_err:
+            raise AttributeError(
+                "No midline extension probability has been assigned"
+            ) from attr_err
 
+    @midext_prob.setter
+    def midext_prob(self, new_params):
+        """A variable containing the midline extension probability
+        """
+        self._midext_prob = new_params[-1]
 
     @property
     def patient_data(self):
@@ -330,8 +365,9 @@ def load_data(
             :meth:`Unilateral.load_data`: Data loading method of the unilateral
             network.
         """
-        ext_data = data.loc[data[("info", "tumor", "midline_extension")]]
-        noext_data = data.loc[~data[("info", "tumor", "midline_extension")]]
+        ext_data = data.loc[(data[("info", "tumor", "midline_extension")]==True)]
+        noext_data = data.loc[(data[("info", "tumor", "midline_extension")]==False)]
+        unknown_data = data.loc[(data['info', 'tumor', 'midline_extension']!=False) & (data['info', 'tumor', 'midline_extension']!=True)] 
 
         self.ext.load_data(
             ext_data,
@@ -343,7 +379,18 @@ def load_data(
             modality_spsn=modality_spsn,
             mode=mode
         )
+        if len(unknown_data)>0:
+            self.ext_unknown.load_data(
+                unknown_data,
+                modality_spsn=modality_spsn,
+                mode=mode
+            )
 
+            self.noext_unknown.load_data(
+                unknown_data,
+                modality_spsn=modality_spsn,
+                mode=mode
+            )
 
     def check_and_assign(self, new_params: np.ndarray):
         """Check that the spread probability (rates) and the parameters for the
@@ -361,14 +408,20 @@ def check_and_assign(self, new_params: np.ndarray):
             invalid parameters.
         """
         k = len(self.spread_probs)
+        l = len(self.diag_time_dists)
         new_spread_probs = new_params[:k]
-        new_marg_params = new_params[k:]
+        new_marg_params = new_params[k:(k+l)]
 
         try:
             self.ext.ipsi.diag_time_dists.update(new_marg_params)
+            self.ext_unknown.ipsi.diag_time_dists.update(new_marg_params)
             self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
             self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists
             self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
+            self.ext_unknown.contra.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists
+            self.noext_unknown.ipsi.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists
+            self.noext_unknown.contra.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists
+            self.midext_prob = new_params
         except ValueError as val_err:
             raise ValueError(
                 "Parameters for marginalization over diagnose times are invalid"
@@ -382,6 +435,10 @@ def check_and_assign(self, new_params: np.ndarray):
             raise ValueError(
                 "Spread probs must be between 0 and 1"
             )
+        if 0. > self.midext_prob or self.midext_prob > 1.:
+            raise ValueError(
+                "Midline extension probability must be between 0 and 1"
+            )
 
         self.spread_probs = new_spread_probs
 
@@ -433,12 +490,29 @@ def likelihood(
 
         llh = 0. if log else 1.
 
-        if log:
-            llh += self.ext._likelihood(log=log)
-            llh += self.noext._likelihood(log=log)
-        else:
-            llh *= self.ext._likelihood(log=log)
-            llh *= self.noext._likelihood(log=log)
+        if len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']!=False) & (self.patient_data['info', 'tumor', 'midline_extension']!=True)])>0:
+            state_probs = {}
+            state_probs["ipsi"] = self.ext_unknown.state_probs_ipsi()
+            state_probs["contra"] = self.ext_unknown.state_probs_contra()
+            state_probs2 = {}
+            state_probs2["ipsi"] = self.noext_unknown.state_probs_ipsi()
+            state_probs2["contra"] = self.noext_unknown.state_probs_contra()
+            t_stages = list(set(self.patient_data[("info","tumor","t_stage")]))
+            t_stages = self.ext_unknown.t_stages()
+        
+
+            for stage in t_stages:
+                p = self.ext_unknown._likelihood2(stage=stage, state_probs=state_probs)*self.midext_prob
+                p2 = self.noext_unknown._likelihood2(stage=stage, state_probs=state_probs2)*(1-self.midext_prob)
+                llh += np.sum(np.log(p+p2))
+
+        if len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False) | (self.patient_data['info', 'tumor', 'midline_extension']==True)])>0:
+            if log:
+                llh += self.ext._likelihood(log=log) + np.log(self.midext_prob)*len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==True)])
+                llh += self.noext._likelihood(log=log) + np.log(1-self.midext_prob)*len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False)])
+            else:
+                llh *= self.ext._likelihood(log=log)*self.midext_prob**len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==True)])
+                llh *= self.noext._likelihood(log=log)*(1-self.midext_prob)**len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False)])
 
         return llh
 
diff --git a/lymph/midline_timeev.py b/lymph/midline_timeev.py
new file mode 100644
index 0000000..cccd8fd
--- /dev/null
+++ b/lymph/midline_timeev.py
@@ -0,0 +1,548 @@
+from typing import Dict, List, Optional, Tuple, Union
+
+import numpy as np
+import pandas as pd
+
+from .bilateral import Bilateral
+from .timemarg import MarginalizorDict
+
+
+class MidlineBilateral:
+    """Model a bilateral lymphatic system where an additional risk factor can
+    be provided in the data: Whether or not the primary tumor extended over the
+    mid-sagittal line.
+
+    It is reasonable to assume (and supported by data) that such an extension
+    significantly increases the risk for metastatic spread to the contralateral
+    side of the neck. This class attempts to capture this using a simple
+    assumption: We assume that the probability of spread to the contralateral
+    side for patients *with* midline extension is larger than for patients
+    *without* it, but smaller than the probability of spread to the ipsilateral
+    side. Formally:
+
+    .. math::
+        b_c^{\\in} = \\alpha \\cdot b_i + (1 - \\alpha) \\cdot b_c^{\\not\\in}
+
+    where :math:`b_c^{\\in}` is the probability of spread from the primary tumor
+    to the contralateral side for patients with midline extension, and
+    :math:`b_c^{\\not\\in}` for patients without. :math:`\\alpha` is the linear
+    mixing parameter.
+    """
+
+    def __init__(
+        self,
+        graph: Dict[Tuple[str], List[str]],
+        use_mixing: bool = True,
+        trans_symmetric: bool = True,
+        **_kwargs
+    ):
+        """The class is constructed in a similar fashion to the
+        :class:`Bilateral`: That class contains one :class:`Unilateral` for
+        each side of the neck, while this class will contain two instances of
+        :class:`Bilateral`, one for the case of a midline extension and one for
+        the case of no midline extension.
+
+        Args:
+            graph: Dictionary of the same kind as for initialization of
+                :class:`System`. This graph will be passed to the constructors of
+                two :class:`System` attributes of this class.
+            use_mixing: Describe the contralateral base spread probabilities for the
+                case of a midline extension as a linear combination between the base
+                spread probs of the ipsilateral side and the ones of the contralateral
+                side when no midline extension is present.
+            trans_symmetric: If ``True``, the spread probabilities among the
+                LNLs will be set symmetrically.
+
+        See Also:
+            :class:`Bilateral`: Two of these are held as attributes by this
+            class. One for the case of a mid-sagittal extension of the primary
+            tumor and one for the case of no such extension.
+        """
+        self.ext   = Bilateral(
+            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
+        )
+        self.noext = Bilateral(
+            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
+        )
+        
+        self.use_mixing = use_mixing
+        if self.use_mixing:
+            self.alpha_mix = 0.
+
+        self.noext.diag_time_dists = self.ext.diag_time_dists
+
+
+    @property
+    def graph(self) -> Dict[Tuple[str], List[str]]:
+        """Return the (unilateral) graph that was used to create this network.
+        """
+        return self.noext.graph
+
+
+    @property
+    def base_probs(self) -> np.ndarray:
+        """Base probabilities of metastatic lymphatic spread from the tumor(s)
+        to the lymph node levels. This will return the following concatenation of
+        base spread probs depending on whether ``use_mixing`` is set to ``True`` or
+        ``False``:
+
+        With the use of a mixing parameter:
+        +-----------+-------------------------+--------------+
+        | base ipsi | base contra (no midext) | mixing param |
+        +-----------+-------------------------+--------------+
+
+        Without it:
+        +-----------+----------------------+-------------------------+
+        | base ipsi | base contra (midext) | base contra (no midext) |
+        +-----------+----------------------+-------------------------+
+
+        When setting these, one needs to provide the respective shape.
+        """
+        if self.use_mixing:
+            return np.concatenate([
+                self.ext.ipsi.base_probs,
+                self.noext.contra.base_probs,
+                [self.alpha_mix],
+            ])
+        else:
+            return np.concatenate([
+                self.ext.ipsi.base_probs,
+                self.ext.contra.base_probs,
+                self.noext.contra.base_probs,
+            ])
+
+    @base_probs.setter
+    def base_probs(self, new_params: np.ndarray):
+        """Set the base probabilities from the tumor(s) to the LNLs, accounting
+        for the mixing parameter :math:`\\alpha``.
+        """
+        k = len(self.ext.ipsi.base_probs)
+
+        self.ext.ipsi.base_probs = new_params[:k]
+        self.noext.ipsi.base_probs = new_params[:k]
+
+        if self.use_mixing:
+            self.noext.contra.base_probs = new_params[k:2*k]
+            self.alpha_mix = new_params[-1]
+            # compute linear combination
+            self.ext.contra.base_probs = (
+                self.alpha_mix * self.ext.ipsi.base_probs
+                + (1. - self.alpha_mix) * self.noext.contra.base_probs
+            )
+        else:
+            self.ext.contra.base_probs = new_params[k:2*k]
+            self.noext.contra.base_probs = new_params[2*k:3*k]
+
+        # avoid unnecessary double computation of ipsilateral transition matrix
+        self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
+
+
+    @property
+    def trans_probs(self) -> np.ndarray:
+        """Probabilities of lymphatic spread among the lymph node levels. They
+        are assumed to be symmetric ipsi- & contralaterally by default.
+        """
+        return self.noext.trans_probs
+
+    @trans_probs.setter
+    def trans_probs(self, new_params: np.ndarray):
+        """Set the new spread probabilities for lymphatic spread from among the
+        LNLs.
+        """
+        self.noext.trans_probs = new_params
+        self.ext.trans_probs = new_params
+        # avoid unnecessary double computation of ipsilateral transition matrix
+        self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
+
+
+    @property
+    def spread_probs(self) -> np.ndarray:
+        """These are the probabilities representing the spread of cancer along
+        lymphatic drainage pathways per timestep.
+
+        It is composed of the base spread probs (possible with the mixing parameter)
+        and the probabilities of spread among the LNLs.
+
+        +-------------+-------------+
+        | base probs  | trans probs |
+        +-------------+-------------+
+        """
+        return np.concatenate([self.base_probs, self.trans_probs])
+
+    @spread_probs.setter
+    def spread_probs(self, new_params: np.ndarray):
+        """Set the new spread probabilities and the mixing parameter
+        :math:`\\alpha`.
+        """
+        num_base_probs = len(self.base_probs)
+
+        self.base_probs  = new_params[:num_base_probs]
+        self.trans_probs = new_params[num_base_probs:]
+
+
+    @property
+    def diag_time_dists(self) -> MarginalizorDict:
+        """This property holds the probability mass functions for marginalizing over
+        possible diagnose times for each T-stage.
+
+        When setting this property, one may also provide a normal Python dict, in
+        which case it tries to convert it to a :class:`MarginalizorDict`.
+
+        Note that the method will provide the same instance of this
+        :class:`MarginalizorDict` to both sides of the network.
+
+        See Also:
+            :class:`MarginalzorDict`, :class:`Marginalizor`.
+        """
+        return self.ext.diag_time_dists
+
+    @diag_time_dists.setter
+    def diag_time_dists(self, new_dists: Union[dict, MarginalizorDict]):
+        """Assign new :class:`MarginalizorDict` to this property. If it is a normal
+        Python dictionary, tr to convert it into a :class:`MarginalizorDict`.
+        """
+        self.ext.diag_time_dists = new_dists
+        self.noext.diag_time_dists = self.ext.diag_time_dists
+
+
+    @property
+    def modalities(self):
+        """A dictionary containing the specificity :math:`s_P` and sensitivity
+        :math:`s_N` values for each diagnostic modality.
+
+        Such a dictionary can also be provided to set this property and compute
+        the observation matrices of all used systems.
+
+        See Also:
+            :meth:`Bilateral.modalities`: Getting and setting this property in
+            the normal bilateral model.
+
+            :meth:`Unilateral.modalities`: Getting and setting :math:`s_P` and
+            :math:`s_N` for a unilateral model.
+        """
+        return self.noext.modalities
+
+    @modalities.setter
+    def modalities(self, modality_spsn: Dict[str, List[float]]):
+        """Call the respective getter and setter methods of the bilateral
+        components with and without midline extension.
+        """
+        self.noext.modalities = modality_spsn
+        self.ext.modalities = modality_spsn
+
+    @property
+    def midext_prob(self):
+        """Assign the last of the new_params to the midline extension probability
+        """
+        try:
+            return self._midext_prob
+        except AttributeError as attr_err:
+            raise AttributeError(
+                "No midline extension probability has been assigned"
+            ) from attr_err
+
+    @midext_prob.setter
+    def midext_prob(self, new_params):
+        """A variable containing the midline extension probability
+        """
+        self._midext_prob = new_params[-2]
+
+    @property
+    def midext_trans(self):
+        """Assign the last of the new_params to the midline extension transition probability
+        """
+        try:
+            return self._midext_trans
+        except AttributeError as attr_err:
+            raise AttributeError(
+                "No midline extension transition probability has been assigned"
+            ) from attr_err
+
+    @midext_trans.setter
+    def midext_trans(self, new_params):
+        """A variable containing the midline extension transition probability
+        """
+        self._midext_trans = new_params[-1]
+
+    @property
+    def patient_data(self):
+        """A pandas :class:`DataFrame` with rows of patients and columns of
+        patient and involvement details. The table's header should have three
+        levels that categorize the individual lymph node level's involvement to
+        the corresponding diagnostic modality (first level), the side of the
+        LNL (second level) and finaly the name of the LNL (third level).
+        Additionally, the patient's T-category must be stored under ('info',
+        'tumor', 't_stage') and whether the tumor extends over the mid-sagittal
+        line should be noted under ('info', 'tumor', 'midline_extension'). So,
+        part of this table could look like this:
+
+        +-----------------------------+------------------+------------------+
+        |            info             |       MRI        |       PET        |
+        +-----------------------------+--------+---------+--------+---------+
+        |            tumor            |  ipsi  | contra  |  ipsi  | contra  |
+        +---------+-------------------+--------+---------+--------+---------+
+        | t_stage | midline_extension |   II   |   II    |   II   |   II    |
+        +=========+===================+========+=========+========+=========+
+        | early   | ``True``          |``True``|``None`` |``True``|``False``|
+        +---------+-------------------+--------+---------+--------+---------+
+        | late    | ``True``          |``None``|``None`` |``None``|``None`` |
+        +---------+-------------------+--------+---------+--------+---------+
+        | early   | ``False``         |``True``|``False``|``True``|``True`` |
+        +---------+-------------------+--------+---------+--------+---------+
+        """
+        try:
+            return self._patient_data
+        except AttributeError:
+            raise AttributeError(
+                "No patient data has been loaded yet"
+            )
+
+    @patient_data.setter
+    def patient_data(self, patient_data: pd.DataFrame):
+        """Load the patient data. For now, this just calls the :meth:`load_data`
+        method, but at a later point, I would like to write a function here
+        that generates the pandas :class:`DataFrame` from the internal matrix
+        representation of the data.
+        """
+        self._patient_data = patient_data.copy()
+        self.load_data(patient_data)
+
+
+    def load_data(
+        self,
+        data: pd.DataFrame,
+        modality_spsn: Optional[Dict[str, List[float]]] = None,
+        mode = "HMM"
+    ):
+        """Load data as table of patients with involvement details and convert
+        it into internal representation of a matrix.
+
+        Args:
+            data: The table with rows of patients and columns of patient and
+                involvement details. The table's header must have three levels
+                that categorize the individual lymph node level's involvement
+                to the corresponding diagnostic modality (first level), the
+                side of the LNL (second level) and finaly the name of the LNL
+                (third level). Additionally, the patient's T-category must be
+                stored under ('info', 'tumor', 't_stage') and whether the tumor
+                extends over the mid-sagittal line should be noted under
+                ('info', 'tumor', 'midline_extension'). So, part of this table
+                could look like this:
+
+                +-----------------------------+---------------------+
+                |            info             |         MRI         |
+                +-----------------------------+----------+----------+
+                |            tumor            |   ipsi   |  contra  |
+                +---------+-------------------+----------+----------+
+                | t_stage | midline_extension |    II    |    II    |
+                +=========+===================+==========+==========+
+                | early   | ``True``          | ``True`` | ``None`` |
+                +---------+-------------------+----------+----------+
+                | late    | ``True``          | ``None`` | ``None`` |
+                +---------+-------------------+----------+----------+
+                | early   | ``False``         | ``True`` | ``True`` |
+                +---------+-------------------+----------+----------+
+
+            modality_spsn: If no diagnostic modalities have been defined yet,
+                this must be provided to build the observation matrix.
+
+        See Also:
+            :attr:`patient_data`: The attribute for loading and exporting data.
+
+            :meth:`Bilateral.load_data`: Loads data into a bilateral network by
+            splitting it into ipsi- & contralateral side and passing each to
+            the respective unilateral method (see below).
+
+            :meth:`Unilateral.load_data`: Data loading method of the unilateral
+            network.
+        """
+        ext_data = data
+        noext_data = data
+
+        self.ext.load_data(
+            ext_data,
+            modality_spsn=modality_spsn,
+            mode=mode
+        )
+        self.noext.load_data(
+            noext_data,
+            modality_spsn=modality_spsn,
+            mode=mode
+        )
+
+
+    def check_and_assign(self, new_params: np.ndarray):
+        """Check that the spread probability (rates) and the parameters for the
+        marginalization over diagnose times are all within limits and assign them to
+        the model. Also, make sure the ipsi- and contralateral distributions over
+        diagnose times are the same instance of the :class:`MarginalizorDict`.
+
+        Args:
+            new_params: The set of :attr:`spread_probs` and parameters to provide for
+                updating the parametrized distributions over diagnose times.
+
+        Warning:
+            This method assumes that the parametrized distributions (instances of
+            :class:`Marginalizor`) all raise a ``ValueError`` when provided with
+            invalid parameters.
+        """
+        k = len(self.spread_probs)
+        l = len(self.diag_time_dists)
+        new_spread_probs = new_params[:k]
+        new_marg_params = new_params[k:(k+l)]
+
+        try:
+            self.ext.ipsi.diag_time_dists.update(new_marg_params)
+            self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
+            self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists
+            self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
+            self.midext_prob = new_params
+            self.midext_trans = new_params
+        except ValueError as val_err:
+            raise ValueError(
+                "Parameters for marginalization over diagnose times are invalid"
+            ) from val_err
+
+        if new_spread_probs.shape != self.spread_probs.shape:
+            raise ValueError(
+                "Shape of provided spread parameters does not match network"
+            )
+        if np.any(0. > new_spread_probs) or np.any(new_spread_probs > 1.):
+            raise ValueError(
+                "Spread probs must be between 0 and 1"
+            )
+        if 0. > np.any(0. > new_params) or np.any(new_params > 1.):
+            raise ValueError(
+                "Probabilitiex must be between 0 and 1"
+            )
+
+        self.spread_probs = new_spread_probs
+
+
+    def likelihood(
+        self,
+        data: Optional[pd.DataFrame] = None,
+        given_params: Optional[np.ndarray] = None,
+        log: bool = True,
+    ) -> float:
+        """Compute log-likelihood of (already stored) data, given the spread
+        probabilities and either a discrete diagnose time or a distribution to
+        use for marginalization over diagnose times.
+
+        Args:
+            data: Table with rows of patients and columns of per-LNL involvment. See
+                :meth:`load_data` for more details on how this should look like.
+
+            given_params: The likelihood is a function of these parameters. They mainly
+                consist of the :attr:`spread_probs` of the model. Any excess parameters
+                will be used to update the parametrized distributions used for
+                marginalizing over the diagnose times (see :attr:`diag_time_dists`).
+
+            log: When ``True``, the log-likelihood is returned.
+
+        Returns:
+            The log-likelihood :math:`\\log{p(D \\mid \\theta)}` where :math:`D`
+            is the data and :math:`\\theta` is the tuple of spread probabilities
+            and diagnose times or distributions over diagnose times.
+
+        See Also:
+            :attr:`spread_probs`: Property for getting and setting the spread
+            probabilities, of which a lymphatic network has as many as it has
+            :class:`Edge` instances (in case no symmetries apply).
+
+            :meth:`Unilateral.likelihood`: The log-likelihood function of
+            the unilateral system.
+
+            :meth:`Bilateral.likelihood`: The (log-)likelihood function of the
+            bilateral system.
+        """
+        if data is not None:
+            self.patient_data = data
+
+        try:
+            self.check_and_assign(given_params)
+        except ValueError:
+            return -np.inf if log else 0.
+
+        llh = 0. if log else 1.
+
+        
+        state_probs = {}
+        state_probs["ipsi"] = self.ext.state_probs_ipsi()
+        state_probs["contra"] = self.ext.state_probs_contra()
+        state_probs2 = {}
+        state_probs2["ipsi"] = self.noext.state_probs_ipsi()
+        state_probs2["contra"] = self.noext.state_probs_contra()
+        t_stages = list(set(self.patient_data[("info","tumor","t_stage")]))
+        t_stages = self.ext.t_stages()
+        
+
+        for stage in t_stages:
+            p = self.ext._likelihood2(stage=stage, state_probs=state_probs)*(self.midext_prob)
+            p2 = self.noext._likelihood2(stage=stage, state_probs=state_probs2)*(1-self.midext_prob)
+            llh += np.sum(np.log(p+p2))
+            
+        return llh
+
+
+
+    def risk(
+        self,
+        given_params: Optional[np.ndarray] = None,
+        midline_extension: bool = True,
+        **kwargs,
+    ) -> float:
+        """Compute the risk of nodal involvement given a specific diagnose.
+
+        Args:
+            spread_probs: Set ot new spread parameters. This also contains the
+                mixing parameter alpha in the last position.
+            midline_extension: Whether or not the patient's tumor extends over
+                the mid-sagittal line.
+
+        See Also:
+            :meth:`Bilateral.risk`: Depending on whether or not the patient's
+            tumor does extend over the midline, the risk function of the
+            respective :class:`Bilateral` instance gets called.
+        """
+        if given_params is not None:
+            self.check_and_assign(given_params)
+
+        if midline_extension:
+            return self.ext.risk(**kwargs)
+        else:
+            return self.noext.risk(**kwargs)
+
+
+    def generate_dataset(
+        self,
+        num_patients: int,
+        stage_dist: Dict[str, float],
+        ext_prob: float,
+        **_kwargs,
+    ) -> pd.DataFrame:
+        """Generate/sample a pandas :class:`DataFrame` from the defined network.
+
+        Args:
+            num_patients: Number of patients to generate.
+            stage_dist: Probability to find a patient in a certain T-stage.
+            ext_prob: Probability that a patient's primary tumor extends over
+                the mid-sagittal line.
+        """
+        drawn_ext = np.random.choice(
+            [True, False], p=[ext_prob, 1. - ext_prob], size=num_patients
+        )
+        ext_dataset = self.ext.generate_dataset(
+            num_patients=num_patients,
+            stage_dist=stage_dist,
+        )
+        noext_dataset = self.noext.generate_dataset(
+            num_patients=num_patients,
+            stage_dist=stage_dist,
+        )
+
+        dataset = noext_dataset.copy()
+        dataset.loc[drawn_ext] = ext_dataset.loc[drawn_ext]
+        dataset[('info', 'tumor', 'midline_extension')] = drawn_ext
+
+        return dataset
\ No newline at end of file

From 50bbcf0585e482fed347393946dfd58432679eca Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Tue, 25 Apr 2023 01:36:13 +0200
Subject: [PATCH 05/35] added time evolution for midline extension

---
 lymph/midline_timeev.py | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/lymph/midline_timeev.py b/lymph/midline_timeev.py
index cccd8fd..2a1dc86 100644
--- a/lymph/midline_timeev.py
+++ b/lymph/midline_timeev.py
@@ -7,7 +7,7 @@
 from .timemarg import MarginalizorDict
 
 
-class MidlineBilateral:
+class MidlineBilateraltime:
     """Model a bilateral lymphatic system where an additional risk factor can
     be provided in the data: Whether or not the primary tumor extended over the
     mid-sagittal line.
@@ -230,10 +230,11 @@ def modalities(self, modality_spsn: Dict[str, List[float]]):
         self.noext.modalities = modality_spsn
         self.ext.modalities = modality_spsn
 
+    """
     @property
     def midext_prob(self):
-        """Assign the last of the new_params to the midline extension probability
-        """
+        Assign the last of the new_params to the midline extension probability
+        
         try:
             return self._midext_prob
         except AttributeError as attr_err:
@@ -243,10 +244,11 @@ def midext_prob(self):
 
     @midext_prob.setter
     def midext_prob(self, new_params):
-        """A variable containing the midline extension probability
-        """
+        A variable containing the midline extension probability
+        
         self._midext_prob = new_params[-2]
-
+    """
+        
     @property
     def midext_trans(self):
         """Assign the last of the new_params to the midline extension transition probability
@@ -396,7 +398,7 @@ def check_and_assign(self, new_params: np.ndarray):
             self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
             self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists
             self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
-            self.midext_prob = new_params
+            #self.midext_prob = new_params
             self.midext_trans = new_params
         except ValueError as val_err:
             raise ValueError(
@@ -413,7 +415,7 @@ def check_and_assign(self, new_params: np.ndarray):
             )
         if 0. > np.any(0. > new_params) or np.any(new_params > 1.):
             raise ValueError(
-                "Probabilitiex must be between 0 and 1"
+                "Probabilities must be between 0 and 1"
             )
 
         self.spread_probs = new_spread_probs
@@ -478,8 +480,8 @@ def likelihood(
         
 
         for stage in t_stages:
-            p = self.ext._likelihood2(stage=stage, state_probs=state_probs)*(self.midext_prob)
-            p2 = self.noext._likelihood2(stage=stage, state_probs=state_probs2)*(1-self.midext_prob)
+            p = self.ext._likelihood2(stage=stage, state_probs=state_probs)*(1-(1-self.midext_trans)**10)
+            p2 = self.noext._likelihood2(stage=stage, state_probs=state_probs2)*(1-self.midext_trans)**10
             llh += np.sum(np.log(p+p2))
             
         return llh

From 67984d4b951e2a0a2dce4fd1a691c352e4307b39 Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Tue, 25 Apr 2023 15:43:36 +0200
Subject: [PATCH 06/35] new class MidlineBilateraltime for time evolution of
 Midline extension

---
 .DS_Store         | Bin 0 -> 6148 bytes
 lymph/__init__.py |   3 ++-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 .DS_Store

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..f062562b8f00852fb43e4c196fffafb7a467d02a
GIT binary patch
literal 6148
zcmeH~!AitX3`K9W!hoAux_n<C_y=VezrY_59Z{h&LhHg^ck|nGlNY6>bn8NHAn&$$
zNgq9nrU78PufrX%1+byJ;^fQHeBXUx7a6&vnm^;vKX*^ZW4Fj$4|r{ZTfE{O!?gYi
z9lr5_PrS{;176T$KtBzyAD|r(5CIVo0TB>^83Fcex6O4`ts)=-BJfMVzYm4(+NO?G
z<I}+<MgVHVa2T&+mY^07P}|h8DkC(jrPNw#Ibv8XXS`%yn>tpl<**z+EKjzaP%KX8
z^Gl?|+NxSbKm_IleD<boz5n0SSLT1uk%@o^{8Iwf>>u_!zFhX!#cO)6E%ZnFk1;pW
mxkM|bMl0rxx8j>gUh!-0x2a>*C}%v%srnIcE;14L3j$|jIvw}`

literal 0
HcmV?d00001

diff --git a/lymph/__init__.py b/lymph/__init__.py
index fca3dc9..07ab0d9 100644
--- a/lymph/__init__.py
+++ b/lymph/__init__.py
@@ -17,6 +17,7 @@
 from .bilateral import Bilateral, BilateralSystem
 from .edge import Edge
 from .midline import MidlineBilateral
+from .midline_timeev import MidlineBilateraltime
 from .node import Node
 from .timemarg import Marginalizor, MarginalizorDict
 from .unilateral import System, Unilateral
@@ -26,6 +27,6 @@
     "Edge",
     "Unilateral", "System",
     "Bilateral", "BilateralSystem",
-    "MidlineBilateral",
+    "MidlineBilateral", "MidlineBilateraltime",
     "Marginalizor", "MarginalizorDict",
 ]

From 5aee89af9a9ddd62c9bac2e50c771af2204a6fda Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Mon, 29 May 2023 15:51:42 +0200
Subject: [PATCH 07/35] fixed bug when loading only data with no midline
 extension info

---
 .DS_Store               | Bin 6148 -> 6148 bytes
 lymph/.DS_Store         | Bin 0 -> 6148 bytes
 lymph/midline.py        |  38 +++++++++++++++++++++++++++-----------
 lymph/midline_timeev.py |   4 ++--
 4 files changed, 29 insertions(+), 13 deletions(-)
 create mode 100644 lymph/.DS_Store

diff --git a/.DS_Store b/.DS_Store
index f062562b8f00852fb43e4c196fffafb7a467d02a..590cf61ff4e029544061ad8649cc4baa5fd1c15c 100644
GIT binary patch
delta 293
zcmZoMXfc=|#>B)qF;Q%yo+2ab!~pA!4;mPOj2`9sr1Ii|q@4UD1_p-hNd-BX#U%y?
z*BF_YSy<WFIoLV4Ibwq|^2>uu5=%;pof3<pLA;Rs{G22hJ25FNGqpTkK*TveuOu-u
zFSQ7)Au}Zvs3aylGcP5-+$p~_FQpi)Hy9$p!O6iHFCbp+Qf*;oqN8AHS*xP}Wf~gP
z)^c))D(hPZ#b@W_=H+(+-3A1Vj1ZcE7fQpZu89j}L~|G_8FCp47&6L&i}G^v^U^^&
nHtu-BvYDNOp9ARojTgT&Pv#df6aX310FnhG28PW6B3qaN2_sG{

delta 74
zcmZoMXfc=|#>CJ*F;Q%yo+2aj!~knX=E*!v%A6_1$vH{+`8f=mm6@NgYz|<~X4=fo
e!OsCyyxEcYJM(0I5kp3X$u>OFn`1;)FarSiofGx|

diff --git a/lymph/.DS_Store b/lymph/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6
GIT binary patch
literal 6148
zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3
zem<@ulZcFPQ@L2!n>{z**<q8>++&mCkOWA81W14cNZ<zv;LbK1Poaz?KmsK2CSc!(
z0ynLxE!0092;Krf2c+FF_Fe*7ECH>lEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ
zLs35+`xjp>T0<F0fCPF1$Cyrb|F7^5{eNG?83~ZUUlGt@xh*qZDeu<Z%US-OSsOPv
j)R!Z4KLME7ReXlK;d!wEw5GODWMKRea10D2@KpjYNUI8I

literal 0
HcmV?d00001

diff --git a/lymph/midline.py b/lymph/midline.py
index 8a3da16..138dad8 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -369,17 +369,20 @@ def load_data(
         noext_data = data.loc[(data[("info", "tumor", "midline_extension")]==False)]
         unknown_data = data.loc[(data['info', 'tumor', 'midline_extension']!=False) & (data['info', 'tumor', 'midline_extension']!=True)] 
 
-        self.ext.load_data(
-            ext_data,
-            modality_spsn=modality_spsn,
-            mode=mode
-        )
-        self.noext.load_data(
-            noext_data,
-            modality_spsn=modality_spsn,
-            mode=mode
-        )
-        if len(unknown_data)>0:
+        if len(ext_data) > 0:
+            self.ext.load_data(
+                ext_data,
+                modality_spsn=modality_spsn,
+                mode=mode
+            )
+        
+        if len(noext_data) > 0:
+            self.noext.load_data(
+                noext_data,
+                modality_spsn=modality_spsn,
+                mode=mode
+            )
+        if len(unknown_data) > 0 & len(ext_data) + len(noext_data) > 0:
             self.ext_unknown.load_data(
                 unknown_data,
                 modality_spsn=modality_spsn,
@@ -392,6 +395,19 @@ def load_data(
                 mode=mode
             )
 
+        if len(unknown_data) > 0 & len(ext_data) + len(noext_data) == 0:
+            self.ext.load_data(
+                unknown_data,
+                modality_spsn=modality_spsn,
+                mode=mode
+            )
+
+            self.noext.load_data(
+                unknown_data,
+                modality_spsn=modality_spsn,
+                mode=mode
+            )
+
     def check_and_assign(self, new_params: np.ndarray):
         """Check that the spread probability (rates) and the parameters for the
         marginalization over diagnose times are all within limits and assign them to
diff --git a/lymph/midline_timeev.py b/lymph/midline_timeev.py
index 2a1dc86..a3a3f3c 100644
--- a/lymph/midline_timeev.py
+++ b/lymph/midline_timeev.py
@@ -480,8 +480,8 @@ def likelihood(
         
 
         for stage in t_stages:
-            p = self.ext._likelihood2(stage=stage, state_probs=state_probs)*(1-(1-self.midext_trans)**10)
-            p2 = self.noext._likelihood2(stage=stage, state_probs=state_probs2)*(1-self.midext_trans)**10
+            p = self.ext._likelihood2(stage=stage, state_probs=state_probs) * (1-((1-self.midext_trans) ** 10))
+            p2 = self.noext._likelihood2(stage=stage, state_probs=state_probs2) * ((1-self.midext_trans) ** 10)
             llh += np.sum(np.log(p+p2))
             
         return llh

From 5e8d07ddc072184253a7f57225b966f1ddf9970b Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Thu, 22 Jun 2023 14:45:38 +0200
Subject: [PATCH 08/35] deleted unused files

---
 lymph/midline_timeev2.py | 498 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 498 insertions(+)
 create mode 100644 lymph/midline_timeev2.py

diff --git a/lymph/midline_timeev2.py b/lymph/midline_timeev2.py
new file mode 100644
index 0000000..c82f676
--- /dev/null
+++ b/lymph/midline_timeev2.py
@@ -0,0 +1,498 @@
+from typing import Dict, List, Optional, Tuple, Union
+
+import numpy as np
+import pandas as pd
+
+from .bilateral import Bilateral
+from .timemarg import MarginalizorDict
+
+
+class MidlineBilateraltime:
+    """Model a bilateral lymphatic system where an additional risk factor can
+    be provided in the data: Whether or not the primary tumor extended over the
+    mid-sagittal line.
+
+    It is reasonable to assume (and supported by data) that such an extension
+    significantly increases the risk for metastatic spread to the contralateral
+    side of the neck. This class attempts to capture this using a simple
+    assumption: We assume that the probability of spread to the contralateral
+    side for patients *with* midline extension is larger than for patients
+    *without* it, but smaller than the probability of spread to the ipsilateral
+    side. Formally:
+
+    .. math::
+        b_c^{\\in} = \\alpha \\cdot b_i + (1 - \\alpha) \\cdot b_c^{\\not\\in}
+
+    where :math:`b_c^{\\in}` is the probability of spread from the primary tumor
+    to the contralateral side for patients with midline extension, and
+    :math:`b_c^{\\not\\in}` for patients without. :math:`\\alpha` is the linear
+    mixing parameter.
+    """
+
+    def __init__(
+        self,
+        graph: Dict[Tuple[str], List[str]],
+        use_mixing: bool = True,
+        trans_symmetric: bool = True,
+        **_kwargs
+    ):
+        """The class is constructed in a similar fashion to the
+        :class:`Bilateral`: That class contains one :class:`Unilateral` for
+        each side of the neck, while this class will contain two instances of
+        :class:`Bilateral`, one for the case of a midline extension and one for
+        the case of no midline extension.
+
+        Args:
+            graph: Dictionary of the same kind as for initialization of
+                :class:`System`. This graph will be passed to the constructors of
+                two :class:`System` attributes of this class.
+            use_mixing: Describe the contralateral base spread probabilities for the
+                case of a midline extension as a linear combination between the base
+                spread probs of the ipsilateral side and the ones of the contralateral
+                side when no midline extension is present.
+            trans_symmetric: If ``True``, the spread probabilities among the
+                LNLs will be set symmetrically.
+
+        See Also:
+            :class:`Bilateral`: Two of these are held as attributes by this
+            class. One for the case of a mid-sagittal extension of the primary
+            tumor and one for the case of no such extension.
+        """
+        self.ext   = Bilateral(
+            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
+        )
+        self.noext = Bilateral(
+            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
+        )
+        self.use_mixing = use_mixing
+        if self.use_mixing:
+            self.alpha_mix = 0.
+
+        self.noext.diag_time_dists = self.ext.diag_time_dists
+
+
+    @property
+    def graph(self) -> Dict[Tuple[str], List[str]]:
+        """Return the (unilateral) graph that was used to create this network.
+        """
+        return self.noext.graph
+
+
+    @property
+    def base_probs(self) -> np.ndarray:
+        """Base probabilities of metastatic lymphatic spread from the tumor(s)
+        to the lymph node levels. This will return the following concatenation of
+        base spread probs depending on whether ``use_mixing`` is set to ``True`` or
+        ``False``:
+
+        With the use of a mixing parameter:
+        +-----------+-------------------------+--------------+
+        | base ipsi | base contra (no midext) | mixing param |
+        +-----------+-------------------------+--------------+
+
+        Without it:
+        +-----------+----------------------+-------------------------+
+        | base ipsi | base contra (midext) | base contra (no midext) |
+        +-----------+----------------------+-------------------------+
+
+        When setting these, one needs to provide the respective shape.
+        """
+        if self.use_mixing:
+            return np.concatenate([
+                self.ext.ipsi.base_probs,
+                self.noext.contra.base_probs,
+                [self.alpha_mix],
+            ])
+        else:
+            return np.concatenate([
+                self.ext.ipsi.base_probs,
+                self.ext.contra.base_probs,
+                self.noext.contra.base_probs,
+            ])
+
+    @base_probs.setter
+    def base_probs(self, new_params: np.ndarray):
+        """Set the base probabilities from the tumor(s) to the LNLs, accounting
+        for the mixing parameter :math:`\\alpha``.
+        """
+        k = len(self.ext.ipsi.base_probs)
+
+        self.ext.ipsi.base_probs = new_params[:k]
+        self.noext.ipsi.base_probs = new_params[:k]
+
+        if self.use_mixing:
+            self.noext.contra.base_probs = new_params[k:2*k]
+            self.alpha_mix = new_params[-1]
+            # compute linear combination
+            self.ext.contra.base_probs = (
+                self.alpha_mix * self.ext.ipsi.base_probs
+                + (1. - self.alpha_mix) * self.noext.contra.base_probs
+            )
+        else:
+            self.ext.contra.base_probs = new_params[k:2*k]
+            self.noext.contra.base_probs = new_params[2*k:3*k]
+
+        # avoid unnecessary double computation of ipsilateral transition matrix
+        self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
+
+
+    @property
+    def trans_probs(self) -> np.ndarray:
+        """Probabilities of lymphatic spread among the lymph node levels. They
+        are assumed to be symmetric ipsi- & contralaterally by default.
+        """
+        return self.noext.trans_probs
+
+    @trans_probs.setter
+    def trans_probs(self, new_params: np.ndarray):
+        """Set the new spread probabilities for lymphatic spread from among the
+        LNLs.
+        """
+        self.noext.trans_probs = new_params
+        self.ext.trans_probs = new_params
+
+        # avoid unnecessary double computation of ipsilateral transition matrix
+        self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
+
+
+    @property
+    def spread_probs(self) -> np.ndarray:
+        """These are the probabilities representing the spread of cancer along
+        lymphatic drainage pathways per timestep.
+
+        It is composed of the base spread probs (possible with the mixing parameter)
+        and the probabilities of spread among the LNLs.
+
+        +-------------+-------------+
+        | base probs  | trans probs |
+        +-------------+-------------+
+        """
+        return np.concatenate([self.base_probs, self.trans_probs])
+
+    @spread_probs.setter
+    def spread_probs(self, new_params: np.ndarray):
+        """Set the new spread probabilities and the mixing parameter
+        :math:`\\alpha`.
+        """
+        num_base_probs = len(self.base_probs)
+
+        self.base_probs  = new_params[:num_base_probs]
+        self.trans_probs = new_params[num_base_probs:]
+
+
+    @property
+    def diag_time_dists(self) -> MarginalizorDict:
+        """This property holds the probability mass functions for marginalizing over
+        possible diagnose times for each T-stage.
+
+        When setting this property, one may also provide a normal Python dict, in
+        which case it tries to convert it to a :class:`MarginalizorDict`.
+
+        Note that the method will provide the same instance of this
+        :class:`MarginalizorDict` to both sides of the network.
+
+        See Also:
+            :class:`MarginalzorDict`, :class:`Marginalizor`.
+        """
+        return self.ext.diag_time_dists
+
+    @diag_time_dists.setter
+    def diag_time_dists(self, new_dists: Union[dict, MarginalizorDict]):
+        """Assign new :class:`MarginalizorDict` to this property. If it is a normal
+        Python dictionary, tr to convert it into a :class:`MarginalizorDict`.
+        """
+        self.ext.diag_time_dists = new_dists
+        self.noext.diag_time_dists = self.ext.diag_time_dists
+
+
+    @property
+    def modalities(self):
+        """A dictionary containing the specificity :math:`s_P` and sensitivity
+        :math:`s_N` values for each diagnostic modality.
+
+        Such a dictionary can also be provided to set this property and compute
+        the observation matrices of all used systems.
+
+        See Also:
+            :meth:`Bilateral.modalities`: Getting and setting this property in
+            the normal bilateral model.
+
+            :meth:`Unilateral.modalities`: Getting and setting :math:`s_P` and
+            :math:`s_N` for a unilateral model.
+        """
+        return self.noext.modalities
+
+    @modalities.setter
+    def modalities(self, modality_spsn: Dict[str, List[float]]):
+        """Call the respective getter and setter methods of the bilateral
+        components with and without midline extension.
+        """
+        self.noext.modalities = modality_spsn
+        self.ext.modalities = modality_spsn
+
+
+    @property
+    def patient_data(self):
+        """A pandas :class:`DataFrame` with rows of patients and columns of
+        patient and involvement details. The table's header should have three
+        levels that categorize the individual lymph node level's involvement to
+        the corresponding diagnostic modality (first level), the side of the
+        LNL (second level) and finaly the name of the LNL (third level).
+        Additionally, the patient's T-category must be stored under ('info',
+        'tumor', 't_stage') and whether the tumor extends over the mid-sagittal
+        line should be noted under ('info', 'tumor', 'midline_extension'). So,
+        part of this table could look like this:
+
+        +-----------------------------+------------------+------------------+
+        |            info             |       MRI        |       PET        |
+        +-----------------------------+--------+---------+--------+---------+
+        |            tumor            |  ipsi  | contra  |  ipsi  | contra  |
+        +---------+-------------------+--------+---------+--------+---------+
+        | t_stage | midline_extension |   II   |   II    |   II   |   II    |
+        +=========+===================+========+=========+========+=========+
+        | early   | ``True``          |``True``|``None`` |``True``|``False``|
+        +---------+-------------------+--------+---------+--------+---------+
+        | late    | ``True``          |``None``|``None`` |``None``|``None`` |
+        +---------+-------------------+--------+---------+--------+---------+
+        | early   | ``False``         |``True``|``False``|``True``|``True`` |
+        +---------+-------------------+--------+---------+--------+---------+
+        """
+        try:
+            return self._patient_data
+        except AttributeError:
+            raise AttributeError(
+                "No patient data has been loaded yet"
+            )
+
+    @patient_data.setter
+    def patient_data(self, patient_data: pd.DataFrame):
+        """Load the patient data. For now, this just calls the :meth:`load_data`
+        method, but at a later point, I would like to write a function here
+        that generates the pandas :class:`DataFrame` from the internal matrix
+        representation of the data.
+        """
+        self._patient_data = patient_data.copy()
+        self.load_data(patient_data)
+
+
+    def load_data(
+        self,
+        data: pd.DataFrame,
+        modality_spsn: Optional[Dict[str, List[float]]] = None,
+        mode = "HMM"
+    ):
+        """Load data as table of patients with involvement details and convert
+        it into internal representation of a matrix.
+
+        Args:
+            data: The table with rows of patients and columns of patient and
+                involvement details. The table's header must have three levels
+                that categorize the individual lymph node level's involvement
+                to the corresponding diagnostic modality (first level), the
+                side of the LNL (second level) and finaly the name of the LNL
+                (third level). Additionally, the patient's T-category must be
+                stored under ('info', 'tumor', 't_stage') and whether the tumor
+                extends over the mid-sagittal line should be noted under
+                ('info', 'tumor', 'midline_extension'). So, part of this table
+                could look like this:
+
+                +-----------------------------+---------------------+
+                |            info             |         MRI         |
+                +-----------------------------+----------+----------+
+                |            tumor            |   ipsi   |  contra  |
+                +---------+-------------------+----------+----------+
+                | t_stage | midline_extension |    II    |    II    |
+                +=========+===================+==========+==========+
+                | early   | ``True``          | ``True`` | ``None`` |
+                +---------+-------------------+----------+----------+
+                | late    | ``True``          | ``None`` | ``None`` |
+                +---------+-------------------+----------+----------+
+                | early   | ``False``         | ``True`` | ``True`` |
+                +---------+-------------------+----------+----------+
+
+            modality_spsn: If no diagnostic modalities have been defined yet,
+                this must be provided to build the observation matrix.
+
+        See Also:
+            :attr:`patient_data`: The attribute for loading and exporting data.
+
+            :meth:`Bilateral.load_data`: Loads data into a bilateral network by
+            splitting it into ipsi- & contralateral side and passing each to
+            the respective unilateral method (see below).
+
+            :meth:`Unilateral.load_data`: Data loading method of the unilateral
+            network.
+        """
+        ext_data = data.loc[data[("info", "tumor", "midline_extension")]]
+        noext_data = data.loc[~data[("info", "tumor", "midline_extension")]]
+
+        self.ext.load_data(
+            ext_data,
+            modality_spsn=modality_spsn,
+            mode=mode
+        )
+        self.noext.load_data(
+            noext_data,
+            modality_spsn=modality_spsn,
+            mode=mode
+        )
+
+
+    def check_and_assign(self, new_params: np.ndarray):
+        """Check that the spread probability (rates) and the parameters for the
+        marginalization over diagnose times are all within limits and assign them to
+        the model. Also, make sure the ipsi- and contralateral distributions over
+        diagnose times are the same instance of the :class:`MarginalizorDict`.
+
+        Args:
+            new_params: The set of :attr:`spread_probs` and parameters to provide for
+                updating the parametrized distributions over diagnose times.
+
+        Warning:
+            This method assumes that the parametrized distributions (instances of
+            :class:`Marginalizor`) all raise a ``ValueError`` when provided with
+            invalid parameters.
+        """
+        k = len(self.spread_probs)
+        new_spread_probs = new_params[:k]
+        new_marg_params = new_params[k:]
+
+        try:
+            self.ext.ipsi.diag_time_dists.update(new_marg_params)
+            self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
+            self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists
+            self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
+        except ValueError as val_err:
+            raise ValueError(
+                "Parameters for marginalization over diagnose times are invalid"
+            ) from val_err
+
+        if new_spread_probs.shape != self.spread_probs.shape:
+            raise ValueError(
+                "Shape of provided spread parameters does not match network"
+            )
+        if np.any(0. > new_spread_probs) or np.any(new_spread_probs > 1.):
+            raise ValueError(
+                "Spread probs must be between 0 and 1"
+            )
+
+        self.spread_probs = new_spread_probs
+
+
+    def likelihood(
+        self,
+        data: Optional[pd.DataFrame] = None,
+        given_params: Optional[np.ndarray] = None,
+        log: bool = True,
+    ) -> float:
+        """Compute log-likelihood of (already stored) data, given the spread
+        probabilities and either a discrete diagnose time or a distribution to
+        use for marginalization over diagnose times.
+
+        Args:
+            data: Table with rows of patients and columns of per-LNL involvment. See
+                :meth:`load_data` for more details on how this should look like.
+
+            given_params: The likelihood is a function of these parameters. They mainly
+                consist of the :attr:`spread_probs` of the model. Any excess parameters
+                will be used to update the parametrized distributions used for
+                marginalizing over the diagnose times (see :attr:`diag_time_dists`).
+
+            log: When ``True``, the log-likelihood is returned.
+
+        Returns:
+            The log-likelihood :math:`\\log{p(D \\mid \\theta)}` where :math:`D`
+            is the data and :math:`\\theta` is the tuple of spread probabilities
+            and diagnose times or distributions over diagnose times.
+
+        See Also:
+            :attr:`spread_probs`: Property for getting and setting the spread
+            probabilities, of which a lymphatic network has as many as it has
+            :class:`Edge` instances (in case no symmetries apply).
+
+            :meth:`Unilateral.likelihood`: The log-likelihood function of
+            the unilateral system.
+
+            :meth:`Bilateral.likelihood`: The (log-)likelihood function of the
+            bilateral system.
+        """
+        if data is not None:
+            self.patient_data = data
+
+        try:
+            self.check_and_assign(given_params)
+        except ValueError:
+            return -np.inf if log else 0.
+
+        llh = 0. if log else 1.
+
+        if log:
+            llh += self.ext._likelihood(log=log)
+            llh += self.noext._likelihood(log=log)
+        else:
+            llh *= self.ext._likelihood(log=log)
+            llh *= self.noext._likelihood(log=log)
+
+        return llh
+
+
+    def risk(
+        self,
+        given_params: Optional[np.ndarray] = None,
+        midline_extension: bool = True,
+        **kwargs,
+    ) -> float:
+        """Compute the risk of nodal involvement given a specific diagnose.
+
+        Args:
+            spread_probs: Set ot new spread parameters. This also contains the
+                mixing parameter alpha in the last position.
+            midline_extension: Whether or not the patient's tumor extends over
+                the mid-sagittal line.
+
+        See Also:
+            :meth:`Bilateral.risk`: Depending on whether or not the patient's
+            tumor does extend over the midline, the risk function of the
+            respective :class:`Bilateral` instance gets called.
+        """
+        if given_params is not None:
+            self.check_and_assign(given_params)
+
+        if midline_extension:
+            return self.ext.risk(**kwargs)
+        else:
+            return self.noext.risk(**kwargs)
+
+
+    def generate_dataset(
+        self,
+        num_patients: int,
+        stage_dist: Dict[str, float],
+        ext_prob: float,
+        **_kwargs,
+    ) -> pd.DataFrame:
+        """Generate/sample a pandas :class:`DataFrame` from the defined network.
+
+        Args:
+            num_patients: Number of patients to generate.
+            stage_dist: Probability to find a patient in a certain T-stage.
+            ext_prob: Probability that a patient's primary tumor extends over
+                the mid-sagittal line.
+        """
+        drawn_ext = np.random.choice(
+            [True, False], p=[ext_prob, 1. - ext_prob], size=num_patients
+        )
+        ext_dataset = self.ext.generate_dataset(
+            num_patients=num_patients,
+            stage_dist=stage_dist,
+        )
+        noext_dataset = self.noext.generate_dataset(
+            num_patients=num_patients,
+            stage_dist=stage_dist,
+        )
+
+        dataset = noext_dataset.copy()
+        dataset.loc[drawn_ext] = ext_dataset.loc[drawn_ext]
+        dataset[('info', 'tumor', 'midline_extension')] = drawn_ext
+
+        return dataset
\ No newline at end of file

From 5e47de15309b54e27f713edd1bcc284304642459 Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Thu, 22 Jun 2023 14:45:50 +0200
Subject: [PATCH 09/35] deleted unused files

---
 .DS_Store               | Bin 6148 -> 6148 bytes
 lymph/midline_timeev.py | 550 ----------------------------------------
 notebook/.DS_Store      | Bin 0 -> 6148 bytes
 tests/.DS_Store         | Bin 0 -> 6148 bytes
 4 files changed, 550 deletions(-)
 delete mode 100644 lymph/midline_timeev.py
 create mode 100644 notebook/.DS_Store
 create mode 100644 tests/.DS_Store

diff --git a/.DS_Store b/.DS_Store
index 590cf61ff4e029544061ad8649cc4baa5fd1c15c..398457f49e045549ecb6e922410235e839c8a9e6 100644
GIT binary patch
delta 95
zcmZoMXffDe#l*N{vNcnoyF_)hxrw2Uf{CSJt&T#qg_((tf~lc#Z7nB<sItCwP<(by
fZeD&D0~jzeLTCnFC=H{!H-BOhVcpEm@s}R}DR&bl

delta 94
zcmZoMXffDe#l*OMvNcnon|O7#g_((tf~jS#jslcvXi!_r$sww&Zygk$os*lF-^l<5
ZjEoSPffq`{sIJYQnM7DOvvK_62LKfw67>K8

diff --git a/lymph/midline_timeev.py b/lymph/midline_timeev.py
deleted file mode 100644
index a3a3f3c..0000000
--- a/lymph/midline_timeev.py
+++ /dev/null
@@ -1,550 +0,0 @@
-from typing import Dict, List, Optional, Tuple, Union
-
-import numpy as np
-import pandas as pd
-
-from .bilateral import Bilateral
-from .timemarg import MarginalizorDict
-
-
-class MidlineBilateraltime:
-    """Model a bilateral lymphatic system where an additional risk factor can
-    be provided in the data: Whether or not the primary tumor extended over the
-    mid-sagittal line.
-
-    It is reasonable to assume (and supported by data) that such an extension
-    significantly increases the risk for metastatic spread to the contralateral
-    side of the neck. This class attempts to capture this using a simple
-    assumption: We assume that the probability of spread to the contralateral
-    side for patients *with* midline extension is larger than for patients
-    *without* it, but smaller than the probability of spread to the ipsilateral
-    side. Formally:
-
-    .. math::
-        b_c^{\\in} = \\alpha \\cdot b_i + (1 - \\alpha) \\cdot b_c^{\\not\\in}
-
-    where :math:`b_c^{\\in}` is the probability of spread from the primary tumor
-    to the contralateral side for patients with midline extension, and
-    :math:`b_c^{\\not\\in}` for patients without. :math:`\\alpha` is the linear
-    mixing parameter.
-    """
-
-    def __init__(
-        self,
-        graph: Dict[Tuple[str], List[str]],
-        use_mixing: bool = True,
-        trans_symmetric: bool = True,
-        **_kwargs
-    ):
-        """The class is constructed in a similar fashion to the
-        :class:`Bilateral`: That class contains one :class:`Unilateral` for
-        each side of the neck, while this class will contain two instances of
-        :class:`Bilateral`, one for the case of a midline extension and one for
-        the case of no midline extension.
-
-        Args:
-            graph: Dictionary of the same kind as for initialization of
-                :class:`System`. This graph will be passed to the constructors of
-                two :class:`System` attributes of this class.
-            use_mixing: Describe the contralateral base spread probabilities for the
-                case of a midline extension as a linear combination between the base
-                spread probs of the ipsilateral side and the ones of the contralateral
-                side when no midline extension is present.
-            trans_symmetric: If ``True``, the spread probabilities among the
-                LNLs will be set symmetrically.
-
-        See Also:
-            :class:`Bilateral`: Two of these are held as attributes by this
-            class. One for the case of a mid-sagittal extension of the primary
-            tumor and one for the case of no such extension.
-        """
-        self.ext   = Bilateral(
-            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
-        )
-        self.noext = Bilateral(
-            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
-        )
-        
-        self.use_mixing = use_mixing
-        if self.use_mixing:
-            self.alpha_mix = 0.
-
-        self.noext.diag_time_dists = self.ext.diag_time_dists
-
-
-    @property
-    def graph(self) -> Dict[Tuple[str], List[str]]:
-        """Return the (unilateral) graph that was used to create this network.
-        """
-        return self.noext.graph
-
-
-    @property
-    def base_probs(self) -> np.ndarray:
-        """Base probabilities of metastatic lymphatic spread from the tumor(s)
-        to the lymph node levels. This will return the following concatenation of
-        base spread probs depending on whether ``use_mixing`` is set to ``True`` or
-        ``False``:
-
-        With the use of a mixing parameter:
-        +-----------+-------------------------+--------------+
-        | base ipsi | base contra (no midext) | mixing param |
-        +-----------+-------------------------+--------------+
-
-        Without it:
-        +-----------+----------------------+-------------------------+
-        | base ipsi | base contra (midext) | base contra (no midext) |
-        +-----------+----------------------+-------------------------+
-
-        When setting these, one needs to provide the respective shape.
-        """
-        if self.use_mixing:
-            return np.concatenate([
-                self.ext.ipsi.base_probs,
-                self.noext.contra.base_probs,
-                [self.alpha_mix],
-            ])
-        else:
-            return np.concatenate([
-                self.ext.ipsi.base_probs,
-                self.ext.contra.base_probs,
-                self.noext.contra.base_probs,
-            ])
-
-    @base_probs.setter
-    def base_probs(self, new_params: np.ndarray):
-        """Set the base probabilities from the tumor(s) to the LNLs, accounting
-        for the mixing parameter :math:`\\alpha``.
-        """
-        k = len(self.ext.ipsi.base_probs)
-
-        self.ext.ipsi.base_probs = new_params[:k]
-        self.noext.ipsi.base_probs = new_params[:k]
-
-        if self.use_mixing:
-            self.noext.contra.base_probs = new_params[k:2*k]
-            self.alpha_mix = new_params[-1]
-            # compute linear combination
-            self.ext.contra.base_probs = (
-                self.alpha_mix * self.ext.ipsi.base_probs
-                + (1. - self.alpha_mix) * self.noext.contra.base_probs
-            )
-        else:
-            self.ext.contra.base_probs = new_params[k:2*k]
-            self.noext.contra.base_probs = new_params[2*k:3*k]
-
-        # avoid unnecessary double computation of ipsilateral transition matrix
-        self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
-
-
-    @property
-    def trans_probs(self) -> np.ndarray:
-        """Probabilities of lymphatic spread among the lymph node levels. They
-        are assumed to be symmetric ipsi- & contralaterally by default.
-        """
-        return self.noext.trans_probs
-
-    @trans_probs.setter
-    def trans_probs(self, new_params: np.ndarray):
-        """Set the new spread probabilities for lymphatic spread from among the
-        LNLs.
-        """
-        self.noext.trans_probs = new_params
-        self.ext.trans_probs = new_params
-        # avoid unnecessary double computation of ipsilateral transition matrix
-        self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
-
-
-    @property
-    def spread_probs(self) -> np.ndarray:
-        """These are the probabilities representing the spread of cancer along
-        lymphatic drainage pathways per timestep.
-
-        It is composed of the base spread probs (possible with the mixing parameter)
-        and the probabilities of spread among the LNLs.
-
-        +-------------+-------------+
-        | base probs  | trans probs |
-        +-------------+-------------+
-        """
-        return np.concatenate([self.base_probs, self.trans_probs])
-
-    @spread_probs.setter
-    def spread_probs(self, new_params: np.ndarray):
-        """Set the new spread probabilities and the mixing parameter
-        :math:`\\alpha`.
-        """
-        num_base_probs = len(self.base_probs)
-
-        self.base_probs  = new_params[:num_base_probs]
-        self.trans_probs = new_params[num_base_probs:]
-
-
-    @property
-    def diag_time_dists(self) -> MarginalizorDict:
-        """This property holds the probability mass functions for marginalizing over
-        possible diagnose times for each T-stage.
-
-        When setting this property, one may also provide a normal Python dict, in
-        which case it tries to convert it to a :class:`MarginalizorDict`.
-
-        Note that the method will provide the same instance of this
-        :class:`MarginalizorDict` to both sides of the network.
-
-        See Also:
-            :class:`MarginalzorDict`, :class:`Marginalizor`.
-        """
-        return self.ext.diag_time_dists
-
-    @diag_time_dists.setter
-    def diag_time_dists(self, new_dists: Union[dict, MarginalizorDict]):
-        """Assign new :class:`MarginalizorDict` to this property. If it is a normal
-        Python dictionary, tr to convert it into a :class:`MarginalizorDict`.
-        """
-        self.ext.diag_time_dists = new_dists
-        self.noext.diag_time_dists = self.ext.diag_time_dists
-
-
-    @property
-    def modalities(self):
-        """A dictionary containing the specificity :math:`s_P` and sensitivity
-        :math:`s_N` values for each diagnostic modality.
-
-        Such a dictionary can also be provided to set this property and compute
-        the observation matrices of all used systems.
-
-        See Also:
-            :meth:`Bilateral.modalities`: Getting and setting this property in
-            the normal bilateral model.
-
-            :meth:`Unilateral.modalities`: Getting and setting :math:`s_P` and
-            :math:`s_N` for a unilateral model.
-        """
-        return self.noext.modalities
-
-    @modalities.setter
-    def modalities(self, modality_spsn: Dict[str, List[float]]):
-        """Call the respective getter and setter methods of the bilateral
-        components with and without midline extension.
-        """
-        self.noext.modalities = modality_spsn
-        self.ext.modalities = modality_spsn
-
-    """
-    @property
-    def midext_prob(self):
-        Assign the last of the new_params to the midline extension probability
-        
-        try:
-            return self._midext_prob
-        except AttributeError as attr_err:
-            raise AttributeError(
-                "No midline extension probability has been assigned"
-            ) from attr_err
-
-    @midext_prob.setter
-    def midext_prob(self, new_params):
-        A variable containing the midline extension probability
-        
-        self._midext_prob = new_params[-2]
-    """
-        
-    @property
-    def midext_trans(self):
-        """Assign the last of the new_params to the midline extension transition probability
-        """
-        try:
-            return self._midext_trans
-        except AttributeError as attr_err:
-            raise AttributeError(
-                "No midline extension transition probability has been assigned"
-            ) from attr_err
-
-    @midext_trans.setter
-    def midext_trans(self, new_params):
-        """A variable containing the midline extension transition probability
-        """
-        self._midext_trans = new_params[-1]
-
-    @property
-    def patient_data(self):
-        """A pandas :class:`DataFrame` with rows of patients and columns of
-        patient and involvement details. The table's header should have three
-        levels that categorize the individual lymph node level's involvement to
-        the corresponding diagnostic modality (first level), the side of the
-        LNL (second level) and finaly the name of the LNL (third level).
-        Additionally, the patient's T-category must be stored under ('info',
-        'tumor', 't_stage') and whether the tumor extends over the mid-sagittal
-        line should be noted under ('info', 'tumor', 'midline_extension'). So,
-        part of this table could look like this:
-
-        +-----------------------------+------------------+------------------+
-        |            info             |       MRI        |       PET        |
-        +-----------------------------+--------+---------+--------+---------+
-        |            tumor            |  ipsi  | contra  |  ipsi  | contra  |
-        +---------+-------------------+--------+---------+--------+---------+
-        | t_stage | midline_extension |   II   |   II    |   II   |   II    |
-        +=========+===================+========+=========+========+=========+
-        | early   | ``True``          |``True``|``None`` |``True``|``False``|
-        +---------+-------------------+--------+---------+--------+---------+
-        | late    | ``True``          |``None``|``None`` |``None``|``None`` |
-        +---------+-------------------+--------+---------+--------+---------+
-        | early   | ``False``         |``True``|``False``|``True``|``True`` |
-        +---------+-------------------+--------+---------+--------+---------+
-        """
-        try:
-            return self._patient_data
-        except AttributeError:
-            raise AttributeError(
-                "No patient data has been loaded yet"
-            )
-
-    @patient_data.setter
-    def patient_data(self, patient_data: pd.DataFrame):
-        """Load the patient data. For now, this just calls the :meth:`load_data`
-        method, but at a later point, I would like to write a function here
-        that generates the pandas :class:`DataFrame` from the internal matrix
-        representation of the data.
-        """
-        self._patient_data = patient_data.copy()
-        self.load_data(patient_data)
-
-
-    def load_data(
-        self,
-        data: pd.DataFrame,
-        modality_spsn: Optional[Dict[str, List[float]]] = None,
-        mode = "HMM"
-    ):
-        """Load data as table of patients with involvement details and convert
-        it into internal representation of a matrix.
-
-        Args:
-            data: The table with rows of patients and columns of patient and
-                involvement details. The table's header must have three levels
-                that categorize the individual lymph node level's involvement
-                to the corresponding diagnostic modality (first level), the
-                side of the LNL (second level) and finaly the name of the LNL
-                (third level). Additionally, the patient's T-category must be
-                stored under ('info', 'tumor', 't_stage') and whether the tumor
-                extends over the mid-sagittal line should be noted under
-                ('info', 'tumor', 'midline_extension'). So, part of this table
-                could look like this:
-
-                +-----------------------------+---------------------+
-                |            info             |         MRI         |
-                +-----------------------------+----------+----------+
-                |            tumor            |   ipsi   |  contra  |
-                +---------+-------------------+----------+----------+
-                | t_stage | midline_extension |    II    |    II    |
-                +=========+===================+==========+==========+
-                | early   | ``True``          | ``True`` | ``None`` |
-                +---------+-------------------+----------+----------+
-                | late    | ``True``          | ``None`` | ``None`` |
-                +---------+-------------------+----------+----------+
-                | early   | ``False``         | ``True`` | ``True`` |
-                +---------+-------------------+----------+----------+
-
-            modality_spsn: If no diagnostic modalities have been defined yet,
-                this must be provided to build the observation matrix.
-
-        See Also:
-            :attr:`patient_data`: The attribute for loading and exporting data.
-
-            :meth:`Bilateral.load_data`: Loads data into a bilateral network by
-            splitting it into ipsi- & contralateral side and passing each to
-            the respective unilateral method (see below).
-
-            :meth:`Unilateral.load_data`: Data loading method of the unilateral
-            network.
-        """
-        ext_data = data
-        noext_data = data
-
-        self.ext.load_data(
-            ext_data,
-            modality_spsn=modality_spsn,
-            mode=mode
-        )
-        self.noext.load_data(
-            noext_data,
-            modality_spsn=modality_spsn,
-            mode=mode
-        )
-
-
-    def check_and_assign(self, new_params: np.ndarray):
-        """Check that the spread probability (rates) and the parameters for the
-        marginalization over diagnose times are all within limits and assign them to
-        the model. Also, make sure the ipsi- and contralateral distributions over
-        diagnose times are the same instance of the :class:`MarginalizorDict`.
-
-        Args:
-            new_params: The set of :attr:`spread_probs` and parameters to provide for
-                updating the parametrized distributions over diagnose times.
-
-        Warning:
-            This method assumes that the parametrized distributions (instances of
-            :class:`Marginalizor`) all raise a ``ValueError`` when provided with
-            invalid parameters.
-        """
-        k = len(self.spread_probs)
-        l = len(self.diag_time_dists)
-        new_spread_probs = new_params[:k]
-        new_marg_params = new_params[k:(k+l)]
-
-        try:
-            self.ext.ipsi.diag_time_dists.update(new_marg_params)
-            self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
-            self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists
-            self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
-            #self.midext_prob = new_params
-            self.midext_trans = new_params
-        except ValueError as val_err:
-            raise ValueError(
-                "Parameters for marginalization over diagnose times are invalid"
-            ) from val_err
-
-        if new_spread_probs.shape != self.spread_probs.shape:
-            raise ValueError(
-                "Shape of provided spread parameters does not match network"
-            )
-        if np.any(0. > new_spread_probs) or np.any(new_spread_probs > 1.):
-            raise ValueError(
-                "Spread probs must be between 0 and 1"
-            )
-        if 0. > np.any(0. > new_params) or np.any(new_params > 1.):
-            raise ValueError(
-                "Probabilities must be between 0 and 1"
-            )
-
-        self.spread_probs = new_spread_probs
-
-
-    def likelihood(
-        self,
-        data: Optional[pd.DataFrame] = None,
-        given_params: Optional[np.ndarray] = None,
-        log: bool = True,
-    ) -> float:
-        """Compute log-likelihood of (already stored) data, given the spread
-        probabilities and either a discrete diagnose time or a distribution to
-        use for marginalization over diagnose times.
-
-        Args:
-            data: Table with rows of patients and columns of per-LNL involvment. See
-                :meth:`load_data` for more details on how this should look like.
-
-            given_params: The likelihood is a function of these parameters. They mainly
-                consist of the :attr:`spread_probs` of the model. Any excess parameters
-                will be used to update the parametrized distributions used for
-                marginalizing over the diagnose times (see :attr:`diag_time_dists`).
-
-            log: When ``True``, the log-likelihood is returned.
-
-        Returns:
-            The log-likelihood :math:`\\log{p(D \\mid \\theta)}` where :math:`D`
-            is the data and :math:`\\theta` is the tuple of spread probabilities
-            and diagnose times or distributions over diagnose times.
-
-        See Also:
-            :attr:`spread_probs`: Property for getting and setting the spread
-            probabilities, of which a lymphatic network has as many as it has
-            :class:`Edge` instances (in case no symmetries apply).
-
-            :meth:`Unilateral.likelihood`: The log-likelihood function of
-            the unilateral system.
-
-            :meth:`Bilateral.likelihood`: The (log-)likelihood function of the
-            bilateral system.
-        """
-        if data is not None:
-            self.patient_data = data
-
-        try:
-            self.check_and_assign(given_params)
-        except ValueError:
-            return -np.inf if log else 0.
-
-        llh = 0. if log else 1.
-
-        
-        state_probs = {}
-        state_probs["ipsi"] = self.ext.state_probs_ipsi()
-        state_probs["contra"] = self.ext.state_probs_contra()
-        state_probs2 = {}
-        state_probs2["ipsi"] = self.noext.state_probs_ipsi()
-        state_probs2["contra"] = self.noext.state_probs_contra()
-        t_stages = list(set(self.patient_data[("info","tumor","t_stage")]))
-        t_stages = self.ext.t_stages()
-        
-
-        for stage in t_stages:
-            p = self.ext._likelihood2(stage=stage, state_probs=state_probs) * (1-((1-self.midext_trans) ** 10))
-            p2 = self.noext._likelihood2(stage=stage, state_probs=state_probs2) * ((1-self.midext_trans) ** 10)
-            llh += np.sum(np.log(p+p2))
-            
-        return llh
-
-
-
-    def risk(
-        self,
-        given_params: Optional[np.ndarray] = None,
-        midline_extension: bool = True,
-        **kwargs,
-    ) -> float:
-        """Compute the risk of nodal involvement given a specific diagnose.
-
-        Args:
-            spread_probs: Set ot new spread parameters. This also contains the
-                mixing parameter alpha in the last position.
-            midline_extension: Whether or not the patient's tumor extends over
-                the mid-sagittal line.
-
-        See Also:
-            :meth:`Bilateral.risk`: Depending on whether or not the patient's
-            tumor does extend over the midline, the risk function of the
-            respective :class:`Bilateral` instance gets called.
-        """
-        if given_params is not None:
-            self.check_and_assign(given_params)
-
-        if midline_extension:
-            return self.ext.risk(**kwargs)
-        else:
-            return self.noext.risk(**kwargs)
-
-
-    def generate_dataset(
-        self,
-        num_patients: int,
-        stage_dist: Dict[str, float],
-        ext_prob: float,
-        **_kwargs,
-    ) -> pd.DataFrame:
-        """Generate/sample a pandas :class:`DataFrame` from the defined network.
-
-        Args:
-            num_patients: Number of patients to generate.
-            stage_dist: Probability to find a patient in a certain T-stage.
-            ext_prob: Probability that a patient's primary tumor extends over
-                the mid-sagittal line.
-        """
-        drawn_ext = np.random.choice(
-            [True, False], p=[ext_prob, 1. - ext_prob], size=num_patients
-        )
-        ext_dataset = self.ext.generate_dataset(
-            num_patients=num_patients,
-            stage_dist=stage_dist,
-        )
-        noext_dataset = self.noext.generate_dataset(
-            num_patients=num_patients,
-            stage_dist=stage_dist,
-        )
-
-        dataset = noext_dataset.copy()
-        dataset.loc[drawn_ext] = ext_dataset.loc[drawn_ext]
-        dataset[('info', 'tumor', 'midline_extension')] = drawn_ext
-
-        return dataset
\ No newline at end of file
diff --git a/notebook/.DS_Store b/notebook/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..356e6aa405b98fa510816e4e5bb2f5d1b6f924df
GIT binary patch
literal 6148
zcmeHK!A`<J5S>L5FdF1!ykPbh@Cz!bKOjFK2%-s1Q6$D&PwGLhp8Xh4p1k=h9(}X3
zLbtWvG{(#%yKg%?vweLf9fpWZZ!zi;wTY;S!B|_z^o8+0kCN5AXA@}njOnD?KTeZb
zDN-GORRMl?bsEutE@=?HzpbbIUXsS~aGDO`Yd*fc+-$!;?}z)NZ}(@X`QJu_RBF%(
zoza9AbU|a92Y0{kb;=QW4p--|pBxJ3B<YGtbyUFT!3v)_bVz7M(;}Z%=P+}8f`~ky
z>$|;MuStUnQ7b;xQF%VCUB`zxsi^|0fGY443ZQ1QH8%uxRs~c6RbZ(Ae;+IcW8^Ut
zbe|4Pt_1+r;dX{K_Y$0AJw_fgLF~Ycqyi<?_!h%RI{enhMIJLjNhjl*599A_e1~G(
z-EqEk;bbB~omBx<AXH#YF56uH532kBFiEde0af5%DPZbxKkng?d~Gc~9M@WlafHFf
nahc#U1q)Y-5zD1`2g4csmKVUtV<rd>O#cWt8FW$wepP`FKmcw^

literal 0
HcmV?d00001

diff --git a/tests/.DS_Store b/tests/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..3ed183189058d69fe4a99f96ff42e47c750c2009
GIT binary patch
literal 6148
zcmeHKu};H447E#!pbm7Z81EOlFo!C9L4N>BQK^(P6>9fv{0E=Gz{D>w@F$Es+eb*0
z7KsI+%9ecZ;<IzkyXbsJL_FOsCPX75N}+;-Q#5--#zlKFh!z=SeaDg>=!UkmqN?ZZ
zhX2TbtlbdnwZ^L5QG5N?#d0#ct;&t1eKtPqSyp*IFRD2_(x;b;$MMJO<xk#=UwCis
z+gZqeN(t?d?~1nF44>cIW9^)Zw<f11wyeIh*f-vpmvvs5+r=4h2AqLoU;s5+Bpn)h
z>kK#p&OpO}><<Adm_@7&b?ZQ*R{&rFvkLlpRssew0JDg-AuJHKP@si!ykfA0!yckv
z7O^(8aAKVq?};;WyrF!Z)HtX+ac1bPGvEw#891@Wk<|aI!}tGgkUu#C&cIPIz)3#K
zr}(5;TYDcTwKhPnp(5f}8#W>6XeoxTl;U%!3hY5Tz${{I2n)my0!o87&cL5C@Cj*J
BQJw$*

literal 0
HcmV?d00001


From ade30e2f8e47c1206c8a701e176b5f041d2a9994 Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Thu, 22 Jun 2023 15:09:22 +0200
Subject: [PATCH 10/35] added DS_Store to gitignore

---
 .DS_Store          | Bin 6148 -> 0 bytes
 .gitignore         |   4 +++-
 notebook/.DS_Store | Bin 6148 -> 0 bytes
 tests/.DS_Store    | Bin 6148 -> 0 bytes
 4 files changed, 3 insertions(+), 1 deletion(-)
 delete mode 100644 .DS_Store
 delete mode 100644 notebook/.DS_Store
 delete mode 100644 tests/.DS_Store

diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index 398457f49e045549ecb6e922410235e839c8a9e6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeHKUrPc(5T7;86A64M@NuD6DJA<yFNfI|=%OAf%<Dvtu3PcWBt*F9`a*q@K2I~d
zYZ%sJQ85EEzunoHJ?D4Ft^)v~H4Yj86##Hh33DYhp9rm!E=kUM3W$RC2$I{#S1MYH
zWXEr1fZkmmwxACoq>Jyb01-UGJq%&sPm-Y@DIY^${9zO(X}$g`ax3}5YSAe=W#`%(
zsEL<$({a-3jxK3-hM$2yA9wxpus7*c){j-3cEh+g(g|VE!;q_sFb>qDrN(iP=v>b%
zI3=glsZ^)a{br*kn}>~AO->K?nl-uG*qO~r&c^2U(P`%{x{uY9;aA{isAbLK9G-Eo
zP|+8KK6s-zQgI)HX`=$9(1QeRvY0U6laU!<2AF|A%YZ#Bo${ZpfPXkMzzlpB19Uz}
zR6^HcW>6m;XmksJm`Ar398)hrInttQF*ArOD8i&7np9z13}MpIFKwJ_F*9h=LD=R)
z*e46yp$PqS++XT&5UxSC%m6d+k%1L6t<e2{@^%0JF^Ol)05kBf7!dik*KT1+_HJD&
wj_z8CdV@+rahbvQ6ddTQ7-Q)wuA*u|zoY@8YcVs39u)o~plM*k4E!hqZ@oQQbN~PV

diff --git a/.gitignore b/.gitignore
index 3faa527..2b6f976 100644
--- a/.gitignore
+++ b/.gitignore
@@ -145,4 +145,6 @@ dmypy.json
 KEY
 
 # screenshots from Bertrand's interface
-bertrands_interface/
\ No newline at end of file
+bertrands_interface/
+
+**/.DS_Store
\ No newline at end of file
diff --git a/notebook/.DS_Store b/notebook/.DS_Store
deleted file mode 100644
index 356e6aa405b98fa510816e4e5bb2f5d1b6f924df..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeHK!A`<J5S>L5FdF1!ykPbh@Cz!bKOjFK2%-s1Q6$D&PwGLhp8Xh4p1k=h9(}X3
zLbtWvG{(#%yKg%?vweLf9fpWZZ!zi;wTY;S!B|_z^o8+0kCN5AXA@}njOnD?KTeZb
zDN-GORRMl?bsEutE@=?HzpbbIUXsS~aGDO`Yd*fc+-$!;?}z)NZ}(@X`QJu_RBF%(
zoza9AbU|a92Y0{kb;=QW4p--|pBxJ3B<YGtbyUFT!3v)_bVz7M(;}Z%=P+}8f`~ky
z>$|;MuStUnQ7b;xQF%VCUB`zxsi^|0fGY443ZQ1QH8%uxRs~c6RbZ(Ae;+IcW8^Ut
zbe|4Pt_1+r;dX{K_Y$0AJw_fgLF~Ycqyi<?_!h%RI{enhMIJLjNhjl*599A_e1~G(
z-EqEk;bbB~omBx<AXH#YF56uH532kBFiEde0af5%DPZbxKkng?d~Gc~9M@WlafHFf
nahc#U1q)Y-5zD1`2g4csmKVUtV<rd>O#cWt8FW$wepP`FKmcw^

diff --git a/tests/.DS_Store b/tests/.DS_Store
deleted file mode 100644
index 3ed183189058d69fe4a99f96ff42e47c750c2009..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeHKu};H447E#!pbm7Z81EOlFo!C9L4N>BQK^(P6>9fv{0E=Gz{D>w@F$Es+eb*0
z7KsI+%9ecZ;<IzkyXbsJL_FOsCPX75N}+;-Q#5--#zlKFh!z=SeaDg>=!UkmqN?ZZ
zhX2TbtlbdnwZ^L5QG5N?#d0#ct;&t1eKtPqSyp*IFRD2_(x;b;$MMJO<xk#=UwCis
z+gZqeN(t?d?~1nF44>cIW9^)Zw<f11wyeIh*f-vpmvvs5+r=4h2AqLoU;s5+Bpn)h
z>kK#p&OpO}><<Adm_@7&b?ZQ*R{&rFvkLlpRssew0JDg-AuJHKP@si!ykfA0!yckv
z7O^(8aAKVq?};;WyrF!Z)HtX+ac1bPGvEw#891@Wk<|aI!}tGgkUu#C&cIPIz)3#K
zr}(5;TYDcTwKhPnp(5f}8#W>6XeoxTl;U%!3hY5Tz${{I2n)my0!o87&cL5C@Cj*J
BQJw$*


From b336c2ac637d8823b2cf8ca85d9a81e918e023c0 Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Thu, 22 Jun 2023 16:05:07 +0200
Subject: [PATCH 11/35] removed unused class

---
 lymph/__init__.py        |   1 -
 lymph/midline_timeev2.py | 498 ---------------------------------------
 2 files changed, 499 deletions(-)
 delete mode 100644 lymph/midline_timeev2.py

diff --git a/lymph/__init__.py b/lymph/__init__.py
index 07ab0d9..93370f7 100644
--- a/lymph/__init__.py
+++ b/lymph/__init__.py
@@ -17,7 +17,6 @@
 from .bilateral import Bilateral, BilateralSystem
 from .edge import Edge
 from .midline import MidlineBilateral
-from .midline_timeev import MidlineBilateraltime
 from .node import Node
 from .timemarg import Marginalizor, MarginalizorDict
 from .unilateral import System, Unilateral
diff --git a/lymph/midline_timeev2.py b/lymph/midline_timeev2.py
deleted file mode 100644
index c82f676..0000000
--- a/lymph/midline_timeev2.py
+++ /dev/null
@@ -1,498 +0,0 @@
-from typing import Dict, List, Optional, Tuple, Union
-
-import numpy as np
-import pandas as pd
-
-from .bilateral import Bilateral
-from .timemarg import MarginalizorDict
-
-
-class MidlineBilateraltime:
-    """Model a bilateral lymphatic system where an additional risk factor can
-    be provided in the data: Whether or not the primary tumor extended over the
-    mid-sagittal line.
-
-    It is reasonable to assume (and supported by data) that such an extension
-    significantly increases the risk for metastatic spread to the contralateral
-    side of the neck. This class attempts to capture this using a simple
-    assumption: We assume that the probability of spread to the contralateral
-    side for patients *with* midline extension is larger than for patients
-    *without* it, but smaller than the probability of spread to the ipsilateral
-    side. Formally:
-
-    .. math::
-        b_c^{\\in} = \\alpha \\cdot b_i + (1 - \\alpha) \\cdot b_c^{\\not\\in}
-
-    where :math:`b_c^{\\in}` is the probability of spread from the primary tumor
-    to the contralateral side for patients with midline extension, and
-    :math:`b_c^{\\not\\in}` for patients without. :math:`\\alpha` is the linear
-    mixing parameter.
-    """
-
-    def __init__(
-        self,
-        graph: Dict[Tuple[str], List[str]],
-        use_mixing: bool = True,
-        trans_symmetric: bool = True,
-        **_kwargs
-    ):
-        """The class is constructed in a similar fashion to the
-        :class:`Bilateral`: That class contains one :class:`Unilateral` for
-        each side of the neck, while this class will contain two instances of
-        :class:`Bilateral`, one for the case of a midline extension and one for
-        the case of no midline extension.
-
-        Args:
-            graph: Dictionary of the same kind as for initialization of
-                :class:`System`. This graph will be passed to the constructors of
-                two :class:`System` attributes of this class.
-            use_mixing: Describe the contralateral base spread probabilities for the
-                case of a midline extension as a linear combination between the base
-                spread probs of the ipsilateral side and the ones of the contralateral
-                side when no midline extension is present.
-            trans_symmetric: If ``True``, the spread probabilities among the
-                LNLs will be set symmetrically.
-
-        See Also:
-            :class:`Bilateral`: Two of these are held as attributes by this
-            class. One for the case of a mid-sagittal extension of the primary
-            tumor and one for the case of no such extension.
-        """
-        self.ext   = Bilateral(
-            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
-        )
-        self.noext = Bilateral(
-            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
-        )
-        self.use_mixing = use_mixing
-        if self.use_mixing:
-            self.alpha_mix = 0.
-
-        self.noext.diag_time_dists = self.ext.diag_time_dists
-
-
-    @property
-    def graph(self) -> Dict[Tuple[str], List[str]]:
-        """Return the (unilateral) graph that was used to create this network.
-        """
-        return self.noext.graph
-
-
-    @property
-    def base_probs(self) -> np.ndarray:
-        """Base probabilities of metastatic lymphatic spread from the tumor(s)
-        to the lymph node levels. This will return the following concatenation of
-        base spread probs depending on whether ``use_mixing`` is set to ``True`` or
-        ``False``:
-
-        With the use of a mixing parameter:
-        +-----------+-------------------------+--------------+
-        | base ipsi | base contra (no midext) | mixing param |
-        +-----------+-------------------------+--------------+
-
-        Without it:
-        +-----------+----------------------+-------------------------+
-        | base ipsi | base contra (midext) | base contra (no midext) |
-        +-----------+----------------------+-------------------------+
-
-        When setting these, one needs to provide the respective shape.
-        """
-        if self.use_mixing:
-            return np.concatenate([
-                self.ext.ipsi.base_probs,
-                self.noext.contra.base_probs,
-                [self.alpha_mix],
-            ])
-        else:
-            return np.concatenate([
-                self.ext.ipsi.base_probs,
-                self.ext.contra.base_probs,
-                self.noext.contra.base_probs,
-            ])
-
-    @base_probs.setter
-    def base_probs(self, new_params: np.ndarray):
-        """Set the base probabilities from the tumor(s) to the LNLs, accounting
-        for the mixing parameter :math:`\\alpha``.
-        """
-        k = len(self.ext.ipsi.base_probs)
-
-        self.ext.ipsi.base_probs = new_params[:k]
-        self.noext.ipsi.base_probs = new_params[:k]
-
-        if self.use_mixing:
-            self.noext.contra.base_probs = new_params[k:2*k]
-            self.alpha_mix = new_params[-1]
-            # compute linear combination
-            self.ext.contra.base_probs = (
-                self.alpha_mix * self.ext.ipsi.base_probs
-                + (1. - self.alpha_mix) * self.noext.contra.base_probs
-            )
-        else:
-            self.ext.contra.base_probs = new_params[k:2*k]
-            self.noext.contra.base_probs = new_params[2*k:3*k]
-
-        # avoid unnecessary double computation of ipsilateral transition matrix
-        self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
-
-
-    @property
-    def trans_probs(self) -> np.ndarray:
-        """Probabilities of lymphatic spread among the lymph node levels. They
-        are assumed to be symmetric ipsi- & contralaterally by default.
-        """
-        return self.noext.trans_probs
-
-    @trans_probs.setter
-    def trans_probs(self, new_params: np.ndarray):
-        """Set the new spread probabilities for lymphatic spread from among the
-        LNLs.
-        """
-        self.noext.trans_probs = new_params
-        self.ext.trans_probs = new_params
-
-        # avoid unnecessary double computation of ipsilateral transition matrix
-        self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
-
-
-    @property
-    def spread_probs(self) -> np.ndarray:
-        """These are the probabilities representing the spread of cancer along
-        lymphatic drainage pathways per timestep.
-
-        It is composed of the base spread probs (possible with the mixing parameter)
-        and the probabilities of spread among the LNLs.
-
-        +-------------+-------------+
-        | base probs  | trans probs |
-        +-------------+-------------+
-        """
-        return np.concatenate([self.base_probs, self.trans_probs])
-
-    @spread_probs.setter
-    def spread_probs(self, new_params: np.ndarray):
-        """Set the new spread probabilities and the mixing parameter
-        :math:`\\alpha`.
-        """
-        num_base_probs = len(self.base_probs)
-
-        self.base_probs  = new_params[:num_base_probs]
-        self.trans_probs = new_params[num_base_probs:]
-
-
-    @property
-    def diag_time_dists(self) -> MarginalizorDict:
-        """This property holds the probability mass functions for marginalizing over
-        possible diagnose times for each T-stage.
-
-        When setting this property, one may also provide a normal Python dict, in
-        which case it tries to convert it to a :class:`MarginalizorDict`.
-
-        Note that the method will provide the same instance of this
-        :class:`MarginalizorDict` to both sides of the network.
-
-        See Also:
-            :class:`MarginalzorDict`, :class:`Marginalizor`.
-        """
-        return self.ext.diag_time_dists
-
-    @diag_time_dists.setter
-    def diag_time_dists(self, new_dists: Union[dict, MarginalizorDict]):
-        """Assign new :class:`MarginalizorDict` to this property. If it is a normal
-        Python dictionary, tr to convert it into a :class:`MarginalizorDict`.
-        """
-        self.ext.diag_time_dists = new_dists
-        self.noext.diag_time_dists = self.ext.diag_time_dists
-
-
-    @property
-    def modalities(self):
-        """A dictionary containing the specificity :math:`s_P` and sensitivity
-        :math:`s_N` values for each diagnostic modality.
-
-        Such a dictionary can also be provided to set this property and compute
-        the observation matrices of all used systems.
-
-        See Also:
-            :meth:`Bilateral.modalities`: Getting and setting this property in
-            the normal bilateral model.
-
-            :meth:`Unilateral.modalities`: Getting and setting :math:`s_P` and
-            :math:`s_N` for a unilateral model.
-        """
-        return self.noext.modalities
-
-    @modalities.setter
-    def modalities(self, modality_spsn: Dict[str, List[float]]):
-        """Call the respective getter and setter methods of the bilateral
-        components with and without midline extension.
-        """
-        self.noext.modalities = modality_spsn
-        self.ext.modalities = modality_spsn
-
-
-    @property
-    def patient_data(self):
-        """A pandas :class:`DataFrame` with rows of patients and columns of
-        patient and involvement details. The table's header should have three
-        levels that categorize the individual lymph node level's involvement to
-        the corresponding diagnostic modality (first level), the side of the
-        LNL (second level) and finaly the name of the LNL (third level).
-        Additionally, the patient's T-category must be stored under ('info',
-        'tumor', 't_stage') and whether the tumor extends over the mid-sagittal
-        line should be noted under ('info', 'tumor', 'midline_extension'). So,
-        part of this table could look like this:
-
-        +-----------------------------+------------------+------------------+
-        |            info             |       MRI        |       PET        |
-        +-----------------------------+--------+---------+--------+---------+
-        |            tumor            |  ipsi  | contra  |  ipsi  | contra  |
-        +---------+-------------------+--------+---------+--------+---------+
-        | t_stage | midline_extension |   II   |   II    |   II   |   II    |
-        +=========+===================+========+=========+========+=========+
-        | early   | ``True``          |``True``|``None`` |``True``|``False``|
-        +---------+-------------------+--------+---------+--------+---------+
-        | late    | ``True``          |``None``|``None`` |``None``|``None`` |
-        +---------+-------------------+--------+---------+--------+---------+
-        | early   | ``False``         |``True``|``False``|``True``|``True`` |
-        +---------+-------------------+--------+---------+--------+---------+
-        """
-        try:
-            return self._patient_data
-        except AttributeError:
-            raise AttributeError(
-                "No patient data has been loaded yet"
-            )
-
-    @patient_data.setter
-    def patient_data(self, patient_data: pd.DataFrame):
-        """Load the patient data. For now, this just calls the :meth:`load_data`
-        method, but at a later point, I would like to write a function here
-        that generates the pandas :class:`DataFrame` from the internal matrix
-        representation of the data.
-        """
-        self._patient_data = patient_data.copy()
-        self.load_data(patient_data)
-
-
-    def load_data(
-        self,
-        data: pd.DataFrame,
-        modality_spsn: Optional[Dict[str, List[float]]] = None,
-        mode = "HMM"
-    ):
-        """Load data as table of patients with involvement details and convert
-        it into internal representation of a matrix.
-
-        Args:
-            data: The table with rows of patients and columns of patient and
-                involvement details. The table's header must have three levels
-                that categorize the individual lymph node level's involvement
-                to the corresponding diagnostic modality (first level), the
-                side of the LNL (second level) and finaly the name of the LNL
-                (third level). Additionally, the patient's T-category must be
-                stored under ('info', 'tumor', 't_stage') and whether the tumor
-                extends over the mid-sagittal line should be noted under
-                ('info', 'tumor', 'midline_extension'). So, part of this table
-                could look like this:
-
-                +-----------------------------+---------------------+
-                |            info             |         MRI         |
-                +-----------------------------+----------+----------+
-                |            tumor            |   ipsi   |  contra  |
-                +---------+-------------------+----------+----------+
-                | t_stage | midline_extension |    II    |    II    |
-                +=========+===================+==========+==========+
-                | early   | ``True``          | ``True`` | ``None`` |
-                +---------+-------------------+----------+----------+
-                | late    | ``True``          | ``None`` | ``None`` |
-                +---------+-------------------+----------+----------+
-                | early   | ``False``         | ``True`` | ``True`` |
-                +---------+-------------------+----------+----------+
-
-            modality_spsn: If no diagnostic modalities have been defined yet,
-                this must be provided to build the observation matrix.
-
-        See Also:
-            :attr:`patient_data`: The attribute for loading and exporting data.
-
-            :meth:`Bilateral.load_data`: Loads data into a bilateral network by
-            splitting it into ipsi- & contralateral side and passing each to
-            the respective unilateral method (see below).
-
-            :meth:`Unilateral.load_data`: Data loading method of the unilateral
-            network.
-        """
-        ext_data = data.loc[data[("info", "tumor", "midline_extension")]]
-        noext_data = data.loc[~data[("info", "tumor", "midline_extension")]]
-
-        self.ext.load_data(
-            ext_data,
-            modality_spsn=modality_spsn,
-            mode=mode
-        )
-        self.noext.load_data(
-            noext_data,
-            modality_spsn=modality_spsn,
-            mode=mode
-        )
-
-
-    def check_and_assign(self, new_params: np.ndarray):
-        """Check that the spread probability (rates) and the parameters for the
-        marginalization over diagnose times are all within limits and assign them to
-        the model. Also, make sure the ipsi- and contralateral distributions over
-        diagnose times are the same instance of the :class:`MarginalizorDict`.
-
-        Args:
-            new_params: The set of :attr:`spread_probs` and parameters to provide for
-                updating the parametrized distributions over diagnose times.
-
-        Warning:
-            This method assumes that the parametrized distributions (instances of
-            :class:`Marginalizor`) all raise a ``ValueError`` when provided with
-            invalid parameters.
-        """
-        k = len(self.spread_probs)
-        new_spread_probs = new_params[:k]
-        new_marg_params = new_params[k:]
-
-        try:
-            self.ext.ipsi.diag_time_dists.update(new_marg_params)
-            self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
-            self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists
-            self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
-        except ValueError as val_err:
-            raise ValueError(
-                "Parameters for marginalization over diagnose times are invalid"
-            ) from val_err
-
-        if new_spread_probs.shape != self.spread_probs.shape:
-            raise ValueError(
-                "Shape of provided spread parameters does not match network"
-            )
-        if np.any(0. > new_spread_probs) or np.any(new_spread_probs > 1.):
-            raise ValueError(
-                "Spread probs must be between 0 and 1"
-            )
-
-        self.spread_probs = new_spread_probs
-
-
-    def likelihood(
-        self,
-        data: Optional[pd.DataFrame] = None,
-        given_params: Optional[np.ndarray] = None,
-        log: bool = True,
-    ) -> float:
-        """Compute log-likelihood of (already stored) data, given the spread
-        probabilities and either a discrete diagnose time or a distribution to
-        use for marginalization over diagnose times.
-
-        Args:
-            data: Table with rows of patients and columns of per-LNL involvment. See
-                :meth:`load_data` for more details on how this should look like.
-
-            given_params: The likelihood is a function of these parameters. They mainly
-                consist of the :attr:`spread_probs` of the model. Any excess parameters
-                will be used to update the parametrized distributions used for
-                marginalizing over the diagnose times (see :attr:`diag_time_dists`).
-
-            log: When ``True``, the log-likelihood is returned.
-
-        Returns:
-            The log-likelihood :math:`\\log{p(D \\mid \\theta)}` where :math:`D`
-            is the data and :math:`\\theta` is the tuple of spread probabilities
-            and diagnose times or distributions over diagnose times.
-
-        See Also:
-            :attr:`spread_probs`: Property for getting and setting the spread
-            probabilities, of which a lymphatic network has as many as it has
-            :class:`Edge` instances (in case no symmetries apply).
-
-            :meth:`Unilateral.likelihood`: The log-likelihood function of
-            the unilateral system.
-
-            :meth:`Bilateral.likelihood`: The (log-)likelihood function of the
-            bilateral system.
-        """
-        if data is not None:
-            self.patient_data = data
-
-        try:
-            self.check_and_assign(given_params)
-        except ValueError:
-            return -np.inf if log else 0.
-
-        llh = 0. if log else 1.
-
-        if log:
-            llh += self.ext._likelihood(log=log)
-            llh += self.noext._likelihood(log=log)
-        else:
-            llh *= self.ext._likelihood(log=log)
-            llh *= self.noext._likelihood(log=log)
-
-        return llh
-
-
-    def risk(
-        self,
-        given_params: Optional[np.ndarray] = None,
-        midline_extension: bool = True,
-        **kwargs,
-    ) -> float:
-        """Compute the risk of nodal involvement given a specific diagnose.
-
-        Args:
-            spread_probs: Set ot new spread parameters. This also contains the
-                mixing parameter alpha in the last position.
-            midline_extension: Whether or not the patient's tumor extends over
-                the mid-sagittal line.
-
-        See Also:
-            :meth:`Bilateral.risk`: Depending on whether or not the patient's
-            tumor does extend over the midline, the risk function of the
-            respective :class:`Bilateral` instance gets called.
-        """
-        if given_params is not None:
-            self.check_and_assign(given_params)
-
-        if midline_extension:
-            return self.ext.risk(**kwargs)
-        else:
-            return self.noext.risk(**kwargs)
-
-
-    def generate_dataset(
-        self,
-        num_patients: int,
-        stage_dist: Dict[str, float],
-        ext_prob: float,
-        **_kwargs,
-    ) -> pd.DataFrame:
-        """Generate/sample a pandas :class:`DataFrame` from the defined network.
-
-        Args:
-            num_patients: Number of patients to generate.
-            stage_dist: Probability to find a patient in a certain T-stage.
-            ext_prob: Probability that a patient's primary tumor extends over
-                the mid-sagittal line.
-        """
-        drawn_ext = np.random.choice(
-            [True, False], p=[ext_prob, 1. - ext_prob], size=num_patients
-        )
-        ext_dataset = self.ext.generate_dataset(
-            num_patients=num_patients,
-            stage_dist=stage_dist,
-        )
-        noext_dataset = self.noext.generate_dataset(
-            num_patients=num_patients,
-            stage_dist=stage_dist,
-        )
-
-        dataset = noext_dataset.copy()
-        dataset.loc[drawn_ext] = ext_dataset.loc[drawn_ext]
-        dataset[('info', 'tumor', 'midline_extension')] = drawn_ext
-
-        return dataset
\ No newline at end of file

From 443d59819d929cdddffc10ad92f106723447b9e5 Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Wed, 12 Jul 2023 17:10:12 +0200
Subject: [PATCH 12/35] added multiple functions needed for time evolution of
 midline extension

---
 lymph/midline.py     | 388 +++++++++++++++++++++-------
 lymph/midline_old.py | 595 +++++++++++++++++++++++++++++++++++++++++++
 lymph/unilateral.py  |  33 ++-
 3 files changed, 907 insertions(+), 109 deletions(-)
 create mode 100644 lymph/midline_old.py

diff --git a/lymph/midline.py b/lymph/midline.py
index 138dad8..598a3a7 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -2,9 +2,12 @@
 
 import numpy as np
 import pandas as pd
+from numpy.linalg import matrix_power as mat_pow
 
 from .bilateral import Bilateral
+from .node import Node
 from .timemarg import MarginalizorDict
+from .unilateral import change_base
 
 
 class MidlineBilateral:
@@ -64,26 +67,57 @@ def __init__(
         self.noext = Bilateral(
             graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
         )
-        self.ext_unknown = Bilateral(
-            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
-        )
-        self.noext_unknown = Bilateral(
-            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
-        )
         self.use_mixing = use_mixing
         if self.use_mixing:
             self.alpha_mix = 0.
 
         self.noext.diag_time_dists = self.ext.diag_time_dists
-        self.ext_unknown.diag_time_dists = self.ext.diag_time_dists
-        self.noext_unknown.diag_time_dists = self.ext.diag_time_dists
+        self.midexstates = ["0", "1"]  
 
     @property
     def graph(self) -> Dict[Tuple[str], List[str]]:
         """Return the (unilateral) graph that was used to create this network.
         """
         return self.noext.graph
+    
+    def _gen_midexstate_list(self):
+        """Generates the list of (hidden) midline states.
+        """
+        if not hasattr(self, "_midexstate_list"):
+            self._midexstate_list = np.zeros(
+                shape=(2**1, 1), dtype=int
+                )
+        for i in range(2**1):
+            self._midexstate_list[i] = [
+                int(digit) for digit in change_base(i, 2, length=1)
+                ]
 
+    @property
+    def midexstate_list(self):
+        """Return list of all possible hidden midline states.
+        """
+        try:
+            return self._midexstate_list
+        except AttributeError:
+            self._gen_midexstate_list()
+            return self._midexstate_list
+    
+    @property
+    def midext_prob(self):
+        """Assign the last of the new_params to the midline extension probability
+        """
+        try:
+            return self._midext_prob
+        except AttributeError as attr_err:
+            raise AttributeError(
+                "No midline extension probability has been assigned"
+            ) from attr_err
+
+    @midext_prob.setter
+    def midext_prob(self, new_params):
+        """A variable containing the midline extension probability
+        """
+        self._midext_prob = new_params[-1]
 
     @property
     def base_probs(self) -> np.ndarray:
@@ -126,33 +160,21 @@ def base_probs(self, new_params: np.ndarray):
 
         self.ext.ipsi.base_probs = new_params[:k]
         self.noext.ipsi.base_probs = new_params[:k]
-        self.ext_unknown.ipsi.base_probs = new_params[:k]
-        self.noext_unknown.ipsi.base_probs = new_params[:k]
 
         if self.use_mixing:
             self.noext.contra.base_probs = new_params[k:2*k]
-            self.noext_unknown.contra.base_probs = new_params[k:2*k]
             self.alpha_mix = new_params[-1]
             # compute linear combination
             self.ext.contra.base_probs = (
                 self.alpha_mix * self.ext.ipsi.base_probs
                 + (1. - self.alpha_mix) * self.noext.contra.base_probs
             )
-            self.ext_unknown.contra.base_probs = (
-                self.alpha_mix * self.ext_unknown.ipsi.base_probs
-                + (1. - self.alpha_mix) * self.noext_unknown.contra.base_probs
-            )
         else:
             self.ext.contra.base_probs = new_params[k:2*k]
             self.noext.contra.base_probs = new_params[2*k:3*k]
-            self.ext_unknown.contra.base_probs = new_params[k:2*k]
-            self.noext_unknown.contra.base_probs = new_params[2*k:3*k]
 
         # avoid unnecessary double computation of ipsilateral transition matrix
         self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
-        self.noext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
-        self.ext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
-
 
     @property
     def trans_probs(self) -> np.ndarray:
@@ -168,14 +190,89 @@ def trans_probs(self, new_params: np.ndarray):
         """
         self.noext.trans_probs = new_params
         self.ext.trans_probs = new_params
-        self.noext_unknown.trans_probs = new_params
-        self.ext_unknown.trans_probs = new_params
-
+    
         # avoid unnecessary double computation of ipsilateral transition matrix
         self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
-        self.noext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
-        self.ext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
 
+    def _gen_allowed_midextransitions(self):
+        """Generate the allowed midline extension transitions.
+        """
+        self._allowed_midextransitions = {}
+        for i in range(len(self.midexstate_list)):
+            self._allowed_midextransitions[i] = []
+            for j in range(len(self.midexstate_list)):
+                if not np.any(np.greater(self.midexstate_list[i,:],
+                                         self.midexstate_list[j,:])):
+                    self._allowed_midextransitions[i].append(j)
+
+    @property
+    def allowed_midextransitions(self):
+        """Return a dictionary that contains for each row :math:`i` of the
+        transition matrix :math:`\\mathbf{A}` the column numbers :math:`j` for
+        which the transtion probability :math:`P\\left( x_j \\mid x_i \\right)`
+        is not zero due to the forbidden self-healing.
+
+        For example: The hidden state ``[True, False]`` in a network with only
+        one tumor and two LNLs (one involved, one healthy) corresponds to the
+        index ``1`` and can only evolve into the state ``[True, True]``, which
+        has index 3. So, the key-value pair for that particular hidden state
+        would be ``1: [3]``.
+        """
+        try:
+            return self._allowed_midextransitions
+        except AttributeError:
+            self._gen_allowed_midextransitions()
+            return self._allowed_midextransitions
+
+    def _gen_midextransition_matrix(self):
+        """Generate the midline extension transition matrix :math:`\\mathbf{A}`, which contains
+        the :math:`P \\left( S_{t+1} \\mid S_t \\right)`. :math:`\\mathbf{A}`
+        is a square matrix with size ``(# of states)``. The lower diagonal is
+        zero.
+        """
+        if not hasattr(self, "_midextransition_matrix"):
+            shape = (2**1, 2**1)
+            self._midextransition_matrix = np.zeros(shape=shape)
+
+        for i,state in enumerate(self.midexstate_list):
+            self.state = state
+            for j in self.allowed_midextransitions[i]:
+                midextransition_prob = self.comp_midextransition_prob(self.midexstate_list[j])
+                self._midextransition_matrix[i,j] = midextransition_prob
+
+    @property
+    def midextransition_matrix(self) -> np.ndarray:
+        """Return the midline extension listtransition matrix :math:`\\mathbf{A}`, which contains the
+        probability to transition from any state :math:`S_t` to any other state
+        :math:`S_{t+1}` one timestep later:
+        :math:`P \\left( S_{t+1} \\mid S_t \\right)`. :math:`\\mathbf{A}` is a
+        square matrix with size ``(# of states)``. The lower diagonal is zero,
+        because those entries correspond to transitions that would require
+        self-healing.
+        """
+        try:
+            return self._midextransition_matrix
+        except AttributeError:
+            self._gen_midextransition_matrix()
+            return self._midextransition_matrix
+
+    @property
+    def midextrans_probs(self) -> np.ndarray:
+        """Probabilities of lymphatic spread among the lymph node levels. They
+        are assumed to be symmetric ipsi- & contralaterally by default.
+        """
+        return self.noext.midextrans_probs
+
+    @midextrans_probs.setter
+    def midextrans_probs(self, new_params: np.ndarray):
+        """Set the new spread probabilities for lymphatic spread from among the
+        LNLs.
+        """
+        self.noext.midextrans_probs = new_params
+        self.ext.midextrans_probs = new_params
+
+        # avoid unnecessary double computation of ipsilateral transition matrix
+        self.noext.ipsi._midextransition_matrix = self.ext.ipsi.midextransition_matrix
 
     @property
     def spread_probs(self) -> np.ndarray:
@@ -225,9 +322,40 @@ def diag_time_dists(self, new_dists: Union[dict, MarginalizorDict]):
         """
         self.ext.diag_time_dists = new_dists
         self.noext.diag_time_dists = self.ext.diag_time_dists
-        self.noext_unknown.diag_time_dists = self.ext.diag_time_dists
-        self.ext_unknown.diag_time_dists = self.ext.diag_time_dists
+    
+    def comp_midextransition_prob(
+        self,
+        newstate: List[int],
+        acquire: bool = False
+    ) -> float:
+        """Computes the probability to transition to ``newstate``, given its
+        current state.
 
+        Args:
+            newstate: List of new states for each LNL in the lymphatic
+                system. The transition probability :math:`t` will be computed
+                from the current states to these states.
+            acquire: if ``True``, after computing and returning the probability,
+                the system updates its own state to be ``newstate``.
+                (default: ``False``)
+
+        Returns:
+            Transition probability :math:`t`.
+        """
+        res = 1.
+        for i, midexstate in enumerate(self.midexstates):
+            if not midexstate.state:
+                in_states = [1]
+                in_weights = [self.midext_prob]
+                res *= Node.trans_prob(in_states, in_weights)[newstate[i]]
+            elif not newstate[i]:
+                res = 0.
+                break
+
+        if acquire:
+            self.state = newstate
+
+        return res
 
     @property
     def modalities(self):
@@ -253,9 +381,7 @@ def modalities(self, modality_spsn: Dict[str, List[float]]):
         """
         self.noext.modalities = modality_spsn
         self.ext.modalities = modality_spsn
-        self.noext_unknown.modalities = modality_spsn
-        self.ext_unknown.modalities = modality_spsn
-
+        
     @property
     def midext_prob(self):
         """Assign the last of the new_params to the midline extension probability
@@ -316,6 +442,60 @@ def patient_data(self, patient_data: pd.DataFrame):
         self._patient_data = patient_data.copy()
         self.load_data(patient_data)
 
+    def _evolve_onestep(
+        self, start_state: Optional[np.ndarray] = None
+    ) -> np.ndarray:
+        """Evolve hidden Markov model based system over one time step. Compute
+        :math:`p(S \\mid t)` where :math:`S` is a distinct state and :math:`t`
+        is the time.
+
+        Args:
+            start_state: The current state.
+
+        Returns:
+            A matrix with the new state
+
+        :meta public:
+        """
+        # All healthy state at beginning
+        if start_state is None:
+            start_state = np.zeros(shape=len(self.state_list), dtype=float)
+            start_state[0] = 1.
+
+        # compute involvement at first time-step
+        new_state = start_state @ self.transition_matrix
+
+        return new_state
+
+    def _evolve_midext(
+        self, start_midexstate: Optional[np.ndarray] = None
+    ) -> np.ndarray:
+        """Evolve hidden Markov model based system over one time step. Compute
+        :math:`p(S \\mid t)` where :math:`S` is a distinct state and :math:`t`
+        is the time.
+
+        Args:
+            start_midexstate: The current midline extension state.
+
+        Returns:
+            The new midline extension state
+
+        :meta public:
+        """
+
+        # compute involvement at first time-step
+        new_midexstate = start_midexstate @ self.midextransition_matrix
+
+        return new_midexstate
+
+    """
+    def state_probs_midext(self):
+        max_t = self.diag_time_dists.max_t
+        self.ext.state_probs_midext = {}
+        self.noext.state_probs_midext = {}
+        self.noext.state_probs_midext = self.noext._evolve_midext(t_last=max_t)
+        self.ext.state_probs_midext = self.ext.
+    """
 
     def load_data(
         self,
@@ -365,48 +545,18 @@ def load_data(
             :meth:`Unilateral.load_data`: Data loading method of the unilateral
             network.
         """
-        ext_data = data.loc[(data[("info", "tumor", "midline_extension")]==True)]
-        noext_data = data.loc[(data[("info", "tumor", "midline_extension")]==False)]
-        unknown_data = data.loc[(data['info', 'tumor', 'midline_extension']!=False) & (data['info', 'tumor', 'midline_extension']!=True)] 
 
-        if len(ext_data) > 0:
-            self.ext.load_data(
-                ext_data,
-                modality_spsn=modality_spsn,
-                mode=mode
-            )
-        
-        if len(noext_data) > 0:
-            self.noext.load_data(
-                noext_data,
-                modality_spsn=modality_spsn,
-                mode=mode
-            )
-        if len(unknown_data) > 0 & len(ext_data) + len(noext_data) > 0:
-            self.ext_unknown.load_data(
-                unknown_data,
-                modality_spsn=modality_spsn,
-                mode=mode
-            )
-
-            self.noext_unknown.load_data(
-                unknown_data,
-                modality_spsn=modality_spsn,
-                mode=mode
-            )
-
-        if len(unknown_data) > 0 & len(ext_data) + len(noext_data) == 0:
-            self.ext.load_data(
-                unknown_data,
-                modality_spsn=modality_spsn,
-                mode=mode
-            )
-
-            self.noext.load_data(
-                unknown_data,
-                modality_spsn=modality_spsn,
-                mode=mode
-            )
+        self.ext.load_data(
+            data,
+            modality_spsn=modality_spsn,
+            mode=mode
+        )
+    
+        self.noext.load_data(
+            data,
+            modality_spsn=modality_spsn,
+            mode=mode
+        )
 
     def check_and_assign(self, new_params: np.ndarray):
         """Check that the spread probability (rates) and the parameters for the
@@ -430,13 +580,9 @@ def check_and_assign(self, new_params: np.ndarray):
 
         try:
             self.ext.ipsi.diag_time_dists.update(new_marg_params)
-            self.ext_unknown.ipsi.diag_time_dists.update(new_marg_params)
             self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
             self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists
             self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
-            self.ext_unknown.contra.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists
-            self.noext_unknown.ipsi.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists
-            self.noext_unknown.contra.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists
             self.midext_prob = new_params
         except ValueError as val_err:
             raise ValueError(
@@ -458,6 +604,72 @@ def check_and_assign(self, new_params: np.ndarray):
 
         self.spread_probs = new_spread_probs
 
+    def _likelihood_mid(
+        self,
+        log: bool = True
+    ) -> float:
+        """Compute the (log-)likelihood of data, using the stored spread probs and
+        fixed distributions for marginalizing over diagnose times.
+
+        This method mainly exists so that the checking and assigning of the
+        spread probs can be skipped.
+        """
+        stored_t_stages = set(self.ipsi.diagnose_matrices.keys())
+        provided_t_stages = set(self.ipsi.diag_time_dists.keys())
+        t_stages = list(stored_t_stages.intersection(provided_t_stages))
+        
+        max_t = self.diag_time_dists.max_t
+        state_probs_ex = np.zeros(shape=len(self.state_list), dtype=float)
+        state_probs_ex[0] = 1.
+        state_probs_nox = np.zeros(shape=len(self.state_list), dtype=float)
+        state_probs_nox[0] = 1.
+        state_probs_midext = np.zeros(shape=len(self.midexstate_list), dtype=float)
+        state_probs_midext[0] = 1.
+
+        llh_ex = 0. if log else 1.
+        llh_nox = 0. if log else 1.
+
+        #if 1 1 in diagnose matrix midline extension:
+        #state_probs_midext[0] * llh_nox + state_probs_midext[1] * llh_ex
+
+        for i in range(max_t):
+            for stage in t_stages:
+                state_probs_nox["ipsi"] = self.noext.ipsi._evolve_onestep(start_state = state_probs_nox["ipsi"])
+                state_probs_nox["contra"] = self.noext.contra._evolve_onestep(start_state = state_probs_nox["contra"])
+                state_probs_ex["ipsi"] = state_probs_midext[1] * self.ext.ipsi._evolve_onestep(start_state = state_probs_ex["ipsi"]) + state_probs_midext[0] * state_probs_nox["ipsi"]
+                state_probs_ex["contra"] = state_probs_midext[1] * self.ext.contra._evolve_onestep(start_state = state_probs_ex["contra"]) + state_probs_midext[0] * state_probs_nox["contra"]
+                state_probs_midext = self._evolve_midext(start_midextstate = state_probs_midext)
+
+                joint_state_probs_nox = (
+                    state_probs_nox["ipsi"].T
+                    @ np.diag(self.ipsi.diag_time_dists[stage].pmf)
+                    @ state_probs_nox["contra"]
+                )
+                joint_state_probs_ex = (
+                    state_probs_ex["ipsi"].T
+                    @ np.diag(self.ipsi.diag_time_dists[stage].pmf)
+                    @ state_probs_ex["contra"]
+                )
+                p_ex = np.sum(
+                    self.ext.ipsi.diagnose_matrices[stage]
+                    * (joint_state_probs_ex
+                        @ self.ext.contra.diagnose_matrices[stage]),
+                    axis=0
+                    )
+                p_nox = np.sum(
+                    self.noext.ipsi.diagnose_matrices[stage]
+                    * (joint_state_probs_nox
+                        @ self.noext.contra.diagnose_matrices[stage]),
+                    axis=0
+                    )
+                if log:
+                    llh_ex += np.sum(np.log(p_ex))
+                    llh_nox += np.sum(np.log(p_nox))
+                else:
+                    llh_ex *= np.prod(p_ex)
+                    llh_nox *= np.prod(p_nox)
+
+        return llh_ex + llh_nox
 
     def likelihood(
         self,
@@ -504,31 +716,13 @@ def likelihood(
         except ValueError:
             return -np.inf if log else 0.
 
-        llh = 0. if log else 1.
-
-        if len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']!=False) & (self.patient_data['info', 'tumor', 'midline_extension']!=True)])>0:
-            state_probs = {}
-            state_probs["ipsi"] = self.ext_unknown.state_probs_ipsi()
-            state_probs["contra"] = self.ext_unknown.state_probs_contra()
-            state_probs2 = {}
-            state_probs2["ipsi"] = self.noext_unknown.state_probs_ipsi()
-            state_probs2["contra"] = self.noext_unknown.state_probs_contra()
-            t_stages = list(set(self.patient_data[("info","tumor","t_stage")]))
-            t_stages = self.ext_unknown.t_stages()
-        
-
-            for stage in t_stages:
-                p = self.ext_unknown._likelihood2(stage=stage, state_probs=state_probs)*self.midext_prob
-                p2 = self.noext_unknown._likelihood2(stage=stage, state_probs=state_probs2)*(1-self.midext_prob)
-                llh += np.sum(np.log(p+p2))
-
         if len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False) | (self.patient_data['info', 'tumor', 'midline_extension']==True)])>0:
             if log:
-                llh += self.ext._likelihood(log=log) + np.log(self.midext_prob)*len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==True)])
-                llh += self.noext._likelihood(log=log) + np.log(1-self.midext_prob)*len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False)])
+                llh += self.ext._likelihood_mid(log=log)
+                llh += self.noext._likelihood_mid(log=log)
             else:
-                llh *= self.ext._likelihood(log=log)*self.midext_prob**len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==True)])
-                llh *= self.noext._likelihood(log=log)*(1-self.midext_prob)**len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False)])
+                llh *= self.ext._likelihood_mid(log=log)
+                llh *= self.noext._likelihood_mid(log=log)
 
         return llh
 
diff --git a/lymph/midline_old.py b/lymph/midline_old.py
new file mode 100644
index 0000000..138dad8
--- /dev/null
+++ b/lymph/midline_old.py
@@ -0,0 +1,595 @@
+from typing import Dict, List, Optional, Tuple, Union
+
+import numpy as np
+import pandas as pd
+
+from .bilateral import Bilateral
+from .timemarg import MarginalizorDict
+
+
+class MidlineBilateral:
+    """Model a bilateral lymphatic system where an additional risk factor can
+    be provided in the data: Whether or not the primary tumor extended over the
+    mid-sagittal line.
+
+    It is reasonable to assume (and supported by data) that such an extension
+    significantly increases the risk for metastatic spread to the contralateral
+    side of the neck. This class attempts to capture this using a simple
+    assumption: We assume that the probability of spread to the contralateral
+    side for patients *with* midline extension is larger than for patients
+    *without* it, but smaller than the probability of spread to the ipsilateral
+    side. Formally:
+
+    .. math::
+        b_c^{\\in} = \\alpha \\cdot b_i + (1 - \\alpha) \\cdot b_c^{\\not\\in}
+
+    where :math:`b_c^{\\in}` is the probability of spread from the primary tumor
+    to the contralateral side for patients with midline extension, and
+    :math:`b_c^{\\not\\in}` for patients without. :math:`\\alpha` is the linear
+    mixing parameter.
+    """
+
+    def __init__(
+        self,
+        graph: Dict[Tuple[str], List[str]],
+        use_mixing: bool = True,
+        trans_symmetric: bool = True,
+        **_kwargs
+    ):
+        """The class is constructed in a similar fashion to the
+        :class:`Bilateral`: That class contains one :class:`Unilateral` for
+        each side of the neck, while this class will contain two instances of
+        :class:`Bilateral`, one for the case of a midline extension and one for
+        the case of no midline extension.
+
+        Args:
+            graph: Dictionary of the same kind as for initialization of
+                :class:`System`. This graph will be passed to the constructors of
+                two :class:`System` attributes of this class.
+            use_mixing: Describe the contralateral base spread probabilities for the
+                case of a midline extension as a linear combination between the base
+                spread probs of the ipsilateral side and the ones of the contralateral
+                side when no midline extension is present.
+            trans_symmetric: If ``True``, the spread probabilities among the
+                LNLs will be set symmetrically.
+
+        See Also:
+            :class:`Bilateral`: Two of these are held as attributes by this
+            class. One for the case of a mid-sagittal extension of the primary
+            tumor and one for the case of no such extension.
+        """
+        self.ext   = Bilateral(
+            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
+        )
+        self.noext = Bilateral(
+            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
+        )
+        self.ext_unknown = Bilateral(
+            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
+        )
+        self.noext_unknown = Bilateral(
+            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
+        )
+        self.use_mixing = use_mixing
+        if self.use_mixing:
+            self.alpha_mix = 0.
+
+        self.noext.diag_time_dists = self.ext.diag_time_dists
+        self.ext_unknown.diag_time_dists = self.ext.diag_time_dists
+        self.noext_unknown.diag_time_dists = self.ext.diag_time_dists
+
+    @property
+    def graph(self) -> Dict[Tuple[str], List[str]]:
+        """Return the (unilateral) graph that was used to create this network.
+        """
+        return self.noext.graph
+
+
+    @property
+    def base_probs(self) -> np.ndarray:
+        """Base probabilities of metastatic lymphatic spread from the tumor(s)
+        to the lymph node levels. This will return the following concatenation of
+        base spread probs depending on whether ``use_mixing`` is set to ``True`` or
+        ``False``:
+
+        With the use of a mixing parameter:
+        +-----------+-------------------------+--------------+
+        | base ipsi | base contra (no midext) | mixing param |
+        +-----------+-------------------------+--------------+
+
+        Without it:
+        +-----------+----------------------+-------------------------+
+        | base ipsi | base contra (midext) | base contra (no midext) |
+        +-----------+----------------------+-------------------------+
+
+        When setting these, one needs to provide the respective shape.
+        """
+        if self.use_mixing:
+            return np.concatenate([
+                self.ext.ipsi.base_probs,
+                self.noext.contra.base_probs,
+                [self.alpha_mix],
+            ])
+        else:
+            return np.concatenate([
+                self.ext.ipsi.base_probs,
+                self.ext.contra.base_probs,
+                self.noext.contra.base_probs,
+            ])
+
+    @base_probs.setter
+    def base_probs(self, new_params: np.ndarray):
+        """Set the base probabilities from the tumor(s) to the LNLs, accounting
+        for the mixing parameter :math:`\\alpha``.
+        """
+        k = len(self.ext.ipsi.base_probs)
+
+        self.ext.ipsi.base_probs = new_params[:k]
+        self.noext.ipsi.base_probs = new_params[:k]
+        self.ext_unknown.ipsi.base_probs = new_params[:k]
+        self.noext_unknown.ipsi.base_probs = new_params[:k]
+
+        if self.use_mixing:
+            self.noext.contra.base_probs = new_params[k:2*k]
+            self.noext_unknown.contra.base_probs = new_params[k:2*k]
+            self.alpha_mix = new_params[-1]
+            # compute linear combination
+            self.ext.contra.base_probs = (
+                self.alpha_mix * self.ext.ipsi.base_probs
+                + (1. - self.alpha_mix) * self.noext.contra.base_probs
+            )
+            self.ext_unknown.contra.base_probs = (
+                self.alpha_mix * self.ext_unknown.ipsi.base_probs
+                + (1. - self.alpha_mix) * self.noext_unknown.contra.base_probs
+            )
+        else:
+            self.ext.contra.base_probs = new_params[k:2*k]
+            self.noext.contra.base_probs = new_params[2*k:3*k]
+            self.ext_unknown.contra.base_probs = new_params[k:2*k]
+            self.noext_unknown.contra.base_probs = new_params[2*k:3*k]
+
+        # avoid unnecessary double computation of ipsilateral transition matrix
+        self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
+        self.noext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
+        self.ext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
+
+
+    @property
+    def trans_probs(self) -> np.ndarray:
+        """Probabilities of lymphatic spread among the lymph node levels. They
+        are assumed to be symmetric ipsi- & contralaterally by default.
+        """
+        return self.noext.trans_probs
+
+    @trans_probs.setter
+    def trans_probs(self, new_params: np.ndarray):
+        """Set the new spread probabilities for lymphatic spread from among the
+        LNLs.
+        """
+        self.noext.trans_probs = new_params
+        self.ext.trans_probs = new_params
+        self.noext_unknown.trans_probs = new_params
+        self.ext_unknown.trans_probs = new_params
+
+        # avoid unnecessary double computation of ipsilateral transition matrix
+        self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
+        self.noext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
+        self.ext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
+
+
+    @property
+    def spread_probs(self) -> np.ndarray:
+        """These are the probabilities representing the spread of cancer along
+        lymphatic drainage pathways per timestep.
+
+        It is composed of the base spread probs (possible with the mixing parameter)
+        and the probabilities of spread among the LNLs.
+
+        +-------------+-------------+
+        | base probs  | trans probs |
+        +-------------+-------------+
+        """
+        return np.concatenate([self.base_probs, self.trans_probs])
+
+    @spread_probs.setter
+    def spread_probs(self, new_params: np.ndarray):
+        """Set the new spread probabilities and the mixing parameter
+        :math:`\\alpha`.
+        """
+        num_base_probs = len(self.base_probs)
+
+        self.base_probs  = new_params[:num_base_probs]
+        self.trans_probs = new_params[num_base_probs:]
+
+
+    @property
+    def diag_time_dists(self) -> MarginalizorDict:
+        """This property holds the probability mass functions for marginalizing over
+        possible diagnose times for each T-stage.
+
+        When setting this property, one may also provide a normal Python dict, in
+        which case it tries to convert it to a :class:`MarginalizorDict`.
+
+        Note that the method will provide the same instance of this
+        :class:`MarginalizorDict` to both sides of the network.
+
+        See Also:
+            :class:`MarginalzorDict`, :class:`Marginalizor`.
+        """
+        return self.ext.diag_time_dists
+
+    @diag_time_dists.setter
+    def diag_time_dists(self, new_dists: Union[dict, MarginalizorDict]):
+        """Assign new :class:`MarginalizorDict` to this property. If it is a normal
+        Python dictionary, tr to convert it into a :class:`MarginalizorDict`.
+        """
+        self.ext.diag_time_dists = new_dists
+        self.noext.diag_time_dists = self.ext.diag_time_dists
+        self.noext_unknown.diag_time_dists = self.ext.diag_time_dists
+        self.ext_unknown.diag_time_dists = self.ext.diag_time_dists
+
+
+    @property
+    def modalities(self):
+        """A dictionary containing the specificity :math:`s_P` and sensitivity
+        :math:`s_N` values for each diagnostic modality.
+
+        Such a dictionary can also be provided to set this property and compute
+        the observation matrices of all used systems.
+
+        See Also:
+            :meth:`Bilateral.modalities`: Getting and setting this property in
+            the normal bilateral model.
+
+            :meth:`Unilateral.modalities`: Getting and setting :math:`s_P` and
+            :math:`s_N` for a unilateral model.
+        """
+        return self.noext.modalities
+
+    @modalities.setter
+    def modalities(self, modality_spsn: Dict[str, List[float]]):
+        """Call the respective getter and setter methods of the bilateral
+        components with and without midline extension.
+        """
+        self.noext.modalities = modality_spsn
+        self.ext.modalities = modality_spsn
+        self.noext_unknown.modalities = modality_spsn
+        self.ext_unknown.modalities = modality_spsn
+
+    @property
+    def midext_prob(self):
+        """Assign the last of the new_params to the midline extension probability
+        """
+        try:
+            return self._midext_prob
+        except AttributeError as attr_err:
+            raise AttributeError(
+                "No midline extension probability has been assigned"
+            ) from attr_err
+
+    @midext_prob.setter
+    def midext_prob(self, new_params):
+        """A variable containing the midline extension probability
+        """
+        self._midext_prob = new_params[-1]
+
+    @property
+    def patient_data(self):
+        """A pandas :class:`DataFrame` with rows of patients and columns of
+        patient and involvement details. The table's header should have three
+        levels that categorize the individual lymph node level's involvement to
+        the corresponding diagnostic modality (first level), the side of the
+        LNL (second level) and finaly the name of the LNL (third level).
+        Additionally, the patient's T-category must be stored under ('info',
+        'tumor', 't_stage') and whether the tumor extends over the mid-sagittal
+        line should be noted under ('info', 'tumor', 'midline_extension'). So,
+        part of this table could look like this:
+
+        +-----------------------------+------------------+------------------+
+        |            info             |       MRI        |       PET        |
+        +-----------------------------+--------+---------+--------+---------+
+        |            tumor            |  ipsi  | contra  |  ipsi  | contra  |
+        +---------+-------------------+--------+---------+--------+---------+
+        | t_stage | midline_extension |   II   |   II    |   II   |   II    |
+        +=========+===================+========+=========+========+=========+
+        | early   | ``True``          |``True``|``None`` |``True``|``False``|
+        +---------+-------------------+--------+---------+--------+---------+
+        | late    | ``True``          |``None``|``None`` |``None``|``None`` |
+        +---------+-------------------+--------+---------+--------+---------+
+        | early   | ``False``         |``True``|``False``|``True``|``True`` |
+        +---------+-------------------+--------+---------+--------+---------+
+        """
+        try:
+            return self._patient_data
+        except AttributeError:
+            raise AttributeError(
+                "No patient data has been loaded yet"
+            )
+
+    @patient_data.setter
+    def patient_data(self, patient_data: pd.DataFrame):
+        """Load the patient data. For now, this just calls the :meth:`load_data`
+        method, but at a later point, I would like to write a function here
+        that generates the pandas :class:`DataFrame` from the internal matrix
+        representation of the data.
+        """
+        self._patient_data = patient_data.copy()
+        self.load_data(patient_data)
+
+
+    def load_data(
+        self,
+        data: pd.DataFrame,
+        modality_spsn: Optional[Dict[str, List[float]]] = None,
+        mode = "HMM"
+    ):
+        """Load data as table of patients with involvement details and convert
+        it into internal representation of a matrix.
+
+        Args:
+            data: The table with rows of patients and columns of patient and
+                involvement details. The table's header must have three levels
+                that categorize the individual lymph node level's involvement
+                to the corresponding diagnostic modality (first level), the
+                side of the LNL (second level) and finaly the name of the LNL
+                (third level). Additionally, the patient's T-category must be
+                stored under ('info', 'tumor', 't_stage') and whether the tumor
+                extends over the mid-sagittal line should be noted under
+                ('info', 'tumor', 'midline_extension'). So, part of this table
+                could look like this:
+
+                +-----------------------------+---------------------+
+                |            info             |         MRI         |
+                +-----------------------------+----------+----------+
+                |            tumor            |   ipsi   |  contra  |
+                +---------+-------------------+----------+----------+
+                | t_stage | midline_extension |    II    |    II    |
+                +=========+===================+==========+==========+
+                | early   | ``True``          | ``True`` | ``None`` |
+                +---------+-------------------+----------+----------+
+                | late    | ``True``          | ``None`` | ``None`` |
+                +---------+-------------------+----------+----------+
+                | early   | ``False``         | ``True`` | ``True`` |
+                +---------+-------------------+----------+----------+
+
+            modality_spsn: If no diagnostic modalities have been defined yet,
+                this must be provided to build the observation matrix.
+
+        See Also:
+            :attr:`patient_data`: The attribute for loading and exporting data.
+
+            :meth:`Bilateral.load_data`: Loads data into a bilateral network by
+            splitting it into ipsi- & contralateral side and passing each to
+            the respective unilateral method (see below).
+
+            :meth:`Unilateral.load_data`: Data loading method of the unilateral
+            network.
+        """
+        ext_data = data.loc[(data[("info", "tumor", "midline_extension")]==True)]
+        noext_data = data.loc[(data[("info", "tumor", "midline_extension")]==False)]
+        unknown_data = data.loc[(data['info', 'tumor', 'midline_extension']!=False) & (data['info', 'tumor', 'midline_extension']!=True)] 
+
+        if len(ext_data) > 0:
+            self.ext.load_data(
+                ext_data,
+                modality_spsn=modality_spsn,
+                mode=mode
+            )
+        
+        if len(noext_data) > 0:
+            self.noext.load_data(
+                noext_data,
+                modality_spsn=modality_spsn,
+                mode=mode
+            )
+        if len(unknown_data) > 0 & len(ext_data) + len(noext_data) > 0:
+            self.ext_unknown.load_data(
+                unknown_data,
+                modality_spsn=modality_spsn,
+                mode=mode
+            )
+
+            self.noext_unknown.load_data(
+                unknown_data,
+                modality_spsn=modality_spsn,
+                mode=mode
+            )
+
+        if len(unknown_data) > 0 & len(ext_data) + len(noext_data) == 0:
+            self.ext.load_data(
+                unknown_data,
+                modality_spsn=modality_spsn,
+                mode=mode
+            )
+
+            self.noext.load_data(
+                unknown_data,
+                modality_spsn=modality_spsn,
+                mode=mode
+            )
+
+    def check_and_assign(self, new_params: np.ndarray):
+        """Check that the spread probability (rates) and the parameters for the
+        marginalization over diagnose times are all within limits and assign them to
+        the model. Also, make sure the ipsi- and contralateral distributions over
+        diagnose times are the same instance of the :class:`MarginalizorDict`.
+
+        Args:
+            new_params: The set of :attr:`spread_probs` and parameters to provide for
+                updating the parametrized distributions over diagnose times.
+
+        Warning:
+            This method assumes that the parametrized distributions (instances of
+            :class:`Marginalizor`) all raise a ``ValueError`` when provided with
+            invalid parameters.
+        """
+        k = len(self.spread_probs)
+        l = len(self.diag_time_dists)
+        new_spread_probs = new_params[:k]
+        new_marg_params = new_params[k:(k+l)]
+
+        try:
+            self.ext.ipsi.diag_time_dists.update(new_marg_params)
+            self.ext_unknown.ipsi.diag_time_dists.update(new_marg_params)
+            self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
+            self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists
+            self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
+            self.ext_unknown.contra.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists
+            self.noext_unknown.ipsi.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists
+            self.noext_unknown.contra.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists
+            self.midext_prob = new_params
+        except ValueError as val_err:
+            raise ValueError(
+                "Parameters for marginalization over diagnose times are invalid"
+            ) from val_err
+
+        if new_spread_probs.shape != self.spread_probs.shape:
+            raise ValueError(
+                "Shape of provided spread parameters does not match network"
+            )
+        if np.any(0. > new_spread_probs) or np.any(new_spread_probs > 1.):
+            raise ValueError(
+                "Spread probs must be between 0 and 1"
+            )
+        if 0. > self.midext_prob or self.midext_prob > 1.:
+            raise ValueError(
+                "Midline extension probability must be between 0 and 1"
+            )
+
+        self.spread_probs = new_spread_probs
+
+
+    def likelihood(
+        self,
+        data: Optional[pd.DataFrame] = None,
+        given_params: Optional[np.ndarray] = None,
+        log: bool = True,
+    ) -> float:
+        """Compute log-likelihood of (already stored) data, given the spread
+        probabilities and either a discrete diagnose time or a distribution to
+        use for marginalization over diagnose times.
+
+        Args:
+            data: Table with rows of patients and columns of per-LNL involvment. See
+                :meth:`load_data` for more details on how this should look like.
+
+            given_params: The likelihood is a function of these parameters. They mainly
+                consist of the :attr:`spread_probs` of the model. Any excess parameters
+                will be used to update the parametrized distributions used for
+                marginalizing over the diagnose times (see :attr:`diag_time_dists`).
+
+            log: When ``True``, the log-likelihood is returned.
+
+        Returns:
+            The log-likelihood :math:`\\log{p(D \\mid \\theta)}` where :math:`D`
+            is the data and :math:`\\theta` is the tuple of spread probabilities
+            and diagnose times or distributions over diagnose times.
+
+        See Also:
+            :attr:`spread_probs`: Property for getting and setting the spread
+            probabilities, of which a lymphatic network has as many as it has
+            :class:`Edge` instances (in case no symmetries apply).
+
+            :meth:`Unilateral.likelihood`: The log-likelihood function of
+            the unilateral system.
+
+            :meth:`Bilateral.likelihood`: The (log-)likelihood function of the
+            bilateral system.
+        """
+        if data is not None:
+            self.patient_data = data
+
+        try:
+            self.check_and_assign(given_params)
+        except ValueError:
+            return -np.inf if log else 0.
+
+        llh = 0. if log else 1.
+
+        if len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']!=False) & (self.patient_data['info', 'tumor', 'midline_extension']!=True)])>0:
+            state_probs = {}
+            state_probs["ipsi"] = self.ext_unknown.state_probs_ipsi()
+            state_probs["contra"] = self.ext_unknown.state_probs_contra()
+            state_probs2 = {}
+            state_probs2["ipsi"] = self.noext_unknown.state_probs_ipsi()
+            state_probs2["contra"] = self.noext_unknown.state_probs_contra()
+            t_stages = list(set(self.patient_data[("info","tumor","t_stage")]))
+            t_stages = self.ext_unknown.t_stages()
+        
+
+            for stage in t_stages:
+                p = self.ext_unknown._likelihood2(stage=stage, state_probs=state_probs)*self.midext_prob
+                p2 = self.noext_unknown._likelihood2(stage=stage, state_probs=state_probs2)*(1-self.midext_prob)
+                llh += np.sum(np.log(p+p2))
+
+        if len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False) | (self.patient_data['info', 'tumor', 'midline_extension']==True)])>0:
+            if log:
+                llh += self.ext._likelihood(log=log) + np.log(self.midext_prob)*len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==True)])
+                llh += self.noext._likelihood(log=log) + np.log(1-self.midext_prob)*len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False)])
+            else:
+                llh *= self.ext._likelihood(log=log)*self.midext_prob**len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==True)])
+                llh *= self.noext._likelihood(log=log)*(1-self.midext_prob)**len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False)])
+
+        return llh
+
+
+    def risk(
+        self,
+        given_params: Optional[np.ndarray] = None,
+        midline_extension: bool = True,
+        **kwargs,
+    ) -> float:
+        """Compute the risk of nodal involvement given a specific diagnose.
+
+        Args:
+            spread_probs: Set ot new spread parameters. This also contains the
+                mixing parameter alpha in the last position.
+            midline_extension: Whether or not the patient's tumor extends over
+                the mid-sagittal line.
+
+        See Also:
+            :meth:`Bilateral.risk`: Depending on whether or not the patient's
+            tumor does extend over the midline, the risk function of the
+            respective :class:`Bilateral` instance gets called.
+        """
+        if given_params is not None:
+            self.check_and_assign(given_params)
+
+        if midline_extension:
+            return self.ext.risk(**kwargs)
+        else:
+            return self.noext.risk(**kwargs)
+
+
+    def generate_dataset(
+        self,
+        num_patients: int,
+        stage_dist: Dict[str, float],
+        ext_prob: float,
+        **_kwargs,
+    ) -> pd.DataFrame:
+        """Generate/sample a pandas :class:`DataFrame` from the defined network.
+
+        Args:
+            num_patients: Number of patients to generate.
+            stage_dist: Probability to find a patient in a certain T-stage.
+            ext_prob: Probability that a patient's primary tumor extends over
+                the mid-sagittal line.
+        """
+        drawn_ext = np.random.choice(
+            [True, False], p=[ext_prob, 1. - ext_prob], size=num_patients
+        )
+        ext_dataset = self.ext.generate_dataset(
+            num_patients=num_patients,
+            stage_dist=stage_dist,
+        )
+        noext_dataset = self.noext.generate_dataset(
+            num_patients=num_patients,
+            stage_dist=stage_dist,
+        )
+
+        dataset = noext_dataset.copy()
+        dataset.loc[drawn_ext] = ext_dataset.loc[drawn_ext]
+        dataset[('info', 'tumor', 'midline_extension')] = drawn_ext
+
+        return dataset
\ No newline at end of file
diff --git a/lymph/unilateral.py b/lymph/unilateral.py
index 1f5aa1b..01527c3 100644
--- a/lymph/unilateral.py
+++ b/lymph/unilateral.py
@@ -358,27 +358,38 @@ def comp_diagnose_prob(
         return prob
 
 
-    def _gen_state_list(self):
+    def _gen_state_list(self, midline):
         """Generates the list of (hidden) states.
         """
         if not hasattr(self, "_state_list"):
-            self._state_list = np.zeros(
-                shape=(2**len(self.lnls), len(self.lnls)), dtype=int
-            )
-        for i in range(2**len(self.lnls)):
-            self._state_list[i] = [
-                int(digit) for digit in change_base(i, 2, length=len(self.lnls))
-            ]
+            if midline == False:
+                self._state_list = np.zeros(
+                    shape=(2**len(self.lnls), len(self.lnls)), dtype=int
+                )
+            if midline == True:
+                self._state_list = np.zeros(
+                    shape=(2**1, 1), dtype=int
+                )
+        if midline == False:
+            for i in range(2**len(self.lnls)):
+                self._state_list[i] = [
+                    int(digit) for digit in change_base(i, 2, length=len(self.lnls))
+                ]
+        if midline == True:
+            for i in range(2**1):
+                self._state_list[i] = [
+                    int(digit) for digit in change_base(i, 2, length=1)
+                ]
 
     @property
-    def state_list(self):
+    def state_list(self, midline = False):
         """Return list of all possible hidden states. They are arranged in the
         same order as the lymph node levels in the network/graph.
         """
         try:
             return self._state_list
         except AttributeError:
-            self._gen_state_list()
+            self._gen_state_list(midline)
             return self._state_list
 
 
@@ -497,7 +508,6 @@ def transition_matrix(self) -> np.ndarray:
             self._gen_transition_matrix()
             return self._transition_matrix
 
-
     @property
     def modalities(self):
         """Return specificity & sensitivity stored in this :class:`System` for
@@ -790,7 +800,6 @@ def _evolve(
 
         return state_probs
 
-
     def check_and_assign(self, new_params: np.ndarray):
         """Check that the spread probability (rates) and the parameters for the
         marginalization over diagnose times are all within limits and assign them to

From 1db3047e5dc13fa31351bf8a466e6bb1ebaabab1 Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Thu, 13 Jul 2023 17:30:20 +0200
Subject: [PATCH 13/35] deleted unused code to separate midline and unilateral
 class

---
 lymph/unilateral.py | 25 +++++++------------------
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/lymph/unilateral.py b/lymph/unilateral.py
index 01527c3..a0c4a09 100644
--- a/lymph/unilateral.py
+++ b/lymph/unilateral.py
@@ -362,24 +362,13 @@ def _gen_state_list(self, midline):
         """Generates the list of (hidden) states.
         """
         if not hasattr(self, "_state_list"):
-            if midline == False:
-                self._state_list = np.zeros(
-                    shape=(2**len(self.lnls), len(self.lnls)), dtype=int
-                )
-            if midline == True:
-                self._state_list = np.zeros(
-                    shape=(2**1, 1), dtype=int
-                )
-        if midline == False:
-            for i in range(2**len(self.lnls)):
-                self._state_list[i] = [
-                    int(digit) for digit in change_base(i, 2, length=len(self.lnls))
-                ]
-        if midline == True:
-            for i in range(2**1):
-                self._state_list[i] = [
-                    int(digit) for digit in change_base(i, 2, length=1)
-                ]
+            self._state_list = np.zeros(
+                shape=(2**len(self.lnls), len(self.lnls)), dtype=int
+            )
+        for i in range(2**len(self.lnls)):
+            self._state_list[i] = [
+                int(digit) for digit in change_base(i, 2, length=len(self.lnls))
+            ]
 
     @property
     def state_list(self, midline = False):

From 350c42b7702de3a1b58d4af39f8fa355cc25e1d9 Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Thu, 13 Jul 2023 17:31:02 +0200
Subject: [PATCH 14/35] deleted unused code to separate unilateral and midline
 class

---
 lymph/unilateral.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lymph/unilateral.py b/lymph/unilateral.py
index a0c4a09..ca9e695 100644
--- a/lymph/unilateral.py
+++ b/lymph/unilateral.py
@@ -358,7 +358,7 @@ def comp_diagnose_prob(
         return prob
 
 
-    def _gen_state_list(self, midline):
+    def _gen_state_list(self):
         """Generates the list of (hidden) states.
         """
         if not hasattr(self, "_state_list"):
@@ -371,14 +371,14 @@ def _gen_state_list(self, midline):
             ]
 
     @property
-    def state_list(self, midline = False):
+    def state_list(self):
         """Return list of all possible hidden states. They are arranged in the
         same order as the lymph node levels in the network/graph.
         """
         try:
             return self._state_list
         except AttributeError:
-            self._gen_state_list(midline)
+            self._gen_state_list()
             return self._state_list
 
 
From 5bf0bf2e14d2e03e583e7935050e9bc4e2014c5f Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Thu, 13 Jul 2023 17:35:11 +0200
Subject: [PATCH 15/35] deleted old file

---
 lymph/midline_old.py | 595 -------------------------------------------
 1 file changed, 595 deletions(-)
 delete mode 100644 lymph/midline_old.py

diff --git a/lymph/midline_old.py b/lymph/midline_old.py
deleted file mode 100644
index 138dad8..0000000
--- a/lymph/midline_old.py
+++ /dev/null
@@ -1,595 +0,0 @@
-from typing import Dict, List, Optional, Tuple, Union
-
-import numpy as np
-import pandas as pd
-
-from .bilateral import Bilateral
-from .timemarg import MarginalizorDict
-
-
-class MidlineBilateral:
-    """Model a bilateral lymphatic system where an additional risk factor can
-    be provided in the data: Whether or not the primary tumor extended over the
-    mid-sagittal line.
-
-    It is reasonable to assume (and supported by data) that such an extension
-    significantly increases the risk for metastatic spread to the contralateral
-    side of the neck. This class attempts to capture this using a simple
-    assumption: We assume that the probability of spread to the contralateral
-    side for patients *with* midline extension is larger than for patients
-    *without* it, but smaller than the probability of spread to the ipsilateral
-    side. Formally:
-
-    .. math::
-        b_c^{\\in} = \\alpha \\cdot b_i + (1 - \\alpha) \\cdot b_c^{\\not\\in}
-
-    where :math:`b_c^{\\in}` is the probability of spread from the primary tumor
-    to the contralateral side for patients with midline extension, and
-    :math:`b_c^{\\not\\in}` for patients without. :math:`\\alpha` is the linear
-    mixing parameter.
-    """
-
-    def __init__(
-        self,
-        graph: Dict[Tuple[str], List[str]],
-        use_mixing: bool = True,
-        trans_symmetric: bool = True,
-        **_kwargs
-    ):
-        """The class is constructed in a similar fashion to the
-        :class:`Bilateral`: That class contains one :class:`Unilateral` for
-        each side of the neck, while this class will contain two instances of
-        :class:`Bilateral`, one for the case of a midline extension and one for
-        the case of no midline extension.
-
-        Args:
-            graph: Dictionary of the same kind as for initialization of
-                :class:`System`. This graph will be passed to the constructors of
-                two :class:`System` attributes of this class.
-            use_mixing: Describe the contralateral base spread probabilities for the
-                case of a midline extension as a linear combination between the base
-                spread probs of the ipsilateral side and the ones of the contralateral
-                side when no midline extension is present.
-            trans_symmetric: If ``True``, the spread probabilities among the
-                LNLs will be set symmetrically.
-
-        See Also:
-            :class:`Bilateral`: Two of these are held as attributes by this
-            class. One for the case of a mid-sagittal extension of the primary
-            tumor and one for the case of no such extension.
-        """
-        self.ext   = Bilateral(
-            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
-        )
-        self.noext = Bilateral(
-            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
-        )
-        self.ext_unknown = Bilateral(
-            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
-        )
-        self.noext_unknown = Bilateral(
-            graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric
-        )
-        self.use_mixing = use_mixing
-        if self.use_mixing:
-            self.alpha_mix = 0.
-
-        self.noext.diag_time_dists = self.ext.diag_time_dists
-        self.ext_unknown.diag_time_dists = self.ext.diag_time_dists
-        self.noext_unknown.diag_time_dists = self.ext.diag_time_dists
-
-    @property
-    def graph(self) -> Dict[Tuple[str], List[str]]:
-        """Return the (unilateral) graph that was used to create this network.
-        """
-        return self.noext.graph
-
-
-    @property
-    def base_probs(self) -> np.ndarray:
-        """Base probabilities of metastatic lymphatic spread from the tumor(s)
-        to the lymph node levels. This will return the following concatenation of
-        base spread probs depending on whether ``use_mixing`` is set to ``True`` or
-        ``False``:
-
-        With the use of a mixing parameter:
-        +-----------+-------------------------+--------------+
-        | base ipsi | base contra (no midext) | mixing param |
-        +-----------+-------------------------+--------------+
-
-        Without it:
-        +-----------+----------------------+-------------------------+
-        | base ipsi | base contra (midext) | base contra (no midext) |
-        +-----------+----------------------+-------------------------+
-
-        When setting these, one needs to provide the respective shape.
-        """
-        if self.use_mixing:
-            return np.concatenate([
-                self.ext.ipsi.base_probs,
-                self.noext.contra.base_probs,
-                [self.alpha_mix],
-            ])
-        else:
-            return np.concatenate([
-                self.ext.ipsi.base_probs,
-                self.ext.contra.base_probs,
-                self.noext.contra.base_probs,
-            ])
-
-    @base_probs.setter
-    def base_probs(self, new_params: np.ndarray):
-        """Set the base probabilities from the tumor(s) to the LNLs, accounting
-        for the mixing parameter :math:`\\alpha``.
-        """
-        k = len(self.ext.ipsi.base_probs)
-
-        self.ext.ipsi.base_probs = new_params[:k]
-        self.noext.ipsi.base_probs = new_params[:k]
-        self.ext_unknown.ipsi.base_probs = new_params[:k]
-        self.noext_unknown.ipsi.base_probs = new_params[:k]
-
-        if self.use_mixing:
-            self.noext.contra.base_probs = new_params[k:2*k]
-            self.noext_unknown.contra.base_probs = new_params[k:2*k]
-            self.alpha_mix = new_params[-1]
-            # compute linear combination
-            self.ext.contra.base_probs = (
-                self.alpha_mix * self.ext.ipsi.base_probs
-                + (1. - self.alpha_mix) * self.noext.contra.base_probs
-            )
-            self.ext_unknown.contra.base_probs = (
-                self.alpha_mix * self.ext_unknown.ipsi.base_probs
-                + (1. - self.alpha_mix) * self.noext_unknown.contra.base_probs
-            )
-        else:
-            self.ext.contra.base_probs = new_params[k:2*k]
-            self.noext.contra.base_probs = new_params[2*k:3*k]
-            self.ext_unknown.contra.base_probs = new_params[k:2*k]
-            self.noext_unknown.contra.base_probs = new_params[2*k:3*k]
-
-        # avoid unnecessary double computation of ipsilateral transition matrix
-        self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
-        self.noext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
-        self.ext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
-
-
-    @property
-    def trans_probs(self) -> np.ndarray:
-        """Probabilities of lymphatic spread among the lymph node levels. They
-        are assumed to be symmetric ipsi- & contralaterally by default.
-        """
-        return self.noext.trans_probs
-
-    @trans_probs.setter
-    def trans_probs(self, new_params: np.ndarray):
-        """Set the new spread probabilities for lymphatic spread from among the
-        LNLs.
-        """
-        self.noext.trans_probs = new_params
-        self.ext.trans_probs = new_params
-        self.noext_unknown.trans_probs = new_params
-        self.ext_unknown.trans_probs = new_params
-
-        # avoid unnecessary double computation of ipsilateral transition matrix
-        self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
-        self.noext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
-        self.ext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
-
-
-    @property
-    def spread_probs(self) -> np.ndarray:
-        """These are the probabilities representing the spread of cancer along
-        lymphatic drainage pathways per timestep.
-
-        It is composed of the base spread probs (possible with the mixing parameter)
-        and the probabilities of spread among the LNLs.
-
-        +-------------+-------------+
-        | base probs  | trans probs |
-        +-------------+-------------+
-        """
-        return np.concatenate([self.base_probs, self.trans_probs])
-
-    @spread_probs.setter
-    def spread_probs(self, new_params: np.ndarray):
-        """Set the new spread probabilities and the mixing parameter
-        :math:`\\alpha`.
-        """
-        num_base_probs = len(self.base_probs)
-
-        self.base_probs  = new_params[:num_base_probs]
-        self.trans_probs = new_params[num_base_probs:]
-
-
-    @property
-    def diag_time_dists(self) -> MarginalizorDict:
-        """This property holds the probability mass functions for marginalizing over
-        possible diagnose times for each T-stage.
-
-        When setting this property, one may also provide a normal Python dict, in
-        which case it tries to convert it to a :class:`MarginalizorDict`.
-
-        Note that the method will provide the same instance of this
-        :class:`MarginalizorDict` to both sides of the network.
-
-        See Also:
-            :class:`MarginalzorDict`, :class:`Marginalizor`.
-        """
-        return self.ext.diag_time_dists
-
-    @diag_time_dists.setter
-    def diag_time_dists(self, new_dists: Union[dict, MarginalizorDict]):
-        """Assign new :class:`MarginalizorDict` to this property. If it is a normal
-        Python dictionary, tr to convert it into a :class:`MarginalizorDict`.
-        """
-        self.ext.diag_time_dists = new_dists
-        self.noext.diag_time_dists = self.ext.diag_time_dists
-        self.noext_unknown.diag_time_dists = self.ext.diag_time_dists
-        self.ext_unknown.diag_time_dists = self.ext.diag_time_dists
-
-
-    @property
-    def modalities(self):
-        """A dictionary containing the specificity :math:`s_P` and sensitivity
-        :math:`s_N` values for each diagnostic modality.
-
-        Such a dictionary can also be provided to set this property and compute
-        the observation matrices of all used systems.
-
-        See Also:
-            :meth:`Bilateral.modalities`: Getting and setting this property in
-            the normal bilateral model.
-
-            :meth:`Unilateral.modalities`: Getting and setting :math:`s_P` and
-            :math:`s_N` for a unilateral model.
-        """
-        return self.noext.modalities
-
-    @modalities.setter
-    def modalities(self, modality_spsn: Dict[str, List[float]]):
-        """Call the respective getter and setter methods of the bilateral
-        components with and without midline extension.
-        """
-        self.noext.modalities = modality_spsn
-        self.ext.modalities = modality_spsn
-        self.noext_unknown.modalities = modality_spsn
-        self.ext_unknown.modalities = modality_spsn
-
-    @property
-    def midext_prob(self):
-        """Assign the last of the new_params to the midline extension probability
-        """
-        try:
-            return self._midext_prob
-        except AttributeError as attr_err:
-            raise AttributeError(
-                "No midline extension probability has been assigned"
-            ) from attr_err
-
-    @midext_prob.setter
-    def midext_prob(self, new_params):
-        """A variable containing the midline extension probability
-        """
-        self._midext_prob = new_params[-1]
-
-    @property
-    def patient_data(self):
-        """A pandas :class:`DataFrame` with rows of patients and columns of
-        patient and involvement details. The table's header should have three
-        levels that categorize the individual lymph node level's involvement to
-        the corresponding diagnostic modality (first level), the side of the
-        LNL (second level) and finaly the name of the LNL (third level).
-        Additionally, the patient's T-category must be stored under ('info',
-        'tumor', 't_stage') and whether the tumor extends over the mid-sagittal
-        line should be noted under ('info', 'tumor', 'midline_extension'). So,
-        part of this table could look like this:
-
-        +-----------------------------+------------------+------------------+
-        |            info             |       MRI        |       PET        |
-        +-----------------------------+--------+---------+--------+---------+
-        |            tumor            |  ipsi  | contra  |  ipsi  | contra  |
-        +---------+-------------------+--------+---------+--------+---------+
-        | t_stage | midline_extension |   II   |   II    |   II   |   II    |
-        +=========+===================+========+=========+========+=========+
-        | early   | ``True``          |``True``|``None`` |``True``|``False``|
-        +---------+-------------------+--------+---------+--------+---------+
-        | late    | ``True``          |``None``|``None`` |``None``|``None`` |
-        +---------+-------------------+--------+---------+--------+---------+
-        | early   | ``False``         |``True``|``False``|``True``|``True`` |
-        +---------+-------------------+--------+---------+--------+---------+
-        """
-        try:
-            return self._patient_data
-        except AttributeError:
-            raise AttributeError(
-                "No patient data has been loaded yet"
-            )
-
-    @patient_data.setter
-    def patient_data(self, patient_data: pd.DataFrame):
-        """Load the patient data. For now, this just calls the :meth:`load_data`
-        method, but at a later point, I would like to write a function here
-        that generates the pandas :class:`DataFrame` from the internal matrix
-        representation of the data.
-        """
-        self._patient_data = patient_data.copy()
-        self.load_data(patient_data)
-
-
-    def load_data(
-        self,
-        data: pd.DataFrame,
-        modality_spsn: Optional[Dict[str, List[float]]] = None,
-        mode = "HMM"
-    ):
-        """Load data as table of patients with involvement details and convert
-        it into internal representation of a matrix.
-
-        Args:
-            data: The table with rows of patients and columns of patient and
-                involvement details. The table's header must have three levels
-                that categorize the individual lymph node level's involvement
-                to the corresponding diagnostic modality (first level), the
-                side of the LNL (second level) and finaly the name of the LNL
-                (third level). Additionally, the patient's T-category must be
-                stored under ('info', 'tumor', 't_stage') and whether the tumor
-                extends over the mid-sagittal line should be noted under
-                ('info', 'tumor', 'midline_extension'). So, part of this table
-                could look like this:
-
-                +-----------------------------+---------------------+
-                |            info             |         MRI         |
-                +-----------------------------+----------+----------+
-                |            tumor            |   ipsi   |  contra  |
-                +---------+-------------------+----------+----------+
-                | t_stage | midline_extension |    II    |    II    |
-                +=========+===================+==========+==========+
-                | early   | ``True``          | ``True`` | ``None`` |
-                +---------+-------------------+----------+----------+
-                | late    | ``True``          | ``None`` | ``None`` |
-                +---------+-------------------+----------+----------+
-                | early   | ``False``         | ``True`` | ``True`` |
-                +---------+-------------------+----------+----------+
-
-            modality_spsn: If no diagnostic modalities have been defined yet,
-                this must be provided to build the observation matrix.
-
-        See Also:
-            :attr:`patient_data`: The attribute for loading and exporting data.
-
-            :meth:`Bilateral.load_data`: Loads data into a bilateral network by
-            splitting it into ipsi- & contralateral side and passing each to
-            the respective unilateral method (see below).
-
-            :meth:`Unilateral.load_data`: Data loading method of the unilateral
-            network.
-        """
-        ext_data = data.loc[(data[("info", "tumor", "midline_extension")]==True)]
-        noext_data = data.loc[(data[("info", "tumor", "midline_extension")]==False)]
-        unknown_data = data.loc[(data['info', 'tumor', 'midline_extension']!=False) & (data['info', 'tumor', 'midline_extension']!=True)] 
-
-        if len(ext_data) > 0:
-            self.ext.load_data(
-                ext_data,
-                modality_spsn=modality_spsn,
-                mode=mode
-            )
-        
-        if len(noext_data) > 0:
-            self.noext.load_data(
-                noext_data,
-                modality_spsn=modality_spsn,
-                mode=mode
-            )
-        if len(unknown_data) > 0 & len(ext_data) + len(noext_data) > 0:
-            self.ext_unknown.load_data(
-                unknown_data,
-                modality_spsn=modality_spsn,
-                mode=mode
-            )
-
-            self.noext_unknown.load_data(
-                unknown_data,
-                modality_spsn=modality_spsn,
-                mode=mode
-            )
-
-        if len(unknown_data) > 0 & len(ext_data) + len(noext_data) == 0:
-            self.ext.load_data(
-                unknown_data,
-                modality_spsn=modality_spsn,
-                mode=mode
-            )
-
-            self.noext.load_data(
-                unknown_data,
-                modality_spsn=modality_spsn,
-                mode=mode
-            )
-
-    def check_and_assign(self, new_params: np.ndarray):
-        """Check that the spread probability (rates) and the parameters for the
-        marginalization over diagnose times are all within limits and assign them to
-        the model. Also, make sure the ipsi- and contralateral distributions over
-        diagnose times are the same instance of the :class:`MarginalizorDict`.
-
-        Args:
-            new_params: The set of :attr:`spread_probs` and parameters to provide for
-                updating the parametrized distributions over diagnose times.
-
-        Warning:
-            This method assumes that the parametrized distributions (instances of
-            :class:`Marginalizor`) all raise a ``ValueError`` when provided with
-            invalid parameters.
-        """
-        k = len(self.spread_probs)
-        l = len(self.diag_time_dists)
-        new_spread_probs = new_params[:k]
-        new_marg_params = new_params[k:(k+l)]
-
-        try:
-            self.ext.ipsi.diag_time_dists.update(new_marg_params)
-            self.ext_unknown.ipsi.diag_time_dists.update(new_marg_params)
-            self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
-            self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists
-            self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
-            self.ext_unknown.contra.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists
-            self.noext_unknown.ipsi.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists
-            self.noext_unknown.contra.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists
-            self.midext_prob = new_params
-        except ValueError as val_err:
-            raise ValueError(
-                "Parameters for marginalization over diagnose times are invalid"
-            ) from val_err
-
-        if new_spread_probs.shape != self.spread_probs.shape:
-            raise ValueError(
-                "Shape of provided spread parameters does not match network"
-            )
-        if np.any(0. > new_spread_probs) or np.any(new_spread_probs > 1.):
-            raise ValueError(
-                "Spread probs must be between 0 and 1"
-            )
-        if 0. > self.midext_prob or self.midext_prob > 1.:
-            raise ValueError(
-                "Midline extension probability must be between 0 and 1"
-            )
-
-        self.spread_probs = new_spread_probs
-
-
-    def likelihood(
-        self,
-        data: Optional[pd.DataFrame] = None,
-        given_params: Optional[np.ndarray] = None,
-        log: bool = True,
-    ) -> float:
-        """Compute log-likelihood of (already stored) data, given the spread
-        probabilities and either a discrete diagnose time or a distribution to
-        use for marginalization over diagnose times.
-
-        Args:
-            data: Table with rows of patients and columns of per-LNL involvment. See
-                :meth:`load_data` for more details on how this should look like.
-
-            given_params: The likelihood is a function of these parameters. They mainly
-                consist of the :attr:`spread_probs` of the model. Any excess parameters
-                will be used to update the parametrized distributions used for
-                marginalizing over the diagnose times (see :attr:`diag_time_dists`).
-
-            log: When ``True``, the log-likelihood is returned.
-
-        Returns:
-            The log-likelihood :math:`\\log{p(D \\mid \\theta)}` where :math:`D`
-            is the data and :math:`\\theta` is the tuple of spread probabilities
-            and diagnose times or distributions over diagnose times.
-
-        See Also:
-            :attr:`spread_probs`: Property for getting and setting the spread
-            probabilities, of which a lymphatic network has as many as it has
-            :class:`Edge` instances (in case no symmetries apply).
-
-            :meth:`Unilateral.likelihood`: The log-likelihood function of
-            the unilateral system.
-
-            :meth:`Bilateral.likelihood`: The (log-)likelihood function of the
-            bilateral system.
-        """
-        if data is not None:
-            self.patient_data = data
-
-        try:
-            self.check_and_assign(given_params)
-        except ValueError:
-            return -np.inf if log else 0.
-
-        llh = 0. if log else 1.
-
-        if len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']!=False) & (self.patient_data['info', 'tumor', 'midline_extension']!=True)])>0:
-            state_probs = {}
-            state_probs["ipsi"] = self.ext_unknown.state_probs_ipsi()
-            state_probs["contra"] = self.ext_unknown.state_probs_contra()
-            state_probs2 = {}
-            state_probs2["ipsi"] = self.noext_unknown.state_probs_ipsi()
-            state_probs2["contra"] = self.noext_unknown.state_probs_contra()
-            t_stages = list(set(self.patient_data[("info","tumor","t_stage")]))
-            t_stages = self.ext_unknown.t_stages()
-        
-
-            for stage in t_stages:
-                p = self.ext_unknown._likelihood2(stage=stage, state_probs=state_probs)*self.midext_prob
-                p2 = self.noext_unknown._likelihood2(stage=stage, state_probs=state_probs2)*(1-self.midext_prob)
-                llh += np.sum(np.log(p+p2))
-
-        if len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False) | (self.patient_data['info', 'tumor', 'midline_extension']==True)])>0:
-            if log:
-                llh += self.ext._likelihood(log=log) + np.log(self.midext_prob)*len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==True)])
-                llh += self.noext._likelihood(log=log) + np.log(1-self.midext_prob)*len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False)])
-            else:
-                llh *= self.ext._likelihood(log=log)*self.midext_prob**len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==True)])
-                llh *= self.noext._likelihood(log=log)*(1-self.midext_prob)**len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False)])
-
-        return llh
-
-
-    def risk(
-        self,
-        given_params: Optional[np.ndarray] = None,
-        midline_extension: bool = True,
-        **kwargs,
-    ) -> float:
-        """Compute the risk of nodal involvement given a specific diagnose.
-
-        Args:
-            spread_probs: Set ot new spread parameters. This also contains the
-                mixing parameter alpha in the last position.
-            midline_extension: Whether or not the patient's tumor extends over
-                the mid-sagittal line.
-
-        See Also:
-            :meth:`Bilateral.risk`: Depending on whether or not the patient's
-            tumor does extend over the midline, the risk function of the
-            respective :class:`Bilateral` instance gets called.
-        """
-        if given_params is not None:
-            self.check_and_assign(given_params)
-
-        if midline_extension:
-            return self.ext.risk(**kwargs)
-        else:
-            return self.noext.risk(**kwargs)
-
-
-    def generate_dataset(
-        self,
-        num_patients: int,
-        stage_dist: Dict[str, float],
-        ext_prob: float,
-        **_kwargs,
-    ) -> pd.DataFrame:
-        """Generate/sample a pandas :class:`DataFrame` from the defined network.
-
-        Args:
-            num_patients: Number of patients to generate.
-            stage_dist: Probability to find a patient in a certain T-stage.
-            ext_prob: Probability that a patient's primary tumor extends over
-                the mid-sagittal line.
-        """
-        drawn_ext = np.random.choice(
-            [True, False], p=[ext_prob, 1. - ext_prob], size=num_patients
-        )
-        ext_dataset = self.ext.generate_dataset(
-            num_patients=num_patients,
-            stage_dist=stage_dist,
-        )
-        noext_dataset = self.noext.generate_dataset(
-            num_patients=num_patients,
-            stage_dist=stage_dist,
-        )
-
-        dataset = noext_dataset.copy()
-        dataset.loc[drawn_ext] = ext_dataset.loc[drawn_ext]
-        dataset[('info', 'tumor', 'midline_extension')] = drawn_ext
-
-        return dataset
\ No newline at end of file

From 05b8fa2efe026f27656d09975eca0b5ffe0ec6b3 Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Thu, 13 Jul 2023 17:36:36 +0200
Subject: [PATCH 16/35] started implementation of midline diagnose matrix

---
 lymph/midline.py | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/lymph/midline.py b/lymph/midline.py
index 598a3a7..dca5995 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -399,6 +399,45 @@ def midext_prob(self, new_params):
         """
         self._midext_prob = new_params[-1]
 
+    def _gen_diagnose_matrices_midext(self, table: pd.DataFrame, t_stage: str):
+        """Generate the matrix containing the probabilities to see the provided
+        diagnose, given any possible hidden state. The resulting matrix has
+        size :math:`2^N \\times M` where :math:`N` is the number of nodes in
+        the graph and :math:`M` the number of patients.
+
+        Args:
+            table: pandas ``DataFrame`` containing rows of patients. Must have
+                ``MultiIndex`` columns with two levels: First, the modalities
+                and second, the LNLs.
+            t_stage: The T-stage all the patients in ``table`` belong to.
+        """
+        if not hasattr(self, "_diagnose_matrices_midext"):
+            self._diagnose_matrices_midext = {}
+
+        self.patient_data['info', 'tumor', 'midline_extension']
+        self.patient_data['info', 'tumor', 't_stage'].unique()
+
+        shape = (len(self.midexstate_list), len(table))
+        self._diagnose_matrices_midext[t_stage] = np.ones(shape=shape)
+
+        for i,state in enumerate(self.midexstate_list):
+            self.state = state
+
+            for j, (_, patient) in enumerate(table.iterrows()):
+                patient_obs_prob = self.comp_diagnose_prob(patient)
+                self._diagnose_matrices_midext[t_stage][i,j] = patient_obs_prob
+
+
+    @property
+    def diagnose_matrices_midext(self):
+        try:
+            return self._diagnose_matrices_midext
+        except AttributeError as att_err:
+            raise AttributeError(
+                "No data has been loaded and hence no observation matrix has "
+                "been computed."
+            ) from att_err
+
     @property
     def patient_data(self):
         """A pandas :class:`DataFrame` with rows of patients and columns of

From 34850f5f4273bbfa2156b062ff6405c215f3967d Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Fri, 14 Jul 2023 00:22:29 +0200
Subject: [PATCH 17/35] deleted .DS_Store

---
 lymph/.DS_Store | Bin 6148 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 lymph/.DS_Store

diff --git a/lymph/.DS_Store b/lymph/.DS_Store
deleted file mode 100644
index 5008ddfcf53c02e82d7eee2e57c38e5672ef89f6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3
zem<@ulZcFPQ@L2!n>{z**<q8>++&mCkOWA81W14cNZ<zv;LbK1Poaz?KmsK2CSc!(
z0ynLxE!0092;Krf2c+FF_Fe*7ECH>lEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ
zLs35+`xjp>T0<F0fCPF1$Cyrb|F7^5{eNG?83~ZUUlGt@xh*qZDeu<Z%US-OSsOPv
j)R!Z4KLME7ReXlK;d!wEw5GODWMKRea10D2@KpjYNUI8I


From 47415ac5b8acae62a67232abe2b31399a559b2c8 Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Fri, 14 Jul 2023 15:19:07 +0200
Subject: [PATCH 18/35] initialized ipsi and contra states separately

---
 lymph/midline.py | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/lymph/midline.py b/lymph/midline.py
index dca5995..abc9920 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -190,7 +190,6 @@ def trans_probs(self, new_params: np.ndarray):
         """
         self.noext.trans_probs = new_params
         self.ext.trans_probs = new_params
-    
         # avoid unnecessary double computation of ipsilateral transition matrix
         self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
 
@@ -658,10 +657,14 @@ def _likelihood_mid(
         t_stages = list(stored_t_stages.intersection(provided_t_stages))
         
         max_t = self.diag_time_dists.max_t
-        state_probs_ex = np.zeros(shape=len(self.state_list), dtype=float)
-        state_probs_ex[0] = 1.
-        state_probs_nox = np.zeros(shape=len(self.state_list), dtype=float)
-        state_probs_nox[0] = 1.
+        state_probs_ipsi_ex = np.zeros(shape=len(self.state_list), dtype=float)
+        state_probs_ipsi_ex[0] = 1.
+        state_probs_ipsi_nox = np.zeros(shape=len(self.state_list), dtype=float)
+        state_probs_ipsi_nox[0] = 1.
+        state_probs_contra_ex = np.zeros(shape=len(self.state_list), dtype=float)
+        state_probs_contra_ex[0] = 1.
+        state_probs_contra_nox = np.zeros(shape=len(self.state_list), dtype=float)
+        state_probs_contra_nox[0] = 1.
         state_probs_midext = np.zeros(shape=len(self.midexstate_list), dtype=float)
         state_probs_midext[0] = 1.
 
@@ -673,21 +676,21 @@ def _likelihood_mid(
 
         for i in range(max_t):
             for stage in t_stages:
-                state_probs_nox["ipsi"] = self.noext.ipsi._evolve_onestep(start_state = state_probs_nox["ipsi"])
-                state_probs_nox["contra"] = self.noext.contra._evolve_onestep(start_state = state_probs_nox["contra"])
-                state_probs_ex["ipsi"] = state_probs_midext[1] * self.ext.ipsi._evolve_onestep(start_state = state_probs_ex["ipsi"]) + state_probs_midext[0] * state_probs_nox["ipsi"]
-                state_probs_ex["contra"] = state_probs_midext[1] * self.ext.contra._evolve_onestep(start_state = state_probs_ex["contra"]) + state_probs_midext[0] * state_probs_nox["contra"]
+                state_probs_ipsi_nox = self.noext.ipsi._evolve_onestep(start_state = state_probs_ipsi_nox)
+                state_probs_contra_nox = self.noext.contra._evolve_onestep(start_state = state_probs_contra_nox)
+                state_probs_ipsi_ex = state_probs_midext[1] * self.ext.ipsi._evolve_onestep(start_state = state_probs_ipsi_ex) + state_probs_midext[0] * state_probs_ipsi_nox
+                state_probs_contra_ex = state_probs_midext[1] * self.ext.contra._evolve_onestep(start_state = state_probs_contra_ex) + state_probs_midext[0] * state_probs_contra_nox
                 state_probs_midext = self._evolve_midext(start_midextstate = state_probs_midext)
 
                 joint_state_probs_nox = (
-                    state_probs_nox["ipsi"].T
+                    state_probs_ipsi_nox.T
                     @ np.diag(self.ipsi.diag_time_dists[stage].pmf)
-                    @ state_probs_nox["contra"]
+                    @ state_probs_contra_nox
                 )
                 joint_state_probs_ex = (
-                    state_probs_ex["ipsi"].T
+                    state_probs_ipsi_ex.T
                     @ np.diag(self.ipsi.diag_time_dists[stage].pmf)
-                    @ state_probs_ex["contra"]
+                    @ state_probs_contra_ex
                 )
                 p_ex = np.sum(
                     self.ext.ipsi.diagnose_matrices[stage]

From 29105db757030dc06fa4e44ed2fde93c4f0886e7 Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Thu, 20 Jul 2023 14:52:11 +0200
Subject: [PATCH 19/35] revise likelihood and related functioncs

---
 lymph/midline.py | 189 ++++++++++++++++++++++-------------------------
 1 file changed, 90 insertions(+), 99 deletions(-)

diff --git a/lymph/midline.py b/lymph/midline.py
index abc9920..cffa750 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -380,23 +380,6 @@ def modalities(self, modality_spsn: Dict[str, List[float]]):
         """
         self.noext.modalities = modality_spsn
         self.ext.modalities = modality_spsn
-        
-    @property
-    def midext_prob(self):
-        """Assign the last of the new_params to the midline extension probability
-        """
-        try:
-            return self._midext_prob
-        except AttributeError as attr_err:
-            raise AttributeError(
-                "No midline extension probability has been assigned"
-            ) from attr_err
-
-    @midext_prob.setter
-    def midext_prob(self, new_params):
-        """A variable containing the midline extension probability
-        """
-        self._midext_prob = new_params[-1]
 
     def _gen_diagnose_matrices_midext(self, table: pd.DataFrame, t_stage: str):
         """Generate the matrix containing the probabilities to see the provided
@@ -412,21 +395,18 @@ def _gen_diagnose_matrices_midext(self, table: pd.DataFrame, t_stage: str):
         """
         if not hasattr(self, "_diagnose_matrices_midext"):
             self._diagnose_matrices_midext = {}
-
-        self.patient_data['info', 'tumor', 'midline_extension']
-        self.patient_data['info', 'tumor', 't_stage'].unique()
-
-        shape = (len(self.midexstate_list), len(table))
-        self._diagnose_matrices_midext[t_stage] = np.ones(shape=shape)
-
-        for i,state in enumerate(self.midexstate_list):
-            self.state = state
-
-            for j, (_, patient) in enumerate(table.iterrows()):
-                patient_obs_prob = self.comp_diagnose_prob(patient)
-                self._diagnose_matrices_midext[t_stage][i,j] = patient_obs_prob
-
-
+        self._diagnose_matrices_midext[t_stage] = np.zeros((len(table), 2))
+        for patient in len(table):
+            if table.iloc[patient]['info', 'tumor', 'midline_extension'] == False:
+                self.diagnose_matrices_midext[t_stage][patient, 0] = 1
+                self.diagnose_matrices_midext[t_stage][patient, 1] = 0
+            if table.iloc[patient]['info', 'tumor', 'midline_extension'] == True:
+                self.diagnose_matrices_midext[t_stage][patient, 0] = 0
+                self.diagnose_matrices_midext[t_stage][patient, 1] = 1
+            if (table.iloc[patient]['info', 'tumor', 'midline_extension'] != False) & (table.iloc[patient]['info', 'tumor', 'midline_extension'] != True):
+                self.diagnose_matrices_midext[t_stage][patient, 0] = 1
+                self.diagnose_matrices_midext[t_stage][patient, 1] = 1
+    
     @property
     def diagnose_matrices_midext(self):
         try:
@@ -526,15 +506,6 @@ def _evolve_midext(
 
         return new_midexstate
 
-    """
-    def state_probs_midext(self):
-        max_t = self.diag_time_dists.max_t
-        self.ext.state_probs_midext = {}
-        self.noext.state_probs_midext = {}
-        self.noext.state_probs_midext = self.noext._evolve_midext(t_last=max_t)
-        self.ext.state_probs_midext = self.ext.
-    """
-
     def load_data(
         self,
         data: pd.DataFrame,
@@ -657,61 +628,81 @@ def _likelihood_mid(
         t_stages = list(stored_t_stages.intersection(provided_t_stages))
         
         max_t = self.diag_time_dists.max_t
-        state_probs_ipsi_ex = np.zeros(shape=len(self.state_list), dtype=float)
-        state_probs_ipsi_ex[0] = 1.
-        state_probs_ipsi_nox = np.zeros(shape=len(self.state_list), dtype=float)
-        state_probs_ipsi_nox[0] = 1.
-        state_probs_contra_ex = np.zeros(shape=len(self.state_list), dtype=float)
-        state_probs_contra_ex[0] = 1.
-        state_probs_contra_nox = np.zeros(shape=len(self.state_list), dtype=float)
-        state_probs_contra_nox[0] = 1.
-        state_probs_midext = np.zeros(shape=len(self.midexstate_list), dtype=float)
-        state_probs_midext[0] = 1.
+        state_probs_ipsi_ex = np.zeros(
+            shape=(max_t + 1, len(self.state_list)),
+            dtype=float
+        )
+        state_probs_ipsi_ex[0,0] = 1.
+        state_probs_ipsi_nox = np.zeros(
+            shape=(max_t + 1, len(self.state_list)),
+            dtype=float
+        )
+        state_probs_ipsi_nox[0,0] = 1.
+        state_probs_contra_ex = np.zeros(
+            shape=(max_t + 1, len(self.state_list)),
+            dtype=float
+        )
+        state_probs_contra_ex[0,0] = 1.
+        state_probs_contra_nox = np.zeros(
+            shape=(max_t + 1, len(self.state_list)),
+            dtype=float
+        )
+        state_probs_contra_nox[0,0] = 1.
+        state_probs_midext = np.zeros(
+            shape=(max_t + 1, 2),
+            dtype=float
+        )
+        state_probs_midext[0,0] = 1.
 
         llh_ex = 0. if log else 1.
-        llh_nox = 0. if log else 1.
-
-        #if 1 1 in diagnose matrix midline extension:
-        #state_probs_midext[0] * llh_nox + state_probs_midext[1] * llh_ex
-
-        for i in range(max_t):
-            for stage in t_stages:
-                state_probs_ipsi_nox = self.noext.ipsi._evolve_onestep(start_state = state_probs_ipsi_nox)
-                state_probs_contra_nox = self.noext.contra._evolve_onestep(start_state = state_probs_contra_nox)
-                state_probs_ipsi_ex = state_probs_midext[1] * self.ext.ipsi._evolve_onestep(start_state = state_probs_ipsi_ex) + state_probs_midext[0] * state_probs_ipsi_nox
-                state_probs_contra_ex = state_probs_midext[1] * self.ext.contra._evolve_onestep(start_state = state_probs_contra_ex) + state_probs_midext[0] * state_probs_contra_nox
-                state_probs_midext = self._evolve_midext(start_midextstate = state_probs_midext)
-
-                joint_state_probs_nox = (
-                    state_probs_ipsi_nox.T
-                    @ np.diag(self.ipsi.diag_time_dists[stage].pmf)
-                    @ state_probs_contra_nox
+        llh_nox = 0. if log else 1.  
+
+        for stage in t_stages:
+            state_probs = np.zeros(
+            shape=(len_time_range + 1, len(self.state_list)),
+            dtype=float
+        )
+            state_probs_ipsi_nox = self.ipsi._evolve_onestep(start_state = state_probs_ipsi_nox)
+            state_probs_contra_nox = self.contra._evolve_onestep(start_state = state_probs_contra_nox)
+            state_probs_ipsi_ex = state_probs_midext[1] * self.ipsi._evolve_onestep(start_state = state_probs_ipsi_ex) + state_probs_midext[0] * state_probs_ipsi_nox
+            state_probs_contra_ex = state_probs_midext[1] * self.contra._evolve_onestep(start_state = state_probs_contra_ex) + state_probs_midext[0] * state_probs_contra_nox
+            state_probs_midext = self._evolve_midext(start_midextstate = state_probs_midext)
+                
+            for i in len(self.diagnose_matrices_midext[stage][:,0]):
+                if ((self.diagnose_matrices_midext[stage][i,0]==1) & (self.diagnose_matrices_midext[stage][i,1]==1)):
+                    self.diagnose_matrices_midext[stage][i,0] = state_probs_midext[0]
+                    self.diagnose_matrices_midext[stage][i,1] = state_probs_midext[1]
+
+            joint_state_probs_nox = (
+                state_probs_ipsi_nox.T
+                @ np.diag(self.ipsi.diag_time_dists[stage].pmf)
+                @ state_probs_contra_nox
+            )
+            joint_state_probs_ex = (
+                state_probs_ipsi_ex.T
+                @ np.diag(self.ipsi.diag_time_dists[stage].pmf)
+                @ state_probs_contra_ex
+            )
+            p_ex = np.sum(
+                self.ipsi.diagnose_matrices[stage]
+                * (joint_state_probs_ex
+                    @ self.contra.diagnose_matrices[stage]),
+                axis=0
                 )
-                joint_state_probs_ex = (
-                    state_probs_ipsi_ex.T
-                    @ np.diag(self.ipsi.diag_time_dists[stage].pmf)
-                    @ state_probs_contra_ex
+            p_ex_weighted = p_ex.T * self.diagnose_matrices_midext[stage][:,1]
+            p_nox = np.sum(
+                self.ipsi.diagnose_matrices[stage]
+                * (joint_state_probs_nox
+                    @ self.contra.diagnose_matrices[stage]),
+                axis=0
                 )
-                p_ex = np.sum(
-                    self.ext.ipsi.diagnose_matrices[stage]
-                    * (joint_state_probs_ex
-                        @ self.ext.contra.diagnose_matrices[stage]),
-                    axis=0
-                    )
-                p_nox = np.sum(
-                    self.noext.ipsi.diagnose_matrices[stage]
-                    * (joint_state_probs_nox
-                        @ self.noext.contra.diagnose_matrices[stage]),
-                    axis=0
-                    )
-                if log:
-                    llh_ex += np.sum(np.log(p_ex))
-                    llh_nox += np.sum(np.log(p_nox))
-                else:
-                    llh_ex *= np.prod(p_ex)
-                    llh_nox *= np.prod(p_nox)
-
-        return llh_ex + llh_nox
+            p_nox_weighted = p_nox.T * self.diagnose_matrices_midext[stage][:,0]
+            if log:
+                llh += np.sum(np.log(p_ex_weighted)) + np.sum(np.log(p_nox_weighted))
+            else:
+                llh *= np.prod(p_ex_weighted) * np.prod(p_nox_weighted)
+
+        return llh
 
     def likelihood(
         self,
@@ -758,13 +749,13 @@ def likelihood(
         except ValueError:
             return -np.inf if log else 0.
 
-        if len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False) | (self.patient_data['info', 'tumor', 'midline_extension']==True)])>0:
-            if log:
-                llh += self.ext._likelihood_mid(log=log)
-                llh += self.noext._likelihood_mid(log=log)
-            else:
-                llh *= self.ext._likelihood_mid(log=log)
-                llh *= self.noext._likelihood_mid(log=log)
+        
+        if log:
+            llh += self.ext._likelihood_mid(log=log)
+            llh += self.noext._likelihood_mid(log=log)
+        else:
+            llh *= self.ext._likelihood_mid(log=log)
+            llh *= self.noext._likelihood_mid(log=log)
 
         return llh
 

From 860a8c7de55297967a613ebb9210a6223becc2cd Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Wed, 26 Jul 2023 09:42:08 +0200
Subject: [PATCH 20/35] revise likelihood and depending functions

---
 lymph/bilateral.py  | 26 ---------------
 lymph/midline.py    | 77 ++++++++++++++++++++++++++++-----------------
 lymph/unilateral.py |  1 -
 3 files changed, 48 insertions(+), 56 deletions(-)

diff --git a/lymph/bilateral.py b/lymph/bilateral.py
index f3b1151..b0701b8 100644
--- a/lymph/bilateral.py
+++ b/lymph/bilateral.py
@@ -487,32 +487,6 @@ def _likelihood(
 
         return llh
 
-    def _likelihood2(
-        self,
-        stage,
-        state_probs,
-    ) -> float:
-        """Compute the (log-)likelihood of data, using the stored spread probs and
-        fixed distributions for marginalizing over diagnose times.
-
-        This method mainly exists so that the checking and assigning of the
-        spread probs can be skipped.
-        """
-        
-        joint_state_probs = (
-            state_probs["ipsi"].T
-             @ np.diag(self.ipsi.diag_time_dists[stage].pmf)
-             @ state_probs["contra"]
-        )
-        p = np.sum(
-            self.ipsi.diagnose_matrices[stage]
-            * (joint_state_probs
-                @ self.contra.diagnose_matrices[stage]),
-            axis=0
-        )
-
-        return p
-
     def likelihood(
         self,
         data: Optional[pd.DataFrame] = None,
diff --git a/lymph/midline.py b/lymph/midline.py
index cffa750..d180120 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -101,6 +101,18 @@ def midexstate_list(self):
         except AttributeError:
             self._gen_midexstate_list()
             return self._midexstate_list
+        
+    @property
+    def midexstate(self):
+        """Return the currently set state of the system.
+        """
+        return self._midexstate
+
+    @midexstate.setter
+    def midexstate(self, newstate: np.ndarray):
+        """Sets the state of the system to ``newstate``.
+        """
+        self._midexstate = newstate
     
     @property
     def midext_prob(self):
@@ -233,8 +245,8 @@ def _gen_midextransition_matrix(self):
             shape = (2**1, 2**1)
             self._midextransition_matrix = np.zeros(shape=shape)
 
-        for i,state in enumerate(self.midexstate_list):
-            self.state = state
+        for i,midexstate in enumerate(self.midexstate_list):
+            self.midexstate = midexstate
             for j in self.allowed_midextransitions[i]:
                 midextransition_prob = self.comp_midextransition_prob(self.midexstate_list[j])
                 self._midextransition_matrix[i,j] = midextransition_prob
@@ -342,17 +354,15 @@ def comp_midextransition_prob(
             Transition probability :math:`t`.
         """
         res = 1.
-        for i, midexstate in enumerate(self.midexstates):
-            if not midexstate.state:
-                in_states = [1]
-                in_weights = [self.midext_prob]
-                res *= Node.trans_prob(in_states, in_weights)[newstate[i]]
-            elif not newstate[i]:
-                res = 0.
-                break
+        if not self.midexstate:
+            in_states = [1]
+            in_weights = [self.midext_prob]
+            res *= Node.trans_prob(in_states, in_weights)[newstate]
+        elif not newstate:
+            res = 0.
 
         if acquire:
-            self.state = newstate
+            self.midexstate = newstate
 
         return res
 
@@ -477,13 +487,17 @@ def _evolve_onestep(
         """
         # All healthy state at beginning
         if start_state is None:
-            start_state = np.zeros(shape=len(self.state_list), dtype=float)
-            start_state[0] = 1.
+            start_state = np.zeros(
+            shape=(self.diag_time_dists.max_t + 1, 2),
+            dtype=float
+            )
+            start_state[0,0] = 1.
 
         # compute involvement at first time-step
-        new_state = start_state @ self.transition_matrix
+        for i in range(len(start_state)-1):
+            start_state[i+1,:] = start_state[i,:] @ self.transition_matrix
 
-        return new_state
+        return start_state
 
     def _evolve_midext(
         self, start_midexstate: Optional[np.ndarray] = None
@@ -500,11 +514,18 @@ def _evolve_midext(
 
         :meta public:
         """
+        if start_midexstate is None:
+            start_midexstate = np.zeros(
+            shape=(self.diag_time_dists.max_t + 1, 2),
+            dtype=float
+            )
+            start_midexstate[0,0] = 1.
 
         # compute involvement at first time-step
-        new_midexstate = start_midexstate @ self.midextransition_matrix
+        for i in range(len(start_midexstate)-1):
+            start_midexstate[i+1,:] = start_midexstate[i,:] @ self.midextransition_matrix
 
-        return new_midexstate
+        return start_midexstate
 
     def load_data(
         self,
@@ -654,24 +675,21 @@ def _likelihood_mid(
         )
         state_probs_midext[0,0] = 1.
 
-        llh_ex = 0. if log else 1.
-        llh_nox = 0. if log else 1.  
+        llh = 0. if log else 1.
 
         for stage in t_stages:
-            state_probs = np.zeros(
-            shape=(len_time_range + 1, len(self.state_list)),
-            dtype=float
-        )
             state_probs_ipsi_nox = self.ipsi._evolve_onestep(start_state = state_probs_ipsi_nox)
             state_probs_contra_nox = self.contra._evolve_onestep(start_state = state_probs_contra_nox)
-            state_probs_ipsi_ex = state_probs_midext[1] * self.ipsi._evolve_onestep(start_state = state_probs_ipsi_ex) + state_probs_midext[0] * state_probs_ipsi_nox
-            state_probs_contra_ex = state_probs_midext[1] * self.contra._evolve_onestep(start_state = state_probs_contra_ex) + state_probs_midext[0] * state_probs_contra_nox
-            state_probs_midext = self._evolve_midext(start_midextstate = state_probs_midext)
+            state_probs_midext = self._evolve_midext(start_midexstate = state_probs_midext)
+
+            for i in range(max_t):
+                state_probs_ipsi_ex[(i+1),:] = state_probs_midext[i,1] * self.ipsi._evolve_onestep(start_state = state_probs_ipsi_ex)[1,:] + state_probs_midext[i,0] * state_probs_ipsi_nox[(i+1),:]
+                state_probs_contra_ex[(i+1),:] = state_probs_midext[i,1] * self.contra._evolve_onestep(start_state = state_probs_contra_ex) + state_probs_midext[i,0] * state_probs_contra_nox[(i+1),:]
                 
-            for i in len(self.diagnose_matrices_midext[stage][:,0]):
+            for i in range(len(self.diagnose_matrices_midext[stage][:,0])):
                 if ((self.diagnose_matrices_midext[stage][i,0]==1) & (self.diagnose_matrices_midext[stage][i,1]==1)):
-                    self.diagnose_matrices_midext[stage][i,0] = state_probs_midext[0]
-                    self.diagnose_matrices_midext[stage][i,1] = state_probs_midext[1]
+                    self.diagnose_matrices_midext[stage][i,0] = state_probs_midext[i,0]
+                    self.diagnose_matrices_midext[stage][i,1] = state_probs_midext[i,1]
 
             joint_state_probs_nox = (
                 state_probs_ipsi_nox.T
@@ -749,6 +767,7 @@ def likelihood(
         except ValueError:
             return -np.inf if log else 0.
 
+        llh = 0. if log else 1.
         
         if log:
             llh += self.ext._likelihood_mid(log=log)
diff --git a/lymph/unilateral.py b/lymph/unilateral.py
index ca9e695..c125db3 100644
--- a/lymph/unilateral.py
+++ b/lymph/unilateral.py
@@ -169,7 +169,6 @@ def state(self, newstate: np.ndarray):
         for i, node in enumerate(self.lnls):  # only set lnl's states
             node.state = newstate[i]
 
-
     @property
     def base_probs(self):
         """The spread probablities parametrizing the edges that represent the

From d725fa3a514f05210d39a9280546483588eaf2bb Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Mon, 31 Jul 2023 17:18:05 +0200
Subject: [PATCH 21/35] fix errors in likelihood function and loading data

---
 lymph/bilateral.py  |   1 +
 lymph/midline.py    | 283 ++++++++++----------------------------------
 lymph/unilateral.py |  29 +++++
 3 files changed, 95 insertions(+), 218 deletions(-)

diff --git a/lymph/bilateral.py b/lymph/bilateral.py
index b0701b8..45973b0 100644
--- a/lymph/bilateral.py
+++ b/lymph/bilateral.py
@@ -3,6 +3,7 @@
 
 import numpy as np
 import pandas as pd
+import pdb
 
 from .timemarg import MarginalizorDict
 from .unilateral import Unilateral
diff --git a/lymph/midline.py b/lymph/midline.py
index d180120..db4b1a4 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -1,7 +1,9 @@
+import warnings
 from typing import Dict, List, Optional, Tuple, Union
 
 import numpy as np
 import pandas as pd
+import pdb
 from numpy.linalg import matrix_power as mat_pow
 
 from .bilateral import Bilateral
@@ -205,36 +207,6 @@ def trans_probs(self, new_params: np.ndarray):
         # avoid unnecessary double computation of ipsilateral transition matrix
         self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
 
-    def _gen_allowed_midextransitions(self):
-        """Generate the allowed midline extension transitions.
-        """
-        self._allowed_midextransitions = {}
-        for i in range(len(self.midexstate_list)):
-            self._allowed_midextransitions[i] = []
-            for j in range(len(self.midexstate_list)):
-                if not np.any(np.greater(self.midexstate_list[i,:],
-                                         self.midexstate_list[j,:])):
-                    self._allowed_midextransitions[i].append(j)
-
-    @property
-    def allowed_midextransitions(self):
-        """Return a dictionary that contains for each row :math:`i` of the
-        transition matrix :math:`\\mathbf{A}` the column numbers :math:`j` for
-        which the transtion probability :math:`P\\left( x_j \\mid x_i \\right)`
-        is not zero due to the forbidden self-healing.
-
-        For example: The hidden state ``[True, False]`` in a network with only
-        one tumor and two LNLs (one involved, one healthy) corresponds to the
-        index ``1`` and can only evolve into the state ``[True, True]``, which
-        has index 3. So, the key-value pair for that particular hidden state
-        would be ``1: [3]``.
-        """
-        try:
-            return self._allowed_midextransitions
-        except AttributeError:
-            self._gen_allowed_midextransitions()
-            return self._allowed_midextransitions
-
     def _gen_midextransition_matrix(self):
         """Generate the midline extension transition matrix :math:`\\mathbf{A}`, which contains
         the :math:`P \\left( S_{t+1} \\mid S_t \\right)`. :math:`\\mathbf{A}`
@@ -245,11 +217,10 @@ def _gen_midextransition_matrix(self):
             shape = (2**1, 2**1)
             self._midextransition_matrix = np.zeros(shape=shape)
 
-        for i,midexstate in enumerate(self.midexstate_list):
-            self.midexstate = midexstate
-            for j in self.allowed_midextransitions[i]:
-                midextransition_prob = self.comp_midextransition_prob(self.midexstate_list[j])
-                self._midextransition_matrix[i,j] = midextransition_prob
+        self._midextransition_matrix[0,0] = 1 - self.midext_prob
+        self._midextransition_matrix[0,1] = self.midext_prob
+        self._midextransition_matrix[1,0] = 0
+        self._midextransition_matrix[1,1] = 1
 
     @property
     def midextransition_matrix(self) -> np.ndarray:
@@ -267,24 +238,6 @@ def midextransition_matrix(self) -> np.ndarray:
             self._gen_midextransition_matrix()
             return self._midextransition_matrix
 
-    @property
-    def midextrans_probs(self) -> np.ndarray:
-        """Probabilities of lymphatic spread among the lymph node levels. They
-        are assumed to be symmetric ipsi- & contralaterally by default.
-        """
-        return self.noext.midextrans_probs
-
-    @midextrans_probs.setter
-    def midextrans_probs(self, new_params: np.ndarray):
-        """Set the new spread probabilities for lymphatic spread from among the
-        LNLs.
-        """
-        self.noext.midextrans_probs = new_params
-        self.ext.midextrans_probs = new_params
-
-        # avoid unnecessary double computation of ipsilateral transition matrix
-        self.noext.ipsi._midextransition_matrix = self.ext.ipsi.midextransition_matrix
-
     @property
     def spread_probs(self) -> np.ndarray:
         """These are the probabilities representing the spread of cancer along
@@ -333,38 +286,6 @@ def diag_time_dists(self, new_dists: Union[dict, MarginalizorDict]):
         """
         self.ext.diag_time_dists = new_dists
         self.noext.diag_time_dists = self.ext.diag_time_dists
-    
-    def comp_midextransition_prob(
-        self,
-        newstate: List[int],
-        acquire: bool = False
-    ) -> float:
-        """Computes the probability to transition to ``newstate``, given its
-        current state.
-
-        Args:
-            newstate: List of new states for each LNL in the lymphatic
-                system. The transition probability :math:`t` will be computed
-                from the current states to these states.
-            acquire: if ``True``, after computing and returning the probability,
-                the system updates its own state to be ``newstate``.
-                (default: ``False``)
-
-        Returns:
-            Transition probability :math:`t`.
-        """
-        res = 1.
-        if not self.midexstate:
-            in_states = [1]
-            in_weights = [self.midext_prob]
-            res *= Node.trans_prob(in_states, in_weights)[newstate]
-        elif not newstate:
-            res = 0.
-
-        if acquire:
-            self.midexstate = newstate
-
-        return res
 
     @property
     def modalities(self):
@@ -406,7 +327,7 @@ def _gen_diagnose_matrices_midext(self, table: pd.DataFrame, t_stage: str):
         if not hasattr(self, "_diagnose_matrices_midext"):
             self._diagnose_matrices_midext = {}
         self._diagnose_matrices_midext[t_stage] = np.zeros((len(table), 2))
-        for patient in len(table):
+        for patient in range(len(table)):
             if table.iloc[patient]['info', 'tumor', 'midline_extension'] == False:
                 self.diagnose_matrices_midext[t_stage][patient, 0] = 1
                 self.diagnose_matrices_midext[t_stage][patient, 1] = 0
@@ -470,35 +391,6 @@ def patient_data(self, patient_data: pd.DataFrame):
         self._patient_data = patient_data.copy()
         self.load_data(patient_data)
 
-    def _evolve_onestep(
-        self, start_state: Optional[np.ndarray] = None
-    ) -> np.ndarray:
-        """Evolve hidden Markov model based system over one time step. Compute
-        :math:`p(S \\mid t)` where :math:`S` is a distinct state and :math:`t`
-        is the time.
-
-        Args:
-            start_state: The current state.
-
-        Returns:
-            A matrix with the new state
-
-        :meta public:
-        """
-        # All healthy state at beginning
-        if start_state is None:
-            start_state = np.zeros(
-            shape=(self.diag_time_dists.max_t + 1, 2),
-            dtype=float
-            )
-            start_state[0,0] = 1.
-
-        # compute involvement at first time-step
-        for i in range(len(start_state)-1):
-            start_state[i+1,:] = start_state[i,:] @ self.transition_matrix
-
-        return start_state
-
     def _evolve_midext(
         self, start_midexstate: Optional[np.ndarray] = None
     ) -> np.ndarray:
@@ -588,6 +480,21 @@ def load_data(
             mode=mode
         )
 
+        if mode=="HMM":
+            t_stages = list(set(data[("info", "tumor", "t_stage")]))
+
+            for stage in t_stages:
+                table = data.loc[
+                    data[("info", "tumor", "t_stage")] == stage
+                ]
+                self._gen_diagnose_matrices_midext(table, stage)
+                if stage not in self.diag_time_dists:
+                    warnings.warn(
+                        "No distribution for marginalizing over diagnose times has "
+                        f"been defined for T-stage {stage}. During inference, all "
+                        "patients in this T-stage will be ignored."
+                    )
+
     def check_and_assign(self, new_params: np.ndarray):
         """Check that the spread probability (rates) and the parameters for the
         marginalization over diagnose times are all within limits and assign them to
@@ -634,9 +541,11 @@ def check_and_assign(self, new_params: np.ndarray):
 
         self.spread_probs = new_spread_probs
 
-    def _likelihood_mid(
+    def likelihood(
         self,
-        log: bool = True
+        data: Optional[pd.DataFrame] = None,
+        given_params: Optional[np.ndarray] = None,
+        log: bool = True,
     ) -> float:
         """Compute the (log-)likelihood of data, using the stored spread probs and
         fixed distributions for marginalizing over diagnose times.
@@ -644,141 +553,79 @@ def _likelihood_mid(
         This method mainly exists so that the checking and assigning of the
         spread probs can be skipped.
         """
-        stored_t_stages = set(self.ipsi.diagnose_matrices.keys())
-        provided_t_stages = set(self.ipsi.diag_time_dists.keys())
+        if data is not None:
+            self.patient_data = data
+
+        try:
+            self.check_and_assign(given_params)
+        except ValueError:
+            return -np.inf if log else 0.
+
+        llh = 0. if log else 1.
+
+        stored_t_stages = set(self.ext.ipsi.diagnose_matrices.keys())
+        provided_t_stages = set(self.ext.ipsi.diag_time_dists.keys())
         t_stages = list(stored_t_stages.intersection(provided_t_stages))
         
         max_t = self.diag_time_dists.max_t
+        llh = 0. if log else 1.
+
+        state_probs_ipsi_nox = self.noext.ipsi._evolve_onestep()
+        state_probs_contra_nox = self.noext.contra._evolve_onestep()
+        state_probs_midext = self._evolve_midext()
         state_probs_ipsi_ex = np.zeros(
-            shape=(max_t + 1, len(self.state_list)),
-            dtype=float
+        shape=(max_t + 1, len(self.ext.ipsi.state_list)),
+        dtype=float
         )
         state_probs_ipsi_ex[0,0] = 1.
-        state_probs_ipsi_nox = np.zeros(
-            shape=(max_t + 1, len(self.state_list)),
-            dtype=float
-        )
-        state_probs_ipsi_nox[0,0] = 1.
         state_probs_contra_ex = np.zeros(
-            shape=(max_t + 1, len(self.state_list)),
-            dtype=float
+        shape=(max_t + 1, len(self.ext.ipsi.state_list)),
+        dtype=float
         )
         state_probs_contra_ex[0,0] = 1.
-        state_probs_contra_nox = np.zeros(
-            shape=(max_t + 1, len(self.state_list)),
-            dtype=float
-        )
-        state_probs_contra_nox[0,0] = 1.
-        state_probs_midext = np.zeros(
-            shape=(max_t + 1, 2),
-            dtype=float
-        )
-        state_probs_midext[0,0] = 1.
-
-        llh = 0. if log else 1.
 
         for stage in t_stages:
-            state_probs_ipsi_nox = self.ipsi._evolve_onestep(start_state = state_probs_ipsi_nox)
-            state_probs_contra_nox = self.contra._evolve_onestep(start_state = state_probs_contra_nox)
-            state_probs_midext = self._evolve_midext(start_midexstate = state_probs_midext)
-
             for i in range(max_t):
-                state_probs_ipsi_ex[(i+1),:] = state_probs_midext[i,1] * self.ipsi._evolve_onestep(start_state = state_probs_ipsi_ex)[1,:] + state_probs_midext[i,0] * state_probs_ipsi_nox[(i+1),:]
-                state_probs_contra_ex[(i+1),:] = state_probs_midext[i,1] * self.contra._evolve_onestep(start_state = state_probs_contra_ex) + state_probs_midext[i,0] * state_probs_contra_nox[(i+1),:]
-                
-            for i in range(len(self.diagnose_matrices_midext[stage][:,0])):
-                if ((self.diagnose_matrices_midext[stage][i,0]==1) & (self.diagnose_matrices_midext[stage][i,1]==1)):
-                    self.diagnose_matrices_midext[stage][i,0] = state_probs_midext[i,0]
-                    self.diagnose_matrices_midext[stage][i,1] = state_probs_midext[i,1]
+                state_probs_ipsi_ex[(i+1),:] = state_probs_midext[i,1] * state_probs_ipsi_ex[i,:] @ self.ext.ipsi.transition_matrix + state_probs_midext[i,0] * state_probs_ipsi_nox[(i+1),:]
+                state_probs_contra_ex[(i+1),:] = state_probs_midext[i,1] * state_probs_contra_ex[i,:] @ self.ext.contra.transition_matrix + state_probs_midext[i,0] * state_probs_contra_nox[(i+1),:]
 
             joint_state_probs_nox = (
                 state_probs_ipsi_nox.T
-                @ np.diag(self.ipsi.diag_time_dists[stage].pmf)
+                @ np.diag(self.noext.ipsi.diag_time_dists[stage].pmf)
                 @ state_probs_contra_nox
             )
             joint_state_probs_ex = (
                 state_probs_ipsi_ex.T
-                @ np.diag(self.ipsi.diag_time_dists[stage].pmf)
+                @ np.diag(self.ext.ipsi.diag_time_dists[stage].pmf)
                 @ state_probs_contra_ex
             )
             p_ex = np.sum(
-                self.ipsi.diagnose_matrices[stage]
+                self.ext.ipsi.diagnose_matrices[stage]
                 * (joint_state_probs_ex
-                    @ self.contra.diagnose_matrices[stage]),
+                    @ self.ext.contra.diagnose_matrices[stage]),
                 axis=0
                 )
-            p_ex_weighted = p_ex.T * self.diagnose_matrices_midext[stage][:,1]
             p_nox = np.sum(
-                self.ipsi.diagnose_matrices[stage]
+                self.noext.ipsi.diagnose_matrices[stage]
                 * (joint_state_probs_nox
-                    @ self.contra.diagnose_matrices[stage]),
+                    @ self.noext.contra.diagnose_matrices[stage]),
                 axis=0
                 )
-            p_nox_weighted = p_nox.T * self.diagnose_matrices_midext[stage][:,0]
-            if log:
-                llh += np.sum(np.log(p_ex_weighted)) + np.sum(np.log(p_nox_weighted))
-            else:
-                llh *= np.prod(p_ex_weighted) * np.prod(p_nox_weighted)
 
-        return llh
+            p = np.array([p_nox,p_ex]) * np.array([[state_probs_midext[max_t,0]], [state_probs_midext[max_t,1]]])
 
-    def likelihood(
-        self,
-        data: Optional[pd.DataFrame] = None,
-        given_params: Optional[np.ndarray] = None,
-        log: bool = True,
-    ) -> float:
-        """Compute log-likelihood of (already stored) data, given the spread
-        probabilities and either a discrete diagnose time or a distribution to
-        use for marginalization over diagnose times.
-
-        Args:
-            data: Table with rows of patients and columns of per-LNL involvment. See
-                :meth:`load_data` for more details on how this should look like.
-
-            given_params: The likelihood is a function of these parameters. They mainly
-                consist of the :attr:`spread_probs` of the model. Any excess parameters
-                will be used to update the parametrized distributions used for
-                marginalizing over the diagnose times (see :attr:`diag_time_dists`).
-
-            log: When ``True``, the log-likelihood is returned.
-
-        Returns:
-            The log-likelihood :math:`\\log{p(D \\mid \\theta)}` where :math:`D`
-            is the data and :math:`\\theta` is the tuple of spread probabilities
-            and diagnose times or distributions over diagnose times.
-
-        See Also:
-            :attr:`spread_probs`: Property for getting and setting the spread
-            probabilities, of which a lymphatic network has as many as it has
-            :class:`Edge` instances (in case no symmetries apply).
-
-            :meth:`Unilateral.likelihood`: The log-likelihood function of
-            the unilateral system.
-
-            :meth:`Bilateral.likelihood`: The (log-)likelihood function of the
-            bilateral system.
-        """
-        if data is not None:
-            self.patient_data = data
-
-        try:
-            self.check_and_assign(given_params)
-        except ValueError:
-            return -np.inf if log else 0.
-
-        llh = 0. if log else 1.
-        
-        if log:
-            llh += self.ext._likelihood_mid(log=log)
-            llh += self.noext._likelihood_mid(log=log)
-        else:
-            llh *= self.ext._likelihood_mid(log=log)
-            llh *= self.noext._likelihood_mid(log=log)
+            if log:
+                p = np.log(p)
+                #data not yet loaded correctly (or at all)
+                llh_nox_ex = p @ self.diagnose_matrices_midext[stage]
+                llh += np.sum(llh_nox_ex)
+            #probably wrong. Matrix multiplication sums over patients but it should multiply.
+            else:
+                llh_nox_ex = p @ self.diagnose_matrices_midext[stage]
+                llh *= np.prod(llh_nox_ex)
 
         return llh
 
-
     def risk(
         self,
         given_params: Optional[np.ndarray] = None,
diff --git a/lymph/unilateral.py b/lymph/unilateral.py
index c125db3..9a7d583 100644
--- a/lymph/unilateral.py
+++ b/lymph/unilateral.py
@@ -787,6 +787,35 @@ def _evolve(
         state_probs[-1] = state
 
         return state_probs
+    
+    def _evolve_onestep(
+        self, start_state: Optional[np.ndarray] = None
+    ) -> np.ndarray:
+        """Evolve hidden Markov model based system over one time step. Compute
+        :math:`p(S \\mid t)` where :math:`S` is a distinct state and :math:`t`
+        is the time.
+
+        Args:
+            start_state: The current state.
+
+        Returns:
+            A matrix with the new state
+
+        :meta public:
+        """
+        # All healthy state at beginning
+        if start_state is None:
+            start_state = np.zeros(
+            shape=(self.diag_time_dists.max_t + 1, len(self.state_list)),
+            dtype=float
+            )
+            start_state[0,0] = 1.
+
+        # compute involvement at first time-step
+        for i in range(len(start_state)-1):
+            start_state[i+1,:] = start_state[i,:] @ self.transition_matrix
+
+        return start_state
 
     def check_and_assign(self, new_params: np.ndarray):
         """Check that the spread probability (rates) and the parameters for the

From aa5ceacf7165d095528c69aba68df0a4008ae6fe Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Mon, 31 Jul 2023 17:18:49 +0200
Subject: [PATCH 22/35] fix errors in likelihood function and loading data

---
 lymph/midline.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lymph/midline.py b/lymph/midline.py
index db4b1a4..f3944ca 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -616,7 +616,6 @@ def likelihood(
 
             if log:
                 p = np.log(p)
-                #data not yet loaded correctly (or at all)
                 llh_nox_ex = p @ self.diagnose_matrices_midext[stage]
                 llh += np.sum(llh_nox_ex)
             #probably wrong. Matrix multiplication sums over patients but it should multiply.

From 74034ea0a267317db971aa483dab12aaa27e82aa Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Thu, 3 Aug 2023 15:24:18 +0200
Subject: [PATCH 23/35] fix errors in midline extension transition matrixand
 delete unused code

---
 lymph/midline.py    | 83 +++++++--------------------------------------
 lymph/unilateral.py |  1 -
 2 files changed, 12 insertions(+), 72 deletions(-)

diff --git a/lymph/midline.py b/lymph/midline.py
index f3944ca..0200298 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -74,7 +74,6 @@ def __init__(
             self.alpha_mix = 0.
 
         self.noext.diag_time_dists = self.ext.diag_time_dists
-        self.midexstates = ["0", "1"]  
 
     @property
     def graph(self) -> Dict[Tuple[str], List[str]]:
@@ -82,40 +81,6 @@ def graph(self) -> Dict[Tuple[str], List[str]]:
         """
         return self.noext.graph
     
-    def _gen_midexstate_list(self):
-        """Generates the list of (hidden) midline states.
-        """
-        if not hasattr(self, "_midexstate_list"):
-            self._midexstate_list = np.zeros(
-                shape=(2**1, 1), dtype=int
-                )
-        for i in range(2**1):
-            self._midexstate_list[i] = [
-                int(digit) for digit in change_base(i, 2, length=1)
-                ]
-
-    @property
-    def midexstate_list(self):
-        """Return list of all possible hidden midline states.
-        """
-        try:
-            return self._midexstate_list
-        except AttributeError:
-            self._gen_midexstate_list()
-            return self._midexstate_list
-        
-    @property
-    def midexstate(self):
-        """Return the currently set state of the system.
-        """
-        return self._midexstate
-
-    @midexstate.setter
-    def midexstate(self, newstate: np.ndarray):
-        """Sets the state of the system to ``newstate``.
-        """
-        self._midexstate = newstate
-    
     @property
     def midext_prob(self):
         """Assign the last of the new_params to the midline extension probability
@@ -177,7 +142,7 @@ def base_probs(self, new_params: np.ndarray):
 
         if self.use_mixing:
             self.noext.contra.base_probs = new_params[k:2*k]
-            self.alpha_mix = new_params[-1]
+            self.alpha_mix = new_params[-2]
             # compute linear combination
             self.ext.contra.base_probs = (
                 self.alpha_mix * self.ext.ipsi.base_probs
@@ -207,37 +172,6 @@ def trans_probs(self, new_params: np.ndarray):
         # avoid unnecessary double computation of ipsilateral transition matrix
         self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix
 
-    def _gen_midextransition_matrix(self):
-        """Generate the midline extension transition matrix :math:`\\mathbf{A}`, which contains
-        the :math:`P \\left( S_{t+1} \\mid S_t \\right)`. :math:`\\mathbf{A}`
-        is a square matrix with size ``(# of states)``. The lower diagonal is
-        zero.
-        """
-        if not hasattr(self, "_midextransition_matrix"):
-            shape = (2**1, 2**1)
-            self._midextransition_matrix = np.zeros(shape=shape)
-
-        self._midextransition_matrix[0,0] = 1 - self.midext_prob
-        self._midextransition_matrix[0,1] = self.midext_prob
-        self._midextransition_matrix[1,0] = 0
-        self._midextransition_matrix[1,1] = 1
-
-    @property
-    def midextransition_matrix(self) -> np.ndarray:
-        """Return the midline extension listtransition matrix :math:`\\mathbf{A}`, which contains the
-        probability to transition from any state :math:`S_t` to any other state
-        :math:`S_{t+1}` one timestep later:
-        :math:`P \\left( S_{t+1} \\mid S_t \\right)`. :math:`\\mathbf{A}` is a
-        square matrix with size ``(# of states)``. The lower diagonal is zero,
-        because those entries correspond to transitions that would require
-        self-healing.
-        """
-        try:
-            return self._midextransition_matrix
-        except AttributeError:
-            self._gen_midextransition_matrix()
-            return self._midextransition_matrix
-
     @property
     def spread_probs(self) -> np.ndarray:
         """These are the probabilities representing the spread of cancer along
@@ -392,7 +326,7 @@ def patient_data(self, patient_data: pd.DataFrame):
         self.load_data(patient_data)
 
     def _evolve_midext(
-        self, start_midexstate: Optional[np.ndarray] = None
+        self,  new_params: np.ndarray, start_midexstate: Optional[np.ndarray] = None
     ) -> np.ndarray:
         """Evolve hidden Markov model based system over one time step. Compute
         :math:`p(S \\mid t)` where :math:`S` is a distinct state and :math:`t`
@@ -412,11 +346,18 @@ def _evolve_midext(
             dtype=float
             )
             start_midexstate[0,0] = 1.
+        
+        self.midext_prob = new_params
+
+        midextransition_matrix = np.zeros(shape=(2**1, 2**1))
+        midextransition_matrix[0,0] = 1 - self.midext_prob
+        midextransition_matrix[0,1] = self.midext_prob
+        midextransition_matrix[1,0] = 0
+        midextransition_matrix[1,1] = 1
 
         # compute involvement at first time-step
         for i in range(len(start_midexstate)-1):
-            start_midexstate[i+1,:] = start_midexstate[i,:] @ self.midextransition_matrix
-
+            start_midexstate[i+1,:] = start_midexstate[i,:] @ midextransition_matrix
         return start_midexstate
 
     def load_data(
@@ -572,7 +513,7 @@ def likelihood(
 
         state_probs_ipsi_nox = self.noext.ipsi._evolve_onestep()
         state_probs_contra_nox = self.noext.contra._evolve_onestep()
-        state_probs_midext = self._evolve_midext()
+        state_probs_midext = self._evolve_midext(new_params = given_params)
         state_probs_ipsi_ex = np.zeros(
         shape=(max_t + 1, len(self.ext.ipsi.state_list)),
         dtype=float
diff --git a/lymph/unilateral.py b/lymph/unilateral.py
index 9a7d583..ce6ddde 100644
--- a/lymph/unilateral.py
+++ b/lymph/unilateral.py
@@ -814,7 +814,6 @@ def _evolve_onestep(
         # compute involvement at first time-step
         for i in range(len(start_state)-1):
             start_state[i+1,:] = start_state[i,:] @ self.transition_matrix
-
         return start_state
 
     def check_and_assign(self, new_params: np.ndarray):

From 1d73a8b89a116d8a3150f011a2226cffc1e97b2f Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Wed, 9 Aug 2023 00:12:36 +0200
Subject: [PATCH 24/35] optimize code for readability

---
 lymph/midline.py | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/lymph/midline.py b/lymph/midline.py
index 0200298..e0e44bd 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -261,17 +261,18 @@ def _gen_diagnose_matrices_midext(self, table: pd.DataFrame, t_stage: str):
         if not hasattr(self, "_diagnose_matrices_midext"):
             self._diagnose_matrices_midext = {}
         self._diagnose_matrices_midext[t_stage] = np.zeros((len(table), 2))
-        for patient in range(len(table)):
-            if table.iloc[patient]['info', 'tumor', 'midline_extension'] == False:
-                self.diagnose_matrices_midext[t_stage][patient, 0] = 1
-                self.diagnose_matrices_midext[t_stage][patient, 1] = 0
-            if table.iloc[patient]['info', 'tumor', 'midline_extension'] == True:
-                self.diagnose_matrices_midext[t_stage][patient, 0] = 0
-                self.diagnose_matrices_midext[t_stage][patient, 1] = 1
-            if (table.iloc[patient]['info', 'tumor', 'midline_extension'] != False) & (table.iloc[patient]['info', 'tumor', 'midline_extension'] != True):
-                self.diagnose_matrices_midext[t_stage][patient, 0] = 1
-                self.diagnose_matrices_midext[t_stage][patient, 1] = 1
-    
+        midext_column = table["info", "tumor", "midline_extension"]
+        for idx, midline_extension in midext_column.items():
+            if pd.isna(midline_extension):
+                self.diagnose_matrices_midext[t_stage][idx, 0] = 1
+                self.diagnose_matrices_midext[t_stage][idx, 1] = 1
+            elif midline_extension:
+                self.diagnose_matrices_midext[t_stage][idx, 0] = 0
+                self.diagnose_matrices_midext[t_stage][idx, 1] = 1
+            elif not midline_extension:
+                self.diagnose_matrices_midext[t_stage][idx, 0] = 1
+                self.diagnose_matrices_midext[t_stage][idx, 1] = 0
+
     @property
     def diagnose_matrices_midext(self):
         try:
@@ -349,13 +350,13 @@ def _evolve_midext(
         
         self.midext_prob = new_params
 
-        midextransition_matrix = np.zeros(shape=(2**1, 2**1))
-        midextransition_matrix[0,0] = 1 - self.midext_prob
-        midextransition_matrix[0,1] = self.midext_prob
-        midextransition_matrix[1,0] = 0
+        midextransition_matrix = np.array(
+            [[1 - self.midext_prob, self.midext_prob],
+            [0.                  , 1.              ]]
+        )
         midextransition_matrix[1,1] = 1
 
-        # compute involvement at first time-step
+        # compute involvement for all time steps
         for i in range(len(start_midexstate)-1):
             start_midexstate[i+1,:] = start_midexstate[i,:] @ midextransition_matrix
         return start_midexstate

From b95b84664568226c5006f1021c220c6c61dbe4fe Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Mon, 14 Aug 2023 09:01:22 +0200
Subject: [PATCH 25/35] rename evolve_onestep function to evolve_stepwise

---
 lymph/unilateral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lymph/unilateral.py b/lymph/unilateral.py
index ce6ddde..d668964 100644
--- a/lymph/unilateral.py
+++ b/lymph/unilateral.py
@@ -788,7 +788,7 @@ def _evolve(
 
         return state_probs
     
-    def _evolve_onestep(
+    def _evolve_stepwise(
         self, start_state: Optional[np.ndarray] = None
     ) -> np.ndarray:
         """Evolve hidden Markov model based system over one time step. Compute

From 09b101db74255485cdfe282a19180ef2ca151483 Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Mon, 14 Aug 2023 09:01:45 +0200
Subject: [PATCH 26/35] fix problems with mixing parameter and likelihood

---
 lymph/midline.py | 47 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 30 insertions(+), 17 deletions(-)

diff --git a/lymph/midline.py b/lymph/midline.py
index e0e44bd..ca99fab 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -261,18 +261,30 @@ def _gen_diagnose_matrices_midext(self, table: pd.DataFrame, t_stage: str):
         if not hasattr(self, "_diagnose_matrices_midext"):
             self._diagnose_matrices_midext = {}
         self._diagnose_matrices_midext[t_stage] = np.zeros((len(table), 2))
-        midext_column = table["info", "tumor", "midline_extension"]
-        for idx, midline_extension in midext_column.items():
-            if pd.isna(midline_extension):
-                self.diagnose_matrices_midext[t_stage][idx, 0] = 1
-                self.diagnose_matrices_midext[t_stage][idx, 1] = 1
+        midline_extension = table['info', 'tumor', 'midline_extension']
+        for patient in range(len(table)):
+            if table.iloc[patient]['info', 'tumor', 'midline_extension'] == False:
+                self.diagnose_matrices_midext[t_stage][patient, 0] = 0
+                self.diagnose_matrices_midext[t_stage][patient, 1] = 1
+            if table.iloc[patient]['info', 'tumor', 'midline_extension'] == True:
+                self.diagnose_matrices_midext[t_stage][patient, 0] = 1
+                self.diagnose_matrices_midext[t_stage][patient, 1] = 0
+            if (table.iloc[patient]['info', 'tumor', 'midline_extension'] != False) & (table.iloc[patient]['info', 'tumor', 'midline_extension'] != True):
+                self.diagnose_matrices_midext[t_stage][patient, 0] = 1
+                self.diagnose_matrices_midext[t_stage][patient, 1] = 1
+        """
+        for patient, row in table.iterrows():
+            midline_extension = row['info', 'tumor', 'midline_extension']
+            if not midline_extension:
+                self.diagnose_matrices_midext[t_stage][patient, 0] = 0
+                self.diagnose_matrices_midext[t_stage][patient, 1] = 1
             elif midline_extension:
-                self.diagnose_matrices_midext[t_stage][idx, 0] = 0
-                self.diagnose_matrices_midext[t_stage][idx, 1] = 1
-            elif not midline_extension:
-                self.diagnose_matrices_midext[t_stage][idx, 0] = 1
-                self.diagnose_matrices_midext[t_stage][idx, 1] = 0
-
+                self.diagnose_matrices_midext[t_stage][patient, 0] = 1
+                self.diagnose_matrices_midext[t_stage][patient, 1] = 0
+            else:
+                self.diagnose_matrices_midext[t_stage][patient, 0] = 1
+                self.diagnose_matrices_midext[t_stage][patient, 1] = 1
+        """
     @property
     def diagnose_matrices_midext(self):
         try:
@@ -512,8 +524,8 @@ def likelihood(
         max_t = self.diag_time_dists.max_t
         llh = 0. if log else 1.
 
-        state_probs_ipsi_nox = self.noext.ipsi._evolve_onestep()
-        state_probs_contra_nox = self.noext.contra._evolve_onestep()
+        state_probs_ipsi_nox = self.noext.ipsi._evolve_stepwise()
+        state_probs_contra_nox = self.noext.contra._evolve_stepwise()
         state_probs_midext = self._evolve_midext(new_params = given_params)
         state_probs_ipsi_ex = np.zeros(
         shape=(max_t + 1, len(self.ext.ipsi.state_list)),
@@ -557,12 +569,13 @@ def likelihood(
             p = np.array([p_nox,p_ex]) * np.array([[state_probs_midext[max_t,0]], [state_probs_midext[max_t,1]]])
 
             if log:
-                p = np.log(p)
-                llh_nox_ex = p @ self.diagnose_matrices_midext[stage]
+                llh_nox_ex = p.T * self.diagnose_matrices_midext[stage]
+                llh_nox_ex = np.log(llh_nox_ex[llh_nox_ex !=0])
                 llh += np.sum(llh_nox_ex)
-            #probably wrong. Matrix multiplication sums over patients but it should multiply.
+            
             else:
-                llh_nox_ex = p @ self.diagnose_matrices_midext[stage]
+                llh_nox_ex = p.T * self.diagnose_matrices_midext[stage]
+                llh_nox_ex = llh_nox_ex[llh_nox_ex !=0]
                 llh *= np.prod(llh_nox_ex)
 
         return llh

From 27cdb1678d71ec7d287c32f6d10883edba905833 Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Tue, 15 Aug 2023 16:40:09 +0200
Subject: [PATCH 27/35] change: try potential fix for midline evolution

---
 .pre-commit-config.yaml |  20 ++++-
 lymph/midline.py        | 189 ++++++++++++++++------------------------
 lymph/unilateral.py     |  29 +-----
 3 files changed, 91 insertions(+), 147 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a8d3f74..e939d45 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,21 +1,33 @@
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v2.3.0
+  rev: v4.4.0
   hooks:
   - id: trailing-whitespace
+  - id: end-of-file-fixer
+  - id: check-merge-conflict
+  - id: check-toml
+  - id: check-yaml
 - repo: https://github.com/hadialqattan/pycln
-  rev: v1.1.0
+  rev: v2.1.5
   hooks:
   - id: pycln
     args: [--config=pyproject.toml]
 - repo: https://github.com/PyCQA/isort
-  rev: 5.10.1
+  rev: 5.12.0
   hooks:
   - id: isort
     files: "\\.(py)$"
     args: [--settings-path=pyproject.toml]
+- repo: https://github.com/asottile/pyupgrade
+  rev: v3.9.0
+  hooks:
+  - id: pyupgrade
+- repo: https://github.com/kynan/nbstripout
+  rev: 0.6.0
+  hooks:
+  - id: nbstripout
 - repo: https://github.com/compilerla/conventional-pre-commit
-  rev: v2.0.0
+  rev: v2.3.0
   hooks:
   - id: conventional-pre-commit
     stages: [commit-msg]
diff --git a/lymph/midline.py b/lymph/midline.py
index ca99fab..2b6c92e 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -3,13 +3,9 @@
 
 import numpy as np
 import pandas as pd
-import pdb
-from numpy.linalg import matrix_power as mat_pow
 
 from .bilateral import Bilateral
-from .node import Node
 from .timemarg import MarginalizorDict
-from .unilateral import change_base
 
 
 class MidlineBilateral:
@@ -38,6 +34,7 @@ def __init__(
         self,
         graph: Dict[Tuple[str], List[str]],
         use_mixing: bool = True,
+        evolve_midext: bool = False,
         trans_symmetric: bool = True,
         **_kwargs
     ):
@@ -73,30 +70,16 @@ def __init__(
         if self.use_mixing:
             self.alpha_mix = 0.
 
+        self.evolve_midext = evolve_midext
+        if self.evolve_midext:
+            self.midext = 0.
+
         self.noext.diag_time_dists = self.ext.diag_time_dists
 
     @property
     def graph(self) -> Dict[Tuple[str], List[str]]:
-        """Return the (unilateral) graph that was used to create this network.
-        """
+        """Return the (unilateral) graph that was used to create this network."""
         return self.noext.graph
-    
-    @property
-    def midext_prob(self):
-        """Assign the last of the new_params to the midline extension probability
-        """
-        try:
-            return self._midext_prob
-        except AttributeError as attr_err:
-            raise AttributeError(
-                "No midline extension probability has been assigned"
-            ) from attr_err
-
-    @midext_prob.setter
-    def midext_prob(self, new_params):
-        """A variable containing the midline extension probability
-        """
-        self._midext_prob = new_params[-1]
 
     @property
     def base_probs(self) -> np.ndarray:
@@ -123,12 +106,11 @@ def base_probs(self) -> np.ndarray:
                 self.noext.contra.base_probs,
                 [self.alpha_mix],
             ])
-        else:
-            return np.concatenate([
-                self.ext.ipsi.base_probs,
-                self.ext.contra.base_probs,
-                self.noext.contra.base_probs,
-            ])
+        return np.concatenate([
+            self.ext.ipsi.base_probs,
+            self.ext.contra.base_probs,
+            self.noext.contra.base_probs,
+        ])
 
     @base_probs.setter
     def base_probs(self, new_params: np.ndarray):
@@ -180,10 +162,13 @@ def spread_probs(self) -> np.ndarray:
         It is composed of the base spread probs (possible with the mixing parameter)
         and the probabilities of spread among the LNLs.
 
-        +-------------+-------------+
-        | base probs  | trans probs |
-        +-------------+-------------+
+        +-------------+-------------+-------------+
+        | base probs  | trans probs | midext prob |
+        +-------------+-------------+-------------+
         """
+        if self.evolve_midext:
+            return np.concatenate([self.base_probs, self.trans_probs, self.midext_prob])
+
         return np.concatenate([self.base_probs, self.trans_probs])
 
     @spread_probs.setter
@@ -193,8 +178,14 @@ def spread_probs(self, new_params: np.ndarray):
         """
         num_base_probs = len(self.base_probs)
 
+        if self.evolve_midext:
+            self.midext_prob = new_params[-1]
+            end = -1
+        else:
+            end = None
+
         self.base_probs  = new_params[:num_base_probs]
-        self.trans_probs = new_params[num_base_probs:]
+        self.trans_probs = new_params[num_base_probs:end]
 
 
     @property
@@ -260,31 +251,19 @@ def _gen_diagnose_matrices_midext(self, table: pd.DataFrame, t_stage: str):
         """
         if not hasattr(self, "_diagnose_matrices_midext"):
             self._diagnose_matrices_midext = {}
+
         self._diagnose_matrices_midext[t_stage] = np.zeros((len(table), 2))
-        midline_extension = table['info', 'tumor', 'midline_extension']
-        for patient in range(len(table)):
-            if table.iloc[patient]['info', 'tumor', 'midline_extension'] == False:
-                self.diagnose_matrices_midext[t_stage][patient, 0] = 0
-                self.diagnose_matrices_midext[t_stage][patient, 1] = 1
-            if table.iloc[patient]['info', 'tumor', 'midline_extension'] == True:
-                self.diagnose_matrices_midext[t_stage][patient, 0] = 1
-                self.diagnose_matrices_midext[t_stage][patient, 1] = 0
-            if (table.iloc[patient]['info', 'tumor', 'midline_extension'] != False) & (table.iloc[patient]['info', 'tumor', 'midline_extension'] != True):
-                self.diagnose_matrices_midext[t_stage][patient, 0] = 1
-                self.diagnose_matrices_midext[t_stage][patient, 1] = 1
-        """
-        for patient, row in table.iterrows():
-            midline_extension = row['info', 'tumor', 'midline_extension']
-            if not midline_extension:
-                self.diagnose_matrices_midext[t_stage][patient, 0] = 0
-                self.diagnose_matrices_midext[t_stage][patient, 1] = 1
-            elif midline_extension:
-                self.diagnose_matrices_midext[t_stage][patient, 0] = 1
-                self.diagnose_matrices_midext[t_stage][patient, 1] = 0
-            else:
-                self.diagnose_matrices_midext[t_stage][patient, 0] = 1
-                self.diagnose_matrices_midext[t_stage][patient, 1] = 1
-        """
+        for idx, (_, patient) in enumerate(table.iterrows()):
+            midext = patient["info", "tumor", "midline_extension"]
+            if midext == False:
+                self.diagnose_matrices_midext[t_stage][idx] = np.array([0, 1])
+
+            if midext == True:
+                self.diagnose_matrices_midext[t_stage][idx] = np.array([1, 0])
+
+            if pd.isna(midext):
+                self.diagnose_matrices_midext[t_stage][idx] = np.array([1, 1])
+
     @property
     def diagnose_matrices_midext(self):
         try:
@@ -338,40 +317,31 @@ def patient_data(self, patient_data: pd.DataFrame):
         self._patient_data = patient_data.copy()
         self.load_data(patient_data)
 
-    def _evolve_midext(
-        self,  new_params: np.ndarray, start_midexstate: Optional[np.ndarray] = None
-    ) -> np.ndarray:
+    def _evolve_midext(self) -> np.ndarray:
         """Evolve hidden Markov model based system over one time step. Compute
         :math:`p(S \\mid t)` where :math:`S` is a distinct state and :math:`t`
         is the time.
 
-        Args:
-            start_midexstate: The current midline extension state.
-
         Returns:
             The new midline extension state
 
         :meta public:
         """
-        if start_midexstate is None:
-            start_midexstate = np.zeros(
-            shape=(self.diag_time_dists.max_t + 1, 2),
-            dtype=float
-            )
-            start_midexstate[0,0] = 1.
-        
-        self.midext_prob = new_params
-
-        midextransition_matrix = np.array(
-            [[1 - self.midext_prob, self.midext_prob],
-            [0.                  , 1.              ]]
+        midext_states = np.zeros(
+        shape=(self.diag_time_dists.max_t + 1, 2),
+        dtype=float
         )
-        midextransition_matrix[1,1] = 1
+        midext_states[0,0] = 1.
+
+        midextransition_matrix = np.array([
+            [1 - self.midext_prob, self.midext_prob],
+            [0.                  , 1.              ],
+        ])
 
         # compute involvement for all time steps
-        for i in range(len(start_midexstate)-1):
-            start_midexstate[i+1,:] = start_midexstate[i,:] @ midextransition_matrix
-        return start_midexstate
+        for i in range(len(midext_states)-1):
+            midext_states[i+1,:] = midext_states[i,:] @ midextransition_matrix
+        return midext_states
 
     def load_data(
         self,
@@ -427,7 +397,7 @@ def load_data(
             modality_spsn=modality_spsn,
             mode=mode
         )
-    
+
         self.noext.load_data(
             data,
             modality_spsn=modality_spsn,
@@ -474,7 +444,6 @@ def check_and_assign(self, new_params: np.ndarray):
             self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
             self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists
             self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists
-            self.midext_prob = new_params
         except ValueError as val_err:
             raise ValueError(
                 "Parameters for marginalization over diagnose times are invalid"
@@ -488,10 +457,6 @@ def check_and_assign(self, new_params: np.ndarray):
             raise ValueError(
                 "Spread probs must be between 0 and 1"
             )
-        if 0. > self.midext_prob or self.midext_prob > 1.:
-            raise ValueError(
-                "Midline extension probability must be between 0 and 1"
-            )
 
         self.spread_probs = new_spread_probs
 
@@ -520,63 +485,57 @@ def likelihood(
         stored_t_stages = set(self.ext.ipsi.diagnose_matrices.keys())
         provided_t_stages = set(self.ext.ipsi.diag_time_dists.keys())
         t_stages = list(stored_t_stages.intersection(provided_t_stages))
-        
+
         max_t = self.diag_time_dists.max_t
         llh = 0. if log else 1.
 
-        state_probs_ipsi_nox = self.noext.ipsi._evolve_stepwise()
-        state_probs_contra_nox = self.noext.contra._evolve_stepwise()
-        state_probs_midext = self._evolve_midext(new_params = given_params)
-        state_probs_ipsi_ex = np.zeros(
-        shape=(max_t + 1, len(self.ext.ipsi.state_list)),
-        dtype=float
-        )
-        state_probs_ipsi_ex[0,0] = 1.
+        state_probs_ipsi = self.noext.ipsi._evolve(t_last=max_t)
+        state_probs_contra_nox = self.noext.contra._evolve(t_last=max_t)
+        state_probs_midext = self._evolve_midext()
+
         state_probs_contra_ex = np.zeros(
-        shape=(max_t + 1, len(self.ext.ipsi.state_list)),
-        dtype=float
+            shape=(max_t + 1, len(self.ext.ipsi.state_list)),
+            dtype=float
         )
         state_probs_contra_ex[0,0] = 1.
 
         for stage in t_stages:
             for i in range(max_t):
-                state_probs_ipsi_ex[(i+1),:] = state_probs_midext[i,1] * state_probs_ipsi_ex[i,:] @ self.ext.ipsi.transition_matrix + state_probs_midext[i,0] * state_probs_ipsi_nox[(i+1),:]
-                state_probs_contra_ex[(i+1),:] = state_probs_midext[i,1] * state_probs_contra_ex[i,:] @ self.ext.contra.transition_matrix + state_probs_midext[i,0] * state_probs_contra_nox[(i+1),:]
+                state_probs_contra_ex[i + 1] = (
+                    state_probs_midext[i,1] * state_probs_contra_ex[i] @ self.ext.contra.transition_matrix
+                    + state_probs_midext[i,0] * state_probs_contra_nox[i + 1]
+                )
 
             joint_state_probs_nox = (
-                state_probs_ipsi_nox.T
+                state_probs_ipsi.T
                 @ np.diag(self.noext.ipsi.diag_time_dists[stage].pmf)
                 @ state_probs_contra_nox
             )
             joint_state_probs_ex = (
-                state_probs_ipsi_ex.T
+                state_probs_ipsi.T
                 @ np.diag(self.ext.ipsi.diag_time_dists[stage].pmf)
                 @ state_probs_contra_ex
             )
-            p_ex = np.sum(
-                self.ext.ipsi.diagnose_matrices[stage]
-                * (joint_state_probs_ex
-                    @ self.ext.contra.diagnose_matrices[stage]),
-                axis=0
-                )
             p_nox = np.sum(
                 self.noext.ipsi.diagnose_matrices[stage]
                 * (joint_state_probs_nox
                     @ self.noext.contra.diagnose_matrices[stage]),
                 axis=0
                 )
+            p_ex = np.sum(
+                self.ext.ipsi.diagnose_matrices[stage]
+                * (joint_state_probs_ex
+                    @ self.ext.contra.diagnose_matrices[stage]),
+                axis=0
+                )
 
-            p = np.array([p_nox,p_ex]) * np.array([[state_probs_midext[max_t,0]], [state_probs_midext[max_t,1]]])
+            p = np.vstack((p_nox, p_ex))
+            stage_llh = p @ self.diagnose_matrices_midext[stage]
 
             if log:
-                llh_nox_ex = p.T * self.diagnose_matrices_midext[stage]
-                llh_nox_ex = np.log(llh_nox_ex[llh_nox_ex !=0])
-                llh += np.sum(llh_nox_ex)
-            
+                llh += np.sum(np.log(stage_llh))
             else:
-                llh_nox_ex = p.T * self.diagnose_matrices_midext[stage]
-                llh_nox_ex = llh_nox_ex[llh_nox_ex !=0]
-                llh *= np.prod(llh_nox_ex)
+                llh *= np.prod(stage_llh)
 
         return llh
 
@@ -639,4 +598,4 @@ def generate_dataset(
         dataset.loc[drawn_ext] = ext_dataset.loc[drawn_ext]
         dataset[('info', 'tumor', 'midline_extension')] = drawn_ext
 
-        return dataset
\ No newline at end of file
+        return dataset
diff --git a/lymph/unilateral.py b/lymph/unilateral.py
index d668964..8a3321b 100644
--- a/lymph/unilateral.py
+++ b/lymph/unilateral.py
@@ -787,34 +787,7 @@ def _evolve(
         state_probs[-1] = state
 
         return state_probs
-    
-    def _evolve_stepwise(
-        self, start_state: Optional[np.ndarray] = None
-    ) -> np.ndarray:
-        """Evolve hidden Markov model based system over one time step. Compute
-        :math:`p(S \\mid t)` where :math:`S` is a distinct state and :math:`t`
-        is the time.
-
-        Args:
-            start_state: The current state.
-
-        Returns:
-            A matrix with the new state
 
-        :meta public:
-        """
-        # All healthy state at beginning
-        if start_state is None:
-            start_state = np.zeros(
-            shape=(self.diag_time_dists.max_t + 1, len(self.state_list)),
-            dtype=float
-            )
-            start_state[0,0] = 1.
-
-        # compute involvement at first time-step
-        for i in range(len(start_state)-1):
-            start_state[i+1,:] = start_state[i,:] @ self.transition_matrix
-        return start_state
 
     def check_and_assign(self, new_params: np.ndarray):
         """Check that the spread probability (rates) and the parameters for the
@@ -1117,4 +1090,4 @@ def __init__(self, *args, **kwargs):
             DeprecationWarning
         )
 
-        super().__init__(*args, **kwargs)
\ No newline at end of file
+        super().__init__(*args, **kwargs)

From b6577e4a5ad89c310391cdc651f58e767f75226c Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Tue, 15 Aug 2023 16:58:40 +0200
Subject: [PATCH 28/35] fix: wrong name of midext_prob

---
 lymph/midline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lymph/midline.py b/lymph/midline.py
index 2b6c92e..5035f9d 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -72,7 +72,7 @@ def __init__(
 
         self.evolve_midext = evolve_midext
         if self.evolve_midext:
-            self.midext = 0.
+            self.midext_prob = 0.
 
         self.noext.diag_time_dists = self.ext.diag_time_dists
 

From cffae92710fd111ecfe71303fbb373bf81e09e8d Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Tue, 15 Aug 2023 18:11:54 +0200
Subject: [PATCH 29/35] fix: wrong shape of posterior in likelihood

---
 lymph/midline.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/lymph/midline.py b/lymph/midline.py
index 5035f9d..5e4076f 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -167,7 +167,7 @@ def spread_probs(self) -> np.ndarray:
         +-------------+-------------+-------------+
         """
         if self.evolve_midext:
-            return np.concatenate([self.base_probs, self.trans_probs, self.midext_prob])
+            return np.concatenate([self.base_probs, self.trans_probs, [self.midext_prob]])
 
         return np.concatenate([self.base_probs, self.trans_probs])
 
@@ -255,14 +255,12 @@ def _gen_diagnose_matrices_midext(self, table: pd.DataFrame, t_stage: str):
         self._diagnose_matrices_midext[t_stage] = np.zeros((len(table), 2))
         for idx, (_, patient) in enumerate(table.iterrows()):
             midext = patient["info", "tumor", "midline_extension"]
-            if midext == False:
-                self.diagnose_matrices_midext[t_stage][idx] = np.array([0, 1])
-
-            if midext == True:
-                self.diagnose_matrices_midext[t_stage][idx] = np.array([1, 0])
-
             if pd.isna(midext):
                 self.diagnose_matrices_midext[t_stage][idx] = np.array([1, 1])
+            elif not midext:
+                self.diagnose_matrices_midext[t_stage][idx] = np.array([0, 1])
+            else:
+                self.diagnose_matrices_midext[t_stage][idx] = np.array([1, 0])
 
     @property
     def diagnose_matrices_midext(self):
@@ -530,7 +528,7 @@ def likelihood(
                 )
 
             p = np.vstack((p_nox, p_ex))
-            stage_llh = p @ self.diagnose_matrices_midext[stage]
+            stage_llh = (p * self.diagnose_matrices_midext[stage].T).sum(axis=0)
 
             if log:
                 llh += np.sum(np.log(stage_llh))

From aa5f1b5764b0ffe2ba8ffe1072ceb5c90a9bdbe3 Mon Sep 17 00:00:00 2001
From: rmnldwg <48687784+rmnldwg@users.noreply.github.com>
Date: Wed, 16 Aug 2023 17:36:06 +0200
Subject: [PATCH 30/35] fix: alpha mix assignment & explicit midext llh

This still does not seem to work.
---
 lymph/midline.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/lymph/midline.py b/lymph/midline.py
index 5e4076f..a3ade7b 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -124,7 +124,7 @@ def base_probs(self, new_params: np.ndarray):
 
         if self.use_mixing:
             self.noext.contra.base_probs = new_params[k:2*k]
-            self.alpha_mix = new_params[-2]
+            self.alpha_mix = new_params[-1]
             # compute linear combination
             self.ext.contra.base_probs = (
                 self.alpha_mix * self.ext.ipsi.base_probs
@@ -487,9 +487,16 @@ def likelihood(
         max_t = self.diag_time_dists.max_t
         llh = 0. if log else 1.
 
-        state_probs_ipsi = self.noext.ipsi._evolve(t_last=max_t)
-        state_probs_contra_nox = self.noext.contra._evolve(t_last=max_t)
         state_probs_midext = self._evolve_midext()
+        state_probs_ipsi = self.ext.ipsi._evolve(t_last=max_t)
+
+        # I think we need to discount the contralateral state probabilities for no
+        # midline extension, as it becomes less and less likely that the tumor
+        # does not cross the midline the longer it has been growing.
+        state_probs_contra_nox = (
+            self.noext.contra._evolve(t_last=max_t)
+            * state_probs_midext[:,0].reshape(-1,1)
+        )
 
         state_probs_contra_ex = np.zeros(
             shape=(max_t + 1, len(self.ext.ipsi.state_list)),
@@ -528,7 +535,14 @@ def likelihood(
                 )
 
             p = np.vstack((p_nox, p_ex))
-            stage_llh = (p * self.diagnose_matrices_midext[stage].T).sum(axis=0)
+            stage_llh = (
+                (p * self.diagnose_matrices_midext[stage].T).sum(axis=0)
+                * (
+                    self.diag_time_dists[stage].pmf
+                    @ state_probs_midext
+                    @ self.diagnose_matrices_midext[stage].T
+                )
+            )
 
             if log:
                 llh += np.sum(np.log(stage_llh))

From e662c9061162b57e92c30c2fa4e7b216ae2fdde5 Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Fri, 18 Aug 2023 14:11:56 +0200
Subject: [PATCH 31/35] fix: invert midext data matrix & correct evolution

---
 lymph/midline.py | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/lymph/midline.py b/lymph/midline.py
index a3ade7b..75b5124 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -258,9 +258,9 @@ def _gen_diagnose_matrices_midext(self, table: pd.DataFrame, t_stage: str):
             if pd.isna(midext):
                 self.diagnose_matrices_midext[t_stage][idx] = np.array([1, 1])
             elif not midext:
-                self.diagnose_matrices_midext[t_stage][idx] = np.array([0, 1])
-            else:
                 self.diagnose_matrices_midext[t_stage][idx] = np.array([1, 0])
+            else:
+                self.diagnose_matrices_midext[t_stage][idx] = np.array([0, 1])
 
     @property
     def diagnose_matrices_midext(self):
@@ -502,19 +502,18 @@ def likelihood(
             shape=(max_t + 1, len(self.ext.ipsi.state_list)),
             dtype=float
         )
-        state_probs_contra_ex[0,0] = 1.
 
         for stage in t_stages:
             for i in range(max_t):
                 state_probs_contra_ex[i + 1] = (
-                    state_probs_midext[i,1] * state_probs_contra_ex[i] @ self.ext.contra.transition_matrix
-                    + state_probs_midext[i,0] * state_probs_contra_nox[i + 1]
-                )
+                    self.midext_prob * state_probs_contra_nox[i]
+                    + state_probs_contra_ex[i]
+                ) @ self.ext.contra.transition_matrix
 
             joint_state_probs_nox = (
                 state_probs_ipsi.T
                 @ np.diag(self.noext.ipsi.diag_time_dists[stage].pmf)
-                @ state_probs_contra_nox
+                @ (state_probs_contra_nox * state_probs_midext[:,0].reshape(-1,1))
             )
             joint_state_probs_ex = (
                 state_probs_ipsi.T
@@ -535,14 +534,7 @@ def likelihood(
                 )
 
             p = np.vstack((p_nox, p_ex))
-            stage_llh = (
-                (p * self.diagnose_matrices_midext[stage].T).sum(axis=0)
-                * (
-                    self.diag_time_dists[stage].pmf
-                    @ state_probs_midext
-                    @ self.diagnose_matrices_midext[stage].T
-                )
-            )
+            stage_llh = (p * self.diagnose_matrices_midext[stage].T).sum(axis=0)
 
             if log:
                 llh += np.sum(np.log(stage_llh))

From 0024895c407997f09ce131893fda5a3f46c87c93 Mon Sep 17 00:00:00 2001
From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com>
Date: Fri, 18 Aug 2023 14:42:12 +0200
Subject: [PATCH 32/35] fix: double multiply of midext evo with contra nox

---
 lymph/midline.py | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/lymph/midline.py b/lymph/midline.py
index 75b5124..b724f4f 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -489,19 +489,8 @@ def likelihood(
 
         state_probs_midext = self._evolve_midext()
         state_probs_ipsi = self.ext.ipsi._evolve(t_last=max_t)
-
-        # I think we need to discount the contralateral state probabilities for no
-        # midline extension, as it becomes less and less likely that the tumor
-        # does not cross the midline the longer it has been growing.
-        state_probs_contra_nox = (
-            self.noext.contra._evolve(t_last=max_t)
-            * state_probs_midext[:,0].reshape(-1,1)
-        )
-
-        state_probs_contra_ex = np.zeros(
-            shape=(max_t + 1, len(self.ext.ipsi.state_list)),
-            dtype=float
-        )
+        state_probs_contra_nox = self.noext.contra._evolve(t_last=max_t)
+        state_probs_contra_ex = np.zeros_like(state_probs_contra_nox)
 
         for stage in t_stages:
             for i in range(max_t):
@@ -510,6 +499,9 @@ def likelihood(
                     + state_probs_contra_ex[i]
                 ) @ self.ext.contra.transition_matrix
 
+            # the two `joint_state_probs` below together represent the joint probability
+            # of any ips- AND contralateral state AND any midline extension state
+            # marginalized over the diagnose time.
             joint_state_probs_nox = (
                 state_probs_ipsi.T
                 @ np.diag(self.noext.ipsi.diag_time_dists[stage].pmf)
@@ -520,6 +512,7 @@ def likelihood(
                 @ np.diag(self.ext.ipsi.diag_time_dists[stage].pmf)
                 @ state_probs_contra_ex
             )
+
             p_nox = np.sum(
                 self.noext.ipsi.diagnose_matrices[stage]
                 * (joint_state_probs_nox

From bb5d22ac2507c3dce51a9c784e527a2a116597c2 Mon Sep 17 00:00:00 2001
From: rmnldwg <48687784+rmnldwg@users.noreply.github.com>
Date: Mon, 21 Aug 2023 11:20:27 +0200
Subject: [PATCH 33/35] fix: joint state probs sum to one now

---
 lymph/midline.py | 61 +++++++++++++++++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 19 deletions(-)

diff --git a/lymph/midline.py b/lymph/midline.py
index b724f4f..4fd33e5 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -315,6 +315,7 @@ def patient_data(self, patient_data: pd.DataFrame):
         self._patient_data = patient_data.copy()
         self.load_data(patient_data)
 
+
     def _evolve_midext(self) -> np.ndarray:
         """Evolve hidden Markov model based system over one time step. Compute
         :math:`p(S \\mid t)` where :math:`S` is a distinct state and :math:`t`
@@ -341,6 +342,32 @@ def _evolve_midext(self) -> np.ndarray:
             midext_states[i+1,:] = midext_states[i,:] @ midextransition_matrix
         return midext_states
 
+
+    def _evolve_contra(
+        self,
+        t_last: int,
+    ) -> Tuple[np.ndarray, np.ndarray]:
+        """Evolve contra side as mixture of with & without midline extension."""
+        state_probs_midext = np.zeros(
+            shape=(t_last + 1, len(self.ext.contra.state_list))
+        )
+        state_probs_noext = np.zeros(
+            shape=(t_last + 1, len(self.noext.contra.state_list))
+        )
+        state_probs_noext[0,0] = 1.
+
+        for t in range(t_last):
+            state_probs_noext[t+1] = (
+                (1. - self.midext_prob) * state_probs_noext[t]
+            ) @ self.noext.contra.transition_matrix
+            state_probs_midext[t+1] = (
+                self.midext_prob * state_probs_noext[t]
+                + state_probs_midext[t]
+            ) @ self.ext.contra.transition_matrix
+
+        return state_probs_noext, state_probs_midext
+
+
     def load_data(
         self,
         data: pd.DataFrame,
@@ -417,6 +444,7 @@ def load_data(
                         "patients in this T-stage will be ignored."
                     )
 
+
     def check_and_assign(self, new_params: np.ndarray):
         """Check that the spread probability (rates) and the parameters for the
         marginalization over diagnose times are all within limits and assign them to
@@ -458,6 +486,7 @@ def check_and_assign(self, new_params: np.ndarray):
 
         self.spread_probs = new_spread_probs
 
+
     def likelihood(
         self,
         data: Optional[pd.DataFrame] = None,
@@ -478,8 +507,6 @@ def likelihood(
         except ValueError:
             return -np.inf if log else 0.
 
-        llh = 0. if log else 1.
-
         stored_t_stages = set(self.ext.ipsi.diagnose_matrices.keys())
         provided_t_stages = set(self.ext.ipsi.diag_time_dists.keys())
         t_stages = list(stored_t_stages.intersection(provided_t_stages))
@@ -487,25 +514,18 @@ def likelihood(
         max_t = self.diag_time_dists.max_t
         llh = 0. if log else 1.
 
-        state_probs_midext = self._evolve_midext()
+        # state_probs_midext = self._evolve_midext()
         state_probs_ipsi = self.ext.ipsi._evolve(t_last=max_t)
-        state_probs_contra_nox = self.noext.contra._evolve(t_last=max_t)
-        state_probs_contra_ex = np.zeros_like(state_probs_contra_nox)
+        state_probs_contra_nox, state_probs_contra_ex = self._evolve_contra(t_last=max_t)
 
         for stage in t_stages:
-            for i in range(max_t):
-                state_probs_contra_ex[i + 1] = (
-                    self.midext_prob * state_probs_contra_nox[i]
-                    + state_probs_contra_ex[i]
-                ) @ self.ext.contra.transition_matrix
-
             # the two `joint_state_probs` below together represent the joint probability
-            # of any ips- AND contralateral state AND any midline extension state
+            # of any ipsi- AND contralateral state AND any midline extension state
             # marginalized over the diagnose time.
             joint_state_probs_nox = (
                 state_probs_ipsi.T
                 @ np.diag(self.noext.ipsi.diag_time_dists[stage].pmf)
-                @ (state_probs_contra_nox * state_probs_midext[:,0].reshape(-1,1))
+                @ state_probs_contra_nox
             )
             joint_state_probs_ex = (
                 state_probs_ipsi.T
@@ -513,21 +533,23 @@ def likelihood(
                 @ state_probs_contra_ex
             )
 
-            p_nox = np.sum(
+            joint_diag_probs_nox = np.sum(
                 self.noext.ipsi.diagnose_matrices[stage]
                 * (joint_state_probs_nox
                     @ self.noext.contra.diagnose_matrices[stage]),
                 axis=0
-                )
-            p_ex = np.sum(
+            )
+            joint_diag_probs_ex = np.sum(
                 self.ext.ipsi.diagnose_matrices[stage]
                 * (joint_state_probs_ex
                     @ self.ext.contra.diagnose_matrices[stage]),
                 axis=0
-                )
+            )
 
-            p = np.vstack((p_nox, p_ex))
-            stage_llh = (p * self.diagnose_matrices_midext[stage].T).sum(axis=0)
+            joint_diag_probs = np.vstack((joint_diag_probs_nox, joint_diag_probs_ex))
+            stage_llh = (
+                joint_diag_probs * self.diagnose_matrices_midext[stage].T
+            ).sum(axis=0)
 
             if log:
                 llh += np.sum(np.log(stage_llh))
@@ -536,6 +558,7 @@ def likelihood(
 
         return llh
 
+
     def risk(
         self,
         given_params: Optional[np.ndarray] = None,

From b9f2048f2c7809061ebd04a378de538882949323 Mon Sep 17 00:00:00 2001
From: rmnldwg <48687784+rmnldwg@users.noreply.github.com>
Date: Mon, 21 Aug 2023 15:25:49 +0200
Subject: [PATCH 34/35] fix: code works without evolution again

---
 lymph/midline.py | 59 ++++++++++++++++++++++++++++--------------------
 1 file changed, 34 insertions(+), 25 deletions(-)

diff --git a/lymph/midline.py b/lymph/midline.py
index 4fd33e5..9a31359 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -71,8 +71,7 @@ def __init__(
             self.alpha_mix = 0.
 
         self.evolve_midext = evolve_midext
-        if self.evolve_midext:
-            self.midext_prob = 0.
+        self.midext_prob = 0.
 
         self.noext.diag_time_dists = self.ext.diag_time_dists
 
@@ -166,10 +165,7 @@ def spread_probs(self) -> np.ndarray:
         | base probs  | trans probs | midext prob |
         +-------------+-------------+-------------+
         """
-        if self.evolve_midext:
-            return np.concatenate([self.base_probs, self.trans_probs, [self.midext_prob]])
-
-        return np.concatenate([self.base_probs, self.trans_probs])
+        return np.concatenate([self.base_probs, self.trans_probs, [self.midext_prob]])
 
     @spread_probs.setter
     def spread_probs(self, new_params: np.ndarray):
@@ -178,14 +174,9 @@ def spread_probs(self, new_params: np.ndarray):
         """
         num_base_probs = len(self.base_probs)
 
-        if self.evolve_midext:
-            self.midext_prob = new_params[-1]
-            end = -1
-        else:
-            end = None
-
         self.base_probs  = new_params[:num_base_probs]
-        self.trans_probs = new_params[num_base_probs:end]
+        self.trans_probs = new_params[num_base_probs:-1]
+        self.midext_prob = new_params[-1]
 
 
     @property
@@ -327,8 +318,8 @@ def _evolve_midext(self) -> np.ndarray:
         :meta public:
         """
         midext_states = np.zeros(
-        shape=(self.diag_time_dists.max_t + 1, 2),
-        dtype=float
+            shape=(self.diag_time_dists.max_t + 1, 2),
+            dtype=float
         )
         midext_states[0,0] = 1.
 
@@ -348,22 +339,40 @@ def _evolve_contra(
         t_last: int,
     ) -> Tuple[np.ndarray, np.ndarray]:
         """Evolve contra side as mixture of with & without midline extension."""
-        state_probs_midext = np.zeros(
-            shape=(t_last + 1, len(self.ext.contra.state_list))
-        )
         state_probs_noext = np.zeros(
             shape=(t_last + 1, len(self.noext.contra.state_list))
         )
         state_probs_noext[0,0] = 1.
 
+        state_probs_midext = np.zeros(
+            shape=(t_last + 1, len(self.ext.contra.state_list))
+        )
+        if not self.evolve_midext:
+            state_probs_noext[0,0] = (1. - self.midext_prob)
+            state_probs_midext[0,0] = self.midext_prob
+
         for t in range(t_last):
-            state_probs_noext[t+1] = (
-                (1. - self.midext_prob) * state_probs_noext[t]
-            ) @ self.noext.contra.transition_matrix
-            state_probs_midext[t+1] = (
-                self.midext_prob * state_probs_noext[t]
-                + state_probs_midext[t]
-            ) @ self.ext.contra.transition_matrix
+            # When evolving over the midline extension state, there's a chance at any
+            # time step that the tumor grows over the midline and starts spreading to
+            # the contralateral side more aggressively.
+            if self.evolve_midext:
+                state_probs_noext[t+1] = (
+                    (1. - self.midext_prob) * state_probs_noext[t]
+                ) @ self.noext.contra.transition_matrix
+                state_probs_midext[t+1] = (
+                    self.midext_prob * state_probs_noext[t]
+                    + state_probs_midext[t]
+                ) @ self.ext.contra.transition_matrix
+
+            # When we do not evolve, the tumor is considered lateralized or extending
+            # over the midline from the start.
+            else:
+                state_probs_noext[t+1] = (
+                    state_probs_noext[t] @ self.noext.contra.transition_matrix
+                )
+                state_probs_midext[t+1] = (
+                    state_probs_midext[t] @ self.ext.contra.transition_matrix
+                )
 
         return state_probs_noext, state_probs_midext
 

From e02c47c172d6c5714b0ea4e0356147f25e2c5046 Mon Sep 17 00:00:00 2001
From: larstwi <101893533+larstwi@users.noreply.github.com>
Date: Thu, 26 Oct 2023 05:06:37 +0200
Subject: [PATCH 35/35] adapted code for changes in lyscripts prevalence
 prediction (midext timeevolution model)

---
 lymph/midline.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/lymph/midline.py b/lymph/midline.py
index 9a31359..91fc692 100644
--- a/lymph/midline.py
+++ b/lymph/midline.py
@@ -501,6 +501,8 @@ def likelihood(
         data: Optional[pd.DataFrame] = None,
         given_params: Optional[np.ndarray] = None,
         log: bool = True,
+        t_stages: Optional[List] = None,
+        prevalence_calc: bool = False
     ) -> float:
         """Compute the (log-)likelihood of data, using the stored spread probs and
         fixed distributions for marginalizing over diagnose times.
@@ -510,15 +512,15 @@ def likelihood(
         """
         if data is not None:
             self.patient_data = data
-
+        
         try:
             self.check_and_assign(given_params)
         except ValueError:
             return -np.inf if log else 0.
-
-        stored_t_stages = set(self.ext.ipsi.diagnose_matrices.keys())
-        provided_t_stages = set(self.ext.ipsi.diag_time_dists.keys())
-        t_stages = list(stored_t_stages.intersection(provided_t_stages))
+        if t_stages is None:
+            stored_t_stages = set(self.ext.ipsi.diagnose_matrices.keys())
+            provided_t_stages = set(self.ext.ipsi.diag_time_dists.keys())
+            t_stages = list(stored_t_stages.intersection(provided_t_stages))
 
         max_t = self.diag_time_dists.max_t
         llh = 0. if log else 1.
@@ -526,7 +528,6 @@ def likelihood(
         # state_probs_midext = self._evolve_midext()
         state_probs_ipsi = self.ext.ipsi._evolve(t_last=max_t)
         state_probs_contra_nox, state_probs_contra_ex = self._evolve_contra(t_last=max_t)
-
         for stage in t_stages:
             # the two `joint_state_probs` below together represent the joint probability
             # of any ipsi- AND contralateral state AND any midline extension state
@@ -541,7 +542,6 @@ def likelihood(
                 @ np.diag(self.ext.ipsi.diag_time_dists[stage].pmf)
                 @ state_probs_contra_ex
             )
-
             joint_diag_probs_nox = np.sum(
                 self.noext.ipsi.diagnose_matrices[stage]
                 * (joint_state_probs_nox
@@ -559,12 +559,13 @@ def likelihood(
             stage_llh = (
                 joint_diag_probs * self.diagnose_matrices_midext[stage].T
             ).sum(axis=0)
-
             if log:
                 llh += np.sum(np.log(stage_llh))
             else:
-                llh *= np.prod(stage_llh)
-
+                if prevalence_calc:
+                    llh = stage_llh
+                else:
+                    llh *= np.prod(stage_llh)
         return llh