From 34ca480873ed22a4b38cf504c042457b506f50ce Mon Sep 17 00:00:00 2001 From: Lars Widmer Date: Fri, 13 Jan 2023 01:08:02 +0100 Subject: [PATCH 01/35] test --- lymph/midline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lymph/midline.py b/lymph/midline.py index c9c7f1a..b1c7220 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -4,7 +4,7 @@ import pandas as pd from .bilateral import Bilateral -from .timemarg import MarginalizorDict +from .timemarg import MarginalizorDictjd class MidlineBilateral: From e2d76c3c8de2cb46a82225967bbcabb85f2afe77 Mon Sep 17 00:00:00 2001 From: Lars Widmer Date: Fri, 13 Jan 2023 01:09:11 +0100 Subject: [PATCH 02/35] test --- lymph/midline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lymph/midline.py b/lymph/midline.py index b1c7220..c9c7f1a 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -4,7 +4,7 @@ import pandas as pd from .bilateral import Bilateral -from .timemarg import MarginalizorDictjd +from .timemarg import MarginalizorDict class MidlineBilateral: From d05da344f4595eb7797209cb58bbf24ac3e8d636 Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Wed, 19 Apr 2023 16:37:04 +0200 Subject: [PATCH 03/35] added Bilateral instances and diag_time_dists for unknown patients --- lymph/midline.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lymph/midline.py b/lymph/midline.py index c9c7f1a..51e5a57 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -64,12 +64,19 @@ def __init__( self.noext = Bilateral( graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric ) + self.ext_unknown = Bilateral( + graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric + ) + self.noext_unknown = Bilateral( + graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric + ) self.use_mixing = use_mixing if self.use_mixing: self.alpha_mix = 0. self.noext.diag_time_dists = self.ext.diag_time_dists - + self.ext_unknown.diag_time_dists = self.ext.diag_time_dists + self.noext_unknown.diag_time_dists = self.ext.diag_time_dists @property def graph(self) -> Dict[Tuple[str], List[str]]: From 584c07e8c057f7b628b9c8a7fb99664cc55587ba Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Fri, 21 Apr 2023 14:37:46 +0200 Subject: [PATCH 04/35] added sampler for midline extension and likelihood for unknown midline extension --- lymph/bilateral.py | 45 ++++ lymph/midline.py | 92 ++++++- lymph/midline_timeev.py | 548 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 676 insertions(+), 9 deletions(-) create mode 100644 lymph/midline_timeev.py diff --git a/lymph/bilateral.py b/lymph/bilateral.py index b6576d7..f3b1151 100644 --- a/lymph/bilateral.py +++ b/lymph/bilateral.py @@ -427,6 +427,26 @@ def check_and_assign(self, new_params: np.ndarray): self.spread_probs = new_spread_probs + def t_stages(self): + stored_t_stages = set(self.ipsi.diagnose_matrices.keys()) + provided_t_stages = set(self.ipsi.diag_time_dists.keys()) + t_stages = list(stored_t_stages.intersection(provided_t_stages)) + + return t_stages + + def state_probs_ipsi(self): + max_t = self.diag_time_dists.max_t + state_probs = {} + state_probs["ipsi"] = self.ipsi._evolve(t_last=max_t) + + return state_probs["ipsi"] + + def state_probs_contra(self): + max_t = self.diag_time_dists.max_t + state_probs = {} + state_probs["contra"] = self.contra._evolve(t_last=max_t) + + return state_probs["contra"] def _likelihood( self, @@ -467,6 +487,31 @@ def _likelihood( return llh + def _likelihood2( + self, + stage, + state_probs, + ) -> float: + """Compute the (log-)likelihood of data, using the stored spread probs and + fixed distributions for marginalizing over diagnose times. + + This method mainly exists so that the checking and assigning of the + spread probs can be skipped. + """ + + joint_state_probs = ( + state_probs["ipsi"].T + @ np.diag(self.ipsi.diag_time_dists[stage].pmf) + @ state_probs["contra"] + ) + p = np.sum( + self.ipsi.diagnose_matrices[stage] + * (joint_state_probs + @ self.contra.diagnose_matrices[stage]), + axis=0 + ) + + return p def likelihood( self, diff --git a/lymph/midline.py b/lymph/midline.py index 51e5a57..8a3da16 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -126,21 +126,32 @@ def base_probs(self, new_params: np.ndarray): self.ext.ipsi.base_probs = new_params[:k] self.noext.ipsi.base_probs = new_params[:k] + self.ext_unknown.ipsi.base_probs = new_params[:k] + self.noext_unknown.ipsi.base_probs = new_params[:k] if self.use_mixing: self.noext.contra.base_probs = new_params[k:2*k] + self.noext_unknown.contra.base_probs = new_params[k:2*k] self.alpha_mix = new_params[-1] # compute linear combination self.ext.contra.base_probs = ( self.alpha_mix * self.ext.ipsi.base_probs + (1. - self.alpha_mix) * self.noext.contra.base_probs ) + self.ext_unknown.contra.base_probs = ( + self.alpha_mix * self.ext_unknown.ipsi.base_probs + + (1. - self.alpha_mix) * self.noext_unknown.contra.base_probs + ) else: self.ext.contra.base_probs = new_params[k:2*k] self.noext.contra.base_probs = new_params[2*k:3*k] + self.ext_unknown.contra.base_probs = new_params[k:2*k] + self.noext_unknown.contra.base_probs = new_params[2*k:3*k] # avoid unnecessary double computation of ipsilateral transition matrix self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix + self.noext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix + self.ext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix @property @@ -157,9 +168,13 @@ def trans_probs(self, new_params: np.ndarray): """ self.noext.trans_probs = new_params self.ext.trans_probs = new_params + self.noext_unknown.trans_probs = new_params + self.ext_unknown.trans_probs = new_params # avoid unnecessary double computation of ipsilateral transition matrix self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix + self.noext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix + self.ext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix @property @@ -210,6 +225,8 @@ def diag_time_dists(self, new_dists: Union[dict, MarginalizorDict]): """ self.ext.diag_time_dists = new_dists self.noext.diag_time_dists = self.ext.diag_time_dists + self.noext_unknown.diag_time_dists = self.ext.diag_time_dists + self.ext_unknown.diag_time_dists = self.ext.diag_time_dists @property @@ -236,7 +253,25 @@ def modalities(self, modality_spsn: Dict[str, List[float]]): """ self.noext.modalities = modality_spsn self.ext.modalities = modality_spsn + self.noext_unknown.modalities = modality_spsn + self.ext_unknown.modalities = modality_spsn + + @property + def midext_prob(self): + """Assign the last of the new_params to the midline extension probability + """ + try: + return self._midext_prob + except AttributeError as attr_err: + raise AttributeError( + "No midline extension probability has been assigned" + ) from attr_err + @midext_prob.setter + def midext_prob(self, new_params): + """A variable containing the midline extension probability + """ + self._midext_prob = new_params[-1] @property def patient_data(self): @@ -330,8 +365,9 @@ def load_data( :meth:`Unilateral.load_data`: Data loading method of the unilateral network. """ - ext_data = data.loc[data[("info", "tumor", "midline_extension")]] - noext_data = data.loc[~data[("info", "tumor", "midline_extension")]] + ext_data = data.loc[(data[("info", "tumor", "midline_extension")]==True)] + noext_data = data.loc[(data[("info", "tumor", "midline_extension")]==False)] + unknown_data = data.loc[(data['info', 'tumor', 'midline_extension']!=False) & (data['info', 'tumor', 'midline_extension']!=True)] self.ext.load_data( ext_data, @@ -343,7 +379,18 @@ def load_data( modality_spsn=modality_spsn, mode=mode ) + if len(unknown_data)>0: + self.ext_unknown.load_data( + unknown_data, + modality_spsn=modality_spsn, + mode=mode + ) + self.noext_unknown.load_data( + unknown_data, + modality_spsn=modality_spsn, + mode=mode + ) def check_and_assign(self, new_params: np.ndarray): """Check that the spread probability (rates) and the parameters for the @@ -361,14 +408,20 @@ def check_and_assign(self, new_params: np.ndarray): invalid parameters. """ k = len(self.spread_probs) + l = len(self.diag_time_dists) new_spread_probs = new_params[:k] - new_marg_params = new_params[k:] + new_marg_params = new_params[k:(k+l)] try: self.ext.ipsi.diag_time_dists.update(new_marg_params) + self.ext_unknown.ipsi.diag_time_dists.update(new_marg_params) self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists + self.ext_unknown.contra.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists + self.noext_unknown.ipsi.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists + self.noext_unknown.contra.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists + self.midext_prob = new_params except ValueError as val_err: raise ValueError( "Parameters for marginalization over diagnose times are invalid" @@ -382,6 +435,10 @@ def check_and_assign(self, new_params: np.ndarray): raise ValueError( "Spread probs must be between 0 and 1" ) + if 0. > self.midext_prob or self.midext_prob > 1.: + raise ValueError( + "Midline extension probability must be between 0 and 1" + ) self.spread_probs = new_spread_probs @@ -433,12 +490,29 @@ def likelihood( llh = 0. if log else 1. - if log: - llh += self.ext._likelihood(log=log) - llh += self.noext._likelihood(log=log) - else: - llh *= self.ext._likelihood(log=log) - llh *= self.noext._likelihood(log=log) + if len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']!=False) & (self.patient_data['info', 'tumor', 'midline_extension']!=True)])>0: + state_probs = {} + state_probs["ipsi"] = self.ext_unknown.state_probs_ipsi() + state_probs["contra"] = self.ext_unknown.state_probs_contra() + state_probs2 = {} + state_probs2["ipsi"] = self.noext_unknown.state_probs_ipsi() + state_probs2["contra"] = self.noext_unknown.state_probs_contra() + t_stages = list(set(self.patient_data[("info","tumor","t_stage")])) + t_stages = self.ext_unknown.t_stages() + + + for stage in t_stages: + p = self.ext_unknown._likelihood2(stage=stage, state_probs=state_probs)*self.midext_prob + p2 = self.noext_unknown._likelihood2(stage=stage, state_probs=state_probs2)*(1-self.midext_prob) + llh += np.sum(np.log(p+p2)) + + if len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False) | (self.patient_data['info', 'tumor', 'midline_extension']==True)])>0: + if log: + llh += self.ext._likelihood(log=log) + np.log(self.midext_prob)*len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==True)]) + llh += self.noext._likelihood(log=log) + np.log(1-self.midext_prob)*len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False)]) + else: + llh *= self.ext._likelihood(log=log)*self.midext_prob**len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==True)]) + llh *= self.noext._likelihood(log=log)*(1-self.midext_prob)**len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False)]) return llh diff --git a/lymph/midline_timeev.py b/lymph/midline_timeev.py new file mode 100644 index 0000000..cccd8fd --- /dev/null +++ b/lymph/midline_timeev.py @@ -0,0 +1,548 @@ +from typing import Dict, List, Optional, Tuple, Union + +import numpy as np +import pandas as pd + +from .bilateral import Bilateral +from .timemarg import MarginalizorDict + + +class MidlineBilateral: + """Model a bilateral lymphatic system where an additional risk factor can + be provided in the data: Whether or not the primary tumor extended over the + mid-sagittal line. + + It is reasonable to assume (and supported by data) that such an extension + significantly increases the risk for metastatic spread to the contralateral + side of the neck. This class attempts to capture this using a simple + assumption: We assume that the probability of spread to the contralateral + side for patients *with* midline extension is larger than for patients + *without* it, but smaller than the probability of spread to the ipsilateral + side. Formally: + + .. math:: + b_c^{\\in} = \\alpha \\cdot b_i + (1 - \\alpha) \\cdot b_c^{\\not\\in} + + where :math:`b_c^{\\in}` is the probability of spread from the primary tumor + to the contralateral side for patients with midline extension, and + :math:`b_c^{\\not\\in}` for patients without. :math:`\\alpha` is the linear + mixing parameter. + """ + + def __init__( + self, + graph: Dict[Tuple[str], List[str]], + use_mixing: bool = True, + trans_symmetric: bool = True, + **_kwargs + ): + """The class is constructed in a similar fashion to the + :class:`Bilateral`: That class contains one :class:`Unilateral` for + each side of the neck, while this class will contain two instances of + :class:`Bilateral`, one for the case of a midline extension and one for + the case of no midline extension. + + Args: + graph: Dictionary of the same kind as for initialization of + :class:`System`. This graph will be passed to the constructors of + two :class:`System` attributes of this class. + use_mixing: Describe the contralateral base spread probabilities for the + case of a midline extension as a linear combination between the base + spread probs of the ipsilateral side and the ones of the contralateral + side when no midline extension is present. + trans_symmetric: If ``True``, the spread probabilities among the + LNLs will be set symmetrically. + + See Also: + :class:`Bilateral`: Two of these are held as attributes by this + class. One for the case of a mid-sagittal extension of the primary + tumor and one for the case of no such extension. + """ + self.ext = Bilateral( + graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric + ) + self.noext = Bilateral( + graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric + ) + + self.use_mixing = use_mixing + if self.use_mixing: + self.alpha_mix = 0. + + self.noext.diag_time_dists = self.ext.diag_time_dists + + + @property + def graph(self) -> Dict[Tuple[str], List[str]]: + """Return the (unilateral) graph that was used to create this network. + """ + return self.noext.graph + + + @property + def base_probs(self) -> np.ndarray: + """Base probabilities of metastatic lymphatic spread from the tumor(s) + to the lymph node levels. This will return the following concatenation of + base spread probs depending on whether ``use_mixing`` is set to ``True`` or + ``False``: + + With the use of a mixing parameter: + +-----------+-------------------------+--------------+ + | base ipsi | base contra (no midext) | mixing param | + +-----------+-------------------------+--------------+ + + Without it: + +-----------+----------------------+-------------------------+ + | base ipsi | base contra (midext) | base contra (no midext) | + +-----------+----------------------+-------------------------+ + + When setting these, one needs to provide the respective shape. + """ + if self.use_mixing: + return np.concatenate([ + self.ext.ipsi.base_probs, + self.noext.contra.base_probs, + [self.alpha_mix], + ]) + else: + return np.concatenate([ + self.ext.ipsi.base_probs, + self.ext.contra.base_probs, + self.noext.contra.base_probs, + ]) + + @base_probs.setter + def base_probs(self, new_params: np.ndarray): + """Set the base probabilities from the tumor(s) to the LNLs, accounting + for the mixing parameter :math:`\\alpha``. + """ + k = len(self.ext.ipsi.base_probs) + + self.ext.ipsi.base_probs = new_params[:k] + self.noext.ipsi.base_probs = new_params[:k] + + if self.use_mixing: + self.noext.contra.base_probs = new_params[k:2*k] + self.alpha_mix = new_params[-1] + # compute linear combination + self.ext.contra.base_probs = ( + self.alpha_mix * self.ext.ipsi.base_probs + + (1. - self.alpha_mix) * self.noext.contra.base_probs + ) + else: + self.ext.contra.base_probs = new_params[k:2*k] + self.noext.contra.base_probs = new_params[2*k:3*k] + + # avoid unnecessary double computation of ipsilateral transition matrix + self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix + + + @property + def trans_probs(self) -> np.ndarray: + """Probabilities of lymphatic spread among the lymph node levels. They + are assumed to be symmetric ipsi- & contralaterally by default. + """ + return self.noext.trans_probs + + @trans_probs.setter + def trans_probs(self, new_params: np.ndarray): + """Set the new spread probabilities for lymphatic spread from among the + LNLs. + """ + self.noext.trans_probs = new_params + self.ext.trans_probs = new_params + # avoid unnecessary double computation of ipsilateral transition matrix + self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix + + + @property + def spread_probs(self) -> np.ndarray: + """These are the probabilities representing the spread of cancer along + lymphatic drainage pathways per timestep. + + It is composed of the base spread probs (possible with the mixing parameter) + and the probabilities of spread among the LNLs. + + +-------------+-------------+ + | base probs | trans probs | + +-------------+-------------+ + """ + return np.concatenate([self.base_probs, self.trans_probs]) + + @spread_probs.setter + def spread_probs(self, new_params: np.ndarray): + """Set the new spread probabilities and the mixing parameter + :math:`\\alpha`. + """ + num_base_probs = len(self.base_probs) + + self.base_probs = new_params[:num_base_probs] + self.trans_probs = new_params[num_base_probs:] + + + @property + def diag_time_dists(self) -> MarginalizorDict: + """This property holds the probability mass functions for marginalizing over + possible diagnose times for each T-stage. + + When setting this property, one may also provide a normal Python dict, in + which case it tries to convert it to a :class:`MarginalizorDict`. + + Note that the method will provide the same instance of this + :class:`MarginalizorDict` to both sides of the network. + + See Also: + :class:`MarginalzorDict`, :class:`Marginalizor`. + """ + return self.ext.diag_time_dists + + @diag_time_dists.setter + def diag_time_dists(self, new_dists: Union[dict, MarginalizorDict]): + """Assign new :class:`MarginalizorDict` to this property. If it is a normal + Python dictionary, tr to convert it into a :class:`MarginalizorDict`. + """ + self.ext.diag_time_dists = new_dists + self.noext.diag_time_dists = self.ext.diag_time_dists + + + @property + def modalities(self): + """A dictionary containing the specificity :math:`s_P` and sensitivity + :math:`s_N` values for each diagnostic modality. + + Such a dictionary can also be provided to set this property and compute + the observation matrices of all used systems. + + See Also: + :meth:`Bilateral.modalities`: Getting and setting this property in + the normal bilateral model. + + :meth:`Unilateral.modalities`: Getting and setting :math:`s_P` and + :math:`s_N` for a unilateral model. + """ + return self.noext.modalities + + @modalities.setter + def modalities(self, modality_spsn: Dict[str, List[float]]): + """Call the respective getter and setter methods of the bilateral + components with and without midline extension. + """ + self.noext.modalities = modality_spsn + self.ext.modalities = modality_spsn + + @property + def midext_prob(self): + """Assign the last of the new_params to the midline extension probability + """ + try: + return self._midext_prob + except AttributeError as attr_err: + raise AttributeError( + "No midline extension probability has been assigned" + ) from attr_err + + @midext_prob.setter + def midext_prob(self, new_params): + """A variable containing the midline extension probability + """ + self._midext_prob = new_params[-2] + + @property + def midext_trans(self): + """Assign the last of the new_params to the midline extension transition probability + """ + try: + return self._midext_trans + except AttributeError as attr_err: + raise AttributeError( + "No midline extension transition probability has been assigned" + ) from attr_err + + @midext_trans.setter + def midext_trans(self, new_params): + """A variable containing the midline extension transition probability + """ + self._midext_trans = new_params[-1] + + @property + def patient_data(self): + """A pandas :class:`DataFrame` with rows of patients and columns of + patient and involvement details. The table's header should have three + levels that categorize the individual lymph node level's involvement to + the corresponding diagnostic modality (first level), the side of the + LNL (second level) and finaly the name of the LNL (third level). + Additionally, the patient's T-category must be stored under ('info', + 'tumor', 't_stage') and whether the tumor extends over the mid-sagittal + line should be noted under ('info', 'tumor', 'midline_extension'). So, + part of this table could look like this: + + +-----------------------------+------------------+------------------+ + | info | MRI | PET | + +-----------------------------+--------+---------+--------+---------+ + | tumor | ipsi | contra | ipsi | contra | + +---------+-------------------+--------+---------+--------+---------+ + | t_stage | midline_extension | II | II | II | II | + +=========+===================+========+=========+========+=========+ + | early | ``True`` |``True``|``None`` |``True``|``False``| + +---------+-------------------+--------+---------+--------+---------+ + | late | ``True`` |``None``|``None`` |``None``|``None`` | + +---------+-------------------+--------+---------+--------+---------+ + | early | ``False`` |``True``|``False``|``True``|``True`` | + +---------+-------------------+--------+---------+--------+---------+ + """ + try: + return self._patient_data + except AttributeError: + raise AttributeError( + "No patient data has been loaded yet" + ) + + @patient_data.setter + def patient_data(self, patient_data: pd.DataFrame): + """Load the patient data. For now, this just calls the :meth:`load_data` + method, but at a later point, I would like to write a function here + that generates the pandas :class:`DataFrame` from the internal matrix + representation of the data. + """ + self._patient_data = patient_data.copy() + self.load_data(patient_data) + + + def load_data( + self, + data: pd.DataFrame, + modality_spsn: Optional[Dict[str, List[float]]] = None, + mode = "HMM" + ): + """Load data as table of patients with involvement details and convert + it into internal representation of a matrix. + + Args: + data: The table with rows of patients and columns of patient and + involvement details. The table's header must have three levels + that categorize the individual lymph node level's involvement + to the corresponding diagnostic modality (first level), the + side of the LNL (second level) and finaly the name of the LNL + (third level). Additionally, the patient's T-category must be + stored under ('info', 'tumor', 't_stage') and whether the tumor + extends over the mid-sagittal line should be noted under + ('info', 'tumor', 'midline_extension'). So, part of this table + could look like this: + + +-----------------------------+---------------------+ + | info | MRI | + +-----------------------------+----------+----------+ + | tumor | ipsi | contra | + +---------+-------------------+----------+----------+ + | t_stage | midline_extension | II | II | + +=========+===================+==========+==========+ + | early | ``True`` | ``True`` | ``None`` | + +---------+-------------------+----------+----------+ + | late | ``True`` | ``None`` | ``None`` | + +---------+-------------------+----------+----------+ + | early | ``False`` | ``True`` | ``True`` | + +---------+-------------------+----------+----------+ + + modality_spsn: If no diagnostic modalities have been defined yet, + this must be provided to build the observation matrix. + + See Also: + :attr:`patient_data`: The attribute for loading and exporting data. + + :meth:`Bilateral.load_data`: Loads data into a bilateral network by + splitting it into ipsi- & contralateral side and passing each to + the respective unilateral method (see below). + + :meth:`Unilateral.load_data`: Data loading method of the unilateral + network. + """ + ext_data = data + noext_data = data + + self.ext.load_data( + ext_data, + modality_spsn=modality_spsn, + mode=mode + ) + self.noext.load_data( + noext_data, + modality_spsn=modality_spsn, + mode=mode + ) + + + def check_and_assign(self, new_params: np.ndarray): + """Check that the spread probability (rates) and the parameters for the + marginalization over diagnose times are all within limits and assign them to + the model. Also, make sure the ipsi- and contralateral distributions over + diagnose times are the same instance of the :class:`MarginalizorDict`. + + Args: + new_params: The set of :attr:`spread_probs` and parameters to provide for + updating the parametrized distributions over diagnose times. + + Warning: + This method assumes that the parametrized distributions (instances of + :class:`Marginalizor`) all raise a ``ValueError`` when provided with + invalid parameters. + """ + k = len(self.spread_probs) + l = len(self.diag_time_dists) + new_spread_probs = new_params[:k] + new_marg_params = new_params[k:(k+l)] + + try: + self.ext.ipsi.diag_time_dists.update(new_marg_params) + self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists + self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists + self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists + self.midext_prob = new_params + self.midext_trans = new_params + except ValueError as val_err: + raise ValueError( + "Parameters for marginalization over diagnose times are invalid" + ) from val_err + + if new_spread_probs.shape != self.spread_probs.shape: + raise ValueError( + "Shape of provided spread parameters does not match network" + ) + if np.any(0. > new_spread_probs) or np.any(new_spread_probs > 1.): + raise ValueError( + "Spread probs must be between 0 and 1" + ) + if 0. > np.any(0. > new_params) or np.any(new_params > 1.): + raise ValueError( + "Probabilitiex must be between 0 and 1" + ) + + self.spread_probs = new_spread_probs + + + def likelihood( + self, + data: Optional[pd.DataFrame] = None, + given_params: Optional[np.ndarray] = None, + log: bool = True, + ) -> float: + """Compute log-likelihood of (already stored) data, given the spread + probabilities and either a discrete diagnose time or a distribution to + use for marginalization over diagnose times. + + Args: + data: Table with rows of patients and columns of per-LNL involvment. See + :meth:`load_data` for more details on how this should look like. + + given_params: The likelihood is a function of these parameters. They mainly + consist of the :attr:`spread_probs` of the model. Any excess parameters + will be used to update the parametrized distributions used for + marginalizing over the diagnose times (see :attr:`diag_time_dists`). + + log: When ``True``, the log-likelihood is returned. + + Returns: + The log-likelihood :math:`\\log{p(D \\mid \\theta)}` where :math:`D` + is the data and :math:`\\theta` is the tuple of spread probabilities + and diagnose times or distributions over diagnose times. + + See Also: + :attr:`spread_probs`: Property for getting and setting the spread + probabilities, of which a lymphatic network has as many as it has + :class:`Edge` instances (in case no symmetries apply). + + :meth:`Unilateral.likelihood`: The log-likelihood function of + the unilateral system. + + :meth:`Bilateral.likelihood`: The (log-)likelihood function of the + bilateral system. + """ + if data is not None: + self.patient_data = data + + try: + self.check_and_assign(given_params) + except ValueError: + return -np.inf if log else 0. + + llh = 0. if log else 1. + + + state_probs = {} + state_probs["ipsi"] = self.ext.state_probs_ipsi() + state_probs["contra"] = self.ext.state_probs_contra() + state_probs2 = {} + state_probs2["ipsi"] = self.noext.state_probs_ipsi() + state_probs2["contra"] = self.noext.state_probs_contra() + t_stages = list(set(self.patient_data[("info","tumor","t_stage")])) + t_stages = self.ext.t_stages() + + + for stage in t_stages: + p = self.ext._likelihood2(stage=stage, state_probs=state_probs)*(self.midext_prob) + p2 = self.noext._likelihood2(stage=stage, state_probs=state_probs2)*(1-self.midext_prob) + llh += np.sum(np.log(p+p2)) + + return llh + + + + def risk( + self, + given_params: Optional[np.ndarray] = None, + midline_extension: bool = True, + **kwargs, + ) -> float: + """Compute the risk of nodal involvement given a specific diagnose. + + Args: + spread_probs: Set ot new spread parameters. This also contains the + mixing parameter alpha in the last position. + midline_extension: Whether or not the patient's tumor extends over + the mid-sagittal line. + + See Also: + :meth:`Bilateral.risk`: Depending on whether or not the patient's + tumor does extend over the midline, the risk function of the + respective :class:`Bilateral` instance gets called. + """ + if given_params is not None: + self.check_and_assign(given_params) + + if midline_extension: + return self.ext.risk(**kwargs) + else: + return self.noext.risk(**kwargs) + + + def generate_dataset( + self, + num_patients: int, + stage_dist: Dict[str, float], + ext_prob: float, + **_kwargs, + ) -> pd.DataFrame: + """Generate/sample a pandas :class:`DataFrame` from the defined network. + + Args: + num_patients: Number of patients to generate. + stage_dist: Probability to find a patient in a certain T-stage. + ext_prob: Probability that a patient's primary tumor extends over + the mid-sagittal line. + """ + drawn_ext = np.random.choice( + [True, False], p=[ext_prob, 1. - ext_prob], size=num_patients + ) + ext_dataset = self.ext.generate_dataset( + num_patients=num_patients, + stage_dist=stage_dist, + ) + noext_dataset = self.noext.generate_dataset( + num_patients=num_patients, + stage_dist=stage_dist, + ) + + dataset = noext_dataset.copy() + dataset.loc[drawn_ext] = ext_dataset.loc[drawn_ext] + dataset[('info', 'tumor', 'midline_extension')] = drawn_ext + + return dataset \ No newline at end of file From 50bbcf0585e482fed347393946dfd58432679eca Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Tue, 25 Apr 2023 01:36:13 +0200 Subject: [PATCH 05/35] added time evolution for midline extension --- lymph/midline_timeev.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/lymph/midline_timeev.py b/lymph/midline_timeev.py index cccd8fd..2a1dc86 100644 --- a/lymph/midline_timeev.py +++ b/lymph/midline_timeev.py @@ -7,7 +7,7 @@ from .timemarg import MarginalizorDict -class MidlineBilateral: +class MidlineBilateraltime: """Model a bilateral lymphatic system where an additional risk factor can be provided in the data: Whether or not the primary tumor extended over the mid-sagittal line. @@ -230,10 +230,11 @@ def modalities(self, modality_spsn: Dict[str, List[float]]): self.noext.modalities = modality_spsn self.ext.modalities = modality_spsn + """ @property def midext_prob(self): - """Assign the last of the new_params to the midline extension probability - """ + Assign the last of the new_params to the midline extension probability + try: return self._midext_prob except AttributeError as attr_err: @@ -243,10 +244,11 @@ def midext_prob(self): @midext_prob.setter def midext_prob(self, new_params): - """A variable containing the midline extension probability - """ + A variable containing the midline extension probability + self._midext_prob = new_params[-2] - + """ + @property def midext_trans(self): """Assign the last of the new_params to the midline extension transition probability @@ -396,7 +398,7 @@ def check_and_assign(self, new_params: np.ndarray): self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists - self.midext_prob = new_params + #self.midext_prob = new_params self.midext_trans = new_params except ValueError as val_err: raise ValueError( @@ -413,7 +415,7 @@ def check_and_assign(self, new_params: np.ndarray): ) if 0. > np.any(0. > new_params) or np.any(new_params > 1.): raise ValueError( - "Probabilitiex must be between 0 and 1" + "Probabilities must be between 0 and 1" ) self.spread_probs = new_spread_probs @@ -478,8 +480,8 @@ def likelihood( for stage in t_stages: - p = self.ext._likelihood2(stage=stage, state_probs=state_probs)*(self.midext_prob) - p2 = self.noext._likelihood2(stage=stage, state_probs=state_probs2)*(1-self.midext_prob) + p = self.ext._likelihood2(stage=stage, state_probs=state_probs)*(1-(1-self.midext_trans)**10) + p2 = self.noext._likelihood2(stage=stage, state_probs=state_probs2)*(1-self.midext_trans)**10 llh += np.sum(np.log(p+p2)) return llh From 67984d4b951e2a0a2dce4fd1a691c352e4307b39 Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Tue, 25 Apr 2023 15:43:36 +0200 Subject: [PATCH 06/35] new class MidlineBilateraltime for time evolution of Midline extension --- .DS_Store | Bin 0 -> 6148 bytes lymph/__init__.py | 3 ++- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 .DS_Store diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..f062562b8f00852fb43e4c196fffafb7a467d02a GIT binary patch literal 6148 zcmeH~!AitX3`K9W!hoAux_nbn8NHAn&$$ zNgq9nrU78PufrX%1+byJ;^fQHeBXUx7a6&vnm^;vKX*^ZW4Fj$4|r{ZTfE{O!?gYi z9lr5_PrS{;176T$KtBzyAD|r(5CIVo0TB>^83Fcex6O4`ts)=-BJfMVzYm4(+NO?G ztpl<**z+EKjzaP%KX8 z^Gl?|+NxSbKm_IleD>u_!zFhX!#cO)6E%ZnFk1;pW mxkM|bMl0rxx8j>gUh!-0x2a>*C}%v%srnIcE;14L3j$|jIvw}` literal 0 HcmV?d00001 diff --git a/lymph/__init__.py b/lymph/__init__.py index fca3dc9..07ab0d9 100644 --- a/lymph/__init__.py +++ b/lymph/__init__.py @@ -17,6 +17,7 @@ from .bilateral import Bilateral, BilateralSystem from .edge import Edge from .midline import MidlineBilateral +from .midline_timeev import MidlineBilateraltime from .node import Node from .timemarg import Marginalizor, MarginalizorDict from .unilateral import System, Unilateral @@ -26,6 +27,6 @@ "Edge", "Unilateral", "System", "Bilateral", "BilateralSystem", - "MidlineBilateral", + "MidlineBilateral", "MidlineBilateraltime", "Marginalizor", "MarginalizorDict", ] From 5aee89af9a9ddd62c9bac2e50c771af2204a6fda Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Mon, 29 May 2023 15:51:42 +0200 Subject: [PATCH 07/35] fixed bug when loading only data with no midline extension info --- .DS_Store | Bin 6148 -> 6148 bytes lymph/.DS_Store | Bin 0 -> 6148 bytes lymph/midline.py | 38 +++++++++++++++++++++++++++----------- lymph/midline_timeev.py | 4 ++-- 4 files changed, 29 insertions(+), 13 deletions(-) create mode 100644 lymph/.DS_Store diff --git a/.DS_Store b/.DS_Store index f062562b8f00852fb43e4c196fffafb7a467d02a..590cf61ff4e029544061ad8649cc4baa5fd1c15c 100644 GIT binary patch delta 293 zcmZoMXfc=|#>B)qF;Q%yo+2ab!~pA!4;mPOj2`9sr1Ii|q@4UD1_p-hNd-BX#U%y? z*BF_YSyuu5=%;pof3CJ*F;Q%yo+2aj!~knX=E*!v%A6_1$vH{+`8f=mm6@NgYz|<~X4=fo e!OsCyyxEcYJM(0I5kp3X$u>OFn`1;)FarSiofGx| diff --git a/lymph/.DS_Store b/lymph/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T00: + if len(ext_data) > 0: + self.ext.load_data( + ext_data, + modality_spsn=modality_spsn, + mode=mode + ) + + if len(noext_data) > 0: + self.noext.load_data( + noext_data, + modality_spsn=modality_spsn, + mode=mode + ) + if len(unknown_data) > 0 & len(ext_data) + len(noext_data) > 0: self.ext_unknown.load_data( unknown_data, modality_spsn=modality_spsn, @@ -392,6 +395,19 @@ def load_data( mode=mode ) + if len(unknown_data) > 0 & len(ext_data) + len(noext_data) == 0: + self.ext.load_data( + unknown_data, + modality_spsn=modality_spsn, + mode=mode + ) + + self.noext.load_data( + unknown_data, + modality_spsn=modality_spsn, + mode=mode + ) + def check_and_assign(self, new_params: np.ndarray): """Check that the spread probability (rates) and the parameters for the marginalization over diagnose times are all within limits and assign them to diff --git a/lymph/midline_timeev.py b/lymph/midline_timeev.py index 2a1dc86..a3a3f3c 100644 --- a/lymph/midline_timeev.py +++ b/lymph/midline_timeev.py @@ -480,8 +480,8 @@ def likelihood( for stage in t_stages: - p = self.ext._likelihood2(stage=stage, state_probs=state_probs)*(1-(1-self.midext_trans)**10) - p2 = self.noext._likelihood2(stage=stage, state_probs=state_probs2)*(1-self.midext_trans)**10 + p = self.ext._likelihood2(stage=stage, state_probs=state_probs) * (1-((1-self.midext_trans) ** 10)) + p2 = self.noext._likelihood2(stage=stage, state_probs=state_probs2) * ((1-self.midext_trans) ** 10) llh += np.sum(np.log(p+p2)) return llh From 5e8d07ddc072184253a7f57225b966f1ddf9970b Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Thu, 22 Jun 2023 14:45:38 +0200 Subject: [PATCH 08/35] deleted unused files --- lymph/midline_timeev2.py | 498 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 498 insertions(+) create mode 100644 lymph/midline_timeev2.py diff --git a/lymph/midline_timeev2.py b/lymph/midline_timeev2.py new file mode 100644 index 0000000..c82f676 --- /dev/null +++ b/lymph/midline_timeev2.py @@ -0,0 +1,498 @@ +from typing import Dict, List, Optional, Tuple, Union + +import numpy as np +import pandas as pd + +from .bilateral import Bilateral +from .timemarg import MarginalizorDict + + +class MidlineBilateraltime: + """Model a bilateral lymphatic system where an additional risk factor can + be provided in the data: Whether or not the primary tumor extended over the + mid-sagittal line. + + It is reasonable to assume (and supported by data) that such an extension + significantly increases the risk for metastatic spread to the contralateral + side of the neck. This class attempts to capture this using a simple + assumption: We assume that the probability of spread to the contralateral + side for patients *with* midline extension is larger than for patients + *without* it, but smaller than the probability of spread to the ipsilateral + side. Formally: + + .. math:: + b_c^{\\in} = \\alpha \\cdot b_i + (1 - \\alpha) \\cdot b_c^{\\not\\in} + + where :math:`b_c^{\\in}` is the probability of spread from the primary tumor + to the contralateral side for patients with midline extension, and + :math:`b_c^{\\not\\in}` for patients without. :math:`\\alpha` is the linear + mixing parameter. + """ + + def __init__( + self, + graph: Dict[Tuple[str], List[str]], + use_mixing: bool = True, + trans_symmetric: bool = True, + **_kwargs + ): + """The class is constructed in a similar fashion to the + :class:`Bilateral`: That class contains one :class:`Unilateral` for + each side of the neck, while this class will contain two instances of + :class:`Bilateral`, one for the case of a midline extension and one for + the case of no midline extension. + + Args: + graph: Dictionary of the same kind as for initialization of + :class:`System`. This graph will be passed to the constructors of + two :class:`System` attributes of this class. + use_mixing: Describe the contralateral base spread probabilities for the + case of a midline extension as a linear combination between the base + spread probs of the ipsilateral side and the ones of the contralateral + side when no midline extension is present. + trans_symmetric: If ``True``, the spread probabilities among the + LNLs will be set symmetrically. + + See Also: + :class:`Bilateral`: Two of these are held as attributes by this + class. One for the case of a mid-sagittal extension of the primary + tumor and one for the case of no such extension. + """ + self.ext = Bilateral( + graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric + ) + self.noext = Bilateral( + graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric + ) + self.use_mixing = use_mixing + if self.use_mixing: + self.alpha_mix = 0. + + self.noext.diag_time_dists = self.ext.diag_time_dists + + + @property + def graph(self) -> Dict[Tuple[str], List[str]]: + """Return the (unilateral) graph that was used to create this network. + """ + return self.noext.graph + + + @property + def base_probs(self) -> np.ndarray: + """Base probabilities of metastatic lymphatic spread from the tumor(s) + to the lymph node levels. This will return the following concatenation of + base spread probs depending on whether ``use_mixing`` is set to ``True`` or + ``False``: + + With the use of a mixing parameter: + +-----------+-------------------------+--------------+ + | base ipsi | base contra (no midext) | mixing param | + +-----------+-------------------------+--------------+ + + Without it: + +-----------+----------------------+-------------------------+ + | base ipsi | base contra (midext) | base contra (no midext) | + +-----------+----------------------+-------------------------+ + + When setting these, one needs to provide the respective shape. + """ + if self.use_mixing: + return np.concatenate([ + self.ext.ipsi.base_probs, + self.noext.contra.base_probs, + [self.alpha_mix], + ]) + else: + return np.concatenate([ + self.ext.ipsi.base_probs, + self.ext.contra.base_probs, + self.noext.contra.base_probs, + ]) + + @base_probs.setter + def base_probs(self, new_params: np.ndarray): + """Set the base probabilities from the tumor(s) to the LNLs, accounting + for the mixing parameter :math:`\\alpha``. + """ + k = len(self.ext.ipsi.base_probs) + + self.ext.ipsi.base_probs = new_params[:k] + self.noext.ipsi.base_probs = new_params[:k] + + if self.use_mixing: + self.noext.contra.base_probs = new_params[k:2*k] + self.alpha_mix = new_params[-1] + # compute linear combination + self.ext.contra.base_probs = ( + self.alpha_mix * self.ext.ipsi.base_probs + + (1. - self.alpha_mix) * self.noext.contra.base_probs + ) + else: + self.ext.contra.base_probs = new_params[k:2*k] + self.noext.contra.base_probs = new_params[2*k:3*k] + + # avoid unnecessary double computation of ipsilateral transition matrix + self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix + + + @property + def trans_probs(self) -> np.ndarray: + """Probabilities of lymphatic spread among the lymph node levels. They + are assumed to be symmetric ipsi- & contralaterally by default. + """ + return self.noext.trans_probs + + @trans_probs.setter + def trans_probs(self, new_params: np.ndarray): + """Set the new spread probabilities for lymphatic spread from among the + LNLs. + """ + self.noext.trans_probs = new_params + self.ext.trans_probs = new_params + + # avoid unnecessary double computation of ipsilateral transition matrix + self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix + + + @property + def spread_probs(self) -> np.ndarray: + """These are the probabilities representing the spread of cancer along + lymphatic drainage pathways per timestep. + + It is composed of the base spread probs (possible with the mixing parameter) + and the probabilities of spread among the LNLs. + + +-------------+-------------+ + | base probs | trans probs | + +-------------+-------------+ + """ + return np.concatenate([self.base_probs, self.trans_probs]) + + @spread_probs.setter + def spread_probs(self, new_params: np.ndarray): + """Set the new spread probabilities and the mixing parameter + :math:`\\alpha`. + """ + num_base_probs = len(self.base_probs) + + self.base_probs = new_params[:num_base_probs] + self.trans_probs = new_params[num_base_probs:] + + + @property + def diag_time_dists(self) -> MarginalizorDict: + """This property holds the probability mass functions for marginalizing over + possible diagnose times for each T-stage. + + When setting this property, one may also provide a normal Python dict, in + which case it tries to convert it to a :class:`MarginalizorDict`. + + Note that the method will provide the same instance of this + :class:`MarginalizorDict` to both sides of the network. + + See Also: + :class:`MarginalzorDict`, :class:`Marginalizor`. + """ + return self.ext.diag_time_dists + + @diag_time_dists.setter + def diag_time_dists(self, new_dists: Union[dict, MarginalizorDict]): + """Assign new :class:`MarginalizorDict` to this property. If it is a normal + Python dictionary, tr to convert it into a :class:`MarginalizorDict`. + """ + self.ext.diag_time_dists = new_dists + self.noext.diag_time_dists = self.ext.diag_time_dists + + + @property + def modalities(self): + """A dictionary containing the specificity :math:`s_P` and sensitivity + :math:`s_N` values for each diagnostic modality. + + Such a dictionary can also be provided to set this property and compute + the observation matrices of all used systems. + + See Also: + :meth:`Bilateral.modalities`: Getting and setting this property in + the normal bilateral model. + + :meth:`Unilateral.modalities`: Getting and setting :math:`s_P` and + :math:`s_N` for a unilateral model. + """ + return self.noext.modalities + + @modalities.setter + def modalities(self, modality_spsn: Dict[str, List[float]]): + """Call the respective getter and setter methods of the bilateral + components with and without midline extension. + """ + self.noext.modalities = modality_spsn + self.ext.modalities = modality_spsn + + + @property + def patient_data(self): + """A pandas :class:`DataFrame` with rows of patients and columns of + patient and involvement details. The table's header should have three + levels that categorize the individual lymph node level's involvement to + the corresponding diagnostic modality (first level), the side of the + LNL (second level) and finaly the name of the LNL (third level). + Additionally, the patient's T-category must be stored under ('info', + 'tumor', 't_stage') and whether the tumor extends over the mid-sagittal + line should be noted under ('info', 'tumor', 'midline_extension'). So, + part of this table could look like this: + + +-----------------------------+------------------+------------------+ + | info | MRI | PET | + +-----------------------------+--------+---------+--------+---------+ + | tumor | ipsi | contra | ipsi | contra | + +---------+-------------------+--------+---------+--------+---------+ + | t_stage | midline_extension | II | II | II | II | + +=========+===================+========+=========+========+=========+ + | early | ``True`` |``True``|``None`` |``True``|``False``| + +---------+-------------------+--------+---------+--------+---------+ + | late | ``True`` |``None``|``None`` |``None``|``None`` | + +---------+-------------------+--------+---------+--------+---------+ + | early | ``False`` |``True``|``False``|``True``|``True`` | + +---------+-------------------+--------+---------+--------+---------+ + """ + try: + return self._patient_data + except AttributeError: + raise AttributeError( + "No patient data has been loaded yet" + ) + + @patient_data.setter + def patient_data(self, patient_data: pd.DataFrame): + """Load the patient data. For now, this just calls the :meth:`load_data` + method, but at a later point, I would like to write a function here + that generates the pandas :class:`DataFrame` from the internal matrix + representation of the data. + """ + self._patient_data = patient_data.copy() + self.load_data(patient_data) + + + def load_data( + self, + data: pd.DataFrame, + modality_spsn: Optional[Dict[str, List[float]]] = None, + mode = "HMM" + ): + """Load data as table of patients with involvement details and convert + it into internal representation of a matrix. + + Args: + data: The table with rows of patients and columns of patient and + involvement details. The table's header must have three levels + that categorize the individual lymph node level's involvement + to the corresponding diagnostic modality (first level), the + side of the LNL (second level) and finaly the name of the LNL + (third level). Additionally, the patient's T-category must be + stored under ('info', 'tumor', 't_stage') and whether the tumor + extends over the mid-sagittal line should be noted under + ('info', 'tumor', 'midline_extension'). So, part of this table + could look like this: + + +-----------------------------+---------------------+ + | info | MRI | + +-----------------------------+----------+----------+ + | tumor | ipsi | contra | + +---------+-------------------+----------+----------+ + | t_stage | midline_extension | II | II | + +=========+===================+==========+==========+ + | early | ``True`` | ``True`` | ``None`` | + +---------+-------------------+----------+----------+ + | late | ``True`` | ``None`` | ``None`` | + +---------+-------------------+----------+----------+ + | early | ``False`` | ``True`` | ``True`` | + +---------+-------------------+----------+----------+ + + modality_spsn: If no diagnostic modalities have been defined yet, + this must be provided to build the observation matrix. + + See Also: + :attr:`patient_data`: The attribute for loading and exporting data. + + :meth:`Bilateral.load_data`: Loads data into a bilateral network by + splitting it into ipsi- & contralateral side and passing each to + the respective unilateral method (see below). + + :meth:`Unilateral.load_data`: Data loading method of the unilateral + network. + """ + ext_data = data.loc[data[("info", "tumor", "midline_extension")]] + noext_data = data.loc[~data[("info", "tumor", "midline_extension")]] + + self.ext.load_data( + ext_data, + modality_spsn=modality_spsn, + mode=mode + ) + self.noext.load_data( + noext_data, + modality_spsn=modality_spsn, + mode=mode + ) + + + def check_and_assign(self, new_params: np.ndarray): + """Check that the spread probability (rates) and the parameters for the + marginalization over diagnose times are all within limits and assign them to + the model. Also, make sure the ipsi- and contralateral distributions over + diagnose times are the same instance of the :class:`MarginalizorDict`. + + Args: + new_params: The set of :attr:`spread_probs` and parameters to provide for + updating the parametrized distributions over diagnose times. + + Warning: + This method assumes that the parametrized distributions (instances of + :class:`Marginalizor`) all raise a ``ValueError`` when provided with + invalid parameters. + """ + k = len(self.spread_probs) + new_spread_probs = new_params[:k] + new_marg_params = new_params[k:] + + try: + self.ext.ipsi.diag_time_dists.update(new_marg_params) + self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists + self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists + self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists + except ValueError as val_err: + raise ValueError( + "Parameters for marginalization over diagnose times are invalid" + ) from val_err + + if new_spread_probs.shape != self.spread_probs.shape: + raise ValueError( + "Shape of provided spread parameters does not match network" + ) + if np.any(0. > new_spread_probs) or np.any(new_spread_probs > 1.): + raise ValueError( + "Spread probs must be between 0 and 1" + ) + + self.spread_probs = new_spread_probs + + + def likelihood( + self, + data: Optional[pd.DataFrame] = None, + given_params: Optional[np.ndarray] = None, + log: bool = True, + ) -> float: + """Compute log-likelihood of (already stored) data, given the spread + probabilities and either a discrete diagnose time or a distribution to + use for marginalization over diagnose times. + + Args: + data: Table with rows of patients and columns of per-LNL involvment. See + :meth:`load_data` for more details on how this should look like. + + given_params: The likelihood is a function of these parameters. They mainly + consist of the :attr:`spread_probs` of the model. Any excess parameters + will be used to update the parametrized distributions used for + marginalizing over the diagnose times (see :attr:`diag_time_dists`). + + log: When ``True``, the log-likelihood is returned. + + Returns: + The log-likelihood :math:`\\log{p(D \\mid \\theta)}` where :math:`D` + is the data and :math:`\\theta` is the tuple of spread probabilities + and diagnose times or distributions over diagnose times. + + See Also: + :attr:`spread_probs`: Property for getting and setting the spread + probabilities, of which a lymphatic network has as many as it has + :class:`Edge` instances (in case no symmetries apply). + + :meth:`Unilateral.likelihood`: The log-likelihood function of + the unilateral system. + + :meth:`Bilateral.likelihood`: The (log-)likelihood function of the + bilateral system. + """ + if data is not None: + self.patient_data = data + + try: + self.check_and_assign(given_params) + except ValueError: + return -np.inf if log else 0. + + llh = 0. if log else 1. + + if log: + llh += self.ext._likelihood(log=log) + llh += self.noext._likelihood(log=log) + else: + llh *= self.ext._likelihood(log=log) + llh *= self.noext._likelihood(log=log) + + return llh + + + def risk( + self, + given_params: Optional[np.ndarray] = None, + midline_extension: bool = True, + **kwargs, + ) -> float: + """Compute the risk of nodal involvement given a specific diagnose. + + Args: + spread_probs: Set ot new spread parameters. This also contains the + mixing parameter alpha in the last position. + midline_extension: Whether or not the patient's tumor extends over + the mid-sagittal line. + + See Also: + :meth:`Bilateral.risk`: Depending on whether or not the patient's + tumor does extend over the midline, the risk function of the + respective :class:`Bilateral` instance gets called. + """ + if given_params is not None: + self.check_and_assign(given_params) + + if midline_extension: + return self.ext.risk(**kwargs) + else: + return self.noext.risk(**kwargs) + + + def generate_dataset( + self, + num_patients: int, + stage_dist: Dict[str, float], + ext_prob: float, + **_kwargs, + ) -> pd.DataFrame: + """Generate/sample a pandas :class:`DataFrame` from the defined network. + + Args: + num_patients: Number of patients to generate. + stage_dist: Probability to find a patient in a certain T-stage. + ext_prob: Probability that a patient's primary tumor extends over + the mid-sagittal line. + """ + drawn_ext = np.random.choice( + [True, False], p=[ext_prob, 1. - ext_prob], size=num_patients + ) + ext_dataset = self.ext.generate_dataset( + num_patients=num_patients, + stage_dist=stage_dist, + ) + noext_dataset = self.noext.generate_dataset( + num_patients=num_patients, + stage_dist=stage_dist, + ) + + dataset = noext_dataset.copy() + dataset.loc[drawn_ext] = ext_dataset.loc[drawn_ext] + dataset[('info', 'tumor', 'midline_extension')] = drawn_ext + + return dataset \ No newline at end of file From 5e47de15309b54e27f713edd1bcc284304642459 Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Thu, 22 Jun 2023 14:45:50 +0200 Subject: [PATCH 09/35] deleted unused files --- .DS_Store | Bin 6148 -> 6148 bytes lymph/midline_timeev.py | 550 ---------------------------------------- notebook/.DS_Store | Bin 0 -> 6148 bytes tests/.DS_Store | Bin 0 -> 6148 bytes 4 files changed, 550 deletions(-) delete mode 100644 lymph/midline_timeev.py create mode 100644 notebook/.DS_Store create mode 100644 tests/.DS_Store diff --git a/.DS_Store b/.DS_Store index 590cf61ff4e029544061ad8649cc4baa5fd1c15c..398457f49e045549ecb6e922410235e839c8a9e6 100644 GIT binary patch delta 95 zcmZoMXffDe#l*N{vNcnoyF_)hxrw2Uf{CSJt&T#qg_((tf~lc#Z7nBK8 diff --git a/lymph/midline_timeev.py b/lymph/midline_timeev.py deleted file mode 100644 index a3a3f3c..0000000 --- a/lymph/midline_timeev.py +++ /dev/null @@ -1,550 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import numpy as np -import pandas as pd - -from .bilateral import Bilateral -from .timemarg import MarginalizorDict - - -class MidlineBilateraltime: - """Model a bilateral lymphatic system where an additional risk factor can - be provided in the data: Whether or not the primary tumor extended over the - mid-sagittal line. - - It is reasonable to assume (and supported by data) that such an extension - significantly increases the risk for metastatic spread to the contralateral - side of the neck. This class attempts to capture this using a simple - assumption: We assume that the probability of spread to the contralateral - side for patients *with* midline extension is larger than for patients - *without* it, but smaller than the probability of spread to the ipsilateral - side. Formally: - - .. math:: - b_c^{\\in} = \\alpha \\cdot b_i + (1 - \\alpha) \\cdot b_c^{\\not\\in} - - where :math:`b_c^{\\in}` is the probability of spread from the primary tumor - to the contralateral side for patients with midline extension, and - :math:`b_c^{\\not\\in}` for patients without. :math:`\\alpha` is the linear - mixing parameter. - """ - - def __init__( - self, - graph: Dict[Tuple[str], List[str]], - use_mixing: bool = True, - trans_symmetric: bool = True, - **_kwargs - ): - """The class is constructed in a similar fashion to the - :class:`Bilateral`: That class contains one :class:`Unilateral` for - each side of the neck, while this class will contain two instances of - :class:`Bilateral`, one for the case of a midline extension and one for - the case of no midline extension. - - Args: - graph: Dictionary of the same kind as for initialization of - :class:`System`. This graph will be passed to the constructors of - two :class:`System` attributes of this class. - use_mixing: Describe the contralateral base spread probabilities for the - case of a midline extension as a linear combination between the base - spread probs of the ipsilateral side and the ones of the contralateral - side when no midline extension is present. - trans_symmetric: If ``True``, the spread probabilities among the - LNLs will be set symmetrically. - - See Also: - :class:`Bilateral`: Two of these are held as attributes by this - class. One for the case of a mid-sagittal extension of the primary - tumor and one for the case of no such extension. - """ - self.ext = Bilateral( - graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric - ) - self.noext = Bilateral( - graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric - ) - - self.use_mixing = use_mixing - if self.use_mixing: - self.alpha_mix = 0. - - self.noext.diag_time_dists = self.ext.diag_time_dists - - - @property - def graph(self) -> Dict[Tuple[str], List[str]]: - """Return the (unilateral) graph that was used to create this network. - """ - return self.noext.graph - - - @property - def base_probs(self) -> np.ndarray: - """Base probabilities of metastatic lymphatic spread from the tumor(s) - to the lymph node levels. This will return the following concatenation of - base spread probs depending on whether ``use_mixing`` is set to ``True`` or - ``False``: - - With the use of a mixing parameter: - +-----------+-------------------------+--------------+ - | base ipsi | base contra (no midext) | mixing param | - +-----------+-------------------------+--------------+ - - Without it: - +-----------+----------------------+-------------------------+ - | base ipsi | base contra (midext) | base contra (no midext) | - +-----------+----------------------+-------------------------+ - - When setting these, one needs to provide the respective shape. - """ - if self.use_mixing: - return np.concatenate([ - self.ext.ipsi.base_probs, - self.noext.contra.base_probs, - [self.alpha_mix], - ]) - else: - return np.concatenate([ - self.ext.ipsi.base_probs, - self.ext.contra.base_probs, - self.noext.contra.base_probs, - ]) - - @base_probs.setter - def base_probs(self, new_params: np.ndarray): - """Set the base probabilities from the tumor(s) to the LNLs, accounting - for the mixing parameter :math:`\\alpha``. - """ - k = len(self.ext.ipsi.base_probs) - - self.ext.ipsi.base_probs = new_params[:k] - self.noext.ipsi.base_probs = new_params[:k] - - if self.use_mixing: - self.noext.contra.base_probs = new_params[k:2*k] - self.alpha_mix = new_params[-1] - # compute linear combination - self.ext.contra.base_probs = ( - self.alpha_mix * self.ext.ipsi.base_probs - + (1. - self.alpha_mix) * self.noext.contra.base_probs - ) - else: - self.ext.contra.base_probs = new_params[k:2*k] - self.noext.contra.base_probs = new_params[2*k:3*k] - - # avoid unnecessary double computation of ipsilateral transition matrix - self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix - - - @property - def trans_probs(self) -> np.ndarray: - """Probabilities of lymphatic spread among the lymph node levels. They - are assumed to be symmetric ipsi- & contralaterally by default. - """ - return self.noext.trans_probs - - @trans_probs.setter - def trans_probs(self, new_params: np.ndarray): - """Set the new spread probabilities for lymphatic spread from among the - LNLs. - """ - self.noext.trans_probs = new_params - self.ext.trans_probs = new_params - # avoid unnecessary double computation of ipsilateral transition matrix - self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix - - - @property - def spread_probs(self) -> np.ndarray: - """These are the probabilities representing the spread of cancer along - lymphatic drainage pathways per timestep. - - It is composed of the base spread probs (possible with the mixing parameter) - and the probabilities of spread among the LNLs. - - +-------------+-------------+ - | base probs | trans probs | - +-------------+-------------+ - """ - return np.concatenate([self.base_probs, self.trans_probs]) - - @spread_probs.setter - def spread_probs(self, new_params: np.ndarray): - """Set the new spread probabilities and the mixing parameter - :math:`\\alpha`. - """ - num_base_probs = len(self.base_probs) - - self.base_probs = new_params[:num_base_probs] - self.trans_probs = new_params[num_base_probs:] - - - @property - def diag_time_dists(self) -> MarginalizorDict: - """This property holds the probability mass functions for marginalizing over - possible diagnose times for each T-stage. - - When setting this property, one may also provide a normal Python dict, in - which case it tries to convert it to a :class:`MarginalizorDict`. - - Note that the method will provide the same instance of this - :class:`MarginalizorDict` to both sides of the network. - - See Also: - :class:`MarginalzorDict`, :class:`Marginalizor`. - """ - return self.ext.diag_time_dists - - @diag_time_dists.setter - def diag_time_dists(self, new_dists: Union[dict, MarginalizorDict]): - """Assign new :class:`MarginalizorDict` to this property. If it is a normal - Python dictionary, tr to convert it into a :class:`MarginalizorDict`. - """ - self.ext.diag_time_dists = new_dists - self.noext.diag_time_dists = self.ext.diag_time_dists - - - @property - def modalities(self): - """A dictionary containing the specificity :math:`s_P` and sensitivity - :math:`s_N` values for each diagnostic modality. - - Such a dictionary can also be provided to set this property and compute - the observation matrices of all used systems. - - See Also: - :meth:`Bilateral.modalities`: Getting and setting this property in - the normal bilateral model. - - :meth:`Unilateral.modalities`: Getting and setting :math:`s_P` and - :math:`s_N` for a unilateral model. - """ - return self.noext.modalities - - @modalities.setter - def modalities(self, modality_spsn: Dict[str, List[float]]): - """Call the respective getter and setter methods of the bilateral - components with and without midline extension. - """ - self.noext.modalities = modality_spsn - self.ext.modalities = modality_spsn - - """ - @property - def midext_prob(self): - Assign the last of the new_params to the midline extension probability - - try: - return self._midext_prob - except AttributeError as attr_err: - raise AttributeError( - "No midline extension probability has been assigned" - ) from attr_err - - @midext_prob.setter - def midext_prob(self, new_params): - A variable containing the midline extension probability - - self._midext_prob = new_params[-2] - """ - - @property - def midext_trans(self): - """Assign the last of the new_params to the midline extension transition probability - """ - try: - return self._midext_trans - except AttributeError as attr_err: - raise AttributeError( - "No midline extension transition probability has been assigned" - ) from attr_err - - @midext_trans.setter - def midext_trans(self, new_params): - """A variable containing the midline extension transition probability - """ - self._midext_trans = new_params[-1] - - @property - def patient_data(self): - """A pandas :class:`DataFrame` with rows of patients and columns of - patient and involvement details. The table's header should have three - levels that categorize the individual lymph node level's involvement to - the corresponding diagnostic modality (first level), the side of the - LNL (second level) and finaly the name of the LNL (third level). - Additionally, the patient's T-category must be stored under ('info', - 'tumor', 't_stage') and whether the tumor extends over the mid-sagittal - line should be noted under ('info', 'tumor', 'midline_extension'). So, - part of this table could look like this: - - +-----------------------------+------------------+------------------+ - | info | MRI | PET | - +-----------------------------+--------+---------+--------+---------+ - | tumor | ipsi | contra | ipsi | contra | - +---------+-------------------+--------+---------+--------+---------+ - | t_stage | midline_extension | II | II | II | II | - +=========+===================+========+=========+========+=========+ - | early | ``True`` |``True``|``None`` |``True``|``False``| - +---------+-------------------+--------+---------+--------+---------+ - | late | ``True`` |``None``|``None`` |``None``|``None`` | - +---------+-------------------+--------+---------+--------+---------+ - | early | ``False`` |``True``|``False``|``True``|``True`` | - +---------+-------------------+--------+---------+--------+---------+ - """ - try: - return self._patient_data - except AttributeError: - raise AttributeError( - "No patient data has been loaded yet" - ) - - @patient_data.setter - def patient_data(self, patient_data: pd.DataFrame): - """Load the patient data. For now, this just calls the :meth:`load_data` - method, but at a later point, I would like to write a function here - that generates the pandas :class:`DataFrame` from the internal matrix - representation of the data. - """ - self._patient_data = patient_data.copy() - self.load_data(patient_data) - - - def load_data( - self, - data: pd.DataFrame, - modality_spsn: Optional[Dict[str, List[float]]] = None, - mode = "HMM" - ): - """Load data as table of patients with involvement details and convert - it into internal representation of a matrix. - - Args: - data: The table with rows of patients and columns of patient and - involvement details. The table's header must have three levels - that categorize the individual lymph node level's involvement - to the corresponding diagnostic modality (first level), the - side of the LNL (second level) and finaly the name of the LNL - (third level). Additionally, the patient's T-category must be - stored under ('info', 'tumor', 't_stage') and whether the tumor - extends over the mid-sagittal line should be noted under - ('info', 'tumor', 'midline_extension'). So, part of this table - could look like this: - - +-----------------------------+---------------------+ - | info | MRI | - +-----------------------------+----------+----------+ - | tumor | ipsi | contra | - +---------+-------------------+----------+----------+ - | t_stage | midline_extension | II | II | - +=========+===================+==========+==========+ - | early | ``True`` | ``True`` | ``None`` | - +---------+-------------------+----------+----------+ - | late | ``True`` | ``None`` | ``None`` | - +---------+-------------------+----------+----------+ - | early | ``False`` | ``True`` | ``True`` | - +---------+-------------------+----------+----------+ - - modality_spsn: If no diagnostic modalities have been defined yet, - this must be provided to build the observation matrix. - - See Also: - :attr:`patient_data`: The attribute for loading and exporting data. - - :meth:`Bilateral.load_data`: Loads data into a bilateral network by - splitting it into ipsi- & contralateral side and passing each to - the respective unilateral method (see below). - - :meth:`Unilateral.load_data`: Data loading method of the unilateral - network. - """ - ext_data = data - noext_data = data - - self.ext.load_data( - ext_data, - modality_spsn=modality_spsn, - mode=mode - ) - self.noext.load_data( - noext_data, - modality_spsn=modality_spsn, - mode=mode - ) - - - def check_and_assign(self, new_params: np.ndarray): - """Check that the spread probability (rates) and the parameters for the - marginalization over diagnose times are all within limits and assign them to - the model. Also, make sure the ipsi- and contralateral distributions over - diagnose times are the same instance of the :class:`MarginalizorDict`. - - Args: - new_params: The set of :attr:`spread_probs` and parameters to provide for - updating the parametrized distributions over diagnose times. - - Warning: - This method assumes that the parametrized distributions (instances of - :class:`Marginalizor`) all raise a ``ValueError`` when provided with - invalid parameters. - """ - k = len(self.spread_probs) - l = len(self.diag_time_dists) - new_spread_probs = new_params[:k] - new_marg_params = new_params[k:(k+l)] - - try: - self.ext.ipsi.diag_time_dists.update(new_marg_params) - self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists - self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists - self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists - #self.midext_prob = new_params - self.midext_trans = new_params - except ValueError as val_err: - raise ValueError( - "Parameters for marginalization over diagnose times are invalid" - ) from val_err - - if new_spread_probs.shape != self.spread_probs.shape: - raise ValueError( - "Shape of provided spread parameters does not match network" - ) - if np.any(0. > new_spread_probs) or np.any(new_spread_probs > 1.): - raise ValueError( - "Spread probs must be between 0 and 1" - ) - if 0. > np.any(0. > new_params) or np.any(new_params > 1.): - raise ValueError( - "Probabilities must be between 0 and 1" - ) - - self.spread_probs = new_spread_probs - - - def likelihood( - self, - data: Optional[pd.DataFrame] = None, - given_params: Optional[np.ndarray] = None, - log: bool = True, - ) -> float: - """Compute log-likelihood of (already stored) data, given the spread - probabilities and either a discrete diagnose time or a distribution to - use for marginalization over diagnose times. - - Args: - data: Table with rows of patients and columns of per-LNL involvment. See - :meth:`load_data` for more details on how this should look like. - - given_params: The likelihood is a function of these parameters. They mainly - consist of the :attr:`spread_probs` of the model. Any excess parameters - will be used to update the parametrized distributions used for - marginalizing over the diagnose times (see :attr:`diag_time_dists`). - - log: When ``True``, the log-likelihood is returned. - - Returns: - The log-likelihood :math:`\\log{p(D \\mid \\theta)}` where :math:`D` - is the data and :math:`\\theta` is the tuple of spread probabilities - and diagnose times or distributions over diagnose times. - - See Also: - :attr:`spread_probs`: Property for getting and setting the spread - probabilities, of which a lymphatic network has as many as it has - :class:`Edge` instances (in case no symmetries apply). - - :meth:`Unilateral.likelihood`: The log-likelihood function of - the unilateral system. - - :meth:`Bilateral.likelihood`: The (log-)likelihood function of the - bilateral system. - """ - if data is not None: - self.patient_data = data - - try: - self.check_and_assign(given_params) - except ValueError: - return -np.inf if log else 0. - - llh = 0. if log else 1. - - - state_probs = {} - state_probs["ipsi"] = self.ext.state_probs_ipsi() - state_probs["contra"] = self.ext.state_probs_contra() - state_probs2 = {} - state_probs2["ipsi"] = self.noext.state_probs_ipsi() - state_probs2["contra"] = self.noext.state_probs_contra() - t_stages = list(set(self.patient_data[("info","tumor","t_stage")])) - t_stages = self.ext.t_stages() - - - for stage in t_stages: - p = self.ext._likelihood2(stage=stage, state_probs=state_probs) * (1-((1-self.midext_trans) ** 10)) - p2 = self.noext._likelihood2(stage=stage, state_probs=state_probs2) * ((1-self.midext_trans) ** 10) - llh += np.sum(np.log(p+p2)) - - return llh - - - - def risk( - self, - given_params: Optional[np.ndarray] = None, - midline_extension: bool = True, - **kwargs, - ) -> float: - """Compute the risk of nodal involvement given a specific diagnose. - - Args: - spread_probs: Set ot new spread parameters. This also contains the - mixing parameter alpha in the last position. - midline_extension: Whether or not the patient's tumor extends over - the mid-sagittal line. - - See Also: - :meth:`Bilateral.risk`: Depending on whether or not the patient's - tumor does extend over the midline, the risk function of the - respective :class:`Bilateral` instance gets called. - """ - if given_params is not None: - self.check_and_assign(given_params) - - if midline_extension: - return self.ext.risk(**kwargs) - else: - return self.noext.risk(**kwargs) - - - def generate_dataset( - self, - num_patients: int, - stage_dist: Dict[str, float], - ext_prob: float, - **_kwargs, - ) -> pd.DataFrame: - """Generate/sample a pandas :class:`DataFrame` from the defined network. - - Args: - num_patients: Number of patients to generate. - stage_dist: Probability to find a patient in a certain T-stage. - ext_prob: Probability that a patient's primary tumor extends over - the mid-sagittal line. - """ - drawn_ext = np.random.choice( - [True, False], p=[ext_prob, 1. - ext_prob], size=num_patients - ) - ext_dataset = self.ext.generate_dataset( - num_patients=num_patients, - stage_dist=stage_dist, - ) - noext_dataset = self.noext.generate_dataset( - num_patients=num_patients, - stage_dist=stage_dist, - ) - - dataset = noext_dataset.copy() - dataset.loc[drawn_ext] = ext_dataset.loc[drawn_ext] - dataset[('info', 'tumor', 'midline_extension')] = drawn_ext - - return dataset \ No newline at end of file diff --git a/notebook/.DS_Store b/notebook/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..356e6aa405b98fa510816e4e5bb2f5d1b6f924df GIT binary patch literal 6148 zcmeHK!A`L5FdF1!ykPbh@Cz!bKOjFK2%-s1Q6$D&PwGLhp8Xh4p1k=h9(}X3 zLbtWvG{(#%yKg%?vweLf9fpWZZ!zi;wTY;S!B|_z^o8+0kCN5AXA@}njOnD?KTeZb zDN-GORRMl?bsEutE@=?HzpbbIUXsS~aGDO`Yd*fc+-$!;?}z)NZ}(@X`QJu_RBF%( zoza9AbU|a92Y0{kb;=QW4p--|pBxJ3B$|;MuStUnQ7b;xQF%VCUB`zxsi^|0fGY443ZQ1QH8%uxRs~c6RbZ(Ae;+IcW8^Ut zbe|4Pt_1+r;dX{K_Y$0AJw_fgLF~YcqyiO#cWt8FW$wepP`FKmcw^ literal 0 HcmV?d00001 diff --git a/tests/.DS_Store b/tests/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..3ed183189058d69fe4a99f96ff42e47c750c2009 GIT binary patch literal 6148 zcmeHKu};H447E#!pbm7Z81EOlFo!C9L4N>BQK^(P6>9fv{0E=Gz{D>w@F$Es+eb*0 z7KsI+%9ecZ;=!UkmqN?ZZ zhX2TbtlbdnwZ^L5QG5N?#d0#ct;&t1eKtPqSyp*IFRD2_(x;b;$MMJOcIW9^)ZwkK#p&OpO}><6XeoxTl;U%!3hY5Tz${{I2n)my0!o87&cL5C@Cj*J BQJw$* literal 0 HcmV?d00001 From ade30e2f8e47c1206c8a701e176b5f041d2a9994 Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Thu, 22 Jun 2023 15:09:22 +0200 Subject: [PATCH 10/35] added DS_Store to gitignore --- .DS_Store | Bin 6148 -> 0 bytes .gitignore | 4 +++- notebook/.DS_Store | Bin 6148 -> 0 bytes tests/.DS_Store | Bin 6148 -> 0 bytes 4 files changed, 3 insertions(+), 1 deletion(-) delete mode 100644 .DS_Store delete mode 100644 notebook/.DS_Store delete mode 100644 tests/.DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 398457f49e045549ecb6e922410235e839c8a9e6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKUrPc(5T7;86A64M@NuD6DJAJyb01-UGJq%&sPm-Y@DIY^${9zO(X}$g`ax3}5YSAe=W#`%( zsEL<$({a-3jxK3-hM$2yA9wxpus7*c){j-3cEh+g(g|VE!;q_sFb>qDrN(iP=v>b% zI3=glsZ^)a{br*kn}>~AO->K?nl-uG*qO~r&c^2U(P`%{x{uY9;aA{isAbLK9G-Eo zP|+8KK6s-zQgI)HX`=$9(1QeRvY0U6laU!<2AF|A%YZ#Bo${ZpfPXkMzzlpB19Uz} zR6^HcW>6m;XmksJm`Ar398)hrInttQF*ArOD8i&7np9z13}MpIFKwJ_F*9h=LD=R) z*e46yp$PqS++XT&5UxSC%m6d+k%1L6tL5FdF1!ykPbh@Cz!bKOjFK2%-s1Q6$D&PwGLhp8Xh4p1k=h9(}X3 zLbtWvG{(#%yKg%?vweLf9fpWZZ!zi;wTY;S!B|_z^o8+0kCN5AXA@}njOnD?KTeZb zDN-GORRMl?bsEutE@=?HzpbbIUXsS~aGDO`Yd*fc+-$!;?}z)NZ}(@X`QJu_RBF%( zoza9AbU|a92Y0{kb;=QW4p--|pBxJ3B$|;MuStUnQ7b;xQF%VCUB`zxsi^|0fGY443ZQ1QH8%uxRs~c6RbZ(Ae;+IcW8^Ut zbe|4Pt_1+r;dX{K_Y$0AJw_fgLF~YcqyiO#cWt8FW$wepP`FKmcw^ diff --git a/tests/.DS_Store b/tests/.DS_Store deleted file mode 100644 index 3ed183189058d69fe4a99f96ff42e47c750c2009..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKu};H447E#!pbm7Z81EOlFo!C9L4N>BQK^(P6>9fv{0E=Gz{D>w@F$Es+eb*0 z7KsI+%9ecZ;=!UkmqN?ZZ zhX2TbtlbdnwZ^L5QG5N?#d0#ct;&t1eKtPqSyp*IFRD2_(x;b;$MMJOcIW9^)ZwkK#p&OpO}><6XeoxTl;U%!3hY5Tz${{I2n)my0!o87&cL5C@Cj*J BQJw$* From b336c2ac637d8823b2cf8ca85d9a81e918e023c0 Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Thu, 22 Jun 2023 16:05:07 +0200 Subject: [PATCH 11/35] removed unused class --- lymph/__init__.py | 1 - lymph/midline_timeev2.py | 498 --------------------------------------- 2 files changed, 499 deletions(-) delete mode 100644 lymph/midline_timeev2.py diff --git a/lymph/__init__.py b/lymph/__init__.py index 07ab0d9..93370f7 100644 --- a/lymph/__init__.py +++ b/lymph/__init__.py @@ -17,7 +17,6 @@ from .bilateral import Bilateral, BilateralSystem from .edge import Edge from .midline import MidlineBilateral -from .midline_timeev import MidlineBilateraltime from .node import Node from .timemarg import Marginalizor, MarginalizorDict from .unilateral import System, Unilateral diff --git a/lymph/midline_timeev2.py b/lymph/midline_timeev2.py deleted file mode 100644 index c82f676..0000000 --- a/lymph/midline_timeev2.py +++ /dev/null @@ -1,498 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import numpy as np -import pandas as pd - -from .bilateral import Bilateral -from .timemarg import MarginalizorDict - - -class MidlineBilateraltime: - """Model a bilateral lymphatic system where an additional risk factor can - be provided in the data: Whether or not the primary tumor extended over the - mid-sagittal line. - - It is reasonable to assume (and supported by data) that such an extension - significantly increases the risk for metastatic spread to the contralateral - side of the neck. This class attempts to capture this using a simple - assumption: We assume that the probability of spread to the contralateral - side for patients *with* midline extension is larger than for patients - *without* it, but smaller than the probability of spread to the ipsilateral - side. Formally: - - .. math:: - b_c^{\\in} = \\alpha \\cdot b_i + (1 - \\alpha) \\cdot b_c^{\\not\\in} - - where :math:`b_c^{\\in}` is the probability of spread from the primary tumor - to the contralateral side for patients with midline extension, and - :math:`b_c^{\\not\\in}` for patients without. :math:`\\alpha` is the linear - mixing parameter. - """ - - def __init__( - self, - graph: Dict[Tuple[str], List[str]], - use_mixing: bool = True, - trans_symmetric: bool = True, - **_kwargs - ): - """The class is constructed in a similar fashion to the - :class:`Bilateral`: That class contains one :class:`Unilateral` for - each side of the neck, while this class will contain two instances of - :class:`Bilateral`, one for the case of a midline extension and one for - the case of no midline extension. - - Args: - graph: Dictionary of the same kind as for initialization of - :class:`System`. This graph will be passed to the constructors of - two :class:`System` attributes of this class. - use_mixing: Describe the contralateral base spread probabilities for the - case of a midline extension as a linear combination between the base - spread probs of the ipsilateral side and the ones of the contralateral - side when no midline extension is present. - trans_symmetric: If ``True``, the spread probabilities among the - LNLs will be set symmetrically. - - See Also: - :class:`Bilateral`: Two of these are held as attributes by this - class. One for the case of a mid-sagittal extension of the primary - tumor and one for the case of no such extension. - """ - self.ext = Bilateral( - graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric - ) - self.noext = Bilateral( - graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric - ) - self.use_mixing = use_mixing - if self.use_mixing: - self.alpha_mix = 0. - - self.noext.diag_time_dists = self.ext.diag_time_dists - - - @property - def graph(self) -> Dict[Tuple[str], List[str]]: - """Return the (unilateral) graph that was used to create this network. - """ - return self.noext.graph - - - @property - def base_probs(self) -> np.ndarray: - """Base probabilities of metastatic lymphatic spread from the tumor(s) - to the lymph node levels. This will return the following concatenation of - base spread probs depending on whether ``use_mixing`` is set to ``True`` or - ``False``: - - With the use of a mixing parameter: - +-----------+-------------------------+--------------+ - | base ipsi | base contra (no midext) | mixing param | - +-----------+-------------------------+--------------+ - - Without it: - +-----------+----------------------+-------------------------+ - | base ipsi | base contra (midext) | base contra (no midext) | - +-----------+----------------------+-------------------------+ - - When setting these, one needs to provide the respective shape. - """ - if self.use_mixing: - return np.concatenate([ - self.ext.ipsi.base_probs, - self.noext.contra.base_probs, - [self.alpha_mix], - ]) - else: - return np.concatenate([ - self.ext.ipsi.base_probs, - self.ext.contra.base_probs, - self.noext.contra.base_probs, - ]) - - @base_probs.setter - def base_probs(self, new_params: np.ndarray): - """Set the base probabilities from the tumor(s) to the LNLs, accounting - for the mixing parameter :math:`\\alpha``. - """ - k = len(self.ext.ipsi.base_probs) - - self.ext.ipsi.base_probs = new_params[:k] - self.noext.ipsi.base_probs = new_params[:k] - - if self.use_mixing: - self.noext.contra.base_probs = new_params[k:2*k] - self.alpha_mix = new_params[-1] - # compute linear combination - self.ext.contra.base_probs = ( - self.alpha_mix * self.ext.ipsi.base_probs - + (1. - self.alpha_mix) * self.noext.contra.base_probs - ) - else: - self.ext.contra.base_probs = new_params[k:2*k] - self.noext.contra.base_probs = new_params[2*k:3*k] - - # avoid unnecessary double computation of ipsilateral transition matrix - self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix - - - @property - def trans_probs(self) -> np.ndarray: - """Probabilities of lymphatic spread among the lymph node levels. They - are assumed to be symmetric ipsi- & contralaterally by default. - """ - return self.noext.trans_probs - - @trans_probs.setter - def trans_probs(self, new_params: np.ndarray): - """Set the new spread probabilities for lymphatic spread from among the - LNLs. - """ - self.noext.trans_probs = new_params - self.ext.trans_probs = new_params - - # avoid unnecessary double computation of ipsilateral transition matrix - self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix - - - @property - def spread_probs(self) -> np.ndarray: - """These are the probabilities representing the spread of cancer along - lymphatic drainage pathways per timestep. - - It is composed of the base spread probs (possible with the mixing parameter) - and the probabilities of spread among the LNLs. - - +-------------+-------------+ - | base probs | trans probs | - +-------------+-------------+ - """ - return np.concatenate([self.base_probs, self.trans_probs]) - - @spread_probs.setter - def spread_probs(self, new_params: np.ndarray): - """Set the new spread probabilities and the mixing parameter - :math:`\\alpha`. - """ - num_base_probs = len(self.base_probs) - - self.base_probs = new_params[:num_base_probs] - self.trans_probs = new_params[num_base_probs:] - - - @property - def diag_time_dists(self) -> MarginalizorDict: - """This property holds the probability mass functions for marginalizing over - possible diagnose times for each T-stage. - - When setting this property, one may also provide a normal Python dict, in - which case it tries to convert it to a :class:`MarginalizorDict`. - - Note that the method will provide the same instance of this - :class:`MarginalizorDict` to both sides of the network. - - See Also: - :class:`MarginalzorDict`, :class:`Marginalizor`. - """ - return self.ext.diag_time_dists - - @diag_time_dists.setter - def diag_time_dists(self, new_dists: Union[dict, MarginalizorDict]): - """Assign new :class:`MarginalizorDict` to this property. If it is a normal - Python dictionary, tr to convert it into a :class:`MarginalizorDict`. - """ - self.ext.diag_time_dists = new_dists - self.noext.diag_time_dists = self.ext.diag_time_dists - - - @property - def modalities(self): - """A dictionary containing the specificity :math:`s_P` and sensitivity - :math:`s_N` values for each diagnostic modality. - - Such a dictionary can also be provided to set this property and compute - the observation matrices of all used systems. - - See Also: - :meth:`Bilateral.modalities`: Getting and setting this property in - the normal bilateral model. - - :meth:`Unilateral.modalities`: Getting and setting :math:`s_P` and - :math:`s_N` for a unilateral model. - """ - return self.noext.modalities - - @modalities.setter - def modalities(self, modality_spsn: Dict[str, List[float]]): - """Call the respective getter and setter methods of the bilateral - components with and without midline extension. - """ - self.noext.modalities = modality_spsn - self.ext.modalities = modality_spsn - - - @property - def patient_data(self): - """A pandas :class:`DataFrame` with rows of patients and columns of - patient and involvement details. The table's header should have three - levels that categorize the individual lymph node level's involvement to - the corresponding diagnostic modality (first level), the side of the - LNL (second level) and finaly the name of the LNL (third level). - Additionally, the patient's T-category must be stored under ('info', - 'tumor', 't_stage') and whether the tumor extends over the mid-sagittal - line should be noted under ('info', 'tumor', 'midline_extension'). So, - part of this table could look like this: - - +-----------------------------+------------------+------------------+ - | info | MRI | PET | - +-----------------------------+--------+---------+--------+---------+ - | tumor | ipsi | contra | ipsi | contra | - +---------+-------------------+--------+---------+--------+---------+ - | t_stage | midline_extension | II | II | II | II | - +=========+===================+========+=========+========+=========+ - | early | ``True`` |``True``|``None`` |``True``|``False``| - +---------+-------------------+--------+---------+--------+---------+ - | late | ``True`` |``None``|``None`` |``None``|``None`` | - +---------+-------------------+--------+---------+--------+---------+ - | early | ``False`` |``True``|``False``|``True``|``True`` | - +---------+-------------------+--------+---------+--------+---------+ - """ - try: - return self._patient_data - except AttributeError: - raise AttributeError( - "No patient data has been loaded yet" - ) - - @patient_data.setter - def patient_data(self, patient_data: pd.DataFrame): - """Load the patient data. For now, this just calls the :meth:`load_data` - method, but at a later point, I would like to write a function here - that generates the pandas :class:`DataFrame` from the internal matrix - representation of the data. - """ - self._patient_data = patient_data.copy() - self.load_data(patient_data) - - - def load_data( - self, - data: pd.DataFrame, - modality_spsn: Optional[Dict[str, List[float]]] = None, - mode = "HMM" - ): - """Load data as table of patients with involvement details and convert - it into internal representation of a matrix. - - Args: - data: The table with rows of patients and columns of patient and - involvement details. The table's header must have three levels - that categorize the individual lymph node level's involvement - to the corresponding diagnostic modality (first level), the - side of the LNL (second level) and finaly the name of the LNL - (third level). Additionally, the patient's T-category must be - stored under ('info', 'tumor', 't_stage') and whether the tumor - extends over the mid-sagittal line should be noted under - ('info', 'tumor', 'midline_extension'). So, part of this table - could look like this: - - +-----------------------------+---------------------+ - | info | MRI | - +-----------------------------+----------+----------+ - | tumor | ipsi | contra | - +---------+-------------------+----------+----------+ - | t_stage | midline_extension | II | II | - +=========+===================+==========+==========+ - | early | ``True`` | ``True`` | ``None`` | - +---------+-------------------+----------+----------+ - | late | ``True`` | ``None`` | ``None`` | - +---------+-------------------+----------+----------+ - | early | ``False`` | ``True`` | ``True`` | - +---------+-------------------+----------+----------+ - - modality_spsn: If no diagnostic modalities have been defined yet, - this must be provided to build the observation matrix. - - See Also: - :attr:`patient_data`: The attribute for loading and exporting data. - - :meth:`Bilateral.load_data`: Loads data into a bilateral network by - splitting it into ipsi- & contralateral side and passing each to - the respective unilateral method (see below). - - :meth:`Unilateral.load_data`: Data loading method of the unilateral - network. - """ - ext_data = data.loc[data[("info", "tumor", "midline_extension")]] - noext_data = data.loc[~data[("info", "tumor", "midline_extension")]] - - self.ext.load_data( - ext_data, - modality_spsn=modality_spsn, - mode=mode - ) - self.noext.load_data( - noext_data, - modality_spsn=modality_spsn, - mode=mode - ) - - - def check_and_assign(self, new_params: np.ndarray): - """Check that the spread probability (rates) and the parameters for the - marginalization over diagnose times are all within limits and assign them to - the model. Also, make sure the ipsi- and contralateral distributions over - diagnose times are the same instance of the :class:`MarginalizorDict`. - - Args: - new_params: The set of :attr:`spread_probs` and parameters to provide for - updating the parametrized distributions over diagnose times. - - Warning: - This method assumes that the parametrized distributions (instances of - :class:`Marginalizor`) all raise a ``ValueError`` when provided with - invalid parameters. - """ - k = len(self.spread_probs) - new_spread_probs = new_params[:k] - new_marg_params = new_params[k:] - - try: - self.ext.ipsi.diag_time_dists.update(new_marg_params) - self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists - self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists - self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists - except ValueError as val_err: - raise ValueError( - "Parameters for marginalization over diagnose times are invalid" - ) from val_err - - if new_spread_probs.shape != self.spread_probs.shape: - raise ValueError( - "Shape of provided spread parameters does not match network" - ) - if np.any(0. > new_spread_probs) or np.any(new_spread_probs > 1.): - raise ValueError( - "Spread probs must be between 0 and 1" - ) - - self.spread_probs = new_spread_probs - - - def likelihood( - self, - data: Optional[pd.DataFrame] = None, - given_params: Optional[np.ndarray] = None, - log: bool = True, - ) -> float: - """Compute log-likelihood of (already stored) data, given the spread - probabilities and either a discrete diagnose time or a distribution to - use for marginalization over diagnose times. - - Args: - data: Table with rows of patients and columns of per-LNL involvment. See - :meth:`load_data` for more details on how this should look like. - - given_params: The likelihood is a function of these parameters. They mainly - consist of the :attr:`spread_probs` of the model. Any excess parameters - will be used to update the parametrized distributions used for - marginalizing over the diagnose times (see :attr:`diag_time_dists`). - - log: When ``True``, the log-likelihood is returned. - - Returns: - The log-likelihood :math:`\\log{p(D \\mid \\theta)}` where :math:`D` - is the data and :math:`\\theta` is the tuple of spread probabilities - and diagnose times or distributions over diagnose times. - - See Also: - :attr:`spread_probs`: Property for getting and setting the spread - probabilities, of which a lymphatic network has as many as it has - :class:`Edge` instances (in case no symmetries apply). - - :meth:`Unilateral.likelihood`: The log-likelihood function of - the unilateral system. - - :meth:`Bilateral.likelihood`: The (log-)likelihood function of the - bilateral system. - """ - if data is not None: - self.patient_data = data - - try: - self.check_and_assign(given_params) - except ValueError: - return -np.inf if log else 0. - - llh = 0. if log else 1. - - if log: - llh += self.ext._likelihood(log=log) - llh += self.noext._likelihood(log=log) - else: - llh *= self.ext._likelihood(log=log) - llh *= self.noext._likelihood(log=log) - - return llh - - - def risk( - self, - given_params: Optional[np.ndarray] = None, - midline_extension: bool = True, - **kwargs, - ) -> float: - """Compute the risk of nodal involvement given a specific diagnose. - - Args: - spread_probs: Set ot new spread parameters. This also contains the - mixing parameter alpha in the last position. - midline_extension: Whether or not the patient's tumor extends over - the mid-sagittal line. - - See Also: - :meth:`Bilateral.risk`: Depending on whether or not the patient's - tumor does extend over the midline, the risk function of the - respective :class:`Bilateral` instance gets called. - """ - if given_params is not None: - self.check_and_assign(given_params) - - if midline_extension: - return self.ext.risk(**kwargs) - else: - return self.noext.risk(**kwargs) - - - def generate_dataset( - self, - num_patients: int, - stage_dist: Dict[str, float], - ext_prob: float, - **_kwargs, - ) -> pd.DataFrame: - """Generate/sample a pandas :class:`DataFrame` from the defined network. - - Args: - num_patients: Number of patients to generate. - stage_dist: Probability to find a patient in a certain T-stage. - ext_prob: Probability that a patient's primary tumor extends over - the mid-sagittal line. - """ - drawn_ext = np.random.choice( - [True, False], p=[ext_prob, 1. - ext_prob], size=num_patients - ) - ext_dataset = self.ext.generate_dataset( - num_patients=num_patients, - stage_dist=stage_dist, - ) - noext_dataset = self.noext.generate_dataset( - num_patients=num_patients, - stage_dist=stage_dist, - ) - - dataset = noext_dataset.copy() - dataset.loc[drawn_ext] = ext_dataset.loc[drawn_ext] - dataset[('info', 'tumor', 'midline_extension')] = drawn_ext - - return dataset \ No newline at end of file From 443d59819d929cdddffc10ad92f106723447b9e5 Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Wed, 12 Jul 2023 17:10:12 +0200 Subject: [PATCH 12/35] added multiple functions needed for time evolution of midline extension --- lymph/midline.py | 388 +++++++++++++++++++++------- lymph/midline_old.py | 595 +++++++++++++++++++++++++++++++++++++++++++ lymph/unilateral.py | 33 ++- 3 files changed, 907 insertions(+), 109 deletions(-) create mode 100644 lymph/midline_old.py diff --git a/lymph/midline.py b/lymph/midline.py index 138dad8..598a3a7 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -2,9 +2,12 @@ import numpy as np import pandas as pd +from numpy.linalg import matrix_power as mat_pow from .bilateral import Bilateral +from .node import Node from .timemarg import MarginalizorDict +from .unilateral import change_base class MidlineBilateral: @@ -64,26 +67,57 @@ def __init__( self.noext = Bilateral( graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric ) - self.ext_unknown = Bilateral( - graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric - ) - self.noext_unknown = Bilateral( - graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric - ) self.use_mixing = use_mixing if self.use_mixing: self.alpha_mix = 0. self.noext.diag_time_dists = self.ext.diag_time_dists - self.ext_unknown.diag_time_dists = self.ext.diag_time_dists - self.noext_unknown.diag_time_dists = self.ext.diag_time_dists + self.midexstates = ["0", "1"] @property def graph(self) -> Dict[Tuple[str], List[str]]: """Return the (unilateral) graph that was used to create this network. """ return self.noext.graph + + def _gen_midexstate_list(self): + """Generates the list of (hidden) midline states. + """ + if not hasattr(self, "_midexstate_list"): + self._midexstate_list = np.zeros( + shape=(2**1, 1), dtype=int + ) + for i in range(2**1): + self._midexstate_list[i] = [ + int(digit) for digit in change_base(i, 2, length=1) + ] + @property + def midexstate_list(self): + """Return list of all possible hidden midline states. + """ + try: + return self._midexstate_list + except AttributeError: + self._gen_midexstate_list() + return self._midexstate_list + + @property + def midext_prob(self): + """Assign the last of the new_params to the midline extension probability + """ + try: + return self._midext_prob + except AttributeError as attr_err: + raise AttributeError( + "No midline extension probability has been assigned" + ) from attr_err + + @midext_prob.setter + def midext_prob(self, new_params): + """A variable containing the midline extension probability + """ + self._midext_prob = new_params[-1] @property def base_probs(self) -> np.ndarray: @@ -126,33 +160,21 @@ def base_probs(self, new_params: np.ndarray): self.ext.ipsi.base_probs = new_params[:k] self.noext.ipsi.base_probs = new_params[:k] - self.ext_unknown.ipsi.base_probs = new_params[:k] - self.noext_unknown.ipsi.base_probs = new_params[:k] if self.use_mixing: self.noext.contra.base_probs = new_params[k:2*k] - self.noext_unknown.contra.base_probs = new_params[k:2*k] self.alpha_mix = new_params[-1] # compute linear combination self.ext.contra.base_probs = ( self.alpha_mix * self.ext.ipsi.base_probs + (1. - self.alpha_mix) * self.noext.contra.base_probs ) - self.ext_unknown.contra.base_probs = ( - self.alpha_mix * self.ext_unknown.ipsi.base_probs - + (1. - self.alpha_mix) * self.noext_unknown.contra.base_probs - ) else: self.ext.contra.base_probs = new_params[k:2*k] self.noext.contra.base_probs = new_params[2*k:3*k] - self.ext_unknown.contra.base_probs = new_params[k:2*k] - self.noext_unknown.contra.base_probs = new_params[2*k:3*k] # avoid unnecessary double computation of ipsilateral transition matrix self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix - self.noext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix - self.ext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix - @property def trans_probs(self) -> np.ndarray: @@ -168,14 +190,89 @@ def trans_probs(self, new_params: np.ndarray): """ self.noext.trans_probs = new_params self.ext.trans_probs = new_params - self.noext_unknown.trans_probs = new_params - self.ext_unknown.trans_probs = new_params - + # avoid unnecessary double computation of ipsilateral transition matrix self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix - self.noext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix - self.ext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix + def _gen_allowed_midextransitions(self): + """Generate the allowed midline extension transitions. + """ + self._allowed_midextransitions = {} + for i in range(len(self.midexstate_list)): + self._allowed_midextransitions[i] = [] + for j in range(len(self.midexstate_list)): + if not np.any(np.greater(self.midexstate_list[i,:], + self.midexstate_list[j,:])): + self._allowed_midextransitions[i].append(j) + + @property + def allowed_midextransitions(self): + """Return a dictionary that contains for each row :math:`i` of the + transition matrix :math:`\\mathbf{A}` the column numbers :math:`j` for + which the transtion probability :math:`P\\left( x_j \\mid x_i \\right)` + is not zero due to the forbidden self-healing. + + For example: The hidden state ``[True, False]`` in a network with only + one tumor and two LNLs (one involved, one healthy) corresponds to the + index ``1`` and can only evolve into the state ``[True, True]``, which + has index 3. So, the key-value pair for that particular hidden state + would be ``1: [3]``. + """ + try: + return self._allowed_midextransitions + except AttributeError: + self._gen_allowed_midextransitions() + return self._allowed_midextransitions + + def _gen_midextransition_matrix(self): + """Generate the midline extension transition matrix :math:`\\mathbf{A}`, which contains + the :math:`P \\left( S_{t+1} \\mid S_t \\right)`. :math:`\\mathbf{A}` + is a square matrix with size ``(# of states)``. The lower diagonal is + zero. + """ + if not hasattr(self, "_midextransition_matrix"): + shape = (2**1, 2**1) + self._midextransition_matrix = np.zeros(shape=shape) + + for i,state in enumerate(self.midexstate_list): + self.state = state + for j in self.allowed_midextransitions[i]: + midextransition_prob = self.comp_midextransition_prob(self.midexstate_list[j]) + self._midextransition_matrix[i,j] = midextransition_prob + + @property + def midextransition_matrix(self) -> np.ndarray: + """Return the midline extension listtransition matrix :math:`\\mathbf{A}`, which contains the + probability to transition from any state :math:`S_t` to any other state + :math:`S_{t+1}` one timestep later: + :math:`P \\left( S_{t+1} \\mid S_t \\right)`. :math:`\\mathbf{A}` is a + square matrix with size ``(# of states)``. The lower diagonal is zero, + because those entries correspond to transitions that would require + self-healing. + """ + try: + return self._midextransition_matrix + except AttributeError: + self._gen_midextransition_matrix() + return self._midextransition_matrix + + @property + def midextrans_probs(self) -> np.ndarray: + """Probabilities of lymphatic spread among the lymph node levels. They + are assumed to be symmetric ipsi- & contralaterally by default. + """ + return self.noext.midextrans_probs + + @midextrans_probs.setter + def midextrans_probs(self, new_params: np.ndarray): + """Set the new spread probabilities for lymphatic spread from among the + LNLs. + """ + self.noext.midextrans_probs = new_params + self.ext.midextrans_probs = new_params + + # avoid unnecessary double computation of ipsilateral transition matrix + self.noext.ipsi._midextransition_matrix = self.ext.ipsi.midextransition_matrix @property def spread_probs(self) -> np.ndarray: @@ -225,9 +322,40 @@ def diag_time_dists(self, new_dists: Union[dict, MarginalizorDict]): """ self.ext.diag_time_dists = new_dists self.noext.diag_time_dists = self.ext.diag_time_dists - self.noext_unknown.diag_time_dists = self.ext.diag_time_dists - self.ext_unknown.diag_time_dists = self.ext.diag_time_dists + + def comp_midextransition_prob( + self, + newstate: List[int], + acquire: bool = False + ) -> float: + """Computes the probability to transition to ``newstate``, given its + current state. + Args: + newstate: List of new states for each LNL in the lymphatic + system. The transition probability :math:`t` will be computed + from the current states to these states. + acquire: if ``True``, after computing and returning the probability, + the system updates its own state to be ``newstate``. + (default: ``False``) + + Returns: + Transition probability :math:`t`. + """ + res = 1. + for i, midexstate in enumerate(self.midexstates): + if not midexstate.state: + in_states = [1] + in_weights = [self.midext_prob] + res *= Node.trans_prob(in_states, in_weights)[newstate[i]] + elif not newstate[i]: + res = 0. + break + + if acquire: + self.state = newstate + + return res @property def modalities(self): @@ -253,9 +381,7 @@ def modalities(self, modality_spsn: Dict[str, List[float]]): """ self.noext.modalities = modality_spsn self.ext.modalities = modality_spsn - self.noext_unknown.modalities = modality_spsn - self.ext_unknown.modalities = modality_spsn - + @property def midext_prob(self): """Assign the last of the new_params to the midline extension probability @@ -316,6 +442,60 @@ def patient_data(self, patient_data: pd.DataFrame): self._patient_data = patient_data.copy() self.load_data(patient_data) + def _evolve_onestep( + self, start_state: Optional[np.ndarray] = None + ) -> np.ndarray: + """Evolve hidden Markov model based system over one time step. Compute + :math:`p(S \\mid t)` where :math:`S` is a distinct state and :math:`t` + is the time. + + Args: + start_state: The current state. + + Returns: + A matrix with the new state + + :meta public: + """ + # All healthy state at beginning + if start_state is None: + start_state = np.zeros(shape=len(self.state_list), dtype=float) + start_state[0] = 1. + + # compute involvement at first time-step + new_state = start_state @ self.transition_matrix + + return new_state + + def _evolve_midext( + self, start_midexstate: Optional[np.ndarray] = None + ) -> np.ndarray: + """Evolve hidden Markov model based system over one time step. Compute + :math:`p(S \\mid t)` where :math:`S` is a distinct state and :math:`t` + is the time. + + Args: + start_midexstate: The current midline extension state. + + Returns: + The new midline extension state + + :meta public: + """ + + # compute involvement at first time-step + new_midexstate = start_midexstate @ self.midextransition_matrix + + return new_midexstate + + """ + def state_probs_midext(self): + max_t = self.diag_time_dists.max_t + self.ext.state_probs_midext = {} + self.noext.state_probs_midext = {} + self.noext.state_probs_midext = self.noext._evolve_midext(t_last=max_t) + self.ext.state_probs_midext = self.ext. + """ def load_data( self, @@ -365,48 +545,18 @@ def load_data( :meth:`Unilateral.load_data`: Data loading method of the unilateral network. """ - ext_data = data.loc[(data[("info", "tumor", "midline_extension")]==True)] - noext_data = data.loc[(data[("info", "tumor", "midline_extension")]==False)] - unknown_data = data.loc[(data['info', 'tumor', 'midline_extension']!=False) & (data['info', 'tumor', 'midline_extension']!=True)] - if len(ext_data) > 0: - self.ext.load_data( - ext_data, - modality_spsn=modality_spsn, - mode=mode - ) - - if len(noext_data) > 0: - self.noext.load_data( - noext_data, - modality_spsn=modality_spsn, - mode=mode - ) - if len(unknown_data) > 0 & len(ext_data) + len(noext_data) > 0: - self.ext_unknown.load_data( - unknown_data, - modality_spsn=modality_spsn, - mode=mode - ) - - self.noext_unknown.load_data( - unknown_data, - modality_spsn=modality_spsn, - mode=mode - ) - - if len(unknown_data) > 0 & len(ext_data) + len(noext_data) == 0: - self.ext.load_data( - unknown_data, - modality_spsn=modality_spsn, - mode=mode - ) - - self.noext.load_data( - unknown_data, - modality_spsn=modality_spsn, - mode=mode - ) + self.ext.load_data( + data, + modality_spsn=modality_spsn, + mode=mode + ) + + self.noext.load_data( + data, + modality_spsn=modality_spsn, + mode=mode + ) def check_and_assign(self, new_params: np.ndarray): """Check that the spread probability (rates) and the parameters for the @@ -430,13 +580,9 @@ def check_and_assign(self, new_params: np.ndarray): try: self.ext.ipsi.diag_time_dists.update(new_marg_params) - self.ext_unknown.ipsi.diag_time_dists.update(new_marg_params) self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists - self.ext_unknown.contra.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists - self.noext_unknown.ipsi.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists - self.noext_unknown.contra.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists self.midext_prob = new_params except ValueError as val_err: raise ValueError( @@ -458,6 +604,72 @@ def check_and_assign(self, new_params: np.ndarray): self.spread_probs = new_spread_probs + def _likelihood_mid( + self, + log: bool = True + ) -> float: + """Compute the (log-)likelihood of data, using the stored spread probs and + fixed distributions for marginalizing over diagnose times. + + This method mainly exists so that the checking and assigning of the + spread probs can be skipped. + """ + stored_t_stages = set(self.ipsi.diagnose_matrices.keys()) + provided_t_stages = set(self.ipsi.diag_time_dists.keys()) + t_stages = list(stored_t_stages.intersection(provided_t_stages)) + + max_t = self.diag_time_dists.max_t + state_probs_ex = np.zeros(shape=len(self.state_list), dtype=float) + state_probs_ex[0] = 1. + state_probs_nox = np.zeros(shape=len(self.state_list), dtype=float) + state_probs_nox[0] = 1. + state_probs_midext = np.zeros(shape=len(self.midexstate_list), dtype=float) + state_probs_midext[0] = 1. + + llh_ex = 0. if log else 1. + llh_nox = 0. if log else 1. + + #if 1 1 in diagnose matrix midline extension: + #state_probs_midext[0] * llh_nox + state_probs_midext[1] * llh_ex + + for i in range(max_t): + for stage in t_stages: + state_probs_nox["ipsi"] = self.noext.ipsi._evolve_onestep(start_state = state_probs_nox["ipsi"]) + state_probs_nox["contra"] = self.noext.contra._evolve_onestep(start_state = state_probs_nox["contra"]) + state_probs_ex["ipsi"] = state_probs_midext[1] * self.ext.ipsi._evolve_onestep(start_state = state_probs_ex["ipsi"]) + state_probs_midext[0] * state_probs_nox["ipsi"] + state_probs_ex["contra"] = state_probs_midext[1] * self.ext.contra._evolve_onestep(start_state = state_probs_ex["contra"]) + state_probs_midext[0] * state_probs_nox["contra"] + state_probs_midext = self._evolve_midext(start_midextstate = state_probs_midext) + + joint_state_probs_nox = ( + state_probs_nox["ipsi"].T + @ np.diag(self.ipsi.diag_time_dists[stage].pmf) + @ state_probs_nox["contra"] + ) + joint_state_probs_ex = ( + state_probs_ex["ipsi"].T + @ np.diag(self.ipsi.diag_time_dists[stage].pmf) + @ state_probs_ex["contra"] + ) + p_ex = np.sum( + self.ext.ipsi.diagnose_matrices[stage] + * (joint_state_probs_ex + @ self.ext.contra.diagnose_matrices[stage]), + axis=0 + ) + p_nox = np.sum( + self.noext.ipsi.diagnose_matrices[stage] + * (joint_state_probs_nox + @ self.noext.contra.diagnose_matrices[stage]), + axis=0 + ) + if log: + llh_ex += np.sum(np.log(p_ex)) + llh_nox += np.sum(np.log(p_nox)) + else: + llh_ex *= np.prod(p_ex) + llh_nox *= np.prod(p_nox) + + return llh_ex + llh_nox def likelihood( self, @@ -504,31 +716,13 @@ def likelihood( except ValueError: return -np.inf if log else 0. - llh = 0. if log else 1. - - if len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']!=False) & (self.patient_data['info', 'tumor', 'midline_extension']!=True)])>0: - state_probs = {} - state_probs["ipsi"] = self.ext_unknown.state_probs_ipsi() - state_probs["contra"] = self.ext_unknown.state_probs_contra() - state_probs2 = {} - state_probs2["ipsi"] = self.noext_unknown.state_probs_ipsi() - state_probs2["contra"] = self.noext_unknown.state_probs_contra() - t_stages = list(set(self.patient_data[("info","tumor","t_stage")])) - t_stages = self.ext_unknown.t_stages() - - - for stage in t_stages: - p = self.ext_unknown._likelihood2(stage=stage, state_probs=state_probs)*self.midext_prob - p2 = self.noext_unknown._likelihood2(stage=stage, state_probs=state_probs2)*(1-self.midext_prob) - llh += np.sum(np.log(p+p2)) - if len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False) | (self.patient_data['info', 'tumor', 'midline_extension']==True)])>0: if log: - llh += self.ext._likelihood(log=log) + np.log(self.midext_prob)*len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==True)]) - llh += self.noext._likelihood(log=log) + np.log(1-self.midext_prob)*len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False)]) + llh += self.ext._likelihood_mid(log=log) + llh += self.noext._likelihood_mid(log=log) else: - llh *= self.ext._likelihood(log=log)*self.midext_prob**len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==True)]) - llh *= self.noext._likelihood(log=log)*(1-self.midext_prob)**len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False)]) + llh *= self.ext._likelihood_mid(log=log) + llh *= self.noext._likelihood_mid(log=log) return llh diff --git a/lymph/midline_old.py b/lymph/midline_old.py new file mode 100644 index 0000000..138dad8 --- /dev/null +++ b/lymph/midline_old.py @@ -0,0 +1,595 @@ +from typing import Dict, List, Optional, Tuple, Union + +import numpy as np +import pandas as pd + +from .bilateral import Bilateral +from .timemarg import MarginalizorDict + + +class MidlineBilateral: + """Model a bilateral lymphatic system where an additional risk factor can + be provided in the data: Whether or not the primary tumor extended over the + mid-sagittal line. + + It is reasonable to assume (and supported by data) that such an extension + significantly increases the risk for metastatic spread to the contralateral + side of the neck. This class attempts to capture this using a simple + assumption: We assume that the probability of spread to the contralateral + side for patients *with* midline extension is larger than for patients + *without* it, but smaller than the probability of spread to the ipsilateral + side. Formally: + + .. math:: + b_c^{\\in} = \\alpha \\cdot b_i + (1 - \\alpha) \\cdot b_c^{\\not\\in} + + where :math:`b_c^{\\in}` is the probability of spread from the primary tumor + to the contralateral side for patients with midline extension, and + :math:`b_c^{\\not\\in}` for patients without. :math:`\\alpha` is the linear + mixing parameter. + """ + + def __init__( + self, + graph: Dict[Tuple[str], List[str]], + use_mixing: bool = True, + trans_symmetric: bool = True, + **_kwargs + ): + """The class is constructed in a similar fashion to the + :class:`Bilateral`: That class contains one :class:`Unilateral` for + each side of the neck, while this class will contain two instances of + :class:`Bilateral`, one for the case of a midline extension and one for + the case of no midline extension. + + Args: + graph: Dictionary of the same kind as for initialization of + :class:`System`. This graph will be passed to the constructors of + two :class:`System` attributes of this class. + use_mixing: Describe the contralateral base spread probabilities for the + case of a midline extension as a linear combination between the base + spread probs of the ipsilateral side and the ones of the contralateral + side when no midline extension is present. + trans_symmetric: If ``True``, the spread probabilities among the + LNLs will be set symmetrically. + + See Also: + :class:`Bilateral`: Two of these are held as attributes by this + class. One for the case of a mid-sagittal extension of the primary + tumor and one for the case of no such extension. + """ + self.ext = Bilateral( + graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric + ) + self.noext = Bilateral( + graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric + ) + self.ext_unknown = Bilateral( + graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric + ) + self.noext_unknown = Bilateral( + graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric + ) + self.use_mixing = use_mixing + if self.use_mixing: + self.alpha_mix = 0. + + self.noext.diag_time_dists = self.ext.diag_time_dists + self.ext_unknown.diag_time_dists = self.ext.diag_time_dists + self.noext_unknown.diag_time_dists = self.ext.diag_time_dists + + @property + def graph(self) -> Dict[Tuple[str], List[str]]: + """Return the (unilateral) graph that was used to create this network. + """ + return self.noext.graph + + + @property + def base_probs(self) -> np.ndarray: + """Base probabilities of metastatic lymphatic spread from the tumor(s) + to the lymph node levels. This will return the following concatenation of + base spread probs depending on whether ``use_mixing`` is set to ``True`` or + ``False``: + + With the use of a mixing parameter: + +-----------+-------------------------+--------------+ + | base ipsi | base contra (no midext) | mixing param | + +-----------+-------------------------+--------------+ + + Without it: + +-----------+----------------------+-------------------------+ + | base ipsi | base contra (midext) | base contra (no midext) | + +-----------+----------------------+-------------------------+ + + When setting these, one needs to provide the respective shape. + """ + if self.use_mixing: + return np.concatenate([ + self.ext.ipsi.base_probs, + self.noext.contra.base_probs, + [self.alpha_mix], + ]) + else: + return np.concatenate([ + self.ext.ipsi.base_probs, + self.ext.contra.base_probs, + self.noext.contra.base_probs, + ]) + + @base_probs.setter + def base_probs(self, new_params: np.ndarray): + """Set the base probabilities from the tumor(s) to the LNLs, accounting + for the mixing parameter :math:`\\alpha``. + """ + k = len(self.ext.ipsi.base_probs) + + self.ext.ipsi.base_probs = new_params[:k] + self.noext.ipsi.base_probs = new_params[:k] + self.ext_unknown.ipsi.base_probs = new_params[:k] + self.noext_unknown.ipsi.base_probs = new_params[:k] + + if self.use_mixing: + self.noext.contra.base_probs = new_params[k:2*k] + self.noext_unknown.contra.base_probs = new_params[k:2*k] + self.alpha_mix = new_params[-1] + # compute linear combination + self.ext.contra.base_probs = ( + self.alpha_mix * self.ext.ipsi.base_probs + + (1. - self.alpha_mix) * self.noext.contra.base_probs + ) + self.ext_unknown.contra.base_probs = ( + self.alpha_mix * self.ext_unknown.ipsi.base_probs + + (1. - self.alpha_mix) * self.noext_unknown.contra.base_probs + ) + else: + self.ext.contra.base_probs = new_params[k:2*k] + self.noext.contra.base_probs = new_params[2*k:3*k] + self.ext_unknown.contra.base_probs = new_params[k:2*k] + self.noext_unknown.contra.base_probs = new_params[2*k:3*k] + + # avoid unnecessary double computation of ipsilateral transition matrix + self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix + self.noext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix + self.ext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix + + + @property + def trans_probs(self) -> np.ndarray: + """Probabilities of lymphatic spread among the lymph node levels. They + are assumed to be symmetric ipsi- & contralaterally by default. + """ + return self.noext.trans_probs + + @trans_probs.setter + def trans_probs(self, new_params: np.ndarray): + """Set the new spread probabilities for lymphatic spread from among the + LNLs. + """ + self.noext.trans_probs = new_params + self.ext.trans_probs = new_params + self.noext_unknown.trans_probs = new_params + self.ext_unknown.trans_probs = new_params + + # avoid unnecessary double computation of ipsilateral transition matrix + self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix + self.noext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix + self.ext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix + + + @property + def spread_probs(self) -> np.ndarray: + """These are the probabilities representing the spread of cancer along + lymphatic drainage pathways per timestep. + + It is composed of the base spread probs (possible with the mixing parameter) + and the probabilities of spread among the LNLs. + + +-------------+-------------+ + | base probs | trans probs | + +-------------+-------------+ + """ + return np.concatenate([self.base_probs, self.trans_probs]) + + @spread_probs.setter + def spread_probs(self, new_params: np.ndarray): + """Set the new spread probabilities and the mixing parameter + :math:`\\alpha`. + """ + num_base_probs = len(self.base_probs) + + self.base_probs = new_params[:num_base_probs] + self.trans_probs = new_params[num_base_probs:] + + + @property + def diag_time_dists(self) -> MarginalizorDict: + """This property holds the probability mass functions for marginalizing over + possible diagnose times for each T-stage. + + When setting this property, one may also provide a normal Python dict, in + which case it tries to convert it to a :class:`MarginalizorDict`. + + Note that the method will provide the same instance of this + :class:`MarginalizorDict` to both sides of the network. + + See Also: + :class:`MarginalzorDict`, :class:`Marginalizor`. + """ + return self.ext.diag_time_dists + + @diag_time_dists.setter + def diag_time_dists(self, new_dists: Union[dict, MarginalizorDict]): + """Assign new :class:`MarginalizorDict` to this property. If it is a normal + Python dictionary, tr to convert it into a :class:`MarginalizorDict`. + """ + self.ext.diag_time_dists = new_dists + self.noext.diag_time_dists = self.ext.diag_time_dists + self.noext_unknown.diag_time_dists = self.ext.diag_time_dists + self.ext_unknown.diag_time_dists = self.ext.diag_time_dists + + + @property + def modalities(self): + """A dictionary containing the specificity :math:`s_P` and sensitivity + :math:`s_N` values for each diagnostic modality. + + Such a dictionary can also be provided to set this property and compute + the observation matrices of all used systems. + + See Also: + :meth:`Bilateral.modalities`: Getting and setting this property in + the normal bilateral model. + + :meth:`Unilateral.modalities`: Getting and setting :math:`s_P` and + :math:`s_N` for a unilateral model. + """ + return self.noext.modalities + + @modalities.setter + def modalities(self, modality_spsn: Dict[str, List[float]]): + """Call the respective getter and setter methods of the bilateral + components with and without midline extension. + """ + self.noext.modalities = modality_spsn + self.ext.modalities = modality_spsn + self.noext_unknown.modalities = modality_spsn + self.ext_unknown.modalities = modality_spsn + + @property + def midext_prob(self): + """Assign the last of the new_params to the midline extension probability + """ + try: + return self._midext_prob + except AttributeError as attr_err: + raise AttributeError( + "No midline extension probability has been assigned" + ) from attr_err + + @midext_prob.setter + def midext_prob(self, new_params): + """A variable containing the midline extension probability + """ + self._midext_prob = new_params[-1] + + @property + def patient_data(self): + """A pandas :class:`DataFrame` with rows of patients and columns of + patient and involvement details. The table's header should have three + levels that categorize the individual lymph node level's involvement to + the corresponding diagnostic modality (first level), the side of the + LNL (second level) and finaly the name of the LNL (third level). + Additionally, the patient's T-category must be stored under ('info', + 'tumor', 't_stage') and whether the tumor extends over the mid-sagittal + line should be noted under ('info', 'tumor', 'midline_extension'). So, + part of this table could look like this: + + +-----------------------------+------------------+------------------+ + | info | MRI | PET | + +-----------------------------+--------+---------+--------+---------+ + | tumor | ipsi | contra | ipsi | contra | + +---------+-------------------+--------+---------+--------+---------+ + | t_stage | midline_extension | II | II | II | II | + +=========+===================+========+=========+========+=========+ + | early | ``True`` |``True``|``None`` |``True``|``False``| + +---------+-------------------+--------+---------+--------+---------+ + | late | ``True`` |``None``|``None`` |``None``|``None`` | + +---------+-------------------+--------+---------+--------+---------+ + | early | ``False`` |``True``|``False``|``True``|``True`` | + +---------+-------------------+--------+---------+--------+---------+ + """ + try: + return self._patient_data + except AttributeError: + raise AttributeError( + "No patient data has been loaded yet" + ) + + @patient_data.setter + def patient_data(self, patient_data: pd.DataFrame): + """Load the patient data. For now, this just calls the :meth:`load_data` + method, but at a later point, I would like to write a function here + that generates the pandas :class:`DataFrame` from the internal matrix + representation of the data. + """ + self._patient_data = patient_data.copy() + self.load_data(patient_data) + + + def load_data( + self, + data: pd.DataFrame, + modality_spsn: Optional[Dict[str, List[float]]] = None, + mode = "HMM" + ): + """Load data as table of patients with involvement details and convert + it into internal representation of a matrix. + + Args: + data: The table with rows of patients and columns of patient and + involvement details. The table's header must have three levels + that categorize the individual lymph node level's involvement + to the corresponding diagnostic modality (first level), the + side of the LNL (second level) and finaly the name of the LNL + (third level). Additionally, the patient's T-category must be + stored under ('info', 'tumor', 't_stage') and whether the tumor + extends over the mid-sagittal line should be noted under + ('info', 'tumor', 'midline_extension'). So, part of this table + could look like this: + + +-----------------------------+---------------------+ + | info | MRI | + +-----------------------------+----------+----------+ + | tumor | ipsi | contra | + +---------+-------------------+----------+----------+ + | t_stage | midline_extension | II | II | + +=========+===================+==========+==========+ + | early | ``True`` | ``True`` | ``None`` | + +---------+-------------------+----------+----------+ + | late | ``True`` | ``None`` | ``None`` | + +---------+-------------------+----------+----------+ + | early | ``False`` | ``True`` | ``True`` | + +---------+-------------------+----------+----------+ + + modality_spsn: If no diagnostic modalities have been defined yet, + this must be provided to build the observation matrix. + + See Also: + :attr:`patient_data`: The attribute for loading and exporting data. + + :meth:`Bilateral.load_data`: Loads data into a bilateral network by + splitting it into ipsi- & contralateral side and passing each to + the respective unilateral method (see below). + + :meth:`Unilateral.load_data`: Data loading method of the unilateral + network. + """ + ext_data = data.loc[(data[("info", "tumor", "midline_extension")]==True)] + noext_data = data.loc[(data[("info", "tumor", "midline_extension")]==False)] + unknown_data = data.loc[(data['info', 'tumor', 'midline_extension']!=False) & (data['info', 'tumor', 'midline_extension']!=True)] + + if len(ext_data) > 0: + self.ext.load_data( + ext_data, + modality_spsn=modality_spsn, + mode=mode + ) + + if len(noext_data) > 0: + self.noext.load_data( + noext_data, + modality_spsn=modality_spsn, + mode=mode + ) + if len(unknown_data) > 0 & len(ext_data) + len(noext_data) > 0: + self.ext_unknown.load_data( + unknown_data, + modality_spsn=modality_spsn, + mode=mode + ) + + self.noext_unknown.load_data( + unknown_data, + modality_spsn=modality_spsn, + mode=mode + ) + + if len(unknown_data) > 0 & len(ext_data) + len(noext_data) == 0: + self.ext.load_data( + unknown_data, + modality_spsn=modality_spsn, + mode=mode + ) + + self.noext.load_data( + unknown_data, + modality_spsn=modality_spsn, + mode=mode + ) + + def check_and_assign(self, new_params: np.ndarray): + """Check that the spread probability (rates) and the parameters for the + marginalization over diagnose times are all within limits and assign them to + the model. Also, make sure the ipsi- and contralateral distributions over + diagnose times are the same instance of the :class:`MarginalizorDict`. + + Args: + new_params: The set of :attr:`spread_probs` and parameters to provide for + updating the parametrized distributions over diagnose times. + + Warning: + This method assumes that the parametrized distributions (instances of + :class:`Marginalizor`) all raise a ``ValueError`` when provided with + invalid parameters. + """ + k = len(self.spread_probs) + l = len(self.diag_time_dists) + new_spread_probs = new_params[:k] + new_marg_params = new_params[k:(k+l)] + + try: + self.ext.ipsi.diag_time_dists.update(new_marg_params) + self.ext_unknown.ipsi.diag_time_dists.update(new_marg_params) + self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists + self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists + self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists + self.ext_unknown.contra.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists + self.noext_unknown.ipsi.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists + self.noext_unknown.contra.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists + self.midext_prob = new_params + except ValueError as val_err: + raise ValueError( + "Parameters for marginalization over diagnose times are invalid" + ) from val_err + + if new_spread_probs.shape != self.spread_probs.shape: + raise ValueError( + "Shape of provided spread parameters does not match network" + ) + if np.any(0. > new_spread_probs) or np.any(new_spread_probs > 1.): + raise ValueError( + "Spread probs must be between 0 and 1" + ) + if 0. > self.midext_prob or self.midext_prob > 1.: + raise ValueError( + "Midline extension probability must be between 0 and 1" + ) + + self.spread_probs = new_spread_probs + + + def likelihood( + self, + data: Optional[pd.DataFrame] = None, + given_params: Optional[np.ndarray] = None, + log: bool = True, + ) -> float: + """Compute log-likelihood of (already stored) data, given the spread + probabilities and either a discrete diagnose time or a distribution to + use for marginalization over diagnose times. + + Args: + data: Table with rows of patients and columns of per-LNL involvment. See + :meth:`load_data` for more details on how this should look like. + + given_params: The likelihood is a function of these parameters. They mainly + consist of the :attr:`spread_probs` of the model. Any excess parameters + will be used to update the parametrized distributions used for + marginalizing over the diagnose times (see :attr:`diag_time_dists`). + + log: When ``True``, the log-likelihood is returned. + + Returns: + The log-likelihood :math:`\\log{p(D \\mid \\theta)}` where :math:`D` + is the data and :math:`\\theta` is the tuple of spread probabilities + and diagnose times or distributions over diagnose times. + + See Also: + :attr:`spread_probs`: Property for getting and setting the spread + probabilities, of which a lymphatic network has as many as it has + :class:`Edge` instances (in case no symmetries apply). + + :meth:`Unilateral.likelihood`: The log-likelihood function of + the unilateral system. + + :meth:`Bilateral.likelihood`: The (log-)likelihood function of the + bilateral system. + """ + if data is not None: + self.patient_data = data + + try: + self.check_and_assign(given_params) + except ValueError: + return -np.inf if log else 0. + + llh = 0. if log else 1. + + if len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']!=False) & (self.patient_data['info', 'tumor', 'midline_extension']!=True)])>0: + state_probs = {} + state_probs["ipsi"] = self.ext_unknown.state_probs_ipsi() + state_probs["contra"] = self.ext_unknown.state_probs_contra() + state_probs2 = {} + state_probs2["ipsi"] = self.noext_unknown.state_probs_ipsi() + state_probs2["contra"] = self.noext_unknown.state_probs_contra() + t_stages = list(set(self.patient_data[("info","tumor","t_stage")])) + t_stages = self.ext_unknown.t_stages() + + + for stage in t_stages: + p = self.ext_unknown._likelihood2(stage=stage, state_probs=state_probs)*self.midext_prob + p2 = self.noext_unknown._likelihood2(stage=stage, state_probs=state_probs2)*(1-self.midext_prob) + llh += np.sum(np.log(p+p2)) + + if len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False) | (self.patient_data['info', 'tumor', 'midline_extension']==True)])>0: + if log: + llh += self.ext._likelihood(log=log) + np.log(self.midext_prob)*len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==True)]) + llh += self.noext._likelihood(log=log) + np.log(1-self.midext_prob)*len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False)]) + else: + llh *= self.ext._likelihood(log=log)*self.midext_prob**len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==True)]) + llh *= self.noext._likelihood(log=log)*(1-self.midext_prob)**len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False)]) + + return llh + + + def risk( + self, + given_params: Optional[np.ndarray] = None, + midline_extension: bool = True, + **kwargs, + ) -> float: + """Compute the risk of nodal involvement given a specific diagnose. + + Args: + spread_probs: Set ot new spread parameters. This also contains the + mixing parameter alpha in the last position. + midline_extension: Whether or not the patient's tumor extends over + the mid-sagittal line. + + See Also: + :meth:`Bilateral.risk`: Depending on whether or not the patient's + tumor does extend over the midline, the risk function of the + respective :class:`Bilateral` instance gets called. + """ + if given_params is not None: + self.check_and_assign(given_params) + + if midline_extension: + return self.ext.risk(**kwargs) + else: + return self.noext.risk(**kwargs) + + + def generate_dataset( + self, + num_patients: int, + stage_dist: Dict[str, float], + ext_prob: float, + **_kwargs, + ) -> pd.DataFrame: + """Generate/sample a pandas :class:`DataFrame` from the defined network. + + Args: + num_patients: Number of patients to generate. + stage_dist: Probability to find a patient in a certain T-stage. + ext_prob: Probability that a patient's primary tumor extends over + the mid-sagittal line. + """ + drawn_ext = np.random.choice( + [True, False], p=[ext_prob, 1. - ext_prob], size=num_patients + ) + ext_dataset = self.ext.generate_dataset( + num_patients=num_patients, + stage_dist=stage_dist, + ) + noext_dataset = self.noext.generate_dataset( + num_patients=num_patients, + stage_dist=stage_dist, + ) + + dataset = noext_dataset.copy() + dataset.loc[drawn_ext] = ext_dataset.loc[drawn_ext] + dataset[('info', 'tumor', 'midline_extension')] = drawn_ext + + return dataset \ No newline at end of file diff --git a/lymph/unilateral.py b/lymph/unilateral.py index 1f5aa1b..01527c3 100644 --- a/lymph/unilateral.py +++ b/lymph/unilateral.py @@ -358,27 +358,38 @@ def comp_diagnose_prob( return prob - def _gen_state_list(self): + def _gen_state_list(self, midline): """Generates the list of (hidden) states. """ if not hasattr(self, "_state_list"): - self._state_list = np.zeros( - shape=(2**len(self.lnls), len(self.lnls)), dtype=int - ) - for i in range(2**len(self.lnls)): - self._state_list[i] = [ - int(digit) for digit in change_base(i, 2, length=len(self.lnls)) - ] + if midline == False: + self._state_list = np.zeros( + shape=(2**len(self.lnls), len(self.lnls)), dtype=int + ) + if midline == True: + self._state_list = np.zeros( + shape=(2**1, 1), dtype=int + ) + if midline == False: + for i in range(2**len(self.lnls)): + self._state_list[i] = [ + int(digit) for digit in change_base(i, 2, length=len(self.lnls)) + ] + if midline == True: + for i in range(2**1): + self._state_list[i] = [ + int(digit) for digit in change_base(i, 2, length=1) + ] @property - def state_list(self): + def state_list(self, midline = False): """Return list of all possible hidden states. They are arranged in the same order as the lymph node levels in the network/graph. """ try: return self._state_list except AttributeError: - self._gen_state_list() + self._gen_state_list(midline) return self._state_list @@ -497,7 +508,6 @@ def transition_matrix(self) -> np.ndarray: self._gen_transition_matrix() return self._transition_matrix - @property def modalities(self): """Return specificity & sensitivity stored in this :class:`System` for @@ -790,7 +800,6 @@ def _evolve( return state_probs - def check_and_assign(self, new_params: np.ndarray): """Check that the spread probability (rates) and the parameters for the marginalization over diagnose times are all within limits and assign them to From 1db3047e5dc13fa31351bf8a466e6bb1ebaabab1 Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Thu, 13 Jul 2023 17:30:20 +0200 Subject: [PATCH 13/35] deleted unused code to separate midline and unilateral class --- lymph/unilateral.py | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/lymph/unilateral.py b/lymph/unilateral.py index 01527c3..a0c4a09 100644 --- a/lymph/unilateral.py +++ b/lymph/unilateral.py @@ -362,24 +362,13 @@ def _gen_state_list(self, midline): """Generates the list of (hidden) states. """ if not hasattr(self, "_state_list"): - if midline == False: - self._state_list = np.zeros( - shape=(2**len(self.lnls), len(self.lnls)), dtype=int - ) - if midline == True: - self._state_list = np.zeros( - shape=(2**1, 1), dtype=int - ) - if midline == False: - for i in range(2**len(self.lnls)): - self._state_list[i] = [ - int(digit) for digit in change_base(i, 2, length=len(self.lnls)) - ] - if midline == True: - for i in range(2**1): - self._state_list[i] = [ - int(digit) for digit in change_base(i, 2, length=1) - ] + self._state_list = np.zeros( + shape=(2**len(self.lnls), len(self.lnls)), dtype=int + ) + for i in range(2**len(self.lnls)): + self._state_list[i] = [ + int(digit) for digit in change_base(i, 2, length=len(self.lnls)) + ] @property def state_list(self, midline = False): From 350c42b7702de3a1b58d4af39f8fa355cc25e1d9 Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Thu, 13 Jul 2023 17:31:02 +0200 Subject: [PATCH 14/35] deleted unused code to separate unilateral and midline class --- lymph/unilateral.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lymph/unilateral.py b/lymph/unilateral.py index a0c4a09..ca9e695 100644 --- a/lymph/unilateral.py +++ b/lymph/unilateral.py @@ -358,7 +358,7 @@ def comp_diagnose_prob( return prob - def _gen_state_list(self, midline): + def _gen_state_list(self): """Generates the list of (hidden) states. """ if not hasattr(self, "_state_list"): @@ -371,14 +371,14 @@ def _gen_state_list(self, midline): ] @property - def state_list(self, midline = False): + def state_list(self): """Return list of all possible hidden states. They are arranged in the same order as the lymph node levels in the network/graph. """ try: return self._state_list except AttributeError: - self._gen_state_list(midline) + self._gen_state_list() return self._state_list From 5bf0bf2e14d2e03e583e7935050e9bc4e2014c5f Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Thu, 13 Jul 2023 17:35:11 +0200 Subject: [PATCH 15/35] deleted old file --- lymph/midline_old.py | 595 ------------------------------------------- 1 file changed, 595 deletions(-) delete mode 100644 lymph/midline_old.py diff --git a/lymph/midline_old.py b/lymph/midline_old.py deleted file mode 100644 index 138dad8..0000000 --- a/lymph/midline_old.py +++ /dev/null @@ -1,595 +0,0 @@ -from typing import Dict, List, Optional, Tuple, Union - -import numpy as np -import pandas as pd - -from .bilateral import Bilateral -from .timemarg import MarginalizorDict - - -class MidlineBilateral: - """Model a bilateral lymphatic system where an additional risk factor can - be provided in the data: Whether or not the primary tumor extended over the - mid-sagittal line. - - It is reasonable to assume (and supported by data) that such an extension - significantly increases the risk for metastatic spread to the contralateral - side of the neck. This class attempts to capture this using a simple - assumption: We assume that the probability of spread to the contralateral - side for patients *with* midline extension is larger than for patients - *without* it, but smaller than the probability of spread to the ipsilateral - side. Formally: - - .. math:: - b_c^{\\in} = \\alpha \\cdot b_i + (1 - \\alpha) \\cdot b_c^{\\not\\in} - - where :math:`b_c^{\\in}` is the probability of spread from the primary tumor - to the contralateral side for patients with midline extension, and - :math:`b_c^{\\not\\in}` for patients without. :math:`\\alpha` is the linear - mixing parameter. - """ - - def __init__( - self, - graph: Dict[Tuple[str], List[str]], - use_mixing: bool = True, - trans_symmetric: bool = True, - **_kwargs - ): - """The class is constructed in a similar fashion to the - :class:`Bilateral`: That class contains one :class:`Unilateral` for - each side of the neck, while this class will contain two instances of - :class:`Bilateral`, one for the case of a midline extension and one for - the case of no midline extension. - - Args: - graph: Dictionary of the same kind as for initialization of - :class:`System`. This graph will be passed to the constructors of - two :class:`System` attributes of this class. - use_mixing: Describe the contralateral base spread probabilities for the - case of a midline extension as a linear combination between the base - spread probs of the ipsilateral side and the ones of the contralateral - side when no midline extension is present. - trans_symmetric: If ``True``, the spread probabilities among the - LNLs will be set symmetrically. - - See Also: - :class:`Bilateral`: Two of these are held as attributes by this - class. One for the case of a mid-sagittal extension of the primary - tumor and one for the case of no such extension. - """ - self.ext = Bilateral( - graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric - ) - self.noext = Bilateral( - graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric - ) - self.ext_unknown = Bilateral( - graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric - ) - self.noext_unknown = Bilateral( - graph=graph, base_symmetric=False, trans_symmetric=trans_symmetric - ) - self.use_mixing = use_mixing - if self.use_mixing: - self.alpha_mix = 0. - - self.noext.diag_time_dists = self.ext.diag_time_dists - self.ext_unknown.diag_time_dists = self.ext.diag_time_dists - self.noext_unknown.diag_time_dists = self.ext.diag_time_dists - - @property - def graph(self) -> Dict[Tuple[str], List[str]]: - """Return the (unilateral) graph that was used to create this network. - """ - return self.noext.graph - - - @property - def base_probs(self) -> np.ndarray: - """Base probabilities of metastatic lymphatic spread from the tumor(s) - to the lymph node levels. This will return the following concatenation of - base spread probs depending on whether ``use_mixing`` is set to ``True`` or - ``False``: - - With the use of a mixing parameter: - +-----------+-------------------------+--------------+ - | base ipsi | base contra (no midext) | mixing param | - +-----------+-------------------------+--------------+ - - Without it: - +-----------+----------------------+-------------------------+ - | base ipsi | base contra (midext) | base contra (no midext) | - +-----------+----------------------+-------------------------+ - - When setting these, one needs to provide the respective shape. - """ - if self.use_mixing: - return np.concatenate([ - self.ext.ipsi.base_probs, - self.noext.contra.base_probs, - [self.alpha_mix], - ]) - else: - return np.concatenate([ - self.ext.ipsi.base_probs, - self.ext.contra.base_probs, - self.noext.contra.base_probs, - ]) - - @base_probs.setter - def base_probs(self, new_params: np.ndarray): - """Set the base probabilities from the tumor(s) to the LNLs, accounting - for the mixing parameter :math:`\\alpha``. - """ - k = len(self.ext.ipsi.base_probs) - - self.ext.ipsi.base_probs = new_params[:k] - self.noext.ipsi.base_probs = new_params[:k] - self.ext_unknown.ipsi.base_probs = new_params[:k] - self.noext_unknown.ipsi.base_probs = new_params[:k] - - if self.use_mixing: - self.noext.contra.base_probs = new_params[k:2*k] - self.noext_unknown.contra.base_probs = new_params[k:2*k] - self.alpha_mix = new_params[-1] - # compute linear combination - self.ext.contra.base_probs = ( - self.alpha_mix * self.ext.ipsi.base_probs - + (1. - self.alpha_mix) * self.noext.contra.base_probs - ) - self.ext_unknown.contra.base_probs = ( - self.alpha_mix * self.ext_unknown.ipsi.base_probs - + (1. - self.alpha_mix) * self.noext_unknown.contra.base_probs - ) - else: - self.ext.contra.base_probs = new_params[k:2*k] - self.noext.contra.base_probs = new_params[2*k:3*k] - self.ext_unknown.contra.base_probs = new_params[k:2*k] - self.noext_unknown.contra.base_probs = new_params[2*k:3*k] - - # avoid unnecessary double computation of ipsilateral transition matrix - self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix - self.noext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix - self.ext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix - - - @property - def trans_probs(self) -> np.ndarray: - """Probabilities of lymphatic spread among the lymph node levels. They - are assumed to be symmetric ipsi- & contralaterally by default. - """ - return self.noext.trans_probs - - @trans_probs.setter - def trans_probs(self, new_params: np.ndarray): - """Set the new spread probabilities for lymphatic spread from among the - LNLs. - """ - self.noext.trans_probs = new_params - self.ext.trans_probs = new_params - self.noext_unknown.trans_probs = new_params - self.ext_unknown.trans_probs = new_params - - # avoid unnecessary double computation of ipsilateral transition matrix - self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix - self.noext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix - self.ext_unknown.ipsi._transition_matrix = self.ext.ipsi.transition_matrix - - - @property - def spread_probs(self) -> np.ndarray: - """These are the probabilities representing the spread of cancer along - lymphatic drainage pathways per timestep. - - It is composed of the base spread probs (possible with the mixing parameter) - and the probabilities of spread among the LNLs. - - +-------------+-------------+ - | base probs | trans probs | - +-------------+-------------+ - """ - return np.concatenate([self.base_probs, self.trans_probs]) - - @spread_probs.setter - def spread_probs(self, new_params: np.ndarray): - """Set the new spread probabilities and the mixing parameter - :math:`\\alpha`. - """ - num_base_probs = len(self.base_probs) - - self.base_probs = new_params[:num_base_probs] - self.trans_probs = new_params[num_base_probs:] - - - @property - def diag_time_dists(self) -> MarginalizorDict: - """This property holds the probability mass functions for marginalizing over - possible diagnose times for each T-stage. - - When setting this property, one may also provide a normal Python dict, in - which case it tries to convert it to a :class:`MarginalizorDict`. - - Note that the method will provide the same instance of this - :class:`MarginalizorDict` to both sides of the network. - - See Also: - :class:`MarginalzorDict`, :class:`Marginalizor`. - """ - return self.ext.diag_time_dists - - @diag_time_dists.setter - def diag_time_dists(self, new_dists: Union[dict, MarginalizorDict]): - """Assign new :class:`MarginalizorDict` to this property. If it is a normal - Python dictionary, tr to convert it into a :class:`MarginalizorDict`. - """ - self.ext.diag_time_dists = new_dists - self.noext.diag_time_dists = self.ext.diag_time_dists - self.noext_unknown.diag_time_dists = self.ext.diag_time_dists - self.ext_unknown.diag_time_dists = self.ext.diag_time_dists - - - @property - def modalities(self): - """A dictionary containing the specificity :math:`s_P` and sensitivity - :math:`s_N` values for each diagnostic modality. - - Such a dictionary can also be provided to set this property and compute - the observation matrices of all used systems. - - See Also: - :meth:`Bilateral.modalities`: Getting and setting this property in - the normal bilateral model. - - :meth:`Unilateral.modalities`: Getting and setting :math:`s_P` and - :math:`s_N` for a unilateral model. - """ - return self.noext.modalities - - @modalities.setter - def modalities(self, modality_spsn: Dict[str, List[float]]): - """Call the respective getter and setter methods of the bilateral - components with and without midline extension. - """ - self.noext.modalities = modality_spsn - self.ext.modalities = modality_spsn - self.noext_unknown.modalities = modality_spsn - self.ext_unknown.modalities = modality_spsn - - @property - def midext_prob(self): - """Assign the last of the new_params to the midline extension probability - """ - try: - return self._midext_prob - except AttributeError as attr_err: - raise AttributeError( - "No midline extension probability has been assigned" - ) from attr_err - - @midext_prob.setter - def midext_prob(self, new_params): - """A variable containing the midline extension probability - """ - self._midext_prob = new_params[-1] - - @property - def patient_data(self): - """A pandas :class:`DataFrame` with rows of patients and columns of - patient and involvement details. The table's header should have three - levels that categorize the individual lymph node level's involvement to - the corresponding diagnostic modality (first level), the side of the - LNL (second level) and finaly the name of the LNL (third level). - Additionally, the patient's T-category must be stored under ('info', - 'tumor', 't_stage') and whether the tumor extends over the mid-sagittal - line should be noted under ('info', 'tumor', 'midline_extension'). So, - part of this table could look like this: - - +-----------------------------+------------------+------------------+ - | info | MRI | PET | - +-----------------------------+--------+---------+--------+---------+ - | tumor | ipsi | contra | ipsi | contra | - +---------+-------------------+--------+---------+--------+---------+ - | t_stage | midline_extension | II | II | II | II | - +=========+===================+========+=========+========+=========+ - | early | ``True`` |``True``|``None`` |``True``|``False``| - +---------+-------------------+--------+---------+--------+---------+ - | late | ``True`` |``None``|``None`` |``None``|``None`` | - +---------+-------------------+--------+---------+--------+---------+ - | early | ``False`` |``True``|``False``|``True``|``True`` | - +---------+-------------------+--------+---------+--------+---------+ - """ - try: - return self._patient_data - except AttributeError: - raise AttributeError( - "No patient data has been loaded yet" - ) - - @patient_data.setter - def patient_data(self, patient_data: pd.DataFrame): - """Load the patient data. For now, this just calls the :meth:`load_data` - method, but at a later point, I would like to write a function here - that generates the pandas :class:`DataFrame` from the internal matrix - representation of the data. - """ - self._patient_data = patient_data.copy() - self.load_data(patient_data) - - - def load_data( - self, - data: pd.DataFrame, - modality_spsn: Optional[Dict[str, List[float]]] = None, - mode = "HMM" - ): - """Load data as table of patients with involvement details and convert - it into internal representation of a matrix. - - Args: - data: The table with rows of patients and columns of patient and - involvement details. The table's header must have three levels - that categorize the individual lymph node level's involvement - to the corresponding diagnostic modality (first level), the - side of the LNL (second level) and finaly the name of the LNL - (third level). Additionally, the patient's T-category must be - stored under ('info', 'tumor', 't_stage') and whether the tumor - extends over the mid-sagittal line should be noted under - ('info', 'tumor', 'midline_extension'). So, part of this table - could look like this: - - +-----------------------------+---------------------+ - | info | MRI | - +-----------------------------+----------+----------+ - | tumor | ipsi | contra | - +---------+-------------------+----------+----------+ - | t_stage | midline_extension | II | II | - +=========+===================+==========+==========+ - | early | ``True`` | ``True`` | ``None`` | - +---------+-------------------+----------+----------+ - | late | ``True`` | ``None`` | ``None`` | - +---------+-------------------+----------+----------+ - | early | ``False`` | ``True`` | ``True`` | - +---------+-------------------+----------+----------+ - - modality_spsn: If no diagnostic modalities have been defined yet, - this must be provided to build the observation matrix. - - See Also: - :attr:`patient_data`: The attribute for loading and exporting data. - - :meth:`Bilateral.load_data`: Loads data into a bilateral network by - splitting it into ipsi- & contralateral side and passing each to - the respective unilateral method (see below). - - :meth:`Unilateral.load_data`: Data loading method of the unilateral - network. - """ - ext_data = data.loc[(data[("info", "tumor", "midline_extension")]==True)] - noext_data = data.loc[(data[("info", "tumor", "midline_extension")]==False)] - unknown_data = data.loc[(data['info', 'tumor', 'midline_extension']!=False) & (data['info', 'tumor', 'midline_extension']!=True)] - - if len(ext_data) > 0: - self.ext.load_data( - ext_data, - modality_spsn=modality_spsn, - mode=mode - ) - - if len(noext_data) > 0: - self.noext.load_data( - noext_data, - modality_spsn=modality_spsn, - mode=mode - ) - if len(unknown_data) > 0 & len(ext_data) + len(noext_data) > 0: - self.ext_unknown.load_data( - unknown_data, - modality_spsn=modality_spsn, - mode=mode - ) - - self.noext_unknown.load_data( - unknown_data, - modality_spsn=modality_spsn, - mode=mode - ) - - if len(unknown_data) > 0 & len(ext_data) + len(noext_data) == 0: - self.ext.load_data( - unknown_data, - modality_spsn=modality_spsn, - mode=mode - ) - - self.noext.load_data( - unknown_data, - modality_spsn=modality_spsn, - mode=mode - ) - - def check_and_assign(self, new_params: np.ndarray): - """Check that the spread probability (rates) and the parameters for the - marginalization over diagnose times are all within limits and assign them to - the model. Also, make sure the ipsi- and contralateral distributions over - diagnose times are the same instance of the :class:`MarginalizorDict`. - - Args: - new_params: The set of :attr:`spread_probs` and parameters to provide for - updating the parametrized distributions over diagnose times. - - Warning: - This method assumes that the parametrized distributions (instances of - :class:`Marginalizor`) all raise a ``ValueError`` when provided with - invalid parameters. - """ - k = len(self.spread_probs) - l = len(self.diag_time_dists) - new_spread_probs = new_params[:k] - new_marg_params = new_params[k:(k+l)] - - try: - self.ext.ipsi.diag_time_dists.update(new_marg_params) - self.ext_unknown.ipsi.diag_time_dists.update(new_marg_params) - self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists - self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists - self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists - self.ext_unknown.contra.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists - self.noext_unknown.ipsi.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists - self.noext_unknown.contra.diag_time_dists = self.ext_unknown.ipsi.diag_time_dists - self.midext_prob = new_params - except ValueError as val_err: - raise ValueError( - "Parameters for marginalization over diagnose times are invalid" - ) from val_err - - if new_spread_probs.shape != self.spread_probs.shape: - raise ValueError( - "Shape of provided spread parameters does not match network" - ) - if np.any(0. > new_spread_probs) or np.any(new_spread_probs > 1.): - raise ValueError( - "Spread probs must be between 0 and 1" - ) - if 0. > self.midext_prob or self.midext_prob > 1.: - raise ValueError( - "Midline extension probability must be between 0 and 1" - ) - - self.spread_probs = new_spread_probs - - - def likelihood( - self, - data: Optional[pd.DataFrame] = None, - given_params: Optional[np.ndarray] = None, - log: bool = True, - ) -> float: - """Compute log-likelihood of (already stored) data, given the spread - probabilities and either a discrete diagnose time or a distribution to - use for marginalization over diagnose times. - - Args: - data: Table with rows of patients and columns of per-LNL involvment. See - :meth:`load_data` for more details on how this should look like. - - given_params: The likelihood is a function of these parameters. They mainly - consist of the :attr:`spread_probs` of the model. Any excess parameters - will be used to update the parametrized distributions used for - marginalizing over the diagnose times (see :attr:`diag_time_dists`). - - log: When ``True``, the log-likelihood is returned. - - Returns: - The log-likelihood :math:`\\log{p(D \\mid \\theta)}` where :math:`D` - is the data and :math:`\\theta` is the tuple of spread probabilities - and diagnose times or distributions over diagnose times. - - See Also: - :attr:`spread_probs`: Property for getting and setting the spread - probabilities, of which a lymphatic network has as many as it has - :class:`Edge` instances (in case no symmetries apply). - - :meth:`Unilateral.likelihood`: The log-likelihood function of - the unilateral system. - - :meth:`Bilateral.likelihood`: The (log-)likelihood function of the - bilateral system. - """ - if data is not None: - self.patient_data = data - - try: - self.check_and_assign(given_params) - except ValueError: - return -np.inf if log else 0. - - llh = 0. if log else 1. - - if len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']!=False) & (self.patient_data['info', 'tumor', 'midline_extension']!=True)])>0: - state_probs = {} - state_probs["ipsi"] = self.ext_unknown.state_probs_ipsi() - state_probs["contra"] = self.ext_unknown.state_probs_contra() - state_probs2 = {} - state_probs2["ipsi"] = self.noext_unknown.state_probs_ipsi() - state_probs2["contra"] = self.noext_unknown.state_probs_contra() - t_stages = list(set(self.patient_data[("info","tumor","t_stage")])) - t_stages = self.ext_unknown.t_stages() - - - for stage in t_stages: - p = self.ext_unknown._likelihood2(stage=stage, state_probs=state_probs)*self.midext_prob - p2 = self.noext_unknown._likelihood2(stage=stage, state_probs=state_probs2)*(1-self.midext_prob) - llh += np.sum(np.log(p+p2)) - - if len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False) | (self.patient_data['info', 'tumor', 'midline_extension']==True)])>0: - if log: - llh += self.ext._likelihood(log=log) + np.log(self.midext_prob)*len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==True)]) - llh += self.noext._likelihood(log=log) + np.log(1-self.midext_prob)*len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False)]) - else: - llh *= self.ext._likelihood(log=log)*self.midext_prob**len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==True)]) - llh *= self.noext._likelihood(log=log)*(1-self.midext_prob)**len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False)]) - - return llh - - - def risk( - self, - given_params: Optional[np.ndarray] = None, - midline_extension: bool = True, - **kwargs, - ) -> float: - """Compute the risk of nodal involvement given a specific diagnose. - - Args: - spread_probs: Set ot new spread parameters. This also contains the - mixing parameter alpha in the last position. - midline_extension: Whether or not the patient's tumor extends over - the mid-sagittal line. - - See Also: - :meth:`Bilateral.risk`: Depending on whether or not the patient's - tumor does extend over the midline, the risk function of the - respective :class:`Bilateral` instance gets called. - """ - if given_params is not None: - self.check_and_assign(given_params) - - if midline_extension: - return self.ext.risk(**kwargs) - else: - return self.noext.risk(**kwargs) - - - def generate_dataset( - self, - num_patients: int, - stage_dist: Dict[str, float], - ext_prob: float, - **_kwargs, - ) -> pd.DataFrame: - """Generate/sample a pandas :class:`DataFrame` from the defined network. - - Args: - num_patients: Number of patients to generate. - stage_dist: Probability to find a patient in a certain T-stage. - ext_prob: Probability that a patient's primary tumor extends over - the mid-sagittal line. - """ - drawn_ext = np.random.choice( - [True, False], p=[ext_prob, 1. - ext_prob], size=num_patients - ) - ext_dataset = self.ext.generate_dataset( - num_patients=num_patients, - stage_dist=stage_dist, - ) - noext_dataset = self.noext.generate_dataset( - num_patients=num_patients, - stage_dist=stage_dist, - ) - - dataset = noext_dataset.copy() - dataset.loc[drawn_ext] = ext_dataset.loc[drawn_ext] - dataset[('info', 'tumor', 'midline_extension')] = drawn_ext - - return dataset \ No newline at end of file From 05b8fa2efe026f27656d09975eca0b5ffe0ec6b3 Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Thu, 13 Jul 2023 17:36:36 +0200 Subject: [PATCH 16/35] started implementation of midline diagnose matrix --- lymph/midline.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/lymph/midline.py b/lymph/midline.py index 598a3a7..dca5995 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -399,6 +399,45 @@ def midext_prob(self, new_params): """ self._midext_prob = new_params[-1] + def _gen_diagnose_matrices_midext(self, table: pd.DataFrame, t_stage: str): + """Generate the matrix containing the probabilities to see the provided + diagnose, given any possible hidden state. The resulting matrix has + size :math:`2^N \\times M` where :math:`N` is the number of nodes in + the graph and :math:`M` the number of patients. + + Args: + table: pandas ``DataFrame`` containing rows of patients. Must have + ``MultiIndex`` columns with two levels: First, the modalities + and second, the LNLs. + t_stage: The T-stage all the patients in ``table`` belong to. + """ + if not hasattr(self, "_diagnose_matrices_midext"): + self._diagnose_matrices_midext = {} + + self.patient_data['info', 'tumor', 'midline_extension'] + self.patient_data['info', 'tumor', 't_stage'].unique() + + shape = (len(self.midexstate_list), len(table)) + self._diagnose_matrices_midext[t_stage] = np.ones(shape=shape) + + for i,state in enumerate(self.midexstate_list): + self.state = state + + for j, (_, patient) in enumerate(table.iterrows()): + patient_obs_prob = self.comp_diagnose_prob(patient) + self._diagnose_matrices_midext[t_stage][i,j] = patient_obs_prob + + + @property + def diagnose_matrices_midext(self): + try: + return self._diagnose_matrices_midext + except AttributeError as att_err: + raise AttributeError( + "No data has been loaded and hence no observation matrix has " + "been computed." + ) from att_err + @property def patient_data(self): """A pandas :class:`DataFrame` with rows of patients and columns of From 34850f5f4273bbfa2156b062ff6405c215f3967d Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Fri, 14 Jul 2023 00:22:29 +0200 Subject: [PATCH 17/35] deleted .DS_Store --- lymph/.DS_Store | Bin 6148 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 lymph/.DS_Store diff --git a/lymph/.DS_Store b/lymph/.DS_Store deleted file mode 100644 index 5008ddfcf53c02e82d7eee2e57c38e5672ef89f6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0 Date: Fri, 14 Jul 2023 15:19:07 +0200 Subject: [PATCH 18/35] initialized ipsi and contra states separately --- lymph/midline.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/lymph/midline.py b/lymph/midline.py index dca5995..abc9920 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -190,7 +190,6 @@ def trans_probs(self, new_params: np.ndarray): """ self.noext.trans_probs = new_params self.ext.trans_probs = new_params - # avoid unnecessary double computation of ipsilateral transition matrix self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix @@ -658,10 +657,14 @@ def _likelihood_mid( t_stages = list(stored_t_stages.intersection(provided_t_stages)) max_t = self.diag_time_dists.max_t - state_probs_ex = np.zeros(shape=len(self.state_list), dtype=float) - state_probs_ex[0] = 1. - state_probs_nox = np.zeros(shape=len(self.state_list), dtype=float) - state_probs_nox[0] = 1. + state_probs_ipsi_ex = np.zeros(shape=len(self.state_list), dtype=float) + state_probs_ipsi_ex[0] = 1. + state_probs_ipsi_nox = np.zeros(shape=len(self.state_list), dtype=float) + state_probs_ipsi_nox[0] = 1. + state_probs_contra_ex = np.zeros(shape=len(self.state_list), dtype=float) + state_probs_contra_ex[0] = 1. + state_probs_contra_nox = np.zeros(shape=len(self.state_list), dtype=float) + state_probs_contra_nox[0] = 1. state_probs_midext = np.zeros(shape=len(self.midexstate_list), dtype=float) state_probs_midext[0] = 1. @@ -673,21 +676,21 @@ def _likelihood_mid( for i in range(max_t): for stage in t_stages: - state_probs_nox["ipsi"] = self.noext.ipsi._evolve_onestep(start_state = state_probs_nox["ipsi"]) - state_probs_nox["contra"] = self.noext.contra._evolve_onestep(start_state = state_probs_nox["contra"]) - state_probs_ex["ipsi"] = state_probs_midext[1] * self.ext.ipsi._evolve_onestep(start_state = state_probs_ex["ipsi"]) + state_probs_midext[0] * state_probs_nox["ipsi"] - state_probs_ex["contra"] = state_probs_midext[1] * self.ext.contra._evolve_onestep(start_state = state_probs_ex["contra"]) + state_probs_midext[0] * state_probs_nox["contra"] + state_probs_ipsi_nox = self.noext.ipsi._evolve_onestep(start_state = state_probs_ipsi_nox) + state_probs_contra_nox = self.noext.contra._evolve_onestep(start_state = state_probs_contra_nox) + state_probs_ipsi_ex = state_probs_midext[1] * self.ext.ipsi._evolve_onestep(start_state = state_probs_ipsi_ex) + state_probs_midext[0] * state_probs_ipsi_nox + state_probs_contra_ex = state_probs_midext[1] * self.ext.contra._evolve_onestep(start_state = state_probs_contra_ex) + state_probs_midext[0] * state_probs_contra_nox state_probs_midext = self._evolve_midext(start_midextstate = state_probs_midext) joint_state_probs_nox = ( - state_probs_nox["ipsi"].T + state_probs_ipsi_nox.T @ np.diag(self.ipsi.diag_time_dists[stage].pmf) - @ state_probs_nox["contra"] + @ state_probs_contra_nox ) joint_state_probs_ex = ( - state_probs_ex["ipsi"].T + state_probs_ipsi_ex.T @ np.diag(self.ipsi.diag_time_dists[stage].pmf) - @ state_probs_ex["contra"] + @ state_probs_contra_ex ) p_ex = np.sum( self.ext.ipsi.diagnose_matrices[stage] From 29105db757030dc06fa4e44ed2fde93c4f0886e7 Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Thu, 20 Jul 2023 14:52:11 +0200 Subject: [PATCH 19/35] revise likelihood and related functioncs --- lymph/midline.py | 189 ++++++++++++++++++++++------------------------- 1 file changed, 90 insertions(+), 99 deletions(-) diff --git a/lymph/midline.py b/lymph/midline.py index abc9920..cffa750 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -380,23 +380,6 @@ def modalities(self, modality_spsn: Dict[str, List[float]]): """ self.noext.modalities = modality_spsn self.ext.modalities = modality_spsn - - @property - def midext_prob(self): - """Assign the last of the new_params to the midline extension probability - """ - try: - return self._midext_prob - except AttributeError as attr_err: - raise AttributeError( - "No midline extension probability has been assigned" - ) from attr_err - - @midext_prob.setter - def midext_prob(self, new_params): - """A variable containing the midline extension probability - """ - self._midext_prob = new_params[-1] def _gen_diagnose_matrices_midext(self, table: pd.DataFrame, t_stage: str): """Generate the matrix containing the probabilities to see the provided @@ -412,21 +395,18 @@ def _gen_diagnose_matrices_midext(self, table: pd.DataFrame, t_stage: str): """ if not hasattr(self, "_diagnose_matrices_midext"): self._diagnose_matrices_midext = {} - - self.patient_data['info', 'tumor', 'midline_extension'] - self.patient_data['info', 'tumor', 't_stage'].unique() - - shape = (len(self.midexstate_list), len(table)) - self._diagnose_matrices_midext[t_stage] = np.ones(shape=shape) - - for i,state in enumerate(self.midexstate_list): - self.state = state - - for j, (_, patient) in enumerate(table.iterrows()): - patient_obs_prob = self.comp_diagnose_prob(patient) - self._diagnose_matrices_midext[t_stage][i,j] = patient_obs_prob - - + self._diagnose_matrices_midext[t_stage] = np.zeros((len(table), 2)) + for patient in len(table): + if table.iloc[patient]['info', 'tumor', 'midline_extension'] == False: + self.diagnose_matrices_midext[t_stage][patient, 0] = 1 + self.diagnose_matrices_midext[t_stage][patient, 1] = 0 + if table.iloc[patient]['info', 'tumor', 'midline_extension'] == True: + self.diagnose_matrices_midext[t_stage][patient, 0] = 0 + self.diagnose_matrices_midext[t_stage][patient, 1] = 1 + if (table.iloc[patient]['info', 'tumor', 'midline_extension'] != False) & (table.iloc[patient]['info', 'tumor', 'midline_extension'] != True): + self.diagnose_matrices_midext[t_stage][patient, 0] = 1 + self.diagnose_matrices_midext[t_stage][patient, 1] = 1 + @property def diagnose_matrices_midext(self): try: @@ -526,15 +506,6 @@ def _evolve_midext( return new_midexstate - """ - def state_probs_midext(self): - max_t = self.diag_time_dists.max_t - self.ext.state_probs_midext = {} - self.noext.state_probs_midext = {} - self.noext.state_probs_midext = self.noext._evolve_midext(t_last=max_t) - self.ext.state_probs_midext = self.ext. - """ - def load_data( self, data: pd.DataFrame, @@ -657,61 +628,81 @@ def _likelihood_mid( t_stages = list(stored_t_stages.intersection(provided_t_stages)) max_t = self.diag_time_dists.max_t - state_probs_ipsi_ex = np.zeros(shape=len(self.state_list), dtype=float) - state_probs_ipsi_ex[0] = 1. - state_probs_ipsi_nox = np.zeros(shape=len(self.state_list), dtype=float) - state_probs_ipsi_nox[0] = 1. - state_probs_contra_ex = np.zeros(shape=len(self.state_list), dtype=float) - state_probs_contra_ex[0] = 1. - state_probs_contra_nox = np.zeros(shape=len(self.state_list), dtype=float) - state_probs_contra_nox[0] = 1. - state_probs_midext = np.zeros(shape=len(self.midexstate_list), dtype=float) - state_probs_midext[0] = 1. + state_probs_ipsi_ex = np.zeros( + shape=(max_t + 1, len(self.state_list)), + dtype=float + ) + state_probs_ipsi_ex[0,0] = 1. + state_probs_ipsi_nox = np.zeros( + shape=(max_t + 1, len(self.state_list)), + dtype=float + ) + state_probs_ipsi_nox[0,0] = 1. + state_probs_contra_ex = np.zeros( + shape=(max_t + 1, len(self.state_list)), + dtype=float + ) + state_probs_contra_ex[0,0] = 1. + state_probs_contra_nox = np.zeros( + shape=(max_t + 1, len(self.state_list)), + dtype=float + ) + state_probs_contra_nox[0,0] = 1. + state_probs_midext = np.zeros( + shape=(max_t + 1, 2), + dtype=float + ) + state_probs_midext[0,0] = 1. llh_ex = 0. if log else 1. - llh_nox = 0. if log else 1. - - #if 1 1 in diagnose matrix midline extension: - #state_probs_midext[0] * llh_nox + state_probs_midext[1] * llh_ex - - for i in range(max_t): - for stage in t_stages: - state_probs_ipsi_nox = self.noext.ipsi._evolve_onestep(start_state = state_probs_ipsi_nox) - state_probs_contra_nox = self.noext.contra._evolve_onestep(start_state = state_probs_contra_nox) - state_probs_ipsi_ex = state_probs_midext[1] * self.ext.ipsi._evolve_onestep(start_state = state_probs_ipsi_ex) + state_probs_midext[0] * state_probs_ipsi_nox - state_probs_contra_ex = state_probs_midext[1] * self.ext.contra._evolve_onestep(start_state = state_probs_contra_ex) + state_probs_midext[0] * state_probs_contra_nox - state_probs_midext = self._evolve_midext(start_midextstate = state_probs_midext) - - joint_state_probs_nox = ( - state_probs_ipsi_nox.T - @ np.diag(self.ipsi.diag_time_dists[stage].pmf) - @ state_probs_contra_nox + llh_nox = 0. if log else 1. + + for stage in t_stages: + state_probs = np.zeros( + shape=(len_time_range + 1, len(self.state_list)), + dtype=float + ) + state_probs_ipsi_nox = self.ipsi._evolve_onestep(start_state = state_probs_ipsi_nox) + state_probs_contra_nox = self.contra._evolve_onestep(start_state = state_probs_contra_nox) + state_probs_ipsi_ex = state_probs_midext[1] * self.ipsi._evolve_onestep(start_state = state_probs_ipsi_ex) + state_probs_midext[0] * state_probs_ipsi_nox + state_probs_contra_ex = state_probs_midext[1] * self.contra._evolve_onestep(start_state = state_probs_contra_ex) + state_probs_midext[0] * state_probs_contra_nox + state_probs_midext = self._evolve_midext(start_midextstate = state_probs_midext) + + for i in len(self.diagnose_matrices_midext[stage][:,0]): + if ((self.diagnose_matrices_midext[stage][i,0]==1) & (self.diagnose_matrices_midext[stage][i,1]==1)): + self.diagnose_matrices_midext[stage][i,0] = state_probs_midext[0] + self.diagnose_matrices_midext[stage][i,1] = state_probs_midext[1] + + joint_state_probs_nox = ( + state_probs_ipsi_nox.T + @ np.diag(self.ipsi.diag_time_dists[stage].pmf) + @ state_probs_contra_nox + ) + joint_state_probs_ex = ( + state_probs_ipsi_ex.T + @ np.diag(self.ipsi.diag_time_dists[stage].pmf) + @ state_probs_contra_ex + ) + p_ex = np.sum( + self.ipsi.diagnose_matrices[stage] + * (joint_state_probs_ex + @ self.contra.diagnose_matrices[stage]), + axis=0 ) - joint_state_probs_ex = ( - state_probs_ipsi_ex.T - @ np.diag(self.ipsi.diag_time_dists[stage].pmf) - @ state_probs_contra_ex + p_ex_weighted = p_ex.T * self.diagnose_matrices_midext[stage][:,1] + p_nox = np.sum( + self.ipsi.diagnose_matrices[stage] + * (joint_state_probs_nox + @ self.contra.diagnose_matrices[stage]), + axis=0 ) - p_ex = np.sum( - self.ext.ipsi.diagnose_matrices[stage] - * (joint_state_probs_ex - @ self.ext.contra.diagnose_matrices[stage]), - axis=0 - ) - p_nox = np.sum( - self.noext.ipsi.diagnose_matrices[stage] - * (joint_state_probs_nox - @ self.noext.contra.diagnose_matrices[stage]), - axis=0 - ) - if log: - llh_ex += np.sum(np.log(p_ex)) - llh_nox += np.sum(np.log(p_nox)) - else: - llh_ex *= np.prod(p_ex) - llh_nox *= np.prod(p_nox) - - return llh_ex + llh_nox + p_nox_weighted = p_nox.T * self.diagnose_matrices_midext[stage][:,0] + if log: + llh += np.sum(np.log(p_ex_weighted)) + np.sum(np.log(p_nox_weighted)) + else: + llh *= np.prod(p_ex_weighted) * np.prod(p_nox_weighted) + + return llh def likelihood( self, @@ -758,13 +749,13 @@ def likelihood( except ValueError: return -np.inf if log else 0. - if len(self.patient_data.loc[(self.patient_data['info', 'tumor', 'midline_extension']==False) | (self.patient_data['info', 'tumor', 'midline_extension']==True)])>0: - if log: - llh += self.ext._likelihood_mid(log=log) - llh += self.noext._likelihood_mid(log=log) - else: - llh *= self.ext._likelihood_mid(log=log) - llh *= self.noext._likelihood_mid(log=log) + + if log: + llh += self.ext._likelihood_mid(log=log) + llh += self.noext._likelihood_mid(log=log) + else: + llh *= self.ext._likelihood_mid(log=log) + llh *= self.noext._likelihood_mid(log=log) return llh From 860a8c7de55297967a613ebb9210a6223becc2cd Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Wed, 26 Jul 2023 09:42:08 +0200 Subject: [PATCH 20/35] revise likelihood and depending functions --- lymph/bilateral.py | 26 --------------- lymph/midline.py | 77 ++++++++++++++++++++++++++++----------------- lymph/unilateral.py | 1 - 3 files changed, 48 insertions(+), 56 deletions(-) diff --git a/lymph/bilateral.py b/lymph/bilateral.py index f3b1151..b0701b8 100644 --- a/lymph/bilateral.py +++ b/lymph/bilateral.py @@ -487,32 +487,6 @@ def _likelihood( return llh - def _likelihood2( - self, - stage, - state_probs, - ) -> float: - """Compute the (log-)likelihood of data, using the stored spread probs and - fixed distributions for marginalizing over diagnose times. - - This method mainly exists so that the checking and assigning of the - spread probs can be skipped. - """ - - joint_state_probs = ( - state_probs["ipsi"].T - @ np.diag(self.ipsi.diag_time_dists[stage].pmf) - @ state_probs["contra"] - ) - p = np.sum( - self.ipsi.diagnose_matrices[stage] - * (joint_state_probs - @ self.contra.diagnose_matrices[stage]), - axis=0 - ) - - return p - def likelihood( self, data: Optional[pd.DataFrame] = None, diff --git a/lymph/midline.py b/lymph/midline.py index cffa750..d180120 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -101,6 +101,18 @@ def midexstate_list(self): except AttributeError: self._gen_midexstate_list() return self._midexstate_list + + @property + def midexstate(self): + """Return the currently set state of the system. + """ + return self._midexstate + + @midexstate.setter + def midexstate(self, newstate: np.ndarray): + """Sets the state of the system to ``newstate``. + """ + self._midexstate = newstate @property def midext_prob(self): @@ -233,8 +245,8 @@ def _gen_midextransition_matrix(self): shape = (2**1, 2**1) self._midextransition_matrix = np.zeros(shape=shape) - for i,state in enumerate(self.midexstate_list): - self.state = state + for i,midexstate in enumerate(self.midexstate_list): + self.midexstate = midexstate for j in self.allowed_midextransitions[i]: midextransition_prob = self.comp_midextransition_prob(self.midexstate_list[j]) self._midextransition_matrix[i,j] = midextransition_prob @@ -342,17 +354,15 @@ def comp_midextransition_prob( Transition probability :math:`t`. """ res = 1. - for i, midexstate in enumerate(self.midexstates): - if not midexstate.state: - in_states = [1] - in_weights = [self.midext_prob] - res *= Node.trans_prob(in_states, in_weights)[newstate[i]] - elif not newstate[i]: - res = 0. - break + if not self.midexstate: + in_states = [1] + in_weights = [self.midext_prob] + res *= Node.trans_prob(in_states, in_weights)[newstate] + elif not newstate: + res = 0. if acquire: - self.state = newstate + self.midexstate = newstate return res @@ -477,13 +487,17 @@ def _evolve_onestep( """ # All healthy state at beginning if start_state is None: - start_state = np.zeros(shape=len(self.state_list), dtype=float) - start_state[0] = 1. + start_state = np.zeros( + shape=(self.diag_time_dists.max_t + 1, 2), + dtype=float + ) + start_state[0,0] = 1. # compute involvement at first time-step - new_state = start_state @ self.transition_matrix + for i in range(len(start_state)-1): + start_state[i+1,:] = start_state[i,:] @ self.transition_matrix - return new_state + return start_state def _evolve_midext( self, start_midexstate: Optional[np.ndarray] = None @@ -500,11 +514,18 @@ def _evolve_midext( :meta public: """ + if start_midexstate is None: + start_midexstate = np.zeros( + shape=(self.diag_time_dists.max_t + 1, 2), + dtype=float + ) + start_midexstate[0,0] = 1. # compute involvement at first time-step - new_midexstate = start_midexstate @ self.midextransition_matrix + for i in range(len(start_midexstate)-1): + start_midexstate[i+1,:] = start_midexstate[i,:] @ self.midextransition_matrix - return new_midexstate + return start_midexstate def load_data( self, @@ -654,24 +675,21 @@ def _likelihood_mid( ) state_probs_midext[0,0] = 1. - llh_ex = 0. if log else 1. - llh_nox = 0. if log else 1. + llh = 0. if log else 1. for stage in t_stages: - state_probs = np.zeros( - shape=(len_time_range + 1, len(self.state_list)), - dtype=float - ) state_probs_ipsi_nox = self.ipsi._evolve_onestep(start_state = state_probs_ipsi_nox) state_probs_contra_nox = self.contra._evolve_onestep(start_state = state_probs_contra_nox) - state_probs_ipsi_ex = state_probs_midext[1] * self.ipsi._evolve_onestep(start_state = state_probs_ipsi_ex) + state_probs_midext[0] * state_probs_ipsi_nox - state_probs_contra_ex = state_probs_midext[1] * self.contra._evolve_onestep(start_state = state_probs_contra_ex) + state_probs_midext[0] * state_probs_contra_nox - state_probs_midext = self._evolve_midext(start_midextstate = state_probs_midext) + state_probs_midext = self._evolve_midext(start_midexstate = state_probs_midext) + + for i in range(max_t): + state_probs_ipsi_ex[(i+1),:] = state_probs_midext[i,1] * self.ipsi._evolve_onestep(start_state = state_probs_ipsi_ex)[1,:] + state_probs_midext[i,0] * state_probs_ipsi_nox[(i+1),:] + state_probs_contra_ex[(i+1),:] = state_probs_midext[i,1] * self.contra._evolve_onestep(start_state = state_probs_contra_ex) + state_probs_midext[i,0] * state_probs_contra_nox[(i+1),:] - for i in len(self.diagnose_matrices_midext[stage][:,0]): + for i in range(len(self.diagnose_matrices_midext[stage][:,0])): if ((self.diagnose_matrices_midext[stage][i,0]==1) & (self.diagnose_matrices_midext[stage][i,1]==1)): - self.diagnose_matrices_midext[stage][i,0] = state_probs_midext[0] - self.diagnose_matrices_midext[stage][i,1] = state_probs_midext[1] + self.diagnose_matrices_midext[stage][i,0] = state_probs_midext[i,0] + self.diagnose_matrices_midext[stage][i,1] = state_probs_midext[i,1] joint_state_probs_nox = ( state_probs_ipsi_nox.T @@ -749,6 +767,7 @@ def likelihood( except ValueError: return -np.inf if log else 0. + llh = 0. if log else 1. if log: llh += self.ext._likelihood_mid(log=log) diff --git a/lymph/unilateral.py b/lymph/unilateral.py index ca9e695..c125db3 100644 --- a/lymph/unilateral.py +++ b/lymph/unilateral.py @@ -169,7 +169,6 @@ def state(self, newstate: np.ndarray): for i, node in enumerate(self.lnls): # only set lnl's states node.state = newstate[i] - @property def base_probs(self): """The spread probablities parametrizing the edges that represent the From d725fa3a514f05210d39a9280546483588eaf2bb Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Mon, 31 Jul 2023 17:18:05 +0200 Subject: [PATCH 21/35] fix errors in likelihood function and loading data --- lymph/bilateral.py | 1 + lymph/midline.py | 283 ++++++++++---------------------------------- lymph/unilateral.py | 29 +++++ 3 files changed, 95 insertions(+), 218 deletions(-) diff --git a/lymph/bilateral.py b/lymph/bilateral.py index b0701b8..45973b0 100644 --- a/lymph/bilateral.py +++ b/lymph/bilateral.py @@ -3,6 +3,7 @@ import numpy as np import pandas as pd +import pdb from .timemarg import MarginalizorDict from .unilateral import Unilateral diff --git a/lymph/midline.py b/lymph/midline.py index d180120..db4b1a4 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -1,7 +1,9 @@ +import warnings from typing import Dict, List, Optional, Tuple, Union import numpy as np import pandas as pd +import pdb from numpy.linalg import matrix_power as mat_pow from .bilateral import Bilateral @@ -205,36 +207,6 @@ def trans_probs(self, new_params: np.ndarray): # avoid unnecessary double computation of ipsilateral transition matrix self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix - def _gen_allowed_midextransitions(self): - """Generate the allowed midline extension transitions. - """ - self._allowed_midextransitions = {} - for i in range(len(self.midexstate_list)): - self._allowed_midextransitions[i] = [] - for j in range(len(self.midexstate_list)): - if not np.any(np.greater(self.midexstate_list[i,:], - self.midexstate_list[j,:])): - self._allowed_midextransitions[i].append(j) - - @property - def allowed_midextransitions(self): - """Return a dictionary that contains for each row :math:`i` of the - transition matrix :math:`\\mathbf{A}` the column numbers :math:`j` for - which the transtion probability :math:`P\\left( x_j \\mid x_i \\right)` - is not zero due to the forbidden self-healing. - - For example: The hidden state ``[True, False]`` in a network with only - one tumor and two LNLs (one involved, one healthy) corresponds to the - index ``1`` and can only evolve into the state ``[True, True]``, which - has index 3. So, the key-value pair for that particular hidden state - would be ``1: [3]``. - """ - try: - return self._allowed_midextransitions - except AttributeError: - self._gen_allowed_midextransitions() - return self._allowed_midextransitions - def _gen_midextransition_matrix(self): """Generate the midline extension transition matrix :math:`\\mathbf{A}`, which contains the :math:`P \\left( S_{t+1} \\mid S_t \\right)`. :math:`\\mathbf{A}` @@ -245,11 +217,10 @@ def _gen_midextransition_matrix(self): shape = (2**1, 2**1) self._midextransition_matrix = np.zeros(shape=shape) - for i,midexstate in enumerate(self.midexstate_list): - self.midexstate = midexstate - for j in self.allowed_midextransitions[i]: - midextransition_prob = self.comp_midextransition_prob(self.midexstate_list[j]) - self._midextransition_matrix[i,j] = midextransition_prob + self._midextransition_matrix[0,0] = 1 - self.midext_prob + self._midextransition_matrix[0,1] = self.midext_prob + self._midextransition_matrix[1,0] = 0 + self._midextransition_matrix[1,1] = 1 @property def midextransition_matrix(self) -> np.ndarray: @@ -267,24 +238,6 @@ def midextransition_matrix(self) -> np.ndarray: self._gen_midextransition_matrix() return self._midextransition_matrix - @property - def midextrans_probs(self) -> np.ndarray: - """Probabilities of lymphatic spread among the lymph node levels. They - are assumed to be symmetric ipsi- & contralaterally by default. - """ - return self.noext.midextrans_probs - - @midextrans_probs.setter - def midextrans_probs(self, new_params: np.ndarray): - """Set the new spread probabilities for lymphatic spread from among the - LNLs. - """ - self.noext.midextrans_probs = new_params - self.ext.midextrans_probs = new_params - - # avoid unnecessary double computation of ipsilateral transition matrix - self.noext.ipsi._midextransition_matrix = self.ext.ipsi.midextransition_matrix - @property def spread_probs(self) -> np.ndarray: """These are the probabilities representing the spread of cancer along @@ -333,38 +286,6 @@ def diag_time_dists(self, new_dists: Union[dict, MarginalizorDict]): """ self.ext.diag_time_dists = new_dists self.noext.diag_time_dists = self.ext.diag_time_dists - - def comp_midextransition_prob( - self, - newstate: List[int], - acquire: bool = False - ) -> float: - """Computes the probability to transition to ``newstate``, given its - current state. - - Args: - newstate: List of new states for each LNL in the lymphatic - system. The transition probability :math:`t` will be computed - from the current states to these states. - acquire: if ``True``, after computing and returning the probability, - the system updates its own state to be ``newstate``. - (default: ``False``) - - Returns: - Transition probability :math:`t`. - """ - res = 1. - if not self.midexstate: - in_states = [1] - in_weights = [self.midext_prob] - res *= Node.trans_prob(in_states, in_weights)[newstate] - elif not newstate: - res = 0. - - if acquire: - self.midexstate = newstate - - return res @property def modalities(self): @@ -406,7 +327,7 @@ def _gen_diagnose_matrices_midext(self, table: pd.DataFrame, t_stage: str): if not hasattr(self, "_diagnose_matrices_midext"): self._diagnose_matrices_midext = {} self._diagnose_matrices_midext[t_stage] = np.zeros((len(table), 2)) - for patient in len(table): + for patient in range(len(table)): if table.iloc[patient]['info', 'tumor', 'midline_extension'] == False: self.diagnose_matrices_midext[t_stage][patient, 0] = 1 self.diagnose_matrices_midext[t_stage][patient, 1] = 0 @@ -470,35 +391,6 @@ def patient_data(self, patient_data: pd.DataFrame): self._patient_data = patient_data.copy() self.load_data(patient_data) - def _evolve_onestep( - self, start_state: Optional[np.ndarray] = None - ) -> np.ndarray: - """Evolve hidden Markov model based system over one time step. Compute - :math:`p(S \\mid t)` where :math:`S` is a distinct state and :math:`t` - is the time. - - Args: - start_state: The current state. - - Returns: - A matrix with the new state - - :meta public: - """ - # All healthy state at beginning - if start_state is None: - start_state = np.zeros( - shape=(self.diag_time_dists.max_t + 1, 2), - dtype=float - ) - start_state[0,0] = 1. - - # compute involvement at first time-step - for i in range(len(start_state)-1): - start_state[i+1,:] = start_state[i,:] @ self.transition_matrix - - return start_state - def _evolve_midext( self, start_midexstate: Optional[np.ndarray] = None ) -> np.ndarray: @@ -588,6 +480,21 @@ def load_data( mode=mode ) + if mode=="HMM": + t_stages = list(set(data[("info", "tumor", "t_stage")])) + + for stage in t_stages: + table = data.loc[ + data[("info", "tumor", "t_stage")] == stage + ] + self._gen_diagnose_matrices_midext(table, stage) + if stage not in self.diag_time_dists: + warnings.warn( + "No distribution for marginalizing over diagnose times has " + f"been defined for T-stage {stage}. During inference, all " + "patients in this T-stage will be ignored." + ) + def check_and_assign(self, new_params: np.ndarray): """Check that the spread probability (rates) and the parameters for the marginalization over diagnose times are all within limits and assign them to @@ -634,9 +541,11 @@ def check_and_assign(self, new_params: np.ndarray): self.spread_probs = new_spread_probs - def _likelihood_mid( + def likelihood( self, - log: bool = True + data: Optional[pd.DataFrame] = None, + given_params: Optional[np.ndarray] = None, + log: bool = True, ) -> float: """Compute the (log-)likelihood of data, using the stored spread probs and fixed distributions for marginalizing over diagnose times. @@ -644,141 +553,79 @@ def _likelihood_mid( This method mainly exists so that the checking and assigning of the spread probs can be skipped. """ - stored_t_stages = set(self.ipsi.diagnose_matrices.keys()) - provided_t_stages = set(self.ipsi.diag_time_dists.keys()) + if data is not None: + self.patient_data = data + + try: + self.check_and_assign(given_params) + except ValueError: + return -np.inf if log else 0. + + llh = 0. if log else 1. + + stored_t_stages = set(self.ext.ipsi.diagnose_matrices.keys()) + provided_t_stages = set(self.ext.ipsi.diag_time_dists.keys()) t_stages = list(stored_t_stages.intersection(provided_t_stages)) max_t = self.diag_time_dists.max_t + llh = 0. if log else 1. + + state_probs_ipsi_nox = self.noext.ipsi._evolve_onestep() + state_probs_contra_nox = self.noext.contra._evolve_onestep() + state_probs_midext = self._evolve_midext() state_probs_ipsi_ex = np.zeros( - shape=(max_t + 1, len(self.state_list)), - dtype=float + shape=(max_t + 1, len(self.ext.ipsi.state_list)), + dtype=float ) state_probs_ipsi_ex[0,0] = 1. - state_probs_ipsi_nox = np.zeros( - shape=(max_t + 1, len(self.state_list)), - dtype=float - ) - state_probs_ipsi_nox[0,0] = 1. state_probs_contra_ex = np.zeros( - shape=(max_t + 1, len(self.state_list)), - dtype=float + shape=(max_t + 1, len(self.ext.ipsi.state_list)), + dtype=float ) state_probs_contra_ex[0,0] = 1. - state_probs_contra_nox = np.zeros( - shape=(max_t + 1, len(self.state_list)), - dtype=float - ) - state_probs_contra_nox[0,0] = 1. - state_probs_midext = np.zeros( - shape=(max_t + 1, 2), - dtype=float - ) - state_probs_midext[0,0] = 1. - - llh = 0. if log else 1. for stage in t_stages: - state_probs_ipsi_nox = self.ipsi._evolve_onestep(start_state = state_probs_ipsi_nox) - state_probs_contra_nox = self.contra._evolve_onestep(start_state = state_probs_contra_nox) - state_probs_midext = self._evolve_midext(start_midexstate = state_probs_midext) - for i in range(max_t): - state_probs_ipsi_ex[(i+1),:] = state_probs_midext[i,1] * self.ipsi._evolve_onestep(start_state = state_probs_ipsi_ex)[1,:] + state_probs_midext[i,0] * state_probs_ipsi_nox[(i+1),:] - state_probs_contra_ex[(i+1),:] = state_probs_midext[i,1] * self.contra._evolve_onestep(start_state = state_probs_contra_ex) + state_probs_midext[i,0] * state_probs_contra_nox[(i+1),:] - - for i in range(len(self.diagnose_matrices_midext[stage][:,0])): - if ((self.diagnose_matrices_midext[stage][i,0]==1) & (self.diagnose_matrices_midext[stage][i,1]==1)): - self.diagnose_matrices_midext[stage][i,0] = state_probs_midext[i,0] - self.diagnose_matrices_midext[stage][i,1] = state_probs_midext[i,1] + state_probs_ipsi_ex[(i+1),:] = state_probs_midext[i,1] * state_probs_ipsi_ex[i,:] @ self.ext.ipsi.transition_matrix + state_probs_midext[i,0] * state_probs_ipsi_nox[(i+1),:] + state_probs_contra_ex[(i+1),:] = state_probs_midext[i,1] * state_probs_contra_ex[i,:] @ self.ext.contra.transition_matrix + state_probs_midext[i,0] * state_probs_contra_nox[(i+1),:] joint_state_probs_nox = ( state_probs_ipsi_nox.T - @ np.diag(self.ipsi.diag_time_dists[stage].pmf) + @ np.diag(self.noext.ipsi.diag_time_dists[stage].pmf) @ state_probs_contra_nox ) joint_state_probs_ex = ( state_probs_ipsi_ex.T - @ np.diag(self.ipsi.diag_time_dists[stage].pmf) + @ np.diag(self.ext.ipsi.diag_time_dists[stage].pmf) @ state_probs_contra_ex ) p_ex = np.sum( - self.ipsi.diagnose_matrices[stage] + self.ext.ipsi.diagnose_matrices[stage] * (joint_state_probs_ex - @ self.contra.diagnose_matrices[stage]), + @ self.ext.contra.diagnose_matrices[stage]), axis=0 ) - p_ex_weighted = p_ex.T * self.diagnose_matrices_midext[stage][:,1] p_nox = np.sum( - self.ipsi.diagnose_matrices[stage] + self.noext.ipsi.diagnose_matrices[stage] * (joint_state_probs_nox - @ self.contra.diagnose_matrices[stage]), + @ self.noext.contra.diagnose_matrices[stage]), axis=0 ) - p_nox_weighted = p_nox.T * self.diagnose_matrices_midext[stage][:,0] - if log: - llh += np.sum(np.log(p_ex_weighted)) + np.sum(np.log(p_nox_weighted)) - else: - llh *= np.prod(p_ex_weighted) * np.prod(p_nox_weighted) - return llh + p = np.array([p_nox,p_ex]) * np.array([[state_probs_midext[max_t,0]], [state_probs_midext[max_t,1]]]) - def likelihood( - self, - data: Optional[pd.DataFrame] = None, - given_params: Optional[np.ndarray] = None, - log: bool = True, - ) -> float: - """Compute log-likelihood of (already stored) data, given the spread - probabilities and either a discrete diagnose time or a distribution to - use for marginalization over diagnose times. - - Args: - data: Table with rows of patients and columns of per-LNL involvment. See - :meth:`load_data` for more details on how this should look like. - - given_params: The likelihood is a function of these parameters. They mainly - consist of the :attr:`spread_probs` of the model. Any excess parameters - will be used to update the parametrized distributions used for - marginalizing over the diagnose times (see :attr:`diag_time_dists`). - - log: When ``True``, the log-likelihood is returned. - - Returns: - The log-likelihood :math:`\\log{p(D \\mid \\theta)}` where :math:`D` - is the data and :math:`\\theta` is the tuple of spread probabilities - and diagnose times or distributions over diagnose times. - - See Also: - :attr:`spread_probs`: Property for getting and setting the spread - probabilities, of which a lymphatic network has as many as it has - :class:`Edge` instances (in case no symmetries apply). - - :meth:`Unilateral.likelihood`: The log-likelihood function of - the unilateral system. - - :meth:`Bilateral.likelihood`: The (log-)likelihood function of the - bilateral system. - """ - if data is not None: - self.patient_data = data - - try: - self.check_and_assign(given_params) - except ValueError: - return -np.inf if log else 0. - - llh = 0. if log else 1. - - if log: - llh += self.ext._likelihood_mid(log=log) - llh += self.noext._likelihood_mid(log=log) - else: - llh *= self.ext._likelihood_mid(log=log) - llh *= self.noext._likelihood_mid(log=log) + if log: + p = np.log(p) + #data not yet loaded correctly (or at all) + llh_nox_ex = p @ self.diagnose_matrices_midext[stage] + llh += np.sum(llh_nox_ex) + #probably wrong. Matrix multiplication sums over patients but it should multiply. + else: + llh_nox_ex = p @ self.diagnose_matrices_midext[stage] + llh *= np.prod(llh_nox_ex) return llh - def risk( self, given_params: Optional[np.ndarray] = None, diff --git a/lymph/unilateral.py b/lymph/unilateral.py index c125db3..9a7d583 100644 --- a/lymph/unilateral.py +++ b/lymph/unilateral.py @@ -787,6 +787,35 @@ def _evolve( state_probs[-1] = state return state_probs + + def _evolve_onestep( + self, start_state: Optional[np.ndarray] = None + ) -> np.ndarray: + """Evolve hidden Markov model based system over one time step. Compute + :math:`p(S \\mid t)` where :math:`S` is a distinct state and :math:`t` + is the time. + + Args: + start_state: The current state. + + Returns: + A matrix with the new state + + :meta public: + """ + # All healthy state at beginning + if start_state is None: + start_state = np.zeros( + shape=(self.diag_time_dists.max_t + 1, len(self.state_list)), + dtype=float + ) + start_state[0,0] = 1. + + # compute involvement at first time-step + for i in range(len(start_state)-1): + start_state[i+1,:] = start_state[i,:] @ self.transition_matrix + + return start_state def check_and_assign(self, new_params: np.ndarray): """Check that the spread probability (rates) and the parameters for the From aa5ceacf7165d095528c69aba68df0a4008ae6fe Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Mon, 31 Jul 2023 17:18:49 +0200 Subject: [PATCH 22/35] fix errors in likelihood function and loading data --- lymph/midline.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lymph/midline.py b/lymph/midline.py index db4b1a4..f3944ca 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -616,7 +616,6 @@ def likelihood( if log: p = np.log(p) - #data not yet loaded correctly (or at all) llh_nox_ex = p @ self.diagnose_matrices_midext[stage] llh += np.sum(llh_nox_ex) #probably wrong. Matrix multiplication sums over patients but it should multiply. From 74034ea0a267317db971aa483dab12aaa27e82aa Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Thu, 3 Aug 2023 15:24:18 +0200 Subject: [PATCH 23/35] fix errors in midline extension transition matrixand delete unused code --- lymph/midline.py | 83 +++++++-------------------------------------- lymph/unilateral.py | 1 - 2 files changed, 12 insertions(+), 72 deletions(-) diff --git a/lymph/midline.py b/lymph/midline.py index f3944ca..0200298 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -74,7 +74,6 @@ def __init__( self.alpha_mix = 0. self.noext.diag_time_dists = self.ext.diag_time_dists - self.midexstates = ["0", "1"] @property def graph(self) -> Dict[Tuple[str], List[str]]: @@ -82,40 +81,6 @@ def graph(self) -> Dict[Tuple[str], List[str]]: """ return self.noext.graph - def _gen_midexstate_list(self): - """Generates the list of (hidden) midline states. - """ - if not hasattr(self, "_midexstate_list"): - self._midexstate_list = np.zeros( - shape=(2**1, 1), dtype=int - ) - for i in range(2**1): - self._midexstate_list[i] = [ - int(digit) for digit in change_base(i, 2, length=1) - ] - - @property - def midexstate_list(self): - """Return list of all possible hidden midline states. - """ - try: - return self._midexstate_list - except AttributeError: - self._gen_midexstate_list() - return self._midexstate_list - - @property - def midexstate(self): - """Return the currently set state of the system. - """ - return self._midexstate - - @midexstate.setter - def midexstate(self, newstate: np.ndarray): - """Sets the state of the system to ``newstate``. - """ - self._midexstate = newstate - @property def midext_prob(self): """Assign the last of the new_params to the midline extension probability @@ -177,7 +142,7 @@ def base_probs(self, new_params: np.ndarray): if self.use_mixing: self.noext.contra.base_probs = new_params[k:2*k] - self.alpha_mix = new_params[-1] + self.alpha_mix = new_params[-2] # compute linear combination self.ext.contra.base_probs = ( self.alpha_mix * self.ext.ipsi.base_probs @@ -207,37 +172,6 @@ def trans_probs(self, new_params: np.ndarray): # avoid unnecessary double computation of ipsilateral transition matrix self.noext.ipsi._transition_matrix = self.ext.ipsi.transition_matrix - def _gen_midextransition_matrix(self): - """Generate the midline extension transition matrix :math:`\\mathbf{A}`, which contains - the :math:`P \\left( S_{t+1} \\mid S_t \\right)`. :math:`\\mathbf{A}` - is a square matrix with size ``(# of states)``. The lower diagonal is - zero. - """ - if not hasattr(self, "_midextransition_matrix"): - shape = (2**1, 2**1) - self._midextransition_matrix = np.zeros(shape=shape) - - self._midextransition_matrix[0,0] = 1 - self.midext_prob - self._midextransition_matrix[0,1] = self.midext_prob - self._midextransition_matrix[1,0] = 0 - self._midextransition_matrix[1,1] = 1 - - @property - def midextransition_matrix(self) -> np.ndarray: - """Return the midline extension listtransition matrix :math:`\\mathbf{A}`, which contains the - probability to transition from any state :math:`S_t` to any other state - :math:`S_{t+1}` one timestep later: - :math:`P \\left( S_{t+1} \\mid S_t \\right)`. :math:`\\mathbf{A}` is a - square matrix with size ``(# of states)``. The lower diagonal is zero, - because those entries correspond to transitions that would require - self-healing. - """ - try: - return self._midextransition_matrix - except AttributeError: - self._gen_midextransition_matrix() - return self._midextransition_matrix - @property def spread_probs(self) -> np.ndarray: """These are the probabilities representing the spread of cancer along @@ -392,7 +326,7 @@ def patient_data(self, patient_data: pd.DataFrame): self.load_data(patient_data) def _evolve_midext( - self, start_midexstate: Optional[np.ndarray] = None + self, new_params: np.ndarray, start_midexstate: Optional[np.ndarray] = None ) -> np.ndarray: """Evolve hidden Markov model based system over one time step. Compute :math:`p(S \\mid t)` where :math:`S` is a distinct state and :math:`t` @@ -412,11 +346,18 @@ def _evolve_midext( dtype=float ) start_midexstate[0,0] = 1. + + self.midext_prob = new_params + + midextransition_matrix = np.zeros(shape=(2**1, 2**1)) + midextransition_matrix[0,0] = 1 - self.midext_prob + midextransition_matrix[0,1] = self.midext_prob + midextransition_matrix[1,0] = 0 + midextransition_matrix[1,1] = 1 # compute involvement at first time-step for i in range(len(start_midexstate)-1): - start_midexstate[i+1,:] = start_midexstate[i,:] @ self.midextransition_matrix - + start_midexstate[i+1,:] = start_midexstate[i,:] @ midextransition_matrix return start_midexstate def load_data( @@ -572,7 +513,7 @@ def likelihood( state_probs_ipsi_nox = self.noext.ipsi._evolve_onestep() state_probs_contra_nox = self.noext.contra._evolve_onestep() - state_probs_midext = self._evolve_midext() + state_probs_midext = self._evolve_midext(new_params = given_params) state_probs_ipsi_ex = np.zeros( shape=(max_t + 1, len(self.ext.ipsi.state_list)), dtype=float diff --git a/lymph/unilateral.py b/lymph/unilateral.py index 9a7d583..ce6ddde 100644 --- a/lymph/unilateral.py +++ b/lymph/unilateral.py @@ -814,7 +814,6 @@ def _evolve_onestep( # compute involvement at first time-step for i in range(len(start_state)-1): start_state[i+1,:] = start_state[i,:] @ self.transition_matrix - return start_state def check_and_assign(self, new_params: np.ndarray): From 1d73a8b89a116d8a3150f011a2226cffc1e97b2f Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Wed, 9 Aug 2023 00:12:36 +0200 Subject: [PATCH 24/35] optimize code for readability --- lymph/midline.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/lymph/midline.py b/lymph/midline.py index 0200298..e0e44bd 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -261,17 +261,18 @@ def _gen_diagnose_matrices_midext(self, table: pd.DataFrame, t_stage: str): if not hasattr(self, "_diagnose_matrices_midext"): self._diagnose_matrices_midext = {} self._diagnose_matrices_midext[t_stage] = np.zeros((len(table), 2)) - for patient in range(len(table)): - if table.iloc[patient]['info', 'tumor', 'midline_extension'] == False: - self.diagnose_matrices_midext[t_stage][patient, 0] = 1 - self.diagnose_matrices_midext[t_stage][patient, 1] = 0 - if table.iloc[patient]['info', 'tumor', 'midline_extension'] == True: - self.diagnose_matrices_midext[t_stage][patient, 0] = 0 - self.diagnose_matrices_midext[t_stage][patient, 1] = 1 - if (table.iloc[patient]['info', 'tumor', 'midline_extension'] != False) & (table.iloc[patient]['info', 'tumor', 'midline_extension'] != True): - self.diagnose_matrices_midext[t_stage][patient, 0] = 1 - self.diagnose_matrices_midext[t_stage][patient, 1] = 1 - + midext_column = table["info", "tumor", "midline_extension"] + for idx, midline_extension in midext_column.items(): + if pd.isna(midline_extension): + self.diagnose_matrices_midext[t_stage][idx, 0] = 1 + self.diagnose_matrices_midext[t_stage][idx, 1] = 1 + elif midline_extension: + self.diagnose_matrices_midext[t_stage][idx, 0] = 0 + self.diagnose_matrices_midext[t_stage][idx, 1] = 1 + elif not midline_extension: + self.diagnose_matrices_midext[t_stage][idx, 0] = 1 + self.diagnose_matrices_midext[t_stage][idx, 1] = 0 + @property def diagnose_matrices_midext(self): try: @@ -349,13 +350,13 @@ def _evolve_midext( self.midext_prob = new_params - midextransition_matrix = np.zeros(shape=(2**1, 2**1)) - midextransition_matrix[0,0] = 1 - self.midext_prob - midextransition_matrix[0,1] = self.midext_prob - midextransition_matrix[1,0] = 0 + midextransition_matrix = np.array( + [[1 - self.midext_prob, self.midext_prob], + [0. , 1. ]] + ) midextransition_matrix[1,1] = 1 - # compute involvement at first time-step + # compute involvement for all time steps for i in range(len(start_midexstate)-1): start_midexstate[i+1,:] = start_midexstate[i,:] @ midextransition_matrix return start_midexstate From b95b84664568226c5006f1021c220c6c61dbe4fe Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Mon, 14 Aug 2023 09:01:22 +0200 Subject: [PATCH 25/35] rename evolve_onestep function to evolve_stepwise --- lymph/unilateral.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lymph/unilateral.py b/lymph/unilateral.py index ce6ddde..d668964 100644 --- a/lymph/unilateral.py +++ b/lymph/unilateral.py @@ -788,7 +788,7 @@ def _evolve( return state_probs - def _evolve_onestep( + def _evolve_stepwise( self, start_state: Optional[np.ndarray] = None ) -> np.ndarray: """Evolve hidden Markov model based system over one time step. Compute From 09b101db74255485cdfe282a19180ef2ca151483 Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Mon, 14 Aug 2023 09:01:45 +0200 Subject: [PATCH 26/35] fix problems with mixing parameter and likelihood --- lymph/midline.py | 47 ++++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/lymph/midline.py b/lymph/midline.py index e0e44bd..ca99fab 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -261,18 +261,30 @@ def _gen_diagnose_matrices_midext(self, table: pd.DataFrame, t_stage: str): if not hasattr(self, "_diagnose_matrices_midext"): self._diagnose_matrices_midext = {} self._diagnose_matrices_midext[t_stage] = np.zeros((len(table), 2)) - midext_column = table["info", "tumor", "midline_extension"] - for idx, midline_extension in midext_column.items(): - if pd.isna(midline_extension): - self.diagnose_matrices_midext[t_stage][idx, 0] = 1 - self.diagnose_matrices_midext[t_stage][idx, 1] = 1 + midline_extension = table['info', 'tumor', 'midline_extension'] + for patient in range(len(table)): + if table.iloc[patient]['info', 'tumor', 'midline_extension'] == False: + self.diagnose_matrices_midext[t_stage][patient, 0] = 0 + self.diagnose_matrices_midext[t_stage][patient, 1] = 1 + if table.iloc[patient]['info', 'tumor', 'midline_extension'] == True: + self.diagnose_matrices_midext[t_stage][patient, 0] = 1 + self.diagnose_matrices_midext[t_stage][patient, 1] = 0 + if (table.iloc[patient]['info', 'tumor', 'midline_extension'] != False) & (table.iloc[patient]['info', 'tumor', 'midline_extension'] != True): + self.diagnose_matrices_midext[t_stage][patient, 0] = 1 + self.diagnose_matrices_midext[t_stage][patient, 1] = 1 + """ + for patient, row in table.iterrows(): + midline_extension = row['info', 'tumor', 'midline_extension'] + if not midline_extension: + self.diagnose_matrices_midext[t_stage][patient, 0] = 0 + self.diagnose_matrices_midext[t_stage][patient, 1] = 1 elif midline_extension: - self.diagnose_matrices_midext[t_stage][idx, 0] = 0 - self.diagnose_matrices_midext[t_stage][idx, 1] = 1 - elif not midline_extension: - self.diagnose_matrices_midext[t_stage][idx, 0] = 1 - self.diagnose_matrices_midext[t_stage][idx, 1] = 0 - + self.diagnose_matrices_midext[t_stage][patient, 0] = 1 + self.diagnose_matrices_midext[t_stage][patient, 1] = 0 + else: + self.diagnose_matrices_midext[t_stage][patient, 0] = 1 + self.diagnose_matrices_midext[t_stage][patient, 1] = 1 + """ @property def diagnose_matrices_midext(self): try: @@ -512,8 +524,8 @@ def likelihood( max_t = self.diag_time_dists.max_t llh = 0. if log else 1. - state_probs_ipsi_nox = self.noext.ipsi._evolve_onestep() - state_probs_contra_nox = self.noext.contra._evolve_onestep() + state_probs_ipsi_nox = self.noext.ipsi._evolve_stepwise() + state_probs_contra_nox = self.noext.contra._evolve_stepwise() state_probs_midext = self._evolve_midext(new_params = given_params) state_probs_ipsi_ex = np.zeros( shape=(max_t + 1, len(self.ext.ipsi.state_list)), @@ -557,12 +569,13 @@ def likelihood( p = np.array([p_nox,p_ex]) * np.array([[state_probs_midext[max_t,0]], [state_probs_midext[max_t,1]]]) if log: - p = np.log(p) - llh_nox_ex = p @ self.diagnose_matrices_midext[stage] + llh_nox_ex = p.T * self.diagnose_matrices_midext[stage] + llh_nox_ex = np.log(llh_nox_ex[llh_nox_ex !=0]) llh += np.sum(llh_nox_ex) - #probably wrong. Matrix multiplication sums over patients but it should multiply. + else: - llh_nox_ex = p @ self.diagnose_matrices_midext[stage] + llh_nox_ex = p.T * self.diagnose_matrices_midext[stage] + llh_nox_ex = llh_nox_ex[llh_nox_ex !=0] llh *= np.prod(llh_nox_ex) return llh From 27cdb1678d71ec7d287c32f6d10883edba905833 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Tue, 15 Aug 2023 16:40:09 +0200 Subject: [PATCH 27/35] change: try potential fix for midline evolution --- .pre-commit-config.yaml | 20 ++++- lymph/midline.py | 189 ++++++++++++++++------------------------ lymph/unilateral.py | 29 +----- 3 files changed, 91 insertions(+), 147 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a8d3f74..e939d45 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,21 +1,33 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.3.0 + rev: v4.4.0 hooks: - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-merge-conflict + - id: check-toml + - id: check-yaml - repo: https://github.com/hadialqattan/pycln - rev: v1.1.0 + rev: v2.1.5 hooks: - id: pycln args: [--config=pyproject.toml] - repo: https://github.com/PyCQA/isort - rev: 5.10.1 + rev: 5.12.0 hooks: - id: isort files: "\\.(py)$" args: [--settings-path=pyproject.toml] +- repo: https://github.com/asottile/pyupgrade + rev: v3.9.0 + hooks: + - id: pyupgrade +- repo: https://github.com/kynan/nbstripout + rev: 0.6.0 + hooks: + - id: nbstripout - repo: https://github.com/compilerla/conventional-pre-commit - rev: v2.0.0 + rev: v2.3.0 hooks: - id: conventional-pre-commit stages: [commit-msg] diff --git a/lymph/midline.py b/lymph/midline.py index ca99fab..2b6c92e 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -3,13 +3,9 @@ import numpy as np import pandas as pd -import pdb -from numpy.linalg import matrix_power as mat_pow from .bilateral import Bilateral -from .node import Node from .timemarg import MarginalizorDict -from .unilateral import change_base class MidlineBilateral: @@ -38,6 +34,7 @@ def __init__( self, graph: Dict[Tuple[str], List[str]], use_mixing: bool = True, + evolve_midext: bool = False, trans_symmetric: bool = True, **_kwargs ): @@ -73,30 +70,16 @@ def __init__( if self.use_mixing: self.alpha_mix = 0. + self.evolve_midext = evolve_midext + if self.evolve_midext: + self.midext = 0. + self.noext.diag_time_dists = self.ext.diag_time_dists @property def graph(self) -> Dict[Tuple[str], List[str]]: - """Return the (unilateral) graph that was used to create this network. - """ + """Return the (unilateral) graph that was used to create this network.""" return self.noext.graph - - @property - def midext_prob(self): - """Assign the last of the new_params to the midline extension probability - """ - try: - return self._midext_prob - except AttributeError as attr_err: - raise AttributeError( - "No midline extension probability has been assigned" - ) from attr_err - - @midext_prob.setter - def midext_prob(self, new_params): - """A variable containing the midline extension probability - """ - self._midext_prob = new_params[-1] @property def base_probs(self) -> np.ndarray: @@ -123,12 +106,11 @@ def base_probs(self) -> np.ndarray: self.noext.contra.base_probs, [self.alpha_mix], ]) - else: - return np.concatenate([ - self.ext.ipsi.base_probs, - self.ext.contra.base_probs, - self.noext.contra.base_probs, - ]) + return np.concatenate([ + self.ext.ipsi.base_probs, + self.ext.contra.base_probs, + self.noext.contra.base_probs, + ]) @base_probs.setter def base_probs(self, new_params: np.ndarray): @@ -180,10 +162,13 @@ def spread_probs(self) -> np.ndarray: It is composed of the base spread probs (possible with the mixing parameter) and the probabilities of spread among the LNLs. - +-------------+-------------+ - | base probs | trans probs | - +-------------+-------------+ + +-------------+-------------+-------------+ + | base probs | trans probs | midext prob | + +-------------+-------------+-------------+ """ + if self.evolve_midext: + return np.concatenate([self.base_probs, self.trans_probs, self.midext_prob]) + return np.concatenate([self.base_probs, self.trans_probs]) @spread_probs.setter @@ -193,8 +178,14 @@ def spread_probs(self, new_params: np.ndarray): """ num_base_probs = len(self.base_probs) + if self.evolve_midext: + self.midext_prob = new_params[-1] + end = -1 + else: + end = None + self.base_probs = new_params[:num_base_probs] - self.trans_probs = new_params[num_base_probs:] + self.trans_probs = new_params[num_base_probs:end] @property @@ -260,31 +251,19 @@ def _gen_diagnose_matrices_midext(self, table: pd.DataFrame, t_stage: str): """ if not hasattr(self, "_diagnose_matrices_midext"): self._diagnose_matrices_midext = {} + self._diagnose_matrices_midext[t_stage] = np.zeros((len(table), 2)) - midline_extension = table['info', 'tumor', 'midline_extension'] - for patient in range(len(table)): - if table.iloc[patient]['info', 'tumor', 'midline_extension'] == False: - self.diagnose_matrices_midext[t_stage][patient, 0] = 0 - self.diagnose_matrices_midext[t_stage][patient, 1] = 1 - if table.iloc[patient]['info', 'tumor', 'midline_extension'] == True: - self.diagnose_matrices_midext[t_stage][patient, 0] = 1 - self.diagnose_matrices_midext[t_stage][patient, 1] = 0 - if (table.iloc[patient]['info', 'tumor', 'midline_extension'] != False) & (table.iloc[patient]['info', 'tumor', 'midline_extension'] != True): - self.diagnose_matrices_midext[t_stage][patient, 0] = 1 - self.diagnose_matrices_midext[t_stage][patient, 1] = 1 - """ - for patient, row in table.iterrows(): - midline_extension = row['info', 'tumor', 'midline_extension'] - if not midline_extension: - self.diagnose_matrices_midext[t_stage][patient, 0] = 0 - self.diagnose_matrices_midext[t_stage][patient, 1] = 1 - elif midline_extension: - self.diagnose_matrices_midext[t_stage][patient, 0] = 1 - self.diagnose_matrices_midext[t_stage][patient, 1] = 0 - else: - self.diagnose_matrices_midext[t_stage][patient, 0] = 1 - self.diagnose_matrices_midext[t_stage][patient, 1] = 1 - """ + for idx, (_, patient) in enumerate(table.iterrows()): + midext = patient["info", "tumor", "midline_extension"] + if midext == False: + self.diagnose_matrices_midext[t_stage][idx] = np.array([0, 1]) + + if midext == True: + self.diagnose_matrices_midext[t_stage][idx] = np.array([1, 0]) + + if pd.isna(midext): + self.diagnose_matrices_midext[t_stage][idx] = np.array([1, 1]) + @property def diagnose_matrices_midext(self): try: @@ -338,40 +317,31 @@ def patient_data(self, patient_data: pd.DataFrame): self._patient_data = patient_data.copy() self.load_data(patient_data) - def _evolve_midext( - self, new_params: np.ndarray, start_midexstate: Optional[np.ndarray] = None - ) -> np.ndarray: + def _evolve_midext(self) -> np.ndarray: """Evolve hidden Markov model based system over one time step. Compute :math:`p(S \\mid t)` where :math:`S` is a distinct state and :math:`t` is the time. - Args: - start_midexstate: The current midline extension state. - Returns: The new midline extension state :meta public: """ - if start_midexstate is None: - start_midexstate = np.zeros( - shape=(self.diag_time_dists.max_t + 1, 2), - dtype=float - ) - start_midexstate[0,0] = 1. - - self.midext_prob = new_params - - midextransition_matrix = np.array( - [[1 - self.midext_prob, self.midext_prob], - [0. , 1. ]] + midext_states = np.zeros( + shape=(self.diag_time_dists.max_t + 1, 2), + dtype=float ) - midextransition_matrix[1,1] = 1 + midext_states[0,0] = 1. + + midextransition_matrix = np.array([ + [1 - self.midext_prob, self.midext_prob], + [0. , 1. ], + ]) # compute involvement for all time steps - for i in range(len(start_midexstate)-1): - start_midexstate[i+1,:] = start_midexstate[i,:] @ midextransition_matrix - return start_midexstate + for i in range(len(midext_states)-1): + midext_states[i+1,:] = midext_states[i,:] @ midextransition_matrix + return midext_states def load_data( self, @@ -427,7 +397,7 @@ def load_data( modality_spsn=modality_spsn, mode=mode ) - + self.noext.load_data( data, modality_spsn=modality_spsn, @@ -474,7 +444,6 @@ def check_and_assign(self, new_params: np.ndarray): self.ext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists self.noext.ipsi.diag_time_dists = self.ext.ipsi.diag_time_dists self.noext.contra.diag_time_dists = self.ext.ipsi.diag_time_dists - self.midext_prob = new_params except ValueError as val_err: raise ValueError( "Parameters for marginalization over diagnose times are invalid" @@ -488,10 +457,6 @@ def check_and_assign(self, new_params: np.ndarray): raise ValueError( "Spread probs must be between 0 and 1" ) - if 0. > self.midext_prob or self.midext_prob > 1.: - raise ValueError( - "Midline extension probability must be between 0 and 1" - ) self.spread_probs = new_spread_probs @@ -520,63 +485,57 @@ def likelihood( stored_t_stages = set(self.ext.ipsi.diagnose_matrices.keys()) provided_t_stages = set(self.ext.ipsi.diag_time_dists.keys()) t_stages = list(stored_t_stages.intersection(provided_t_stages)) - + max_t = self.diag_time_dists.max_t llh = 0. if log else 1. - state_probs_ipsi_nox = self.noext.ipsi._evolve_stepwise() - state_probs_contra_nox = self.noext.contra._evolve_stepwise() - state_probs_midext = self._evolve_midext(new_params = given_params) - state_probs_ipsi_ex = np.zeros( - shape=(max_t + 1, len(self.ext.ipsi.state_list)), - dtype=float - ) - state_probs_ipsi_ex[0,0] = 1. + state_probs_ipsi = self.noext.ipsi._evolve(t_last=max_t) + state_probs_contra_nox = self.noext.contra._evolve(t_last=max_t) + state_probs_midext = self._evolve_midext() + state_probs_contra_ex = np.zeros( - shape=(max_t + 1, len(self.ext.ipsi.state_list)), - dtype=float + shape=(max_t + 1, len(self.ext.ipsi.state_list)), + dtype=float ) state_probs_contra_ex[0,0] = 1. for stage in t_stages: for i in range(max_t): - state_probs_ipsi_ex[(i+1),:] = state_probs_midext[i,1] * state_probs_ipsi_ex[i,:] @ self.ext.ipsi.transition_matrix + state_probs_midext[i,0] * state_probs_ipsi_nox[(i+1),:] - state_probs_contra_ex[(i+1),:] = state_probs_midext[i,1] * state_probs_contra_ex[i,:] @ self.ext.contra.transition_matrix + state_probs_midext[i,0] * state_probs_contra_nox[(i+1),:] + state_probs_contra_ex[i + 1] = ( + state_probs_midext[i,1] * state_probs_contra_ex[i] @ self.ext.contra.transition_matrix + + state_probs_midext[i,0] * state_probs_contra_nox[i + 1] + ) joint_state_probs_nox = ( - state_probs_ipsi_nox.T + state_probs_ipsi.T @ np.diag(self.noext.ipsi.diag_time_dists[stage].pmf) @ state_probs_contra_nox ) joint_state_probs_ex = ( - state_probs_ipsi_ex.T + state_probs_ipsi.T @ np.diag(self.ext.ipsi.diag_time_dists[stage].pmf) @ state_probs_contra_ex ) - p_ex = np.sum( - self.ext.ipsi.diagnose_matrices[stage] - * (joint_state_probs_ex - @ self.ext.contra.diagnose_matrices[stage]), - axis=0 - ) p_nox = np.sum( self.noext.ipsi.diagnose_matrices[stage] * (joint_state_probs_nox @ self.noext.contra.diagnose_matrices[stage]), axis=0 ) + p_ex = np.sum( + self.ext.ipsi.diagnose_matrices[stage] + * (joint_state_probs_ex + @ self.ext.contra.diagnose_matrices[stage]), + axis=0 + ) - p = np.array([p_nox,p_ex]) * np.array([[state_probs_midext[max_t,0]], [state_probs_midext[max_t,1]]]) + p = np.vstack((p_nox, p_ex)) + stage_llh = p @ self.diagnose_matrices_midext[stage] if log: - llh_nox_ex = p.T * self.diagnose_matrices_midext[stage] - llh_nox_ex = np.log(llh_nox_ex[llh_nox_ex !=0]) - llh += np.sum(llh_nox_ex) - + llh += np.sum(np.log(stage_llh)) else: - llh_nox_ex = p.T * self.diagnose_matrices_midext[stage] - llh_nox_ex = llh_nox_ex[llh_nox_ex !=0] - llh *= np.prod(llh_nox_ex) + llh *= np.prod(stage_llh) return llh @@ -639,4 +598,4 @@ def generate_dataset( dataset.loc[drawn_ext] = ext_dataset.loc[drawn_ext] dataset[('info', 'tumor', 'midline_extension')] = drawn_ext - return dataset \ No newline at end of file + return dataset diff --git a/lymph/unilateral.py b/lymph/unilateral.py index d668964..8a3321b 100644 --- a/lymph/unilateral.py +++ b/lymph/unilateral.py @@ -787,34 +787,7 @@ def _evolve( state_probs[-1] = state return state_probs - - def _evolve_stepwise( - self, start_state: Optional[np.ndarray] = None - ) -> np.ndarray: - """Evolve hidden Markov model based system over one time step. Compute - :math:`p(S \\mid t)` where :math:`S` is a distinct state and :math:`t` - is the time. - - Args: - start_state: The current state. - - Returns: - A matrix with the new state - :meta public: - """ - # All healthy state at beginning - if start_state is None: - start_state = np.zeros( - shape=(self.diag_time_dists.max_t + 1, len(self.state_list)), - dtype=float - ) - start_state[0,0] = 1. - - # compute involvement at first time-step - for i in range(len(start_state)-1): - start_state[i+1,:] = start_state[i,:] @ self.transition_matrix - return start_state def check_and_assign(self, new_params: np.ndarray): """Check that the spread probability (rates) and the parameters for the @@ -1117,4 +1090,4 @@ def __init__(self, *args, **kwargs): DeprecationWarning ) - super().__init__(*args, **kwargs) \ No newline at end of file + super().__init__(*args, **kwargs) From b6577e4a5ad89c310391cdc651f58e767f75226c Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Tue, 15 Aug 2023 16:58:40 +0200 Subject: [PATCH 28/35] fix: wrong name of midext_prob --- lymph/midline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lymph/midline.py b/lymph/midline.py index 2b6c92e..5035f9d 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -72,7 +72,7 @@ def __init__( self.evolve_midext = evolve_midext if self.evolve_midext: - self.midext = 0. + self.midext_prob = 0. self.noext.diag_time_dists = self.ext.diag_time_dists From cffae92710fd111ecfe71303fbb373bf81e09e8d Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Tue, 15 Aug 2023 18:11:54 +0200 Subject: [PATCH 29/35] fix: wrong shape of posterior in likelihood --- lymph/midline.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/lymph/midline.py b/lymph/midline.py index 5035f9d..5e4076f 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -167,7 +167,7 @@ def spread_probs(self) -> np.ndarray: +-------------+-------------+-------------+ """ if self.evolve_midext: - return np.concatenate([self.base_probs, self.trans_probs, self.midext_prob]) + return np.concatenate([self.base_probs, self.trans_probs, [self.midext_prob]]) return np.concatenate([self.base_probs, self.trans_probs]) @@ -255,14 +255,12 @@ def _gen_diagnose_matrices_midext(self, table: pd.DataFrame, t_stage: str): self._diagnose_matrices_midext[t_stage] = np.zeros((len(table), 2)) for idx, (_, patient) in enumerate(table.iterrows()): midext = patient["info", "tumor", "midline_extension"] - if midext == False: - self.diagnose_matrices_midext[t_stage][idx] = np.array([0, 1]) - - if midext == True: - self.diagnose_matrices_midext[t_stage][idx] = np.array([1, 0]) - if pd.isna(midext): self.diagnose_matrices_midext[t_stage][idx] = np.array([1, 1]) + elif not midext: + self.diagnose_matrices_midext[t_stage][idx] = np.array([0, 1]) + else: + self.diagnose_matrices_midext[t_stage][idx] = np.array([1, 0]) @property def diagnose_matrices_midext(self): @@ -530,7 +528,7 @@ def likelihood( ) p = np.vstack((p_nox, p_ex)) - stage_llh = p @ self.diagnose_matrices_midext[stage] + stage_llh = (p * self.diagnose_matrices_midext[stage].T).sum(axis=0) if log: llh += np.sum(np.log(stage_llh)) From aa5f1b5764b0ffe2ba8ffe1072ceb5c90a9bdbe3 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Wed, 16 Aug 2023 17:36:06 +0200 Subject: [PATCH 30/35] fix: alpha mix assignment & explicit midext llh This still does not seem to work. --- lymph/midline.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/lymph/midline.py b/lymph/midline.py index 5e4076f..a3ade7b 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -124,7 +124,7 @@ def base_probs(self, new_params: np.ndarray): if self.use_mixing: self.noext.contra.base_probs = new_params[k:2*k] - self.alpha_mix = new_params[-2] + self.alpha_mix = new_params[-1] # compute linear combination self.ext.contra.base_probs = ( self.alpha_mix * self.ext.ipsi.base_probs @@ -487,9 +487,16 @@ def likelihood( max_t = self.diag_time_dists.max_t llh = 0. if log else 1. - state_probs_ipsi = self.noext.ipsi._evolve(t_last=max_t) - state_probs_contra_nox = self.noext.contra._evolve(t_last=max_t) state_probs_midext = self._evolve_midext() + state_probs_ipsi = self.ext.ipsi._evolve(t_last=max_t) + + # I think we need to discount the contralateral state probabilities for no + # midline extension, as it becomes less and less likely that the tumor + # does not cross the midline the longer it has been growing. + state_probs_contra_nox = ( + self.noext.contra._evolve(t_last=max_t) + * state_probs_midext[:,0].reshape(-1,1) + ) state_probs_contra_ex = np.zeros( shape=(max_t + 1, len(self.ext.ipsi.state_list)), @@ -528,7 +535,14 @@ def likelihood( ) p = np.vstack((p_nox, p_ex)) - stage_llh = (p * self.diagnose_matrices_midext[stage].T).sum(axis=0) + stage_llh = ( + (p * self.diagnose_matrices_midext[stage].T).sum(axis=0) + * ( + self.diag_time_dists[stage].pmf + @ state_probs_midext + @ self.diagnose_matrices_midext[stage].T + ) + ) if log: llh += np.sum(np.log(stage_llh)) From e662c9061162b57e92c30c2fa4e7b216ae2fdde5 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Fri, 18 Aug 2023 14:11:56 +0200 Subject: [PATCH 31/35] fix: invert midext data matrix & correct evolution --- lymph/midline.py | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/lymph/midline.py b/lymph/midline.py index a3ade7b..75b5124 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -258,9 +258,9 @@ def _gen_diagnose_matrices_midext(self, table: pd.DataFrame, t_stage: str): if pd.isna(midext): self.diagnose_matrices_midext[t_stage][idx] = np.array([1, 1]) elif not midext: - self.diagnose_matrices_midext[t_stage][idx] = np.array([0, 1]) - else: self.diagnose_matrices_midext[t_stage][idx] = np.array([1, 0]) + else: + self.diagnose_matrices_midext[t_stage][idx] = np.array([0, 1]) @property def diagnose_matrices_midext(self): @@ -502,19 +502,18 @@ def likelihood( shape=(max_t + 1, len(self.ext.ipsi.state_list)), dtype=float ) - state_probs_contra_ex[0,0] = 1. for stage in t_stages: for i in range(max_t): state_probs_contra_ex[i + 1] = ( - state_probs_midext[i,1] * state_probs_contra_ex[i] @ self.ext.contra.transition_matrix - + state_probs_midext[i,0] * state_probs_contra_nox[i + 1] - ) + self.midext_prob * state_probs_contra_nox[i] + + state_probs_contra_ex[i] + ) @ self.ext.contra.transition_matrix joint_state_probs_nox = ( state_probs_ipsi.T @ np.diag(self.noext.ipsi.diag_time_dists[stage].pmf) - @ state_probs_contra_nox + @ (state_probs_contra_nox * state_probs_midext[:,0].reshape(-1,1)) ) joint_state_probs_ex = ( state_probs_ipsi.T @@ -535,14 +534,7 @@ def likelihood( ) p = np.vstack((p_nox, p_ex)) - stage_llh = ( - (p * self.diagnose_matrices_midext[stage].T).sum(axis=0) - * ( - self.diag_time_dists[stage].pmf - @ state_probs_midext - @ self.diagnose_matrices_midext[stage].T - ) - ) + stage_llh = (p * self.diagnose_matrices_midext[stage].T).sum(axis=0) if log: llh += np.sum(np.log(stage_llh)) From 0024895c407997f09ce131893fda5a3f46c87c93 Mon Sep 17 00:00:00 2001 From: Roman Ludwig <48687784+rmnldwg@users.noreply.github.com> Date: Fri, 18 Aug 2023 14:42:12 +0200 Subject: [PATCH 32/35] fix: double multiply of midext evo with contra nox --- lymph/midline.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/lymph/midline.py b/lymph/midline.py index 75b5124..b724f4f 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -489,19 +489,8 @@ def likelihood( state_probs_midext = self._evolve_midext() state_probs_ipsi = self.ext.ipsi._evolve(t_last=max_t) - - # I think we need to discount the contralateral state probabilities for no - # midline extension, as it becomes less and less likely that the tumor - # does not cross the midline the longer it has been growing. - state_probs_contra_nox = ( - self.noext.contra._evolve(t_last=max_t) - * state_probs_midext[:,0].reshape(-1,1) - ) - - state_probs_contra_ex = np.zeros( - shape=(max_t + 1, len(self.ext.ipsi.state_list)), - dtype=float - ) + state_probs_contra_nox = self.noext.contra._evolve(t_last=max_t) + state_probs_contra_ex = np.zeros_like(state_probs_contra_nox) for stage in t_stages: for i in range(max_t): @@ -510,6 +499,9 @@ def likelihood( + state_probs_contra_ex[i] ) @ self.ext.contra.transition_matrix + # the two `joint_state_probs` below together represent the joint probability + # of any ips- AND contralateral state AND any midline extension state + # marginalized over the diagnose time. joint_state_probs_nox = ( state_probs_ipsi.T @ np.diag(self.noext.ipsi.diag_time_dists[stage].pmf) @@ -520,6 +512,7 @@ def likelihood( @ np.diag(self.ext.ipsi.diag_time_dists[stage].pmf) @ state_probs_contra_ex ) + p_nox = np.sum( self.noext.ipsi.diagnose_matrices[stage] * (joint_state_probs_nox From bb5d22ac2507c3dce51a9c784e527a2a116597c2 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 21 Aug 2023 11:20:27 +0200 Subject: [PATCH 33/35] fix: joint state probs sum to one now --- lymph/midline.py | 61 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/lymph/midline.py b/lymph/midline.py index b724f4f..4fd33e5 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -315,6 +315,7 @@ def patient_data(self, patient_data: pd.DataFrame): self._patient_data = patient_data.copy() self.load_data(patient_data) + def _evolve_midext(self) -> np.ndarray: """Evolve hidden Markov model based system over one time step. Compute :math:`p(S \\mid t)` where :math:`S` is a distinct state and :math:`t` @@ -341,6 +342,32 @@ def _evolve_midext(self) -> np.ndarray: midext_states[i+1,:] = midext_states[i,:] @ midextransition_matrix return midext_states + + def _evolve_contra( + self, + t_last: int, + ) -> Tuple[np.ndarray, np.ndarray]: + """Evolve contra side as mixture of with & without midline extension.""" + state_probs_midext = np.zeros( + shape=(t_last + 1, len(self.ext.contra.state_list)) + ) + state_probs_noext = np.zeros( + shape=(t_last + 1, len(self.noext.contra.state_list)) + ) + state_probs_noext[0,0] = 1. + + for t in range(t_last): + state_probs_noext[t+1] = ( + (1. - self.midext_prob) * state_probs_noext[t] + ) @ self.noext.contra.transition_matrix + state_probs_midext[t+1] = ( + self.midext_prob * state_probs_noext[t] + + state_probs_midext[t] + ) @ self.ext.contra.transition_matrix + + return state_probs_noext, state_probs_midext + + def load_data( self, data: pd.DataFrame, @@ -417,6 +444,7 @@ def load_data( "patients in this T-stage will be ignored." ) + def check_and_assign(self, new_params: np.ndarray): """Check that the spread probability (rates) and the parameters for the marginalization over diagnose times are all within limits and assign them to @@ -458,6 +486,7 @@ def check_and_assign(self, new_params: np.ndarray): self.spread_probs = new_spread_probs + def likelihood( self, data: Optional[pd.DataFrame] = None, @@ -478,8 +507,6 @@ def likelihood( except ValueError: return -np.inf if log else 0. - llh = 0. if log else 1. - stored_t_stages = set(self.ext.ipsi.diagnose_matrices.keys()) provided_t_stages = set(self.ext.ipsi.diag_time_dists.keys()) t_stages = list(stored_t_stages.intersection(provided_t_stages)) @@ -487,25 +514,18 @@ def likelihood( max_t = self.diag_time_dists.max_t llh = 0. if log else 1. - state_probs_midext = self._evolve_midext() + # state_probs_midext = self._evolve_midext() state_probs_ipsi = self.ext.ipsi._evolve(t_last=max_t) - state_probs_contra_nox = self.noext.contra._evolve(t_last=max_t) - state_probs_contra_ex = np.zeros_like(state_probs_contra_nox) + state_probs_contra_nox, state_probs_contra_ex = self._evolve_contra(t_last=max_t) for stage in t_stages: - for i in range(max_t): - state_probs_contra_ex[i + 1] = ( - self.midext_prob * state_probs_contra_nox[i] - + state_probs_contra_ex[i] - ) @ self.ext.contra.transition_matrix - # the two `joint_state_probs` below together represent the joint probability - # of any ips- AND contralateral state AND any midline extension state + # of any ipsi- AND contralateral state AND any midline extension state # marginalized over the diagnose time. joint_state_probs_nox = ( state_probs_ipsi.T @ np.diag(self.noext.ipsi.diag_time_dists[stage].pmf) - @ (state_probs_contra_nox * state_probs_midext[:,0].reshape(-1,1)) + @ state_probs_contra_nox ) joint_state_probs_ex = ( state_probs_ipsi.T @@ -513,21 +533,23 @@ def likelihood( @ state_probs_contra_ex ) - p_nox = np.sum( + joint_diag_probs_nox = np.sum( self.noext.ipsi.diagnose_matrices[stage] * (joint_state_probs_nox @ self.noext.contra.diagnose_matrices[stage]), axis=0 - ) - p_ex = np.sum( + ) + joint_diag_probs_ex = np.sum( self.ext.ipsi.diagnose_matrices[stage] * (joint_state_probs_ex @ self.ext.contra.diagnose_matrices[stage]), axis=0 - ) + ) - p = np.vstack((p_nox, p_ex)) - stage_llh = (p * self.diagnose_matrices_midext[stage].T).sum(axis=0) + joint_diag_probs = np.vstack((joint_diag_probs_nox, joint_diag_probs_ex)) + stage_llh = ( + joint_diag_probs * self.diagnose_matrices_midext[stage].T + ).sum(axis=0) if log: llh += np.sum(np.log(stage_llh)) @@ -536,6 +558,7 @@ def likelihood( return llh + def risk( self, given_params: Optional[np.ndarray] = None, From b9f2048f2c7809061ebd04a378de538882949323 Mon Sep 17 00:00:00 2001 From: rmnldwg <48687784+rmnldwg@users.noreply.github.com> Date: Mon, 21 Aug 2023 15:25:49 +0200 Subject: [PATCH 34/35] fix: code works without evolution again --- lymph/midline.py | 59 ++++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/lymph/midline.py b/lymph/midline.py index 4fd33e5..9a31359 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -71,8 +71,7 @@ def __init__( self.alpha_mix = 0. self.evolve_midext = evolve_midext - if self.evolve_midext: - self.midext_prob = 0. + self.midext_prob = 0. self.noext.diag_time_dists = self.ext.diag_time_dists @@ -166,10 +165,7 @@ def spread_probs(self) -> np.ndarray: | base probs | trans probs | midext prob | +-------------+-------------+-------------+ """ - if self.evolve_midext: - return np.concatenate([self.base_probs, self.trans_probs, [self.midext_prob]]) - - return np.concatenate([self.base_probs, self.trans_probs]) + return np.concatenate([self.base_probs, self.trans_probs, [self.midext_prob]]) @spread_probs.setter def spread_probs(self, new_params: np.ndarray): @@ -178,14 +174,9 @@ def spread_probs(self, new_params: np.ndarray): """ num_base_probs = len(self.base_probs) - if self.evolve_midext: - self.midext_prob = new_params[-1] - end = -1 - else: - end = None - self.base_probs = new_params[:num_base_probs] - self.trans_probs = new_params[num_base_probs:end] + self.trans_probs = new_params[num_base_probs:-1] + self.midext_prob = new_params[-1] @property @@ -327,8 +318,8 @@ def _evolve_midext(self) -> np.ndarray: :meta public: """ midext_states = np.zeros( - shape=(self.diag_time_dists.max_t + 1, 2), - dtype=float + shape=(self.diag_time_dists.max_t + 1, 2), + dtype=float ) midext_states[0,0] = 1. @@ -348,22 +339,40 @@ def _evolve_contra( t_last: int, ) -> Tuple[np.ndarray, np.ndarray]: """Evolve contra side as mixture of with & without midline extension.""" - state_probs_midext = np.zeros( - shape=(t_last + 1, len(self.ext.contra.state_list)) - ) state_probs_noext = np.zeros( shape=(t_last + 1, len(self.noext.contra.state_list)) ) state_probs_noext[0,0] = 1. + state_probs_midext = np.zeros( + shape=(t_last + 1, len(self.ext.contra.state_list)) + ) + if not self.evolve_midext: + state_probs_noext[0,0] = (1. - self.midext_prob) + state_probs_midext[0,0] = self.midext_prob + for t in range(t_last): - state_probs_noext[t+1] = ( - (1. - self.midext_prob) * state_probs_noext[t] - ) @ self.noext.contra.transition_matrix - state_probs_midext[t+1] = ( - self.midext_prob * state_probs_noext[t] - + state_probs_midext[t] - ) @ self.ext.contra.transition_matrix + # When evolving over the midline extension state, there's a chance at any + # time step that the tumor grows over the midline and starts spreading to + # the contralateral side more aggressively. + if self.evolve_midext: + state_probs_noext[t+1] = ( + (1. - self.midext_prob) * state_probs_noext[t] + ) @ self.noext.contra.transition_matrix + state_probs_midext[t+1] = ( + self.midext_prob * state_probs_noext[t] + + state_probs_midext[t] + ) @ self.ext.contra.transition_matrix + + # When we do not evolve, the tumor is considered lateralized or extending + # over the midline from the start. + else: + state_probs_noext[t+1] = ( + state_probs_noext[t] @ self.noext.contra.transition_matrix + ) + state_probs_midext[t+1] = ( + state_probs_midext[t] @ self.ext.contra.transition_matrix + ) return state_probs_noext, state_probs_midext From e02c47c172d6c5714b0ea4e0356147f25e2c5046 Mon Sep 17 00:00:00 2001 From: larstwi <101893533+larstwi@users.noreply.github.com> Date: Thu, 26 Oct 2023 05:06:37 +0200 Subject: [PATCH 35/35] adapted code for changes in lyscripts prevalence prediction (midext timeevolution model) --- lymph/midline.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/lymph/midline.py b/lymph/midline.py index 9a31359..91fc692 100644 --- a/lymph/midline.py +++ b/lymph/midline.py @@ -501,6 +501,8 @@ def likelihood( data: Optional[pd.DataFrame] = None, given_params: Optional[np.ndarray] = None, log: bool = True, + t_stages: Optional[List] = None, + prevalence_calc: bool = False ) -> float: """Compute the (log-)likelihood of data, using the stored spread probs and fixed distributions for marginalizing over diagnose times. @@ -510,15 +512,15 @@ def likelihood( """ if data is not None: self.patient_data = data - + try: self.check_and_assign(given_params) except ValueError: return -np.inf if log else 0. - - stored_t_stages = set(self.ext.ipsi.diagnose_matrices.keys()) - provided_t_stages = set(self.ext.ipsi.diag_time_dists.keys()) - t_stages = list(stored_t_stages.intersection(provided_t_stages)) + if t_stages is None: + stored_t_stages = set(self.ext.ipsi.diagnose_matrices.keys()) + provided_t_stages = set(self.ext.ipsi.diag_time_dists.keys()) + t_stages = list(stored_t_stages.intersection(provided_t_stages)) max_t = self.diag_time_dists.max_t llh = 0. if log else 1. @@ -526,7 +528,6 @@ def likelihood( # state_probs_midext = self._evolve_midext() state_probs_ipsi = self.ext.ipsi._evolve(t_last=max_t) state_probs_contra_nox, state_probs_contra_ex = self._evolve_contra(t_last=max_t) - for stage in t_stages: # the two `joint_state_probs` below together represent the joint probability # of any ipsi- AND contralateral state AND any midline extension state @@ -541,7 +542,6 @@ def likelihood( @ np.diag(self.ext.ipsi.diag_time_dists[stage].pmf) @ state_probs_contra_ex ) - joint_diag_probs_nox = np.sum( self.noext.ipsi.diagnose_matrices[stage] * (joint_state_probs_nox @@ -559,12 +559,13 @@ def likelihood( stage_llh = ( joint_diag_probs * self.diagnose_matrices_midext[stage].T ).sum(axis=0) - if log: llh += np.sum(np.log(stage_llh)) else: - llh *= np.prod(stage_llh) - + if prevalence_calc: + llh = stage_llh + else: + llh *= np.prod(stage_llh) return llh