Merge pull request #234 from holukas/indev

Indev
holukas · Oct 25, 2024 · b119d94 · b119d94
2 parents 5897c8c + 6219c3d
commit b119d94
Show file tree

Hide file tree

Showing 62 changed files with 8,859 additions and 10,120 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,34 @@
 
 ![DIIVE](images/logo_diive1_256px.png)
 
+## v0.83.2 | 25 Oct 2024
+
+From now on, Python version `3.11.10` is used for developing `diive` (up to now, version `3.9` was used). All unittests
+were successfully executed with this new Python version. In addition, all notebooks were re-run and all looked good.
+
+[JupyterLab](https://jupyterlab.readthedocs.io/en/4.2.x/index.html) is now included in the environment, which makes it
+easier to quickly install `diive` (`pip install diive`) in an environment and directly use its notebooks, without the
+need to install JupyterLab separately.
+
+### Environment
+
+- `diive` will now be developed using Python version `3.11.10`
+- Added [JupyterLab](https://jupyterlab.readthedocs.io/en/4.2.x/index.html)
+- Added [jupyter bokeh](https://github.com/bokeh/jupyter_bokeh)
+
+### Notebooks
+
+- All notebooks were re-run and updated using Python version `3.11.10`
+
+### Tests
+
+- 50/50 unittests ran successfully with Python version `3.11.10`
+
+### Changes
+
+- Adjusted the flag check in the QCF flag report: the progressive flag must be the same as the previously calculated
+  overall flag (`diive.pkgs.qaqc.qcf.FlagQCF.report_qcf_evolution`)
+
 ## v0.83.1 | 23 Oct 2024
 
 ## Changes

diff --git a/README.md b/README.md
@@ -191,11 +191,7 @@ One way to install and use `diive` with a specific Python version on a local mac
 
 - Install [miniconda](https://docs.conda.io/en/latest/miniconda.html)
 - Start `miniconda` prompt
-- Create a environment named `diive-env` that contains Python 3.9.7: `conda create --name diive-env python=3.9.7`
+- Create an environment named `diive-env` that contains Python 3.11: `conda create --name diive-env python=3.11`
 - Activate the new environment: `conda activate diive-env`
 - Install `diive` using pip: `pip install diive`
-- If you want to use `diive` in Jupyter notebooks, you can install Jupyterlab.
-  In this example Jupyterlab is installed from the `conda` distribution channel `conda-forge`:
-  `conda install -c conda-forge jupyterlab`
-- If used in Jupyter notebooks, `diive` can generate dynamic plots. This requires the installation of:
-  `conda install -c bokeh jupyter_bokeh`
+- To start JupyterLab, type `jupyter lab` in the prompt
diff --git a/diive/pkgs/fluxprocessingchain/fluxprocessingchain.py b/diive/pkgs/fluxprocessingchain/fluxprocessingchain.py
@@ -832,7 +832,7 @@ def _run_level32(self):
         self.fpc.level32_qcf.report_qcf_series()
 
     def _run_level31(self):
-        self.fpc.level31_storage_correction(gapfill_storage_term=False)
+        self.fpc.level31_storage_correction(gapfill_storage_term=True)
         self.fpc.finalize_level31()
         # fpc.level31.showplot(maxflux=50)
         self.fpc.level31.report()
@@ -911,7 +911,7 @@ def _load_data(self):
 def example_quick():
     QuickFluxProcessingChain(
         fluxvars=['FC', 'LE', 'H'],
-        sourcedirs=[r'L:\Sync\luhk_work\CURRENT\fru\Level-1_results_fluxnet_2022'],
+        sourcedirs=[r'L:\Sync\luhk_work\TMP'],
         site_lat=47.115833,
         site_lon=8.537778,
         utc_offset=1,
@@ -1567,6 +1567,6 @@ def example_cumu():
 
 
 if __name__ == '__main__':
-    # example_quick()
-    example()
+    example_quick()
+    # example()
     # example_cumu()
diff --git a/diive/pkgs/qaqc/qcf.py b/diive/pkgs/qaqc/qcf.py
@@ -7,6 +7,8 @@
 Combine multiple flags in one single QCF flag.
 
 """
+from cmath import isnan
+
 import matplotlib.gridspec as gridspec
 import matplotlib.pyplot as plt
 import numpy as np
@@ -191,6 +193,7 @@ def report_qcf_evolution(self):
         n_flag0 = 0
         perc_flag2 = 0
         n_vals = len(allflags_df)
+        prog_df = pd.DataFrame()
         print(f"\nNumber of {self.series.name} records before QC: {n_vals}")
         for ix_last_test in range(1, n_tests):
             prog_testcols = flagcols[ix_first_test:ix_last_test]
@@ -205,6 +208,7 @@ def report_qcf_evolution(self):
             n_flag1 = prog_df[self.flagqcfcol].loc[prog_df[self.flagqcfcol] == 1].count()
             n_flag2 = prog_df[self.flagqcfcol].loc[prog_df[self.flagqcfcol] == 2].count()
 
+
             # Calculate some flag stats
             n_vals_test_rejected = n_flag2 - n_vals_total_rejected
             perc_vals_test_rejected = (n_vals_test_rejected / n_vals) * 100
@@ -221,11 +225,20 @@ def report_qcf_evolution(self):
             n_vals_total_rejected = n_flag2
 
         # Compare last overall flag from evolution with previously calculated overall flag
-        countflags = dict(Counter(self.flags[self.flagqcfcol]))
-        n_missing = self.series.isnull().sum()
-        c = True if (countflags[2] - n_missing) == n_flag2 else False
-        b = True if countflags[1] == n_flag1 else False
-        a = True if countflags[0] == n_flag0 else False
+        # Progressive flag must be the same as previously calculated overall flag
+        _is_equal = prog_df[self.flagqcfcol].equals(self.flags[self.flagqcfcol][~ix_missing_vals])
+        _checkdf = self.flags[self.flagqcfcol][~ix_missing_vals]
+        _n_flag0 = _checkdf.loc[_checkdf == 0].count()
+        _n_flag1 = _checkdf.loc[_checkdf == 1].count()
+        _n_flag2 = _checkdf.loc[_checkdf == 2].count()
+        a = True if n_flag0 == _n_flag0 else False
+        b = True if n_flag1 == _n_flag1 else False
+        c = True if n_flag2 == _n_flag2 else False
+        # countflags = dict(Counter(self.flags[self.flagqcfcol]))
+        # n_missing = self.series.isnull().sum()
+        # c = True if (countflags[2] - n_missing) == n_flag2 else False
+        # b = True if countflags[1] == n_flag1 else False
+        # a = True if countflags[0] == n_flag0 else False
         if not all([a, b, c]):
             raise ValueError("(!)Results from QCF evolution are different from the previously "
                              "calculated overall flag.")

diff --git a/notebooks/Analyses/CalculateZaggregatesInQuantileClassesOfXY.ipynb b/notebooks/Analyses/CalculateZaggregatesInQuantileClassesOfXY.ipynb
@@ -85,7 +85,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "diive version: v0.82.1\n"
+      "diive version: v0.83.2\n"
      ]
     }
    ],
@@ -113,51 +113,9 @@
      "start_time": "2024-08-22T14:05:35.140701Z"
     }
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Help on class QuantileXYAggZ in module diive.pkgs.analyses.quantilexyaggz:\n",
-      "\n",
-      "class QuantileXYAggZ(builtins.object)\n",
-      " |  QuantileXYAggZ(x: pandas.core.series.Series, y: pandas.core.series.Series, z: pandas.core.series.Series, n_quantiles: int = 10, min_n_vals_per_bin: int = 1, binagg_z: Literal['mean', 'min', 'max', 'median', 'count'] = 'mean')\n",
-      " |  \n",
-      " |  Calculate z-aggregates in quantiles (classes) of x and y\n",
-      " |  \n",
-      " |  By default, x and y are binned into 10 classes (n_quantiles: int = 10) and\n",
-      " |  the mean of z is shown in each of the resulting 100 classes (10*10).\n",
-      " |  \n",
-      " |  The result is a pivoted dataframe and its longform.\n",
-      " |  \n",
-      " |  Methods defined here:\n",
-      " |  \n",
-      " |  __init__(self, x: pandas.core.series.Series, y: pandas.core.series.Series, z: pandas.core.series.Series, n_quantiles: int = 10, min_n_vals_per_bin: int = 1, binagg_z: Literal['mean', 'min', 'max', 'median', 'count'] = 'mean')\n",
-      " |      Initialize self.  See help(type(self)) for accurate signature.\n",
-      " |  \n",
-      " |  run(self)\n",
-      " |  \n",
-      " |  ----------------------------------------------------------------------\n",
-      " |  Readonly properties defined here:\n",
-      " |  \n",
-      " |  longformdf\n",
-      " |  \n",
-      " |  pivotdf\n",
-      " |  \n",
-      " |  ----------------------------------------------------------------------\n",
-      " |  Data descriptors defined here:\n",
-      " |  \n",
-      " |  __dict__\n",
-      " |      dictionary for instance variables (if defined)\n",
-      " |  \n",
-      " |  __weakref__\n",
-      " |      list of weak references to the object (if defined)\n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "help(QuantileXYAggZ)"
+    "# help(QuantileXYAggZ)"
    ]
   },
   {
@@ -191,7 +149,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Loaded .parquet file L:\\Sync\\luhk_work\\20 - CODING\\21 - DIIVE\\diive\\diive\\configs\\exampledata\\exampledata_PARQUET_CH-DAV_FP2022.5_2013-2022_ID20230206154316_30MIN.parquet (0.065 seconds). Detected time resolution of <30 * Minutes> / 30min \n"
+      "Loaded .parquet file F:\\Sync\\luhk_work\\20 - CODING\\21 - DIIVE\\diive\\diive\\configs\\exampledata\\exampledata_PARQUET_CH-DAV_FP2022.5_2013-2022_ID20230206154316_30MIN.parquet (0.057 seconds). Detected time resolution of <30 * Minutes> / 30min \n"
      ]
     },
     {
@@ -1889,7 +1847,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Finished 2024-09-19 18:08:39\n"
+      "Finished 2024-10-24 13:38:17\n"
      ]
     }
    ],
@@ -1928,7 +1886,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.19"
+   "version": "3.11.10"
   },
   "toc-autonumbering": false
  },

diff --git a/notebooks/Analyses/DailyCorrelation.ipynb b/notebooks/Analyses/DailyCorrelation.ipynb
@@ -89,7 +89,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "diive version: v0.82.1\n"
+      "diive version: v0.83.2\n"
      ]
     }
    ],
@@ -117,36 +117,9 @@
      "start_time": "2024-08-22T14:06:13.611453Z"
     }
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Help on function daily_correlation in module diive.pkgs.analyses.correlation:\n",
-      "\n",
-      "daily_correlation(s1: pandas.core.series.Series, s2: pandas.core.series.Series, mincorr: float = 0.8, showplot: bool = False) -> pandas.core.series.Series\n",
-      "    Calculate daily correlation between two time series.\n",
-      "    \n",
-      "    Args:\n",
-      "        s1: any time series, timestamp must overlap with *s2*\n",
-      "        s2: any time series, timestamp must overlap with *s1*\n",
-      "        mincorr: minimum absolute correlation, only relevant when *showplot=True*,\n",
-      "            must be between -1 and 1 (inclusive)\n",
-      "            Example: with *0.8* all correlations between -0.8 and +0.8 are considered low,\n",
-      "            and all correlations smaller than -0.8 and higher than +0.8 are considered high.\n",
-      "        showplot: if *True*, show plot of results\n",
-      "    \n",
-      "    Returns:\n",
-      "        series with correlations for each day\n",
-      "    \n",
-      "    - Example notebook available in:\n",
-      "        notebooks/Analyses/DailyCorrelation.ipynb\n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "help(daily_correlation)"
+    "# help(daily_correlation)"
    ]
   },
   {
@@ -180,7 +153,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Loaded .parquet file L:\\Sync\\luhk_work\\20 - CODING\\21 - DIIVE\\diive\\diive\\configs\\exampledata\\exampledata_PARQUET_CH-DAV_FP2022.5_2013-2022_ID20230206154316_30MIN.parquet (0.064 seconds). Detected time resolution of <30 * Minutes> / 30min \n"
+      "Loaded .parquet file F:\\Sync\\luhk_work\\20 - CODING\\21 - DIIVE\\diive\\diive\\configs\\exampledata\\exampledata_PARQUET_CH-DAV_FP2022.5_2013-2022_ID20230206154316_30MIN.parquet (0.055 seconds). Detected time resolution of <30 * Minutes> / 30min \n"
      ]
     },
     {
@@ -1207,7 +1180,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Finished 2024-09-19 18:16:32\n"
+      "Finished 2024-10-24 13:38:48\n"
      ]
     }
    ],
@@ -1246,7 +1219,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.19"
+   "version": "3.11.10"
   },
   "toc-autonumbering": false
  },

diff --git a/notebooks/Analyses/DecouplingSortingBins.ipynb b/notebooks/Analyses/DecouplingSortingBins.ipynb
@@ -85,7 +85,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "diive version: v0.82.1\n"
+      "diive version: v0.83.2\n"
      ]
     }
    ],
@@ -150,7 +150,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Loaded .parquet file L:\\Sync\\luhk_work\\20 - CODING\\21 - DIIVE\\diive\\diive\\configs\\exampledata\\exampledata_PARQUET_CH-DAV_FP2022.5_2013-2022_ID20230206154316_30MIN.parquet (0.039 seconds). Detected time resolution of <30 * Minutes> / 30min \n"
+      "Loaded .parquet file F:\\Sync\\luhk_work\\20 - CODING\\21 - DIIVE\\diive\\diive\\configs\\exampledata\\exampledata_PARQUET_CH-DAV_FP2022.5_2013-2022_ID20230206154316_30MIN.parquet (0.051 seconds). Detected time resolution of <30 * Minutes> / 30min \n"
      ]
     },
     {
@@ -697,7 +697,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Finished 2024-09-19 21:13:51\n"
+      "Finished 2024-10-24 13:39:04\n"
      ]
     }
    ],
@@ -736,7 +736,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.19"
+   "version": "3.11.10"
   },
   "toc-autonumbering": false
  },