diff --git a/CHANGELOG.md b/CHANGELOG.md index bf63f69..84f002e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,22 +2,33 @@ ![DIIVE](images/logo_diive1_256px.png) +## v0.84.1 | 8 Nov 2024 + +### Bugfixes + +- Removed invalid imports + +### Tests + +- Added test case for `diive` imports (`tests.test_imports.TestImports.test_imports`) +- 52/52 unittests ran successfully + ## v0.84.0 | 7 Nov 2024 -# New features +### New features - New class `BinFitterCP` for fitting function to binned data, includes confidence interval and prediction interval ( `diive.pkgs.fits.fitter.BinFitterCP`) ![DIIVE](images/BinFitterCP_diive_v0.84.0.png) -## Additions +### Additions - Added small function to detect duplicate entries in lists (`diive.core.funcs.funcs.find_duplicates_in_list`) - Added new filetype (`diive/configs/filetypes/ETH-MERCURY-CSV-20HZ.yml`) - Added new filetype (`diive/configs/filetypes/GENERIC-CSV-HEADER-1ROW-TS-END-FULL-NS-20HZ.yml`) -## Bugfixes +### Bugfixes - Not directly a bug fix, but when reading EddyPro fluxnet files with `LoadEddyProOutputFiles` (e.g., in the flux processing chain) duplicate columns are now automatically renamed by adding a numbered suffix. For example, if two diff --git a/diive/__init__.py b/diive/__init__.py index e880cac..0c2a107 100644 --- a/diive/__init__.py +++ b/diive/__init__.py @@ -1,4 +1,4 @@ -from . import core -from . import pkgs +# from . import core +# from . import pkgs # import core diff --git a/diive/core/__init__.py b/diive/core/__init__.py index 9beda48..cc44abf 100644 --- a/diive/core/__init__.py +++ b/diive/core/__init__.py @@ -1,6 +1,6 @@ # from diive.core import dfun, io, plotting, utils -from . import dfun -from . import io -from . import ml -from . import plotting -from . import times +# from . import dfun +# from . import io +# from . import ml +# from . import plotting +# from . import times diff --git a/diive/core/dfun/_BAK_fits.py b/diive/core/dfun/_BAK_fits.py deleted file mode 100644 index 9a1c35a..0000000 --- a/diive/core/dfun/_BAK_fits.py +++ /dev/null @@ -1,545 +0,0 @@ -""" -DATA FUNCTIONS: FITS -==================== - -# last update in: v0.23.0 - -This module is part of the diive library: -https://github.com/holukas/diive - -""" - -""" -Fit with CI and PI - -CI ... confidence interval -PI ... 
prediction interval - -- https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html -- https://numpy.org/doc/stable/reference/generated/numpy.polynomial.polynomial.Polynomial.fit.html#numpy.polynomial.polynomial.Polynomial.fit -- https://numpy.org/doc/stable/reference/generated/numpy.polyfit.html -- https://towardsdatascience.com/calculating-confidence-interval-with-bootstrapping-872c657c058d -- https://lmfit.github.io/lmfit-py/ -- **https://apmonitor.com/che263/index.php/Main/PythonRegressionStatistics** -- https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html - -""" -import matplotlib.gridspec as gridspec -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import uncertainties as unc -import uncertainties.unumpy as unp -from scipy import stats -from scipy.optimize import curve_fit - -from diive.pkgs.dfun.stats import q25, q75 - -pd.set_option('display.width', 1500) -pd.set_option('display.max_columns', 30) -pd.set_option('display.max_rows', 30) - - -def groupagg(df, num_bins, bin_col): - # Divide into groups of x - - # # Alternative: using .cut - # group, bins = pd.cut(df[bin_col], - # bins=num_bins, - # retbins=True, - # duplicates='raise') # How awesome! - - # Alternative: using .qcut - group, bins = pd.qcut(df[bin_col], - q=num_bins, - retbins=True, - duplicates='drop') # How awesome! - - df['group'] = group - - df.sort_index(axis=1, inplace=True) # lexsort for better performance - - # Calc stats for each group - grouped_df = \ - df.groupby('group').agg( - {'mean', 'median', 'max', 'min', 'count', 'std', q25, q75}) - numvals_in_group = \ - grouped_df[(bin_col)]['count'].describe()[['min', 'max']] - - # print(numvals_in_group) - - # Bins info - grouped_df['BIN_START'] = bins[0:-1] - - return grouped_df - - -class LineCrossingBTS: - """""" - - def __init__( - self, - df: pd.DataFrame, - x_col: str or tuple, - y1_col: str or tuple, - y2_col: str or tuple, - ignore_zero_x: bool = False, - num_bts_runs: int = 10, - num_groups: int = 40 - ): - self.df = df[[x_col, y1_col, y2_col]].dropna() # Remove NaNs - self.x_col = x_col - self.ignore_zero_x = ignore_zero_x - self.y1_col = y1_col - self.y2_col = y2_col - self.num_bts_runs = num_bts_runs - self.num_groups = num_groups - - # Collect results from different bootstrap runs - self.bts = None - self.bts_line_crossings_x = [] - self.bts_line_crossings_y1 = [] - self.bts_line_crossings_y2 = [] - - def get_linecrossings(self): - """Return results from bootstrap runs""" - return self.bts_line_crossings_x, self.bts_line_crossings_y1, self.bts_line_crossings_y2 - - def check_linecrossings(self): - """Print results to screen""" - - lc_x = self.bts_line_crossings_x - lc_y1 = self.bts_line_crossings_y1 - lc_y2 = self.bts_line_crossings_y2 - - print( - f"Line crossings after {self.bts + 1} bootstrap runs\n" - f"Line crossings x: {lc_x}\n" - f"Line crossings y1: {lc_y1}\n" - f"Line crossings y2: {lc_y2}\n" - - f"Line crossings x (0.05 / 0.5 / 0.95): " - f"{np.quantile(lc_x, 0.05):.3f}" - f" / {np.median(lc_x):.3f}" - f" / {np.quantile(lc_x, 0.95):.3f}\n" - - f"Line crossings y1 (0.05 / 0.5 / 0.95): " - f"{np.quantile(lc_y1, 0.05):.3f}" - f" / {np.median(lc_y1):.3f}" - f" / {np.quantile(lc_y1, 0.95):.3f}\n" - - f"Line crossings y2 (0.05 / 0.5 / 0.95): " - f"{np.quantile(lc_y2, 0.05):.3f}" - f" / {np.median(lc_y2):.3f}" - f" / {np.quantile(lc_y2, 0.95):.3f}\n" - ) - - def detect_linecrossings(self): - - # Bootstrap for line crossing - for self.bts in range(0, 
self.num_bts_runs): - print(f"Bootstrap run #{self.bts}") - - # Sample from data - sample_df = self.df.sample(n=int(len(self.df)), replace=True) - - if self.ignore_zero_x: - sample_df = sample_df.loc[sample_df[self.x_col] > 0] - - # sample_df = groupagg(df=sample_df, - # grouping_col=self.x_col, - # num_groups=self.num_groups) - - # Divide into groups of x - group, bins = pd.qcut(sample_df[self.x_col], - q=self.num_groups, - retbins=True) # How awesome! - sample_df['group'] = group - - sample_df.sort_index(axis=1, inplace=True) # lexsort for better performance - - # Calc stats for each group - grouped_df = \ - sample_df.groupby('group').agg( - {'mean', 'median', 'count', 'std', q25, q75}) - numvals_in_group = \ - grouped_df[(self.x_col)]['count'].describe()[['min', 'max']] - - # Bins info - grouped_df[self.bin_start_col] = bins[0:-1] - - try: - # y1 - fitter = FitterCP(x=grouped_df[self.bin_start_col], - y=grouped_df[self.y1_col]['median'], - predict_max_x=None, - predict_min_x=None, - num_predictions=10000) - y1_fit_df, y1_fit_params = fitter._fit() - - # y2 - fitter = FitterCP(x=grouped_df[self.bin_start_col], - y=grouped_df[self.y2_col]['median'], - predict_max_x=None, - predict_min_x=None, - num_predictions=10000) - y2_fit_df, y2_fit_params = fitter._fit() - - crossing_df = pd.DataFrame() - crossing_df['nom_ix'] = y1_fit_df['fit_x'] - crossing_df['nom_y1'] = y1_fit_df['nom'] - crossing_df['nom_y2'] = y2_fit_df['nom'] - # crossing_df = crossing_df.set_index('nom_ix') - - # Find where the two fits cross - crossing_df['diff'] = crossing_df['nom_y1'].sub(crossing_df['nom_y2']) - cross_ix = np.argmax(crossing_df['diff'] < 0) - if cross_ix == 0: - cross_ix = np.argmax(crossing_df['diff'] > 0) - - if cross_ix != 0: - linecross_x = crossing_df.iloc[cross_ix]['nom_ix'] - linecross_y1 = crossing_df.iloc[cross_ix]['nom_y1'] - linecross_y2 = crossing_df.iloc[cross_ix]['nom_y2'] - - self.bts_line_crossings_x.append(linecross_x) - self.bts_line_crossings_y1.append(linecross_y1) - self.bts_line_crossings_y2.append(linecross_y2) - - except: - # Sometimes the fit to the bootstrapped data fails, in that case repeat - self.bts -= 1 - - -class FitterCP: - """Fit function to data and give CI and bootstrapped PI""" - - def __init__( - self, - df: pd.DataFrame, - x_col: str or tuple, - y_col: str or tuple, - predict_max_x: float = None, - predict_min_x: float = None, - num_predictions: int = None, - bins_x_num: int = 0, - bins_y_agg: str = None, - ): - self.df = df[[x_col, y_col]].dropna() # Remove NaNs, working data - self.x_col = x_col - self.y_col = y_col - self.x = self.df[self.x_col] - self.y = self.df[self.y_col] - self.len_y = len(self.y) - self.num_predictions = num_predictions - self.usebins = bins_x_num if bins_x_num >= 0 else 0 # Must be positive - self.bins_y_agg = bins_y_agg - self.fit_x_max = predict_max_x if isinstance(predict_max_x, float) else self.x.max() - self.fit_x_min = predict_min_x if isinstance(predict_min_x, float) else self.x.min() - self.num_predictions = num_predictions if isinstance(num_predictions, int) else len(self.x) - self.num_predictions = 2 if self.num_predictions < 2 else self.num_predictions - - self.fit_results = {} # Stores fit results - # self.bts_results = {} # Stores fit results for each bootstrap run - # self.bts_predbands_minmax = pd.DataFrame() # Min/max for prediction bands across bootstraps - - def run(self): - - bts_upper_predbands_df = pd.DataFrame() - bts_lower_predbands_df = pd.DataFrame() - - # Sample from original input data and collect results - if 
self.bootstrap_runs > 0: - for bts in range(1, self.bootstrap_runs + 1): - print(f"Fitting {self.y_col}, bootstrap run #{bts}") - try: - bts_df = self.df.sample(n=int(len(self.df)), replace=True) - self.bts_results[bts] = self._fit(df=bts_df) - # Collect prediction bands (and their x values) - bts_upper_predbands_df[bts] = self.bts_results[bts]['fit_df']['upper_predband'] - bts_lower_predbands_df[bts] = self.bts_results[bts]['fit_df']['lower_predband'] - except: - print(f"Fitting failed, repeating run {bts}") - bts -= 1 - - # fit_x is the same across bootstraps, is set at start of FitterCP - self.bts_predbands_minmax['fit_x'] = self.bts_results[1]['fit_df']['fit_x'] - - # For each predicted value, calculate quantiles - self.bts_predbands_minmax['upper_predband_bts_max'] = bts_upper_predbands_df.T.quantile(0.975) - self.bts_predbands_minmax['upper_predband_bts_min'] = bts_upper_predbands_df.T.quantile(0.025) - self.bts_predbands_minmax['lower_predband_bts_max'] = bts_lower_predbands_df.T.quantile(0.975) - self.bts_predbands_minmax['lower_predband_bts_min'] = bts_lower_predbands_df.T.quantile(0.025) - - # Original input data - self.fit_results = self._fit(df=self.df.copy()) - - def plot_results(self): - fig = plt.figure(figsize=(9, 12)) - gs = gridspec.GridSpec(2, 1) # rows, cols - gs.update(wspace=0.3, hspace=0.1, left=0.1, right=0.9, top=0.97, bottom=0.03) - - # Results from original data - ax1 = fig.add_subplot(gs[0, 0]) - self._make_plot(ax=ax1, results=self.fit_results, predbands_df=self.bts_predbands_minmax) - - # Results from bootstrapped data - ax2 = fig.add_subplot(gs[1, 0]) - for bts_run in self.bts_results.keys(): - self._make_plot(ax=ax2, results=self.bts_results[bts_run]) - - fig.show() - - def _make_plot(self, ax, results, predbands_df: pd.DataFrame = None): - # Get data - x = results['x'] - y = results['y'] - fit_df = results['fit_df'] - - # Daily aggregates - ax.scatter(x, y, edgecolor='#B0BEC5', color='none', - alpha=.5, s=60, label='daily aggregates', zorder=9, marker='o') - - # Fit + CI - label_fit = r"$y = ax^2 + bx + c$" - ax.plot(fit_df['fit_x'], fit_df['nom'], lw=2, color='black', - label=label_fit, zorder=11) - ax.fill_between(fit_df['fit_x'], - fit_df['nom_lower_ci95'], - fit_df['nom_upper_ci95'], alpha=.2, zorder=10, - color='black', - label="95% confidence region") # uncertainty lines (95% confidence) - - # Prediction bands - # # Lower prediction band (95% confidence) - ax.plot(fit_df['fit_x'], fit_df['lower_predband'], color='black', - ls='--', zorder=8, - label="95% prediction interval") - # # Upper prediction band (95% confidence) - ax.plot(fit_df['fit_x'], fit_df['upper_predband'], color='black', - ls='--', zorder=8) - - if isinstance(predbands_df, pd.DataFrame): - ax.fill_between(predbands_df['fit_x'], - predbands_df['upper_predband_bts_max'], - predbands_df['upper_predband_bts_min'], alpha=.1, zorder=8, - color='black', label="95% prediction interval") - ax.fill_between(predbands_df['fit_x'], - predbands_df['lower_predband_bts_max'], - predbands_df['lower_predband_bts_min'], alpha=.1, zorder=8, - color='black', label="95% prediction interval") - - ax.axhline(0, lw=1, color='black', zorder=12) - - def get_results(self): - return self.fit_results, self.bts_results, self.bts_predbands_minmax - - def _bin_data(self, df, num_bins: int = 10): - return groupagg(df=df, num_bins=num_bins, bin_col=self.x_col) - - def _predband(self, px, x, y, params_opt, func, conf=0.95): - """Prediction band""" - # px = requested points, x = x data, y = y data, params_opt = 
parameters, func = function name - alpha = 1.0 - conf # significance - N = x.size # data sample size - var_n = len(params_opt) # number of parameters - q = stats.t.ppf(1.0 - alpha / 2.0, N - var_n) # Quantile of Student's t distribution for p=(1-alpha/2) - se = np.sqrt(1. / (N - var_n) * np.sum((y - func(x, *params_opt)) ** 2)) # Stdev of an individual measurement - sx = (px - x.mean()) ** 2 # Auxiliary definition - sxd = np.sum((x - x.mean()) ** 2) # Auxiliary definition - yp = func(px, *params_opt) # Predicted values (best-fit model) - dy = q * se * np.sqrt(1.0 + (1.0 / N) + (sx / sxd)) # Prediction band - lpb, upb = yp - dy, yp + dy # Upper & lower prediction bands. - return lpb, upb - - def _func(self, x, a, b, c): - """Fitting function""" - return a * x ** 2 + b * x + c - - def _fit(self, df): - """Calculate curve fit, confidence intervals and prediction bands - - kudos: https://apmonitor.com/che263/index.php/Main/PythonRegressionStatistics - """ - - # Bin data - if self.usebins == 0: - x = df[self.x_col] - y = df[self.y_col] - len_y = len(y) - else: - df = self._bin_data(df=df, num_bins=self.usebins) - x = df['BIN_START'] - y = df[self.y_col][self.bins_y_agg] - len_y = len(self.y) - - # Fit function f to data - fit_params_opt, fit_params_cov = curve_fit(self._func, x, y) - - # Retrieve parameter values - a = fit_params_opt[0] - b = fit_params_opt[1] - c = fit_params_opt[2] - - # Calc r2 - fit_r2 = \ - 1.0 - (sum((y - self._func(x, a, b, c)) ** 2) / ((len_y - 1.0) * np.var(y, ddof=1))) - - # Calculate parameter confidence interval - a, b, c = unc.correlated_values(fit_params_opt, fit_params_cov) - - # Calculate regression confidence interval - fit_x = np.linspace(self.fit_x_min, self.fit_x_max, self.num_predictions) - fit_y = a * fit_x ** 2 + b * fit_x + c - nom = unp.nominal_values(fit_y) - std = unp.std_devs(fit_y) - - # Best lower and upper prediction bands - lower_predband, upper_predband = \ - self._predband(px=fit_x, x=x, y=y, - params_opt=fit_params_opt, func=self._func, conf=0.95) - - # Fit data - fit_df = pd.DataFrame() - fit_df['fit_x'] = fit_x - fit_df['fit_y'] = fit_y - fit_df['std'] = std - fit_df['nom'] = nom - fit_df['lower_predband'] = lower_predband - fit_df['upper_predband'] = upper_predband - ## Calc 95% confidence region of fit - fit_df['nom_lower_ci95'] = fit_df['nom'] - 1.96 * fit_df['std'] - fit_df['nom_upper_ci95'] = fit_df['nom'] + 1.96 * fit_df['std'] - - # Collect results in dict - fit_results = dict(fit_df=fit_df, - fit_params_opt=fit_params_opt, - fit_params_cov=fit_params_cov, - fit_r2=fit_r2, - x=x, - y=y, - xvar=self.x_col, - yvar=self.y_col) - - return fit_results - - -if __name__ == '__main__': - pass - - -def fit_to_bins_linreg(df, x_col, y_col, bin_col): - # https://www.geeksforgeeks.org/python-implementation-of-polynomial-regression/ - # https://towardsdatascience.com/a-beginners-guide-to-linear-regression-in-python-with-scikit-learn-83a8f7ae2b4f - - from sklearn import linear_model - from sklearn import metrics - import numpy as np - - _df = df.copy() - - X = _df[x_col][bin_col].values.reshape(-1, 1) # Attributes (independent) - y = _df[y_col][bin_col].values.reshape(-1, 1) # Labels (dependent, predicted) - - # Fitting the linear Regression model on two components. 
- linreg = linear_model.LinearRegression() - linreg.fit(X, y) # training the algorithm - y_pred = linreg.predict(X) # predict y - predicted_col = (y_col[0], y_col[1], 'predicted') - _df[predicted_col] = y_pred.flatten() - - # # Robustly fit linear model with RANSAC algorithm - # # https://scikit-learn.org/stable/auto_examples/linear_model/plot_ransac.html#sphx-glr-auto-examples-linear-model-plot-ransac-py - # ransac = linear_model.RANSACRegressor() - # ransac.fit(X, y) - # inlier_mask = ransac.inlier_mask_ - # outlier_mask = np.logical_not(inlier_mask) - - # print('Mean Absolute Error:', metrics.mean_absolute_error(y, y_pred)) - # print('Mean Squared Error:', metrics.mean_squared_error(y, y_pred)) - # print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y, y_pred))) - - predicted_results = { - 'MAE': metrics.mean_absolute_error(y, y_pred), - 'MSE': metrics.mean_squared_error(y, y_pred), - 'RMSE': np.sqrt(metrics.mean_squared_error(y, y_pred)), - 'r2': metrics.explained_variance_score(y, y_pred), - 'intercept': float(linreg.intercept_), - 'slope': float(linreg.coef_) - } - - # predicted_score = regressor.score(X, y) # retrieve r2 - # predicted_intercept = regressor.intercept_ # retrieve the intercept - # predicted_slope = regressor.coef_ # retrieving the slope - - # comparison_df = pd.DataFrame({'Actual': y.flatten(), 'Predicted': y_pred.flatten()}) - - return _df, predicted_col, predicted_results - - # # split 80% of the data to the training set while 20% of the data to test - # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) - # # Fitting the linear Regression model on two components. - # regressor = LinearRegression() - # regressor.fit(X_train, y_train) # training the algorithm - # y_pred = regressor.predict(X_test) - # comparison_df = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()}) - - # lin_df['predicted'] = lin.predict(X) - # - # plt.plot(X, lin.predict(X), color='red') - # x_bin_avg_df['x'] = lin.predict(X) - # - # # Fitting Polynomial Regression to the dataset - # from sklearn.preprocessing import PolynomialFeatures - # - # poly = PolynomialFeatures(degree=4) - # X_poly = poly.fit_transform(X) - # - # poly.fit(X_poly, y) - # lin2 = LinearRegression() - # lin2.fit(X_poly, y) - - -def fit_to_bins_polyreg(df, x_col, y_col, bin_col, degree): - # https://www.geeksforgeeks.org/python-implementation-of-polynomial-regression/ - # https://scikit-learn.org/stable/modules/linear_model.html#polynomial-regression-extending-linear-models-with-basis-functions - - # * https://towardsdatascience.com/polynomial-regression-bbe8b9d97491 - # https://stackoverflow.com/questions/39012611/how-get-equation-after-fitting-in-scikit-learn - # https://stackoverflow.com/questions/51006193/interpreting-logistic-regression-feature-coefficient-values-in-sklearn - - from sklearn.preprocessing import PolynomialFeatures - from sklearn.linear_model import LinearRegression - from sklearn import metrics - import numpy as np - - _df = df.copy() - - X = _df[x_col][bin_col].values.reshape(-1, 1) # Attributes (independent) - y = _df[y_col][bin_col].values.reshape(-1, 1) # Labels (dependent, predicted) - - poly = PolynomialFeatures(degree=degree) - X_poly = poly.fit_transform(X) - - poly.fit(X_poly, y) - lin = LinearRegression() - lin.fit(X_poly, y) - - y_pred = lin.predict(poly.fit_transform(X)) - - predicted_col = (y_col[0], y_col[1], 'predicted') - _df[predicted_col] = y_pred.flatten() - - predicted_results = { - 'MAE': 
metrics.mean_absolute_error(y, y_pred), - 'MSE': metrics.mean_squared_error(y, y_pred), - 'RMSE': np.sqrt(metrics.mean_squared_error(y, y_pred)), - 'r2': metrics.explained_variance_score(y, y_pred), - 'intercept': float(lin.intercept_), - 'slope': lin.coef_} - - # The logistic function, which returns the probability of success, - # is given by p(x) = 1/(1 + exp(-(B0 + B1X1 + ... BnXn)). B0 is in intercept. - # B1 through Bn are the coefficients. X1 through Xn are the features. - - return _df, predicted_col, predicted_results diff --git a/diive/core/plotting/__init__.py b/diive/core/plotting/__init__.py index 9af3ee8..b2ebb57 100644 --- a/diive/core/plotting/__init__.py +++ b/diive/core/plotting/__init__.py @@ -1,2 +1,2 @@ -from . import styles -from . import plotfuncs +# from . import styles +# from . import plotfuncs diff --git a/diive/core/times/__init__.py b/diive/core/times/__init__.py index c5215da..c350d3d 100644 --- a/diive/core/times/__init__.py +++ b/diive/core/times/__init__.py @@ -1,3 +1,3 @@ -from . import times as times -from . import resampling as resampling +# from . import times as times +# from . import resampling as resampling diff --git a/diive/pkgs/__init__.py b/diive/pkgs/__init__.py index 7217677..086bd80 100644 --- a/diive/pkgs/__init__.py +++ b/diive/pkgs/__init__.py @@ -1,5 +1,5 @@ -from diive.pkgs import analyses -from diive.pkgs import corrections -from diive.pkgs import createvar -from diive.pkgs import flux -from diive.pkgs import qaqc +# from diive.pkgs import analyses +# from diive.pkgs import corrections +# from diive.pkgs import createvar +# from diive.pkgs import flux +# from diive.pkgs import qaqc diff --git a/diive/pkgs/flux/__init__.py b/diive/pkgs/flux/__init__.py index df9ff13..8b13789 100644 --- a/diive/pkgs/flux/__init__.py +++ b/diive/pkgs/flux/__init__.py @@ -1,2 +1 @@ -import diive.pkgs.flux.criticaldays -import diive.pkgs.flux.co2_penalty + diff --git a/diive/pkgs/flux/co2_penalty.py b/diive/pkgs/flux/co2_penalty.py deleted file mode 100644 index 4023405..0000000 --- a/diive/pkgs/flux/co2_penalty.py +++ /dev/null @@ -1,882 +0,0 @@ -# todo SEE FURTHER DOWN: RandomForestTS was updated (Sep 2023), but this NEARLY should work - -from pathlib import Path -from typing import Literal - -import matplotlib as mpl -import matplotlib.gridspec as gridspec -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -from matplotlib import dates as mdates -from pandas import DataFrame, Series - -import diive.core.dfun.frames as frames -from diive.pkgs.fits.fitter import BinFitterCP -# from diive.core.dfun.frames import steplagged_variants -from diive.core.plotting._fitplot import fitplot -from diive.core.plotting.plotfuncs import default_legend, default_format, nice_date_ticks, save_fig, add_zeroline_y -from diive.core.plotting.styles.LightTheme import COLOR_NEP, COLOR_RECO -from diive.core.times.times import include_timestamp_as_cols -from diive.pkgs.createvar.vpd import calc_vpd_from_ta_rh -from diive.pkgs.gapfilling.randomforest_ts import RandomForestTS - - -class CO2penalty: - - def __init__( - self, - df: DataFrame, - vpd_col: str, - nee_col: str, - swin_col: str, - ta_col: str, - rh_col: str, - thres_chd_ta: float, - thres_chd_vpd: float, - thres_nchd_ta: tuple[float, float], - thres_nchd_vpd: tuple[float, float], - penalty_start_month: int = 5, - penalty_end_month: int = 9, - **random_forest_params - ): - """ - - Args: - df: Timeseries data with timestamp as index in half-hourly time resolution - vpd_col: VPD column name, is used to 
define critical conditions (kPa) - nee_col: NEE column name (net ecosystem exchange of CO2, umol CO2 m-2 s-1), - used to calculate net ecosystem productivity (NEP=-NEE) - swin_col: Short-wave incoming radiation column name (W m-2) - ta_col: Air temperature column name (°C) - rh_col: Relative humidity column name (%) - thres_chd_ta: Air temperature threshold for critical heat days - thres_chd_vpd: VPD threshold for critical heat days - thres_nchd_ta: Lower and upper air temperature thresholds that define - near-critical heat days - lower threshold <= near-critical days <= upper threshold - thres_nchd_vpd: Lower and upper VPD thresholds that define near-critical heat days - lower threshold <= near-critical days <= upper threshold - """ - # Columns - self.df = df.copy() - self.vpd_col = vpd_col - self.nee_col = nee_col - self.swin_col = swin_col - self.ta_col = ta_col - self.rh_col = rh_col - self.thres_chd_ta = thres_chd_ta - self.thres_chd_vpd = thres_chd_vpd - self.thres_nchd_ta = thres_nchd_ta - self.thres_nchd_vpd = thres_nchd_vpd - self.penalty_start_month = penalty_start_month - self.penalty_end_month = penalty_end_month - self.random_forest_params = random_forest_params - - # Convert NEE units: umol CO2 m-2 s-1 --> g CO2 m-2 30min-1 - self.df[self.nee_col] = self.df[self.nee_col].multiply(0.0792171) - - # Calculate NEP from NEE - self.nep_col = "NEP" - self.df[self.nep_col] = self.df[self.nee_col].multiply(-1) - - # Columns that will be limited - self.vpd_col_limited = f'_LIMITED_{self.vpd_col}' - # VPD limited potentially needs gapfilling b/c it is calculated from TA limited - self.vpd_col_limited_gapfilled = f'{self.vpd_col_limited}_gfRF' - self.ta_col_limited = f'_LIMITED_{self.ta_col}' - self.swin_col_limited = f'_LIMITED_{self.swin_col}' - self.swin_col_limited_gapfilled = f'_LIMITED_{self.swin_col}_gfRF' - self.nep_col_limited = f'_LIMITED_{self.nep_col}' - self.nep_col_limited_gf = f'_LIMITED_{self.nep_col}_gfRF' - - # Results from gapfilling - self._gapfilled_df = None - self._gf_results = None - self._penalty_hires_df = None - self._penalty_min_year = None - self._penalty_per_year_df = None - - @property - def penalty_hires_df(self) -> DataFrame: - """Return gap-filled dataframe""" - if not isinstance(self._penalty_hires_df, DataFrame): - raise Exception('No gap-filled data found') - return self._penalty_hires_df - - @property - def penalty_per_year_df(self) -> DataFrame: - """Yearly overview of carbon cost per year""" - if not isinstance(self._penalty_per_year_df, DataFrame): - raise Exception('No gap-filled data found') - return self._penalty_per_year_df - - @property - def penalty_min_year(self) -> int: - """Year when carbon cost was highest (=most negative number, minimum)""" - if not isinstance(self._penalty_min_year, int): - raise Exception('No gap-filled data found') - return self._penalty_min_year - - @property - def gapfilled_df(self) -> DataFrame: - """Return gap-filled dataframe for focus months""" - if not isinstance(self._gapfilled_df, DataFrame): - raise Exception('No gap-filled data found') - return self._gapfilled_df - - @property - def gf_results(self) -> DataFrame: - """Return gap-filled data results for focus months""" - if not self._gf_results: - raise Exception('No gap-filled data found') - return self._gf_results - - def calculate_penalty(self, **kwargs): - self._penalty_hires_df, \ - self._penalty_per_year_df, \ - self._gapfilled_df, \ - self._gf_results, \ - self._penalty_min_year = \ - self._calculate_penalty(**kwargs) - - def _gapfill(self, df: 
DataFrame, target_col: str, random_state: int = None, n_bootstrap_runs: int = 11, - lagged_variants: int = 1): - # Gapfilling random forest - - # todo RandomForestTS was updated (Sep 2023), but this NEARLY should work - # todo The yearpools are not implements - # Lagged variants - df = steplagged_variants(df=df, - stepsize=1, - stepmax=1, - exclude_cols=[target_col]) - - df = include_timestamp_as_cols(df=df, txt="(...)") - - rfts = RandomForestTS( - input_df=df, - target_col=target_col, - verbose=1, - n_estimators=n_bootstrap_runs, - random_state=random_state, - min_samples_split=2, - min_samples_leaf=1, - n_jobs=-1 - ) - - rfts.trainmodel(showplot_predictions=True, showplot_importance=True, verbose=1) - rfts.fillgaps(showplot_scores=True, showplot_importance=True, verbose=1) - - _gapfilled_df, _gf_results = rfts.get_gapfilled_dataset() - - # Reindex to have same index as full dataset (full timestamp range) - _gapfilled_df = _gapfilled_df.reindex(self.df.index) - return _gapfilled_df, _gf_results - - def _calculate_penalty(self, random_state: int = None): - - print("Calculating penalty ...") - - # Limit/remove CHD data - _df = self._limit_chd_data() - - # Make subset with vars required for gapfilling - # limited_cols = [col for col in _df.columns if '_LIMITED_' in col] - limited_cols = [self.nep_col_limited, self.ta_col_limited, - self.vpd_col_limited_gapfilled, self.swin_col_limited_gapfilled] - _df_limited = _df[limited_cols].copy() - - # Gapfilling NEP - gapfilled_df, gf_results = self._gapfill(df=_df_limited, - target_col=self.nep_col_limited, - random_state=random_state, - **self.random_forest_params) - - # Merge gapfilled with full data range - penalty_df = self.df.copy() - - gapfilled_flag_col = f'QCF_{self.nep_col_limited_gf}' - penalty_df[self.nep_col_limited_gf] = gapfilled_df[self.nep_col_limited_gf].copy() - penalty_df[gapfilled_flag_col] = gapfilled_df[gapfilled_flag_col].copy() - - # Calculate CO2 penalty - penalty_df['PENALTY'] = penalty_df[self.nep_col_limited_gf].sub(penalty_df[self.nep_col]) - - # Cumulatives - penalty_df[f'CUMSUM_{self.nep_col_limited_gf}'] = penalty_df[self.nep_col_limited_gf].cumsum() - penalty_df[f'CUMSUM_{self.nep_col}'] = penalty_df[self.nep_col].cumsum() - penalty_df['CUMSUM_PENALTY'] = penalty_df['PENALTY'].cumsum() - - # Add limited columns - penalty_df[limited_cols] = _df_limited[limited_cols].copy() - - # Add flag columns - penalty_df['FLAG_CHD'] = _df['FLAG_CHD'].copy() - penalty_df['FLAG_nCHD'] = _df['FLAG_nCHD'].copy() - - # Collect info for yearly overview - - # Detect year with highest carbon cost - penalty_per_year_df = penalty_df[['PENALTY']].groupby(penalty_df.index.year).sum() - penalty_per_year_df[self.nep_col_limited_gf] = \ - penalty_df[self.nep_col_limited_gf].groupby(penalty_df.index.year).sum() - penalty_per_year_df[f'{self.nep_col}'] = penalty_df[self.nep_col].groupby(penalty_df.index.year).sum() - - # Add info about number of CHDs - _subset = penalty_df[[self.ta_col, self.vpd_col]].copy() - _subset = _subset.resample('D').max() - _locs_chd = (_subset[self.vpd_col] > self.thres_chd_vpd) & (_subset[self.ta_col] > self.thres_chd_ta) - _subset = _subset.loc[_locs_chd].copy() - _subset = _subset.groupby(_subset.index.year).count() - penalty_per_year_df['num_CHDs'] = _subset[self.ta_col].copy() - penalty_per_year_df = penalty_per_year_df.fillna(0) - # todo hier weiter - - # _n_chd = _n_chd.loc[_locs_chd] - # _n_chd = _n_chd.groupby(_n_chd.index.year).count() - # _n_chd = _n_chd.fillna(0) - # penalty_per_year_df['num_CHDs'] = _n_chd - 
- penalty_min_year = int(penalty_per_year_df['PENALTY'].idxmin()) - penalty_min = penalty_per_year_df.min() - - return penalty_df, penalty_per_year_df, gapfilled_df, gf_results, penalty_min_year - - def _insert_aggregates_into_hires(self, hires_df: DataFrame) -> tuple[DataFrame, str, str]: - """Insert daily max of TA and VPD into high-res dataframe""" - - # Aggregation settings - settings = dict(to_freq='D', - to_agg='max', - interpolate_missing_vals=False, - interpolation_lim=False) - - # Insert aggregated TA as column in hires dataframe - _hiresagg_ta = frames.aggregated_as_hires(aggregate_series=hires_df[self.ta_col], - hires_timestamp=hires_df.index, - **settings) - hires_df[_hiresagg_ta.name] = _hiresagg_ta - hiresagg_ta_name = str(_hiresagg_ta.name) - - # Insert aggregated VPD as column in hires dataframe - _hiresagg_vpd = frames.aggregated_as_hires(aggregate_series=hires_df[self.vpd_col], - hires_timestamp=hires_df.index, - **settings) - hires_df[_hiresagg_vpd.name] = _hiresagg_vpd - hiresagg_vpd_name = str(_hiresagg_vpd.name) - return hires_df, hiresagg_ta_name, hiresagg_vpd_name - - def _get_hires_chd_data(self, - hires_df: DataFrame, - hiresagg_ta_name: str, - hiresagg_vpd_name: str) -> tuple[DataFrame, Series]: - # Get hires CHD data (critical heat days) - _locs_chd = (hires_df[hiresagg_ta_name] >= self.thres_chd_ta) & \ - (hires_df[hiresagg_vpd_name] >= self.thres_chd_vpd) & \ - (hires_df.index.month >= self.penalty_start_month) & \ - (hires_df.index.month <= self.penalty_end_month) - chd_hires_df = hires_df[_locs_chd].copy() - return chd_hires_df, _locs_chd - - def _get_hires_nchd_data(self, - hires_df: DataFrame, - hiresagg_ta_name: str, - hiresagg_vpd_name: str) -> tuple[DataFrame, Series]: - locs_nchd = (hires_df[hiresagg_ta_name] <= self.thres_nchd_ta[1]) & \ - (hires_df[hiresagg_ta_name] >= self.thres_nchd_ta[0]) & \ - (hires_df[hiresagg_vpd_name] <= self.thres_nchd_vpd[1]) & \ - (hires_df[hiresagg_vpd_name] >= self.thres_nchd_vpd[0]) & \ - (hires_df.index.month >= self.penalty_start_month) & \ - (hires_df.index.month <= self.penalty_end_month) - nchd_hires_df = hires_df[locs_nchd].copy() - return nchd_hires_df, locs_nchd - - def _limit_chd_data(self) -> DataFrame: - """Limit/remove data on critical heat days - - Set CHD data to their diel cycle medians - - Remove NEP CHD data - """ - - # High-res dataframe - hires_df = self.df.copy() - - # Add daily maxima of TA and VPD - hires_df, \ - hiresagg_ta_name, \ - hiresagg_vpd_name = self._insert_aggregates_into_hires(hires_df=hires_df) - - # Get hires CHD data (critical heat days) - chd_hires_df, locs_chd = self._get_hires_chd_data(hires_df=hires_df, - hiresagg_ta_name=hiresagg_ta_name, - hiresagg_vpd_name=hiresagg_vpd_name) - - # Get highres nCHD data (near-critical heat days) - nchd_hires_df, locs_nchd = self._get_hires_nchd_data(hires_df=hires_df, - hiresagg_ta_name=hiresagg_ta_name, - hiresagg_vpd_name=hiresagg_vpd_name) - - # Add flag to mark CHD and nCHD data - flag_chd_col = 'FLAG_CHD' - hires_df[flag_chd_col] = 0 - hires_df.loc[locs_chd, flag_chd_col] = 1 - - flag_nchd_col = 'FLAG_nCHD' - hires_df[flag_nchd_col] = 0 - hires_df.loc[locs_nchd, flag_nchd_col] = 1 - - # Calculate TA diel cycles (half-hourly medians) for nCHD - diel_cycles_nchds_df = self._diel_cycle(data=nchd_hires_df[self.ta_col], agg='median') - - # Insert time as column for merging diel cycles with hires data - diel_cycles_nchds_df['TIME'] = diel_cycles_nchds_df.index - diel_cycles_nchds_df.index.name = 'INDEX' # Rename to avoid same name as TIME 
column, otherwise merging fails - hires_df['TIME'] = hires_df.index.time - - # Add TA diel cycles from nCHDs to hires data, merge on time - orig_ix_name = hires_df.index.name - hires_df[orig_ix_name] = hires_df.index # Original index - hires_df = hires_df.merge(diel_cycles_nchds_df, left_on='TIME', right_on='TIME', how='left') - hires_df = hires_df.set_index(orig_ix_name) # Re-apply original index (merging lost index) - - # Remove TA CHD data and replace with TEMPLATE (diel cycles) - # TA CHD data will be replaced with nCHD diel cycle median - matching_template_col = f'_TEMPLATE_{self.ta_col}' - hires_df[self.ta_col_limited] = hires_df[self.ta_col].copy() # Copy original data - hires_df.loc[locs_chd, self.ta_col_limited] = np.nan # Remove data on critical days, creates gaps - hires_df[self.ta_col_limited].fillna(hires_df[matching_template_col], - inplace=True) # Fill gaps with diel cycle nCHD medians - - # Calculate VPD from limited TA and original (measured) RH, - # also needs gap-filling (using SW_IN, limited TA and timestamp info as features) - hires_df[self.vpd_col_limited] = calc_vpd_from_ta_rh(df=hires_df, rh_col=self.rh_col, - ta_col=self.ta_col_limited) - rf_subset = hires_df[[self.vpd_col_limited, self.swin_col, self.ta_col_limited]].copy() - _gapfilled_df, _gf_results = self._gapfill(df=rf_subset, target_col=self.vpd_col_limited, - n_bootstrap_runs=100) - hires_df[self.vpd_col_limited_gapfilled] = _gapfilled_df[self.vpd_col_limited_gapfilled].copy() - - # Remove SW_IN CHD data - # SW_IN CHD data will be replaced with random forest gapfilling - # matching_template_col = f'_TEMPLATE_{self.swin_col}' - hires_df[self.swin_col_limited] = hires_df[self.swin_col].copy() # Copy original data - hires_df.loc[locs_chd, self.swin_col_limited] = np.nan # Remove data on critical days, creates gaps - rf_subset = hires_df[[self.swin_col_limited, self.ta_col_limited]].copy() - _gapfilled_df, _gf_results = self._gapfill(df=rf_subset, target_col=self.swin_col_limited, - lagged_variants=0, n_bootstrap_runs=100) - hires_df[self.swin_col_limited_gapfilled] = _gapfilled_df[self.swin_col_limited_gapfilled].copy() - - # Remove NEP on critical days - # Will be gap-filled with random forest - hires_df[self.nep_col_limited] = hires_df[self.nep_col].copy() - hires_df.loc[locs_chd, self.nep_col_limited] = np.nan - - # Plot check - hires_df[[self.ta_col, '_TEMPLATE_Tair_f', self.ta_col_limited]].plot(title='Tair_f', - xlim=('2019-06-15', '2019-07-15'), - subplots=True) - hires_df[[self.vpd_col, self.vpd_col_limited]].plot(title='VPD_f', - xlim=('2019-06-15', '2019-07-15'), - subplots=True) - hires_df[[self.nep_col, self.nep_col_limited]].plot(title=self.nep_col, xlim=('2019-06-15', '2019-07-15')) - plt.show() - - return hires_df - - def _diel_cycle(self, data: DataFrame or Series, agg: str or dict) -> DataFrame: - """Calculate diel cycles grouped by time""" - diel_cycles_df = DataFrame(data) - diel_cycles_df['TIME'] = diel_cycles_df.index.time - diel_cycles_df = diel_cycles_df.groupby('TIME').agg(agg) - diel_cycles_df = diel_cycles_df.add_prefix('_TEMPLATE_') - return diel_cycles_df - - def plot_critical_hours(self, ax, - which_threshold: Literal['crd'] = 'crd', - figletter: str = '', - show_fitline: bool = True, - fit_n_bootstraps: int = 10, - show_year_labels: bool = False, - show_title: bool = False, - fit_type: str = 'quadratic_offset', - labels_below: list = None, - labels_above: list = None, - labels_right: list = None, - labels_left: list = None, - labels_shifted: list = None, - decorate_labels1: list = 
None, - decorate_labels2: list = None): - - df = self.df.copy() - - if fit_n_bootstraps < 2: - fit_n_bootstraps = 2 - - xlabel_units = "$\mathrm{hours\ yr^{-1}}$" - ylabel_penalty = r"$\mathrm{CO_{2}\ penalty}$" - ylabel_units = r"$\mathrm{gCO_{2}\ m^{-2}\ yr^{-1}}$" - - # Count half-hours > CHD threshold - # thr_crd = self.results_crd_threshold_detection['thres_crd'] - locs_above_thr_chd = (df[self.vpd_col] > self.thres_chd_vpd) & (df[self.ta_col] > self.thres_chd_ta) - df_above_thr_chd = df.loc[locs_above_thr_chd].copy() - df_above_thr_chd = df_above_thr_chd[[self.ta_col, self.vpd_col]].copy() - hh_above_thr_crd = df_above_thr_chd.groupby(df_above_thr_chd.index.year).count() - - # Penalty YY results, overview - penalty_per_year = self.penalty_per_year_df['PENALTY'].copy() - # penalty_per_year = penalty_per_year.multiply(-1) # Make penalty positive --> is already positive w/ NEP - # penalty_per_year.loc[penalty_per_year < 0] = 0 # Leave negative CCT in dataset - penalty_per_year.loc[penalty_per_year == 0] = 0 - - # Combine - penalty_vs_hh_thr = pd.DataFrame(penalty_per_year) - penalty_vs_hh_thr['Hours above THR_CHD'] = hh_above_thr_crd[self.ta_col].divide(2) - penalty_vs_hh_thr = penalty_vs_hh_thr.fillna(0) - - xcol = 'Hours above THR_CHD' - - # Fit - if show_fitline: - - # Bootstrapping, mainly for prediction intervals 95% range - bts_fit_results = {} - predict_min_x = penalty_vs_hh_thr[xcol].min() # Prediction range, same for all bootstraps - predict_max_x = penalty_vs_hh_thr[xcol].max() - - n_bts_successful = 0 # Number of succesful bootstrapping runs - while n_bts_successful < fit_n_bootstraps: - # for bts_run in range(0, fit_n_bootstraps): - - try: - if n_bts_successful == 0: - # Run zero is the original data, not bootstrapped - bts_df = penalty_vs_hh_thr.copy() - else: - # Bootstrap data - bts_df = penalty_vs_hh_thr.sample(n=int(len(penalty_vs_hh_thr)), replace=True, - random_state=None) - - # Fit - fitter = BinFitterCP(df=bts_df, - xcol=xcol, - ycol='PENALTY', - n_predictions=10000, - predict_min_x=predict_min_x, - predict_max_x=predict_max_x, - n_bins_x=0, - bins_y_agg='mean', - fit_type=fit_type) - fitter.run() - bts_fit_results[n_bts_successful] = fitter.get_results() - n_bts_successful += 1 - print(f"Bootstrapping run {n_bts_successful} for fit line successful ... ") - except: - print(f"Bootstrapping for fit line failed, repeating run {n_bts_successful + 1} ... 
") - pass - - # Collect bootstrapping results - _fit_x_predbands = pd.DataFrame() - _upper_predbands = pd.DataFrame() - _lower_predbands = pd.DataFrame() - for bts_run in range(1, fit_n_bootstraps): - _fit_x_predbands[bts_run] = bts_fit_results[bts_run]['fit_df']['fit_x'].copy() - _upper_predbands[bts_run] = bts_fit_results[bts_run]['fit_df']['upper_predband'].copy() - _lower_predbands[bts_run] = bts_fit_results[bts_run]['fit_df']['lower_predband'].copy() - predbands_quantiles_95 = pd.DataFrame() - predbands_quantiles_95['fit_x'] = _fit_x_predbands.mean(axis=1) # Output is the same for all bootstraps - predbands_quantiles_95['upper_Q97.5'] = _upper_predbands.quantile(q=.975, axis=1) - predbands_quantiles_95['lower_Q97.5'] = _lower_predbands.quantile(q=.975, axis=1) - predbands_quantiles_95['upper_Q02.5'] = _upper_predbands.quantile(q=.025, axis=1) - predbands_quantiles_95['lower_Q02.5'] = _lower_predbands.quantile(q=.025, axis=1) - - # Fitplot - line_xy_gpp, line_fit_gpp, line_fit_ci_gpp, line_fit_pb_gpp, line_highlight = \ - fitplot(ax=ax, - label='year', - flux_bts_results=bts_fit_results[0], - alpha=1, - edgecolor=COLOR_RECO, - color=COLOR_RECO, - color_fitline=COLOR_RECO, - show_prediction_interval=False, - size_scatter=90, - fit_type=fit_type) - - # line_xy = ax.plot(bts_fit_results[0]['fit_df']['fit_x'], - # bts_fit_results[0]['fit_df']['upper_predband'], - # zorder=1, ls='--', color=COLOR_RECO, label="95% prediction interval") - # line_xy = ax.plot(bts_fit_results[0]['fit_df']['fit_x'], - # bts_fit_results[0]['fit_df']['lower_predband'], - # zorder=1, ls='--', color=COLOR_RECO) - - # ax.fill_between(predbands_quantiles_95['fit_x'], - # predbands_quantiles_95['upper_Q97.5'], - # predbands_quantiles_95['upper_Q02.5'], - # alpha=.2, lw=0, color='#ef9a9a', edgecolor='white', - # zorder=1) - # ax.fill_between(predbands_quantiles_95['fit_x'], - # predbands_quantiles_95['lower_Q97.5'], - # predbands_quantiles_95['lower_Q02.5'], - # alpha=.2, lw=0, color='#ef9a9a', edgecolor='white', - # zorder=1) - - # Prediction bands (smoothed) - line_xy = ax.plot(predbands_quantiles_95['fit_x'], - predbands_quantiles_95['upper_Q97.5'].rolling(400, center=True).mean(), - zorder=1, ls='--', color=COLOR_RECO, label="95% prediction interval") - line_xy = ax.plot(predbands_quantiles_95['fit_x'], - predbands_quantiles_95['lower_Q02.5'].rolling(400, center=True).mean(), - zorder=1, ls='--', color=COLOR_RECO) - - # Title - if show_title: - ax.set_title(f"{figletter} {ylabel_penalty} per year", x=0.05, y=1, size=24, ha='left', va='top', - weight='normal') - # ax.set_title(f"Carbon cost per year", x=0.95, y=0.95, size=24, ha='right', weight='bold') - # ax.text(0.95, 0.93, f"{_units}", size=20, color='#9E9E9E', backgroundcolor='none', alpha=1, - # horizontalalignment='right', verticalalignment='center', transform=ax.transAxes, weight='bold') - - # Format - default_format(ax=ax, - ax_xlabel_txt=f'Critical heat ({xlabel_units})', - ax_ylabel_txt=f'{ylabel_penalty} ({ylabel_units})', - showgrid=False) - add_zeroline_y(ax=ax, data=bts_fit_results[0]['y']) - - # Legend - default_legend(ax=ax, ncol=1, loc=(.07, .75)) - # ax.legend(ncol=1, edgecolor='none', loc=(.1, .7), prop={'size': 16}, facecolor='none') - - # Spines - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) - - if show_year_labels: - - for year in penalty_vs_hh_thr.index: - txt_y = penalty_vs_hh_thr['PENALTY'].loc[penalty_vs_hh_thr.index == year].values[0] - txt_x = penalty_vs_hh_thr[xcol].loc[penalty_vs_hh_thr.index == 
year].values[0] - xytext = (-11, -14) - arrowprops = None - - if year in labels_below: - pass - elif year in labels_above: - xytext = (-11, 5) - elif year in labels_right: - xytext = (5, -4) - elif year in labels_left: - xytext = (-28, -4) - else: - xytext = None - - decoration = "" - if year in decorate_labels1: - decoration = "***" - elif year in decorate_labels2: - decoration = "*" - - if xytext: - text_in_ax = \ - ax.annotate(f'{year}{decoration}', (txt_x, txt_y), - xytext=xytext, xycoords='data', - color='black', weight='normal', fontsize=9, - textcoords='offset points', zorder=100, - arrowprops=arrowprops) - - def plot_cumulatives(self, ax, - figletter: str = '', - year: int = None, - showline_modeled: bool = True, - showfill_penalty: bool = True, - showtitle: bool = True): - - gapfilled_col = f'_LIMITED_{self.nep_col}_gfRF' - label_penalty = r"$\mathrm{CO_{2}\ penalty}$" - label_units_cumulative = r"$\mathrm{gCO_{2}\ m^{-2}}$" - - penalty_perc = None - if year: - df = self.penalty_hires_df.loc[self.penalty_hires_df.index.year == year] - num_crds = int(self.penalty_per_year_df.loc[self.penalty_per_year_df.index == year]['num_CHDs']) - obs = float(self.penalty_per_year_df.loc[self.penalty_per_year_df.index == year][self.nep_col]) - pot = float(self.penalty_per_year_df.loc[self.penalty_per_year_df.index == year][gapfilled_col]) - penalty = float(self.penalty_per_year_df.loc[self.penalty_per_year_df.index == year]['PENALTY']) - - # obs and pot both show UPTAKE and - # pot shows *MORE* uptake than obs - if (obs > 0) and (pot > 0) and (pot > obs): - diff = pot - obs - penalty_perc = diff / pot - penalty_perc *= 100 - # Example: - # pot=488, obs=378 - # 488 - 378 = 110 - # 110 / 488 = 0.225 - # obs uptake was 22.5% lower compared to pot - - # obs and pot both show EMISSION and - # pot shows *LESS* emission than obs - elif (obs < 0) and (pot < 0) and (pot < obs): - diff = pot - obs - penalty_perc = diff / abs(pot) - penalty_perc *= 100 - # Example: - # pot=-115, obs=-150 - # -115 - -150 = 35 - # 35 / 115 = 0.304 - # obs emission was 30.4% higher compared to pot - - else: - penalty_perc = -9999 - - - else: - df = self.penalty_hires_df - num_crds = int(self.penalty_per_year_df['num_CHDs'].sum()) - obs = float(self.penalty_per_year_df[self.nep_col].sum()) - pot = float(self.penalty_per_year_df[gapfilled_col].sum()) - penalty = float(self.penalty_per_year_df['PENALTY'].sum()) - if (obs < 0) and (pot < 0) and (pot < obs): - penalty_perc = (1 - (obs / pot)) * 100 - - cumulative_orig = df[self.nep_col].cumsum() # Cumulative of original measured and gap-filled NEP - cumulative_model = df[gapfilled_col].cumsum() # NEP where hot days were modeled - - # Original data as measured and gap-filled - x = cumulative_orig.index - y = cumulative_orig - ax.plot(x, y, color=COLOR_NEP, alpha=0.9, ls='-', lw=3, marker='', - markeredgecolor='none', ms=0, zorder=99, label='observed NEP') - ax.plot(x[-1], y[-1], ms=10, zorder=100, color=COLOR_NEP) - ax.text(x[-1], y[-1], f" {cumulative_orig[-1]:.0f}", size=20, - color=COLOR_NEP, backgroundcolor='none', alpha=1, - horizontalalignment='left', verticalalignment='center') - - # Modeled hot days - if showline_modeled: - linestyle = '-' - marksersize = 10 - txtsize = 20 - label = 'potential NEP' - else: - # Hide elements if not required - linestyle = 'None' - marksersize = 0 - txtsize = 0 - label = None - x = cumulative_model.index - y = cumulative_model - ax.plot(x, y, color=COLOR_RECO, alpha=0.9, ls=linestyle, lw=3, marker='', - markeredgecolor='none', ms=0, zorder=98, 
label=label) - ax.plot(x[-1], y[-1], ms=marksersize, zorder=100, color=COLOR_RECO) - ax.text(x[-1], y[-1], f" {cumulative_model[-1]:.0f}", size=txtsize, color=COLOR_RECO, - backgroundcolor='none', alpha=1, horizontalalignment='left', - verticalalignment='center') - - # Fill between: NEP penalty - if showfill_penalty: - mpl.rcParams['hatch.linewidth'] = 2 # Set width of hatch lines - ax.fill_between(cumulative_model.index, cumulative_model, cumulative_orig, - alpha=.7, lw=0, color='#ef9a9a', edgecolor='white', - zorder=1, hatch='//', label=label_penalty) - txt = f"critical heat days: {num_crds}\n" \ - f"NEP reduction: {penalty_perc:.0f}%\n" \ - f"{label_penalty}: {np.abs(penalty):.0f} {label_units_cumulative}\n" - # r"$\bf{" + str(number) + "}$" - ax.text(.5, .1, txt, size=16, color='black', backgroundcolor='none', - alpha=1, horizontalalignment='left', verticalalignment='center', - transform=ax.transAxes, weight='normal', linespacing=1.4) - - # Zero-line - ax.axhline(0, color='black') - - # Title - if showtitle: - if year: - title_year = year - else: - title_year = f"{df.index.year[0]} - {df.index.year[-1]}" - ax.set_title(f"{figletter} {label_penalty} {title_year}", x=0.05, y=1, size=24, ha='left', weight='normal') - - # Format - default_format(ax=ax, - ax_xlabel_txt='Date', - ax_ylabel_txt=f"Cumulative NEP ({label_units_cumulative})", - showgrid=False) - - # Legend - default_legend(ax=ax, ncol=1, loc=(.11, .82)) - # ax.legend(ncol=1, edgecolor='none', loc=(.5, .7), prop={'size': 14}) - - # Ticks - ax.tick_params(axis='both', which='major', direction='in', labelsize=16, length=8, size=5) # x/y ticks text - - # Nice format for date ticks - locator = mdates.AutoDateLocator(minticks=12, maxticks=12) - ax.xaxis.set_major_locator(locator) - formatter = mdates.ConciseDateFormatter(locator, show_offset=False) - ax.xaxis.set_major_formatter(formatter) - - # Limits - # ax.set_xlim(df.index[0], df.index[-1]) - - # Spines - ax.spines['top'].set_visible(False) - ax.spines['right'].set_visible(False) - - def plot_day_example(self, ax_nep, ax_ta, ax_vpd, ax_swin, - showline_nep_modeled: bool = True, - showline_ta_modeled: bool = True, - showline_vpd_modeled: bool = True, - showline_swin_modeled: bool = True, - showfill_penalty: bool = True, - ): - - label_penalty = r"$\mathrm{NEP\ penalty}$" - label_units = r"$\mathrm{gCO_{2}\ m^{-2}\ 30min^{-1}}$" - - df = self.penalty_hires_df.copy() - - subset = df.loc[df['FLAG_CHD'] == 1, :].copy() - subset = subset.groupby(subset.index.time).mean() - subset.index = pd.to_datetime(subset.index, format='%H:%M:%S') - - x = subset.index - props = dict(ls='-') - - # Observed NEP - ax_nep.plot(x, subset[self.nep_col], - label="on critical days", color=COLOR_NEP, lw=2, ms=6, **props) - - # Modeled NEP - lw = 2 if showline_nep_modeled else 0 - ms = 6 if showline_nep_modeled else 0 - label = "modeled" if showline_nep_modeled else None - ax_nep.plot(x, subset[self.nep_col_limited_gf], - label=label, color=COLOR_RECO, lw=lw, ms=ms, **props) - - # Observed TA - ax_ta.plot(x, subset[self.ta_col], - label="on critical days", color=COLOR_NEP, lw=2, ms=6, **props) - - # Modeled TA - lw = 2 if showline_ta_modeled else 0 - ms = 6 if showline_ta_modeled else 0 - label = "on near-critical days" if showline_ta_modeled else None - ax_ta.plot(x, subset[self.ta_col_limited], - label=label, color=COLOR_RECO, lw=lw, ms=ms, **props) - - # Observed VPD - ax_vpd.plot(x, subset[self.vpd_col], - label="on critical days", color=COLOR_NEP, lw=2, ms=6, **props) - - # Newly calculated VPD - lw = 2 
if showline_vpd_modeled else 0 - ms = 6 if showline_vpd_modeled else 0 - label = "newly calculated" if showline_vpd_modeled else None - ax_vpd.plot(x, subset[self.vpd_col_limited_gapfilled], color=COLOR_RECO, - label=label, lw=lw, ms=ms, **props) - - # Observed SW_IN - ax_swin.plot(x, subset[self.swin_col], - label="on critical days", color=COLOR_NEP, lw=2, ms=6, **props) - - # SW_IN from random forest - lw = 2 if showline_swin_modeled else 0 - ms = 6 if showline_swin_modeled else 0 - label = "modeled" if showline_swin_modeled else None - ax_swin.plot(x, subset[self.swin_col_limited_gapfilled], color=COLOR_RECO, - label=label, lw=lw, ms=ms, **props) - - # Fill area penalty - if showfill_penalty: - ax_nep.fill_between(x, subset[self.nep_col], subset[self.nep_col_limited_gf], - alpha=.7, lw=0, color='#ef9a9a', edgecolor='white', - zorder=1, hatch='//', label=label_penalty) - - props = dict(x=.5, y=1.05, size=24, ha='center', va='center', weight='normal') - ax_nep.set_title("NEP", **props) - ax_ta.set_title("TA", **props) - ax_vpd.set_title("VPD", **props) - ax_swin.set_title("SW_IN", **props) - - ax_nep.axhline(0, color='black', lw=1) - - default_legend(ax=ax_nep, loc='upper center') - default_legend(ax=ax_ta, loc='upper left') - default_legend(ax=ax_vpd, loc='upper left') - default_legend(ax=ax_swin, loc='upper left') - - props = dict(ax_labels_fontsize=16) - default_format(ax=ax_nep, ax_ylabel_txt='NEP', txt_ylabel_units=f"({label_units})", **props) - default_format(ax=ax_ta, ax_ylabel_txt='TA', txt_ylabel_units="(°C)", ax_xlabel_txt='Time (hour)', **props) - default_format(ax=ax_vpd, ax_ylabel_txt="VPD", txt_ylabel_units="(kPa)", **props) - default_format(ax=ax_swin, ax_ylabel_txt="SW_IN", txt_ylabel_units="(W m-2)", **props) - - nice_date_ticks(ax=ax_nep, locator='hour') - nice_date_ticks(ax=ax_ta, locator='hour') - nice_date_ticks(ax=ax_vpd, locator='hour') - nice_date_ticks(ax=ax_swin, locator='hour') - - def showplot_critical_hours(self, - saveplot: bool = True, - title: str = None, - path: Path or str = None, - dpi: int = 72, - **kwargs): - fig = plt.figure(facecolor='white', figsize=(9, 9), dpi=dpi) - gs = gridspec.GridSpec(1, 1) # rows, cols - # gs.update(wspace=0, hspace=0, left=.2, right=.8, top=.8, bottom=.2) - ax = fig.add_subplot(gs[0, 0]) - self.plot_critical_hours(ax=ax, **kwargs) - fig.tight_layout() - fig.show() - if saveplot: - save_fig(fig=fig, title=title, path=path) - - def showplot_day_example(self, - saveplot: bool = False, - title: str = None, - path: Path or str = None, - dpi: int = 72, - **kwargs): - fig = plt.figure(facecolor='white', figsize=(23, 6), dpi=dpi) - gs = gridspec.GridSpec(1, 4) # rows, cols - # gs.update(wspace=0, hspace=0, left=.2, right=.8, top=.8, bottom=.2) - ax_nep = fig.add_subplot(gs[0, 0]) - ax_ta = fig.add_subplot(gs[0, 1]) - ax_vpd = fig.add_subplot(gs[0, 2]) - ax_swin = fig.add_subplot(gs[0, 3]) - ax_nep.xaxis.axis_date() - ax_ta.xaxis.axis_date() - ax_vpd.xaxis.axis_date() - ax_swin.xaxis.axis_date() - self.plot_day_example(ax_nep=ax_nep, ax_ta=ax_ta, - ax_vpd=ax_vpd, ax_swin=ax_swin, **kwargs) - fig.tight_layout() - fig.show() - if saveplot: - save_fig(fig=fig, title=title, path=path) - - def showplot_cumulatives(self, - saveplot: bool = False, - title: str = '', - path: Path or str = None, - dpi: int = 72, - **kwargs): - fig = plt.figure(facecolor='white', figsize=(9, 9), dpi=dpi) - gs = gridspec.GridSpec(1, 1) # rows, cols - # gs.update(wspace=0, hspace=0, left=.2, right=.8, top=.8, bottom=.2) - ax = fig.add_subplot(gs[0, 0]) - 
ax.xaxis.axis_date() - self.plot_cumulatives(ax=ax, **kwargs) - fig.tight_layout() - fig.show() - if saveplot: - save_fig(fig=fig, title=title, path=path) - - -if __name__ == '__main__': - pass diff --git a/diive/pkgs/flux/criticaldays.py b/diive/pkgs/flux/criticaldays.py deleted file mode 100644 index 89c2071..0000000 --- a/diive/pkgs/flux/criticaldays.py +++ /dev/null @@ -1,545 +0,0 @@ -""" -============= -CRITICAL DAYS -============= -""" -from pathlib import Path -from typing import Literal - -import matplotlib.gridspec as gridspec -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -from matplotlib.legend_handler import HandlerTuple -from pandas import DataFrame - -import diive.core.plotting.styles.LightTheme as theme -from diive.core.plotting import plotfuncs -from diive.core.plotting.plotfuncs import save_fig -from diive.core.plotting.rectangle import rectangle -from diive.core.plotting.styles.LightTheme import COLOR_THRESHOLD, \ - FONTSIZE_LEGEND, COLOR_NEE, INFOTXT_FONTSIZE -from __local_folders.__archived._binfitter import BinFitterBTS, PlotBinFitterBTS - -pd.set_option('display.width', 1500) -pd.set_option('display.max_columns', 30) -pd.set_option('display.max_rows', 30) - - -class CriticalDays: - - def __init__( - self, - df: DataFrame, - x_col: str, - y_col: str, - thres_min_x: float or int, - x_agg: Literal['max'] = 'max', - y_agg: Literal['sum'] = 'sum', - usebins: int = 10, - n_bootstrap_runs: int = 10, - bootstrapping_random_state: int = None, - thres_from_bootstrap: Literal['max', 'median'] = 'max', - thres_y_sign_change: Literal['+', '-'] = '-' - ): - """Detect threshold in x that defines critical days in y - - For example, investigate daily NEP sums (y) in relation to - daily max VPD (x). - - Args: - df: Data in half-hourly time resolution - x_col: Column name of x variable, e.g. 'VPD' - y_col: Column name of y variable, e.g. 'NEP' - thres_min_x: Minimum value for x at threshold - e.g., VPD must be min 1 kPa for threshold to be accepted - x_agg: Daily aggregation for x, e.g. 'max' selects the daily maximum - y_agg: Daily aggregation for y, e.g. 'sum' selects daily sums - usebins: XXX (default 10) - n_bootstrap_runs: Number of bootstrap runs during detection of - critical heat days. Must be an odd number. In case an even - number is given +1 is added automatically. 
- bootstrapping_random_state: Option to use fixed random state - thres_from_bootstrap: Which aggregate to use as threshold, - e.g., for bootstrapping results [2.1, 2.2, 2.1, 2.5, 2.3] 'max' - selects 2.5 as threshold - thres_y_sign_change: Defines what the detected threshold describes, can - be '+' or '-' - e.g., if '+', the threshold describes the *x* value where - *y* changes from a negative sign to a positive sign - e.g., if '-', the threshold describes the *x* value where - *y* changes from a positive sign to a negative sign - e.g., for flux NEE if '+', the threshold describes the VPD - value where NEE becomes positive (i.e., net loss of CO2) - e.g., for flux NEP if '-', the threshold describes the VPD - value where NEP becomes negative (i.e., net loss of CO2) - """ - - # self.df = df[[x_col, y_col]].copy() - self.df = df.copy() - self.x_col = x_col - self.x_agg = x_agg - self.y_col = y_col - self.y_agg = y_agg - self.usebins = usebins - self.bootstrapping_random_state = bootstrapping_random_state - self.thres_from_bootstrap = thres_from_bootstrap - self.thres_y_sign_change = thres_y_sign_change - self.thres_min_x = thres_min_x - - self.n_bootstrap_runs = n_bootstrap_runs - - # Resample dataset - aggs = ['mean', 'median', 'count', 'min', 'max', 'sum'] - self.df_aggs = self._resample_dataset(df=self.df, aggs=aggs, day_start_hour=None) - - self.predict_min_x = self.df[x_col].min() - self.predict_max_x = self.df[x_col].max() - - self._results_threshold_detection = {} # Collects results for each bootstrap run - self._results_daytime_analysis = {} - self._results_optimum_range = {} # Results for optimum range - - @property - def results_threshold_detection(self) -> dict: - """Return bootstrap results for daily fluxes, includes threshold detection""" - if not self._results_threshold_detection: - raise Exception('Results for threshold detection are empty') - return self._results_threshold_detection - - def detect_dcrit_threshold(self): - """Detect critical days x threshold for y""" - - # Fit to bootstrapped data, daily time resolution - fit_results, bts_fit_results = self._bootstrap_fits(df_aggs=self.df_aggs, - x_col=self.x_col, - x_agg=self.x_agg, - y_col=self.y_col, - y_agg=self.y_agg, - fit_to_bins=self.usebins) - - # Add info about zero crossings to fit results - fit_results = self._collect_zerocrossings(fit_results=fit_results) - for key, cur_fit_results in bts_fit_results.items(): - bts_fit_results[key] = self._collect_zerocrossings(fit_results=bts_fit_results[key]) - - # Get flux zerocrossing values (y has crossed zeroline) from bootstrap runs - bts_zerocrossings_df = self._bts_zerocrossings_collect(bts_fit_results=bts_fit_results) - - # Calc flux equilibrium points aggregates from bootstrap runs - bts_zerocrossings_aggs = self._zerocrossings_aggs(bts_zerocrossings_df=bts_zerocrossings_df) - - # Threshold for Critical Heat Days (Dcrit) - # defined as the linecrossing e.g. MAX x (e.g. VPD) from bootstrap runs - # thres_dcrit = bts_zerocrossings_aggs['x_median'] - thres_dcrit = bts_zerocrossings_aggs[f"x_{self.thres_from_bootstrap}"] # e.g. "x_max" - - # Collect days above or equal to Dcrit threshold - df_aggs_dcrit = self.df_aggs.loc[self.df_aggs[self.x_col][self.x_agg] >= thres_dcrit, :].copy() - - # Number of days above Dcrit threshold - n_dcrit = len(df_aggs_dcrit) - - # Collect Near-Critical Heat Days (nDcrit) - # With the number of Dcrit known, collect data for the same number - # of days below of equal to Dcrit threshold. 
- # For example: if 10 Dcrit were found, nDcrit are the 10 days closest - # to the Dcrit threshold (below or equal to the threshold). - sortby_col = (self.x_col, self.x_agg) - ndcrit_start_ix = n_dcrit - ndcrit_end_ix = n_dcrit * 2 - df_aggs_ndcrit = self.df_aggs \ - .sort_values(by=sortby_col, ascending=False) \ - .iloc[ndcrit_start_ix:ndcrit_end_ix] - - # Threshold for nDcrit - # The lower threshold is e.g. the minimum of found x maxima, depending - # on the param "thres_from_bootstrap" - thres_ndcrit_lower = df_aggs_ndcrit[self.x_col][self.x_agg].min() - thres_ndcrit_upper = thres_dcrit - - # Number of days above nDcrit threshold and below or equal Dcrit threshold - n_ndcrit = len(df_aggs_ndcrit) - - # Collect results - self._results_threshold_detection = dict( - fit_results=fit_results, # Non-boostrapped fit results - bts_zerocrossings_df=bts_zerocrossings_df, - bts_zerocrossings_aggs=bts_zerocrossings_aggs, - thres_dcrit=thres_dcrit, - thres_ndcrit_lower=thres_ndcrit_lower, - thres_ndcrit_upper=thres_ndcrit_upper, - df_aggs_dcrit=df_aggs_dcrit, - df_aggs_ndcrit=df_aggs_ndcrit, - n_dcrit=n_dcrit, - n_ndcrit=n_ndcrit, - ) - - def plot_dcrit_detection_results(self, ax, - x_units: str, - y_units: str, - x_label: str = None, - y_label: str = None, - showfit: bool = True, - highlight_year: int = None, - showline_dcrit: bool = True, - showrange_dcrit: bool = True, - label_threshold: str = None): - """Plot results from critical days threshold detection""" - - label_threshold = 'threshold' if not label_threshold else label_threshold - - # bts_results = self.results_threshold_detection['bts_results'][0] # 0 means non-bootstrapped data - fit_results = self.results_threshold_detection['fit_results'] - bts_zerocrossings_aggs = self.results_threshold_detection['bts_zerocrossings_aggs'] - - self.ax, line_xy, line_highlight, line_fit, line_fit_ci, line_fit_pb = \ - PlotBinFitterBTS(fit_results=fit_results, - ax=ax, label="NEP", - highlight_year=highlight_year, edgecolor='#B0BEC5', - color='none', color_fitline=COLOR_NEE, - showfit=showfit, show_prediction_interval=True).plot_binfitter() - - # Threshold line - # Vertical line showing e.g. 
MAX Dcrit threshold from bootstrap runs - line_dcrit_vertical = None - if showline_dcrit: - thres_dcrit = self.results_threshold_detection['thres_dcrit'] - line_dcrit_vertical = ax.axvline(thres_dcrit, lw=3, - color=COLOR_THRESHOLD, ls='-', zorder=99, - label=f"{label_threshold} = {thres_dcrit:.2f} {x_units}") - # Text Dcrit - n_dcrit = len(self.results_threshold_detection['df_aggs_dcrit']) - sym_max = r'$\rightarrow$' - _pos = thres_dcrit + thres_dcrit * 0.02 - t = ax.text(_pos, 0.1, f"{sym_max} {n_dcrit} critical days {sym_max}", size=INFOTXT_FONTSIZE, - color='black', backgroundcolor='none', transform=ax.get_xaxis_transform(), - alpha=1, horizontalalignment='left', verticalalignment='top', zorder=99) - t.set_bbox(dict(facecolor=COLOR_THRESHOLD, alpha=.7, edgecolor=COLOR_THRESHOLD)) - - # # Area Dcrit - # area_dcrit = ax.fill_between([bts_zerocrossings_aggs[_thres_agg], - # fit_results['fit_df']['fit_x'].max()], - # 0, 1, color='#BA68C8', alpha=0.1, - # transform=ax.get_xaxis_transform(), - # label=f"Dcrit ({n_dcrit} days)", zorder=1) - - # ## Rectangle bootstrap range Dcrit - range_bts_netzeroflux = None - if showrange_dcrit: - range_bts_netzeroflux = rectangle( - ax=ax, - rect_lower_left_x=bts_zerocrossings_aggs['x_min'], - rect_lower_left_y=0, - rect_width=bts_zerocrossings_aggs['x_max'] - bts_zerocrossings_aggs['x_min'], - rect_height=1, - label=f"{label_threshold} range ({self.n_bootstrap_runs} bootstraps)", - color=COLOR_THRESHOLD) - - # Format - ax.axhline(0, lw=1, color='black') - if x_label: - x_label = f"{x_label} ({x_units})" - else: - x_label = f"Daily {self.x_agg} {self.x_col} ({x_units})" - if y_label: - y_label = f"{y_label} ({y_units})" - else: - y_label = f"Daily {self.y_agg} {self.y_col} ({y_units})" - plotfuncs.default_format(ax=ax, ax_xlabel_txt=x_label, ax_ylabel_txt=y_label) - - # Custom legend - # This legend replaces the legend from PlotBinFitterBTS - # Assign two of the handles to the same legend entry by putting them in a tuple - # and using a generic handler map (which would be used for any additional - # tuples of handles like (p1, p3)). 
- # https://matplotlib.org/stable/gallery/text_labels_and_annotations/legend_demo.html - l = ax.legend( - [ - line_dcrit_vertical if showline_dcrit else None, - range_bts_netzeroflux if showrange_dcrit else None, - line_xy, - line_highlight if line_highlight else None, - line_fit if showfit else None, - line_fit_ci if showfit else None, - line_fit_pb if showfit else None - ], - [ - line_dcrit_vertical.get_label() if showline_dcrit else None, - range_bts_netzeroflux.get_label() if showrange_dcrit else None, - line_xy.get_label(), - line_highlight.get_label() if line_highlight else None, - line_fit.get_label() if showfit else None, - line_fit_ci.get_label() if showfit else None, - line_fit_pb.get_label() if showfit else None - ], - bbox_to_anchor=(0, 1), - frameon=False, - fontsize=FONTSIZE_LEGEND, - loc="lower left", - ncol=2, - scatterpoints=1, - numpoints=1, - handler_map={tuple: HandlerTuple(ndivide=None)}) - - ax.text(0.06, 0.95, "(a)", - size=theme.AX_LABELS_FONTSIZE, color='black', backgroundcolor='none', transform=ax.transAxes, - alpha=1, horizontalalignment='left', verticalalignment='top') - - return ax - - def _resample_dataset(self, df: DataFrame, aggs: list, day_start_hour: int = None): - """Resample to daily values from *day_start_hour* to *day_start_hour*""" - if day_start_hour: - df_aggs = df.resample('D', offset=f'{day_start_hour}H').agg(aggs) - else: - df_aggs = df.resample('D').agg(aggs) - df_aggs = df_aggs.where(df_aggs[self.x_col]['count'] == 48) # Full days only - # df_aggs = df_aggs.where(df_aggs[self.x_col]['count'] == 48).dropna() # Full days only - df_aggs.index.name = 'TIMESTAMP_START' - return df_aggs - - def _zerocrossings_aggs(self, bts_zerocrossings_df: pd.DataFrame) -> dict: - """Aggregate linecrossing results from bootstrap runs""" - # linecrossings_x = [] - # linecrossings_y_gpp = [] - # for b in range(1, self.bootstrap_runs + 1): - # linecrossings_x.append(self.bts_results[b]['linecrossing_vals']['x_col']) - # linecrossings_y_gpp.append(self.bts_results[b]['linecrossing_vals']['gpp_nom']) - - zerocrossings_aggs = dict( - x_median=round(bts_zerocrossings_df['x_col'].median(), 6), - x_min=bts_zerocrossings_df['x_col'].min(), - x_max=bts_zerocrossings_df['x_col'].max(), - y_median=bts_zerocrossings_df['y_nom'].median(), - y_min=bts_zerocrossings_df['y_nom'].min(), - y_max=bts_zerocrossings_df['y_nom'].max(), - ) - - return zerocrossings_aggs - - def _bts_zerocrossings_collect(self, bts_fit_results) -> DataFrame: - """Collect all zero crossing values in DataFrame""" - bts_linecrossings_df = pd.DataFrame() - for bts in range(0, self.n_bootstrap_runs): - _dict = bts_fit_results[bts]['zerocrossing_vals'] - _series = pd.Series(_dict) - _series.name = bts - if bts == 0: - bts_linecrossings_df = pd.DataFrame(_series).T - else: - bts_linecrossings_df = bts_linecrossings_df.append(_series.T) - return bts_linecrossings_df - - def _bootstrap_fits(self, - df_aggs: DataFrame, - x_col: str, - x_agg: str, - y_col: str, - y_agg: str, - fit_to_bins: int = 10) -> tuple[dict, dict]: - """Bootstrap ycols and fit to x""" - - # Get column names in aggregated df - x_col = (x_col, x_agg) - y_col = (y_col, y_agg) - - fitter = BinFitterBTS( - df=df_aggs, - n_bootstraps=self.n_bootstrap_runs, - x_col=x_col, - y_col=y_col, - predict_max_x=self.predict_max_x, - predict_min_x=self.predict_min_x, - n_predictions=1000, - n_bins_x=0, - # bins_y_agg='mean', - fit_type='quadratic' - ) - fitter.fit() - - fit_results = fitter.fit_results - bts_fit_results = fitter.bts_fit_results - - return 
fit_results, bts_fit_results - - def _collect_zerocrossings(self, fit_results) -> dict: - """Collect zero-crossing results from all fit results""" - zerocrossing_vals = self._detect_zerocrossing_y(fit_df=fit_results['fit_df']) - if isinstance(zerocrossing_vals, dict): - pass - else: - raise ValueError - fit_results['zerocrossing_vals'] = zerocrossing_vals - print(f"Found zero-crossing values: {fit_results['zerocrossing_vals']}") - return fit_results - - def _detect_zerocrossing_y(self, fit_df: DataFrame): - # kudos: https://stackoverflow.com/questions/28766692/intersection-of-two-graphs-in-python-find-the-x-value - - n_zerocrossings = None - zerocrossings_ix = None - - # Collect predicted vals in df - zerocrossings_df = pd.DataFrame() - zerocrossings_df['x_col'] = fit_df['fit_x'] - zerocrossings_df['y_nom'] = fit_df['nom'] - - # Check values above/below zero - _signs = np.sign(zerocrossings_df['y_nom']) - _signs_max = _signs.max() - _signs_min = _signs.min() - - if _signs_max == _signs_min: - print("y does not cross zero-line.") - else: - zerocrossings_ix = np.argwhere(np.diff(_signs)).flatten() - n_zerocrossings = len(zerocrossings_ix) - - # There must be one single line crossing to accept result - # If there is more than one line crossing, reject result - if n_zerocrossings == 1: - zerocrossings_ix = zerocrossings_ix[0] - else: - raise ("More than 1 zero-crossing detected. " - "There must be one single line crossing to accept result. " - "Stopping.") - - # Values at zero crossing needed - # Found index is last element *before* zero-crossing, therefore + 1 - zerocrossing_vals = zerocrossings_df.iloc[zerocrossings_ix + 1] - zerocrossing_vals = zerocrossing_vals.to_dict() - - # Value for y at zero crossing - # i.e. reject if NEE after zero-crossing does not change to emission - - # Check if the sign after the crossing is indeed negative as expected - if (self.thres_y_sign_change == '-') & (zerocrossing_vals['y_nom'] < 0): - pass - # Check if the sign after the crossing is indeed positive as expected - elif (self.thres_y_sign_change == '+') & (zerocrossing_vals['y_nom'] > 0): - pass - # If the sign after the crossing is positive against expectations, return None - elif (self.thres_y_sign_change == '-') & (zerocrossing_vals['y_nom'] > 0): - return None - # If the sign after the crossing is negative against expectations, return None - elif (self.thres_y_sign_change == '+') & (zerocrossing_vals['y_nom'] < 0): - return None - - # x value must be above threshold to be somewhat meaningful, otherwise reject result - if (zerocrossing_vals['x_col'] < self.thres_min_x): - # x is too low, must be at least 1 kPa for valid crossing - return None - - return zerocrossing_vals - - def _aggregate_by_group(self, df, groupby_col, date_col, min_vals, aggs: list) -> pd.DataFrame: - """Aggregate dataset by *day/night groups*""" - - # Aggregate values by day/night group membership, this drops the date col - agg_df = \ - df.groupby(groupby_col) \ - .agg(aggs) - # .agg(['median', q25, q75, 'count', 'max']) - # .agg(['median', q25, q75, 'min', 'max', 'count', 'mean', 'std', 'sum']) - - # Add the date col back to data - grp_daynight_col = groupby_col - agg_df[grp_daynight_col] = agg_df.index - - # For each day/night group, detect its start and end time - - ## Start date (w/ .idxmin) - grp_starts = df.groupby(groupby_col).idxmin()[date_col].dt.date - grp_starts = grp_starts.to_dict() - grp_startdate_col = '.GRP_STARTDATE' - agg_df[grp_startdate_col] = agg_df[grp_daynight_col].map(grp_starts) - - ## End date (w/ 
.idxmax) - grp_ends = df.groupby(groupby_col).idxmax()[date_col].dt.date - grp_ends = grp_ends.to_dict() - grp_enddate_col = '.GRP_ENDDATE' - agg_df[grp_enddate_col] = agg_df[grp_daynight_col].map(grp_ends) - - # Set start date as index - agg_df = agg_df.set_index(grp_startdate_col) - agg_df.index = pd.to_datetime(agg_df.index) - - # Keep consecutive time periods with enough values (min. 11 half-hours) - agg_df = agg_df.where(agg_df[self.x_col]['count'] >= min_vals).dropna() - - return agg_df - - def showplot_criticaldays(self, - saveplot: bool = False, - title: str = None, - path: Path or str = None, - dpi: int = 72, - **kwargs): - fig = plt.figure(figsize=(9, 9), dpi=dpi) - gs = gridspec.GridSpec(1, 1) # rows, cols - # gs.update(wspace=.2, hspace=1, left=.1, right=.9, top=.85, bottom=.1) - ax = fig.add_subplot(gs[0, 0]) - ax = self.plot_dcrit_detection_results(ax=ax, **kwargs) - fig.tight_layout() - fig.show() - if saveplot: - save_fig(fig=fig, title=title, path=path) - return ax - - -if __name__ == '__main__': - # # Subset - # subset_cols = [vpd_col, nee_col] - # df = df_orig[subset_cols].copy().dropna() - # - # # Convert units - # df[vpd_col] = df[vpd_col].multiply(0.1) # hPa --> kPa - # df[nee_col] = df[nee_col].multiply(0.0792171) # umol CO2 m-2 s-1 --> g CO2 m-2 30min-1 - # df = df.loc[df.index.year <= 2022] - # df = df.loc[(df.index.month >= 5) & (df.index.month <= 9)] - - from diive.core.io.files import load_pickle - - # Variables - vpd_col = 'VPD_f' - nee_col = 'NEE_CUT_REF_f' - x_col = vpd_col - y_col = nee_col - - # Load data, using pickle for fast loading - source_file = r"L:\Dropbox\luhk_work\20 - CODING\21 - DIIVE\diive\__apply\co2penalty_dav\input_data\CH-DAV_FP2022.1_1997-2022.08_ID20220826234456_30MIN.diive.csv.pickle" - df_orig = load_pickle(filepath=source_file) - df_orig = df_orig.loc[df_orig.index.year <= 2022].copy() - # df_orig = df_orig.loc[df_orig.index.year >= 2019].copy() - - # Select daytime data between May and September 1997-2021 - maysep_dt_df = df_orig.loc[(df_orig.index.month >= 5) & (df_orig.index.month <= 9)].copy() - - # Convert units - maysep_dt_df[vpd_col] = maysep_dt_df[vpd_col].multiply(0.1) # hPa --> kPa - maysep_dt_df[nee_col] = maysep_dt_df[nee_col].multiply(0.0792171) # umol CO2 m-2 s-1 --> g CO2 m-2 30min-1 - x_units = "kPa" - y_units = "gCO_{2}\ m^{-2}\ 30min^{-1}" - xlabel = f"Half-hourly VPD ({x_units})" - ylabel = f"{y_col} (${y_units}$)" - - # Critical days - dcrit = CriticalDays( - df=maysep_dt_df, - x_col=vpd_col, - x_agg='max', - y_col=nee_col, - y_agg='sum', - usebins=0, - n_bootstrap_runs=99, - bootstrapping_random_state=None, - thres_from_bootstrap='max' - ) - dcrit.detect_dcrit_threshold() - results_threshold_detection = dcrit.results_threshold_detection - dcrit.showplot_criticaldays(x_units='kPa', - y_units="gCO_{2}\ m^{-2}\ d^{-1}", - highlight_year=2019, - showline_dcrit=True, - showrange_dcrit=True) diff --git a/diive/pkgs/flux/criticalheatdays.py b/diive/pkgs/flux/criticalheatdays.py deleted file mode 100644 index db39cc8..0000000 --- a/diive/pkgs/flux/criticalheatdays.py +++ /dev/null @@ -1,824 +0,0 @@ -from pathlib import Path -from typing import Literal - -import matplotlib.gridspec as gridspec -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -from matplotlib.legend_handler import HandlerTuple -from pandas import DataFrame, Series - -from diive.core.dfun.frames import flatten_multiindex_all_df_cols -from diive.core.plotting.heatmap_xyz import HeatmapPivotXYZ -from diive.core.plotting.plotfuncs import 
default_format, save_fig -from diive.core.plotting.styles.LightTheme import COLOR_NEP, FONTSIZE_LEGEND -from diive.pkgs.analyses.quantilexyaggz import QuantileXYAggZ -from __local_folders.__archived._fitter import QuadraticFit - - -class FluxCriticalHeatDaysP95: - exception = f'Result not available, call .run() first.' - n_bins = 20 - - def __init__(self, - df: DataFrame, - ta_col: str, - vpd_col: str, - flux_col: str, - ta_agg: Literal['min', 'max', 'mean', 'median', 'sum'] = 'max', - vpd_agg: Literal['min', 'max', 'mean', 'median', 'sum'] = 'max', - flux_agg: Literal['min', 'max', 'mean', 'median', 'sum'] = 'sum', - additional_cols: list = None): - """ - Calculate the thresholds for air temperature (TA) and vapor - pressure deficit (VPD) for critical heat days, defined as days - when the respective TA and VPD daily maxima (default) were - >= their respective 95th percentile. - - Data *df* are converted to daily aggregates. By default, for TA - the daily maxima are calculated, for VPD the daily maxima and - for the flux the daily sums. - - The aggregated values for TA and VPD are then divided into - 20 equally-sized bins, respectively. The 20th bin contains all - data in the 95th - 100th percentile range. The thresholds - corresponds to the starting value of the 95th bin, respectively - for TA and VPD. Days when both the daily maximum TA and the - daily maximum VPD equal or exceed this threshold are labelled - as critical heat days. - - chd / CHD = critical heat days - nchd / nCHD = near-critical heat days - - Args: - df: Input data as time series at time resolution < 1 day - For this analysis the time series are later aggregated - to daily values. - ta_col: Name of the column in *df* that contains the air - temperature time series - vpd_col: Name of the column in *df* that contains the - VPD time series - flux_col: Name of the column in *df* that contains the flux - time series - ta_agg: Aggregation method for air temperature from which the - threshold is calculated. If the default 'max' is used, then - the air temperature threshold for CHD corresponds to the - 95th percentile of daily max air temperatures. - vpd_agg: Aggregation method for VPD from which the threshold - is calculated. If the default 'max' is used, then the - VPD threshold for CHD corresponds to the 95th percentile - of daily max VPD values. - flux_agg: # todo - additional_cols: List of additional variables (column names) - that should be included in some output results. This makes - sense e.g. for other fluxes, such as GPP and RECO. 
- """ - self.df = df - self.ta_col = ta_col - self.vpd_col = vpd_col - self.flux_col = flux_col - self.ta_agg = ta_agg - self.vpd_agg = vpd_agg - self.flux_agg = flux_agg - self.additional_cols = additional_cols - - # Data series - self.ta_agg_col = f"{self.ta_col}_{self.ta_agg}" - self.vpd_agg_col = f"{self.vpd_col}_{self.vpd_agg}" - self.flux_agg_col = f"{self.flux_col}_{self.flux_agg}" - - # Instance attributes - self._xdata = None - self._ydata = None - self._zdata = None - self._xyz_pivot_df = None - self._xyz_long_df = None - - self._xyz_long_extended_df = None - self._combobins_df = None - self._xyz_long_extended_bins_equal_df = None - self._xyz_long_extended_bins_vpdhigher_df = None - self._xyz_long_extended_bins_tahigher_df = None - self._combobins_bins_equal_df = None - self._combobins_bins_tahigher_df = None - self._combobins_bins_vpdhigher_df = None - self._thres_chd_ta = None - self._thres_chd_vpd = None - self._thres_nchd_ta = None - self._thres_nchd_vpd = None - self._fit_results = None - self._xyz_long_extended_criticalheatdays_df = None - self._xyz_long_extended_nearcriticalheatdays_df = None - - self._results = None - - def get_results(self) -> dict: - """All results collected in dict""" - if not isinstance(self._results, dict): - raise Exception(self.exception) - return self._results - - def _checkprop(self, p, dtype): - if not isinstance(p, dtype): - raise Exception(self.exception) - return p - - @property - def xdata(self) -> Series: - """Time series of daily aggregated x values""" - return self._checkprop(self._xdata, Series) - - @property - def ydata(self) -> Series: - """Time series of daily aggregated y values""" - return self._checkprop(self._ydata, Series) - - @property - def zdata(self) -> Series: - """Time series of daily aggregated z values""" - return self._checkprop(self._zdata, Series) - - @property - def thres_chd_ta(self) -> float: - """Threshold for air temperature to define critical heat days""" - return self._checkprop(self._thres_chd_ta, float) - - @property - def thres_chd_vpd(self) -> float: - """Threshold for VPD to define critical heat days""" - return self._checkprop(self._thres_chd_vpd, float) - - @property - def thres_nchd_ta(self) -> tuple: - """Upper and lower thresholds for air temperature to define near-critical heat days""" - return self._checkprop(self._thres_nchd_ta, tuple) - - @property - def thres_nchd_vpd(self) -> tuple: - """Upper and lower thresholds for VPD to define near-critical heat days""" - return self._checkprop(self._thres_nchd_vpd, tuple) - - @property - def xyz_pivot_df(self) -> DataFrame: - """Aggregated z in xy quantiles, pivoted""" - return self._checkprop(self._xyz_pivot_df, DataFrame) - - @property - def xyz_long_df(self) -> DataFrame: - """Time series of xyz, including binning info""" - return self._checkprop(self._xyz_long_df, DataFrame) - - @property - def xyz_long_extended_df(self) -> DataFrame: - """Time series of xyz with additional aggregates and including - extended binning info""" - return self._checkprop(self._xyz_long_extended_df, DataFrame) - - @property - def xyz_long_extended_bins_equal_df(self) -> DataFrame: - """Same data as *xyz_long_extended_df*, but only containing data - where x and y fall into the same respective bin, e.g. 
both are in - their respective 90th percentile""" - return self._checkprop(self._xyz_long_extended_bins_equal_df, DataFrame) - - @property - def xyz_long_extended_bins_tahigher_df(self) -> DataFrame: - """Same data as *xyz_long_extended_df*, but only containing data - where x falls into a higher bin than y, e.g. x falls into the 60th - percentile and y falls into the respective 45th percentile""" - return self._checkprop(self._xyz_long_extended_bins_tahigher_df, DataFrame) - - @property - def xyz_long_extended_bins_vpdhigher_df(self) -> DataFrame: - """Same data as *xyz_long_extended_df*, but only containing data - where y falls into a higher bin than x, e.g. y falls into the 80th - percentile and x falls into the respective 50th percentile""" - return self._checkprop(self._xyz_long_extended_bins_vpdhigher_df, DataFrame) - - @property - def combobins_df(self) -> DataFrame: - """Aggregated xyz in combined bins (sum) of x and y. The combined bins - can result from any combination of x and y bins, e.g. the combined bin - 100 can result from the combination 60+40 (x in the 60th percentile bin, - y in the 40th percentile bin), but also from any other combination that - results in a sum of 100 (e.g. 30+70, 80+20, 50+50, ...). However, the most - extreme combobin 190 has only one possible combination (95+95), which is - also the case for combobin 0 (0+0).""" - return self._checkprop(self._combobins_df, DataFrame) - - @property - def combobins_bins_equal_df(self) -> DataFrame: - """Aggregated xyz in combo bins, but only including z values - where x and y are members of the same respective bin, e.g. both x and - y fall into their respective 65th percentile bin. The resulting dataframe - shows the sum of the combined bins, e.g. the combined bin 100 collects - data where both x and y were in their respective 50th percentile bin, - (50+50), e.g. combined bin 190 is the most extreme combo (95+95). - See also docstring for *combobins_df*.""" - return self._checkprop(self._combobins_bins_equal_df, DataFrame) - - @property - def combobins_bins_tahigher_df(self) -> DataFrame: - """Aggregated xyz in combo bins, but only including z values - where x falls into a higher respective bin than and y. - See also docstring for *combobins_df*.""" - return self._checkprop(self._combobins_bins_tahigher_df, DataFrame) - - @property - def combobins_bins_vpdhigher_df(self) -> DataFrame: - """Aggregated xyz in combo bins, but only including z values - where y falls into a higher respective bin than and x. - See also docstring for *combobins_df*.""" - return self._checkprop(self._combobins_bins_vpdhigher_df, DataFrame) - - @property - def xyz_long_extended_criticalheatdays_df(self) -> DataFrame: - """Time series of xyz with additional aggregates and including - extended binning info, but only including data where x and y - are above their respective threshold. - See also docstring for *xyz_long_extended_df*.""" - return self._checkprop(self._xyz_long_extended_criticalheatdays_df, DataFrame) - - @property - def xyz_long_extended_nearcriticalheatdays_df(self) -> DataFrame: - """Time series of xyz with additional aggregates and including - extended binning info, but only including data where (1) x and y - are both in their respective 90th percentile bin, or where (2) x - or y are in their respective 95th bin, while the other is in the - 90th bin. 
Therefore, the following percentile bin combinations define - near-extreme conditions: 90+90, 95+90 or 90+95.""" - return self._checkprop(self._xyz_long_extended_nearcriticalheatdays_df, DataFrame) - - @property - def fit_results(self) -> dict: - """XXX""" - return self._checkprop(self._fit_results, dict) - - def _setnames(self, df: DataFrame): - """Get data for xyz with desired aggregation""" - x = df[self.ta_agg_col].copy() - y = df[self.vpd_agg_col].copy() - z = df[self.flux_agg_col].copy() - return x, y, z - - def run(self, bins_min_n_vals: int = 5, verbose: bool = True): - - # Create xyz subset - _subsetdf = self._create_subset() - - # Calc xyz daily aggregates - _subsetdf_daily = self._resample_daily_aggs(subset=_subsetdf) - - # Get data for xyz - self._xdata, self._ydata, self._zdata = self._setnames(df=_subsetdf_daily) - - # Based on the x and y aggregates, assign bins to data and then aggregate z in bins - self._xyz_pivot_df, self._xyz_long_df = self._assign_bins(binagg_z='mean', - n_quantiles=self.n_bins, - min_n_vals_per_bin=bins_min_n_vals) - - # Collect bin info and add it to the extended dataframe - _bin_info = self._collect_bininfo(df=self._xyz_long_df) - self._xyz_long_extended_df = self._add_bininfo(left=_subsetdf_daily, right=_bin_info) - _binmissing = self._xyz_long_extended_df['BINS_COMBINED_INT'].isnull() - self._xyz_long_extended_df = self._xyz_long_extended_df[~_binmissing].copy() - - # Calculate difference between xy bins - self._xyz_long_extended_df['BIN_DIFF'] = self._bin_difference() - - # Collect data for different bin scenarios - # (1) Data where TA and VPD are in the same bin, respectively, e.g. both in 50th quantile - self._xyz_long_extended_bins_equal_df = self.xyz_long_extended_df.loc[ - self.xyz_long_extended_df['BIN_DIFF'] == 0].copy() - - # (2) Data where TA is in a higher bin than VPD - self._xyz_long_extended_bins_tahigher_df = self.xyz_long_extended_df.loc[ - self.xyz_long_extended_df['BIN_DIFF'] > 5].copy() - - # (3) Data where VPD is in a higher bin than TA - self._xyz_long_extended_bins_vpdhigher_df = self.xyz_long_extended_df.loc[ - self.xyz_long_extended_df['BIN_DIFF'] < 5].copy() - - # Calculate stats for combo bins for each bin scenario 1-3 and over all data - self._combobins_df = self._calc_combobins(df=self.xyz_long_extended_df) - self._combobins_bins_equal_df = self._calc_combobins(df=self.xyz_long_extended_bins_equal_df) - self._combobins_bins_tahigher_df = self._calc_combobins(df=self.xyz_long_extended_bins_tahigher_df) - self._combobins_bins_vpdhigher_df = self._calc_combobins(df=self.xyz_long_extended_bins_vpdhigher_df) - - # Collect extreme days - self._xyz_long_extended_criticalheatdays_df, \ - self._xyz_long_extended_nearcriticalheatdays_df = \ - self._criticalheatdays_subset() - - # Get thresholds from pivot - self._thres_chd_ta, \ - self._thres_chd_vpd, \ - self._thres_nchd_ta, \ - self._thres_nchd_vpd = \ - self._thresholds() - - # Collect results in dict - self._results = self._collect_results() - - if verbose: - self._results_overview() - - def _results_overview(self): - res = self.get_results() - print("# Results are stored in a dictionary that can be accessed with .get_results()." 
- "\nDictionary keys:") - for k in res.keys(): - print(f" {k}") - print("\n# Critical heat days are defined by:") - print(f" Daily maximum air temperature >= {res['thres_chd_ta']}") - print(f" Daily maximum VPD >= {res['thres_chd_vpd']}") - - thres_nchd_ta_lower = res['thres_nchd_ta'][0] - thres_nchd_ta_upper = res['thres_nchd_ta'][1] - thres_nchd_vpd_lower = res['thres_nchd_vpd'][0] - thres_nchd_vpd_upper = res['thres_nchd_vpd'][1] - print("\n# Near-critical heat days are defined by:") - print(f" Daily maximum air temperature >= {thres_nchd_ta_lower:.3f} and <= {thres_nchd_ta_upper:.3f}") - print(f" Daily maximum VPD >= {thres_nchd_vpd_lower:.3f} and <= {thres_nchd_vpd_upper:.3f}") - - print("\n# Number of critical and near-critical heat days:") - print(f" Number of critical heat days: {len(res['xyz_long_extended_criticalheatdays_df'])}") - print(f" Number of near-critical heat days: {len(res['xyz_long_extended_nearcriticalheatdays_df'])}") - - def _bin_difference(self) -> Series: - """Calculate the difference between the bins""" - xbins = f'BIN_{self.ta_agg_col}' - ybins = f'BIN_{self.vpd_agg_col}' - return self._xyz_long_extended_df[xbins] - self._xyz_long_extended_df[ybins] - - def _collect_results(self): - return { - 'xdata': self.xdata, - 'ydata': self.ydata, - 'zdata': self.zdata, - 'thres_chd_ta': self.thres_chd_ta, - 'thres_chd_vpd': self.thres_chd_vpd, - 'thres_nchd_ta': self.thres_nchd_ta, - 'thres_nchd_vpd': self.thres_nchd_vpd, - 'xyz_pivot_df': self.xyz_pivot_df, - 'xyz_long_df': self.xyz_long_df, - 'xyz_long_extended_df': self.xyz_long_extended_df, - 'xyz_long_extended_bins_equal_df': self.xyz_long_extended_bins_equal_df, - 'xyz_long_extended_bins_tahigher_df': self.xyz_long_extended_bins_tahigher_df, - 'xyz_long_extended_bins_vpdhigher_df': self.xyz_long_extended_bins_vpdhigher_df, - 'xyz_long_extended_criticalheatdays_df': self.xyz_long_extended_criticalheatdays_df, - 'xyz_long_extended_nearcriticalheatdays_df': self.xyz_long_extended_nearcriticalheatdays_df, - 'combobins_df': self.combobins_df, - 'combobins_bins_equal_df': self.combobins_bins_equal_df, - 'combobins_bins_tahigher_df': self.combobins_bins_tahigher_df, - 'combobins_bins_vpdhigher_df': self.combobins_bins_vpdhigher_df, - } - - def _criticalheatdays_subset(self): - """Collect daily data of critical and near-critical heat days in separate subsets""" - - bin_ta = self.xyz_long_extended_df[f'BIN_{self.ta_agg_col}'] - bin_vpd = self.xyz_long_extended_df[f'BIN_{self.vpd_agg_col}'] - - # Data for critical heat days - # Both daily max (default) TA and VPD are in the 95-100th percentile range - locs_chd = (bin_ta == 95) & (bin_vpd == 95) - criticalheatdays_df = self.xyz_long_extended_df[locs_chd].copy() - - # Data for near-critical heat days - # Both daily max (default) TA and VPD are in the 90-95th percentile range - locs_nchd = (bin_ta == 90) & (bin_vpd == 90) - nearcriticalheatdays_df = self.xyz_long_extended_df[locs_nchd].copy() - - criticalheatdays_df = criticalheatdays_df.set_index('DATE') - nearcriticalheatdays_df = nearcriticalheatdays_df.set_index('DATE') - criticalheatdays_df.index = pd.to_datetime(criticalheatdays_df.index) - nearcriticalheatdays_df.index = pd.to_datetime(nearcriticalheatdays_df.index) - - return criticalheatdays_df, nearcriticalheatdays_df - - def _thresholds(self) -> tuple[float, float, tuple[float, float], tuple[float, float]]: - """Get the threshold values for critical and near-critical heat days""" - - # Thresholds for critical heat days - thres_chd_ta = 
self.xyz_long_extended_criticalheatdays_df[self.ta_agg_col].min() - thres_chd_vpd = self.xyz_long_extended_criticalheatdays_df[self.vpd_agg_col].min() - - # Thresholds for near-critical heat days - thres_nchd_ta = self.xyz_long_extended_nearcriticalheatdays_df[self.ta_agg_col].min(), \ - self.xyz_long_extended_nearcriticalheatdays_df[self.ta_agg_col].max() - thres_nchd_vpd = self.xyz_long_extended_nearcriticalheatdays_df[self.vpd_agg_col].min(), \ - self.xyz_long_extended_nearcriticalheatdays_df[self.vpd_agg_col].max() - return thres_chd_ta, thres_chd_vpd, thres_nchd_ta, thres_nchd_vpd - - def _calc_combobins(self, df: DataFrame): - dailybinned_df = df.groupby('BINS_COMBINED_INT').agg({ - self.flux_agg_col: ['mean', 'std', 'count'], - self.ta_agg_col: ['min', 'max'], - self.vpd_agg_col: ['min', 'max'] - }) - dailybinned_df[(self.flux_agg_col, 'mean+std')] = \ - dailybinned_df[self.flux_agg_col]['mean'].add(dailybinned_df[self.flux_agg_col]['std']) - dailybinned_df[(self.flux_agg_col, 'mean-std')] = \ - dailybinned_df[self.flux_agg_col]['mean'].sub(dailybinned_df[self.flux_agg_col]['std']) - return dailybinned_df - - def fit(self, n_predictions: int = 1000): - - _df = self.xyz_long_extended_bins_equal_df - fitter = QuadraticFit( - df=_df, - xcol='BINS_COMBINED_INT', - ycol=self.flux_agg_col, - predict_max_x=_df['BINS_COMBINED_INT'].max(), - predict_min_x=_df['BINS_COMBINED_INT'].min(), - n_predictions=n_predictions - ) - fitter.fit() - self._fit_results = fitter.fit_results - - def showplot_heatmap_percentiles(self, - saveplot: bool = False, - title: str = None, - path: Path or str = None, - dpi: int = 72, - **kwargs): - fig = plt.figure(figsize=(10, 9), dpi=dpi) - gs = gridspec.GridSpec(1, 1) # rows, cols - # gs.update(wspace=.2, hspace=1, left=.1, right=.9, top=.85, bottom=.1) - ax = fig.add_subplot(gs[0, 0]) - ax = self.plot_heatmap_percentiles(ax=ax, **kwargs) - fig.tight_layout() - fig.show() - if saveplot: - save_fig(fig=fig, title=title, path=path) - return ax - - def plot_heatmap_percentiles(self, ax, **kwargs): - hm = HeatmapPivotXYZ(pivotdf=self.xyz_pivot_df) - hm.plot(ax=ax, - xlabel=r'Daily maximum air temperature ($\mathrm{percentile}$)', - ylabel=r'Daily maximum VPD ($\mathrm{percentile}$)', - zlabel=r'Net ecosystem production ($\mathrm{gCO_{2}\ m^{-2}\ d^{-1}}$)', - tickpos=list(np.arange(-2.5, 102.5, 10)), - ticklabels=list(range(0, 105, 10)), - **kwargs) - return ax - - def showplot_criticalheat(self, - saveplot: bool = False, - title: str = None, - path: Path or str = None, - dpi: int = 72, - **kwargs): - fig = plt.figure(figsize=(9, 9), dpi=dpi) - gs = gridspec.GridSpec(1, 1) # rows, cols - # gs.update(wspace=.2, hspace=1, left=.1, right=.9, top=.85, bottom=.1) - ax = fig.add_subplot(gs[0, 0]) - ax = self.plot_bins(ax=ax, **kwargs) - fig.tight_layout() - fig.show() - if saveplot: - save_fig(fig=fig, title=title, path=path) - return ax - - def plot_bins(self, - ax, - show_fit: bool = True, - show_prediction_interval: bool = True, - show_sd: bool = True, - xlabel: str = None, - ylabel: str = None, - xunits: str = None, - yunits: str = None): - - # # Difference - # diff = DataFrame() - # diff['a'] = dailybinned_df[self.zvaragg]['mean'] - # diff['aa'] = diff['a'].shift(1) - # diff['aaa'] = diff['a'].sub(diff['aa']) - # diff['aaa'].plot() - # plt.show() - - _plotdf = self.combobins_bins_equal_df - - # Plot z var as y in scatter plot - _label = xlabel if xlabel else self.flux_col - _n_vals_min = _plotdf[self.flux_agg_col]['count'].min() - _n_vals_max = 
_plotdf[self.flux_agg_col]['count'].max() - _label = f"{_label} (between {_n_vals_min} and {_n_vals_max} days per bin)" - _y = _plotdf[self.flux_agg_col]['mean'] - # _y.index = _y.index / 2 - line_xy = ax.scatter(_y.index, _y, - edgecolor='none', color=COLOR_NEP, alpha=1, - s=150, label=_label, zorder=99, marker='o') - - # # Plot z var as y in scatter plot - # # _label = xlabel if xlabel else self.zvar - # _y = dailybinned_df[self.xvaragg]['min'] - # xxx = ax.scatter(_y.index, _y, - # edgecolor='none', color='blue', alpha=1, - # s=100, label='XXX', zorder=99, marker='o') - # _y = dailybinned_df[self.yvaragg]['min'].multiply(20) - # xxx = ax.scatter(_y.index, _y, - # edgecolor='none', color='green', alpha=1, - # s=100, label='XXX', zorder=99, marker='o') - - if show_sd: - _upper = _plotdf[self.flux_agg_col]['mean+std'] - _lower = _plotdf[self.flux_agg_col]['mean-std'] - line_sd = ax.fill_between(_upper.index, _upper, _lower, - alpha=.1, zorder=0, color=COLOR_NEP, - edgecolor='none', label="standard deviation") - - # Fit and confidence intervals - line_fit = line_fit_ci = None - line_fit_pb = None - if show_fit: - line_fit = self._plot_fit(ax=ax, color_fitline='red') - line_fit_ci = self._plot_fit_ci(ax=ax, color_fitline='red') - - # Prediction bands - if show_prediction_interval: - line_fit_pb = self._plot_fit_pi(ax=ax, color_fitline='blue') - - # # Rectangle - # _s = self.dailybinneddf[self.xvaragg]['min'] - # _locs = (_s > 15) & (_s < 20) - # _s = list(_s[_locs].index) - # import numpy as np - # _start = np.min(_s) - # _width = np.max(_s) - _start - # range_bts_netzeroflux = rectangle( - # ax=ax, - # rect_lower_left_x=_start, - # rect_lower_left_y=.9, - # rect_width=_width, - # rect_height=.2, - # label=f"xxx", - # color='blue') - - # Axes labels - ax.axhline(0, lw=1, color='black') - if xlabel: - xlabel = f"{xlabel} ({xunits})" - else: - xlabel = f"Combined percentiles of daily {self.ta_agg} {self.ta_col} " \ - f"and daily {self.vpd_agg} {self.vpd_col}" - if ylabel: - ylabel = f"{ylabel} ({yunits})" - else: - ylabel = f"{self.flux_col} ({yunits})" - - # Format - default_format(ax=ax, showgrid=False, ax_xlabel_txt=xlabel, ax_ylabel_txt=ylabel) - _min = _plotdf[self.flux_agg_col]['mean-std'].min() - _max = _plotdf[self.flux_agg_col]['mean+std'].max() - ax.set_ylim([_min, _max]) - - # Zero line - ax.axhline(0, color='black', lw=1) - - # specify the number of ticks on the x-axis - ax.locator_params(axis='y', nbins=20) - _range = range(0, 210, 20) - ax.set_xticks(_range) - - # Custom legend - # This legend replaces the legend from PlotBinFitterBTS - # Assign two of the handles to the same legend entry by putting them in a tuple - # and using a generic handler map (which would be used for any additional - # tuples of handles like (p1, p3)). 
- # https://matplotlib.org/stable/gallery/text_labels_and_annotations/legend_demo.html - l = ax.legend( - [ - # line_dcrit_vertical if showline_dcrit else None, - # range_bts_netzeroflux if showrange_dcrit else None, - line_xy, - line_sd, - # line_highlight if line_highlight else None, - line_fit if show_fit else None, - line_fit_ci if show_fit else None, - # line_fit_pb if show_fit else None - ], - [ - # line_dcrit_vertical.get_label() if showline_dcrit else None, - # range_bts_netzeroflux.get_label() if showrange_dcrit else None, - line_xy.get_label(), - line_sd.get_label() if show_sd else None, - # line_highlight.get_label() if line_highlight else None, - line_fit.get_label() if show_fit else None, - line_fit_ci.get_label() if show_fit else None, - # line_fit_pb.get_label() if show_fit else None - ], - bbox_to_anchor=(0, 1), - frameon=False, - fontsize=FONTSIZE_LEGEND, - loc="lower left", - ncol=2, - scatterpoints=1, - numpoints=1, - handler_map={tuple: HandlerTuple(ndivide=None)}) - - return ax - - # t = [] - # for bts_run in range(1, 10): - # sample_df = self.subset.sample(n=int(len(self.subset)), replace=True) - # z_pivot, z_long = self._assign_bins(df=sample_df, binagg_z='mean') - # bin_info = self._collect_bininfo(df=z_long) - # _subset_bin_info = self._add_bininfo(left=sample_df, right=bin_info) - - # xx = _subset_bin_info.loc[_subset_bin_info['BINS_COMBINED_INT'] == 200]['VPD_f_max'].max() - # t.append(xx) - - # d = DataFrame() - # d['MEAN'] = _subset_bin_info.groupby('BINS_COMBINED_INT').mean()['NEP_mean'] - # d['SD'] = _subset_bin_info.groupby('BINS_COMBINED_INT').std()['NEP_mean'] - # d['MEAN+SD'] = d['MEAN'] + d['SD'] - # d['MEAN-SD'] = d['MEAN'] - d['SD'] - # plt.scatter(d.index, d['MEAN']) - # plt.fill_between(d.index.values, d['MEAN+SD'].values, d['MEAN-SD'].values, - # alpha=.1, zorder=0, color='black', edgecolor='none', label="mean±1sd") - # plt.axhline(0) - # plt.tight_layout() - # plt.show() - # - # print(np.median(t)) - - # extreme_ix = z_long.loc[z_long['BINS_COMBINED_INT'] == 200]['index'] - # extreme_ix = list(extreme_ix) - # - # a = z_long.loc[z_long['index'].isin(extreme_ix)] - # plt.scatter(a['Tair_f_max'], a['NEP_mean'], c=a['VPD_f_max']) - # plt.scatter(a['VPD_f_max'], a['NEP_mean'], c=a['Tair_f_max']) - # plt.show() - # - # is_daytime = self.df['Rg_f'] > 50 - # daytime = self.df[is_daytime].copy() - # daytime['DATE'] = daytime.index.date - # - # a = daytime.loc[daytime['DATE'].isin(extreme_ix)] - # plt.scatter(a['Tair_f'], a['NEP'], c=a['VPD_f']) - # plt.scatter(a['VPD_f'], a['NEP'], c=a['Tair_f']) - # plt.show() - - @staticmethod - def _collect_bininfo(df: DataFrame): - """Collect columns containig bin info with timestamp""" - cols = ['DATE'] - [cols.append(c) for c in df.columns if str(c).startswith('BIN')] - bin_info = df[cols].copy() - return bin_info - - @staticmethod - def _add_bininfo(left: DataFrame, right: DataFrame) -> DataFrame: - _subset = left.copy() - _subset = _subset.reset_index(drop=False).copy() - subset_bin_info = _subset.merge(right, left_on='DATE', right_on='DATE', how='left') - return subset_bin_info - - def _create_subset(self): - subset = self.df[[self.ta_col, self.vpd_col, self.flux_col]].copy() - _additional_data = self.df[self.additional_cols].copy() - subset = pd.concat([subset, _additional_data], axis=1) - # Remove duplicates, each variable is only needed once - subset = subset.loc[:, ~subset.columns.duplicated()] - return subset # todo add gpp reco]].copy() - - @staticmethod - def _resample_daily_aggs(subset: DataFrame): - 
"""Aggregation to daily values, the resulting multiindex is flattened""" - _aggs = ['min', 'max', 'mean', 'median', 'sum', 'count'] - subset = subset.groupby(subset.index.date).agg(_aggs) - subset = flatten_multiindex_all_df_cols(df=subset) - subset.index.name = 'DATE' - return subset - - def _assign_bins(self, **params): - qua = QuantileXYAggZ(x=self._xdata, y=self._ydata, z=self._zdata, **params) - qua.run() - return qua.pivotdf, qua.longformdf - - def _plot_fit(self, ax, color_fitline): - a = self.fit_results['fit_params_opt'][0] - b = self.fit_results['fit_params_opt'][1] - c = self.fit_results['fit_params_opt'][2] - operator1 = "+" if b > 0 else "" - operator2 = "+" if c > 0 else "" - label_fit = rf"$y = {a:.4f}x^2{operator1}{b:.4f}x{operator2}{c:.4f}$" - line_fit, = ax.plot(self.fit_results['fit_df']['fit_x'], - self.fit_results['fit_df']['nom'], - c=color_fitline, lw=3, zorder=99, alpha=1, label=label_fit) - return line_fit - - def _plot_fit_ci(self, ax, color_fitline): - # Fit confidence region - # Uncertainty lines (95% confidence) - line_fit_ci = ax.fill_between(self.fit_results['fit_df']['fit_x'], - self.fit_results['fit_df']['nom_lower_ci95'], - self.fit_results['fit_df']['nom_upper_ci95'], - alpha=.2, color=color_fitline, zorder=1, - label="95% confidence region") - return line_fit_ci - - def _plot_fit_pi(self, ax, color_fitline): - # Fit prediction interval - # Lower prediction band (95% confidence) - ax.plot(self.fit_results['fit_df']['fit_x'], - self.fit_results['fit_df']['lower_predband'], - color=color_fitline, ls='--', zorder=97, lw=2, - label="95% prediction interval") - # ax.fill_between(self.fit_results['fit_df']['fit_x'], - # self.fit_results['fit_df']['bts_lower_predband_Q97.5'], - # self.fit_results['fit_df']['bts_lower_predband_Q02.5'], - # alpha=.2, color=color_fitline, zorder=97, - # label="95% confidence region") - # Upper prediction band (95% confidence) - line_fit_pb, = ax.plot(self.fit_results['fit_df']['fit_x'], - self.fit_results['fit_df']['upper_predband'], - color=color_fitline, ls='--', zorder=96, lw=2, - label="95% prediction interval") - # ax.fill_between(self.fit_results['fit_df']['fit_x'], - # self.fit_results['fit_df']['bts_upper_predband_Q97.5'], - # self.fit_results['fit_df']['bts_upper_predband_Q02.5'], - # alpha=.2, color=color_fitline, zorder=97, - # label="95% confidence region") - return line_fit_pb - - -def example(): - vpd_col = 'VPD_f' - nee_col = 'NEE_CUT_REF_f' - nee_orig = 'NEE_CUT_REF_orig' - nep_col = 'NEP' - ta_col = 'Tair_f' - gpp_dt_col = 'GPP_DT_CUT_REF' - reco_dt_col = 'Reco_DT_CUT_REF' - gpp_nt_col = 'GPP_CUT_REF_f' - reco_nt_col = 'Reco_CUT_REF' - ratio_dt_gpp_reco = 'RATIO_DT_GPP_RECO' - rh_col = 'RH' - swin_col = 'Rg_f' - - # Load data, using pickle for fast loading - from diive.core.io.files import load_pickle - source_file = r"L:\Sync\luhk_work\20 - CODING\21 - DIIVE\diive\__manuscripts\11.01_NEP-Penalty_CH-DAV_1997-2022 (2023)\data\CH-DAV_FP2022.5_1997-2022_ID20230206154316_30MIN.diive.csv.pickle" - df_orig = load_pickle(filepath=source_file) - - # Data between May and Sep - df_orig = df_orig.loc[(df_orig.index.month >= 4) & (df_orig.index.month <= 10)].copy() - - # Subset - df = df_orig[[nee_col, vpd_col, ta_col, - gpp_dt_col, reco_dt_col, gpp_nt_col, reco_nt_col, - swin_col]].copy() - - # Convert units - df[vpd_col] = df[vpd_col].multiply(0.1) # hPa --> kPa - df[nee_col] = df[nee_col].multiply(0.0792171) # umol CO2 m-2 s-1 --> g CO2 m-2 30min-1 - df[nep_col] = df[nee_col].multiply(-1) # Convert NEE to NEP, net uptake is 
now positive - df[gpp_dt_col] = df[gpp_dt_col].multiply(0.0792171) # umol CO2 m-2 s-1 --> g CO2 m-2 30min-1 - df[gpp_nt_col] = df[gpp_nt_col].multiply(0.0792171) # umol CO2 m-2 s-1 --> g CO2 m-2 30min-1 - df[reco_dt_col] = df[reco_dt_col].multiply(0.0792171) # umol CO2 m-2 s-1 --> g CO2 m-2 30min-1 - df[reco_nt_col] = df[reco_nt_col].multiply(0.0792171) # umol CO2 m-2 s-1 --> g CO2 m-2 30min-1 - df[ratio_dt_gpp_reco] = df[gpp_dt_col].divide(df[reco_dt_col]) - df['nepmodel'] = df[gpp_nt_col].sub(df[reco_nt_col]) - - # # todo TESTING - # t = df[[gpp_dt_col, reco_dt_col, ta_col]].copy() - # tt = t.groupby(t.index.date).sum() - # tt['ratio'] = tt[gpp_dt_col].divide(tt[reco_dt_col]) - # ttt = tt.loc[tt['ratio'] < 100].copy() - # import pandas as pd - # labels = range(0, 20, 1) - # group, bins = pd.qcut(ttt[ta_col],q=20,retbins=True,duplicates='drop', labels=labels) # How awesome! - # ttt['group'] = group - # tttt = ttt.groupby('group').mean() - # tttt['ratio'].plot() - # plt.show() - - df = df.loc[df[ratio_dt_gpp_reco] < 10].copy() - - ta_col = ta_col - vpd_col = vpd_col - flux_col = nep_col - - ext = FluxCriticalHeatDaysP95(df=df, - ta_col=ta_col, ta_agg='max', - vpd_col=vpd_col, vpd_agg='max', - flux_col=flux_col, flux_agg='sum') - ext.run(bins_min_n_vals=10, verbose=True) - ext.showplot_heatmap_percentiles(cb_digits_after_comma=0) - res = ext.get_results() - - ext.fit(n_predictions=1000) - print(ext.fit_results.keys()) - ext.showplot_criticalheat(show_fit=True, - show_prediction_interval=False, - show_sd=True, - yunits="$\mathrm{gCO_{2}\ m^{-2}\ d^{-1}}$") - - -if __name__ == '__main__': - example() diff --git a/diive/pkgs/gapfilling/__init__.py b/diive/pkgs/gapfilling/__init__.py index a1c8b91..24c6e3d 100644 --- a/diive/pkgs/gapfilling/__init__.py +++ b/diive/pkgs/gapfilling/__init__.py @@ -1 +1 @@ -from . import interpolate \ No newline at end of file +# from . import interpolate \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 797b1a8..73b02f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "diive" -version = "0.84.0" +version = "0.84.1" description = "Time series processing" authors = ["holukas "] readme = "README.md" diff --git a/tests/test_fits.py b/tests/test_fits.py index 4b05c6f..1f6ca34 100644 --- a/tests/test_fits.py +++ b/tests/test_fits.py @@ -31,7 +31,7 @@ def test_binfittercp(self): fit_results = bf.fit_results self.assertEqual(len(fit_results['input_df']['group'].unique()), 10) - self.assertEqual(fit_results['bin_df'].sum().sum(), 63443.00512672013) + self.assertAlmostEqual(fit_results['bin_df'].sum().sum(), 63443.00512672013, places=5 ) self.assertEqual(fit_results['fit_df']['nom'].sum(), 678.9973275669132) self.assertEqual(fit_results['fit_equation_str'], 'y = 0.0050x^2-0.0424x+0.1842') self.assertEqual(fit_results['fit_type'], 'quadratic_offset') diff --git a/tests/test_imports.py b/tests/test_imports.py new file mode 100644 index 0000000..f640bc4 --- /dev/null +++ b/tests/test_imports.py @@ -0,0 +1,12 @@ +import unittest + + +class TestImports(unittest.TestCase): + + def test_imports(self): + import diive as a + import diive.configs as b + import diive.core as c + import diive.core.plotting as d + import diive.pkgs as e + print(a, b, c, d, e)
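
As a quick local check of the new tests/test_imports.py case shown above, the following minimal sketch loads and runs just that test; it assumes the repository root is the working directory and that diive is importable there (e.g. after an editable install), which is not guaranteed by the diff itself.

import unittest

# Load only the new import test case and run it with verbose output.
suite = unittest.defaultTestLoader.loadTestsFromName("tests.test_imports.TestImports.test_imports")
unittest.TextTestRunner(verbosity=2).run(suite)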