From bb306ed1caf788c5ade40c2efc9383eae325138b Mon Sep 17 00:00:00 2001
From: Paola Petrelli
Date: Tue, 2 Jul 2024 12:47:28 +1000
Subject: [PATCH 001/137] added ci github action #146, updated tests, restructured build-umfrq #148

---
 .github/workflows/mopper-conda.yaml | 54 +++++++++++++++++++
 src/mopdb/mopdb_utils.py            | 80 +++++++++++++++--------------
 tests/conftest.py                   | 29 +++++++++++
 tests/test_mop_utils.py             | 26 +++++-----
 tests/test_mopdb.py                 | 20 ++++----
 tests/test_mopdb_utils.py           | 10 ++++
 6 files changed, 158 insertions(+), 61 deletions(-)
 create mode 100644 .github/workflows/mopper-conda.yaml

diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml
new file mode 100644
index 0000000..98ea1a7
--- /dev/null
+++ b/.github/workflows/mopper-conda.yaml
@@ -0,0 +1,54 @@
+name: mopper-conda-install-test
+
+#on: [push]
+on:
+  push:
+    branches:
+      - main
+      - newrelease
+  pull_request:
+    branches:
+      - main
+
+
+jobs:
+  build-linux:
+    runs-on: ubuntu-latest
+    strategy:
+      max-parallel: 5
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v2
+      with:
+        # quoted so YAML reads 3.10, not the float 3.1
+        python-version: "3.10"
+    - name: Add conda to system path
+      run: |
+        # $CONDA is an environment variable pointing to the root of the miniconda directory
+        echo $CONDA/bin >> $GITHUB_PATH
+    - name: Install dependencies
+      run: |
+        conda env update --file conda/environment.yml --name base
+    - name: Lint with flake8
+      run: |
+        conda install flake8
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+#    - name: Install package
+#      run: |
+#        conda run python setup.py install
+    - name: Test with pytest
+      run: |
+        conda install pytest coverage codecov
+        conda run python -m pytest
+        conda run coverage run --source src -m py.test
+    - name: Upload to codecov
+      # no step has id "build", so plain success() is used here
+      if: success()
+      run: |
+        curl -Os https://uploader.codecov.io/latest/linux/codecov
+        chmod +x codecov
+        ./codecov

diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py
index 7d4ab62..85eb922 100644
--- a/src/mopdb/mopdb_utils.py
+++ b/src/mopdb/mopdb_utils.py
@@ -426,32 +426,32 @@ def list_files(indir, match, db_log):
 
 
 def build_umfrq(time_axs, ds, db_log):
-    """
+    """Return a dictionary with frequency for each time axis.
+
+    Frequency is inferred by comparing interval between two consecutive
+    timesteps with expected interval at a given frequency.
+    Order time_axis so ones with only one step are last, so we can use
+    file frequency (interval_file) inferred from other time axes.
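+
+    For example, with daily timesteps the first interval
+    (ds[t][1] - ds[t][0]).values is close to 1.0 days, so
+    math.isclose(interval, int2frq['day'], rel_tol=0.05) is True
+    and the axis is assigned 'day'.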
""" umfrq = {} - #PPfirst_step = {} int2frq = {'dec': 3652.0, 'yr': 365.0, 'mon': 30.0, 'day': 1.0, '6hr': 0.25, '3hr': 0.125, '1hr': 0.041667, '10min': 0.006944} - for t in time_axs: - #PPfirst_step[t] = ds[t][0].values + time_axs.sort(key=lambda x: len(ds[x]), reverse=True) + db_log.debug(f"in build_umfrq, time_axs: {time_axs}") + for t in time_axs: + db_log.debug(f"len of time axis {t}: {len(ds[t])}") if len(ds[t]) > 1: interval = (ds[t][1]-ds[t][0]).values interval_file = (ds[t][-1] -ds[t][0]).values - for k,v in int2frq.items(): - if math.isclose(interval, v, rel_tol=0.05): - umfrq[t] = k - break else: - umfrq[t] = 'file' - # use other time_axis info to work out frq of time axis with 1 step - db_log.debug(f"umfrq in function {umfrq}") - for t,frq in umfrq.items(): - if frq == 'file': - for k,v in int2frq.items(): - if math.isclose(interval_file, v, rel_tol=0.05): - umfrq[t] = k - break + interval = interval_file + db_log.debug(f"interval 2 timesteps for {t}: {interval}") + db_log.debug(f"interval entire file {t}: {interval_file}") + for k,v in int2frq.items(): + if math.isclose(interval, v, rel_tol=0.05): + umfrq[t] = k + break return umfrq @@ -461,24 +461,23 @@ def get_frequency(realm, fname, ds, db_log): returns dictionary with frequency: variable list """ umfrq = {} - frequency = 'NA' + frequency = 'NAfrq' if realm == 'atmos': fbits = fname.split("_") frequency = fbits[-1].replace(".nc", "") - if frequency == 'dai': - frequency = 'day' - elif frequency == '3h': - frequency = '3hr' - elif frequency == '6h': - frequency = '6hr' + fix_frq = {'dai': 'day', '3h': '3hr', '6h': '6hr'} + if frequency in fix_frq.keys(): + frequency = fix_frq[frequency] else: frequency = frequency.replace('hPt', 'hrPt') + # retrieve all time axes and check their frequency time_axs = [d for d in ds.dims if 'time' in d] time_axs_len = set(len(ds[d]) for d in time_axs) if len(time_axs_len) == 1: umfrq = {} else: umfrq = build_umfrq(time_axs, ds, db_log) + db_log.debug(f"umfrq: {umfrq}") elif realm == 'ocean': # if I found scalar or monthly in any of fbits if any(x in fname for x in ['scalar', 'month']): @@ -544,24 +543,10 @@ def write_varlist(conn, indir, startdate, version, db_log): "vtype", "size", "nsteps", "filename", "long_name", "standard_name"]) # get attributes for the file variables - try: - if version == 'AUS2200': - realm = '/atmos/' - else: - realm = [x for x in ['/atmos/', '/ocean/', '/ice/'] if x in str(fpath)][0] - except: - realm = [x for x in ['/atm/', '/ocn/', '/ice/'] if x in str(fpath)][0] - realm = realm[1:-1] - if realm == 'atm': - realm = 'atmos' - elif realm == 'ocn': - realm = 'ocean' - db_log.debug(realm) + realm = get_realm(fpath, version, db_log) ds = xr.open_dataset(fpath, decode_times=False) coords = [c for c in ds.coords] + ['latitude_longitude'] frequency, umfrq = get_frequency(realm, fpath.name, ds, db_log) - db_log.debug(f"Frequency: {frequency}") - db_log.debug(f"umfrq: {umfrq}") multiple_frq = False if umfrq != {}: multiple_frq = True @@ -961,3 +946,20 @@ def check_realm_units(conn, var, db_log): db_log.warning(f"Variable {vname} not found in cmor table") return var + + def get_realm(fpath, version, db_log): + '''Return realm for variable in files or NArealm''' + if version == 'AUS2200': + realm = 'atmos' + else: + realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] + if x in fpath.parts][0] + if realm == 'atm': + realm = 'atmos' + elif realm == 'ocn': + realm = 'ocean' + elif realm is None: + realm = 'NArealm' + db_log.info(f"Couldn't detect realm from path, setting 
to NArealm") + db_log.debug(f"Realm is {realm}") + return realm diff --git a/tests/conftest.py b/tests/conftest.py index 2f7fcbf..7f544ac 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,6 +22,7 @@ import numpy as np import pandas as pd import datetime +import logging from mopdb.mopdb_utils import mapping_sql, cmorvar_sql from mopper.setup_utils import filelist_sql @@ -29,7 +30,17 @@ TESTS_HOME = os.path.abspath(os.path.dirname(__file__)) TESTS_DATA = os.path.join(TESTS_HOME, "testdata") +# setting up loggers for both mopdb and mop +@pytest.fixture +def moplog(): + return logging.getLogger('mop_log') + + +@pytest.fixture +def mopdblog(): + return logging.getLogger('mopdb_log') +# setting up fixtures for databases:a ccess.db and mopper.db @pytest.fixture def session(): connection = sqlite3.connect(':memory:') @@ -64,6 +75,10 @@ def setup_mopper_db(session): session.connection.commit() +def test_check_timestamp(caplog): + global ctx, logger + caplog.set_level(logging.DEBUG, logger='mop_log') + @pytest.fixture def varlist_rows(): lines = ["fld_s03i236;tas;K;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature", @@ -71,3 +86,17 @@ def varlist_rows(): "fld_s03i236;tas;;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature"] rows = [l.split(";") for l in lines] return rows + +@pytest.fixture +def um_multi_time(): + '''Return a um stule file with multiple time axes''' + time1 = pd.date_range("2001-01-01", periods=1) + time2 = pd.date_range("2001-01-01", periods=24, freq='h') + time3 = pd.date_range("2001-01-01", periods=48, freq='30min') + var1 = xr.DataArray(name='var1', data=[1], + dims=["time"], coords={"time": time1}) + var2 = xr.DataArray(name='var2', data=np.arange(24), + dims=["time_0"], coords={"time_0": time2}) + var3 = xr.DataArray(name='var3', data=np.arange(48), dims=["time_1"], + coords={"time_1": time3}) + return xr.merge([var1, var2, var3]) diff --git a/tests/test_mop_utils.py b/tests/test_mop_utils.py index d006ca1..f177f21 100644 --- a/tests/test_mop_utils.py +++ b/tests/test_mop_utils.py @@ -19,6 +19,7 @@ import numpy as np import pandas as pd from mopper.mop_utils import * +from conftest import moplog #try: # import unittest.mock as mock @@ -28,14 +29,14 @@ ctx = click.Context(click.Command('cmd'), obj={'sel_start': '198302170600', 'sel_end': '198302181300', 'realm': 'atmos', 'frequency': '1hr'}) -logger = logging.getLogger('mop_log') +#logger = logging.getLogger('mop_log') -def test_check_timestamp(caplog): - global ctx, logger - caplog.set_level(logging.DEBUG, logger='mop_log') +def test_check_timestamp(caplog, ctx): + moplog.set_level(logging.DEBUG)#, logger='mop_log') # test atmos files files = [f'obj_198302{d}T{str(h).zfill(2)}01_1hr.nc' for d in ['17','18','19'] for h in range(24)] + print(files) inrange = files[6:37] with ctx: out1 = check_timestamp(files, logger) @@ -47,7 +48,7 @@ def test_check_timestamp(caplog): out2 = check_timestamp(files, logger) assert out2 == inrange # test ocn files - ctx.obj['frequency'] = 'mon' + ctx.obj['frequency'] = 'day' ctx.obj['realm'] = 'ocean' files = [f'ocn_daily.nc-198302{str(d).zfill(2)}' for d in range(1,29)] inrange = files[16:18] @@ -56,10 +57,9 @@ def test_check_timestamp(caplog): assert out3 == inrange -def test_get_cmorname(caplog): - global ctx, logger - caplog.set_level(logging.DEBUG, logger='mop_log') - # axiis_name t +def test_get_cmorname(caplog, ctx): + 
caplog.set_level(logging.DEBUG)#, logger='mop_log') + # axis_name t ctx.obj['calculation'] = "plevinterp(var[0], var[1], 24)" ctx.obj['variable_id'] = "ta24" ctx.obj['timeshot'] = 'mean' @@ -71,10 +71,10 @@ def test_get_cmorname(caplog): foo = xr.DataArray(data, coords=[levs, tdata, lats, lons], dims=["lev", "t", "lat", "lon"]) with ctx: - tname = get_cmorname('t', foo.t, logger, z_len=None) - iname = get_cmorname('i_index', foo.lon, logger, z_len=None) - jname = get_cmorname('j_index', foo.lat, logger, z_len=None) - zname = get_cmorname('z', foo.lev, logger, z_len=3) + tname = get_cmorname('t', foo.t, caplog, z_len=None) + iname = get_cmorname('lon', foo.lon, caplog, z_len=None) + jname = get_cmorname('lat', foo.lat, caplog, z_len=None) + zname = get_cmorname('z', foo.lev, caplog, z_len=3) assert tname == 'time' assert iname == 'longitude' assert jname == 'latitude' diff --git a/tests/test_mopdb.py b/tests/test_mopdb.py index 37f4232..0eddc58 100644 --- a/tests/test_mopdb.py +++ b/tests/test_mopdb.py @@ -22,11 +22,13 @@ from click.testing import CliRunner @pytest.mark.parametrize('subcommand', ['varlist', 'template', 'check', 'cmor', 'table', 'map']) -def test_cmip(command, runner): - result = runner.invoke(mopdb, ['--help']) - assert result.exit_code == 0 - result = runner.invoke(mopdb, [subcommand, '--help']) - assert result.exit_code == 0 +def test_mopdb(command, subcommand, runner): + ctx = click.Context(click.Command('cmd'), obj={'prop': 'A Context'}) + with ctx: + result = runner.invoke(mopdb, ['--help']) + assert result.exit_code == 0 + result = runner.invoke(mopdb, [subcommand, '--help']) + assert result.exit_code == 0 @pytest.mark.usefixtures("setup_db") # 1 def test_template(session): @@ -53,7 +55,7 @@ def test_template(session): # with runner.isolated_filesystem(temp_dir=tmp_path) as td: # ... 
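
A note on the pattern used in these tests: mopper's commands read shared
state from click's context object, so the tests build a click.Context and
run code inside "with ctx:". A minimal, self-contained sketch of the same
idea (the command and object names here are placeholders, not mopper's
real ones):

    import click
    from click.testing import CliRunner

    @click.command()
    @click.pass_context
    def cmd(ctx):
        # echo whatever the caller stored in the context object
        click.echo(ctx.obj['prop'])

    def test_cmd():
        runner = CliRunner()
        # obj= seeds ctx.obj for the invoked command
        result = runner.invoke(cmd, obj={'prop': 'A Context'})
        assert result.exit_code == 0
        assert 'A Context' in result.output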
diff --git a/tests/test_mopdb_utils.py b/tests/test_mopdb_utils.py
index 48aa87b..103f75e 100644
--- a/tests/test_mopdb_utils.py
+++ b/tests/test_mopdb_utils.py
@@ -21,6 +21,7 @@
 import click
 import logging
 from mopdb.mopdb_utils import *
+from conftest import um_multi_time
 
 #from click.testing import CliRunner
 
@@ -43,3 +44,12 @@ def test_add_var(varlist_rows, idx, db_log):
     match = ("tas", "", "K")
     vlist = add_var(vlist, varlist_rows[idx], match, db_log)
     assert vlist == vlistout
+
+
+def test_build_umfrq(um_multi_time, caplog):
+    caplog.set_level(logging.DEBUG)
+    time_axs = [d for d in um_multi_time.dims if 'time' in d]
+    print(time_axs)
+    umfrq = {'time': 'day', 'time_0': '1hr', 'time_1': '30min'}
+    assert umfrq == build_umfrq(time_axs, um_multi_time, caplog)
+

From 300927e1881099283b7cf9f09371093aba1d14ce Mon Sep 17 00:00:00 2001
From: Paola Petrelli
Date: Wed, 3 Jul 2024 17:41:22 +1000
Subject: [PATCH 002/137] partial work on tests, issues #147 and #146

---
 ACDD_conf.yaml             |  13 +-
 CMIP6_conf.yaml            |   2 +
 src/mopdb/mopdb.py         |  95 +++++++------
 src/mopdb/mopdb_utils.py   | 275 ++++++++++++++++++++-----------------
 src/mopper/calculations.py |  14 +-
 src/mopper/mop_setup.py    |  23 ++--
 src/mopper/mop_utils.py    | 101 +++++++++-----
 src/mopper/mopper.py       |  84 +++++------
 src/mopper/setup_utils.py  |  29 ++--
 tests/conftest.py          |  17 ++-
 tests/test_mopdb_utils.py  |  27 ++--
 11 files changed, 369 insertions(+), 311 deletions(-)

diff --git a/ACDD_conf.yaml b/ACDD_conf.yaml
index d75312a..f507aeb 100755
--- a/ACDD_conf.yaml
+++ b/ACDD_conf.yaml
@@ -83,16 +83,25 @@ cmor:
     grids: "ACDD_grids.json"
 # Additional NCI information:
     # NCI project to charge compute; $PROJECT = your default project
-    # NCI queue to use; hugemem is recommended
     project: v45
     # additional NCI projects to be included in the storage flags
    addprojs: []
-    # queue and memory (GB) per CPU (depends on queue)
+    # queue and memory (GB) per CPU (depends on queue),
+    # hugemem is recommended for high resolution data and/or derived variables
+    # hugemem requires a minimum of 6 cpus; this is handled by the code
     queue: hugemem
     mem_per_cpu: 32
     # walltime in "hh:mm:ss"
     walltime: '8:00:00'
     mode: custom
+    # conda_env to use; by default hh5 analysis3-unstable,
+    # as this has the code and all dependencies installed.
+    # You can override that by supplying the env to pass to "source"
+    # Ex
+    # conda_env: /bin/activate
+    # or you can set "test: true" and modify mopper_job.sh manually
+    conda_env: default
+    #
 # Global attributes: these will be added to each file; comment unwanted ones
 # Using ACDD CV vocab to check validity of global attributes

diff --git a/CMIP6_conf.yaml b/CMIP6_conf.yaml
index fd5f14b..9ae85aa 100755
--- a/CMIP6_conf.yaml
+++ b/CMIP6_conf.yaml
@@ -84,6 +84,8 @@ cmor:
     # additional NCI projects to be included in the storage flags
     addprojs: []
     # queue and memory (GB) per CPU (depends on queue)
+    # hugemem is recommended for high resolution data and/or derived variables
+    # hugemem requires a minimum of 6 cpus; this is handled by the code
     queue: hugemem
     mem_per_cpu: 30
     # walltime in "hh:mm:ss"
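
The mopdb.py and mopdb_utils.py changes below all apply the same refactor:
instead of threading a db_log argument through every call, each function
now fetches the shared logger by name. A minimal sketch of the pattern
(the helper name here is illustrative, not mopper's):

    import logging

    def config_log(debug):
        # configure the named logger once, at startup
        logger = logging.getLogger('mopdb_log')
        logger.setLevel(logging.DEBUG if debug else logging.INFO)
        logger.addHandler(logging.StreamHandler())
        return logger

    def some_helper():
        # any later call retrieves the same logger object by name
        mopdb_log = logging.getLogger('mopdb_log')
        mopdb_log.info("no logger argument needed")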
diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py
index fbc5df8..892b4cb 100644
--- a/src/mopdb/mopdb.py
+++ b/src/mopdb/mopdb.py
@@ -70,7 +70,7 @@ def mopdb(ctx, debug):
     ctx.obj={}
     # set up a default value for logging if none selected
     ctx.obj['debug'] = debug
-    ctx.obj['log'] = config_log(debug)
+    mopdb_log = config_log(debug)
 
 
 @mopdb.command(name='check')
@@ -89,11 +89,11 @@ def check_cmor(ctx, dbname):
     dbname : str
         Database relative path (default is data/access.db)
     """
-    db_log = ctx.obj['log']
+    mopdb_log = logging.getLogger('mopdb_log')
     # connect to db, this will create one if not existing
     if dbname == 'default':
         dbname = import_files('data').joinpath('access.db')
-    conn = db_connect(dbname, db_log)
+    conn = db_connect(dbname)
     # get list of variables already in db
     sql = 'SELECT name, out_name FROM cmorvar'
     results = query(conn, sql, first=False)
@@ -108,9 +108,9 @@ def check_cmor(ctx, dbname):
     results = query(conn, sql, first=False)
     map_vars = [x[0] for x in results]
     missing = set(map_vars) - set(cmor_vars)
-    db_log.info("Variables not yet defined in cmorvar table:")
+    mopdb_log.info("Variables not yet defined in cmorvar table:")
     for v in missing:
-        db_log.info(f"{v}")
+        mopdb_log.info(f"{v}")
     conn.close()
     return
 
@@ -140,11 +140,11 @@ def cmor_table(ctx, dbname, fname, alias, label):
     label : str
         Label indicating preferred cmor variable definitions
     """
-    db_log = ctx.obj['log']
+    mopdb_log = logging.getLogger('mopdb_log')
     # connect to db, this will create one if not existing
     if dbname == 'default':
         dbname = import_files('data').joinpath('access.db')
-    conn = db_connect(dbname, db_log)
+    conn = db_connect(dbname)
     # get list of variables already in db
     sql = "SELECT out_name, frequency, modeling_realm FROM cmorvar"
     results = query(conn, sql, first=False)
     # cmor_var,units,dimensions,frequency,realm,cell_methods
     # this sometimes differs from name used in tables that can distinguish different dims/freq
     cmor_vars = set(x[0] for x in results)
     # read variable list from map_ file
-    vlist = read_map(fname, alias, db_log)
+    vlist = read_map(fname, alias)
     # extract cmor_var,units,dimensions,frequency,realm,cell_methods
     var_list = []
     for v in vlist[1:]:
         # This was adding variables to the table only if they didn't exist in other tables
         if v[0][:4] != 'fld_':
             if v[0] not in cmor_vars:
-                db_log.warning(f"Variable {v[0]} not defined in cmorvar table")
+                mopdb_log.warning(f"Variable {v[0]} not defined in cmorvar table")
             else:
                 sql = f"SELECT * FROM cmorvar WHERE out_name='{v[0]}'"
@@ -178,13 +178,13 @@ def cmor_table(ctx, dbname, fname, alias, label):
             definition[2] = v[6]
             # if units are different print warning!
if v[3] != record[4]: - db_log.warning(f"Variable {v[0]} units orig/table are different: {v[3]}/{record[4]}") + mopdb_log.warning(f"Variable {v[0]} units orig/table are different: {v[3]}/{record[4]}") if v[7] != '' and v[7] != record[5]: - db_log.warning(f"Variable {v[0]} cell_methods orig/table are different: {v[7]}/{record[5]}") + mopdb_log.warning(f"Variable {v[0]} cell_methods orig/table are different: {v[7]}/{record[5]}") if len(v[4].split()) != len(record[9].split()): - db_log.warning(f"Variable {v[0]} number of dims orig/table are different: {v[4]}/{record[9]}") + mopdb_log.warning(f"Variable {v[0]} number of dims orig/table are different: {v[4]}/{record[9]}") var_list.append(definition) - write_cmor_table(var_list, alias, db_log) + write_cmor_table(var_list, alias, mopdb_log) conn.close() return @@ -213,26 +213,26 @@ def update_cmor(ctx, dbname, fname, alias): ------- """ - db_log = ctx.obj['log'] + mopdb_log = logging.getLogger('mopdb_log') if alias is None: alias = fname.split("/")[-1] alias = alias.replace('.json', '') - db_log.info(f"Adding {alias} to variable name to track origin") + mopdb_log.info(f"Adding {alias} to variable name to track origin") # connect to db, this will create one if not existing dbcentral = import_files('data').joinpath('access.db') if dbname in [dbcentral, 'default']: - db_log.error("The package database cannot be updated") + mopdb_log.error("The package database cannot be updated") sys.exit() - conn = db_connect(dbname, db_log) + conn = db_connect(dbname) # create table if not existing table_sql = cmorvar_sql() - create_table(conn, table_sql, db_log) + create_table(conn, table_sql) # get list of variables already in db in debug mode if ctx.obj['debug']: sql = 'SELECT name FROM cmorvar' results = query(conn, sql, first=False) existing_vars = [x[0] for x in results] - db_log.debug(f"Variables already in db: {existing_vars}") + mopdb_log.debug(f"Variables already in db: {existing_vars}") # read list of vars from file with open(fname, 'r') as fj: @@ -247,14 +247,14 @@ def update_cmor(ctx, dbname, fname, alias): if 'flag_values' not in row.keys(): values = values[:-2] + ['',''] + values[-2:] vars_list.append(tuple([name] + values)) - db_log.debug(f"Variables list: {vars_list}") + mopdb_log.debug(f"Variables list: {vars_list}") # check that all tuples have len == 19 for r in vars_list: if len(r) != 19: - db_log.error(r) + mopdb_log.error(r) sys.exit() # insert new vars and update existing ones - update_db(conn, 'cmorvar', vars_list, db_log) + update_db(conn, 'cmorvar', vars_list) return @@ -287,13 +287,13 @@ def map_template(ctx, dbname, fname, alias, version): Returns ------- """ - db_log = ctx.obj['log'] + mopdb_log = logging.getLogger('mopdb_log') if alias is None: alias = fname.split(".")[0] # connect to db, check first if db exists or exit if dbname == 'default': dbname = import_files('data').joinpath('access.db') - conn = db_connect(dbname, db_log) + conn = db_connect(dbname) # read list of vars from file with open(fname, 'r') as csvfile: reader = csv.DictReader(csvfile, delimiter=';') @@ -301,24 +301,23 @@ def map_template(ctx, dbname, fname, alias, version): # return lists of fully/partially matching variables and stash_vars # these are input_vars for calculation defined in already in mapping db full, no_ver, no_frq, stdn, no_match, stash_vars = parse_vars(conn, - rows, version, db_log) + rows, version) # remove duplicates from partially matched variables - no_ver = remove_duplicate(no_ver, db_log) - no_frq = remove_duplicate(no_frq, db_log, 
strict=False) - no_match = remove_duplicate(no_match, db_log, strict=False) + no_ver = remove_duplicate(no_ver) + no_frq = remove_duplicate(no_frq, strict=False) + no_match = remove_duplicate(no_match, strict=False) # check if more derived variables can be added based on all # input_vars being available pot_full, pot_part, pot_varnames = potential_vars(conn, rows, - stash_vars, version, db_log) + stash_vars, version) # potential vars have always duplicates: 1 for each input_var - pot_full = remove_duplicate(pot_full, db_log, strict=False) - pot_part = remove_duplicate(pot_part, db_log, extra=pot_full, - strict=False) - db_log.info(f"Derived variables: {pot_varnames}") + pot_full = remove_duplicate(pot_full, strict=False) + pot_part = remove_duplicate(pot_part, extra=pot_full, strict=False) + mopdb_log.info(f"Derived variables: {pot_varnames}") write_map_template(conn, full, no_ver, no_frq, stdn, - no_match, pot_full, pot_part, alias, db_log) + no_match, pot_full, pot_part, alias) conn.close() return @@ -347,29 +346,29 @@ def update_map(ctx, dbname, fname, alias): Returns ------- """ - db_log = ctx.obj['log'] + mopdb_log = logging.getLogger('mopdb_log') # connect to db, this will create one if not existing dbcentral = import_files('data').joinpath('access.db') if dbname in [dbcentral, 'default']: - db_log.error("The package database cannot be updated") + mopdb_log.error("The package database cannot be updated") sys.exit() - conn = db_connect(dbname, db_log) + conn = db_connect(dbname) # create table if not existing table_sql = mapping_sql() - create_table(conn, table_sql, db_log) + create_table(conn, table_sql) # get list of variables already in db in debug mode if ctx.obj['debug']: sql = 'SELECT cmor_var FROM mapping' results = query(conn, sql, first=False) existing_vars = [x[0] for x in results] - db_log.debug(f"Variables already in db: {existing_vars}") + mopdb_log.debug(f"Variables already in db: {existing_vars}") # read list of vars from file if alias == 'app4': var_list = read_map_app4(fname) else: - var_list = read_map(fname, alias, db_log) + var_list = read_map(fname, alias) # update mapping table - update_db(conn, 'mapping', var_list, db_log) + update_db(conn, 'mapping', var_list) return @@ -405,12 +404,12 @@ def model_vars(ctx, indir, startdate, dbname, version): Returns ------- """ - db_log = ctx.obj['log'] + mopdb_log = logging.getLogger('mopdb_log') # connect to db, this will create one if not existing if dbname == 'default': dbname = import_files('data').joinpath('access.db') - conn = db_connect(dbname, db_log) - write_varlist(conn, indir, startdate, version, db_log) + conn = db_connect(dbname) + write_varlist(conn, indir, startdate, version) conn.close() return @@ -442,19 +441,19 @@ def remove_record(ctx, dbname, table, pair): Returns ------- """ - db_log = ctx.obj['log'] + mopdb_log = logging.getLogger('mopdb_log') # connect to db, this will create one if not existing dbcentral = import_files('data').joinpath('access.db') if dbname == dbcentral: - db_log.error("The package database cannot be updated") + mopdb_log.error("The package database cannot be updated") sys.exit() - conn = db_connect(dbname, db_log) + conn = db_connect(dbname) # set which columns to show based on table if table == 'cmorvar': col = "name" elif table == 'mapping': col = "cmor_var,frequency,realm,cmor_table" # select, confirm, delete record/s - delete_record(conn, table, col, pair, db_log) + delete_record(conn, table, col, pair) return diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py 
index 85eb922..d4de94f 100644
--- a/src/mopdb/mopdb_utils.py
+++ b/src/mopdb/mopdb_utils.py
@@ -28,6 +28,7 @@
 import json
 import stat
 import xarray as xr
+import numpy as np
 import math
 from datetime import datetime, date
 from collections import Counter
@@ -38,7 +39,7 @@
 def config_log(debug):
     """Configures log file"""
     # start a logger
-    logger = logging.getLogger('db_log')
+    logger = logging.getLogger('mopdb_log')
     # set a formatter to manage the output format of our handler
     formatter = logging.Formatter('%(asctime)s; %(message)s',"%Y-%m-%d %H:%M:%S")
     # set the level for the logger, has to be logging.LEVEL not a string
@@ -72,11 +73,12 @@ def config_log(debug):
     return logger
 
 
-def db_connect(db, db_log):
+def db_connect(db):
     """Connects to ACCESS mapping sqlite database"""
+    mopdb_log = logging.getLogger('mopdb_log')
     conn = sqlite3.connect(db, timeout=10, isolation_level=None)
     if conn.total_changes == 0:
-        db_log.info(f"Opened database {db} successfully")
+        mopdb_log.info(f"Opened database {db} successfully")
     return conn
 
 
@@ -175,7 +177,7 @@ def cmor_update_sql():
     return sql
 
 
-def create_table(conn, sql, db_log):
+def create_table(conn, sql):
     """Creates table if database is empty
 
     Parameters
@@ -183,17 +185,17 @@ def create_table(conn, sql, db_log):
     conn : connection object
     sql : str
         SQL style string defining table to create
-    db_log: logger obj
     """
+    mopdb_log = logging.getLogger('mopdb_log')
     try:
         c = conn.cursor()
         c.execute(sql)
     except Exception as e:
-        db_log.error(e)
+        mopdb_log.error(e)
     return
 
 
-def update_db(conn, table, rows_list, db_log):
+def update_db(conn, table, rows_list):
     """Adds to table new variables definitions
 
     Parameters
@@ -203,25 +205,25 @@ def update_db(conn, table, rows_list, db_log):
         Name of database table to use
     rows_list : list
         List of str representing rows to add to table
-    db_log: logger obj
     """
+    mopdb_log = logging.getLogger('mopdb_log')
     # insert into db
     if table == 'cmorvar':
         sql = cmor_update_sql()
     elif table == 'mapping':
         sql = map_update_sql()
     else:
-        db_log.error("Provide an insert sql statement for table: {table}")
+        mopdb_log.error(f"Provide an insert sql statement for table: {table}")
     if len(rows_list) > 0:
-        db_log.info('Updating db ...')
+        mopdb_log.info('Updating db ...')
         with conn:
             c = conn.cursor()
-            db_log.debug(sql)
+            mopdb_log.debug(sql)
             c.executemany(sql, rows_list)
             nmodified = c.rowcount
-            db_log.info(f"Rows modified: {nmodified}")
+            mopdb_log.info(f"Rows modified: {nmodified}")
     conn.close()
-    db_log.info('--- Done ---')
+    mopdb_log.info('--- Done ---')
     return
 
 
@@ -246,6 +248,7 @@ def query(conn, sql, tup=(), first=True):
     result : tuple/list(tuple)
         tuple or a list of, representing row/s returned by query
     """
+    mopdb_log = logging.getLogger('mopdb_log')
     with conn:
         c = conn.cursor()
         c.execute(sql, tup)
@@ -260,16 +263,18 @@
 def get_columns(conn, table):
     """Gets list of columns from db table
     """
+    mopdb_log = logging.getLogger('mopdb_log')
     sql = f'PRAGMA table_info({table});'
     table_data = query(conn, sql, first=False)
     columns = [x[1] for x in table_data]
     return columns
 
 
-def get_cmorname(conn, varname, version, frequency, db_log):
+def get_cmorname(conn, varname, version, frequency):
     """Queries mapping table for cmip name given variable name as output
        by the model
     """
+    mopdb_log = logging.getLogger('mopdb_log')
     sql = f"""SELECT cmor_var,model,cmor_table,frequency FROM mapping
         WHERE input_vars='{varname}' and (calculation=''
         or calculation IS NULL)"""
@@ -283,7 +288,7 @@ def get_cmorname(conn, varname, version, 
frequency, db_log): cmor_var = names[0] cmor_table = tables[0] elif len(names) > 1: - db_log.debug(f"Found more than 1 definition for {varname}:\n" + + mopdb_log.debug(f"Found more than 1 definition for {varname}:\n" + f"{results}") match_found = False for r in results: @@ -306,7 +311,7 @@ def get_cmorname(conn, varname, version, frequency, db_log): if not match_found: cmor_var = names[0] cmor_table = tables[0] - db_log.info(f"Found more than 1 definition for {varname}:\n"+ + mopdb_log.info(f"Found more than 1 definition for {varname}:\n"+ f"{results}\n Using {cmor_var} from {cmor_table}") return cmor_var, cmor_table @@ -335,23 +340,24 @@ def cmor_table_header(name, realm, frequency): return header -def write_cmor_table(var_list, name, db_log): +def write_cmor_table(var_list, name): """ """ + mopdb_log = logging.getLogger('mopdb_log') realms = [v[2] for v in var_list] setr = set(realms) if len(setr) > 1: realm = Counter(realms).most_common(1)[0][0] - db_log.info(f"More than one realms found for variables: {setr}") - db_log.info(f"Using: {realm}") + mopdb_log.info(f"More than one realms found for variables: {setr}") + mopdb_log.info(f"Using: {realm}") else: realm = realms[0] freqs = [v[1] for v in var_list] setf = set(freqs) if len(setf) > 1: frequency = Counter(freqs).most_common(1)[0][0] - db_log.info(f"More than one freqs found for variables: {setf}") - db_log.info(f"Using: {frequency}") + mopdb_log.info(f"More than one freqs found for variables: {setf}") + mopdb_log.info(f"Using: {frequency}") else: frequency = freqs[0] header = cmor_table_header(name, realm, frequency) @@ -373,7 +379,7 @@ def write_cmor_table(var_list, name, db_log): return -def delete_record(conn, table, col, pairs, db_log): +def delete_record(conn, table, col, pairs): """Deletes record from table based on pairs of column and value passed for selection @@ -387,45 +393,45 @@ def delete_record(conn, table, col, pairs, db_log): name of column to return with query pairs : list[tuple(str, str)] pairs of columns, values to select record/s - db_log: logger obj - logger connection """ + mopdb_log = logging.getLogger('mopdb_log') # Set up query sqlwhere = f"FROM {table} WHERE " for c,v in pairs: sqlwhere += f"{c}='{v}' AND " sql = f"SELECT {col} " + sqlwhere[:-4] - db_log.debug(f"Delete query: {sql}") + mopdb_log.debug(f"Delete query: {sql}") xl = query(conn, sql, first=False) # Delete from db if xl is not None: - db_log.info(f"Found {len(xl)} records") + mopdb_log.info(f"Found {len(xl)} records") for x in xl: - db_log.info(f"{x}") + mopdb_log.info(f"{x}") confirm = input('Confirm deletion from database: Y/N ') if confirm == 'Y': - db_log.info('Updating db ...') + mopdb_log.info('Updating db ...') with conn: c = conn.cursor() sql = "DELETE " + sqlwhere[:-4] - db_log.debug(f"Delete sql: {sql}") + mopdb_log.debug(f"Delete sql: {sql}") c.execute(sql) c.execute('select total_changes()') - db_log.info(f"Rows modified: {c.fetchall()[0][0]}") + mopdb_log.info(f"Rows modified: {c.fetchall()[0][0]}") else: - db_log.info("The query did not return any records") + mopdb_log.info("The query did not return any records") conn.close() return -def list_files(indir, match, db_log): +def list_files(indir, match): """Returns list of files matching input directory and match""" + mopdb_log = logging.getLogger('mopdb_log') files = [x for x in Path(indir).rglob(f"{match}") if x.is_file()] - db_log.debug(f"{indir}/**/*{match}*") + mopdb_log.debug(f"{indir}/**/*{match}*") return files -def build_umfrq(time_axs, ds, db_log): +def build_umfrq(time_axs, 
ds): """Return a dictionary with frequency for each time axis. Frequency is inferred by comparing interval between two consecutive @@ -433,21 +439,23 @@ def build_umfrq(time_axs, ds, db_log): Order time_axis so ones with only one step are last, so we can use file frequency (interval_file) inferred from other time axes. """ + mopdb_log = logging.getLogger('mopdb_log') umfrq = {} int2frq = {'dec': 3652.0, 'yr': 365.0, 'mon': 30.0, 'day': 1.0, '6hr': 0.25, '3hr': 0.125, - '1hr': 0.041667, '10min': 0.006944} + '1hr': 0.041667, '30min': 0.020833, '10min': 0.006944} time_axs.sort(key=lambda x: len(ds[x]), reverse=True) - db_log.debug(f"in build_umfrq, time_axs: {time_axs}") + mopdb_log.debug(f"in build_umfrq, time_axs: {time_axs}") for t in time_axs: - db_log.debug(f"len of time axis {t}: {len(ds[t])}") + mopdb_log.debug(f"len of time axis {t}: {len(ds[t])}") if len(ds[t]) > 1: - interval = (ds[t][1]-ds[t][0]).values - interval_file = (ds[t][-1] -ds[t][0]).values + interval = (ds[t][1]-ds[t][0]).values / np.timedelta64(1, 'D') +#astype('timedelta64[m]') / 1440.0 + interval_file = (ds[t][-1] -ds[t][0]).values / np.timedelta64(1, 'D') else: interval = interval_file - db_log.debug(f"interval 2 timesteps for {t}: {interval}") - db_log.debug(f"interval entire file {t}: {interval_file}") + mopdb_log.debug(f"interval 2 timesteps for {t}: {interval}") + mopdb_log.debug(f"interval entire file {t}: {interval_file}") for k,v in int2frq.items(): if math.isclose(interval, v, rel_tol=0.05): umfrq[t] = k @@ -455,11 +463,12 @@ def build_umfrq(time_axs, ds, db_log): return umfrq -def get_frequency(realm, fname, ds, db_log): +def get_frequency(realm, fname, ds): """Return frequency based on realm and filename For UM files checks if more than one time axis is present and if so returns dictionary with frequency: variable list """ + mopdb_log = logging.getLogger('mopdb_log') umfrq = {} frequency = 'NAfrq' if realm == 'atmos': @@ -476,8 +485,8 @@ def get_frequency(realm, fname, ds, db_log): if len(time_axs_len) == 1: umfrq = {} else: - umfrq = build_umfrq(time_axs, ds, db_log) - db_log.debug(f"umfrq: {umfrq}") + umfrq = build_umfrq(time_axs, ds) + mopdb_log.debug(f"umfrq: {umfrq}") elif realm == 'ocean': # if I found scalar or monthly in any of fbits if any(x in fname for x in ['scalar', 'month']): @@ -489,7 +498,7 @@ def get_frequency(realm, fname, ds, db_log): frequency = 'mon' elif '_d.' in fname: frequency = 'day' - db_log.debug(f"Frequency: {frequency}") + mopdb_log.debug(f"Frequency: {frequency}") return frequency, umfrq @@ -499,6 +508,7 @@ def get_cell_methods(attrs, dims): `time: point` If `area` not specified is added at start of string as `area: ` """ + mopdb_log = logging.getLogger('mopdb_log') frqmod = '' val = attrs.get('cell_methods', "") if 'area' not in val: @@ -513,29 +523,28 @@ def get_cell_methods(attrs, dims): return val, frqmod -def write_varlist(conn, indir, startdate, version, db_log): +def write_varlist(conn, indir, startdate, version): """Based on model output files create a variable list and save it to a csv file. 
Main attributes needed to map output are provided for each variable """ - #PP temporarily remove .nc as ocean files sometimes have pattern.nc-datestamp - #sdate = f"*{startdate}*.nc" + mopdb_log = logging.getLogger('mopdb_log') sdate = f"*{startdate}*" - files = list_files(indir, sdate, db_log) - db_log.debug(f"Found files: {files}") + files = list_files(indir, sdate) + mopdb_log.debug(f"Found files: {files}") patterns = [] for fpath in files: # get filename pattern until date match - db_log.debug(f"Filename: {fpath.name}") + mopdb_log.debug(f"Filename: {fpath.name}") fpattern = fpath.name.split(startdate)[0] # adding this in case we have a mix of yyyy/yyyymn date stamps # as then a user would have to pass yyyy only and would get 12 files for some of the patterns if fpattern in patterns: continue patterns.append(fpattern) - pattern_list = list_files(indir, f"{fpattern}*", db_log) + pattern_list = list_files(indir, f"{fpattern}*") nfiles = len(pattern_list) - db_log.debug(f"File pattern: {fpattern}") + mopdb_log.debug(f"File pattern: {fpattern}") fcsv = open(f"{fpattern}.csv", 'w') fwriter = csv.writer(fcsv, delimiter=';') fwriter.writerow(["name", "cmor_var", "units", "dimensions", @@ -543,18 +552,18 @@ def write_varlist(conn, indir, startdate, version, db_log): "vtype", "size", "nsteps", "filename", "long_name", "standard_name"]) # get attributes for the file variables - realm = get_realm(fpath, version, db_log) + realm = get_realm(fpath, version) ds = xr.open_dataset(fpath, decode_times=False) coords = [c for c in ds.coords] + ['latitude_longitude'] - frequency, umfrq = get_frequency(realm, fpath.name, ds, db_log) + frequency, umfrq = get_frequency(realm, fpath.name, ds) multiple_frq = False if umfrq != {}: multiple_frq = True - db_log.debug(f"Multiple frq: {multiple_frq}") + mopdb_log.debug(f"Multiple frq: {multiple_frq}") for vname in ds.variables: if vname not in coords and all(x not in vname for x in ['_bnds','_bounds']): v = ds[vname] - db_log.debug(f"Variable: {v.name}") + mopdb_log.debug(f"Variable: {v.name}") # get size in bytes of grid for 1 timestep and number of timesteps vsize = v[0].nbytes nsteps = nfiles * v.shape[0] @@ -564,14 +573,14 @@ def write_varlist(conn, indir, startdate, version, db_log): frequency = umfrq[v.dims[0]] else: frequency = 'NA' - db_log.info(f"Could not detect frequency for variable: {v}") + mopdb_log.info(f"Could not detect frequency for variable: {v}") attrs = v.attrs cell_methods, frqmod = get_cell_methods(attrs, v.dims) varfrq = frequency + frqmod - db_log.debug(f"Frequency x var: {varfrq}") + mopdb_log.debug(f"Frequency x var: {varfrq}") # try to retrieve cmip name cmor_var, cmor_table = get_cmorname(conn, vname, - version, varfrq, db_log) + version, varfrq) line = [v.name, cmor_var, attrs.get('units', ""), " ".join(v.dims), varfrq, realm, cell_methods, cmor_table, v.dtype, vsize, @@ -579,12 +588,13 @@ def write_varlist(conn, indir, startdate, version, db_log): attrs.get('standard_name', "")] fwriter.writerow(line) fcsv.close() - db_log.info(f"Variable list for {fpattern} successfully written") + mopdb_log.info(f"Variable list for {fpattern} successfully written") return def read_map_app4(fname): """Reads APP4 style mapping """ + mopdb_log = logging.getLogger('mopdb_log') # old order #cmor_var,definable,input_vars,calculation,units,axes_mod,positive,ACCESS_ver[CM2/ESM/both],realm,notes var_list = [] @@ -607,7 +617,7 @@ def read_map_app4(fname): return var_list -def read_map(fname, alias, db_log): +def read_map(fname, alias): """Reads complete 
mapping csv file and extract info necessary to create new records for the mapping table in access.db Fields from file: @@ -619,6 +629,7 @@ def read_map(fname, alias, db_log): realm, cell_methods, positive, model, notes, origin NB model and version are often the same but version should eventually be defined in a CV """ + mopdb_log = logging.getLogger('mopdb_log') var_list = [] with open(fname, 'r') as csvfile: reader = csv.reader(csvfile, delimiter=';') @@ -627,8 +638,8 @@ def read_map(fname, alias, db_log): if row[0][0] == "#": continue else: - db_log.debug(f"In read_map: {row[0]}") - db_log.debug(f"In read_map row length: {len(row)}") + mopdb_log.debug(f"In read_map: {row[0]}") + mopdb_log.debug(f"In read_map row length: {len(row)}") if row[16] != '': notes = row[16] else: @@ -639,30 +650,31 @@ def read_map(fname, alias, db_log): return var_list -def match_stdname(conn, row, stdn, db_log): +def match_stdname(conn, row, stdn): """Returns an updated stdn list if finds one or more variables in cmorvar table that match the standard name passed as input. It also return a False/True found_match boolean. """ + mopdb_log = logging.getLogger('mopdb_log') found_match = False sql = f"""SELECT name FROM cmorvar where standard_name='{row['standard_name']}'""" results = query(conn, sql, first=False) matches = [x[0] for x in results] if len(matches) > 0: - stdn = add_var(stdn, row, tuple([matches]+['']*7), db_log, - stdnm=True) + stdn = add_var(stdn, row, tuple([matches]+['']*7), stdnm=True) found_match = True return stdn, found_match -def match_var(row, version, mode, conn, records, db_log): +def match_var(row, version, mode, conn, records): """Returns match for variable if found after looping variables already mapped in database Parameters """ + mopdb_log = logging.getLogger('mopdb_log') found_match = False # build sql query based on mode sql_base = f"""SELECT cmor_var,input_vars,calculation,frequency, @@ -678,17 +690,17 @@ def match_var(row, version, mode, conn, records, db_log): sql = sql_base + sql_frq # execute query and process results result = query(conn, sql, first=False) - db_log.debug(f"match_var: {result}, sql: {sql[110:]}") + mopdb_log.debug(f"match_var: {result}, sql: {sql[110:]}") if result is not None and result != []: for x in result: - db_log.debug(f"match: {x}") - records = add_var(records, row, x, db_log) + mopdb_log.debug(f"match: {x}") + records = add_var(records, row, x) found_match = True return records, found_match -def parse_vars(conn, rows, version, db_log): +def parse_vars(conn, rows, version): """Returns records of variables to include in template mapping file, a list of all stash variables + frequency available in model output and a list of variables already defined in db @@ -700,13 +712,13 @@ def parse_vars(conn, rows, version, db_log): list of variables to match version : str model version to use to match variables - db_log: logger obj Returns ------- stash_vars : list varname-frequency for each listed variable, varname is from model output """ + mopdb_log = logging.getLogger('mopdb_log') full = [] no_ver = [] no_frq = [] @@ -719,29 +731,28 @@ def parse_vars(conn, rows, version, db_log): if row['name'][0] == "#" or row['name'] == 'name': continue else: - full, found = match_var(row, version, 'full', conn, full, db_log) + full, found = match_var(row, version, 'full', conn, full) # if no match, ignore model version first and then frequency - db_log.debug(f"found perfect match: {found}") + mopdb_log.debug(f"found perfect match: {found}") if not found: - no_ver, found = 
match_var(row, version, 'no_ver', conn, no_ver, db_log) - db_log.debug(f"found no ver match: {found}") + no_ver, found = match_var(row, version, 'no_ver', conn, no_ver) + mopdb_log.debug(f"found no ver match: {found}") if not found: - no_frq, found = match_var(row, version, 'no_frq', conn, no_frq, db_log) - db_log.debug(f"found no frq match: {found}") + no_frq, found = match_var(row, version, 'no_frq', conn, no_frq) + mopdb_log.debug(f"found no frq match: {found}") # make a last attempt to match using standard_name if not found: if row['standard_name'] != '': - stdn, found = match_stdname(conn, row, stdn, db_log) - db_log.debug(f"found stdnm match: {found}") + stdn, found = match_stdname(conn, row, stdn) + mopdb_log.debug(f"found stdnm match: {found}") if not found: - no_match = add_var(no_match, row, tuple([row['name']]+['']*8), - db_log) + no_match = add_var(no_match, row, tuple([row['name']]+['']*8)) stash_vars.append(f"{row['name']}-{row['frequency']}") return full, no_ver, no_frq, stdn, no_match, stash_vars -def add_var(vlist, row, match, db_log, stdnm=False): +def add_var(vlist, row, match, stdnm=False): """Add information from match to variable list and re-order fields so they correspond to final mapping output. @@ -750,9 +761,10 @@ def add_var(vlist, row, match, db_log, stdnm=False): match values (cmor_var,input_vars,calculation,frequency, realm,model(version),cmor_table,positive,units) """ + mopdb_log = logging.getLogger('mopdb_log') # assign cmor_var from match and swap place with input_vars - db_log.debug(f"Assign cmor_var: {match}") - db_log.debug(f"initial row: {row}") + mopdb_log.debug(f"Assign cmor_var: {match}") + mopdb_log.debug(f"initial row: {row}") var = row.copy() var['cmor_var'] = match[0] var['input_vars'] = match[1] @@ -781,7 +793,7 @@ def add_var(vlist, row, match, db_log, stdnm=False): return vlist -def remove_duplicate(vlist, db_log, extra=[], strict=True): +def remove_duplicate(vlist, extra=[], strict=True): """Returns list without duplicate variable definitions. Define unique definition for variable as tuple (cmor_var, input_vars, @@ -790,25 +802,26 @@ def remove_duplicate(vlist, db_log, extra=[], strict=True): If extra is defined if a variable exists in this additional set it is a duplicate """ - db_log.debug(f'in duplicate, vlist {vlist}') + mopdb_log = logging.getLogger('mopdb_log') + mopdb_log.debug(f'in duplicate, vlist {vlist}') vid_list = [] keys = ['cmor_var', 'input_vars', 'calculation'] if strict is True: keys += ['frequency', 'realm'] if extra: vid_list = [tuple(x[k] for k in keys) for x in extra] - db_log.debug(f"vid_list: {vid_list}") + mopdb_log.debug(f"vid_list: {vid_list}") final = [] for v in vlist: vid = tuple(v[k] for k in keys) - db_log.debug(f"var and vid: {v['cmor_var']}, {vid}") + mopdb_log.debug(f"var and vid: {v['cmor_var']}, {vid}") if vid not in vid_list: final.append(v) vid_list.append(vid) return final -def potential_vars(conn, rows, stash_vars, version, db_log): +def potential_vars(conn, rows, stash_vars, version): """Returns list of variables that can be potentially derived from model output. 
@@ -825,11 +838,11 @@ def potential_vars(conn, rows, stash_vars, version, db_log):
         varname-frequency for each listed variable, varname is from model output
     version : str
         model version to use to match variables
-    db_log: logger obj
 
     Returns
     -------
     """
+    mopdb_log = logging.getLogger('mopdb_log')
     pot_full = []
     pot_part = []
     pot_varnames = set()
@@ -838,24 +851,24 @@ def potential_vars(conn, rows, stash_vars, version, db_log):
         realm,model,cmor_table,positive,units FROM mapping
         WHERE input_vars like '%{row['name']}%'"""
         results = query(conn, sql, first=False)
-        db_log.debug(f"In potential: var {row['name']}, db results {results}")
+        mopdb_log.debug(f"In potential: var {row['name']}, db results {results}")
         for r in results:
             allinput = r[1].split(" ")
-            db_log.debug(f"{len(allinput)> 1}")
-            db_log.debug(all(f"{x}-{row['frequency']}" in stash_vars for x in allinput))
+            mopdb_log.debug(f"{len(allinput)> 1}")
+            mopdb_log.debug(all(f"{x}-{row['frequency']}" in stash_vars for x in allinput))
             if len(allinput) > 1 and all(f"{x}-{row['frequency']}" in stash_vars for x in allinput):
             # if both version and frequency of applied mapping match
             # consider this a full matching potential var
                 if r[5] == version and r[3] == row['frequency']:
-                    pot_full = add_var(pot_full, row, r, db_log)
+                    pot_full = add_var(pot_full, row, r)
                 else:
-                    pot_part = add_var(pot_part, row, r, db_log)
+                    pot_part = add_var(pot_part, row, r)
                 pot_varnames.add(r[0])
     return pot_full, pot_part, pot_varnames
 
 
 def write_map_template(conn, full, no_ver, no_frq, stdn,
-                       no_match, pot_full, pot_part, alias, db_log):
+                       no_match, pot_full, pot_part, alias):
     """Write mapping csv file template based on list of variables to define
 
     Input varlist file order:
     name, cmor_var, units, dimensions, frequency, realm, cell_methods,
     cmor_table, vtype, size, nsteps, filename, long_name, standard_name
     Mapping db order:
     cmor_var, input_vars, calculation, units, dimensions, frequency, realm,
     cell_methods, positive, cmor_table, version, vtype, size, nsteps,
     filename, long_name, standard_name
     """
+    mopdb_log = logging.getLogger('mopdb_log')
     keys = ['cmor_var', 'input_vars', 'calculation', 'units',
             'dimensions', 'frequency', 'realm', 'cell_methods',
             'positive', 'cmor_table', 'version', 'vtype', 'size',
 
     with open(f"map_{alias}.csv", 'w') as fcsv:
         fwriter = csv.DictWriter(fcsv, keys, delimiter=';')
-        write_vars(full, fwriter, keys, db_log, conn=conn)
+        write_vars(full, fwriter, keys, conn=conn)
         div = ("# Derived variables with matching version and " +
             "frequency: Use with caution!")
-        write_vars(pot_full, fwriter, div, db_log, conn=conn)
+        write_vars(pot_full, fwriter, div, conn=conn)
         #pot=True, conn=conn, sortby=0)
         div = ("# Variables definitions coming from different " +
             "version")
-        write_vars(no_ver, fwriter, div, db_log, conn=conn)
+        write_vars(no_ver, fwriter, div, conn=conn)
         div = ("# Variables with different frequency: Use with" +
             " caution!")
-        write_vars(no_ver, fwriter, div, db_log, conn=conn)
+        write_vars(no_frq, fwriter, div, conn=conn)
         div = ("# Variables matched using standard_name: Use " +
             "with caution!")
-        write_vars(stdn, fwriter, div, db_log, sortby='input_vars')
+        write_vars(stdn, fwriter, div, sortby='input_vars')
         div = "# Derived variables: Use with caution!"
-        write_vars(pot_part, fwriter, div, db_log, conn=conn)
+        write_vars(pot_part, fwriter, div, conn=conn)
         #pot=True, conn=conn, sortby=0)
         div = "# Variables without mapping"
-        write_vars(no_match, fwriter, div, db_log)
-    db_log.debug("Finished writing variables to mapping template")
+        write_vars(no_match, fwriter, div)
+    mopdb_log.debug("Finished writing variables to mapping template")
     fcsv.close()
 
     return
 
 
-def write_vars(vlist, fwriter, div, db_log, conn=None, sortby='cmor_var'):
+def write_vars(vlist, fwriter, div, conn=None, sortby='cmor_var'):
     """
     """
+    mopdb_log = logging.getLogger('mopdb_log')
     if len(vlist) > 0:
         if type(div) is str:
             divrow = {x:'' for x in vlist[0].keys()}
@@ -914,52 +929,54 @@
         fwriter.writerow(divrow)
         for var in sorted(vlist, key=itemgetter(sortby)):
             if conn:
-                var = check_realm_units(conn, var, db_log)
+                var = check_realm_units(conn, var)
             fwriter.writerow(var)
     return
 
 
-def check_realm_units(conn, var, db_log):
+def check_realm_units(conn, var):
     """Checks that realm and units are consistent with values in
     cmor table.
     """
+    mopdb_log = logging.getLogger('mopdb_log')
     vname = f"{var['cmor_var']}-{var['cmor_table']}"
     if var['cmor_table'] is None or var['cmor_table'] == "":
-        db_log.warning(f"Variable: {vname} has no associated cmor_table")
+        mopdb_log.warning(f"Variable: {vname} has no associated cmor_table")
     else:
     # retrieve modeling_realm, units from db cmor table
         sql = f"""SELECT modeling_realm, units FROM cmorvar
             WHERE name='{vname}' """
         result = query(conn, sql)
-        db_log.debug(f"In check_realm_units: {vname}, {result}")
+        mopdb_log.debug(f"In check_realm_units: {vname}, {result}")
         if result is not None:
             dbrealm = result[0]
             dbunits = result[1]
             # dbrealm could have two realms
             if var['realm'] not in [dbrealm] + dbrealm.split():
-                db_log.info(f"Changing {vname} realm from {var['realm']} to {dbrealm}")
+                mopdb_log.info(f"Changing {vname} realm from {var['realm']} to {dbrealm}")
                 var['realm'] = dbrealm
             if var['units'] != dbunits :
-                db_log.info(f"Changing {vname} units from {var['units']} to {dbunits}")
+                mopdb_log.info(f"Changing {vname} units from {var['units']} to {dbunits}")
                 var['units'] = dbunits
         else:
-            db_log.warning(f"Variable {vname} not found in cmor table")
+            mopdb_log.warning(f"Variable {vname} not found in cmor table")
     return var
 
 
-    def get_realm(fpath, version, db_log):
-        '''Return realm for variable in files or NArealm'''
-        if version == 'AUS2200':
-            realm = 'atmos'
-        else:
-            realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm']
-                if x in fpath.parts][0]
-        if realm == 'atm':
-            realm = 'atmos'
-        elif realm == 'ocn':
-            realm = 'ocean'
-        elif realm is None:
-            realm = 'NArealm'
-            db_log.info(f"Couldn't detect realm from path, setting to NArealm")
-        db_log.debug(f"Realm is {realm}")
+def get_realm(fpath, version):
+    '''Return realm for variable in files or NArealm'''
+    mopdb_log = logging.getLogger('mopdb_log')
+    if version == 'AUS2200':
+        realm = 'atmos'
+    else:
+        # next() yields None when nothing matches, instead of the IndexError
+        # a bare [0] would raise, so the NArealm branch below is reachable
+        realm = next((x for x in ['atmos', 'ocean', 'ice', 'ocn', 'atm']
+            if x in fpath.parts), None)
+    if realm == 'atm':
+        realm = 'atmos'
+    elif realm == 'ocn':
+        realm = 'ocean'
+    elif realm is None:
+        realm = 'NArealm'
+        mopdb_log.info("Couldn't detect realm from path, setting to NArealm")
+    mopdb_log.debug(f"Realm is {realm}")
     return realm

diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py
index b8af723..1adf216 100644
--- a/src/mopper/calculations.py
+++ b/src/mopper/calculations.py
@@ -37,6 +37,7 @@
 import json
 import 
numpy as np import dask +import logging from importlib_resources import files as import_files from mopper.setup_utils import read_yaml @@ -852,9 +853,10 @@ def plevinterp(ctx, var, pmod, levnum): interp : Xarray DataArray The variable interpolated on pressure levels """ + + var_log = logging.getLogger(ctx.obj['var_log']) # avoid dask warning dask.config.set(**{'array.slicing.split_large_chunks': True}) - var_log = ctx.obj['var_log'] plev = get_plev(levnum) lev = var.dims[1] # if pmod is pressure on rho_level_0 and variable is on rho_level @@ -928,7 +930,7 @@ def K_degC(ctx, var): vout : Xarray DataArray temperature array in degrees Celsius """ - var_log = ctx.obj['var_log'] + var_log = logging.getLogger(ctx.obj['var_log']) if 'K' in var.units: var_log.info("temp in K, converting to degC") vout = var - 273.15 @@ -1199,7 +1201,7 @@ def level_to_height(ctx, var, levs=None): vout : Xarray DataArray Same variable defined on model levels height """ - var_log = ctx.obj['var_log'] + var_log = logging.getLogger(ctx.obj['var_log']) if levs is not None and type(levs) not in [tuple, list]: var_log.error(f"level_to_height function: levs {levs} should be a tuple or list") zdim = var.dims[1] @@ -1293,7 +1295,7 @@ def calc_overt(ctx, varlist, sv=False): overt: DataArray overturning mass streamfunction (time, basin, depth, gridlat) variable """ - var_log = ctx.obj['var_log'] + var_log = logging.getLogger(ctx.obj['var_log']) var1 = varlist[0] vlat, vlon = var1.dims[2:] mask = get_basin_mask(vlat, vlon) @@ -1381,7 +1383,7 @@ def overturn_stream(ctx, varlist, sv=False): stream: DataArray The ocean overturning mass streamfunction in kg s-1 """ - var_log = ctx.obj['var_log'] + var_log = logging.getLogger(ctx.obj['var_log']) londim = varlist[0].dims[3] depdim = varlist[0].dims[1] var_log.debug(f"Streamfunct lon, dep dims: {londim}, {depdim}") @@ -1434,7 +1436,7 @@ def calc_depositions(ctx, var, weight=None): (personal communication from M. Woodhouse) """ - var_log = ctx.obj['var_log'] + var_log = logging.getLogger(ctx.obj['var_log']) varlist = [] for v in var: v0 = v.sel(model_theta_level_number=1).squeeze(dim='model_theta_level_number') diff --git a/src/mopper/mop_setup.py b/src/mopper/mop_setup.py index a639075..7040270 100755 --- a/src/mopper/mop_setup.py +++ b/src/mopper/mop_setup.py @@ -28,6 +28,7 @@ import json import csv import click +import logging from pathlib import Path from json.decoder import JSONDecodeError from importlib.resources import files as import_files @@ -35,7 +36,7 @@ from mopper.setup_utils import * -def find_matches(table, var, realm, frequency, varlist, mop_log): +def find_matches(table, var, realm, frequency, varlist): """Finds variable matching constraints given by table and config settings and returns a dictionary with the variable specifications. 
@@ -59,14 +60,13 @@ def find_matches(table, var, realm, frequency, varlist, mop_log): varlist : list List of variables, each represented by a dictionary with mappings used to find a match to "var" passed - mop_log : logging object - Log Returns ------- match : dict Dictionary containing matched variable specifications or None if not matches """ + mop_log = logging.getLogger('mop_log') near_matches = [] found = False match = None @@ -83,7 +83,7 @@ def find_matches(table, var, realm, frequency, varlist, mop_log): and v['realm'] in realm.split()): near_matches.append(v) if found is False and frequency != 'fx': - v = find_nearest(near_matches, frequency, mop_log) + v = find_nearest(near_matches, frequency) if v is not None: match = v found = True @@ -110,7 +110,7 @@ def find_matches(table, var, realm, frequency, varlist, mop_log): return match -def find_nearest(varlist, frequency, mop_log): +def find_nearest(varlist, frequency): """If variable is present in file at different frequencies, finds the one with higher frequency nearest to desired frequency. Adds frequency to variable resample field. @@ -124,8 +124,6 @@ def find_nearest(varlist, frequency, mop_log): frequency frequency : str Variable frequency to match - mop_log : logging object - Log Returns ------- @@ -133,6 +131,7 @@ def find_nearest(varlist, frequency, mop_log): Dictionary containing matched variable specifications or None if not matches """ + mop_log = logging.getLogger('mop_log') var = None found = False freq = frequency @@ -178,7 +177,7 @@ def setup_env(ctx): attributes for experiment """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') cdict = ctx.obj cdict['appdir'] = Path(cdict['appdir']) appdir = cdict['appdir'] @@ -231,7 +230,7 @@ def setup_env(ctx): def var_map(ctx, activity_id=None): """ """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') tables = ctx.obj.get('tables', 'all') subset = ctx.obj.get('var_subset', False) sublist = ctx.obj.get('var_subset_list', None) @@ -289,7 +288,7 @@ def create_var_map(ctx, table, mappings, activity_id=None, Returns ------- """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') matches = [] fpath = ctx.obj['tables_path'] / f"{table}.json" if not fpath.exists(): @@ -325,7 +324,7 @@ def create_var_map(ctx, table, mappings, activity_id=None, years = dreq_years[var] if 'subhr' in frq: frq = ctx.obj['subhr'] + frq.split('subhr')[1] - match = find_matches(table, var, realm, frq, mappings, mop_log) + match = find_matches(table, var, realm, frq, mappings) if match is not None: match['years'] = years matches.append(match) @@ -367,7 +366,7 @@ def archive_workdir(ctx): def manage_env(ctx): """Prepare output directories and removes pre-existing ones """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') # check if output path already exists outpath = ctx.obj['outpath'] if outpath.exists() and ctx.obj['update'] is False: diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index 0f0fd42..52f01bf 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -81,9 +81,9 @@ def config_log(debug, path, stream_level=logging.WARNING): return logger -def config_varlog(debug, logname): +def config_varlog(debug, logname, pid): """Configure varlog file: use this for specific var information""" - logger = logging.getLogger('var_log') + logger = logging.getLogger(f'{pid}_log') formatter = logging.Formatter('%(asctime)s; %(message)s',"%Y-%m-%d %H:%M:%S") if debug is True: level = logging.DEBUG @@ -99,6 +99,8 @@ def 
config_varlog(debug, logname): flog.setLevel(level) flog.setFormatter(formatter) logger.addHandler(flog) + # Stop propagation + logger.propagate = False return logger @@ -118,9 +120,8 @@ def _preselect(ds, varlist): return ds[varsel] - @click.pass_context -def get_files(ctx, var_log): +def get_files(ctx): """Returns all files in time range First identifies all files with pattern/s defined for invars Then retrieve time dimension and if multiple time axis are present @@ -129,40 +130,41 @@ def get_files(ctx, var_log): last timestep from each file """ # Returns file list for each input var and list of vars for each file pattern - all_files, path_vars = find_all_files(var_log) + var_log = logging.getLogger(ctx.obj['var_log']) + all_files, path_vars = find_all_files() # PP FUNCTION END return all_files, extra_files var_log.debug(f"access files from: {os.path.basename(all_files[0][0])}" + f"to {os.path.basename(all_files[0][-1])}") ds = xr.open_dataset(all_files[0][0], decode_times=False) - time_dim, units, multiple_times = get_time_dim(ds, var_log) + time_dim, units, multiple_times = get_time_dim(ds) del ds try: inrange_files = [] for i,paths in enumerate(all_files): if multiple_times is True: - inrange_files.append( check_in_range(paths, time_dim, var_log) ) + inrange_files.append( check_in_range(paths, time_dim) ) else: - inrange_files.append( check_timestamp(paths, var_log) ) + inrange_files.append( check_timestamp(paths) ) except: for i,paths in enumerate(all_files): - inrange_files.append( check_in_range(paths, time_dim, var_log) ) + inrange_files.append( check_in_range(paths, time_dim) ) for i,paths in enumerate(inrange_files): if paths == []: - mop_log.error(f"No data in requested time range for: {ctx.obj['filename']}") var_log.error(f"No data in requested time range for: {ctx.obj['filename']}") return inrange_files, path_vars, time_dim, units @click.pass_context -def find_all_files(ctx, var_log): +def find_all_files(ctx): """Find all the ACCESS file names which match the pattern/s associated with invars. Sort the filenames, assuming that the sorted filenames will be in chronological order because there is usually some sort of date and/or time information in the filename. 
Check that all variables needed are in file, otherwise add extra file pattern """ + var_log = logging.getLogger(ctx.obj['var_log']) var_log.debug(f"Input file structure: {ctx.obj['infile']}") patterns = ctx.obj['infile'].split() var_log.debug(f"Input file patterns: {patterns}") @@ -186,7 +188,7 @@ def find_all_files(ctx, var_log): while len(missing) > 0 and i < len(patterns): path_vars[i] = [] f = files[i][0] - missing, found = check_vars_in_file(missing, f, var_log) + missing, found = check_vars_in_file(missing, f) if len(found) > 0: for v in found: path_vars[i].append(v) @@ -198,10 +200,11 @@ def find_all_files(ctx, var_log): @click.pass_context -def check_vars_in_file(ctx, invars, fname, var_log): +def check_vars_in_file(ctx, invars, fname): """Check that all variables needed for calculation are in file else return extra filenames """ + var_log = logging.getLogger(ctx.obj['var_log']) ds = xr.open_dataset(fname, decode_times=False) tofind = [v for v in invars if v not in ds.variables] found = [v for v in invars if v not in tofind] @@ -209,10 +212,11 @@ def check_vars_in_file(ctx, invars, fname, var_log): @click.pass_context -def get_time_dim(ctx, ds, var_log): +def get_time_dim(ctx, ds): """Find time info: time axis, reference time and set tstart and tend also return mutlitple_times True if more than one time axis """ + var_log = logging.getLogger(ctx.obj['var_log']) time_dim = None multiple_times = False varname = [ctx.obj['vin'][0]] @@ -236,11 +240,12 @@ def get_time_dim(ctx, ds, var_log): @click.pass_context -def check_timestamp(ctx, all_files, var_log): +def check_timestamp(ctx, all_files): """This function tries to guess the time coverage of a file based on its timestamp and return the files in range. At the moment it does a lot of checks based on the realm and real examples eventually it would make sense to make sure all files generated are consistent in naming """ + var_log = logging.getLogger(ctx.obj['var_log']) inrange_files = [] realm = ctx.obj['realm'] var_log.info("checking files timestamp ...") @@ -305,11 +310,12 @@ def check_timestamp(ctx, all_files, var_log): @click.pass_context -def check_in_range(ctx, all_files, tdim, var_log): +def check_in_range(ctx, all_files, tdim): """Return a list of files in time range Open each file and check based on time axis Use this function only if check_timestamp fails """ + var_log = logging.getLogger(ctx.obj['var_log']) inrange_files = [] var_log.info("loading files...") var_log.debug(f"time dimension: {tdim}") @@ -337,7 +343,7 @@ def check_in_range(ctx, all_files, tdim, var_log): @click.pass_context -def load_data(ctx, inrange_files, path_vars, time_dim, var_log): +def load_data(ctx, inrange_files, path_vars, time_dim): """Returns a dictionary of input var: xarray dataset """ # preprocessing to select only variables we need to avoid @@ -345,6 +351,7 @@ def load_data(ctx, inrange_files, path_vars, time_dim, var_log): # temporarily opening file without decoding times, fixing # faulty time bounds units and decoding times # this is to prevent issues with ocean files + var_log = logging.getLogger(ctx.obj['var_log']) input_ds = {} for i, paths in enumerate(inrange_files): preselect = partial(_preselect, varlist=path_vars[i]) @@ -361,9 +368,10 @@ def load_data(ctx, inrange_files, path_vars, time_dim, var_log): @click.pass_context -def get_cmorname(ctx, axis_name, axis, var_log, z_len=None): +def get_cmorname(ctx, axis_name, axis, z_len=None): """Get time cmor name based on timeshot option """ + var_log = logging.getLogger(ctx.obj['var_log']) 
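+    # ctx.obj['var_log'] stores the logger *name* set by process_row(),
+    # so getLogger() returns the same per-file logger here without it
+    # being passed down as an argument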
var_log.debug(f'axis_name, axis.name: {axis_name}, {axis.name}')
     ctx.obj['axes_modifier'] = []
     if axis_name == 't':
@@ -418,10 +426,11 @@ def get_cmorname(ctx, axis_name, axis, var_log, z_len=None):
 
 #PP this should eventually just be generated directly by defining the dimension using the same terms
 # in related calculation
 @click.pass_context
-def pseudo_axis(axis, var_log):
+def pseudo_axis(ctx, axis):
     """coordinates with axis_identifier other than X,Y,Z,T
     PP not sure if axis can be used to remove axes_mod
     """
+    var_log = logging.getLogger(ctx.obj['var_log'])
     cmor_name = None
     p_vals = None
     p_len = None
@@ -450,9 +459,11 @@ def pseudo_axis(axis, var_log):
 
 #PP this should eventually just be generated directly by defining the dimension using the same terms
 # in calculation for meridional overturning
-def create_axis(axis, table, var_log):
+@click.pass_context
+def create_axis(ctx, axis, table):
     """
     """
+    var_log = logging.getLogger(ctx.obj['var_log'])
     # maybe we can just create these axis as they're meant in calculations
     var_log.info(f"creating {axis.name} axis...")
     #func_dict = {'oline': getTransportLines(),
@@ -469,9 +480,10 @@ def create_axis(ctx, axis, table):
     return axis_id
 
 
-def hybrid_axis(lev, z_ax_id, z_ids, var_log):
+@click.pass_context
+def hybrid_axis(ctx, lev, z_ax_id, z_ids):
     """Setting up additional hybrid axis information
     """
+    var_log = logging.getLogger(ctx.obj['var_log'])
     hybrid_dict = {'hybrid_height': 'b',
                    'hybrid_height_half': 'b_half'}
     orog_vals = getOrog()
@@ -492,9 +504,10 @@ def hybrid_axis(ctx, lev, z_ax_id, z_ids):
 
 @click.pass_context
-def ij_axis(ctx, ax, ax_name, table, var_log):
+def ij_axis(ctx, ax, ax_name, table):
     """
     """
+    var_log = logging.getLogger(ctx.obj['var_log'])
     cmor.set_table(table)
     ax_id = cmor.axis(table_entry=ax_name,
         units='1',
@@ -503,12 +516,13 @@ def ij_axis(ctx, ax, ax_name, table):
 
 @click.pass_context
-def ll_axis(ctx, ax, ax_name, ds, table, bounds_list, var_log):
+def ll_axis(ctx, ax, ax_name, ds, table, bounds_list):
     """
     """
+    var_log = logging.getLogger(ctx.obj['var_log'])
     var_log.debug("in ll_axis")
     cmor.set_table(table)
-    cmor_aName = get_cmorname(ax_name, ax, var_log)
+    cmor_aName = get_cmorname(ax_name, ax)
     try:
         ax_units = ax.units
     except:
@@ -516,7 +530,7 @@ def ll_axis(ctx, ax, ax_name, ds, table, bounds_list):
     a_bnds = None
     var_log.debug(f"got cmor name: {cmor_aName}")
     if cmor_aName in bounds_list:
-        a_bnds = get_bounds(ds, ax, cmor_aName, var_log)
+        a_bnds = get_bounds(ds, ax, cmor_aName)
     a_vals = ax.values
     var_log.debug(f"a_bnds: {a_bnds.shape}")
     var_log.debug(f"a_vals: {a_vals.shape}")
@@ -533,10 +547,10 @@ def ll_axis(ctx, ax, ax_name, ds, table, bounds_list):
     return ax_id
 
 @click.pass_context
-def define_grid(ctx, j_id, i_id, lat, lat_bnds, lon, lon_bnds,
-    var_log):
+def define_grid(ctx, j_id, i_id, lat, lat_bnds, lon, lon_bnds):
     """If we are on a non-cartesian grid, Define the spatial grid
     """
+    var_log = logging.getLogger(ctx.obj['var_log'])
     grid_id=None
     var_log.info("setting up grid")
     #Set grid id and append to axis and z ids
@@ -550,9 +564,10 @@ def define_grid(ctx, j_id, i_id, lat, lat_bnds, lon, lon_bnds):
 
 @click.pass_context
-def get_coords(ctx, ovar, coords, var_log):
+def get_coords(ctx, ovar, coords):
     """Get lat/lon and their boundaries from ancil file
     """
+    var_log = logging.getLogger(ctx.obj['var_log'])
    # open ancil grid file to read vertices
    #PP be careful this is currently hardcoded which is not ok! 
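+    # the ancil file path below comes from the 'grid_<realm>' entry of the
+    # experiment configuration, which is the hardcoding flagged above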
ancil_file = ctx.obj[f"grid_{ctx.obj['realm']}"] @@ -583,9 +598,10 @@ def get_coords(ctx, ovar, coords, var_log): @click.pass_context -def get_axis_dim(ctx, var, var_log): +def get_axis_dim(ctx, var): """ """ + var_log = logging.getLogger(ctx.obj['var_log']) axes = {'t_ax': None, 'z_ax': None, 'glat_ax': None, 'lat_ax': None, 'lon_ax': None, 'j_ax': None, 'i_ax': None, 'p_ax': None, 'e_ax': None} @@ -631,8 +647,10 @@ def get_axis_dim(ctx, var, var_log): return axes -def check_time_bnds(bnds, frequency, var_log): +@click.pass_context +def check_time_bnds(ictx, bnds, frequency): """Checks if dimension boundaries from file are wrong""" + var_log = logging.getLogger(ctx.obj['var_log']) var_log.debug(f"Time bnds 1,0: {bnds[:,1], bnds[:,0]}") diff = bnds[:,1] - bnds[:,0] #approx_int = [np.timedelta64(x, 'D').astype(float) for x in diff] @@ -650,10 +668,11 @@ def check_time_bnds(bnds, frequency, var_log): @click.pass_context -def require_bounds(ctx, var_log): +def require_bounds(ctx): """Returns list of coordinates that require bounds. Reads the requirement directly from .._coordinate.json file """ + var_log.debug(f"Time bnds 1,0: {bnds[:,1], bnds[:,0]}") fpath = f"{ctx.obj['tpath']}/{ctx.obj['_AXIS_ENTRY_FILE']}" with open(fpath, 'r') as jfile: data = json.load(jfile) @@ -665,10 +684,11 @@ def require_bounds(ctx, var_log): @click.pass_context -def bnds_change(ctx, axis, var_log): +def bnds_change(ctx, axis): """Returns True if calculation/resample changes bnds of specified dimension. """ + var_log.debug(f"Time bnds 1,0: {bnds[:,1], bnds[:,0]}") dim = axis.name calculation = ctx.obj['calculation'] changed_bnds = False @@ -683,16 +703,17 @@ def bnds_change(ctx, axis, var_log): @click.pass_context -def get_bounds(ctx, ds, axis, cmor_name, var_log, ax_val=None): +def get_bounds(ctx, ds, axis, cmor_name, ax_val=None): """Returns bounds for input dimension, if bounds are not available uses edges or tries to calculate them. If variable goes through calculation potentially bounds are different from input file and forces re-calculating them """ + var_log = logging.getLogger(ctx.obj['var_log']) var_log.debug(f'in getting bounds: {axis}') dim = axis.name var_log.info(f"Getting bounds for axis: {dim}") - changed_bnds = bnds_change(axis, var_log) + changed_bnds = bnds_change(axis) var_log.debug(f"Bounds has changed: {changed_bnds}") #The default bounds assume that the grid cells are centred on #each grid point specified by the coordinate variable. 
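The two comment lines above state the default assumption used below; as an illustration, the midpoint construction they describe amounts to the following sketch (not the package's exact code):

    import numpy as np

    def midpoint_bounds(vals):
        # interior bounds sit halfway between coordinate points; the two end
        # bounds are extrapolated by half of the first/last spacing
        mid = (vals[:-1] + vals[1:]) / 2.0
        first = vals[0] - (vals[1] - vals[0]) / 2.0
        last = vals[-1] + (vals[-1] - vals[-2]) / 2.0
        edges = np.concatenate([[first], mid, [last]])
        # returned as (n, 2) [lower, upper] pairs, the layout CMOR expects
        return np.stack([edges[:-1], edges[1:]], axis=-1)

    print(midpoint_bounds(np.array([0.5, 1.5, 2.5])))
    # [[0. 1.] [1. 2.] [2. 3.]]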
@@ -716,7 +737,7 @@ def get_bounds(ctx, ds, axis, cmor_name, var_log, ax_val=None): dim_bnds_val = cftime.date2num(dim_bnds_val, units=ctx.obj['reference_date'], calendar=ctx.obj['attrs']['calendar']) - inrange = check_time_bnds(dim_bnds_val, frq, var_log) + inrange = check_time_bnds(dim_bnds_val, frq) if not inrange: calc = True var_log.info(f"Inherited bounds for {dim} are incorrect") @@ -735,7 +756,7 @@ def get_bounds(ctx, ds, axis, cmor_name, var_log, ax_val=None): var_log.warning(f"dodgy bounds for dimension: {dim}") var_log.error(f"error: {e}") if 'time' in cmor_name: - inrange = check_time_bnds(dim_bnds_val, frq, var_log) + inrange = check_time_bnds(dim_bnds_val, frq) if inrange is False: var_log.error(f"Boundaries for {cmor_name} are " + "wrong even after calculation") @@ -766,9 +787,10 @@ def get_bounds(ctx, ds, axis, cmor_name, var_log, ax_val=None): @click.pass_context -def get_attrs(ctx, infiles, var1, var_log): +def get_attrs(ctx, infiles, var1): """ """ + var_log = logging.getLogger(ctx.obj['var_log']) # open only first file so we can access encoding ds = xr.open_dataset(infiles[0][0]) var_attrs = ds[var1].attrs @@ -803,7 +825,7 @@ def get_attrs(ctx, infiles, var1, var_log): @click.pass_context -def extract_var(ctx, input_ds, tdim, in_missing, mop_log, var_log): +def extract_var(ctx, input_ds, tdim, in_missing): """ This function pulls the required variables from the Xarray dataset. If a calculation isn't needed then it just returns the variables to be saved. @@ -814,6 +836,8 @@ def extract_var(ctx, input_ds, tdim, in_missing, mop_log, var_log): input_ds - dict dictionary of input datasets for each variable """ + mop_log = logging.getLogger('mop_log') + var_log = logging.getLogger(ctx.obj['var_log']) failed = False # Save the variables if ctx.obj['calculation'] == '': @@ -873,6 +897,7 @@ def define_attrs(ctx): listed in notes file, this is indicated by precending any function in file with a ~. For other fields it checks equality. """ + var_log = logging.getLogger(ctx.obj['var_log']) attrs = ctx.obj['attrs'] notes = attrs.get('notes', '') # open file containing notes diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 554c7c7..5418309 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -80,11 +80,10 @@ def mop(ctx, cfile, debug): ctx.obj['attrs'] = cfg['attrs'] # set up main mop log if ctx.invoked_subcommand == 'setup': - ctx.obj['log'] = config_log(debug, ctx.obj['appdir'], stream_level=logging.INFO) + mop_log = config_log(debug, ctx.obj['appdir'], stream_level=logging.INFO) else: - ctx.obj['log'] = config_log(debug, ctx.obj['appdir']) + mop_log = config_log(debug, ctx.obj['appdir']) ctx.obj['debug'] = debug - mop_log = ctx.obj['log'] mop_log.info(f"Simulation to process: {ctx.obj['exp']}") @@ -95,9 +94,9 @@ def mop_run(ctx): Use the configuration yaml file created in setup step as input. 
""" - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') # Open database and retrieve list of files to create - conn = db_connect(ctx.obj['database'], mop_log) + conn = db_connect(ctx.obj['database']) c = conn.cursor() sql = f"""select *,ROWID from filelist where status=='unprocessed' and exp_id=='{ctx.obj['exp']}'""" @@ -133,7 +132,7 @@ def mop_setup(ctx, update): * finalises configuration and save in new yaml file * writes job executable file and submits (optional) to queue """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') # then add setup_env to config mop_log.info("Setting environment and creating working directory") ctx.obj['update'] = update @@ -152,11 +151,11 @@ def mop_setup(ctx, update): # setup database table database = ctx.obj['database'] mop_log.info(f"creating & using database: {database}") - conn = db_connect(database, mop_log) + conn = db_connect(database) table_sql = filelist_sql() - create_table(conn, table_sql, mop_log) + create_table(conn, table_sql) populate_db(conn) - nrows = count_rows(conn, ctx.obj['exp'], mop_log) + nrows = count_rows(conn, ctx.obj['exp']) tot_size = sum_file_sizes(conn) mop_log.info(f"Estimated total files size before compression is: {tot_size} GB") #write app_job.sh @@ -177,14 +176,16 @@ def mop_setup(ctx, update): @click.pass_context -def mop_process(ctx, mop_log, var_log): +def mop_process(ctx): """Main processing workflow Sets up CMOR dataset, tables and axis. Extracts and/or calculates variable and write to file using CMOR. Returns path of created file if successful or error code if not. """ - + + mop_log = logging.getLogger('mop_log') + var_log = logging.getLogger(ctx.obj['var_log']) default_cal = "gregorian" logname = f"{ctx.obj['variable_id']}_{ctx.obj['table']}_{ctx.obj['tstart']}" @@ -210,15 +211,15 @@ def mop_process(ctx, mop_log, var_log): # Select files to use and associate a path to each input variable #P I might not need this! - inrange_files, path_vars, time_dim, t_units = get_files(var_log) + inrange_files, path_vars, time_dim, t_units = get_files() # Open input datasets based on input files, return dict= {var: ds} - dsin = load_data(inrange_files, path_vars, time_dim, var_log) + dsin = load_data(inrange_files, path_vars, time_dim) #Get the units and other attrs of first variable. 
var1 = ctx.obj['vin'][0] in_units, in_missing, positive, coords = get_attrs(inrange_files, - var1, var_log) + var1) var_log.debug(f"var just after reading {dsin[var1][var1]}") # Extract variable and calculation: @@ -226,7 +227,7 @@ def mop_process(ctx, mop_log, var_log): var_log.info(f"calculation: {ctx.obj['calculation']}") var_log.info(f"resample: {ctx.obj['resample']}") try: - ovar, failed = extract_var(dsin, time_dim, in_missing, mop_log, var_log) + ovar, failed = extract_var(dsin, time_dim, in_missing) var_log.info("Calculation completed.") except Exception as e: mop_log.error(f"E: Unable to retrieve/calculate var for {ctx.obj['filename']}") @@ -239,16 +240,16 @@ def mop_process(ctx, mop_log, var_log): # Define axis and variable for CMOR var_log.info("Defining axes...") # get list of coordinates that require bounds - bounds_list = require_bounds(var_log) + bounds_list = require_bounds() # get axis of each dimension - axes = get_axis_dim(ovar, var_log) + axes = get_axis_dim(ovar) var_log.debug(f"detected axes: {axes}") cmor.set_table(tables[1]) axis_ids = [] z_ids = [] setgrid = False if axes['t_ax'] is not None: - cmor_tName = get_cmorname('t', axes['t_ax'], var_log) + cmor_tName = get_cmorname('t', axes['t_ax']) ctx.obj['reference_date'] = f"days since {ctx.obj['reference_date']}" var_log.debug(f"{ctx.obj['reference_date']}") t_ax_val = cftime.date2num(axes['t_ax'], units=ctx.obj['reference_date'], @@ -257,7 +258,7 @@ def mop_process(ctx, mop_log, var_log): t_bounds = None if cmor_tName in bounds_list: t_bounds = get_bounds(dsin[var1], axes['t_ax'], cmor_tName, - var_log, ax_val=t_ax_val) + ax_val=t_ax_val) t_ax_id = cmor.axis(table_entry=cmor_tName, units=ctx.obj['reference_date'], length=len(t_ax_val), @@ -266,14 +267,14 @@ def mop_process(ctx, mop_log, var_log): interval=None) axis_ids.append(t_ax_id) if axes['e_ax'] is not None: - e_ax_id = create_axis(axes['e_ax'], tables[1], var_log) + e_ax_id = create_axis(axes['e_ax'], tables[1]) axis_ids.append(e_ax_id) if axes['z_ax'] is not None: zlen = len(axes['z_ax']) - cmor_zName = get_cmorname('z', axes['z_ax'], var_log, z_len=zlen) + cmor_zName = get_cmorname('z', axes['z_ax'], z_len=zlen) z_bounds = None if cmor_zName in bounds_list: - z_bounds = get_bounds(dsin[var1], axes['z_ax'], cmor_zName, var_log) + z_bounds = get_bounds(dsin[var1], axes['z_ax'], cmor_zName) z_ax_id = cmor.axis(table_entry=cmor_zName, units=axes['z_ax'].units, length=zlen, @@ -284,31 +285,30 @@ def mop_process(ctx, mop_log, var_log): # if both i, j are defined setgrid if only one treat as lat/lon if axes['i_ax'] is not None and axes['j_ax'] is not None: setgrid = True - j_id = ij_axis(axes['j_ax'], 'j_index', tables[0], var_log) - i_id = ij_axis(axes['i_ax'], 'i_index', tables[0], var_log) + j_id = ij_axis(axes['j_ax'], 'j_index', tables[0]) + i_id = ij_axis(axes['i_ax'], 'i_index', tables[0]) elif axes['j_ax'] is not None: axes['lat_ax'] = axes['j_ax'] elif axes['i_ax'] is not None: axes['lon_ax'] = axes['i_ax'] # Define the spatial grid if non-cartesian grid if setgrid: - lat, lat_bnds, lon, lon_bnds = get_coords(ovar, coords, var_log) - grid_id = define_grid(j_id, i_id, lat, lat_bnds, lon, - lon_bnds, var_log) + lat, lat_bnds, lon, lon_bnds = get_coords(ovar, coords) + grid_id = define_grid(j_id, i_id, lat, lat_bnds, lon, lon_bnds) else: if axes['glat_ax'] is not None: - lat_id = ll_axis(axes['glat_ax'], 'glat', dsin[var1], tables[1], - bounds_list, var_log) + lat_id = ll_axis(axes['glat_ax'], 'glat', dsin[var1], + tables[1], bounds_list) 
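+            # axis_ids collects the axes the output variable is defined on;
+            # z_ids is kept separately for the hybrid-height zfactors set up
+            # further down via hybrid_axis()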
axis_ids.append(lat_id) #z_ids.append(lat_id) elif axes['lat_ax'] is not None: lat_id = ll_axis(axes['lat_ax'], 'lat', dsin[var1], tables[1], - bounds_list, var_log) + bounds_list) axis_ids.append(lat_id) z_ids.append(lat_id) if axes['lon_ax'] is not None: lon_id = ll_axis(axes['lon_ax'], 'lon', dsin[var1], tables[1], - bounds_list, var_log) + bounds_list) axis_ids.append(lon_id) z_ids.append(lon_id) if axes['p_ax'] is not None: @@ -324,7 +324,7 @@ def mop_process(ctx, mop_log, var_log): # Set up additional hybrid coordinate information if (axes['z_ax'] is not None and cmor_zName in ['hybrid_height', 'hybrid_height_half']): - zfactor_b_id, zfactor_orog_id = hybrid_axis(lev_name, z_ax_id, z_ids, var_log) + zfactor_b_id, zfactor_orog_id = hybrid_axis(lev_name, z_ax_id, z_ids) # Freeing up memory del dsin @@ -371,7 +371,7 @@ def mop_process(ctx, mop_log, var_log): @click.pass_context -def process_file(ctx, row, var_log): +def process_file(ctx, row): """Processes file from database if status is unprocessed. If override is true, re-writes existing files. Called by process_row() and calls mop_process() to extract and write variable. @@ -382,15 +382,14 @@ def process_file(ctx, row, var_log): Click context object row : dict row from filelist db table describing one output file - var_log : logging handler - Logging file handler specific to the file to process Returns ------- out : tuple Output status message and code and db rowid for processed file """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') + var_log = logging.getLogger(ctx.obj['var_log']) row['vin'] = row['vin'].split() # Check that calculation is defined if more than one variable is passed as input if len(row['vin']) > 1 and row['calculation'] == '': @@ -411,7 +410,7 @@ def process_file(ctx, row, var_log): var_msg = f"{row['table']},{row['variable_id']},{row['tstart']},{row['tend']}" if ctx.obj['override'] or not os.path.exists(expected_file): try: - ret = mop_process(mop_log, var_log) + ret = mop_process() except Exception as e: #something has gone wrong in the processing ret = -1 mop_log.error(e) @@ -464,6 +463,7 @@ def process_row(ctx, row): Sets up variable log file, prepares dictionary with file details and calls process_file """ + pid = os.getpid() record = {} header = ['infile', 'filepath', 'filename', 'vin', 'variable_id', 'table', 'frequency', 'realm', 'timeshot', 'tstart', @@ -478,11 +478,11 @@ def process_row(ctx, row): trange = record['filename'].replace('.nc.','').split("_")[-1] varlog_file = (f"{ctx.obj['var_logs']}/{record['variable_id']}" + f"_{record['table']}_{record['tstart']}.txt") - var_log = config_varlog(ctx.obj['debug'], varlog_file) - ctx.obj['var_log'] = var_log + var_log = config_varlog(ctx.obj['debug'], varlog_file, pid) + ctx.obj['var_log'] = var_log.name var_log.info(f"Start processing") - var_log.debug(f"Process id: {os.getpid()}") - msg = process_file(record, var_log) + var_log.debug(f"Process id: {pid}") + msg = process_file(record) var_log.handlers[0].close() var_log.removeHandler(var_log.handlers[0]) return msg @@ -500,7 +500,7 @@ def pool_handler(ctx, rows, ncpus): list of process_row() outputs returned by futures, these are tuples with status message and code, and rowid """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') executor = concurrent.futures.ProcessPoolExecutor(max_workers=ncpus) futures = [] for row in rows: diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py index 7981e0d..68c60dd 100755 --- a/src/mopper/setup_utils.py +++ 
b/src/mopper/setup_utils.py @@ -35,6 +35,7 @@ import re import click import pathlib +import logging from collections import OrderedDict from datetime import datetime#, timedelta @@ -111,19 +112,20 @@ def read_yaml(fname): return data -def write_yaml(data, fname, logger): +def write_yaml(data, fname, log_name='__name__'): """Write data to a yaml file Parameters ---------- data : dict - The file content as a dictioinary + The file content as a dictionary fname : str Yaml filename Returns ------- """ + logger = logging.getLogger(log_name) try: with open(fname, 'w') as f: yaml.dump(data, f) @@ -153,8 +155,7 @@ def write_config(ctx, fname='exp_config.yaml'): else: config['cmor'][k] = v config['attrs'] = config['cmor'].pop('attrs') - mop_log = config['cmor'].pop('log') - write_yaml(config, fname, mop_log) + write_yaml(config, fname, 'mop_log') return @@ -162,7 +163,7 @@ def write_config(ctx, fname='exp_config.yaml'): def find_custom_tables(ctx): """Returns list of tables files in custom table path """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') tables = [] path = ctx.obj['tables_path'] tables = ctx.obj['tables_path'].rglob("*_*.json") @@ -237,7 +238,7 @@ def filelist_sql(): def write_job(ctx, nrows): """ """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') # define storage flag flag = "storage=gdata/hh5" projects = ctx.obj['addprojs'] + [ctx.obj['project']] @@ -282,7 +283,7 @@ def create_exp_json(ctx, json_cv): fname : str Name of created experiment json file """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') fname = ctx.obj['outpath'] / f"{ctx.obj['exp']}.json" attrs = ctx.obj['attrs'] with json_cv.open(mode='r') as f: @@ -353,7 +354,7 @@ def populate_db(ctx, conn): conn : obj DB connection object """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') cursor = conn.cursor() # process experiment information opts = {} @@ -388,7 +389,7 @@ def populate_db(ctx, conn): return -def add_row(values, cursor, update, mop_log): +def add_row(values, cursor, update): """Add a row to the filelist database table one row specifies the information to produce one output cmip5 file @@ -404,6 +405,7 @@ def add_row(values, cursor, update, mop_log): Returns ------- """ + mop_log = logging.getLogger('mop_log') sql = '''insert into filelist (infile, filepath, filename, vin, variable_id, ctable, frequency, realm, timeshot, tstart, tend, sel_start, sel_end, @@ -466,7 +468,7 @@ def compute_fsize(ctx, opts, grid_size, frequency): Returns ------- """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') # set small number for fx frequency so it always create only one file nstep_day = {'10min': 144, '30min': 48, '1hr': 24, '3hr': 8, '6hr': 4, 'day': 1, '10day': 0.1, 'mon': 1/30, @@ -617,7 +619,7 @@ def define_files(ctx, cursor, opts, mp): time interval for each file. This last is determined by maximum file size. These and other files details are saved in filelist db table. 
""" - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') update = ctx.obj['update'] exp_start = opts['exp_start'] exp_end = opts['exp_end'] @@ -662,14 +664,15 @@ def define_files(ctx, cursor, opts, mp): opts['sel_end'] = (newtime - half_tstep).strftime('%4Y%m%d%H%M') opts['filepath'], opts['filename'] = build_filename(opts, start, newtime, half_tstep) - rowid = add_row(opts, cursor, update, mop_log) + rowid = add_row(opts, cursor, update) start = newtime return -def count_rows(conn, exp, mop_log): +def count_rows(conn, exp): """Returns number of files to process """ + mop_log = logging.getLogger('mop_log') sql = f"select * from filelist where status=='unprocessed' and exp_id=='{exp}'" rows = query(conn, sql, first=False) mop_log.info(f"Number of rows in filelist: {len(rows)}") diff --git a/tests/conftest.py b/tests/conftest.py index 7f544ac..9f2f190 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -82,11 +82,24 @@ def test_check_timestamp(caplog): @pytest.fixture def varlist_rows(): lines = ["fld_s03i236;tas;K;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature", - "fld_s03i236;;K;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature", - "fld_s03i236;tas;;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature"] + "fld_s00i031;siconca;1;time lat lon;mon;atmos;area: time: mean;AUS2200_A1hr;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction", +"fld_s03i234;hfls;W m-2;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE LATENT HEAT FLUX W/M2;surface_upward_latent_heat_flu"] rows = [l.split(";") for l in lines] return rows +@pytest.fixture +def add_var_out(): + vlist = [{'cmor_var': '', 'input_vars': '', 'calculation': '', 'units': '' + ,'realm': '', 'positive': '', 'version': '', 'cmor_table': ''} + ] + +@pytest.fixture +def map_rows(): + maps = [["fld_s03i236","tas","K","time_0 lat lon","1hr","atmos", + "area: time: mean","","AUS2200_A1hr","float32","22048000","96", + "umnsa_slv_","TEMPERATURE AT 1.5M","air_temperature"]] + return maps + @pytest.fixture def um_multi_time(): '''Return a um stule file with multiple time axes''' diff --git a/tests/test_mopdb_utils.py b/tests/test_mopdb_utils.py index 103f75e..9737c52 100644 --- a/tests/test_mopdb_utils.py +++ b/tests/test_mopdb_utils.py @@ -25,31 +25,20 @@ #from click.testing import CliRunner -@pytest.fixture -def db_log(): - return config_log(False) - - -@pytest.fixture -def db_log_debug(): - return config_log(True) - @pytest.mark.parametrize('idx', [0,1,2]) -def test_add_var(varlist_rows, idx, db_log): +def test_add_var(varlist_rows, idx, caplog): + caplog.set_level(logging.DEBUG, logger='mopdb_log') vlist = [] - vlistout = [["fld_s03i236","tas","K","time_0 lat lon","1hr","atmos", - "area: time: mean","","AUS2200_A1hr","float32","22048000","96", - "umnsa_slv_","TEMPERATURE AT 1.5M","air_temperature"]] - match = ("tas", "", "K") - vlist = add_var(vlist, varlist_rows[idx], match, db_log) - assert vlist == vlistout + match = [("tas", "", "K"), ("siconca", "", ""), ("hfls", "", "")] + vlist = add_var(vlist, varlist_rows[idx], match[idx]) + assert vlist[idx]['cmor_var'] == match[idx][0] def test_build_umfrq(um_multi_time, caplog): - caplog.set_level(logging.DEBUG) + caplog.set_level(logging.DEBUG, logger='mopdb_log') time_axs = [d for d in 
um_multi_time.dims if 'time' in d] - print(time_axs) umfrq = {'time': 'day', 'time_0': '1hr', 'time_1': '30min'} - assert umfrq == build_umfrq(time_axs, um_multi_time, caplog) + out = build_umfrq(time_axs, um_multi_time) + assert umfrq == out From 5d0ab2407d8ed7b8a362acb4b3a1974b7556e91b Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 4 Jul 2024 09:52:24 +1000 Subject: [PATCH 003/137] fixed #147 --- src/mopper/mop_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index 52f01bf..cdb78db 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -648,7 +648,7 @@ def get_axis_dim(ctx, var): @click.pass_context -def check_time_bnds(ictx, bnds, frequency): +def check_time_bnds(ctx, bnds, frequency): """Checks if dimension boundaries from file are wrong""" var_log = logging.getLogger(ctx.obj['var_log']) var_log.debug(f"Time bnds 1,0: {bnds[:,1], bnds[:,0]}") @@ -672,7 +672,7 @@ def require_bounds(ctx): """Returns list of coordinates that require bounds. Reads the requirement directly from .._coordinate.json file """ - var_log.debug(f"Time bnds 1,0: {bnds[:,1], bnds[:,0]}") + var_log = logging.getLogger(ctx.obj['var_log']) fpath = f"{ctx.obj['tpath']}/{ctx.obj['_AXIS_ENTRY_FILE']}" with open(fpath, 'r') as jfile: data = json.load(jfile) @@ -688,7 +688,7 @@ def bnds_change(ctx, axis): """Returns True if calculation/resample changes bnds of specified dimension. """ - var_log.debug(f"Time bnds 1,0: {bnds[:,1], bnds[:,0]}") + var_log = logging.getLogger(ctx.obj['var_log']) dim = axis.name calculation = ctx.obj['calculation'] changed_bnds = False From 9904d0f1f62e4288fdcd1420552e210a9b303b37 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 4 Jul 2024 18:20:38 +1000 Subject: [PATCH 004/137] now varlist and template are 1 step see #150 --- src/mopdb/mopdb.py | 125 ++++++++++++++++++++++++++------------- src/mopdb/mopdb_utils.py | 54 +++++++++++++---- 2 files changed, 129 insertions(+), 50 deletions(-) diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 892b4cb..b0935ff 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -43,18 +43,47 @@ def mopdb_catch(): sys.exit(1) +def require_date(ctx, param, value): + """Changes startdate option in template command from optional to + required if fpath is a directory. + """ + if Path(value).is_dir(): + ctx.command.params[1].required = True + return value + + def db_args(f): - """Define database click arguments + """Define database click options """ constraints = [ click.option('--fname', '-f', type=str, required=True, - help='Input file: used to update db table (mapping/cmor),' + - 'or to pass output model variables (list)'), + help='Input file: used to update db table (mapping/cmor)'), click.option('--dbname', type=str, required=False, default='default', help='Database relative path by default is package access.db'), - click.option('--alias', '-a', type=str, required=False, default=None, - help='Table alias to use when updating cmor var table or creating map template with list' + - ' to keep track of variable definition origin. 
If none passed uses input filename')] + click.option('--alias', '-a', type=str, required=False, default='', + help='Table alias to track definitions origin in cmorvar table.')] + for c in reversed(constraints): + f = c(f) + return f + + +def map_args(f): + """Define mapping click options for varlist and template commands""" + constraints = [ + click.option('--fpath', '-f', type=str, required=True, + callback=require_date, + help='Model output directory or varlist for the same'), + click.option('--startdate', '-d', type=str, required=False, + help='Start date of model run as YYYYMMDD'), + click.option('--version', '-v', required=True, + type=click.Choice(['ESM1.5', 'CM2', 'AUS2200', 'OM2']), + show_default=True, + help='ACCESS version currently only CM2, ESM1.5, AUS2200, OM2'), + click.option('--dbname', type=str, required=False, default='default', + help='Database relative path by default is package access.db'), + click.option('--alias', '-a', type=str, required=False, default='', + help='''Alias to use to keep track of variable definition origin. + If none passed uses input filename''')] for c in reversed(constraints): f = c(f) return f @@ -118,8 +147,10 @@ def check_cmor(ctx, dbname): @mopdb.command(name='table') @db_args @click.option('--label', '-l', required=False, default='CMIP6', - type=click.Choice(['CMIP6', 'AUS2200', 'CM2']), show_default=True, - help='Label indicating origin of CMOR variable definitions. Currently only CMIP6, AUS2200 and CM2') + type=click.Choice(['CMIP6', 'AUS2200', 'CM2', 'OM2']), + show_default=True, + help='''Label indicating origin of CMOR variable definitions. + Currently only CMIP6, AUS2200, CM2 and OM2''') @click.pass_context def cmor_table(ctx, dbname, fname, alias, label): """Create CMIP style table containing new variable definitions @@ -136,7 +167,7 @@ def cmor_table(ctx, dbname, fname, alias, label): fname : str Mapping file??? alias : str - not used here + ??? it is used so what's ahppenw hen not passed? label : str Label indicating preferred cmor variable definitions """ @@ -184,7 +215,7 @@ def cmor_table(ctx, dbname, fname, alias, label): if len(v[4].split()) != len(record[9].split()): mopdb_log.warning(f"Variable {v[0]} number of dims orig/table are different: {v[4]}/{record[9]}") var_list.append(definition) - write_cmor_table(var_list, alias, mopdb_log) + write_cmor_table(var_list, alias) conn.close() return @@ -206,15 +237,15 @@ def update_cmor(ctx, dbname, fname, alias): fname : str Name of json input file with records to add alias : str - Indicates origin of records to add, if None json filename - base is used instead + Indicates origin of records to add, if '' (default) json + filename base is used instead Returns ------- """ mopdb_log = logging.getLogger('mopdb_log') - if alias is None: + if alias == '': alias = fname.split("/")[-1] alias = alias.replace('.json', '') mopdb_log.info(f"Adding {alias} to variable name to track origin") @@ -260,35 +291,46 @@ def update_cmor(ctx, dbname, fname, alias): @mopdb.command(name='template') -@db_args -@click.option('--version', '-v', required=True, - type=click.Choice(['ESM1.5', 'CM2', 'AUS2200', 'OM2']), show_default=True, - help='ACCESS version currently only CM2, ESM1.5, AUS2200, OM2') +@map_args @click.pass_context -def map_template(ctx, dbname, fname, alias, version): +def map_template(ctx, fpath, startdate, dbname, version, alias): """Writes a template of mapping file needed to run setup. First opens database and check if variables match any in mapping table. 
If not tries to partially match them. + It can get as input the directory containing the output in + which case it will first call model_vars() (varlist command) + or the file output of the same if already available. + Parameters ---------- ctx : obj Click context object + fpath : str + Path of csv input file with output variables to map or + of directory containing output files to scan + startdate : str + Date or other string to match to individuate one file per type dbname : str Database relative path (default is data/access.db) - fname : str - Name of csv input file with output variables to map - alias : str - Indicates origin of records to add, if None csv filename - base is used instead version : str Version of ACCESS model used to generate variables + alias : str + Indicates origin of records to add, if '' csv filename + base is used instead Returns ------- """ mopdb_log = logging.getLogger('mopdb_log') - if alias is None: + # work out if fpath is varlist or path to output + fpath = Path(fpath) + if fpath.is_file(): + fname = fpath.name + else: + mopdb_log.debug(f"Calling model_vars() from template: {fpath}") + fname = model_vars(fpath, startdate, dbname, version, alias) + if alias == '': alias = fname.split(".")[0] # connect to db, check first if db exists or exit if dbname == 'default': @@ -298,6 +340,7 @@ def map_template(ctx, dbname, fname, alias, version): with open(fname, 'r') as csvfile: reader = csv.DictReader(csvfile, delimiter=';') rows = list(reader) + check_varlist(rows, fname) # return lists of fully/partially matching variables and stash_vars # these are input_vars for calculation defined in already in mapping db full, no_ver, no_frq, stdn, no_match, stash_vars = parse_vars(conn, @@ -340,7 +383,7 @@ def update_map(ctx, dbname, fname, alias): fname : str Name of csv input file with mapping records alias : str - Indicates origin of records to add, if None csv filename + Indicates origin of records to add, if '' csv filename base is used instead Returns @@ -373,26 +416,23 @@ def update_map(ctx, dbname, fname, alias): @mopdb.command(name='varlist') -@click.option('--indir', '-i', type=str, required=True, - help='Converted model output directory') -@click.option('--startdate', '-d', type=str, required=True, - help='Start date of model run as YYYYMMDD') -@click.option('--dbname', type=str, required=False, default='default', - help='Database relative path by default is package access.db') -@click.option('--version', '-v', required=False, default='CM2', - type=click.Choice(['ESM1.5', 'CM2', 'AUS2200', 'OM2']), show_default=True, - help='ACCESS version currently only CM2, ESM1.5, AUS2200, OM2') +@map_args @click.pass_context -def model_vars(ctx, indir, startdate, dbname, version): +def list_vars(ctx, fpath, startdate, dbname, version, alias): + """Calls model_vars to generate list of variables""" + fname = model_vars(fpath, startdate, dbname, version, alias) + + +@click.pass_context +def model_vars(ctx, fpath, startdate, dbname, version, alias): """Read variables from model output opens one file for each kind, save variable list as csv file - alias is not used so far Parameters ---------- ctx : obj Click context object - indir : str + fpath : str Path for model output files startdate : str Date or other string to match to individuate one file per type @@ -400,18 +440,24 @@ def model_vars(ctx, indir, startdate, dbname, version): Database relative path (default is data/access.db) version : str Version of ACCESS model to use as preferred mapping + alias : str + Used for output 
filename: 'varlist_'. If '', + 'varlist_mopdb' is used instead Returns ------- + fname : str + Name of output varlist file """ + mopdb_log = logging.getLogger('mopdb_log') # connect to db, this will create one if not existing if dbname == 'default': dbname = import_files('data').joinpath('access.db') conn = db_connect(dbname) - write_varlist(conn, indir, startdate, version) + fname = write_varlist(conn, fpath, startdate, version, alias) conn.close() - return + return fname @mopdb.command(name='del') @@ -456,4 +502,3 @@ def remove_record(ctx, dbname, table, pair): # select, confirm, delete record/s delete_record(conn, table, col, pair) return - diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index d4de94f..b9e0c4d 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -523,7 +523,7 @@ def get_cell_methods(attrs, dims): return val, frqmod -def write_varlist(conn, indir, startdate, version): +def write_varlist(conn, indir, startdate, version, alias): """Based on model output files create a variable list and save it to a csv file. Main attributes needed to map output are provided for each variable @@ -533,6 +533,14 @@ def write_varlist(conn, indir, startdate, version): files = list_files(indir, sdate) mopdb_log.debug(f"Found files: {files}") patterns = [] + if alias == '': + alias = 'mopdb' + fname = f"varlist_{alias}.csv" + fcsv = open(fname, 'w') + fwriter = csv.writer(fcsv, delimiter=';') + fwriter.writerow(["name", "cmor_var", "units", "dimensions", + "frequency", "realm", "cell_methods", "cmor_table", "vtype", + "size", "nsteps", "filename", "long_name", "standard_name"]) for fpath in files: # get filename pattern until date match mopdb_log.debug(f"Filename: {fpath.name}") @@ -545,12 +553,7 @@ def write_varlist(conn, indir, startdate, version): pattern_list = list_files(indir, f"{fpattern}*") nfiles = len(pattern_list) mopdb_log.debug(f"File pattern: {fpattern}") - fcsv = open(f"{fpattern}.csv", 'w') - fwriter = csv.writer(fcsv, delimiter=';') - fwriter.writerow(["name", "cmor_var", "units", "dimensions", - "frequency", "realm", "cell_methods", "cmor_table", - "vtype", "size", "nsteps", "filename", "long_name", - "standard_name"]) + fwriter.writerow([f"#{fpattern}"]) # get attributes for the file variables realm = get_realm(fpath, version) ds = xr.open_dataset(fpath, decode_times=False) @@ -587,9 +590,9 @@ def write_varlist(conn, indir, startdate, version): nsteps, fpattern, attrs.get('long_name', ""), attrs.get('standard_name', "")] fwriter.writerow(line) - fcsv.close() mopdb_log.info(f"Variable list for {fpattern} successfully written") - return + fcsv.close() + return fname def read_map_app4(fname): @@ -644,7 +647,7 @@ def read_map(fname, alias): notes = row[16] else: notes = row[15] - if alias is None: + if alias is '': alias = fname.replace(".csv","") var_list.append(row[:11] + [notes, alias]) return var_list @@ -883,6 +886,7 @@ def write_map_template(conn, full, no_ver, no_frq, stdn, cell_methods, positive, cmor_table, version, vtype, size, nsteps, filename, long_name, standard_name """ + mopdb_log = logging.getLogger('mopdb_log') keys = ['cmor_var', 'input_vars', 'calculation', 'units', 'dimensions', 'frequency', 'realm', 'cell_methods', @@ -919,6 +923,7 @@ def write_map_template(conn, full, no_ver, no_frq, stdn, def write_vars(vlist, fwriter, div, conn=None, sortby='cmor_var'): """ """ + mopdb_log = logging.getLogger('mopdb_log') if len(vlist) > 0: if type(div) is str: @@ -938,6 +943,7 @@ def check_realm_units(conn, var): """Checks that realm and 
units are consistent with values in cmor table. """ + mopdb_log = logging.getLogger('mopdb_log') vname = f"{var['cmor_var']}-{var['cmor_table']}" if var['cmor_table'] is None or var['cmor_table'] == "": @@ -965,6 +971,7 @@ def check_realm_units(conn, var): def get_realm(fpath, version): '''Return realm for variable in files or NArealm''' + mopdb_log = logging.getLogger('mopdb_log') if version == 'AUS2200': realm = 'atmos' @@ -980,3 +987,30 @@ def get_realm(fpath, version): mopdb_log.info(f"Couldn't detect realm from path, setting to NArealm") mopdb_log.debug(f"Realm is {realm}") return realm + + +def check_varlist(rows, fname): + """Checks that varlist written to file has sensible information for frequency and realm + to avoid incorrect mapping to be produced. + + At the moment we're checking only frequency and realm as they can be missed or wrong + depending on the file structure. + + Parameters + ---------- + rows : list(dict) + list of variables to match + """ + + mopdb_log = logging.getLogger('mopdb_log') + frq_list = ['min', 'hr', 'day', 'mon', 'yr'] + realm_list = ['ice', 'ocean', 'atmos', 'land'] + for row in rows: + if row['name'][0] == "#" or row['name'] == 'name': + continue + elif (not any( x in row['frequency'] for x in frq_list) + or row['realm'] not in realm_list): + mopdb_log.error(f""" Check frequency and realm in {fname}. + Some values might be invalid and need fixing""") + sys.exit() + return From 29567a35e821a59f9a7a861f65e2ea9198600129 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 5 Jul 2024 15:08:39 +1000 Subject: [PATCH 005/137] moved options #151, #113 --- conda/meta.yaml | 2 +- docs/gettingstarted.rst | 62 ++++++++++++++---------------- docs/mopdb_command.rst | 49 ++++++++++++++---------- src/mopdb/mopdb.py | 8 ++-- src/mopper/mop_setup.py | 8 ++++ src/mopper/mopper.py | 80 ++++++++++++++++++++++++++++----------- src/mopper/setup_utils.py | 7 ++-- 7 files changed, 132 insertions(+), 84 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index b0cb321..f20a79a 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,4 +1,4 @@ -{% set version = "0.6.1" %} +{% set version = "1.0.0" %} package: name: mopper version: {{ version }} diff --git a/docs/gettingstarted.rst b/docs/gettingstarted.rst index 397652e..666e89a 100644 --- a/docs/gettingstarted.rst +++ b/docs/gettingstarted.rst @@ -3,46 +3,38 @@ Starting with MOPPeR A typical workflow to post-process an ACCESS or UM model output requires three steps. -Step1: get a list of variables from the raw output --------------------------------------------------- - - *mopdb varlist -i -d * - -`mopdb varlist` will output one or more `csv` files with a detailed list of variables, one list for each pattern of output files. - -.. code-block:: console - - $ mopdb varlist -i /scratch/../exp -d 20120101 - Opened database ~/.local/lib/python3.10/site-packages/data/access.db successfully - Variable list for ocean_scalar.nc- successfully written - Variable list for ocean_month.nc- successfully written - Variable list for ocean_daily.nc- successfully written - -.. csv-table:: Example of varlist output - :file: varlist_example.csv - :delim: ; - -The argument is used to reduce the number of files to check. The tool will recognise anyway a repeated pattern and only add a list of variable for the same pattern once. - -Step2: create a template for a mapping file +Step1: create a template for a mapping file ------------------------------------------- - *mopdb template -i -v -a * + *mopdb template -f -v -a * .. 
code-block:: console - $ mopdb template -f ocean.csv -v OM2 -a ocnmon - Opened database ~/.local/lib/python3.10/site-packages/data/access.db successfully - Derived variables: {'msftyrho', 'msftmrho', 'hfds', 'msftmz', 'msftyz'} - Changing advectsweby-CM2_mon units from Watts/m^2 to W m-2 - Changing areacello-CMIP6_Ofx units from m^2 to m2 - Variable difvho-CM2_Omon not found in cmor table + $ mopdb template -f /scratch/.../exp1/atmos -m 095101 -v CM2 -a exp1 + Opened database /home/581/pxp581/.local/lib/python3.10/site-packages/data/access.db successfully + Found more than 1 definition for fld_s16i222: + [('psl', 'AUS2200', 'AUS2200_A10min', '10minPt'), ('psl', 'AUS2200', 'AUS2200_A1hr', '1hr')] + Using psl from AUS2200_A10min + Variable list for cw323a.pm successfully written + Opened database /home/581/pxp581/.local/lib/python3.10/site-packages/data/access.db successfully + Derived variables: {'treeFracBdlEvg', 'grassFracC4', 'shrubFrac', 'prc', 'mrsfl', 'landCoverFrac', 'mmrbc', 'mmrso4', 'theta24', 'sftgif', 'treeFracNdlEvg', 'snw', 'rtmt', 'nwdFracLut', 'sifllatstop', 'prw', 'mrfso', 'rlus', 'mrsll', 'baresoilFrac', 'c4PftFrac', 'wetlandFrac', 'mrro', 'c3PftFrac', 'treeFracBdlDcd', 'od550lt1aer', 'treeFracNdlDcd', 'residualFrac', 'wetss', 'sbl', 'vegFrac', 'rsus', 'cropFrac', 'mmrdust', 'grassFrac', 'mmrss', 'od550aer', 'hus24', 'dryss', 'fracLut', 'mrlso', 'mc', 'od440aer', 'grassFracC3', 'nep', 'mmroa', 'cropFracC3', 'snm', 'agesno'} + Changing cl-CMIP6_Amon units from 1 to % + Changing cli-CMIP6_Amon units from 1 to kg kg-1 + Changing clt-CMIP6_Amon units from 1 to % + Changing clw-CMIP6_Amon units from 1 to kg kg-1 + Variable husuvgrid-CM2_mon not found in cmor table + ... `mopdb template` takes as input: - * the output/s of `varlist` - To get one template for the all variable concatenate the output on `varlist` into one file first. - * the access version to use as preferred - * an optional alias, if omitted the varlist filename will be used. Based on the example: `map_ocnmon.csv` or `map_ocean.csv` if omitted. + * -f/--fpath : the path to the model output + * -m/--match : used to identify files' patterns. The tool will only add a list of variables for the same pattern once. + * -v/--version : the access version to use as preferred mapping. ESM1.5, CM2, OM2 and AUS2200 are currently available. + * -a/--alias : an optional alias, if omitted default names will be used for the output files. + +Alternatively a list of variables can be created separately using the *varlist* command and this can be passed directly to template using the *fpath* option. + + *mopdb template -f -v -a * It produces a csv file with a list of all the variables from raw output mapped to cmip style variables. These mappings also take into account the frequency and include variables that can be potentially calculated with the listed fields. The console output lists these, as shown above. @@ -51,18 +43,20 @@ The mappings can be different between different version and/or configurations of Starting with version 0.6 the list includes matches based on the standard_name, as these rows often list more than one option per field, it's important to either edit or remove these rows before using the mapping file. The :doc:`Customing section ` covers what to do for an experiment using a new configuration which is substantially different from the ones which are available. +It also provides an intermediate varlist_.csv file that shows the information derived directly from the files. 
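+For example, a quick way to eyeball the generated file before mapping (a sketch: `varlist_exp1.csv` assumes the `exp1` alias used above, and the ';' delimiter mirrors how `template` itself reads the file):
+
+.. code-block:: python
+
+    import csv
+
+    with open('varlist_exp1.csv') as csvfile:
+        for row in csv.DictReader(csvfile, delimiter=';'):
+            # skip the '#<file-pattern>' marker lines
+            if not row['name'].startswith('#'):
+                print(row['name'], row['frequency'], row['realm'])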
This can be useful to debug in case of issues with the mapping. This file is checked before the mapping step to make sure the tool has detected sensible frequency and realm, if the check fails the mapping won't proceed but the varlist file can be edited appropriately. .. warning:: Always check that the resulting template is mapping the variables correctly. This is particularly true for derived variables. Comment lines are inserted to give some information on what assumptions were done for each group of mappings. + The se -Step3: Set up the working environment +Step2: Set up the working environment ------------------------------------- *mop -c setup* .. code-block:: console - +https://climate-cms.org/posts/2023-05-31-vscode-are.html $ mop -c exp_conf.yaml setup Simulation to process: cy286 Setting environment and creating working directory diff --git a/docs/mopdb_command.rst b/docs/mopdb_command.rst index 32d712c..421f39c 100644 --- a/docs/mopdb_command.rst +++ b/docs/mopdb_command.rst @@ -54,29 +54,17 @@ e.g. use aus2200 for mappings related to the AUS2200 configuration: A user that wants to create a mapping table for another AUS2200 simulation can use this value to select appropriate mappings (see how to do that below). -Get a list of variables from the model output ---------------------------------------------- +Create a mapping file +--------------------- .. code-block:: - mopdb varlist -i -d - -this will create for each output file a list of variables with useful attributes -These can be concatenated into one or used to create separate mappings. - -.. _varlist example: -.. dropdown:: Example output of varlist +This can be done by providing the model output path and a pattern to match or directly a varlist file - name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;filename;long_name;standard_name - fld_s00i004;theta;K;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CM2_mon;float32;9400320;12;cw323a.pm;THETA AFTER TIMESTEP;air_potential_temperature - fld_s00i010;hus;1;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;9400320;12;cw323a.pm;SPECIFIC HUMIDITY AFTER TIMESTEP;specific_humidity - fld_s00i024;ts;K;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE TEMPERATURE AFTER TIMESTEP;surface_temperature - fld_s00i030;;1;time lat lon;mon;atmos;area: time: mean;;float32;110592;12;cw323a.pm;LAND MASK (No halo) (LAND=TRUE);land_binary_mask - fld_s00i031;siconca;1;time lat lon;mon;atmos;area: time: mean;CMIP6_SImon;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction - ... +From output path: + + mopdb template -f -m -v -Create a mapping file starting from variable list -------------------------------------------------- -.. code-block:: +From varlist file: mopdb template -f -v @@ -119,6 +107,29 @@ The other groups of records require checking, as either the version or the frequ ... +Get a list of variables from the model output +--------------------------------------------- +.. code-block:: + + mopdb varlist -f -m + +this will create a list of variables with useful attributes + +.. _varlist example: +.. 
dropdown:: Example output of varlist

    name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;filename;long_name;standard_name
    #cw323a.pm
    fld_s00i004;theta;K;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CM2_mon;float32;9400320;12;cw323a.pm;THETA AFTER TIMESTEP;air_potential_temperature
    fld_s00i010;hus;1;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;9400320;12;cw323a.pm;SPECIFIC HUMIDITY AFTER TIMESTEP;specific_humidity
    fld_s00i024;ts;K;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE TEMPERATURE AFTER TIMESTEP;surface_temperature
    fld_s00i030;;1;time lat lon;mon;atmos;area: time: mean;;float32;110592;12;cw323a.pm;LAND MASK (No halo) (LAND=TRUE);land_binary_mask
    fld_s00i031;siconca;1;time lat lon;mon;atmos;area: time: mean;CMIP6_SImon;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction
    ...

Doing this step separately can be useful if the model output uses an unusual directory structure, as important attributes like frequency and realm, which drive the mapping, are then more likely to be missing or incorrect. In that case it can be more efficient to process the different kinds of files separately first, make sure frequency and realm are correct, and then combine them into one file to pass to template.
The template command will stop execution if it detects potentially invalid values for these fields, so that the varlist file can be fixed before a mapping is produced.

Check which variables aren't yet defined
----------------------------------------
.. code-block:: console
diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py
index b0935ff..7a2e744 100644
--- a/src/mopdb/mopdb.py
+++ b/src/mopdb/mopdb.py
@@ -72,9 +72,11 @@ def map_args(f):
     constraints = [
         click.option('--fpath', '-f', type=str, required=True,
             callback=require_date,
-            help='Model output directory or varlist for the same'),
-        click.option('--startdate', '-d', type=str, required=False,
-            help='Start date of model run as YYYYMMDD'),
+            help=('''Path for model output files. For "template"
+                command can also be file generated by varlist step''')),
+        click.option('--match', '-m', type=str, required=False,
+            help=('''String to match output files. 
Most often + the timestamp from one of the output files''')), click.option('--version', '-v', required=True, type=click.Choice(['ESM1.5', 'CM2', 'AUS2200', 'OM2']), show_default=True, diff --git a/src/mopper/mop_setup.py b/src/mopper/mop_setup.py index 7040270..90ba47e 100755 --- a/src/mopper/mop_setup.py +++ b/src/mopper/mop_setup.py @@ -196,6 +196,14 @@ def setup_env(ctx): else: cdict['tables_path'] = appdir / cdict['tables_path'] cdict['ancils_path'] = appdir / cdict['ancils_path'] + # conda env to run job + if cdict['conda_env'] == 'default': + cdict['conda_env'] = '' + else: + path = Path(cdict['conda_env']) + if not path.is_absolute(): + path = appdir / path + cdict['conda_env'] = f"source {str(path)}" # Output subdirectories outpath = cdict['outpath'] cdict['maps'] = outpath / "maps" diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 5418309..6313edd 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -53,13 +53,22 @@ def mop_catch(): sys.exit(1) +def mop_args(f): + """Define common click options + """ + constraints = [ + click.option('--debug', is_flag=True, default=False, + help="Show debug info"), + click.option('--cfile', '-c', type=str, required=True, + help='Experiment configuration as yaml file')] + for c in reversed(constraints): + f = c(f) + return f + + @click.group(context_settings=dict(help_option_names=['-h', '--help'])) -@click.option('--cfile', '-c', type=str, required=True, - help='Experiment configuration as yaml file') -@click.option('--debug', is_flag=True, default=False, - help="Show debug info") @click.pass_context -def mop(ctx, cfile, debug): +def mop(ctx): """Main command with 2 sub-commands: - setup to setup the job to run - run to execute the post-processing @@ -68,33 +77,39 @@ def mop(ctx, cfile, debug): ---------- ctx : obj Click context object + """ + #ctx.obj = {} + pass + + +@mop.command(name='run') +@mop_args +#@click.option('--cfile', '-c', type=str, required=True, +# help='Experiment configuration as yaml file') +@click.pass_context +def mop_run(ctx, cfile, debug): + """Subcommand that executes the processing. + + Use the configuration yaml file created in setup step as input. + + Parameters + ---------- cfile : str Name of yaml configuration file, run sub-command uses the configuration created by setup debug : bool If true set logging level to debug """ + + # load config file with open(cfile, 'r') as yfile: cfg = yaml.safe_load(yfile) ctx.obj = cfg['cmor'] ctx.obj['attrs'] = cfg['attrs'] - # set up main mop log - if ctx.invoked_subcommand == 'setup': - mop_log = config_log(debug, ctx.obj['appdir'], stream_level=logging.INFO) - else: - mop_log = config_log(debug, ctx.obj['appdir']) + # set up logger + mop_log = config_log(debug, ctx.obj['appdir']) ctx.obj['debug'] = debug mop_log.info(f"Simulation to process: {ctx.obj['exp']}") - - -@mop.command(name='run') -@click.pass_context -def mop_run(ctx): - """Subcommand that executes the processing. - - Use the configuration yaml file created in setup step as input. 
- """ - mop_log = logging.getLogger('mop_log') # Open database and retrieve list of files to create conn = db_connect(ctx.obj['database']) c = conn.cursor() @@ -117,11 +132,12 @@ def mop_run(ctx): return +@mop.command(name='setup') +@mop_args @click.option('--update', is_flag=True, default=False, help="Update current settings, keeping db and logs") -@mop.command(name='setup') @click.pass_context -def mop_setup(ctx, update): +def mop_setup(ctx, cfile, debug, update): """Setup of mopper processing job and working environment. * Defines and creates paths @@ -131,8 +147,26 @@ def mop_setup(ctx, update): * creates/updates database filelist table to list files to create * finalises configuration and save in new yaml file * writes job executable file and submits (optional) to queue + + Parameters + ---------- + cfile : str + Name of yaml configuration file, run sub-command uses the + configuration created by setup + debug : bool + If True set logging level to debug + update : bool + If True update current workding directory (default is False) """ - mop_log = logging.getLogger('mop_log') + + # load config file + with open(cfile, 'r') as yfile: + cfg = yaml.safe_load(yfile) + ctx.obj = cfg['cmor'] + ctx.obj['attrs'] = cfg['attrs'] + ctx.obj['debug'] = debug + # set up logger + mop_log = config_log(debug, ctx.obj['appdir'], stream_level=logging.INFO) # then add setup_env to config mop_log.info("Setting environment and creating working directory") ctx.obj['update'] = update diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py index 68c60dd..e0341fa 100755 --- a/src/mopper/setup_utils.py +++ b/src/mopper/setup_utils.py @@ -700,8 +700,6 @@ def define_template(ctx, flag, nrows): cdict : dict Dictionary with cmor settings for experiment """ - # temporarily removing this as it only works for conda envs - #{os.path.dirname(sys.executable)}/mop -c {ctx.obj['exp']}_config.yaml run template = f"""#!/bin/bash #PBS -P {ctx.obj['project']} #PBS -q {ctx.obj['queue']} @@ -717,9 +715,10 @@ def define_template(ctx, flag, nrows): # for a list of packages module use /g/data/hh5/public/modules -module load conda/analysis3 +module load conda/analysis3-unstable +{ctx.obj['conda_env']} cd {ctx.obj['appdir']} -mop -c {ctx.obj['exp']}_config.yaml run +mop run -c {ctx.obj['exp']}_config.yaml echo 'APP completed for exp {ctx.obj['exp']}.'""" return template From f8b1a24a4b96d781f4fd5c33bf1a4a3d5d8e76ac Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 5 Jul 2024 16:40:28 +1000 Subject: [PATCH 006/137] minor fix to actions and solved #150 --- .github/workflows/mopper-conda.yaml | 8 ++++---- src/mopdb/mopdb.py | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 98ea1a7..b4ecaa5 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -1,11 +1,11 @@ -name: xmhw-conda-install-test +name: mopper-conda-install-test #on: [push] on: push: branches: - main - - newrelease + - prerelease pull_request: branches: - main @@ -38,8 +38,8 @@ jobs: # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics # - name: Install package - # run: | - # conda run python setup.py install + run: | + conda build conda/meta.yaml - name: Test with pytest run: | conda install pytest coverage codecov diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 7a2e744..4a63bba 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -44,7 +44,7 @@ def mopdb_catch(): def require_date(ctx, param, value): - """Changes startdate option in template command from optional to + """Changes match option in template command from optional to required if fpath is a directory. """ if Path(value).is_dir(): @@ -295,7 +295,7 @@ def update_cmor(ctx, dbname, fname, alias): @mopdb.command(name='template') @map_args @click.pass_context -def map_template(ctx, fpath, startdate, dbname, version, alias): +def map_template(ctx, fpath, match, dbname, version, alias): """Writes a template of mapping file needed to run setup. First opens database and check if variables match any in mapping table. If not tries to partially match them. @@ -311,7 +311,7 @@ def map_template(ctx, fpath, startdate, dbname, version, alias): fpath : str Path of csv input file with output variables to map or of directory containing output files to scan - startdate : str + match : str Date or other string to match to individuate one file per type dbname : str Database relative path (default is data/access.db) @@ -331,7 +331,7 @@ def map_template(ctx, fpath, startdate, dbname, version, alias): fname = fpath.name else: mopdb_log.debug(f"Calling model_vars() from template: {fpath}") - fname = model_vars(fpath, startdate, dbname, version, alias) + fname = model_vars(fpath, match, dbname, version, alias) if alias == '': alias = fname.split(".")[0] # connect to db, check first if db exists or exit @@ -420,13 +420,13 @@ def update_map(ctx, dbname, fname, alias): @mopdb.command(name='varlist') @map_args @click.pass_context -def list_vars(ctx, fpath, startdate, dbname, version, alias): +def list_vars(ctx, fpath, match, dbname, version, alias): """Calls model_vars to generate list of variables""" - fname = model_vars(fpath, startdate, dbname, version, alias) + fname = model_vars(fpath, match, dbname, version, alias) @click.pass_context -def model_vars(ctx, fpath, startdate, dbname, version, alias): +def model_vars(ctx, fpath, match, dbname, version, alias): """Read variables from model output opens one file for each kind, save variable list as csv file @@ -436,7 +436,7 @@ def model_vars(ctx, fpath, startdate, dbname, version, alias): Click context object fpath : str Path for model output files - startdate : str + match : str Date or other string to match to individuate one file per type dbname : str Database relative path (default is data/access.db) @@ -457,7 +457,7 @@ def model_vars(ctx, fpath, startdate, dbname, version, alias): if dbname == 'default': dbname = import_files('data').joinpath('access.db') conn = db_connect(dbname) - fname = write_varlist(conn, fpath, startdate, version, alias) + fname = write_varlist(conn, fpath, match, version, alias) conn.close() return fname From ab3cd299d3604d3d3fe559905363cfe4f254cd52 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Sat, 6 Jul 2024 18:31:26 +1000 Subject: [PATCH 007/137] progress in detecting relam and frequency --- src/mopdb/mopdb_utils.py | 84 ++++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index b9e0c4d..f723025 100644 --- 
a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -426,50 +426,67 @@ def delete_record(conn, table, col, pairs): def list_files(indir, match): """Returns list of files matching input directory and match""" mopdb_log = logging.getLogger('mopdb_log') - files = [x for x in Path(indir).rglob(f"{match}") if x.is_file()] - mopdb_log.debug(f"{indir}/**/*{match}*") + mopdb_log.debug(f"Pattern to list files: {indir}/**/*{match}*") + files = [x for x in Path(indir).rglob(f"{match}") if x.is_file() + and '.nc' in str(x)] + files.sort(key=lambda x:x.name) + mopdb_log.debug(f"Files after sorting: {files}") return files -def build_umfrq(time_axs, ds): +def get_file_frq(ds, fnext): """Return a dictionary with frequency for each time axis. Frequency is inferred by comparing interval between two consecutive timesteps with expected interval at a given frequency. Order time_axis so ones with only one step are last, so we can use file frequency (interval_file) inferred from other time axes. + This is called if there are more than one time axis in file + (usually only UM) or if frequency can be guessed from filename. """ mopdb_log = logging.getLogger('mopdb_log') - umfrq = {} + frq = {} int2frq = {'dec': 3652.0, 'yr': 365.0, 'mon': 30.0, 'day': 1.0, '6hr': 0.25, '3hr': 0.125, '1hr': 0.041667, '30min': 0.020833, '10min': 0.006944} + # retrieve all time axes + time_axs = [d for d in ds.dims if 'time' in d] + time_axs_len = set(len(ds[d]) for d in time_axs) time_axs.sort(key=lambda x: len(ds[x]), reverse=True) - mopdb_log.debug(f"in build_umfrq, time_axs: {time_axs}") + mopdb_log.debug(f"in get_file_frq, time_axs: {time_axs}") + max_len = len(ds[time_axs[0]]) + # if all time axes have only 1 timestep we cannot infer frequency + # so we open also next file but get only time axs + if max_len == 1: + dsnext = xr.open_dataset(fnext, decode_times = False) + time_axs2 = [d for d in dsnext.dims if 'time' in d] + ds = xr.concat([ds[time_axs], dsnext[time_axs2]], dim='time') + time_axs = [d for d in ds.dims if 'time' in d] + time_axs_len = set(len(ds[d]) for d in time_axs) + time_axs.sort(key=lambda x: len(ds[x]), reverse=True) for t in time_axs: mopdb_log.debug(f"len of time axis {t}: {len(ds[t])}") if len(ds[t]) > 1: - interval = (ds[t][1]-ds[t][0]).values / np.timedelta64(1, 'D') -#astype('timedelta64[m]') / 1440.0 - interval_file = (ds[t][-1] -ds[t][0]).values / np.timedelta64(1, 'D') + interval = (ds[t][1]-ds[t][0]).values #/ np.timedelta64(1, 'D') + interval_file = (ds[t][-1] -ds[t][0]).values #/ np.timedelta64(1, 'D') else: interval = interval_file mopdb_log.debug(f"interval 2 timesteps for {t}: {interval}") - mopdb_log.debug(f"interval entire file {t}: {interval_file}") + #mopdb_log.debug(f"interval entire file {t}: {interval_file}") for k,v in int2frq.items(): if math.isclose(interval, v, rel_tol=0.05): - umfrq[t] = k + frq[t] = k break - return umfrq + return frq -def get_frequency(realm, fname, ds): +def get_frequency(realm, fname, ds, fnext): """Return frequency based on realm and filename For UM files checks if more than one time axis is present and if so returns dictionary with frequency: variable list """ mopdb_log = logging.getLogger('mopdb_log') - umfrq = {} + frq_dict = {} frequency = 'NAfrq' if realm == 'atmos': fbits = fname.split("_") @@ -479,14 +496,8 @@ def get_frequency(realm, fname, ds): frequency = fix_frq[frequency] else: frequency = frequency.replace('hPt', 'hrPt') - # retrieve all time axes and check their frequency - time_axs = [d for d in ds.dims if 'time' in d] - time_axs_len = 
set(len(ds[d]) for d in time_axs) - if len(time_axs_len) == 1: - umfrq = {} - else: - umfrq = build_umfrq(time_axs, ds) - mopdb_log.debug(f"umfrq: {umfrq}") + frq_dict = get_file_frq(ds, fnext) + mopdb_log.debug(f"frq_dict: {frq_dict}") elif realm == 'ocean': # if I found scalar or monthly in any of fbits if any(x in fname for x in ['scalar', 'month']): @@ -498,8 +509,13 @@ def get_frequency(realm, fname, ds): frequency = 'mon' elif '_d.' in fname: frequency = 'day' + if frequency == 'NAfrq': + frq_dict = get_file_frq(ds, fnext) + # if only one frequency detected empty dict + if len(frq_dict) == 1: + frequency = frq_dict.popitem()[1] mopdb_log.debug(f"Frequency: {frequency}") - return frequency, umfrq + return frequency, frq_dict def get_cell_methods(attrs, dims): @@ -523,15 +539,13 @@ def get_cell_methods(attrs, dims): return val, frqmod -def write_varlist(conn, indir, startdate, version, alias): +def write_varlist(conn, indir, match, version, alias): """Based on model output files create a variable list and save it to a csv file. Main attributes needed to map output are provided for each variable """ mopdb_log = logging.getLogger('mopdb_log') - sdate = f"*{startdate}*" - files = list_files(indir, sdate) - mopdb_log.debug(f"Found files: {files}") + files = list_files(indir, f"*{match}*") patterns = [] if alias == '': alias = 'mopdb' @@ -541,10 +555,10 @@ def write_varlist(conn, indir, startdate, version, alias): fwriter.writerow(["name", "cmor_var", "units", "dimensions", "frequency", "realm", "cell_methods", "cmor_table", "vtype", "size", "nsteps", "filename", "long_name", "standard_name"]) - for fpath in files: + for i, fpath in enumerate(files): # get filename pattern until date match mopdb_log.debug(f"Filename: {fpath.name}") - fpattern = fpath.name.split(startdate)[0] + fpattern = fpath.name.split(match)[0] # adding this in case we have a mix of yyyy/yyyymn date stamps # as then a user would have to pass yyyy only and would get 12 files for some of the patterns if fpattern in patterns: @@ -555,10 +569,12 @@ def write_varlist(conn, indir, startdate, version, alias): mopdb_log.debug(f"File pattern: {fpattern}") fwriter.writerow([f"#{fpattern}"]) # get attributes for the file variables - realm = get_realm(fpath, version) - ds = xr.open_dataset(fpath, decode_times=False) + ds = xr.open_dataset(str(pattern_list[0]), decode_times=False) + realm = get_realm(fpath, version, ds) coords = [c for c in ds.coords] + ['latitude_longitude'] - frequency, umfrq = get_frequency(realm, fpath.name, ds) + #pass next file in case of 1 timestep per file and no frq in name + fnext = str(pattern_list[1]) + frequency, umfrq = get_frequency(realm, fpath.name, ds, fnext) multiple_frq = False if umfrq != {}: multiple_frq = True @@ -569,7 +585,7 @@ def write_varlist(conn, indir, startdate, version, alias): mopdb_log.debug(f"Variable: {v.name}") # get size in bytes of grid for 1 timestep and number of timesteps vsize = v[0].nbytes - nsteps = nfiles * v.shape[0] + nsteps = nfiles * v.shape[0]/2 # assign specific frequency if more than one is available if multiple_frq: if 'time' in v.dims[0]: @@ -969,7 +985,7 @@ def check_realm_units(conn, var): return var -def get_realm(fpath, version): +def get_realm(fpath, version, ds): '''Return realm for variable in files or NArealm''' mopdb_log = logging.getLogger('mopdb_log') @@ -978,7 +994,7 @@ def get_realm(fpath, version): else: realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] if x in fpath.parts][0] - if realm == 'atm': + if realm == 'atm' or 'um_version' in 
ds.attrs.keys(): realm = 'atmos' elif realm == 'ocn': realm = 'ocean' From d24bea219d88ca30cb3b0f2be9675154215b9be6 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Mon, 8 Jul 2024 12:36:52 +1000 Subject: [PATCH 008/137] removed unneccessary adjustment to variable size from mopdb_utils.py --- src/mopdb/mopdb_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index f723025..18bc48f 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -585,7 +585,7 @@ def write_varlist(conn, indir, match, version, alias): mopdb_log.debug(f"Variable: {v.name}") # get size in bytes of grid for 1 timestep and number of timesteps vsize = v[0].nbytes - nsteps = nfiles * v.shape[0]/2 + nsteps = nfiles * v.shape[0] # assign specific frequency if more than one is available if multiple_frq: if 'time' in v.dims[0]: From 704d607e104abd0e6a2f630107e8e9dff515b4e6 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 9 Jul 2024 15:12:23 +1000 Subject: [PATCH 009/137] minor adjustment to conftest and action --- .github/workflows/mopper-conda.yaml | 2 +- src/mopdb/mopdb_utils.py | 10 ++++++++++ tests/conftest.py | 9 --------- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index b4ecaa5..c232518 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -4,11 +4,11 @@ name: mopper-conda-install-test on: push: branches: - - main - prerelease pull_request: branches: - main + - prerelease jobs: diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 18bc48f..a14ca5e 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -539,6 +539,15 @@ def get_cell_methods(attrs, dims): return val, frqmod +def identify_patterns(files): + """Return patterns of files + """ + i = 0 + while present is True: + + + return patterns + def write_varlist(conn, indir, match, version, alias): """Based on model output files create a variable list and save it to a csv file. 
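A minimal sketch of the interval-to-frequency matching that `get_file_frq` relies on: observed timestep intervals are expressed in days, and the first expected value within a 5% relative tolerance wins. The `infer_frequency` helper below is illustrative, not part of the patch.

.. code-block:: python

   import math

   # expected interval in days for each frequency, mirroring int2frq above
   INT2FRQ = {'dec': 3652.0, 'yr': 365.0, 'mon': 30.0, 'day': 1.0,
              '6hr': 0.25, '3hr': 0.125, '1hr': 0.041667,
              '30min': 0.020833, '10min': 0.006944}

   def infer_frequency(interval_days, rel_tol=0.05):
       """Return the frequency label matching an observed interval in days."""
       for label, days in INT2FRQ.items():
           if math.isclose(interval_days, days, rel_tol=rel_tol):
               return label
       return 'NAfrq'

   assert infer_frequency(0.25) == '6hr'
   assert infer_frequency(30.4) == 'mon'   # within 5% of 30 days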
Main attributes needed to map output are provided
@@ -546,6 +555,7 @@ def write_varlist(conn, indir, match, version, alias):
     """
     mopdb_log = logging.getLogger('mopdb_log')
     files = list_files(indir, f"*{match}*")
+    patterns = identify_patterns(files)
     patterns = []
     if alias == '':
         alias = 'mopdb'
diff --git a/tests/conftest.py b/tests/conftest.py
index 9f2f190..9a60849 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -30,15 +30,6 @@
 TESTS_HOME = os.path.abspath(os.path.dirname(__file__))
 TESTS_DATA = os.path.join(TESTS_HOME, "testdata")
 
-# setting up loggers for both mopdb and mop
-@pytest.fixture
-def moplog():
-    return logging.getLogger('mop_log')
-
-
-@pytest.fixture
-def mopdblog():
-    return logging.getLogger('mopdb_log')
 
 # setting up fixtures for databases: access.db and mopper.db
 @pytest.fixture

From 7c45897c8403591e1449289ae522a967a938c61 Mon Sep 17 00:00:00 2001
From: Paola Petrelli 
Date: Tue, 9 Jul 2024 16:17:25 +1000
Subject: [PATCH 010/137] some improvements to tests

---
 tests/conftest.py             | 16 ++++++++++++----
 tests/test_calculations.py    |  8 ++++----
 tests/test_mop_utils.py       | 22 +++++++++++-----------
 tests/test_mopdb.py           |  6 +++---
 tests/test_mopdb_utils.py     |  7 +++----
 tests/testdata/varlist_ex.csv |  4 ++++
 6 files changed, 37 insertions(+), 26 deletions(-)
 create mode 100644 tests/testdata/varlist_ex.csv

diff --git a/tests/conftest.py b/tests/conftest.py
index 9a60849..0dd6c56 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -23,6 +23,7 @@
 import pandas as pd
 import datetime
 import logging
+import csv
 
 from mopdb.mopdb_utils import mapping_sql, cmorvar_sql
 from mopper.setup_utils import filelist_sql
@@ -72,12 +73,19 @@ def test_check_timestamp(caplog):
 
 @pytest.fixture
 def varlist_rows():
-    lines = ["fld_s03i236;tas;K;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature",
-        "fld_s00i031;siconca;1;time lat lon;mon;atmos;area: time: mean;AUS2200_A1hr;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction",
-"fld_s03i234;hfls;W m-2;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE LATENT HEAT FLUX W/M2;surface_upward_latent_heat_flu"]
-    rows = [l.split(";") for l in lines]
+    # read list of vars from example file
+    with open('testdata/varlist_ex.csv', 'r') as csvfile:
+        reader = csv.DictReader(csvfile, delimiter=';')
+        rows = list(reader)
     return rows
 
+@pytest.fixture
+def matches():
+    matches = [("tas", "fld_s03i236", "", "1hr", "atmos", "AUS2200", "AUS2200_A1hr", "", "K"),
+        ("siconca", "fld_s00i031", "", "mon", "ocean", "CM2", "CMIP6_OImon", "", "1"),
+        ("hfls", "fld_s03i234", "", "mon", "atmos", "CM2", "CMIP6_Amon", "up", "W/m2")]
+    return matches
+
 @pytest.fixture
 def add_var_out():
     vlist = [{'cmor_var': '', 'input_vars': '', 'calculation': '', 'units': ''
diff --git a/tests/test_calculations.py b/tests/test_calculations.py
index dcd6398..9037b53 100644
--- a/tests/test_calculations.py
+++ b/tests/test_calculations.py
@@ -23,10 +23,9 @@
 import logging
 
 from mopper.calculations import *
 
-logger = logging.getLogger('var_log')
 ctx = click.Context(click.Command('cmd'),
         obj={'sel_start': '198302170600', 'sel_end': '198302181300',
-        'realm': 'atmos', 'frequency': '1hr', 'var_log': logger})
+        'realm': 'atmos', 'frequency': '1hr', 'var_log': 'varlog_1'})
 
 
 def create_var(nlat, nlon, ntime=None, nlev=None, sdepth=False, seed=100):
@@ -68,8 +67,9 @@ def test_calc_topsoil():
     xrtest.assert_allclose(out, expected, rtol=1e-05)
 
 
-def 
test_overturn_stream(): - global ctx, logger +def test_overturn_stream(caplog): + global ctx + caplog.set_level(logging.DEBUG, logger='varlog_1') # set up input dims = ['time', 'depth', 'lat', 'lon'] time = pd.date_range("2014-09-06", periods=1) diff --git a/tests/test_mop_utils.py b/tests/test_mop_utils.py index f177f21..4889274 100644 --- a/tests/test_mop_utils.py +++ b/tests/test_mop_utils.py @@ -19,7 +19,6 @@ import numpy as np import pandas as pd from mopper.mop_utils import * -from conftest import moplog #try: # import unittest.mock as mock @@ -28,24 +27,24 @@ ctx = click.Context(click.Command('cmd'), obj={'sel_start': '198302170600', 'sel_end': '198302181300', - 'realm': 'atmos', 'frequency': '1hr'}) -#logger = logging.getLogger('mop_log') + 'realm': 'atmos', 'frequency': '1hr', 'var_log': 'varlog_1'}) -def test_check_timestamp(caplog, ctx): - moplog.set_level(logging.DEBUG)#, logger='mop_log') +def test_check_timestamp(caplog): + global ctx + caplog.set_level(logging.DEBUG, logger='mop_log') + caplog.set_level(logging.DEBUG, logger='varlog_1') # test atmos files files = [f'obj_198302{d}T{str(h).zfill(2)}01_1hr.nc' for d in ['17','18','19'] for h in range(24)] - print(files) inrange = files[6:37] with ctx: - out1 = check_timestamp(files, logger) + out1 = check_timestamp(files) assert out1 == inrange # get only first file is frequency is fx ctx.obj['frequency'] = 'fx' inrange = [files[0]] with ctx: - out2 = check_timestamp(files, logger) + out2 = check_timestamp(files) assert out2 == inrange # test ocn files ctx.obj['frequency'] = 'day' @@ -53,12 +52,13 @@ def test_check_timestamp(caplog, ctx): files = [f'ocn_daily.nc-198302{str(d).zfill(2)}' for d in range(1,29)] inrange = files[16:18] with ctx: - out3 = check_timestamp(files, logger) + out3 = check_timestamp(files) assert out3 == inrange -def test_get_cmorname(caplog, ctx): - caplog.set_level(logging.DEBUG)#, logger='mop_log') +def test_get_cmorname(caplog): + global ctx + caplog.set_level(logging.DEBUG, logger='mop_log') # axis_name t ctx.obj['calculation'] = "plevinterp(var[0], var[1], 24)" ctx.obj['variable_id'] = "ta24" diff --git a/tests/test_mopdb.py b/tests/test_mopdb.py index 0eddc58..e570fdb 100644 --- a/tests/test_mopdb.py +++ b/tests/test_mopdb.py @@ -30,7 +30,7 @@ def test_mopdb(command, subcommand, runner): result = runner.invoke(mopdb, [subcommand, '--help']) assert result.exit_code == 0 -@pytest.mark.usefixtures("setup_db") # 1 +@pytest.mark.usefixtures("setup_access_db") # 1 def test_template(session): runner = CliRunner() @@ -45,8 +45,8 @@ def test_template(session): result = runner.invoke(mopdb, ['template', '-f varlist.txt', '-vCM2']) #assert result.exit_code == 0 - assert 'Opened database successfully' in result.output - assert 'Definable cmip var' in result.output + assert 'Opened database ' in result.output + #assert 'Definable cmip var' in result.output #Pass temp_dir to control where the temporary directory is created. The directory will not be removed by Click in this case. This is useful to integrate with a framework like Pytest that manages temporary files. 
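# A minimal sketch of the CliRunner pattern used in these tests: invoke a
# click command in-process and assert on the exit code and captured output.
# The `hello` command below is illustrative, not part of the mopdb CLI.
import click
from click.testing import CliRunner

@click.command()
@click.option('--name', default='world')
def hello(name):
    click.echo(f"Hello {name}")

def test_hello_output():
    runner = CliRunner()
    result = runner.invoke(hello, ['--name', 'mopper'])
    assert result.exit_code == 0
    assert 'Hello mopper' in result.output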
#def test_keep_dir(tmp_path):

diff --git a/tests/test_mopdb_utils.py b/tests/test_mopdb_utils.py
index 9737c52..ebc8be0 100644
--- a/tests/test_mopdb_utils.py
+++ b/tests/test_mopdb_utils.py
@@ -27,12 +27,11 @@
 
 @pytest.mark.parametrize('idx', [0,1,2])
-def test_add_var(varlist_rows, idx, caplog):
+def test_add_var(varlist_rows, matches, idx, caplog):
     caplog.set_level(logging.DEBUG, logger='mopdb_log')
     vlist = []
-    match = [("tas", "", "K"), ("siconca", "", ""), ("hfls", "", "")]
-    vlist = add_var(vlist, varlist_rows[idx], match[idx])
-    assert vlist[idx]['cmor_var'] == match[idx][0]
+    vlist = add_var(vlist, varlist_rows[idx], matches[idx])
+    assert vlist[0]['cmor_var'] == matches[idx][0]
 
 
 def test_build_umfrq(um_multi_time, caplog):
diff --git a/tests/testdata/varlist_ex.csv b/tests/testdata/varlist_ex.csv
new file mode 100644
index 0000000..154729f
--- /dev/null
+++ b/tests/testdata/varlist_ex.csv
@@ -0,0 +1,4 @@
+name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;filename;long_name;standard_name
+fld_s03i236;tas;degC;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature
+fld_s00i031;siconca;%;time lat lon;mon;atmos;area: time: mean;;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction
+fld_s03i234;hfls;W m-2;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE LATENT HEAT FLUX W/M2;surface_upward_latent_heat_flux

From 63f3b380ede007c88b08b646d4f0a9eae73a6122 Mon Sep 17 00:00:00 2001
From: Paola Petrelli 
Date: Tue, 9 Jul 2024 16:30:57 +1000
Subject: [PATCH 011/137] updated install instructions in docs to reflect the current situation

---
 docs/overview.rst | 29 ++++++++++-------------------
 1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/docs/overview.rst b/docs/overview.rst
index 908db06..f074224 100644
--- a/docs/overview.rst
+++ b/docs/overview.rst
@@ -1,25 +1,16 @@
 Install
 =======
 
-You can install the latest version of `mopper` directly from conda (accessnri channel)::
+We are planning to release ACCESS-MOPPeR on conda soon; it will then be available at NCI in our conda environments.
+In the meantime, you can create a custom environment and install mopper with the following steps:
 
-    conda install -c accessnri mopper
+1. module load conda/analysis3
+2. python -m venv mopper_env --system-site-packages
+3. source /mopper_env/bin/activate
+4. pip install git+https://github.com/ACCESS-Community-Hub/ACCESS-MOPPeR@main
+
+The source command activates the environment you just created.
+Any time you want to use the tool in a new session, repeat the first and third steps.
 
-If you want to install an unstable version or a different branch:
+The `pip` command above installs from the main branch; you can also point it at a different branch.
 
-    * git clone
-    * git checkout (if installing a a different branch from master)
-    * cd mopper
-    * pip install ./
-      use --user flag if you want to install it in ~/.local
-
-Working on the NCI server
--------------------------
-
-MOPPeR is pre-installed into a Conda environment at NCI. Load it with::
-
-    module use /g/data3/hh5/public/modules
-    module load conda/analysis3-unstable
-
-.. note::
-    You need to be a member of the hh5 project to load the modules. 
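Since the new `varlist_rows` fixture switches from hand-split strings to reading the semicolon-delimited varlist file, here is a small, self-contained sketch of what `csv.DictReader` yields for that format (the two inline rows are abridged from `varlist_ex.csv`; only a few of its columns are kept):

.. code-block:: python

   import csv
   from io import StringIO

   sample = (
       "name;cmor_var;units;dimensions;frequency;realm\n"
       "fld_s03i236;tas;degC;time_0 lat lon;1hr;atmos\n"
       "fld_s00i031;siconca;%;time lat lon;mon;atmos\n"
   )
   reader = csv.DictReader(StringIO(sample), delimiter=';')
   rows = list(reader)
   assert rows[0]['cmor_var'] == 'tas'
   assert rows[1]['frequency'] == 'mon'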
From fc8bb02d7688e7b0671981bba099a73a0b81e320 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 9 Jul 2024 16:43:04 +1000 Subject: [PATCH 012/137] removed partial pattern function --- src/mopdb/mopdb_utils.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index a14ca5e..80565b4 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -261,7 +261,7 @@ def query(conn, sql, tup=(), first=True): def get_columns(conn, table): - """Gets list of columns form db table + """Gets list of columns from db table """ mopdb_log = logging.getLogger('mopdb_log') sql = f'PRAGMA table_info({table});' @@ -539,15 +539,6 @@ def get_cell_methods(attrs, dims): return val, frqmod -def identify_patterns(files): - """Return patterns of files - """ - i = 0 - while present is True: - - - return patterns - def write_varlist(conn, indir, match, version, alias): """Based on model output files create a variable list and save it to a csv file. Main attributes needed to map output are provided From cacdd9570acb0526012880c1c7348ab89665381d Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 9 Jul 2024 16:46:03 +1000 Subject: [PATCH 013/137] removed partial pattern function 2 --- src/mopdb/mopdb_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 80565b4..b9875c7 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -546,7 +546,6 @@ def write_varlist(conn, indir, match, version, alias): """ mopdb_log = logging.getLogger('mopdb_log') files = list_files(indir, f"*{match}*") - patterns = identify_patterns(files) patterns = [] if alias == '': alias = 'mopdb' From 1a3b63ceea2b325c28623fe4f092d351ff630ac0 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 9 Jul 2024 19:10:25 +1000 Subject: [PATCH 014/137] introduced intake sub-command, and class Variable for mopdb varlist --- src/mopdb/mopdb.py | 57 ++++++++++++++++++++++++--- src/mopdb/mopdb_class.py | 84 ++++++++++++++++++++++++++++++++++++++++ src/mopdb/mopdb_utils.py | 74 +++++++++++++++++++---------------- 3 files changed, 176 insertions(+), 39 deletions(-) create mode 100644 src/mopdb/mopdb_class.py diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 4a63bba..335a367 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -29,7 +29,6 @@ from mopdb.mopdb_utils import * - def mopdb_catch(): """ """ @@ -44,7 +43,7 @@ def mopdb_catch(): def require_date(ctx, param, value): - """Changes match option in template command from optional to + """Changes match option in template/intake commands from optional to required if fpath is a directory. """ if Path(value).is_dir(): @@ -331,7 +330,7 @@ def map_template(ctx, fpath, match, dbname, version, alias): fname = fpath.name else: mopdb_log.debug(f"Calling model_vars() from template: {fpath}") - fname = model_vars(fpath, match, dbname, version, alias) + fname, vobjs = model_vars(fpath, match, dbname, version, alias) if alias == '': alias = fname.split(".")[0] # connect to db, check first if db exists or exit @@ -368,6 +367,52 @@ def map_template(ctx, fpath, match, dbname, version, alias): return +@mopdb.command(name='intake') +@map_args +@click.pass_context +def write_catalogue(ctx, fpath, match, dbname, version, alias): + """Writes an intake-esm catalogue. + + It can get as input the directory containing the output in + which case it will first call model_vars() (varlist command) + or the file output of the same if already available. 
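The `intake` subcommand introduced above is still a stub; for orientation, an intake-esm datastore is typically just a CSV table of file assets plus a small JSON descriptor pointing at it. A rough sketch of that layout follows, under the assumption that the esm-collection-spec v0.1.0 format is the target; all column names and values here are invented for illustration and are not part of the patch.

.. code-block:: python

   import json
   import pandas as pd

   # one row per file; columns are whatever attributes you want to search on
   assets = pd.DataFrame([
       {"path": "exp/atmos/umnsa_slv_19830217.nc",
        "variable": "fld_s03i236", "frequency": "1hr", "realm": "atmos"},
   ])
   assets.to_csv("catalogue.csv", index=False)

   # minimal descriptor telling intake-esm where the csv is and how to read it
   descriptor = {
       "esmcat_version": "0.1.0",
       "id": "mopper_demo",
       "description": "demo catalogue of raw model output",
       "catalog_file": "catalogue.csv",
       "attributes": [{"column_name": c} for c in
                      ("variable", "frequency", "realm")],
       "assets": {"column_name": "path", "format": "netcdf"},
   }
   with open("catalogue.json", "w") as f:
       json.dump(descriptor, f, indent=2)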
+ + Parameters + ---------- + ctx : obj + Click context object + fpath : str + Path of csv input file with output variables to map or + of directory containing output files to scan + match : str + Date or other string to match to individuate one file per type + dbname : str + Database relative path (default is data/access.db) + version : str + Version of ACCESS model used to generate variables + alias : str + Indicates origin of records to add, if '' csv filename + base is used instead + + Returns + ------- + """ + mopdb_log = logging.getLogger('mopdb_log') + # work out if fpath is varlist or path to output + fpath = Path(fpath) + if fpath.is_file(): + fname = fpath.name + else: + mopdb_log.debug(f"Calling model_vars() from intake: {fpath}") + fname, vobjs = model_vars(fpath, match, dbname, version, alias) + if alias == '': + alias = fname.split(".")[0] + # connect to db, check first if db exists or exit + if dbname == 'default': + dbname = import_files('data').joinpath('access.db') + conn = db_connect(dbname) + + @mopdb.command(name='map') @db_args @click.pass_context @@ -422,7 +467,7 @@ def update_map(ctx, dbname, fname, alias): @click.pass_context def list_vars(ctx, fpath, match, dbname, version, alias): """Calls model_vars to generate list of variables""" - fname = model_vars(fpath, match, dbname, version, alias) + fname, vobjs = model_vars(fpath, match, dbname, version, alias) @click.pass_context @@ -457,9 +502,9 @@ def model_vars(ctx, fpath, match, dbname, version, alias): if dbname == 'default': dbname = import_files('data').joinpath('access.db') conn = db_connect(dbname) - fname = write_varlist(conn, fpath, match, version, alias) + fname, vobjs = write_varlist(conn, fpath, match, version, alias) conn.close() - return fname + return fname, vobjs @mopdb.command(name='del') diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py new file mode 100644 index 0000000..a554ee0 --- /dev/null +++ b/src/mopdb/mopdb_class.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python +# Copyright 2024 ARC Centre of Excellence for Climate Extremes (CLEX) +# Author: Paola Petrelli for CLEX +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# contact: paola.petrelli@utas.edu.au +# +# last updated 06/07/2024 + +class Variable(): + + # __slots__ = ('name', 'pattern', 'files', 'frequency', 'realm', + # 'cmor_var', 'cmor_table', 'version', 'units', 'dimensions', + # 'cell_methods', 'positive', 'long_name', 'standard_name', + # 'vtype', 'size', 'nsteps') + + def __init__(self, varname, fpattern): + self.name = varname + # path attributes + self.pattern = fpattern + self.files = [] + # mapping attributes + self.frequency = 'NAfrq' + self.realm = 'NArealm' + self.cmor_var = '' + self.cmor_table = '' + self.version = '' + # descriptive attributes + self.units = '' + self.dimensions = '' + self.cell_methods = '' + self.positive = '' + self.long_name = '' + self.standard_name = '' + # type and size attributes + self.vtype = '' + self.size = 0 + self.nsteps = 0 + + + @property + def frequency(self): + return self._frequency + + @frequency.setter + def frequency(self, value): + fix_frq = {'dCai': 'day', '3h': '3hr', '6h': '6hr'} + if value in fix_frq.keys(): + self._frequency = fix_frq[value] + value = value.replace('hPt', 'hrPt') + if not any(x in value for x in + ['min', 'hr', 'day', 'mon', 'yr']): + self._frequency = 'NAfrq' + self._frequency = value + + + @property + def realm(self): + return self._realm + + @realm.setter + def realm(self, value): + fix_realm = {'atm': 'atmos', 'ice': 'seaIce', 'ocn': 'ocean'} + if value in fix_realm.keys(): + self._realm = fix_realm[value] + if not any(x in value for x in + ['atmos', 'seaIce', 'ocean', 'land']): + self._realm = 'NArealm' + + def list_files(self): + """Returns list of files matching input directory and match""" + self.files = [x for x in Path(self.indir).rglob(f"{self.match}") if x.is_file()] + return files.sort(key=lambda x:x.name) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index b9875c7..295e1ab 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -35,6 +35,7 @@ from operator import itemgetter from pathlib import Path +from mopdb.mopdb_class import Variable def config_log(debug): """Configures log file""" @@ -270,50 +271,50 @@ def get_columns(conn, table): return columns -def get_cmorname(conn, varname, version, frequency): +def get_cmorname(conn, vobj, version): """Queries mapping table for cmip name given variable name as output by the model """ mopdb_log = logging.getLogger('mopdb_log') sql = f"""SELECT cmor_var,model,cmor_table,frequency FROM mapping - WHERE input_vars='{varname}' and (calculation='' + WHERE input_vars='{vobj.vname}' and (calculation='' or calculation IS NULL)""" results = query(conn, sql, first=False) names = list(x[0] for x in results) tables = list(x[2] for x in results) if len(names) == 0: - cmor_var = '' - cmor_table = '' + vobj.cmor_var = '' + vobj.cmor_table = '' elif len(names) == 1: - cmor_var = names[0] - cmor_table = tables[0] + vobj.cmor_var = names[0] + vobj.cmor_table = tables[0] elif len(names) > 1: - mopdb_log.debug(f"Found more than 1 definition for {varname}:\n" + + mopdb_log.debug(f"Found more than 1 definition for {vobj.name}:\n" + f"{results}") match_found = False for r in results: - if r[1] == version and r[3] == frequency: - cmor_var, cmor_table = r[0], r[2] + if r[1] == version and r[3] == vobj.frequency: + vobj.cmor_var, vobj.cmor_table = r[0], r[2] match_found = True break if not match_found: for r in results: - if r[3] == frequency: - cmor_var, cmor_table = r[0], r[2] + if r[3] == vobj.frequency: + vobj.cmor_var, vobj.cmor_table = r[0], r[2] match_found = True break if not match_found: for 
r in results: if r[1] == version: - cmor_var, cmor_table = r[0], r[2] + vobj.cmor_var, vobj.cmor_table = r[0], r[2] match_found = True break if not match_found: - cmor_var = names[0] - cmor_table = tables[0] - mopdb_log.info(f"Found more than 1 definition for {varname}:\n"+ - f"{results}\n Using {cmor_var} from {cmor_table}") - return cmor_var, cmor_table + vobj.cmor_var = names[0] + vobj.cmor_table = tables[0] + mopdb_log.info(f"Found more than 1 definition for {vobj.name}:\n"+ + f"{results}\n Using {vobj.cmor_var} from {vobj.cmor_table}") + return vobj def cmor_table_header(name, realm, frequency): @@ -545,6 +546,10 @@ def write_varlist(conn, indir, match, version, alias): for each variable """ mopdb_log = logging.getLogger('mopdb_log') + line_cols = ['name', 'cmor_var', 'units', 'dimensions', + 'frequency', 'realm', 'cell_methods', 'cmor_table', 'vtype', + 'size', 'nsteps', 'filename', 'long_name', 'standard_name'] + vobj_list = [] files = list_files(indir, f"*{match}*") patterns = [] if alias == '': @@ -580,35 +585,38 @@ def write_varlist(conn, indir, match, version, alias): multiple_frq = True mopdb_log.debug(f"Multiple frq: {multiple_frq}") for vname in ds.variables: + vobj = Variable(vname, fpattern) + vobj.realm = realm if vname not in coords and all(x not in vname for x in ['_bnds','_bounds']): v = ds[vname] - mopdb_log.debug(f"Variable: {v.name}") + mopdb_log.debug(f"Variable: {vobj.name}") # get size in bytes of grid for 1 timestep and number of timesteps - vsize = v[0].nbytes - nsteps = nfiles * v.shape[0] - # assign specific frequency if more than one is available + vobj.size = v[0].nbytes + vobj.nsteps = nfiles * v.shape[0] + # assign time axis frequency if more than one is available if multiple_frq: if 'time' in v.dims[0]: frequency = umfrq[v.dims[0]] else: - frequency = 'NA' mopdb_log.info(f"Could not detect frequency for variable: {v}") attrs = v.attrs - cell_methods, frqmod = get_cell_methods(attrs, v.dims) - varfrq = frequency + frqmod - mopdb_log.debug(f"Frequency x var: {varfrq}") + vobj.cell_methods, frqmod = get_cell_methods(attrs, v.dims) + vobj.frequency = frequency + frqmod + mopdb_log.debug(f"Frequency x var: {vobj.frequency}") # try to retrieve cmip name - cmor_var, cmor_table = get_cmorname(conn, vname, - version, varfrq) - line = [v.name, cmor_var, attrs.get('units', ""), - " ".join(v.dims), varfrq, realm, - cell_methods, cmor_table, v.dtype, vsize, - nsteps, fpattern, attrs.get('long_name', ""), - attrs.get('standard_name', "")] + cmor_var, cmor_table = get_cmorname(conn, vobj, + version) + vobj.units = attrs.get('units', "") + vobj.long_name = attrs.get('long_name', "") + vobj.standard_name = attrs.get('standard_name', "") + vobj.dimensions = " ".join(v.dims) + vobj.type = v.dtype + line = [vobj[k] for k in line_cols] fwriter.writerow(line) + vobj_list.append(vobj) mopdb_log.info(f"Variable list for {fpattern} successfully written") fcsv.close() - return fname + return fname, vobj_list def read_map_app4(fname): From d81cdbd1ec04de7d6fac74f2533050f9a16f6ac2 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 09:59:13 +1000 Subject: [PATCH 015/137] Update mopper-conda.yaml to fix python version --- .github/workflows/mopper-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index c232518..cbd7984 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -22,7 +22,7 @@ jobs: - name: Set up Python 3.10 uses: 
actions/setup-python@v2 with: - python-version: 3.10 + python-version: '3.10' - name: Add conda to system path run: | # $CONDA is an environment variable pointing to the root of the miniconda directory From d47081806ed3de27533365688eaf2ffb5ae0cba7 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 10:02:32 +1000 Subject: [PATCH 016/137] Update mopper-conda.yaml to fix conda file --- .github/workflows/mopper-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index cbd7984..d5868d9 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -29,7 +29,7 @@ jobs: echo $CONDA/bin >> $GITHUB_PATH - name: Install dependencies run: | - conda env update --file conda/environment.yml --name base + conda env update --file conda/meta.yml --name base - name: Lint with flake8 run: | conda install flake8 From a16bfee3a20a5269216632fd48cb33a532903802 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 10:03:41 +1000 Subject: [PATCH 017/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index d5868d9..a8d26a6 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -29,7 +29,7 @@ jobs: echo $CONDA/bin >> $GITHUB_PATH - name: Install dependencies run: | - conda env update --file conda/meta.yml --name base + conda env update --file conda/meta.yaml --name base - name: Lint with flake8 run: | conda install flake8 From 4c344cc1c711e91ab94098d867555026c9d76828 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 10 Jul 2024 10:12:26 +1000 Subject: [PATCH 018/137] adjustments to class --- src/mopdb/mopdb_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 295e1ab..3dfdc31 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -277,11 +277,12 @@ def get_cmorname(conn, vobj, version): """ mopdb_log = logging.getLogger('mopdb_log') sql = f"""SELECT cmor_var,model,cmor_table,frequency FROM mapping - WHERE input_vars='{vobj.vname}' and (calculation='' + WHERE input_vars='{vobj.name}' and (calculation='' or calculation IS NULL)""" results = query(conn, sql, first=False) names = list(x[0] for x in results) tables = list(x[2] for x in results) + mopdb_log.debug(f"In get_cmorname query results: {results}") if len(names) == 0: vobj.cmor_var = '' vobj.cmor_table = '' @@ -602,7 +603,7 @@ def write_varlist(conn, indir, match, version, alias): attrs = v.attrs vobj.cell_methods, frqmod = get_cell_methods(attrs, v.dims) vobj.frequency = frequency + frqmod - mopdb_log.debug(f"Frequency x var: {vobj.frequency}") + mopdb_log.debug(f"Frequency var: {vobj.frequency}") # try to retrieve cmip name cmor_var, cmor_table = get_cmorname(conn, vobj, version) From 6583c895ca2db0251913330204350628d0a0674d Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 10 Jul 2024 10:14:43 +1000 Subject: [PATCH 019/137] removed extra line from docs --- docs/gettingstarted.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/gettingstarted.rst b/docs/gettingstarted.rst index 666e89a..2e7181e 100644 --- a/docs/gettingstarted.rst +++ b/docs/gettingstarted.rst @@ -56,7 +56,7 @@ Step2: Set up the working environment *mop -c setup* .. 
code-block:: console -https://climate-cms.org/posts/2023-05-31-vscode-are.html + $ mop -c exp_conf.yaml setup Simulation to process: cy286 Setting environment and creating working directory From b0d04f1785dfc6f93fe0eb29e48ba078255f0244 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 10:51:33 +1000 Subject: [PATCH 020/137] Update meta.yaml --- conda/meta.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index f20a79a..b4f0932 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,7 +1,6 @@ -{% set version = "1.0.0" %} package: name: mopper - version: {{ version }} + version: {{ "1.0.0" }} #source: # path: ./ @@ -9,7 +8,7 @@ package: source: #url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR/archive/refs/tags/{{version}}.tar.gz git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git - git_rev: {{ version }} + git_rev: {{ "1.0.0" }} git_depth: 1 # (Defaults to -1/not shallow) build: From b8feb63b893215b30fa14890aa11b5ee1a8ad7f9 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 10:52:00 +1000 Subject: [PATCH 021/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 86 +++++++++++++++-------------- 1 file changed, 46 insertions(+), 40 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index c232518..0bb68f7 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -4,51 +4,57 @@ name: mopper-conda-install-test on: push: branches: - - prerelease - pull_request: - branches: - - main - - prerelease + - pytests_sam jobs: - build-linux: + build: + runs-on: ubuntu-latest strategy: max-parallel: 5 + matrix: + python-version: ["3.9", "3.10", "3.11"] + steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.10 - uses: actions/setup-python@v2 - with: - python-version: 3.10 - - name: Add conda to system path - run: | - # $CONDA is an environment variable pointing to the root of the miniconda directory - echo $CONDA/bin >> $GITHUB_PATH - - name: Install dependencies - run: | - conda env update --file conda/environment.yml --name base - - name: Lint with flake8 - run: | - conda install flake8 - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - # - name: Install package - run: | - conda build conda/meta.yaml - - name: Test with pytest - run: | - conda install pytest coverage codecov - conda run python -m pytest - conda run coverage run --source src -m py.test - - name: Upload to codecov - if: steps.build.outcome == 'success' - run: | - curl -Os https://uploader.codecov.io/latest/linux/codecov - chmod +x codecov - ./codecov - + - uses: actions/checkout@v4 + #--------------------------------------------------- + - name: Set up Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + #--------------------------------------------------- + - name: Add conda to system path + run: | + # $CONDA is an environment variable pointing to the root of the miniconda directory + echo $CONDA/bin >> $GITHUB_PATH + #--------------------------------------------------- + - name: Install dependencies + run: | + conda env update --file conda/meta.yaml --name base + #--------------------------------------------------- + - name: Lint with flake8 + run: | + conda install flake8 + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + # - name: Install package + run: | + conda build conda/meta.yaml + #--------------------------------------------------- + - name: Test with pytest + run: | + conda install pytest coverage codecov + conda run python -m pytest + conda run coverage run --source src -m py.test + #--------------------------------------------------- + - name: Upload to codecov + if: steps.build.outcome == 'success' + run: | + curl -Os https://uploader.codecov.io/latest/linux/codecov + chmod +x codecov + ./codecov + #--------------------------------------------------- From 84731f4954a91a74df8519e2feb0001af41c9ad9 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 10:58:29 +1000 Subject: [PATCH 022/137] Update meta.yaml --- conda/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index b4f0932..146929a 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,6 +1,6 @@ package: name: mopper - version: {{ "1.0.0" }} + version: "1.0.0" #source: # path: ./ @@ -8,7 +8,7 @@ package: source: #url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR/archive/refs/tags/{{version}}.tar.gz git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git - git_rev: {{ "1.0.0" }} + git_rev: "1.0.0" git_depth: 1 # (Defaults to -1/not shallow) build: From 15fbfa686eb0c1e6a7d9ca1d29b80f04b07a4daa Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 12:30:42 +1000 Subject: [PATCH 023/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 0bb68f7..d699f16 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -34,16 +34,16 @@ jobs: run: | conda env update --file conda/meta.yaml --name base #--------------------------------------------------- - - name: Lint with flake8 - run: | - conda install flake8 + #- name: Lint with flake8 + # run: | + # conda install flake8 # stop the build if there are Python syntax errors or 
undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics # - name: Install package - run: | - conda build conda/meta.yaml + # run: | + # conda build conda/meta.yaml #--------------------------------------------------- - name: Test with pytest run: | From 70f94252675f4cbad631dafe011edbc76b13f918 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 12:35:43 +1000 Subject: [PATCH 024/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index d699f16..0d8eade 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -14,7 +14,7 @@ jobs: strategy: max-parallel: 5 matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.10"] steps: From 45c5cab09b65618a89f0c83a8fa6e7a2bd5dc5f6 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 12:38:20 +1000 Subject: [PATCH 025/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 0d8eade..90f46d2 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -47,6 +47,7 @@ jobs: #--------------------------------------------------- - name: Test with pytest run: | + conda activate base conda install pytest coverage codecov conda run python -m pytest conda run coverage run --source src -m py.test From 85f96a7406ded480c72cd4437496c54326728caf Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 12:39:18 +1000 Subject: [PATCH 026/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 90f46d2..74eac53 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -47,6 +47,7 @@ jobs: #--------------------------------------------------- - name: Test with pytest run: | + conda init conda activate base conda install pytest coverage codecov conda run python -m pytest From 905b898d617500ec160dc1067b498f15b026342e Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 12:46:33 +1000 Subject: [PATCH 027/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 74eac53..f89c896 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -25,10 +25,12 @@ jobs: with: python-version: ${{ matrix.python-version }} #--------------------------------------------------- - - name: Add conda to system path - run: | - # $CONDA is an environment variable pointing to the root of the miniconda directory - echo $CONDA/bin >> $GITHUB_PATH + - name: Install Miniconda + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: ${{ matrix.python-version }} + activate-environment: base 
#--------------------------------------------------- - name: Install dependencies run: | @@ -47,11 +49,9 @@ jobs: #--------------------------------------------------- - name: Test with pytest run: | - conda init - conda activate base conda install pytest coverage codecov - conda run python -m pytest - conda run coverage run --source src -m py.test + conda run -n base python -m pytest + conda run -n base coverage run --source src -m pytest #--------------------------------------------------- - name: Upload to codecov if: steps.build.outcome == 'success' From 062ec2873a6f8a8a841787b9d050f6c7e7ef3786 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 12:50:21 +1000 Subject: [PATCH 028/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index f89c896..e776402 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -30,11 +30,12 @@ jobs: with: auto-update-conda: true python-version: ${{ matrix.python-version }} - activate-environment: base + activate-environment: test-env + environment-file: conda/meta.yaml #--------------------------------------------------- - name: Install dependencies run: | - conda env update --file conda/meta.yaml --name base + conda env update --file conda/meta.yaml --name test-env #--------------------------------------------------- #- name: Lint with flake8 # run: | @@ -50,8 +51,8 @@ jobs: - name: Test with pytest run: | conda install pytest coverage codecov - conda run -n base python -m pytest - conda run -n base coverage run --source src -m pytest + conda run -n test-env python -m pytest + conda run -n test-env coverage run --source src -m pytest #--------------------------------------------------- - name: Upload to codecov if: steps.build.outcome == 'success' From 44ddf3639e36f3613ede373e82900230df8f2f46 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 12:53:54 +1000 Subject: [PATCH 029/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index e776402..0b9cb76 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -25,17 +25,22 @@ jobs: with: python-version: ${{ matrix.python-version }} #--------------------------------------------------- + # Install Miniconda - name: Install Miniconda uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true python-version: ${{ matrix.python-version }} - activate-environment: test-env - environment-file: conda/meta.yaml - #--------------------------------------------------- - - name: Install dependencies + + # Create and activate conda environment + - name: Create and activate conda environment run: | - conda env update --file conda/meta.yaml --name test-env + conda env create --name test-env --file conda/meta.yaml + conda activate test-env + + # Install dependencies from conda + - name: Install dependencies + run: conda env update --name test-env --file conda/meta.yaml #--------------------------------------------------- #- name: Lint with flake8 # run: | From 0391714d8c4d119d5fd6b86f2885502e455666d2 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 12:55:35 +1000 Subject: [PATCH 030/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 1 - 1 file 
changed, 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 0b9cb76..27c56fc 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -36,7 +36,6 @@ jobs: - name: Create and activate conda environment run: | conda env create --name test-env --file conda/meta.yaml - conda activate test-env # Install dependencies from conda - name: Install dependencies From c373636921d7d20e468afc9918249ba11db08e12 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 12:58:09 +1000 Subject: [PATCH 031/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 27c56fc..0c6fe68 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -54,7 +54,7 @@ jobs: #--------------------------------------------------- - name: Test with pytest run: | - conda install pytest coverage codecov + conda install -n test-env pytest coverage codecov conda run -n test-env python -m pytest conda run -n test-env coverage run --source src -m pytest #--------------------------------------------------- From 5b578065e7afddcb406521e5dcc6632bf39f558b Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 10 Jul 2024 13:19:16 +1000 Subject: [PATCH 032/137] more validation for realm added to class --- src/mopdb/mopdb_class.py | 8 +++++--- src/mopdb/mopdb_utils.py | 27 +++++++++++++-------------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index a554ee0..5cf3b6b 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -28,14 +28,16 @@ class Variable(): def __init__(self, varname, fpattern): self.name = varname # path attributes - self.pattern = fpattern + self.fpattern = fpattern self.files = [] # mapping attributes - self.frequency = 'NAfrq' - self.realm = 'NArealm' + self._frequency = '' + self._realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] + if x in self.fpattern.parts][0] self.cmor_var = '' self.cmor_table = '' self.version = '' + self.match = False # descriptive attributes self.units = '' self.dimensions = '' diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 3dfdc31..dfca89c 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -548,8 +548,8 @@ def write_varlist(conn, indir, match, version, alias): """ mopdb_log = logging.getLogger('mopdb_log') line_cols = ['name', 'cmor_var', 'units', 'dimensions', - 'frequency', 'realm', 'cell_methods', 'cmor_table', 'vtype', - 'size', 'nsteps', 'filename', 'long_name', 'standard_name'] + '_frequency', '_realm', 'cell_methods', 'cmor_table', 'vtype', + 'size', 'nsteps', 'fpattern', 'long_name', 'standard_name'] vobj_list = [] files = list_files(indir, f"*{match}*") patterns = [] @@ -573,7 +573,7 @@ def write_varlist(conn, indir, match, version, alias): pattern_list = list_files(indir, f"{fpattern}*") nfiles = len(pattern_list) mopdb_log.debug(f"File pattern: {fpattern}") - fwriter.writerow([f"#{fpattern}"]) + #fwriter.writerow([f"#{fpattern}"]) # get attributes for the file variables ds = xr.open_dataset(str(pattern_list[0]), decode_times=False) realm = get_realm(fpath, version, ds) @@ -587,7 +587,6 @@ def write_varlist(conn, indir, match, version, alias): mopdb_log.debug(f"Multiple frq: {multiple_frq}") for vname in ds.variables: vobj = Variable(vname, fpattern) - 
vobj.realm = realm if vname not in coords and all(x not in vname for x in ['_bnds','_bounds']): v = ds[vname] mopdb_log.debug(f"Variable: {vobj.name}") @@ -605,14 +604,13 @@ def write_varlist(conn, indir, match, version, alias): vobj.frequency = frequency + frqmod mopdb_log.debug(f"Frequency var: {vobj.frequency}") # try to retrieve cmip name - cmor_var, cmor_table = get_cmorname(conn, vobj, - version) + vobj = get_cmorname(conn, vobj, version) vobj.units = attrs.get('units', "") vobj.long_name = attrs.get('long_name', "") vobj.standard_name = attrs.get('standard_name', "") vobj.dimensions = " ".join(v.dims) - vobj.type = v.dtype - line = [vobj[k] for k in line_cols] + vobj.vtype = v.dtype + line = [vobj.__dict__[k] for k in line_cols] fwriter.writerow(line) vobj_list.append(vobj) mopdb_log.info(f"Variable list for {fpattern} successfully written") @@ -998,18 +996,19 @@ def get_realm(fpath, version, ds): '''Return realm for variable in files or NArealm''' mopdb_log = logging.getLogger('mopdb_log') + #realm = None if version == 'AUS2200': realm = 'atmos' else: realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] if x in fpath.parts][0] - if realm == 'atm' or 'um_version' in ds.attrs.keys(): + if realm is None and 'um_version' in ds.attrs.keys(): realm = 'atmos' - elif realm == 'ocn': - realm = 'ocean' - elif realm is None: - realm = 'NArealm' - mopdb_log.info(f"Couldn't detect realm from path, setting to NArealm") + #elif realm == 'ocn': + # realm = 'ocean' + #elif realm is None: + # realm = 'NArealm' + # mopdb_log.info(f"Couldn't detect realm from path, setting to NArealm") mopdb_log.debug(f"Realm is {realm}") return realm From b6a424100fdf1d87f3e15cccc07defe22dd9b4ab Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 10 Jul 2024 13:28:59 +1000 Subject: [PATCH 033/137] attempt to fix meta.yaml --- conda/meta.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index f20a79a..6a68a4f 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,7 +1,6 @@ -{% set version = "1.0.0" %} package: name: mopper - version: {{ version }} + version: 1.0.0 #source: # path: ./ From be08654f5a1dda4fdb78b7cc232f23670140eed6 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 10 Jul 2024 13:39:27 +1000 Subject: [PATCH 034/137] more validation for realm added to class 2 --- src/mopdb/mopdb_class.py | 5 +++-- src/mopdb/mopdb_utils.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index 5cf3b6b..53c4e4c 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -25,11 +25,12 @@ class Variable(): # 'cell_methods', 'positive', 'long_name', 'standard_name', # 'vtype', 'size', 'nsteps') - def __init__(self, varname, fpattern): + def __init__(self, varname, fpattern, fpath, files): self.name = varname # path attributes self.fpattern = fpattern - self.files = [] + self.fpath = fpath + self.files = files # mapping attributes self._frequency = '' self._realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index dfca89c..06391c8 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -565,6 +565,7 @@ def write_varlist(conn, indir, match, version, alias): # get filename pattern until date match mopdb_log.debug(f"Filename: {fpath.name}") fpattern = fpath.name.split(match)[0] + print(fpattern) # adding this in case we have a mix of yyyy/yyyymn date stamps # as then a user would have 
to pass yyyy only and would get 12 files for some of the patterns if fpattern in patterns: @@ -586,7 +587,7 @@ def write_varlist(conn, indir, match, version, alias): multiple_frq = True mopdb_log.debug(f"Multiple frq: {multiple_frq}") for vname in ds.variables: - vobj = Variable(vname, fpattern) + vobj = Variable(vname, fpattern, fpath, pattern_list) if vname not in coords and all(x not in vname for x in ['_bnds','_bounds']): v = ds[vname] mopdb_log.debug(f"Variable: {vobj.name}") From 21d5ecfde8dbd68c55eb4bf7ba315af0dd3f5998 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 10 Jul 2024 13:40:31 +1000 Subject: [PATCH 035/137] attempt to fix meta.yaml 2 --- conda/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 6a68a4f..05d58fc 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -8,7 +8,7 @@ package: source: #url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR/archive/refs/tags/{{version}}.tar.gz git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git - git_rev: {{ version }} + git_rev: "{{ version }}" git_depth: 1 # (Defaults to -1/not shallow) build: From 56ec024bb2ebc13961496022ab6f758be14fe443 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 14:17:45 +1000 Subject: [PATCH 036/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 0c6fe68..3741c47 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -55,7 +55,7 @@ jobs: - name: Test with pytest run: | conda install -n test-env pytest coverage codecov - conda run -n test-env python -m pytest + conda run -n test-env pytest conda run -n test-env coverage run --source src -m pytest #--------------------------------------------------- - name: Upload to codecov From 2237ea2ce0bee3483321a9936bf277170009b478 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 14:25:53 +1000 Subject: [PATCH 037/137] Create environment.yaml --- conda/environment.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 conda/environment.yaml diff --git a/conda/environment.yaml b/conda/environment.yaml new file mode 100644 index 0000000..4cde7a9 --- /dev/null +++ b/conda/environment.yaml @@ -0,0 +1,21 @@ +name: test-env +channels: + - defaults + - conda-forge +dependencies: + - python=3.10 + - pip + - pbr + - click + - cmor + - xarray + - numpy + - dask + - pyyaml + - cftime + - python-dateutil + - pytest + - coverage + - codecov + - pip: + - mop==1.0.0 From 1c2f27f79ede2be0faae9d06cb7581e7ad8a68ff Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 14:26:33 +1000 Subject: [PATCH 038/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 3741c47..b2a6d22 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -35,11 +35,11 @@ jobs: # Create and activate conda environment - name: Create and activate conda environment run: | - conda env create --name test-env --file conda/meta.yaml + conda env create --name test-env --file conda/environment.yaml # Install dependencies from conda - name: Install dependencies - run: conda env update --name test-env --file conda/meta.yaml + run: conda env update --name test-env --file 
conda/environment.yaml #--------------------------------------------------- #- name: Lint with flake8 # run: | From 8a1ea8d99d6277f9d67b9188c68c2adfd9501262 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 14:30:57 +1000 Subject: [PATCH 039/137] Update environment.yaml --- conda/environment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/environment.yaml b/conda/environment.yaml index 4cde7a9..40390e1 100644 --- a/conda/environment.yaml +++ b/conda/environment.yaml @@ -18,4 +18,4 @@ dependencies: - coverage - codecov - pip: - - mop==1.0.0 + - git+https://github.com/ACCESS-Community-Hub/ACCESS-MOPPeR@pytests_sam From 05a6d09342ee5411e817f3f1eda0157ea2c80393 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 14:37:32 +1000 Subject: [PATCH 040/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index b2a6d22..c635674 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -55,13 +55,13 @@ jobs: - name: Test with pytest run: | conda install -n test-env pytest coverage codecov - conda run -n test-env pytest - conda run -n test-env coverage run --source src -m pytest + conda run -n test-env pytest -q test_calculations.py + # conda run -n test-env coverage run --source src -m pytest #--------------------------------------------------- - - name: Upload to codecov - if: steps.build.outcome == 'success' - run: | - curl -Os https://uploader.codecov.io/latest/linux/codecov - chmod +x codecov - ./codecov + #- name: Upload to codecov + # if: steps.build.outcome == 'success' + # run: | + # curl -Os https://uploader.codecov.io/latest/linux/codecov + # chmod +x codecov + # ./codecov #--------------------------------------------------- From 8ef5df0c4fc0bc50fa5d077ee8c7b575768e0ceb Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 14:43:22 +1000 Subject: [PATCH 041/137] Update conftest.py --- tests/conftest.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 0dd6c56..f26c225 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,7 +25,7 @@ import logging import csv from mopdb.mopdb_utils import mapping_sql, cmorvar_sql -from mopper.setup_utils import filelist_sql +#from mopper.setup_utils import filelist_sql TESTS_HOME = os.path.abspath(os.path.dirname(__file__)) @@ -59,12 +59,12 @@ def setup_access_db(session): session.connection.commit() -@pytest.fixture -def setup_mopper_db(session): - filelist_sql = mapping_sql() - session.execute(filelist_sql) - session.execute('''INSERT INTO filelist VALUES ("/testdata/atmos/umnsa_spec_*.nc", "/testdata/mjo-elnino/v1-0/A10min/", "tas_AUS2200_mjo-elnino_subhrPt_20160101001000-20160102000000.nc", "fld_s03i236", "tas", "AUS2200_A10min", "subhrPt", "atmos", "point", "20160101T0005", "20160102T0000", "201601010000", "201601012355", "unprocessed", "3027.83203125", "mjo-elnino", "K", "AUS2200", "AUS2200", "/testdata/mjo-elnino/mjo-elnino.json", "1970-01-01", "v1-0")''') - session.connection.commit() +#@pytest.fixture +#def setup_mopper_db(session): +# filelist_sql = mapping_sql() +# session.execute(filelist_sql) +# session.execute('''INSERT INTO filelist VALUES ("/testdata/atmos/umnsa_spec_*.nc", "/testdata/mjo-elnino/v1-0/A10min/", "tas_AUS2200_mjo-elnino_subhrPt_20160101001000-20160102000000.nc", "fld_s03i236", 
"tas", "AUS2200_A10min", "subhrPt", "atmos", "point", "20160101T0005", "20160102T0000", "201601010000", "201601012355", "unprocessed", "3027.83203125", "mjo-elnino", "K", "AUS2200", "AUS2200", "/testdata/mjo-elnino/mjo-elnino.json", "1970-01-01", "v1-0")''') +# session.connection.commit() def test_check_timestamp(caplog): From 05d618b44d2cb7ec37fabb5a631fa08576ad6d0d Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 14:47:44 +1000 Subject: [PATCH 042/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index c635674..50d2512 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -55,7 +55,7 @@ jobs: - name: Test with pytest run: | conda install -n test-env pytest coverage codecov - conda run -n test-env pytest -q test_calculations.py + conda run -n test-env pytest -q tests/test_calculations.py # conda run -n test-env coverage run --source src -m pytest #--------------------------------------------------- #- name: Upload to codecov From 53067bf658a198d4d4d9718e228388e7b30aed33 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 16:14:28 +1000 Subject: [PATCH 043/137] Update environment.yaml --- conda/environment.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/conda/environment.yaml b/conda/environment.yaml index 40390e1..2f0d566 100644 --- a/conda/environment.yaml +++ b/conda/environment.yaml @@ -17,5 +17,6 @@ dependencies: - pytest - coverage - codecov + - importlib_resources - pip: - git+https://github.com/ACCESS-Community-Hub/ACCESS-MOPPeR@pytests_sam From 0572339cfb807496548cf715a1aa9fc33fee23e9 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 10 Jul 2024 16:18:33 +1000 Subject: [PATCH 044/137] moved frequency logic to class --- src/mopdb/mopdb_class.py | 28 +++++++++++++++++++++++---- src/mopdb/mopdb_utils.py | 41 ++++++++++++++++++++-------------------- 2 files changed, 44 insertions(+), 25 deletions(-) diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index 53c4e4c..f4b3e0b 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -18,6 +18,8 @@ # # last updated 06/07/2024 +from pathlib import Path + class Variable(): # __slots__ = ('name', 'pattern', 'files', 'frequency', 'realm', @@ -25,16 +27,16 @@ class Variable(): # 'cell_methods', 'positive', 'long_name', 'standard_name', # 'vtype', 'size', 'nsteps') - def __init__(self, varname, fpattern, fpath, files): + def __init__(self, varname: str, fpattern: str, fpath: Path, files: list): self.name = varname # path attributes self.fpattern = fpattern self.fpath = fpath self.files = files # mapping attributes - self._frequency = '' + self._frequency = None self._realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] - if x in self.fpattern.parts][0] + if x in self.fpath.parts][0] self.cmor_var = '' self.cmor_table = '' self.version = '' @@ -54,11 +56,29 @@ def __init__(self, varname, fpattern, fpath, files): @property def frequency(self): + if self._frequency is None: + fname = self.files[0] + if self._realm == 'atmos': + fbits = fname.split("_") + self._frequency = fbits[-1].replace(".nc", "") + elif self._realm == 'ocean': + if any(x in fname for x in ['scalar', 'month']): + self._frequency = 'mon' + elif 'daily' in fname: + self._frequency = 'day' + elif self._realm == 'seaIce': + if '_m.' in fname: + self._frequency = 'mon' + elif '_d.' 
in fname: + self._frequency = 'day' + else: + self._frequency = 'NAfrq' return self._frequency + @frequency.setter def frequency(self, value): - fix_frq = {'dCai': 'day', '3h': '3hr', '6h': '6hr'} + fix_frq = {'dai': 'day', '3h': '3hr', '6h': '6hr'} if value in fix_frq.keys(): self._frequency = fix_frq[value] value = value.replace('hPt', 'hrPt') diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 06391c8..17d1cba 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -577,17 +577,23 @@ def write_varlist(conn, indir, match, version, alias): #fwriter.writerow([f"#{fpattern}"]) # get attributes for the file variables ds = xr.open_dataset(str(pattern_list[0]), decode_times=False) - realm = get_realm(fpath, version, ds) coords = [c for c in ds.coords] + ['latitude_longitude'] #pass next file in case of 1 timestep per file and no frq in name fnext = str(pattern_list[1]) - frequency, umfrq = get_frequency(realm, fpath.name, ds, fnext) + #frequency, umfrq = get_frequency(realm, fpath.name, ds, fnext) multiple_frq = False - if umfrq != {}: - multiple_frq = True - mopdb_log.debug(f"Multiple frq: {multiple_frq}") - for vname in ds.variables: + for idx, vname in enumerate(ds.variables): vobj = Variable(vname, fpattern, fpath, pattern_list) + if vobj.frequency == 'NAfrq' or vobj.realm == 'atmos': + # if this is the first variable get frq from time axes + if idx == 0: + frq_dict = get_file_frq(ds, fnext) + # if only one frequency detected empty dict + if len(frq_dict) == 1: + vobj._frequency = frq_dict.popitem()[1] + else: + multiple_frq = True + mopdb_log.debug(f"Multiple frq: {multiple_frq}") if vname not in coords and all(x not in vname for x in ['_bnds','_bounds']): v = ds[vname] mopdb_log.debug(f"Variable: {vobj.name}") @@ -597,12 +603,12 @@ def write_varlist(conn, indir, match, version, alias): # assign time axis frequency if more than one is available if multiple_frq: if 'time' in v.dims[0]: - frequency = umfrq[v.dims[0]] + vobj._frequency = frq_dict[v.dims[0]] else: mopdb_log.info(f"Could not detect frequency for variable: {v}") attrs = v.attrs vobj.cell_methods, frqmod = get_cell_methods(attrs, v.dims) - vobj.frequency = frequency + frqmod + vobj.frequency = vobj.frequency + frqmod mopdb_log.debug(f"Frequency var: {vobj.frequency}") # try to retrieve cmip name vobj = get_cmorname(conn, vobj, version) @@ -611,6 +617,8 @@ def write_varlist(conn, indir, match, version, alias): vobj.standard_name = attrs.get('standard_name', "") vobj.dimensions = " ".join(v.dims) vobj.vtype = v.dtype + if vobj.realm == "NArealm": + vobj.realm = get_realm(version, ds) line = [vobj.__dict__[k] for k in line_cols] fwriter.writerow(line) vobj_list.append(vobj) @@ -671,7 +679,7 @@ def read_map(fname, alias): notes = row[16] else: notes = row[15] - if alias is '': + if alias == '': alias = fname.replace(".csv","") var_list.append(row[:11] + [notes, alias]) return var_list @@ -993,23 +1001,14 @@ def check_realm_units(conn, var): return var -def get_realm(fpath, version, ds): - '''Return realm for variable in files or NArealm''' +def get_realm(version, ds): + '''Try to retrieve realm if using path failed''' mopdb_log = logging.getLogger('mopdb_log') - #realm = None if version == 'AUS2200': realm = 'atmos' - else: - realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] - if x in fpath.parts][0] - if realm is None and 'um_version' in ds.attrs.keys(): + elif 'um_version' in ds.attrs.keys(): realm = 'atmos' - #elif realm == 'ocn': - # realm = 'ocean' - #elif realm is None: - # realm = 
'NArealm' - # mopdb_log.info(f"Couldn't detect realm from path, setting to NArealm") mopdb_log.debug(f"Realm is {realm}") return realm From 2e66b798232d6e8c19e818520076392955a332a3 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 16:23:51 +1000 Subject: [PATCH 045/137] Update test_calculations.py --- tests/test_calculations.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_calculations.py b/tests/test_calculations.py index 9037b53..dc8507b 100644 --- a/tests/test_calculations.py +++ b/tests/test_calculations.py @@ -41,7 +41,7 @@ def create_var(nlat, nlon, ntime=None, nlev=None, sdepth=False, seed=100): dims.insert(0, 'lev') coords['lev'] = lev shape.insert(0, nlev) - elif sdepth is True: + if sdepth is True: depth = np.array([0.05, 0.2, 0.5, 1]) dims.insert(0, 'depth') coords['depth'] = depth @@ -66,7 +66,7 @@ def test_calc_topsoil(): out = calc_topsoil(mrsol) xrtest.assert_allclose(out, expected, rtol=1e-05) - +''' def test_overturn_stream(caplog): global ctx caplog.set_level(logging.DEBUG, logger='varlog_1') @@ -118,3 +118,4 @@ def test_overturn_stream(caplog): with ctx: out4 = overturn_stream(varlist) nptest.assert_array_equal(res4, out4) +''' From 79983531d461e83889b38a7e3bf2eec0d1f1eaac Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 08:54:25 +1000 Subject: [PATCH 046/137] fixed issues highlighted by flake --- .github/workflows/mopper-conda.yaml | 1 + src/mopdb/mopdb_utils.py | 2 +- src/mopper/calculations.py | 5 +++-- src/mopper/setup_utils.py | 8 ++++---- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index a8d26a6..f776b8f 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -14,6 +14,7 @@ jobs: build-linux: runs-on: ubuntu-latest + timeout-minutes: 60 strategy: max-parallel: 5 diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index b9875c7..32ceb81 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -663,7 +663,7 @@ def read_map(fname, alias): notes = row[16] else: notes = row[15] - if alias is '': + if alias == '': alias = fname.replace(".csv","") var_list.append(row[:11] + [notes, alias]) return var_list diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py index 1adf216..c87213a 100644 --- a/src/mopper/calculations.py +++ b/src/mopper/calculations.py @@ -950,7 +950,7 @@ def tos_3hr(var, landfrac): vout : Xarray dataset """ - v = tos_degC(var) + v = K_degC(var) vout = xr.zeros_like(var) t = len(var.time) @@ -1346,6 +1346,7 @@ def get_basin_mask(ctx, lat, lon): basin_mask: DataArray basin_mask(lat,lon) """ + var_log = logging.getLogger(ctx.obj['var_log']) coords = ['t', 't'] if 'xu' in lon: coords[0] = 'u' @@ -1443,6 +1444,6 @@ def calc_depositions(ctx, var, weight=None): varlist.append(v0) if weight is None: weight = 0.05844 - deps = sum_vars(varlist) * mole_weight + deps = sum_vars(varlist) * weight return deps diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py index e0341fa..192f642 100755 --- a/src/mopper/setup_utils.py +++ b/src/mopper/setup_utils.py @@ -44,6 +44,7 @@ from json.decoder import JSONDecodeError from mopdb.mopdb_utils import query +from mopdb.cmip_utils import fix_years def write_var_map(outpath, table, matches): @@ -166,7 +167,7 @@ def find_custom_tables(ctx): mop_log = logging.getLogger('mop_log') tables = [] path = ctx.obj['tables_path'] - tables = ctx.obj['tables_path'].rglob("*_*.json") +
table_files = ctx.obj['tables_path'].rglob("*_*.json") for f in table_files: f = str(f).replace(".json", "") tables.append(f) @@ -626,9 +627,8 @@ def define_files(ctx, cursor, opts, mp): if mp['years'] != 'all' and ctx.obj['dreq_years']: exp_start, exp_end = fix_years(mp['years'], exp_start[:4], exp_end[:4]) if exp_start is None: - mop_log.info("Years requested for variable are outside specified") - mop_log.info((f"period: {table_id}, {var},", - f"{match['tstart']}, {match['tend']}")) + mop_log.info(f"""Years requested for variable are outside + specified period: {mp['years']}""") return tstep_dict = {'10min': ['minutes=10', 'minutes=5'], '30min': ['minutes=30', 'minutes=15'], From 57894bd6e9c31e2ffde041350b766182a1424ecb Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 09:13:05 +1000 Subject: [PATCH 047/137] fixing conda package install in actions --- .github/workflows/mopper-conda.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index f776b8f..64c4906 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -38,9 +38,6 @@ jobs: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - # - name: Install package - run: | - conda build conda/meta.yaml - name: Test with pytest run: | conda install pytest coverage codecov From f044f04810c48c505b0626773fe93aa99dc83c51 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 09:30:03 +1000 Subject: [PATCH 048/137] fixing conda package install in actions 2 --- .github/workflows/mopper-conda.yaml | 2 +- env.yaml | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 env.yaml diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 64c4906..bed7062 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -30,7 +30,7 @@ jobs: echo $CONDA/bin >> $GITHUB_PATH - name: Install dependencies run: | - conda env update --file conda/meta.yaml --name base + conda env update --file env.yaml --name base - name: Lint with flake8 run: | conda install flake8 diff --git a/env.yaml b/env.yaml new file mode 100644 index 0000000..64e2483 --- /dev/null +++ b/env.yaml @@ -0,0 +1,7 @@ +name: mopenv +dependencies: + - click + - cmor + - xarray + - numpy + - pyyaml From 7b3460e85bf3739af5c6e7d3f2fd055ba2732e3d Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 09:34:10 +1000 Subject: [PATCH 049/137] fixing conda package install in actions 3 --- env.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/env.yaml b/env.yaml index 64e2483..10266f8 100644 --- a/env.yaml +++ b/env.yaml @@ -1,4 +1,6 @@ name: mopenv +channels: + - conda-forge dependencies: - click - cmor From e6b37e50096bbb3da99e6770a9133b4c1511eace Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 09:43:21 +1000 Subject: [PATCH 050/137] fixing conda package install in actions 4 --- .github/workflows/mopper-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index bed7062..f32f3a4 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -33,7 +33,7 @@ jobs: conda env update --file env.yaml --name base - name: Lint with flake8 
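# [editor's note] PATCH 028-031 and the fixes around this hunk keep circling
# one CI detail: every `run:` block in GitHub Actions starts a fresh,
# non-interactive shell, so `conda activate` (added in PATCH 029, removed in
# PATCH 030) does not persist, while `conda run -n <env>` resolves the
# environment for a single command. A minimal sketch of the pattern the
# workflow settles on (step and environment names are illustrative):
#   - name: Test with pytest
#     run: |
#       conda install -n test-env pytest coverage
#       conda run -n test-env python -m pytest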
run: | - conda install flake8 + conda install flake8 --solver classic # stop the build if there are Python syntax errors or undefined names flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide From 3c02c32fdd0b9c8bed8b2ccda34f1e1aeefedf49 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 09:46:23 +1000 Subject: [PATCH 051/137] fixing conda package install in actions 5 --- .github/workflows/mopper-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index f32f3a4..5ff395e 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -40,7 +40,7 @@ jobs: flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest run: | - conda install pytest coverage codecov + conda install pytest coverage codecov --solver classic conda run python -m pytest conda run coverage run --source src -m py.test - name: Upload to codecov From a36cd1395f20a899afdc42dfcfc0691030a1235c Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 10:07:14 +1000 Subject: [PATCH 052/137] fixing conda package install in actions 6 --- .github/workflows/mopper-conda.yaml | 3 +++ env.yaml | 1 + 2 files changed, 4 insertions(+) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 5ff395e..1b005b5 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -38,6 +38,9 @@ jobs: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: install package + source activate base + pip install ./ - name: Test with pytest run: | conda install pytest coverage codecov --solver classic diff --git a/env.yaml b/env.yaml index 10266f8..6e46467 100644 --- a/env.yaml +++ b/env.yaml @@ -7,3 +7,4 @@ dependencies: - xarray - numpy - pyyaml + - dask From a1f393e7e63dd55dda84d3cb41fcfc7057ded9e9 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 10:11:17 +1000 Subject: [PATCH 053/137] fixing conda package install in actions 7 --- .github/workflows/mopper-conda.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 1b005b5..d08666f 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -39,6 +39,7 @@ jobs: # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: install package + run: | source activate base pip install ./ - name: Test with pytest From 9a746c2af438c566f886fdc2db91a99eae1cfa23 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 10:20:06 +1000 Subject: [PATCH 054/137] issue #155 --- src/mopper/calculations.py | 2 +- src/mopper/mop_utils.py | 2 +- src/mopper/setup_utils.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py index c87213a..d217aef 100644 --- a/src/mopper/calculations.py +++ b/src/mopper/calculations.py @@ -39,7 +39,7 @@ import dask import logging -from importlib_resources import files as import_files +from importlib.resources import files as import_files from mopper.setup_utils import read_yaml # Global Variables diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index cdb78db..6017b68 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -40,7 +40,7 @@ from mopper.calculations import * from mopper.setup_utils import read_yaml -from importlib_resources import files as import_files +from importlib.resources import files as import_files def config_log(debug, path, stream_level=logging.WARNING): diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py index 192f642..be5cae0 100755 --- a/src/mopper/setup_utils.py +++ b/src/mopper/setup_utils.py @@ -40,7 +40,7 @@ from collections import OrderedDict from datetime import datetime#, timedelta from dateutil.relativedelta import relativedelta -from importlib_resources import files as import_files +from importlib.resources import files as import_files from json.decoder import JSONDecodeError from mopdb.mopdb_utils import query From 357e4e9df24767541a5b85eb373b9c7ab9eb5488 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 10:23:55 +1000 Subject: [PATCH 055/137] fixed import --- src/mopper/setup_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py index be5cae0..ef2c1ec 100755 --- a/src/mopper/setup_utils.py +++ b/src/mopper/setup_utils.py @@ -44,7 +44,7 @@ from json.decoder import JSONDecodeError from mopdb.mopdb_utils import query -from mopdb.cmip_utils import fix_years +from mopper.cmip_utils import fix_years def write_var_map(outpath, table, matches): From 835a1c550069db7a5bcbdaf256525054bb99e7c6 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Thu, 11 Jul 2024 16:39:36 +1000 Subject: [PATCH 056/137] Typo in topsoil calc --- src/mopper/calculations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py index 1adf216..8ce6783 100644 --- a/src/mopper/calculations.py +++ b/src/mopper/calculations.py @@ -1175,7 +1175,7 @@ def calc_topsoil(soilvar): # calculate the fraction of maxlev which falls in first 10cm fraction = (0.1 - depth[maxlev -1])/(depth[maxlev] - depth[maxlev-1]) topsoil = soilvar.isel(depth=slice(0,maxlev)).sum(dim='depth') - topsoil = topsoil + fraction * topsoil.isel(depth=maxlev) + topsoil = topsoil + fraction * soilvar.isel(depth=maxlev) return topsoil #---------------------------------------------------------------------- From b0e738e2de6f801fa92193960b34fadd5254d119 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 17:52:21 +1000 Subject: [PATCH 057/137] moved to two classes for FPattern and Variable --- .github/workflows/mopper-conda.yaml | 2 +- env.yaml => 
conda/enviroment.yaml | 0 src/mopdb/mopdb_class.py | 109 +++++++++++++++---------- src/mopdb/mopdb_utils.py | 118 ++++++++-------------------- 4 files changed, 101 insertions(+), 128 deletions(-) rename env.yaml => conda/enviroment.yaml (100%) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index d08666f..4642d83 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -30,7 +30,7 @@ jobs: echo $CONDA/bin >> $GITHUB_PATH - name: Install dependencies run: | - conda env update --file env.yaml --name base + conda env update --file conda/environment.yaml --name base - name: Lint with flake8 run: | conda install flake8 --solver classic diff --git a/env.yaml b/conda/enviroment.yaml similarity index 100% rename from env.yaml rename to conda/enviroment.yaml diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index f4b3e0b..41381ef 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -20,26 +20,82 @@ from pathlib import Path +class FPattern(): + """This class represents a file pattern with a set list of variables; + its attributes represent features of the variables which are shared. + """ + + def __init__(self, fpattern: str, fpath: Path): + self.fpattern = fpattern + self.fpath = fpath + self.files = self.get_files() + self.realm = self.get_realm() + self.frequency = self.get_frequency() + self.version = '' + self.multiple_frq = False + + def get_frequency(self): + frequency = 'NAfrq' + fname = str(self.files[0]) + if self.realm == 'atmos': + fbits = fname.split("_") + frequency = fbits[-1].replace(".nc", "") + elif self.realm == 'ocean': + if any(x in fname for x in ['scalar', 'month']): + frequency = 'mon' + elif 'daily' in fname: + frequency = 'day' + elif self.realm == 'seaIce': + if '_m.' in fname: + frequency = 'mon' + elif '_d.' in fname: + frequency = 'day' + return frequency + + + def get_realm(self): + realm = 'NArealm' + realm = next((x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] + if x in self.fpath.parts), 'NArealm') + fix_realm = {'atm': 'atmos', 'ice': 'seaIce', 'ocn': 'ocean'} + if realm in fix_realm.keys(): + realm = fix_realm[realm] + return realm + + def get_files(self): + return self.list_files(self.fpath, self.fpattern) + + @staticmethod + def list_files(indir, match): + """Returns list of files matching input directory and match""" + files = [x for x in Path(indir).rglob(f"*{match}*") + if x.is_file() and '.nc' in str(x)] + files.sort(key=lambda x:x.name) + return files + +
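# [editor's note] A minimal sketch of how the two classes in this refactor
# are meant to compose; the directory and names below are hypothetical
# (borrowed from the test fixtures) and assume the pattern matches at least
# one .nc file, so that files[0] exists:
#
#   from pathlib import Path
#   fobj = FPattern("umnsa_spec_", Path("/testdata/atmos"))
#   # shared attributes are derived once per file pattern ...
#   print(fobj.realm, fobj.frequency, len(fobj.files))
#   # ... and each Variable (defined next) starts from those shared values
#   tas = Variable("fld_s03i236", fobj)
#   print(tas.realm, tas.frequency)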
+ """ + # __slots__ = ('name', 'pattern', 'files', 'frequency', 'realm', # 'cmor_var', 'cmor_table', 'version', 'units', 'dimensions', # 'cell_methods', 'positive', 'long_name', 'standard_name', # 'vtype', 'size', 'nsteps') - def __init__(self, varname: str, fpattern: str, fpath: Path, files: list): + def __init__(self, varname: str, fobj: FPattern): self.name = varname - # path attributes - self.fpattern = fpattern - self.fpath = fpath - self.files = files + # path object + self.fobj = fobj + #self.fpath = fobj.fpath + #self.files = fobj.files # mapping attributes - self._frequency = None - self._realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] - if x in self.fpath.parts][0] + self._frequency = fobj.frequency + self._realm = fobj.realm self.cmor_var = '' self.cmor_table = '' - self.version = '' + #self.version = self.fpattern.version self.match = False # descriptive attributes self.units = '' @@ -56,31 +112,11 @@ def __init__(self, varname: str, fpattern: str, fpath: Path, files: list): @property def frequency(self): - if self._frequency is None: - fname = self.files[0] - if self._realm == 'atmos': - fbits = fname.split("_") - self._frequency = fbits[-1].replace(".nc", "") - elif self._realm == 'ocean': - if any(x in fname for x in ['scalar', 'month']): - self._frequency = 'mon' - elif 'daily' in fname: - self._frequency = 'day' - elif self._realm == 'seaIce': - if '_m.' in fname: - self._frequency = 'mon' - elif '_d.' in fname: - self._frequency = 'day' - else: - self._frequency = 'NAfrq' return self._frequency @frequency.setter def frequency(self, value): - fix_frq = {'dai': 'day', '3h': '3hr', '6h': '6hr'} - if value in fix_frq.keys(): - self._frequency = fix_frq[value] value = value.replace('hPt', 'hrPt') if not any(x in value for x in ['min', 'hr', 'day', 'mon', 'yr']): @@ -94,14 +130,7 @@ def realm(self): @realm.setter def realm(self, value): - fix_realm = {'atm': 'atmos', 'ice': 'seaIce', 'ocn': 'ocean'} - if value in fix_realm.keys(): - self._realm = fix_realm[value] if not any(x in value for x in - ['atmos', 'seaIce', 'ocean', 'land']): - self._realm = 'NArealm' - - def list_files(self): - """Returns list of files matching input directory and match""" - self.files = [x for x in Path(self.indir).rglob(f"{self.match}") if x.is_file()] - return files.sort(key=lambda x:x.name) + ['atmos', 'seaIce', 'ocean', 'land', 'landIce']): + value = 'NArealm' + self.realm = value diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 17d1cba..ca8560a 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -24,7 +24,6 @@ import sys import os import csv -import glob import json import stat import xarray as xr @@ -32,10 +31,10 @@ import math from datetime import datetime, date from collections import Counter -from operator import itemgetter +from operator import itemgetter, attrgetter from pathlib import Path -from mopdb.mopdb_class import Variable +from mopdb.mopdb_class import FPattern, Variable def config_log(debug): """Configures log file""" @@ -425,17 +424,6 @@ def delete_record(conn, table, col, pairs): return -def list_files(indir, match): - """Returns list of files matching input directory and match""" - mopdb_log = logging.getLogger('mopdb_log') - mopdb_log.debug(f"Pattern to list files: {indir}/**/*{match}*") - files = [x for x in Path(indir).rglob(f"{match}") if x.is_file() - and '.nc' in str(x)] - files.sort(key=lambda x:x.name) - mopdb_log.debug(f"Files after sorting: {files}") - return files - - def get_file_frq(ds, fnext): """Return a 
dictionary with frequency for each time axis. @@ -469,12 +457,11 @@ def get_file_frq(ds, fnext): for t in time_axs: mopdb_log.debug(f"len of time axis {t}: {len(ds[t])}") if len(ds[t]) > 1: - interval = (ds[t][1]-ds[t][0]).values #/ np.timedelta64(1, 'D') - interval_file = (ds[t][-1] -ds[t][0]).values #/ np.timedelta64(1, 'D') + interval = (ds[t][1]-ds[t][0]).values + interval_file = (ds[t][-1] -ds[t][0]).values else: interval = interval_file mopdb_log.debug(f"interval 2 timesteps for {t}: {interval}") - #mopdb_log.debug(f"interval entire file {t}: {interval_file}") for k,v in int2frq.items(): if math.isclose(interval, v, rel_tol=0.05): frq[t] = k @@ -482,44 +469,6 @@ def get_file_frq(ds, fnext): return frq -def get_frequency(realm, fname, ds, fnext): - """Return frequency based on realm and filename - For UM files checks if more than one time axis is present and if so - returns dictionary with frequency: variable list - """ - mopdb_log = logging.getLogger('mopdb_log') - frq_dict = {} - frequency = 'NAfrq' - if realm == 'atmos': - fbits = fname.split("_") - frequency = fbits[-1].replace(".nc", "") - fix_frq = {'dai': 'day', '3h': '3hr', '6h': '6hr'} - if frequency in fix_frq.keys(): - frequency = fix_frq[frequency] - else: - frequency = frequency.replace('hPt', 'hrPt') - frq_dict = get_file_frq(ds, fnext) - mopdb_log.debug(f"frq_dict: {frq_dict}") - elif realm == 'ocean': - # if I found scalar or monthly in any of fbits - if any(x in fname for x in ['scalar', 'month']): - frequency = 'mon' - elif 'daily' in fname: - frequency = 'day' - elif realm == 'ice': - if '_m.' in fname: - frequency = 'mon' - elif '_d.' in fname: - frequency = 'day' - if frequency == 'NAfrq': - frq_dict = get_file_frq(ds, fnext) - # if only one frequency detected empty dict - if len(frq_dict) == 1: - frequency = frq_dict.popitem()[1] - mopdb_log.debug(f"Frequency: {frequency}") - return frequency, frq_dict - - def get_cell_methods(attrs, dims): """Get cell_methods from variable attributes. 
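# [editor's note] A self-contained sketch of the interval-matching idea in
# get_file_frq above: the gap between two consecutive time values (in days,
# since the files are opened with decode_times=False) is compared against
# the expected interval for each candidate frequency; the mapping below is
# an illustrative subset of int2frq:
#
#   import math
#   int2frq = {'day': 1.0, '6hr': 0.25, '1hr': 1 / 24}
#   interval = 0.0417  # two consecutive timesteps roughly one hour apart
#   frq = next((k for k, v in int2frq.items()
#               if math.isclose(interval, v, rel_tol=0.05)), 'NAfrq')
#   print(frq)  # -> '1hr'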
If cell_methods is not defined assumes values are instantaneous @@ -547,11 +496,12 @@ def write_varlist(conn, indir, match, version, alias): for each variable """ mopdb_log = logging.getLogger('mopdb_log') - line_cols = ['name', 'cmor_var', 'units', 'dimensions', - '_frequency', '_realm', 'cell_methods', 'cmor_table', 'vtype', - 'size', 'nsteps', 'fpattern', 'long_name', 'standard_name'] + line_cols = ['name','cmor_var','units','dimensions','_frequency', + '_realm','cell_methods','cmor_table','vtype','size', + 'nsteps','fobj.fpattern','long_name','standard_name'] vobj_list = [] - files = list_files(indir, f"*{match}*") + files = FPattern.list_files(indir, match) + mopdb_log.debug(f"Files after sorting: {files}") patterns = [] if alias == '': alias = 'mopdb' @@ -560,40 +510,36 @@ def write_varlist(conn, indir, match, version, alias): fwriter = csv.writer(fcsv, delimiter=';') fwriter.writerow(["name", "cmor_var", "units", "dimensions", "frequency", "realm", "cell_methods", "cmor_table", "vtype", - "size", "nsteps", "filename", "long_name", "standard_name"]) - for i, fpath in enumerate(files): + "size", "nsteps", "fpattern", "long_name", "standard_name"]) + for fpath in files: # get filename pattern until date match mopdb_log.debug(f"Filename: {fpath.name}") fpattern = fpath.name.split(match)[0] - print(fpattern) - # adding this in case we have a mix of yyyy/yyyymn date stamps - # as then a user would have to pass yyyy only and would get 12 files for some of the patterns if fpattern in patterns: continue patterns.append(fpattern) - pattern_list = list_files(indir, f"{fpattern}*") - nfiles = len(pattern_list) - mopdb_log.debug(f"File pattern: {fpattern}") + fobj = FPattern(fpattern, Path(indir)) + #pattern_list = list_files(indir, f"{fpattern}*") + nfiles = len(fobj.files) + mopdb_log.debug(f"File pattern, number of files: {fpattern}, {nfiles}") #fwriter.writerow([f"#{fpattern}"]) # get attributes for the file variables - ds = xr.open_dataset(str(pattern_list[0]), decode_times=False) + ds = xr.open_dataset(str(fobj.files[0]), decode_times=False) coords = [c for c in ds.coords] + ['latitude_longitude'] #pass next file in case of 1 timestep per file and no frq in name - fnext = str(pattern_list[1]) - #frequency, umfrq = get_frequency(realm, fpath.name, ds, fnext) - multiple_frq = False - for idx, vname in enumerate(ds.variables): - vobj = Variable(vname, fpattern, fpath, pattern_list) - if vobj.frequency == 'NAfrq' or vobj.realm == 'atmos': - # if this is the first variable get frq from time axes - if idx == 0: - frq_dict = get_file_frq(ds, fnext) - # if only one frequency detected empty dict - if len(frq_dict) == 1: - vobj._frequency = frq_dict.popitem()[1] - else: - multiple_frq = True - mopdb_log.debug(f"Multiple frq: {multiple_frq}") + fnext = str(fobj.files[1]) + if fobj.frequency == 'NAfrq' or fobj.realm == 'atmos': + frq_dict = get_file_frq(ds, fnext) + # if only one frequency detected empty dict + if len(frq_dict) == 1: + fobj.frequency = frq_dict.popitem()[1] + else: + fobj.multiple_frq = True + mopdb_log.debug(f"Multiple frq: {fobj.multiple_frq}") + if fobj.realm == "NArealm": + fobj.realm = get_realm(version, ds) + for vname in ds.variables: + vobj = Variable(vname, fobj) if vname not in coords and all(x not in vname for x in ['_bnds','_bounds']): v = ds[vname] mopdb_log.debug(f"Variable: {vobj.name}") @@ -601,7 +547,7 @@ def write_varlist(conn, indir, match, version, alias): vobj.size = v[0].nbytes vobj.nsteps = nfiles * v.shape[0] # assign time axis frequency if more than one is 
available - if multiple_frq: + if fobj.multiple_frq: if 'time' in v.dims[0]: vobj._frequency = frq_dict[v.dims[0]] else: @@ -617,9 +563,7 @@ def write_varlist(conn, indir, match, version, alias): vobj.standard_name = attrs.get('standard_name', "") vobj.dimensions = " ".join(v.dims) vobj.vtype = v.dtype - if vobj.realm == "NArealm": - vobj.realm = get_realm(version, ds) - line = [vobj.__dict__[k] for k in line_cols] + line = [attrgetter(k)(vobj) for k in line_cols] fwriter.writerow(line) vobj_list.append(vobj) mopdb_log.info(f"Variable list for {fpattern} successfully written") From 0195945b0e8ffb5591016b25b39706c338c04643 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 12 Jul 2024 15:14:24 +1000 Subject: [PATCH 058/137] progress on intake subcommand --- setup.cfg | 2 +- src/{data => mopdata}/access.db | Bin src/{data => mopdata}/access_dump.sql | 0 .../cmor_tables/ACDD_CV.json | 0 .../cmor_tables/ACDD_coordinate.json | 0 .../cmor_tables/ACDD_formula_terms.json | 0 .../cmor_tables/ACDD_grids.json | 0 .../cmor_tables/AUS2200_A10min.json | 0 .../cmor_tables/AUS2200_A1hr.json | 0 .../cmor_tables/AUS2200_A1hrPlev.json | 0 .../cmor_tables/AUS2200_A3hr.json | 0 .../cmor_tables/AUS2200_A6hr.json | 0 .../cmor_tables/AUS2200_Aday.json | 0 .../cmor_tables/AUS2200_fx.json | 0 .../cmor_tables/CM2_3hr.json | 0 .../cmor_tables/CM2_6hr.json | 0 .../cmor_tables/CM2_day.json | 0 .../cmor_tables/CM2_mon.json | 0 .../cmor_tables/CMIP6_3hr.json | 0 .../cmor_tables/CMIP6_6hrLev.json | 0 .../cmor_tables/CMIP6_6hrPlev.json | 0 .../cmor_tables/CMIP6_6hrPlevPt.json | 0 .../cmor_tables/CMIP6_AERday.json | 0 .../cmor_tables/CMIP6_AERhr.json | 0 .../cmor_tables/CMIP6_AERmon.json | 0 .../cmor_tables/CMIP6_AERmonZ.json | 0 .../cmor_tables/CMIP6_Amon.json | 0 .../cmor_tables/CMIP6_CF3hr.json | 0 .../cmor_tables/CMIP6_CFday.json | 0 .../cmor_tables/CMIP6_CFmon.json | 0 .../cmor_tables/CMIP6_CFsubhr.json | 0 .../cmor_tables/CMIP6_CV.json | 0 .../cmor_tables/CMIP6_E1hr.json | 0 .../cmor_tables/CMIP6_E1hrClimMon.json | 0 .../cmor_tables/CMIP6_E3hr.json | 0 .../cmor_tables/CMIP6_E3hrPt.json | 0 .../cmor_tables/CMIP6_E6hrZ.json | 0 .../cmor_tables/CMIP6_Eday.json | 0 .../cmor_tables/CMIP6_EdayZ.json | 0 .../cmor_tables/CMIP6_Efx.json | 0 .../cmor_tables/CMIP6_Emon.json | 0 .../cmor_tables/CMIP6_EmonZ.json | 0 .../cmor_tables/CMIP6_Esubhr.json | 0 .../cmor_tables/CMIP6_Eyr.json | 0 .../cmor_tables/CMIP6_IfxAnt.json | 0 .../cmor_tables/CMIP6_IfxGre.json | 0 .../cmor_tables/CMIP6_ImonAnt.json | 0 .../cmor_tables/CMIP6_ImonGre.json | 0 .../cmor_tables/CMIP6_IyrAnt.json | 0 .../cmor_tables/CMIP6_IyrGre.json | 0 .../cmor_tables/CMIP6_LImon.json | 0 .../cmor_tables/CMIP6_Lmon.json | 0 .../cmor_tables/CMIP6_Oclim.json | 0 .../cmor_tables/CMIP6_Oday.json | 0 .../cmor_tables/CMIP6_Odec.json | 0 .../cmor_tables/CMIP6_Ofx.json | 0 .../cmor_tables/CMIP6_Omon.json | 0 .../cmor_tables/CMIP6_Oyr.json | 0 .../cmor_tables/CMIP6_SIday.json | 0 .../cmor_tables/CMIP6_SImon.json | 0 .../cmor_tables/CMIP6_coordinate.json | 0 .../cmor_tables/CMIP6_day.json | 0 .../cmor_tables/CMIP6_formula_terms.json | 0 .../cmor_tables/CMIP6_fx.json | 0 .../cmor_tables/CMIP6_grids.json | 0 .../dreq/cmvme_all_piControl_3_3.csv | 0 src/{data => mopdata}/land_tiles.yaml | 0 src/{data => mopdata}/landtype.yaml | 0 src/{data => mopdata}/latlon_vertices.yaml | 0 src/{data => mopdata}/model_levels.yaml | 0 src/{data => mopdata}/notes.yaml | 0 src/{data => mopdata}/transport_lines.yaml | 0 src/mopdb/mopdb.py | 90 +++++++++-------- src/mopdb/mopdb_class.py | 1 + 
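[editor's note] The write_varlist hunk just above swaps the dict-style
lookup for operator.attrgetter so that a dotted entry such as
'fobj.fpattern' in line_cols can reach into the nested FPattern object.
A minimal, self-contained illustration of that standard-library behaviour
(the two stub classes are hypothetical stand-ins):

    from operator import attrgetter

    class Inner:  # stands in for FPattern
        fpattern = "umnsa_spec_"

    class Outer:  # stands in for Variable
        name = "tas"
        fobj = Inner()

    print([attrgetter(k)(Outer()) for k in ("name", "fobj.fpattern")])
    # prints: ['tas', 'umnsa_spec_']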
src/mopdb/mopdb_utils.py | 95 +++++++++++++++--- src/mopper/calculations.py | 8 +- src/mopper/mop_setup.py | 6 +- src/mopper/mop_utils.py | 4 +- src/mopper/setup_utils.py | 1 - 79 files changed, 140 insertions(+), 67 deletions(-) rename src/{data => mopdata}/access.db (100%) rename src/{data => mopdata}/access_dump.sql (100%) rename src/{data => mopdata}/cmor_tables/ACDD_CV.json (100%) rename src/{data => mopdata}/cmor_tables/ACDD_coordinate.json (100%) rename src/{data => mopdata}/cmor_tables/ACDD_formula_terms.json (100%) rename src/{data => mopdata}/cmor_tables/ACDD_grids.json (100%) rename src/{data => mopdata}/cmor_tables/AUS2200_A10min.json (100%) rename src/{data => mopdata}/cmor_tables/AUS2200_A1hr.json (100%) rename src/{data => mopdata}/cmor_tables/AUS2200_A1hrPlev.json (100%) rename src/{data => mopdata}/cmor_tables/AUS2200_A3hr.json (100%) rename src/{data => mopdata}/cmor_tables/AUS2200_A6hr.json (100%) rename src/{data => mopdata}/cmor_tables/AUS2200_Aday.json (100%) rename src/{data => mopdata}/cmor_tables/AUS2200_fx.json (100%) rename src/{data => mopdata}/cmor_tables/CM2_3hr.json (100%) rename src/{data => mopdata}/cmor_tables/CM2_6hr.json (100%) rename src/{data => mopdata}/cmor_tables/CM2_day.json (100%) rename src/{data => mopdata}/cmor_tables/CM2_mon.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_3hr.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_6hrLev.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_6hrPlev.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_6hrPlevPt.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_AERday.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_AERhr.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_AERmon.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_AERmonZ.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Amon.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_CF3hr.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_CFday.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_CFmon.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_CFsubhr.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_CV.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_E1hr.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_E1hrClimMon.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_E3hr.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_E3hrPt.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_E6hrZ.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Eday.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_EdayZ.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Efx.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Emon.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_EmonZ.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Esubhr.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Eyr.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_IfxAnt.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_IfxGre.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_ImonAnt.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_ImonGre.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_IyrAnt.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_IyrGre.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_LImon.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Lmon.json (100%) rename src/{data => 
mopdata}/cmor_tables/CMIP6_Oclim.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Oday.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Odec.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Ofx.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Omon.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Oyr.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_SIday.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_SImon.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_coordinate.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_day.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_formula_terms.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_fx.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_grids.json (100%) rename src/{data => mopdata}/dreq/cmvme_all_piControl_3_3.csv (100%) rename src/{data => mopdata}/land_tiles.yaml (100%) rename src/{data => mopdata}/landtype.yaml (100%) rename src/{data => mopdata}/latlon_vertices.yaml (100%) rename src/{data => mopdata}/model_levels.yaml (100%) rename src/{data => mopdata}/notes.yaml (100%) rename src/{data => mopdata}/transport_lines.yaml (100%) diff --git a/setup.cfg b/setup.cfg index e2d1814..677f9bc 100644 --- a/setup.cfg +++ b/setup.cfg @@ -26,7 +26,7 @@ include_package_data = True where = src [options.package_data] -data = *.json, *.yaml, *.db, *.csv +mopdata = *.json, *.yaml, *.db, *.csv mopper = update_db.py [pbr] diff --git a/src/data/access.db b/src/mopdata/access.db similarity index 100% rename from src/data/access.db rename to src/mopdata/access.db diff --git a/src/data/access_dump.sql b/src/mopdata/access_dump.sql similarity index 100% rename from src/data/access_dump.sql rename to src/mopdata/access_dump.sql diff --git a/src/data/cmor_tables/ACDD_CV.json b/src/mopdata/cmor_tables/ACDD_CV.json similarity index 100% rename from src/data/cmor_tables/ACDD_CV.json rename to src/mopdata/cmor_tables/ACDD_CV.json diff --git a/src/data/cmor_tables/ACDD_coordinate.json b/src/mopdata/cmor_tables/ACDD_coordinate.json similarity index 100% rename from src/data/cmor_tables/ACDD_coordinate.json rename to src/mopdata/cmor_tables/ACDD_coordinate.json diff --git a/src/data/cmor_tables/ACDD_formula_terms.json b/src/mopdata/cmor_tables/ACDD_formula_terms.json similarity index 100% rename from src/data/cmor_tables/ACDD_formula_terms.json rename to src/mopdata/cmor_tables/ACDD_formula_terms.json diff --git a/src/data/cmor_tables/ACDD_grids.json b/src/mopdata/cmor_tables/ACDD_grids.json similarity index 100% rename from src/data/cmor_tables/ACDD_grids.json rename to src/mopdata/cmor_tables/ACDD_grids.json diff --git a/src/data/cmor_tables/AUS2200_A10min.json b/src/mopdata/cmor_tables/AUS2200_A10min.json similarity index 100% rename from src/data/cmor_tables/AUS2200_A10min.json rename to src/mopdata/cmor_tables/AUS2200_A10min.json diff --git a/src/data/cmor_tables/AUS2200_A1hr.json b/src/mopdata/cmor_tables/AUS2200_A1hr.json similarity index 100% rename from src/data/cmor_tables/AUS2200_A1hr.json rename to src/mopdata/cmor_tables/AUS2200_A1hr.json diff --git a/src/data/cmor_tables/AUS2200_A1hrPlev.json b/src/mopdata/cmor_tables/AUS2200_A1hrPlev.json similarity index 100% rename from src/data/cmor_tables/AUS2200_A1hrPlev.json rename to src/mopdata/cmor_tables/AUS2200_A1hrPlev.json diff --git a/src/data/cmor_tables/AUS2200_A3hr.json b/src/mopdata/cmor_tables/AUS2200_A3hr.json similarity index 100% rename from src/data/cmor_tables/AUS2200_A3hr.json rename 
to src/mopdata/cmor_tables/AUS2200_A3hr.json diff --git a/src/data/cmor_tables/AUS2200_A6hr.json b/src/mopdata/cmor_tables/AUS2200_A6hr.json similarity index 100% rename from src/data/cmor_tables/AUS2200_A6hr.json rename to src/mopdata/cmor_tables/AUS2200_A6hr.json diff --git a/src/data/cmor_tables/AUS2200_Aday.json b/src/mopdata/cmor_tables/AUS2200_Aday.json similarity index 100% rename from src/data/cmor_tables/AUS2200_Aday.json rename to src/mopdata/cmor_tables/AUS2200_Aday.json diff --git a/src/data/cmor_tables/AUS2200_fx.json b/src/mopdata/cmor_tables/AUS2200_fx.json similarity index 100% rename from src/data/cmor_tables/AUS2200_fx.json rename to src/mopdata/cmor_tables/AUS2200_fx.json diff --git a/src/data/cmor_tables/CM2_3hr.json b/src/mopdata/cmor_tables/CM2_3hr.json similarity index 100% rename from src/data/cmor_tables/CM2_3hr.json rename to src/mopdata/cmor_tables/CM2_3hr.json diff --git a/src/data/cmor_tables/CM2_6hr.json b/src/mopdata/cmor_tables/CM2_6hr.json similarity index 100% rename from src/data/cmor_tables/CM2_6hr.json rename to src/mopdata/cmor_tables/CM2_6hr.json diff --git a/src/data/cmor_tables/CM2_day.json b/src/mopdata/cmor_tables/CM2_day.json similarity index 100% rename from src/data/cmor_tables/CM2_day.json rename to src/mopdata/cmor_tables/CM2_day.json diff --git a/src/data/cmor_tables/CM2_mon.json b/src/mopdata/cmor_tables/CM2_mon.json similarity index 100% rename from src/data/cmor_tables/CM2_mon.json rename to src/mopdata/cmor_tables/CM2_mon.json diff --git a/src/data/cmor_tables/CMIP6_3hr.json b/src/mopdata/cmor_tables/CMIP6_3hr.json similarity index 100% rename from src/data/cmor_tables/CMIP6_3hr.json rename to src/mopdata/cmor_tables/CMIP6_3hr.json diff --git a/src/data/cmor_tables/CMIP6_6hrLev.json b/src/mopdata/cmor_tables/CMIP6_6hrLev.json similarity index 100% rename from src/data/cmor_tables/CMIP6_6hrLev.json rename to src/mopdata/cmor_tables/CMIP6_6hrLev.json diff --git a/src/data/cmor_tables/CMIP6_6hrPlev.json b/src/mopdata/cmor_tables/CMIP6_6hrPlev.json similarity index 100% rename from src/data/cmor_tables/CMIP6_6hrPlev.json rename to src/mopdata/cmor_tables/CMIP6_6hrPlev.json diff --git a/src/data/cmor_tables/CMIP6_6hrPlevPt.json b/src/mopdata/cmor_tables/CMIP6_6hrPlevPt.json similarity index 100% rename from src/data/cmor_tables/CMIP6_6hrPlevPt.json rename to src/mopdata/cmor_tables/CMIP6_6hrPlevPt.json diff --git a/src/data/cmor_tables/CMIP6_AERday.json b/src/mopdata/cmor_tables/CMIP6_AERday.json similarity index 100% rename from src/data/cmor_tables/CMIP6_AERday.json rename to src/mopdata/cmor_tables/CMIP6_AERday.json diff --git a/src/data/cmor_tables/CMIP6_AERhr.json b/src/mopdata/cmor_tables/CMIP6_AERhr.json similarity index 100% rename from src/data/cmor_tables/CMIP6_AERhr.json rename to src/mopdata/cmor_tables/CMIP6_AERhr.json diff --git a/src/data/cmor_tables/CMIP6_AERmon.json b/src/mopdata/cmor_tables/CMIP6_AERmon.json similarity index 100% rename from src/data/cmor_tables/CMIP6_AERmon.json rename to src/mopdata/cmor_tables/CMIP6_AERmon.json diff --git a/src/data/cmor_tables/CMIP6_AERmonZ.json b/src/mopdata/cmor_tables/CMIP6_AERmonZ.json similarity index 100% rename from src/data/cmor_tables/CMIP6_AERmonZ.json rename to src/mopdata/cmor_tables/CMIP6_AERmonZ.json diff --git a/src/data/cmor_tables/CMIP6_Amon.json b/src/mopdata/cmor_tables/CMIP6_Amon.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Amon.json rename to src/mopdata/cmor_tables/CMIP6_Amon.json diff --git a/src/data/cmor_tables/CMIP6_CF3hr.json 
b/src/mopdata/cmor_tables/CMIP6_CF3hr.json similarity index 100% rename from src/data/cmor_tables/CMIP6_CF3hr.json rename to src/mopdata/cmor_tables/CMIP6_CF3hr.json diff --git a/src/data/cmor_tables/CMIP6_CFday.json b/src/mopdata/cmor_tables/CMIP6_CFday.json similarity index 100% rename from src/data/cmor_tables/CMIP6_CFday.json rename to src/mopdata/cmor_tables/CMIP6_CFday.json diff --git a/src/data/cmor_tables/CMIP6_CFmon.json b/src/mopdata/cmor_tables/CMIP6_CFmon.json similarity index 100% rename from src/data/cmor_tables/CMIP6_CFmon.json rename to src/mopdata/cmor_tables/CMIP6_CFmon.json diff --git a/src/data/cmor_tables/CMIP6_CFsubhr.json b/src/mopdata/cmor_tables/CMIP6_CFsubhr.json similarity index 100% rename from src/data/cmor_tables/CMIP6_CFsubhr.json rename to src/mopdata/cmor_tables/CMIP6_CFsubhr.json diff --git a/src/data/cmor_tables/CMIP6_CV.json b/src/mopdata/cmor_tables/CMIP6_CV.json similarity index 100% rename from src/data/cmor_tables/CMIP6_CV.json rename to src/mopdata/cmor_tables/CMIP6_CV.json diff --git a/src/data/cmor_tables/CMIP6_E1hr.json b/src/mopdata/cmor_tables/CMIP6_E1hr.json similarity index 100% rename from src/data/cmor_tables/CMIP6_E1hr.json rename to src/mopdata/cmor_tables/CMIP6_E1hr.json diff --git a/src/data/cmor_tables/CMIP6_E1hrClimMon.json b/src/mopdata/cmor_tables/CMIP6_E1hrClimMon.json similarity index 100% rename from src/data/cmor_tables/CMIP6_E1hrClimMon.json rename to src/mopdata/cmor_tables/CMIP6_E1hrClimMon.json diff --git a/src/data/cmor_tables/CMIP6_E3hr.json b/src/mopdata/cmor_tables/CMIP6_E3hr.json similarity index 100% rename from src/data/cmor_tables/CMIP6_E3hr.json rename to src/mopdata/cmor_tables/CMIP6_E3hr.json diff --git a/src/data/cmor_tables/CMIP6_E3hrPt.json b/src/mopdata/cmor_tables/CMIP6_E3hrPt.json similarity index 100% rename from src/data/cmor_tables/CMIP6_E3hrPt.json rename to src/mopdata/cmor_tables/CMIP6_E3hrPt.json diff --git a/src/data/cmor_tables/CMIP6_E6hrZ.json b/src/mopdata/cmor_tables/CMIP6_E6hrZ.json similarity index 100% rename from src/data/cmor_tables/CMIP6_E6hrZ.json rename to src/mopdata/cmor_tables/CMIP6_E6hrZ.json diff --git a/src/data/cmor_tables/CMIP6_Eday.json b/src/mopdata/cmor_tables/CMIP6_Eday.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Eday.json rename to src/mopdata/cmor_tables/CMIP6_Eday.json diff --git a/src/data/cmor_tables/CMIP6_EdayZ.json b/src/mopdata/cmor_tables/CMIP6_EdayZ.json similarity index 100% rename from src/data/cmor_tables/CMIP6_EdayZ.json rename to src/mopdata/cmor_tables/CMIP6_EdayZ.json diff --git a/src/data/cmor_tables/CMIP6_Efx.json b/src/mopdata/cmor_tables/CMIP6_Efx.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Efx.json rename to src/mopdata/cmor_tables/CMIP6_Efx.json diff --git a/src/data/cmor_tables/CMIP6_Emon.json b/src/mopdata/cmor_tables/CMIP6_Emon.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Emon.json rename to src/mopdata/cmor_tables/CMIP6_Emon.json diff --git a/src/data/cmor_tables/CMIP6_EmonZ.json b/src/mopdata/cmor_tables/CMIP6_EmonZ.json similarity index 100% rename from src/data/cmor_tables/CMIP6_EmonZ.json rename to src/mopdata/cmor_tables/CMIP6_EmonZ.json diff --git a/src/data/cmor_tables/CMIP6_Esubhr.json b/src/mopdata/cmor_tables/CMIP6_Esubhr.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Esubhr.json rename to src/mopdata/cmor_tables/CMIP6_Esubhr.json diff --git a/src/data/cmor_tables/CMIP6_Eyr.json b/src/mopdata/cmor_tables/CMIP6_Eyr.json similarity index 100% rename from 
src/data/cmor_tables/CMIP6_Eyr.json rename to src/mopdata/cmor_tables/CMIP6_Eyr.json diff --git a/src/data/cmor_tables/CMIP6_IfxAnt.json b/src/mopdata/cmor_tables/CMIP6_IfxAnt.json similarity index 100% rename from src/data/cmor_tables/CMIP6_IfxAnt.json rename to src/mopdata/cmor_tables/CMIP6_IfxAnt.json diff --git a/src/data/cmor_tables/CMIP6_IfxGre.json b/src/mopdata/cmor_tables/CMIP6_IfxGre.json similarity index 100% rename from src/data/cmor_tables/CMIP6_IfxGre.json rename to src/mopdata/cmor_tables/CMIP6_IfxGre.json diff --git a/src/data/cmor_tables/CMIP6_ImonAnt.json b/src/mopdata/cmor_tables/CMIP6_ImonAnt.json similarity index 100% rename from src/data/cmor_tables/CMIP6_ImonAnt.json rename to src/mopdata/cmor_tables/CMIP6_ImonAnt.json diff --git a/src/data/cmor_tables/CMIP6_ImonGre.json b/src/mopdata/cmor_tables/CMIP6_ImonGre.json similarity index 100% rename from src/data/cmor_tables/CMIP6_ImonGre.json rename to src/mopdata/cmor_tables/CMIP6_ImonGre.json diff --git a/src/data/cmor_tables/CMIP6_IyrAnt.json b/src/mopdata/cmor_tables/CMIP6_IyrAnt.json similarity index 100% rename from src/data/cmor_tables/CMIP6_IyrAnt.json rename to src/mopdata/cmor_tables/CMIP6_IyrAnt.json diff --git a/src/data/cmor_tables/CMIP6_IyrGre.json b/src/mopdata/cmor_tables/CMIP6_IyrGre.json similarity index 100% rename from src/data/cmor_tables/CMIP6_IyrGre.json rename to src/mopdata/cmor_tables/CMIP6_IyrGre.json diff --git a/src/data/cmor_tables/CMIP6_LImon.json b/src/mopdata/cmor_tables/CMIP6_LImon.json similarity index 100% rename from src/data/cmor_tables/CMIP6_LImon.json rename to src/mopdata/cmor_tables/CMIP6_LImon.json diff --git a/src/data/cmor_tables/CMIP6_Lmon.json b/src/mopdata/cmor_tables/CMIP6_Lmon.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Lmon.json rename to src/mopdata/cmor_tables/CMIP6_Lmon.json diff --git a/src/data/cmor_tables/CMIP6_Oclim.json b/src/mopdata/cmor_tables/CMIP6_Oclim.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Oclim.json rename to src/mopdata/cmor_tables/CMIP6_Oclim.json diff --git a/src/data/cmor_tables/CMIP6_Oday.json b/src/mopdata/cmor_tables/CMIP6_Oday.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Oday.json rename to src/mopdata/cmor_tables/CMIP6_Oday.json diff --git a/src/data/cmor_tables/CMIP6_Odec.json b/src/mopdata/cmor_tables/CMIP6_Odec.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Odec.json rename to src/mopdata/cmor_tables/CMIP6_Odec.json diff --git a/src/data/cmor_tables/CMIP6_Ofx.json b/src/mopdata/cmor_tables/CMIP6_Ofx.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Ofx.json rename to src/mopdata/cmor_tables/CMIP6_Ofx.json diff --git a/src/data/cmor_tables/CMIP6_Omon.json b/src/mopdata/cmor_tables/CMIP6_Omon.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Omon.json rename to src/mopdata/cmor_tables/CMIP6_Omon.json diff --git a/src/data/cmor_tables/CMIP6_Oyr.json b/src/mopdata/cmor_tables/CMIP6_Oyr.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Oyr.json rename to src/mopdata/cmor_tables/CMIP6_Oyr.json diff --git a/src/data/cmor_tables/CMIP6_SIday.json b/src/mopdata/cmor_tables/CMIP6_SIday.json similarity index 100% rename from src/data/cmor_tables/CMIP6_SIday.json rename to src/mopdata/cmor_tables/CMIP6_SIday.json diff --git a/src/data/cmor_tables/CMIP6_SImon.json b/src/mopdata/cmor_tables/CMIP6_SImon.json similarity index 100% rename from src/data/cmor_tables/CMIP6_SImon.json rename to 
src/mopdata/cmor_tables/CMIP6_SImon.json diff --git a/src/data/cmor_tables/CMIP6_coordinate.json b/src/mopdata/cmor_tables/CMIP6_coordinate.json similarity index 100% rename from src/data/cmor_tables/CMIP6_coordinate.json rename to src/mopdata/cmor_tables/CMIP6_coordinate.json diff --git a/src/data/cmor_tables/CMIP6_day.json b/src/mopdata/cmor_tables/CMIP6_day.json similarity index 100% rename from src/data/cmor_tables/CMIP6_day.json rename to src/mopdata/cmor_tables/CMIP6_day.json diff --git a/src/data/cmor_tables/CMIP6_formula_terms.json b/src/mopdata/cmor_tables/CMIP6_formula_terms.json similarity index 100% rename from src/data/cmor_tables/CMIP6_formula_terms.json rename to src/mopdata/cmor_tables/CMIP6_formula_terms.json diff --git a/src/data/cmor_tables/CMIP6_fx.json b/src/mopdata/cmor_tables/CMIP6_fx.json similarity index 100% rename from src/data/cmor_tables/CMIP6_fx.json rename to src/mopdata/cmor_tables/CMIP6_fx.json diff --git a/src/data/cmor_tables/CMIP6_grids.json b/src/mopdata/cmor_tables/CMIP6_grids.json similarity index 100% rename from src/data/cmor_tables/CMIP6_grids.json rename to src/mopdata/cmor_tables/CMIP6_grids.json diff --git a/src/data/dreq/cmvme_all_piControl_3_3.csv b/src/mopdata/dreq/cmvme_all_piControl_3_3.csv similarity index 100% rename from src/data/dreq/cmvme_all_piControl_3_3.csv rename to src/mopdata/dreq/cmvme_all_piControl_3_3.csv diff --git a/src/data/land_tiles.yaml b/src/mopdata/land_tiles.yaml similarity index 100% rename from src/data/land_tiles.yaml rename to src/mopdata/land_tiles.yaml diff --git a/src/data/landtype.yaml b/src/mopdata/landtype.yaml similarity index 100% rename from src/data/landtype.yaml rename to src/mopdata/landtype.yaml diff --git a/src/data/latlon_vertices.yaml b/src/mopdata/latlon_vertices.yaml similarity index 100% rename from src/data/latlon_vertices.yaml rename to src/mopdata/latlon_vertices.yaml diff --git a/src/data/model_levels.yaml b/src/mopdata/model_levels.yaml similarity index 100% rename from src/data/model_levels.yaml rename to src/mopdata/model_levels.yaml diff --git a/src/data/notes.yaml b/src/mopdata/notes.yaml similarity index 100% rename from src/data/notes.yaml rename to src/mopdata/notes.yaml diff --git a/src/data/transport_lines.yaml b/src/mopdata/transport_lines.yaml similarity index 100% rename from src/data/transport_lines.yaml rename to src/mopdata/transport_lines.yaml diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 335a367..c4fdb38 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -122,7 +122,7 @@ def check_cmor(ctx, dbname): mopdb_log = logging.getLogger('mopdb_log') # connect to db, this will create one if not existing if dbname == 'default': - dbname = import_files('data').joinpath('access.db') + dbname = import_files('mopdata').joinpath('access.db') conn = db_connect(dbname) # get list of variables already in db sql = 'SELECT name, out_name FROM cmorvar' @@ -175,7 +175,7 @@ def cmor_table(ctx, dbname, fname, alias, label): mopdb_log = logging.getLogger('mopdb_log') # connect to db, this will create one if not existing if dbname == 'default': - dbname = import_files('data').joinpath('access.db') + dbname = import_files('mopdata').joinpath('access.db') conn = db_connect(dbname) # get list of variables already in db sql = "SELECT out_name, frequency, modeling_realm FROM cmorvar" @@ -251,7 +251,7 @@ def update_cmor(ctx, dbname, fname, alias): alias = alias.replace('.json', '') mopdb_log.info(f"Adding {alias} to variable name to track origin") # connect to db, this will create 
one if not existing - dbcentral = import_files('data').joinpath('access.db') + dbcentral = import_files('mopdata').joinpath('access.db') if dbname in [dbcentral, 'default']: mopdb_log.error("The package database cannot be updated") sys.exit() @@ -287,6 +287,7 @@ def update_cmor(ctx, dbname, fname, alias): sys.exit() # insert new vars and update existing ones update_db(conn, 'cmorvar', vars_list) + conn.close() return @@ -324,19 +325,19 @@ def map_template(ctx, fpath, match, dbname, version, alias): ------- """ mopdb_log = logging.getLogger('mopdb_log') + # connect to db, this will create one if not existing + if dbname == 'default': + dbname = import_files('mopdata').joinpath('access.db') + conn = db_connect(dbname) # work out if fpath is varlist or path to output fpath = Path(fpath) if fpath.is_file(): fname = fpath.name else: mopdb_log.debug(f"Calling model_vars() from template: {fpath}") - fname, vobjs = model_vars(fpath, match, dbname, version, alias) + fname, vobjs, fobjs = model_vars(fpath, match, conn, version, alias) if alias == '': alias = fname.split(".")[0] - # connect to db, check first if db exists or exit - if dbname == 'default': - dbname = import_files('data').joinpath('access.db') - conn = db_connect(dbname) # read list of vars from file with open(fname, 'r') as csvfile: reader = csv.DictReader(csvfile, delimiter=';') @@ -344,24 +345,9 @@ def map_template(ctx, fpath, match, dbname, version, alias): check_varlist(rows, fname) # return lists of fully/partially matching variables and stash_vars # these are input_vars for calculation defined in already in mapping db - full, no_ver, no_frq, stdn, no_match, stash_vars = parse_vars(conn, - rows, version) - - # remove duplicates from partially matched variables - no_ver = remove_duplicate(no_ver) - no_frq = remove_duplicate(no_frq, strict=False) - no_match = remove_duplicate(no_match, strict=False) - - # check if more derived variables can be added based on all - # input_vars being available - pot_full, pot_part, pot_varnames = potential_vars(conn, rows, - stash_vars, version) + parsed = map_variables(conn, rows, version) # potential vars have always duplicates: 1 for each input_var - pot_full = remove_duplicate(pot_full, strict=False) - pot_part = remove_duplicate(pot_part, extra=pot_full, strict=False) - mopdb_log.info(f"Derived variables: {pot_varnames}") - write_map_template(conn, full, no_ver, no_frq, stdn, - no_match, pot_full, pot_part, alias) + write_map_template(conn, parsed, alias) conn.close() return @@ -370,7 +356,7 @@ def map_template(ctx, fpath, match, dbname, version, alias): @mopdb.command(name='intake') @map_args @click.pass_context -def write_catalogue(ctx, fpath, match, dbname, version, alias): +def write_intake(ctx, fpath, match, dbname, version, alias): """Writes an intake-esm catalogue. 
It can get as input the directory containing the output in @@ -398,19 +384,32 @@ def write_catalogue(ctx, fpath, match, dbname, version, alias): ------- """ mopdb_log = logging.getLogger('mopdb_log') + # connect to db, check first if db exists or exit + if dbname == 'default': + dbname = import_files('mopdata').joinpath('access.db') + conn = db_connect(dbname) # work out if fpath is varlist or path to output fpath = Path(fpath) if fpath.is_file(): fname = fpath.name else: mopdb_log.debug(f"Calling model_vars() from intake: {fpath}") - fname, vobjs = model_vars(fpath, match, dbname, version, alias) - if alias == '': + fname, vobjs, fobjs = model_vars(fpath, match, conn, version, alias) + if alias == '' alias = fname.split(".")[0] - # connect to db, check first if db exists or exit - if dbname == 'default': - dbname = import_files('data').joinpath('access.db') - conn = db_connect(dbname) + # read list of vars from file + with open(fname, 'r') as csvfile: + reader = csv.DictReader(csvfile, delimiter=';') + rows = list(reader) + check_varlist(rows, fname) + # return lists of fully/partially matching variables and stash_vars + # these are input_vars for calculation defined in already in mapping db + parsed = map_variables(conn, rows, version) + # potential vars have always duplicates: 1 for each input_var + cat_name, fcsv = write_catalogue(conn, parsed, vobjs, fobjs, alias) + mopdb_log.info("Intake-esm catalogue written to {cat_name} and {fcsv}") + conn.close() + return None @mopdb.command(name='map') @@ -438,7 +437,7 @@ def update_map(ctx, dbname, fname, alias): """ mopdb_log = logging.getLogger('mopdb_log') # connect to db, this will create one if not existing - dbcentral = import_files('data').joinpath('access.db') + dbcentral = import_files('mopdata').joinpath('access.db') if dbname in [dbcentral, 'default']: mopdb_log.error("The package database cannot be updated") sys.exit() @@ -459,7 +458,8 @@ def update_map(ctx, dbname, fname, alias): var_list = read_map(fname, alias) # update mapping table update_db(conn, 'mapping', var_list) - return + conn.close() + return None @mopdb.command(name='varlist') @@ -467,11 +467,17 @@ def update_map(ctx, dbname, fname, alias): @click.pass_context def list_vars(ctx, fpath, match, dbname, version, alias): """Calls model_vars to generate list of variables""" - fname, vobjs = model_vars(fpath, match, dbname, version, alias) + # connect to db, check first if db exists or exit + if dbname == 'default': + dbname = import_files('mopdata').joinpath('access.db') + conn = db_connect(dbname) + fname, vobjs, fobjs = model_vars(fpath, match, conn, version, alias) + conn.close() + return None @click.pass_context -def model_vars(ctx, fpath, match, dbname, version, alias): +def model_vars(ctx, fpath, match, conn, version, alias): """Read variables from model output opens one file for each kind, save variable list as csv file @@ -498,13 +504,8 @@ def model_vars(ctx, fpath, match, dbname, version, alias): """ mopdb_log = logging.getLogger('mopdb_log') - # connect to db, this will create one if not existing - if dbname == 'default': - dbname = import_files('data').joinpath('access.db') - conn = db_connect(dbname) - fname, vobjs = write_varlist(conn, fpath, match, version, alias) - conn.close() - return fname, vobjs + fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) + return fname, vobjs, fobjs @mopdb.command(name='del') @@ -536,7 +537,7 @@ def remove_record(ctx, dbname, table, pair): """ mopdb_log = logging.getLogger('mopdb_log') # connect to db, this 
will create one if not existing - dbcentral = import_files('data').joinpath('access.db') + dbcentral = import_files('mopdata').joinpath('access.db') if dbname == dbcentral: mopdb_log.error("The package database cannot be updated") sys.exit() @@ -548,4 +549,5 @@ def remove_record(ctx, dbname, table, pair): col = "cmor_var,frequency,realm,cmor_table" # select, confirm, delete record/s delete_record(conn, table, col, pair) + conn.close() return diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index 41381ef..a592465 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -33,6 +33,7 @@ def __init__(self, fpattern: str, fpath: Path): self.frequency = self.get_frequency() self.version = '' self.multiple_frq = False + self.varlist = [] def get_frequency(self): frequency = 'NAfrq' diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index ca8560a..79161b7 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -26,13 +26,16 @@ import csv import json import stat +import lzma import xarray as xr import numpy as np import math + from datetime import datetime, date from collections import Counter from operator import itemgetter, attrgetter from pathlib import Path +from importlib.resources import files as import_files from mopdb.mopdb_class import FPattern, Variable @@ -222,7 +225,6 @@ def update_db(conn, table, rows_list): c.executemany(sql, rows_list) nmodified = c.rowcount mopdb_log.info(f"Rows modified: {nmodified}") - conn.close() mopdb_log.info('--- Done ---') return @@ -420,7 +422,6 @@ def delete_record(conn, table, col, pairs): mopdb_log.info(f"Rows modified: {c.fetchall()[0][0]}") else: mopdb_log.info("The query did not return any records") - conn.close() return @@ -500,9 +501,10 @@ def write_varlist(conn, indir, match, version, alias): '_realm','cell_methods','cmor_table','vtype','size', 'nsteps','fobj.fpattern','long_name','standard_name'] vobj_list = [] + fobj_list = [] + patterns = [] files = FPattern.list_files(indir, match) mopdb_log.debug(f"Files after sorting: {files}") - patterns = [] if alias == '': alias = 'mopdb' fname = f"varlist_{alias}.csv" @@ -538,6 +540,7 @@ def write_varlist(conn, indir, match, version, alias): mopdb_log.debug(f"Multiple frq: {fobj.multiple_frq}") if fobj.realm == "NArealm": fobj.realm = get_realm(version, ds) + pattern_var_list = [] for vname in ds.variables: vobj = Variable(vname, fobj) if vname not in coords and all(x not in vname for x in ['_bnds','_bounds']): @@ -566,9 +569,12 @@ def write_varlist(conn, indir, match, version, alias): line = [attrgetter(k)(vobj) for k in line_cols] fwriter.writerow(line) vobj_list.append(vobj) + pattern_var_list.append(vobj.name) + fjob.varlist = pattern_var_list + fjob_list.append(fobj) mopdb_log.info(f"Variable list for {fpattern} successfully written") fcsv.close() - return fname, vobj_list + return fname, vobj_list, fobj_list def read_map_app4(fname): @@ -602,7 +608,7 @@ def read_map(fname, alias): Fields from file: cmor_var, input_vars, calculation, units, dimensions, frequency, realm, cell_methods, positive, cmor_table, version, vtype, size, nsteps, - filename, long_name, standard_name + fpattern, long_name, standard_name Fields in table: cmor_var, input_vars, calculation, units, dimensions, frequency, realm, cell_methods, positive, model, notes, origin @@ -846,28 +852,28 @@ def potential_vars(conn, rows, stash_vars, version): return pot_full, pot_part, pot_varnames -def write_map_template(conn, full, no_ver, no_frq, stdn, - no_match, pot_full, pot_part, 
alias):
+def write_map_template(conn, parsed, alias):
     """Write mapping csv file template based on list of variables to define

     Input varlist file order:
     name, cmor_var, units, dimensions, frequency, realm, cell_methods,
-    cmor_table, vtype, size, nsteps, filename, long_name, standard_name
+    cmor_table, vtype, size, nsteps, fpattern, long_name, standard_name
     Mapping db order:
     cmor_var, input_vars, calculation, units, dimensions, frequency, realm,
     cell_methods, positive, cmor_table, model, notes, origin
-    for pot vars + vtype, size, nsteps, filename
+    for pot vars + vtype, size, nsteps, fpattern
     Final template order:
     cmor_var, input_vars, calculation, units, dimensions, frequency, realm,
-    cell_methods, positive, cmor_table, version, vtype, size, nsteps, filename,
+    cell_methods, positive, cmor_table, version, vtype, size, nsteps, fpattern,
     long_name, standard_name
     """
     mopdb_log = logging.getLogger('mopdb_log')
+    full, no_ver, no_frq, stdn, no_match, pot_full, pot_part = parsed
     keys = ['cmor_var', 'input_vars', 'calculation', 'units',
         'dimensions', 'frequency', 'realm', 'cell_methods',
         'positive', 'cmor_table', 'version', 'vtype', 'size',
-        'nsteps', 'filename', 'long_name', 'standard_name']
+        'nsteps', 'fpattern', 'long_name', 'standard_name']

     with open(f"map_{alias}.csv", 'w') as fcsv:
         fwriter = csv.DictWriter(fcsv, keys, delimiter=';')
@@ -875,7 +881,6 @@ def write_map_template(conn, full, no_ver, no_frq, stdn,
         div = ("# Derived variables with matching version and "
             + "frequency: Use with caution!")
         write_vars(pot_full, fwriter, div, conn=conn)
-        #pot=True, conn=conn, sortby=0)
         div = ("# Variables definitions coming from different "
             + "version")
         write_vars(no_ver, fwriter, div, conn=conn)
@@ -982,3 +987,69 @@ def check_varlist(rows, fname):
             Some values might be invalid and need fixing""")
         sys.exit()
     return
+
+
+def map_variables(conn, rows, version):
+    """
+    """
+    mopdb_log = logging.getLogger('mopdb_log')
+    # return lists of fully/partially matching variables and stash_vars
+    # these are input_vars for calculation defined in already in mapping db
+    full, no_ver, no_frq, stdn, no_match, stash_vars = parse_vars(conn,
+        rows, version)
+    # remove duplicates from partially matched variables
+    no_ver = remove_duplicate(no_ver)
+    no_frq = remove_duplicate(no_frq, strict=False)
+    no_match = remove_duplicate(no_match, strict=False)
+    # check if more derived variables can be added based on all
+    # input_vars being available
+    pot_full, pot_part, pot_varnames = potential_vars(conn, rows,
+        stash_vars, version)
+    # potential vars have always duplicates: 1 for each input_var
+    pot_full = remove_duplicate(pot_full, strict=False)
+    pot_part = remove_duplicate(pot_part, extra=pot_full, strict=False)
+    mopdb_log.info(f"Derived variables: {pot_varnames}")
+    return full, no_ver, no_frq, stdn, no_match, pot_full, pot_part
+
+
+def write_catalogue(conn, parsed, vobjs, fobjs, alias):
+    """Write intake-esm catalogue and returns name
+    """
+    mopdb_log = logging.getLogger('mopdb_log')
+    # read template json data
+    jfile = import_files('mopdata').joinpath('intake_cat_template.json')
+    with open(jfile, 'r') as f:
+        template = json.load(f)
+    mopdb_log.debug("Opened intake template file")
+    # update json data with relevant information
+    # update title, description etc with experiment
+    for k,v in template.items():
+        if type(v) == str:
+            template[k] = v.replace('<experiment>', alias)
+    # write updated json to file
+    jfile = f"intake_{alias}.json"
+    with open(jfile, 'w') as f:
+        json.dump(template, f, indent=4)
+    # create a
dictionary for each file to list + for pat_obj in fobjs: + var_list = get_pattern_vars. + base_dict = {'experiment': alias, + 'realm': = pat_obj.realm, + 'realm': = pat_obj.realm, + # write csv file + csvname = template['catalog_file'] + with lzma.open(csvname, 'wt') as fcsv: + fwriter = csv.DictWriter(fcsv, keys, delimiter=',') + for f in files_dict: + fwriter.writerow(f) + fcsv.close() + return jfile, csvname + +"experiment" + "column_name": "realm" + "column_name": "frequency" + "variable" + "column_name": "map_var" + "column_name": "map_table" + "column_name": "standard_name" + "column_name": "date_range" diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py index d217aef..954d72a 100644 --- a/src/mopper/calculations.py +++ b/src/mopper/calculations.py @@ -153,7 +153,7 @@ class IceTransportCalculations(): @click.pass_context def __init__(self, ctx): - fname = import_files('data').joinpath('transport_lines.yaml') + fname = import_files('mopdata').joinpath('transport_lines.yaml') self.yaml_data = read_yaml(fname)['lines'] self.gridfile = xr.open_dataset(f"{ctx.obj['ancils_path']}/"+ @@ -568,7 +568,7 @@ class SeaIceCalculations(): @click.pass_context def __init__(self, ctx): - fname = import_files('data').joinpath('transport_lines.yaml') + fname = import_files('mopdata').joinpath('transport_lines.yaml') self.yaml_data = read_yaml(fname)['lines'] self.gridfile = xr.open_dataset(f"{ctx.obj['ancil_path']}/" + @@ -1004,7 +1004,7 @@ def extract_tilefrac(ctx, tilefrac, tilenum, landfrac=None, lev=None): vout = vout * landfrac if lev: - fname = import_files('data').joinpath('landtype.yaml') + fname = import_files('mopdata').joinpath('landtype.yaml') data = read_yaml(fname) type_dict = data['mod_mapping'] vout = vout.expand_dims(dim={lev: type_dict[lev]}) @@ -1147,7 +1147,7 @@ def average_tile(var, tilefrac=None, lfrac=1, landfrac=None, lev=None): vout = vout * landfrac if lev: - fname = import_files('data').joinpath('landtype.yaml') + fname = import_files('mopdata').joinpath('landtype.yaml') data = read_yaml(fname) type_dict = data['mod_mapping'] vout = vout.expand_dims(dim={lev: type_dict[lev]}) diff --git a/src/mopper/mop_setup.py b/src/mopper/mop_setup.py index 90ba47e..5bc5634 100755 --- a/src/mopper/mop_setup.py +++ b/src/mopper/mop_setup.py @@ -256,7 +256,7 @@ def var_map(ctx, activity_id=None): access_version = ctx.obj['access_version'] if ctx.obj['force_dreq'] is True: if ctx.obj['dreq'] == 'default': - ctx.obj['dreq'] = import_files('data').joinpath( + ctx.obj['dreq'] = import_files('mopdata').joinpath( 'data/dreq/cmvme_all_piControl_3_3.csv' ) with ctx.obj['master_map'].open(mode='r') as f: reader = csv.DictReader(f, delimiter=';') @@ -300,7 +300,7 @@ def create_var_map(ctx, table, mappings, activity_id=None, matches = [] fpath = ctx.obj['tables_path'] / f"{table}.json" if not fpath.exists(): - fpath = import_files('data').joinpath( + fpath = import_files('mopdata').joinpath( f"cmor_tables/{table}.json") table_id = table.split('_')[1] mop_log.debug(f"Mappings: {mappings}") @@ -406,7 +406,7 @@ def manage_env(ctx): '_control_vocabulary_file']: fpath = ctx.obj['tables_path'] / ctx.obj[f] if not fpath.exists(): - fpath = import_files('data').joinpath( + fpath = import_files('mopdata').joinpath( f"cmor_tables/{ctx.obj[f]}") if f == '_control_vocabulary_file': fname = "CMIP6_CV.json" diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index 6017b68..2eb9695 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -575,7 +575,7 @@ def get_coords(ctx, 
ovar, coords):
     ds = xr.open_dataset(f"{ctx.obj['ancils_path']}/{ancil_file}")
     var_log.debug(f"ancil ds: {ds}")
     # read lat/lon and vertices mapping
-    cfile = import_files('data').joinpath('latlon_vertices.yaml')
+    cfile = import_files('mopdata').joinpath('latlon_vertices.yaml')
     with open(cfile, 'r') as yfile:
         data = yaml.safe_load(yfile)
     ll_dict = data[ctx.obj['realm']]
@@ -901,7 +901,7 @@ def define_attrs(ctx):
     attrs = ctx.obj['attrs']
     notes = attrs.get('notes', '')
     # open file containing notes
-    fname = import_files('data').joinpath('notes.yaml')
+    fname = import_files('mopdata').joinpath('notes.yaml')
     data = read_yaml(fname)['notes']
     # check all fields and if any of their keys (e.g. a specific variable)
     # match the field value for the file being processed
diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py
index ef2c1ec..5b1b36c 100755
--- a/src/mopper/setup_utils.py
+++ b/src/mopper/setup_utils.py
@@ -40,7 +40,6 @@ from collections import OrderedDict
 from datetime import datetime#, timedelta
 from dateutil.relativedelta import relativedelta
-from importlib.resources import files as import_files
 from json.decoder import JSONDecodeError

 from mopdb.mopdb_utils import query
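The data -> mopdata rename above also changes how every packaged file is located: each hard-coded 'data' lookup now goes through importlib.resources with the new package name. A minimal sketch of that lookup pattern, reusing the 'mopdata' package and 'access.db' names from the hunks above (the print is illustrative only):

    from importlib.resources import files as import_files

    # resolve a file shipped as package data inside src/mopdata;
    # files() needs an importable package name, which is why the
    # directory had to become a valid Python identifier
    dbname = import_files('mopdata').joinpath('access.db')
    print(dbname.is_file())  # True once the package is installed

Because files() returns a Traversable, the same joinpath() call resolves for wheel, conda and editable installs alike, with no assumptions about where the package lives on disk.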
From 8fa654cc26809672e338409170f5f8cb013bb06f Mon Sep 17 00:00:00 2001
From: Sam Green
Date: Fri, 12 Jul 2024 17:34:14 +1000
Subject: [PATCH 059/137] Rename mopper-conda.yaml to mopper-test-calcs.yaml

---
 .github/workflows/{mopper-conda.yaml => mopper-test-calcs.yaml} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename .github/workflows/{mopper-conda.yaml => mopper-test-calcs.yaml} (100%)

diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-test-calcs.yaml
similarity index 100%
rename from .github/workflows/mopper-conda.yaml
rename to .github/workflows/mopper-test-calcs.yaml

From c14581c98a2c24ada023ef25e3e1d5077381531b Mon Sep 17 00:00:00 2001
From: Paola Petrelli
Date: Fri, 12 Jul 2024 20:28:17 +1000
Subject: [PATCH 060/137] started reorganising mopdb_utils.py and intake now
 working

---
 .../{mopper-conda.yaml => mopper-pytest.yaml} |   4 -
 conda/enviroment.yaml                         |   1 -
 src/mopdata/intake_cat_template.json          |  51 ++++
 src/mopdata/intake_cat_template.yaml          |  19 ++
 src/mopdb/mopdb.py                            |  37 +--
 src/mopdb/mopdb_class.py                      |   2 +-
 src/mopdb/mopdb_utils.py                      | 227 ++++++------------
 src/mopdb/utils.py                            | 210 ++++++++++++++++
 src/mopper/calculations.py                    |   2 +-
 src/mopper/mop_setup.py                       |   1 +
 src/mopper/mop_utils.py                       |   2 +-
 src/mopper/setup_utils.py                     |  33 +--
 tests/test_mopdb_utils.py                     |  19 ++
 13 files changed, 391 insertions(+), 217 deletions(-)
 rename .github/workflows/{mopper-conda.yaml => mopper-pytest.yaml} (94%)
 create mode 100644 src/mopdata/intake_cat_template.json
 create mode 100644 src/mopdata/intake_cat_template.yaml
 create mode 100644 src/mopdb/utils.py

diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-pytest.yaml
similarity index 94%
rename from .github/workflows/mopper-conda.yaml
rename to .github/workflows/mopper-pytest.yaml
index 4642d83..430693f 100644
--- a/.github/workflows/mopper-conda.yaml
+++ b/.github/workflows/mopper-pytest.yaml
@@ -38,10 +38,6 @@ jobs:
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    - name: install package
-      run: |
-        source activate base
-        pip install ./
     - name: Test with pytest
       run: |
         conda install pytest coverage codecov --solver classic
diff --git a/conda/enviroment.yaml b/conda/enviroment.yaml
index 6e46467..3856ac4 100644
--- a/conda/enviroment.yaml
+++ b/conda/enviroment.yaml
@@ -1,4 +1,3 @@
-name: mopenv
 channels:
   - conda-forge
 dependencies:
diff --git a/src/mopdata/intake_cat_template.json b/src/mopdata/intake_cat_template.json
new file mode 100644
index 0000000..b4549b4
--- /dev/null
+++ b/src/mopdata/intake_cat_template.json
@@ -0,0 +1,51 @@
+{
+    "id": "<experiment>",
+    "title": "<experiment> model output.",
+    "description": "<experiment> raw model output. \nProject: \nMaintained By: \nContact: \nDocumentation:\nLicense: https://creativecommons.org/licenses/by/4.0/\nCitation:\nReferences:\n",
+    "assets": {
+        "column_name": "path",
+        "format": "netcdf"
+    },
+    "aggregation_control": {
+        "variable_column_name": "variable",
+        "groupby_attrs": [
+            "realm",
+            "frequency",
+            "variable"
+        ],
+        "aggregations": [
+            {
+                "type": "join_existing",
+                "attribute_name": "date_range",
+                "options": {
+                    "dim": "time"
+                }
+            }
+        ]
+    },
+    "esmcat_version": "0.1.0",
+    "catalog_file": "catalogue.csv.xz",
+    "attributes": [
+        {
+            "column_name": "experiment"
+        },
+        {
+            "column_name": "realm"
+        },
+        {
+            "column_name": "frequency"
+        },
+        {
+            "column_name": "variable"
+        },
+        {
+            "column_name": "map_var"
+        },
+        {
+            "column_name": "standard_name"
+        },
+        {
+            "column_name": "date"
+        }
+    ]
+}
diff --git a/src/mopdata/intake_cat_template.yaml b/src/mopdata/intake_cat_template.yaml
new file mode 100644
index 0000000..1a04ac4
--- /dev/null
+++ b/src/mopdata/intake_cat_template.yaml
@@ -0,0 +1,19 @@
+metadata:
+  version: 1
+sources:
+  experiment:
+    description: "Intake catalogue to load ACCESS model output"
+    Project: ""
+    Maintained By: ""
+    Contact: ""
+    Documentation: ""
+    License: "https://creativecommons.org/licenses/by/4.0/"
+    Citation: ""
+    References: ""
+    driver: intake_esm.esm_datastore
+    args:
+      obj: "{{CATALOG_DIR}}/catalogue.json"
+      columns_with_iterables:
+      - variable
+      - map_var
+      - standard_name
diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py
index c4fdb38..d495d7b 100644
--- a/src/mopdb/mopdb.py
+++ b/src/mopdb/mopdb.py
@@ -28,6 +28,7 @@
 from importlib.resources import files as import_files

 from mopdb.mopdb_utils import *
+from mopdb.utils import *

 def mopdb_catch():
     """
@@ -100,7 +101,7 @@ def mopdb(ctx, debug):
     ctx.obj={}
     # set up a default value for flow if none selected for logging
     ctx.obj['debug'] = debug
-    mopdb_log = config_log(debug)
+    mopdb_log = config_log(debug, logname='mopdb_log')


 @mopdb.command(name='check')
@@ -123,10 +124,10 @@ def check_cmor(ctx, dbname):
     # connect to db, this will create one if not existing
     if dbname == 'default':
         dbname = import_files('mopdata').joinpath('access.db')
-    conn = db_connect(dbname)
+    conn = db_connect(dbname, logname='mopdb_log')
     # get list of variables already in db
     sql = 'SELECT name, out_name FROM cmorvar'
-    results = query(conn, sql, first=False)
+    results = query(conn, sql, first=False, logname='mopdb_log')
     # first set is the actual cmip variable name
     # second set is the name used in tables to distinguish different dims/freq
     # original maps files use the second style
     cmor_vars = set(x[0] for x in results)
     cmor_vars2 = set(x[1] for x in results)
     cmor_vars.update(cmor_vars2)

     sql = 'SELECT cmor_var FROM mapping'
-    results = query(conn, sql, first=False)
+    results = query(conn, sql, first=False, logname='mopdb_log')
     map_vars = [x[0] for x in results]
     missing = set(map_vars) - set(cmor_vars)
     mopdb_log.info("Variables not yet defined in cmorvar table:")
@@ -176,10 +177,10 @@ def cmor_table(ctx, dbname, fname, alias, label):
     # connect to db, this will create one if not existing
     if dbname == 'default':
         dbname = import_files('mopdata').joinpath('access.db')
-    conn = db_connect(dbname)
+    conn = db_connect(dbname, logname='mopdb_log')
     # get list of variables already in db
     sql = "SELECT out_name, frequency, modeling_realm FROM cmorvar"
-    results = query(conn, sql, first=False)
+    results = query(conn, sql, first=False, logname='mopdb_log')
     # cmor_vars is the actual cmip variable name
     # this sometimes differs from the name used in tables that can distinguish different dims/freq
     cmor_vars = set(x[0] for x in results)
@@ -196,7 +197,7 @@
         else:
             sql = f"SELECT * FROM cmorvar WHERE out_name='{v[0]}'"
-            records = query(conn, sql, first=False)
+            records = query(conn, sql, first=False, logname='mopdb_log')
             record = records[0]
             if len(records) > 1:
                 for r in records:
@@ -255,14 +256,14 @@ def update_cmor(ctx, dbname, fname, alias):
     if dbname in [dbcentral, 'default']:
         mopdb_log.error("The package database cannot be updated")
         sys.exit()
-    conn = db_connect(dbname)
+    conn = db_connect(dbname, logname='mopdb_log')
     # create table if not existing
     table_sql = cmorvar_sql()
-    create_table(conn, table_sql)
+    create_table(conn, table_sql, logname='mopdb_log')
     # get list of variables already in db in debug mode
     if ctx.obj['debug']:
         sql = 'SELECT name FROM cmorvar'
-        results = query(conn, sql, first=False)
+        results = query(conn, sql, first=False, logname='mopdb_log')
         existing_vars = [x[0] for x in results]
         mopdb_log.debug(f"Variables already in db: {existing_vars}")
@@ -328,7 +329,7 @@ def map_template(ctx, fpath, match, dbname, version, alias):
     # connect to db, this will create one if not existing
     if dbname == 'default':
         dbname = import_files('mopdata').joinpath('access.db')
-    conn = db_connect(dbname)
+    conn = db_connect(dbname, logname='mopdb_log')
     # work out if fpath is varlist or path to output
     fpath = Path(fpath)
     if fpath.is_file():
         fname = fpath.name
     else:
         mopdb_log.debug(f"Calling model_vars() from template: {fpath}")
in results] mopdb_log.debug(f"Variables already in db: {existing_vars}") # read list of vars from file @@ -471,6 +472,7 @@ def list_vars(ctx, fpath, match, dbname, version, alias): if dbname == 'default': dbname = import_files('mopdata').joinpath('access.db') conn = db_connect(dbname) + conn = db_connect(dbname, logname='mopdb_log') fname, vobjs, fobjs = model_vars(fpath, match, conn, version, alias) conn.close() return None @@ -542,12 +544,13 @@ def remove_record(ctx, dbname, table, pair): mopdb_log.error("The package database cannot be updated") sys.exit() conn = db_connect(dbname) + conn = db_connect(dbname, logname='mopdb_log') # set which columns to show based on table if table == 'cmorvar': col = "name" elif table == 'mapping': col = "cmor_var,frequency,realm,cmor_table" # select, confirm, delete record/s - delete_record(conn, table, col, pair) + delete_record(conn, table, col, pair, logname='mopdb_log') conn.close() return diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index a592465..9a9aa0e 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -88,7 +88,7 @@ class Variable(): def __init__(self, varname: str, fobj: FPattern): self.name = varname # path object - self.fobj = fobj + self.fpattern = fobj.fpattern #self.fpath = fobj.fpath #self.files = fobj.files # mapping attributes diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 79161b7..17f475e 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -35,54 +35,11 @@ from collections import Counter from operator import itemgetter, attrgetter from pathlib import Path +from itertools import compress from importlib.resources import files as import_files from mopdb.mopdb_class import FPattern, Variable - -def config_log(debug): - """Configures log file""" - # start a logger - logger = logging.getLogger('mopdb_log') - # set a formatter to manage the output format of our handler - formatter = logging.Formatter('%(asctime)s; %(message)s',"%Y-%m-%d %H:%M:%S") - # set the level for the logger, has to be logging.LEVEL not a string - level = logging.INFO - flevel = logging.WARNING - if debug: - level = logging.DEBUG - flevel = logging.DEBUG - logger.setLevel(level) - - # add a handler to send WARNING level messages to console - # or DEBUG level if debug is on - clog = logging.StreamHandler() - clog.setLevel(level) - logger.addHandler(clog) - - # add a handler to send INFO level messages to file - # the messagges will be appended to the same file - # create a new log file every month - day = date.today().strftime("%Y%m%d") - logname = 'mopdb_log_' + day + '.txt' - flog = logging.FileHandler(logname) - try: - os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO); - except OSError: - pass - flog.setLevel(flevel) - flog.setFormatter(formatter) - logger.addHandler(flog) - # return the logger object - return logger - - -def db_connect(db): - """Connects to ACCESS mapping sqlite database""" - mopdb_log = logging.getLogger('mopdb_log') - conn = sqlite3.connect(db, timeout=10, isolation_level=None) - if conn.total_changes == 0: - mopdb_log.info(f"Opened database {db} successfully") - return conn +from mopdb.utils import * def mapping_sql(): @@ -228,50 +185,6 @@ def update_db(conn, table, rows_list): mopdb_log.info('--- Done ---') return - -def query(conn, sql, tup=(), first=True): - """Executes generic sql query and returns row/s - - Parameters - ---------- - conn : connection object - Connection to sqlite database - sql : str - sql string representing query - tup : tuple - 
By default empty, used to pass values when placeholder ? is used - in sql string - first : boolean - By default True will return only first record found, set to False - to return all matching records - - Returns - ------- - result : tuple/list(tuple) - tuple or a list of, representing row/s returned by query - """ - mopdb_log = logging.getLogger('mopdb_log') - with conn: - c = conn.cursor() - c.execute(sql, tup) - if first: - result = c.fetchone() - else: - result = [ x for x in c.fetchall() ] - #columns = [description[0] for description in c.description] - return result - - -def get_columns(conn, table): - """Gets list of columns from db table - """ - mopdb_log = logging.getLogger('mopdb_log') - sql = f'PRAGMA table_info({table});' - table_data = query(conn, sql, first=False) - columns = [x[1] for x in table_data] - return columns - - def get_cmorname(conn, vobj, version): """Queries mapping table for cmip name given variable name as output by the model @@ -280,7 +193,7 @@ def get_cmorname(conn, vobj, version): sql = f"""SELECT cmor_var,model,cmor_table,frequency FROM mapping WHERE input_vars='{vobj.name}' and (calculation='' or calculation IS NULL)""" - results = query(conn, sql, first=False) + results = query(conn, sql, first=False, logname='mopdb_log') names = list(x[0] for x in results) tables = list(x[2] for x in results) mopdb_log.debug(f"In get_cmorname query results: {results}") @@ -381,50 +294,6 @@ def write_cmor_table(var_list, name): json.dump(out, f, indent=4) return - -def delete_record(conn, table, col, pairs): - """Deletes record from table based on pairs of column and - value passed for selection - - Parameters - ---------- - conn : connection object - connection to db - table: str - db table name - col: str - name of column to return with query - pairs : list[tuple(str, str)] - pairs of columns, values to select record/s - """ - mopdb_log = logging.getLogger('mopdb_log') - # Set up query - sqlwhere = f"FROM {table} WHERE " - for c,v in pairs: - sqlwhere += f"{c}='{v}' AND " - sql = f"SELECT {col} " + sqlwhere[:-4] - mopdb_log.debug(f"Delete query: {sql}") - xl = query(conn, sql, first=False) - # Delete from db - if xl is not None: - mopdb_log.info(f"Found {len(xl)} records") - for x in xl: - mopdb_log.info(f"{x}") - confirm = input('Confirm deletion from database: Y/N ') - if confirm == 'Y': - mopdb_log.info('Updating db ...') - with conn: - c = conn.cursor() - sql = "DELETE " + sqlwhere[:-4] - mopdb_log.debug(f"Delete sql: {sql}") - c.execute(sql) - c.execute('select total_changes()') - mopdb_log.info(f"Rows modified: {c.fetchall()[0][0]}") - else: - mopdb_log.info("The query did not return any records") - return - - def get_file_frq(ds, fnext): """Return a dictionary with frequency for each time axis. 
@@ -499,7 +368,7 @@ def write_varlist(conn, indir, match, version, alias):
     mopdb_log = logging.getLogger('mopdb_log')
     line_cols = ['name','cmor_var','units','dimensions','_frequency',
         '_realm','cell_methods','cmor_table','vtype','size',
-        'nsteps','fobj.fpattern','long_name','standard_name']
+        'nsteps','fpattern','long_name','standard_name']
     vobj_list = []
     fobj_list = []
     patterns = []
@@ -569,9 +438,9 @@
                 line = [attrgetter(k)(vobj) for k in line_cols]
                 fwriter.writerow(line)
                 vobj_list.append(vobj)
-                pattern_var_list.append(vobj.name)
-        fjob.varlist = pattern_var_list
-        fjob_list.append(fobj)
+                pattern_var_list.append(vobj)
+        fobj.varlist = pattern_var_list
+        fobj_list.append(fobj)
         mopdb_log.info(f"Variable list for {fpattern} successfully written")
     fcsv.close()
-    return fname, vobj_list
+    return fname, vobj_list, fobj_list
@@ -644,7 +513,7 @@ def match_stdname(conn, row, stdn):
     found_match = False
     sql = f"""SELECT name FROM cmorvar where
         standard_name='{row['standard_name']}'"""
-    results = query(conn, sql, first=False)
+    results = query(conn, sql, first=False, logname='mopdb_log')
     matches = [x[0] for x in results]
     if len(matches) > 0:
         stdn = add_var(stdn, row, tuple([matches]+['']*7), stdnm=True)
@@ -674,7 +543,7 @@ def match_var(row, version, mode, conn, records):
     elif mode == 'no_ver':
         sql = sql_base + sql_frq
     # execute query and process results
-    result = query(conn, sql, first=False)
+    result = query(conn, sql, first=False, logname='mopdb_log')
     mopdb_log.debug(f"match_var: {result}, sql: {sql[110:]}")
     if result is not None and result != []:
         for x in result:
@@ -835,7 +704,7 @@ def potential_vars(conn, rows, stash_vars, version):
         sql = f"""SELECT cmor_var,input_vars,calculation,frequency,
             realm,model,cmor_table,positive,units FROM mapping
             WHERE input_vars like '%{row['name']}%'"""
-        results = query(conn, sql, first=False)
+        results = query(conn, sql, first=False, logname='mopdb_log')
        mopdb_log.debug(f"In potential: var {row['name']}, db results {results}")
         for r in results:
             allinput = r[1].split(" ")
@@ -933,7 +802,7 @@ def check_realm_units(conn, var):
     # retrieve modeling_realm, units from db cmor table
     sql = f"""SELECT modeling_realm, units FROM cmorvar
         WHERE name='{vname}' """
-    result = query(conn, sql)
+    result = query(conn, sql, logname='mopdb_log')
     mopdb_log.debug(f"In check_realm_units: {vname}, {result}")
     if result is not None:
         dbrealm = result[0]
@@ -1016,40 +885,78 @@ def write_catalogue(conn, parsed, vobjs, fobjs, alias):
     """Write intake-esm catalogue and returns name
     """
     mopdb_log = logging.getLogger('mopdb_log')
-    # read template json data
+    # read template json file
     jfile = import_files('mopdata').joinpath('intake_cat_template.json')
     with open(jfile, 'r') as f:
         template = json.load(f)
-    mopdb_log.debug("Opened intake template file")
+    # read template yaml file
+    yfile = import_files('mopdata').joinpath('intake_cat_template.yaml')
+    maincat = read_yaml(yfile)
+    mopdb_log.debug("Opened intake template files")
     # update json data with relevant information
     # update title, description etc with experiment
     for k,v in template.items():
         if type(v) == str:
             template[k] = v.replace('<experiment>', alias)
+    for k,v in maincat.items():
+        if type(v) == str:
+            maincat[k] = v.replace('<experiment>', alias)
     # write updated json to file
     jfile = f"intake_{alias}.json"
     with open(jfile, 'w') as f:
         json.dump(template, f, indent=4)
+    # write updated yaml to file
+    jfile = f"intake_{alias}.yaml"
+    write_yaml(maincat, jfile, 'mopdb_log')
     # create a dictionary for each file to list
-    for pat_obj
in fobjs: - var_list = get_pattern_vars. - base_dict = {'experiment': alias, - 'realm': = pat_obj.realm, - 'realm': = pat_obj.realm, + lines = create_file_dict(fobjs) # write csv file + cols = [x['column_name'] for x in template['attributes']] + cols = ['path'] + cols csvname = template['catalog_file'] with lzma.open(csvname, 'wt') as fcsv: - fwriter = csv.DictWriter(fcsv, keys, delimiter=',') - for f in files_dict: - fwriter.writerow(f) + fwriter = csv.DictWriter(fcsv, cols, delimiter=';') + fwriter.writeheader() + for fd in lines: + fwriter.writerow(fd) fcsv.close() return jfile, csvname -"experiment" - "column_name": "realm" - "column_name": "frequency" - "variable" - "column_name": "map_var" - "column_name": "map_table" - "column_name": "standard_name" - "column_name": "date_range" +def get_date_pattern(fname, fpattern): + """Try to build a date range for each file pattern based + on its filename + """ + mopdb_log = logging.getLogger('mopdb_log') + # assign False to any character which is not a digit + date_pattern = [True if c.isdigit() else False for c in fname] + # assign False to fpattern + n = len(fpattern) + date_pattern[:n] = [False] * n + return date_pattern + +def create_file_dict(fobjs): + """ + """ + mopdb_log = logging.getLogger('mopdb_log') + for pat_obj in fobjs: + var_list = [v.name for v in pat_obj.varlist] + # set to remove '' duplicates + mapvar_list = list(set(v.cmor_var for v in pat_obj.varlist)) + stnm_list = list(set(v.standard_name for v in pat_obj.varlist)) + base_dict = {'experiment': alias, + 'realm': pat_obj.realm, + 'frequency': pat_obj.frequency, + 'variable': var_list, + 'map_var': mapvar_list, + 'standard_name': stnm_list} + # work out date_pattern in filename + fname = pat_obj.files[0].name + date_pattern = get_date_pattern(fname, pat_obj.fpattern) + # add date and path for each file + for fpath in pat_obj.files: + f = fpath.name + fd = base_dict.copy() + fd['path'] = str(fpath) + fd['date'] = ''.join(c for c in compress(f, date_pattern)) + lines.append(fd) + return lines diff --git a/src/mopdb/utils.py b/src/mopdb/utils.py new file mode 100644 index 0000000..1a6ff11 --- /dev/null +++ b/src/mopdb/utils.py @@ -0,0 +1,210 @@ +#!/usr/bin/env python +# Copyright 2024 ARC Centre of Excellence for Climate Extremes (CLEX) +# Author: Paola Petrelli for CLEX +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# contact: paola.petrelli@utas.edu.au +# +# last updated 12/07/2024 +# + +import sqlite3 +import logging +import os +import csv +import json +import stat +import yaml + +from datetime import date + + +def config_log(debug, logname): + """Configures log file""" + # start a logger + logger = logging.getLogger(logname) + # set a formatter to manage the output format of our handler + formatter = logging.Formatter('%(asctime)s; %(message)s',"%Y-%m-%d %H:%M:%S") + # set the level for the logger, has to be logging.LEVEL not a string + level = logging.INFO + flevel = logging.WARNING + if debug: + level = logging.DEBUG + flevel = logging.DEBUG + logger.setLevel(level) + + # add a handler to send WARNING level messages to console + # or DEBUG level if debug is on + clog = logging.StreamHandler() + clog.setLevel(level) + logger.addHandler(clog) + + # add a handler to send INFO level messages to file + # the messagges will be appended to the same file + # create a new log file every month + day = date.today().strftime("%Y%m%d") + logname = f"{logname}_{day}.txt" + flog = logging.FileHandler(logname) + try: + os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO); + except OSError: + pass + flog.setLevel(flevel) + flog.setFormatter(formatter) + logger.addHandler(flog) + # return the logger object + return logger + +def db_connect(db, logname='__name__'): + """Connects to ACCESS mapping sqlite database""" + log = logging.getLogger(logname) + conn = sqlite3.connect(db, timeout=10, isolation_level=None) + if conn.total_changes == 0: + log.info(f"Opened database {db} successfully") + return conn + +def create_table(conn, sql, logname='__name__'): + """Creates table if database is empty + + Parameters + ---------- + conn : connection object + sql : str + SQL style string defining table to create + """ + log = logging.getLogger(logname) + try: + c = conn.cursor() + c.execute(sql) + except Exception as e: + log.error(e) + return + +def query(conn, sql, tup=(), first=True, logname='__name__'): + """Executes generic sql query and returns row/s + + Parameters + ---------- + conn : connection object + Connection to sqlite database + sql : str + sql string representing query + tup : tuple + By default empty, used to pass values when placeholder ? 
is used + in sql string + first : boolean + By default True will return only first record found, set to False + to return all matching records + + Returns + ------- + result : tuple/list(tuple) + tuple or a list of, representing row/s returned by query + """ + log = logging.getLogger(logname) + with conn: + c = conn.cursor() + c.execute(sql, tup) + if first: + result = c.fetchone() + else: + result = [ x for x in c.fetchall() ] + #columns = [description[0] for description in c.description] + return result + + +def get_columns(conn, table, logname='__name__'): + """Gets list of columns from db table + """ + log = logging.getLogger(logname) + sql = f'PRAGMA table_info({table});' + table_data = query(conn, sql, first=False, logname=logname) + columns = [x[1] for x in table_data] + return columns + + +def delete_record(conn, table, col, pairs, logname='__name__'): + """Deletes record from table based on pairs of column and + value passed for selection + + Parameters + ---------- + conn : connection object + connection to db + table: str + db table name + col: str + name of column to return with query + pairs : list[tuple(str, str)] + pairs of columns, values to select record/s + """ + log = logging.getLogger(logname) + # Set up query + sqlwhere = f"FROM {table} WHERE " + for c,v in pairs: + sqlwhere += f"{c}='{v}' AND " + sql = f"SELECT {col} " + sqlwhere[:-4] + log.debug(f"Delete query: {sql}") + xl = query(conn, sql, first=False, logname=logname) + # Delete from db + if xl is not None: + log.info(f"Found {len(xl)} records") + for x in xl: + log.info(f"{x}") + confirm = input('Confirm deletion from database: Y/N ') + if confirm == 'Y': + log.info('Updating db ...') + with conn: + c = conn.cursor() + sql = "DELETE " + sqlwhere[:-4] + log.debug(f"Delete sql: {sql}") + c.execute(sql) + c.execute('select total_changes()') + log.info(f"Rows modified: {c.fetchall()[0][0]}") + else: + log.info("The query did not return any records") + return + +def read_yaml(fname, logname='__name__'): + """Read yaml file + """ + log = logging.getLogger(logname) + try: + with fname.open(mode='r') as yfile: + data = yaml.safe_load(yfile) + except Exception as e: + log.error(f"Check that {fname} exists and it is a valid yaml file") + log.error(f"Exception: {e}") + return data + +def write_yaml(data, fname, logname='__name__'): + """Write data to a yaml file + + Parameters + ---------- + data : dict + The file content as a dictionary + fname : str + Yaml filename + + Returns + ------- + """ + log = logging.getLogger(logname) + try: + with open(fname, 'w') as f: + yaml.dump(data, f) + except: + log.error(f"Check that {data} exists and it is an object compatible with yaml") + return diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py index 954d72a..c910c80 100644 --- a/src/mopper/calculations.py +++ b/src/mopper/calculations.py @@ -40,7 +40,7 @@ import logging from importlib.resources import files as import_files -from mopper.setup_utils import read_yaml +from mopdb.utils import read_yaml # Global Variables #---------------------------------------------------------------------- diff --git a/src/mopper/mop_setup.py b/src/mopper/mop_setup.py index 5bc5634..8831595 100755 --- a/src/mopper/mop_setup.py +++ b/src/mopper/mop_setup.py @@ -34,6 +34,7 @@ from importlib.resources import files as import_files from mopper.setup_utils import * +from mopdb.utils import read_yaml def find_matches(table, var, realm, frequency, varlist): diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index 
2eb9695..a5d1423 100755
--- a/src/mopper/mop_utils.py
+++ b/src/mopper/mop_utils.py
@@ -39,7 +39,7 @@ from pathlib import Path

 from mopper.calculations import *
-from mopper.setup_utils import read_yaml
+from mopper.utils import read_yaml

 from importlib.resources import files as import_files

diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py
index 5b1b36c..da3dc6b 100755
--- a/src/mopper/setup_utils.py
+++ b/src/mopper/setup_utils.py
@@ -42,7 +42,7 @@ from dateutil.relativedelta import relativedelta

 from json.decoder import JSONDecodeError

-from mopdb.mopdb_utils import query
+from mopdb.utils import query, write_yaml, read_yaml
 from mopper.cmip_utils import fix_years

@@ -103,37 +103,6 @@ def adjust_nsteps(v, frq):
     new_nsteps = tot_days * nstep_day[frq]
     return new_nsteps

-
-def read_yaml(fname):
-    """Read yaml file
-    """
-    with fname.open(mode='r') as yfile:
-        data = yaml.safe_load(yfile)
-    return data
-
-
-def write_yaml(data, fname, log_name='__name__'):
-    """Write data to a yaml file
-
-    Parameters
-    ----------
-    data : dict
-        The file content as a dictionary
-    fname : str
-        Yaml filename
-
-    Returns
-    -------
-    """
-    logger = logging.getLogger(log_name)
-    try:
-        with open(fname, 'w') as f:
-            yaml.dump(data, f)
-    except:
-        logger.error(f"Check that {data} exists and it is an object compatible with json")
-    return
-
-
 @click.pass_context
 def write_config(ctx, fname='exp_config.yaml'):
     """Write data to a yaml file
diff --git a/tests/test_mopdb_utils.py b/tests/test_mopdb_utils.py
index ebc8be0..858697e 100644
--- a/tests/test_mopdb_utils.py
+++ b/tests/test_mopdb_utils.py
@@ -20,6 +20,7 @@
 import sqlite3
 import click
 import logging
+import itertools

 from mopdb.mopdb_utils import *
 from conftest import um_multi_time
@@ -41,3 +42,21 @@ def test_build_umfrq(um_multi_time, caplog):
     out = build_umfrq(time_axs, um_multi_time)
     assert umfrq == out

+#@pytest.mark.parametrize('fname', [0,1,2])
+def test_get_date_pattern(caplog):
+    caplog.set_level(logging.DEBUG, logger='mopdb_log')
+    fname = 'ocean_month.nc-09961231'
+    fpattern = 'ocean_month.nc-'
+    dp = get_date_pattern(fname, fpattern)
+    date = ''.join(x for x in itertools.compress(fname,dp))
+    assert date == '09961231'
+    fname = 'umnsa_cldrad_20160603T0000.nc'
+    fpattern = 'umnsa_cldrad_'
+    dp = get_date_pattern(fname, fpattern)
+    date = ''.join(x for x in itertools.compress(fname,dp))
+    assert date == '201606030000'
+    fname = 'cw323a.pm095101_mon.nc'
+    fpattern = 'cw323a.pm'
+    dp = get_date_pattern(fname, fpattern)
+    date = ''.join(x for x in itertools.compress(fname,dp))
+    assert date == '095101'
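With the intake_cat_template.json/yaml files and write_catalogue in place, `mopdb intake` can now produce a queryable catalogue. A rough sketch of how the generated output might be opened, assuming intake and intake-esm are installed and the catalogue was written for a hypothetical alias "exp1" (file and attribute names taken from the templates above, the rest illustrative):

    import intake

    # the json datastore points at the compressed csv listed in its
    # catalog_file entry; keep the date column as strings so leading
    # zeros in dates like 09961231 (see tests above) are not lost
    cat = intake.open_esm_datastore(
        "intake_exp1.json",
        read_csv_kwargs={"dtype": {"date": str}})
    # search on the groupby attributes defined in the template
    subset = cat.search(realm="atmos", frequency="1hr")
    dsets = subset.to_dataset_dict()

The search keys mirror the template's groupby_attrs, so each entry of dsets aggregates all files of one realm/frequency combination along the time dimension.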
create_file_dict(fobjs, alias): """ """ mopdb_log = logging.getLogger('mopdb_log') From d20db0eed0634df6831c9893c2cd6b724471cb74 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 16 Jul 2024 09:44:06 +1000 Subject: [PATCH 062/137] now producing intake catalogue from scratch, re-organised mopdb code #158 --- src/mopdata/intake_cat_template.json | 7 +- src/mopdata/intake_cat_template.yaml | 7 +- src/mopdb/mopdb.py | 3 +- src/mopdb/mopdb_class.py | 2 +- src/mopdb/mopdb_map.py | 559 +++++++++++++++++++++++++++ src/mopdb/mopdb_utils.py | 551 +------------------------- 6 files changed, 570 insertions(+), 559 deletions(-) create mode 100644 src/mopdb/mopdb_map.py diff --git a/src/mopdata/intake_cat_template.json b/src/mopdata/intake_cat_template.json index b4549b4..502bf2a 100644 --- a/src/mopdata/intake_cat_template.json +++ b/src/mopdata/intake_cat_template.json @@ -7,16 +7,15 @@ "format": "netcdf" }, "aggregation_control": { - "variable_column_name": "variable", + "variable_column_name": "frequency", "groupby_attrs": [ "realm", - "frequency", - "variable" + "frequency" ], "aggregations": [ { "type": "join_existing", - "attribute_name": "date_range", + "attribute_name": "date", "options": { "dim": "time" } diff --git a/src/mopdata/intake_cat_template.yaml b/src/mopdata/intake_cat_template.yaml index 1a04ac4..87fc7e4 100644 --- a/src/mopdata/intake_cat_template.yaml +++ b/src/mopdata/intake_cat_template.yaml @@ -1,7 +1,7 @@ metadata: version: 1 sources: - experiment: + : description: "Intake catalogue to load ACCESS model output" Project: "" Maintained By: "" @@ -10,10 +10,11 @@ sources: License: "https://creativecommons.org/licenses/by/4.0/" Citation: "" References: "" - driver: intake_esm.esm_datastore + driver: intake_esm.core.esm_datastore args: - obj: "{{CATALOG_DIR}}/catalogue.json" columns_with_iterables: - variable - map_var - standard_name + read_csv_kwargs: {"dtype": {"date": str}} + obj: "{{CATALOG_DIR}}/intake_.json" diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index d495d7b..aa16b2e 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -408,7 +408,8 @@ def write_intake(ctx, fpath, match, dbname, version, alias): parsed = map_variables(conn, rows, version) # potential vars have always duplicates: 1 for each input_var cat_name, fcsv = write_catalogue(conn, parsed, vobjs, fobjs, alias) - mopdb_log.info("Intake-esm catalogue written to {cat_name} and {fcsv}") + mopdb_log.info(f"""Intake-esm and intake catalogues written to + {cat_name} and {cat_name.replace('json','yaml')}. File list saved to {fcsv}""") conn.close() return None diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index 9a9aa0e..2fec511 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -121,7 +121,7 @@ def frequency(self, value): value = value.replace('hPt', 'hrPt') if not any(x in value for x in ['min', 'hr', 'day', 'mon', 'yr']): - self._frequency = 'NAfrq' + value = 'NAfrq' self._frequency = value diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py new file mode 100644 index 0000000..a0e580c --- /dev/null +++ b/src/mopdb/mopdb_map.py @@ -0,0 +1,559 @@ +#!/usr/bin/env python +# Copyright 2023 ARC Centre of Excellence for Climate Extremes (CLEX) +# Author: Paola Petrelli for CLEX +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# contact: paola.petrelli@utas.edu.au +# +# last updated 10/04/2024 +# + +import logging +import csv +import json +import lzma +import xarray as xr + +from operator import itemgetter, attrgetter +from pathlib import Path +from itertools import compress +from importlib.resources import files as import_files + +from mopdb.mopdb_class import FPattern, Variable +from mopdb.utils import * +from mopdb.mopdb_utils import (get_cell_methods, remove_duplicates, + get_realm, check_realm_units, get_date_pattern) + + +def get_cmorname(conn, vobj, version): + """Queries mapping table for cmip name given variable name as output + by the model + """ + mopdb_log = logging.getLogger('mopdb_log') + sql = f"""SELECT cmor_var,model,cmor_table,frequency FROM mapping + WHERE input_vars='{vobj.name}' and (calculation='' + or calculation IS NULL)""" + results = query(conn, sql, first=False, logname='mopdb_log') + names = list(x[0] for x in results) + tables = list(x[2] for x in results) + mopdb_log.debug(f"In get_cmorname query results: {results}") + if len(names) == 0: + vobj.cmor_var = '' + vobj.cmor_table = '' + elif len(names) == 1: + vobj.cmor_var = names[0] + vobj.cmor_table = tables[0] + elif len(names) > 1: + mopdb_log.debug(f"Found more than 1 definition for {vobj.name}:\n" + + f"{results}") + match_found = False + for r in results: + if r[1] == version and r[3] == vobj.frequency: + vobj.cmor_var, vobj.cmor_table = r[0], r[2] + match_found = True + break + if not match_found: + for r in results: + if r[3] == vobj.frequency: + vobj.cmor_var, vobj.cmor_table = r[0], r[2] + match_found = True + break + if not match_found: + for r in results: + if r[1] == version: + vobj.cmor_var, vobj.cmor_table = r[0], r[2] + match_found = True + break + if not match_found: + vobj.cmor_var = names[0] + vobj.cmor_table = tables[0] + mopdb_log.info(f"Found more than 1 definition for {vobj.name}:\n"+ + f"{results}\n Using {vobj.cmor_var} from {vobj.cmor_table}") + return vobj + "mip_era": "", + "Conventions": "CF-1.7 ACDD1.3" + } + return header + +def get_file_frq(ds, fnext): + """Return a dictionary with frequency for each time axis. + + Frequency is inferred by comparing interval between two consecutive + timesteps with expected interval at a given frequency. + Order time_axis so ones with only one step are last, so we can use + file frequency (interval_file) inferred from other time axes. + This is called if there are more than one time axis in file + (usually only UM) or if frequency can be guessed from filename. 
+ """ + mopdb_log = logging.getLogger('mopdb_log') + frq = {} + int2frq = {'dec': 3652.0, 'yr': 365.0, 'mon': 30.0, + 'day': 1.0, '6hr': 0.25, '3hr': 0.125, + '1hr': 0.041667, '30min': 0.020833, '10min': 0.006944} + # retrieve all time axes + time_axs = [d for d in ds.dims if 'time' in d] + time_axs_len = set(len(ds[d]) for d in time_axs) + time_axs.sort(key=lambda x: len(ds[x]), reverse=True) + mopdb_log.debug(f"in get_file_frq, time_axs: {time_axs}") + max_len = len(ds[time_axs[0]]) + # if all time axes have only 1 timestep we cannot infer frequency + # so we open also next file but get only time axs + if max_len == 1: + dsnext = xr.open_dataset(fnext, decode_times = False) + time_axs2 = [d for d in dsnext.dims if 'time' in d] + ds = xr.concat([ds[time_axs], dsnext[time_axs2]], dim='time') + time_axs = [d for d in ds.dims if 'time' in d] + time_axs_len = set(len(ds[d]) for d in time_axs) + time_axs.sort(key=lambda x: len(ds[x]), reverse=True) + for t in time_axs: + mopdb_log.debug(f"len of time axis {t}: {len(ds[t])}") + if len(ds[t]) > 1: + interval = (ds[t][1]-ds[t][0]).values + interval_file = (ds[t][-1] -ds[t][0]).values + else: + interval = interval_file + mopdb_log.debug(f"interval 2 timesteps for {t}: {interval}") + for k,v in int2frq.items(): + if math.isclose(interval, v, rel_tol=0.05): + frq[t] = k + break + return frq + +def write_varlist(conn, indir, match, version, alias): + """Based on model output files create a variable list and save it + to a csv file. Main attributes needed to map output are provided + for each variable + """ + mopdb_log = logging.getLogger('mopdb_log') + line_cols = ['name','cmor_var','units','dimensions','_frequency', + '_realm','cell_methods','cmor_table','vtype','size', + 'nsteps','fpattern','long_name','standard_name'] + vobj_list = [] + fobj_list = [] + patterns = [] + files = FPattern.list_files(indir, match) + mopdb_log.debug(f"Files after sorting: {files}") + if alias == '': + alias = 'mopdb' + fname = f"varlist_{alias}.csv" + fcsv = open(fname, 'w') + fwriter = csv.writer(fcsv, delimiter=';') + fwriter.writerow(["name", "cmor_var", "units", "dimensions", + "frequency", "realm", "cell_methods", "cmor_table", "vtype", + "size", "nsteps", "fpattern", "long_name", "standard_name"]) + for fpath in files: + # get filename pattern until date match + mopdb_log.debug(f"Filename: {fpath.name}") + fpattern = fpath.name.split(match)[0] + if fpattern in patterns: + continue + patterns.append(fpattern) + fobj = FPattern(fpattern, Path(indir)) + #pattern_list = list_files(indir, f"{fpattern}*") + nfiles = len(fobj.files) + mopdb_log.debug(f"File pattern, number of files: {fpattern}, {nfiles}") + #fwriter.writerow([f"#{fpattern}"]) + # get attributes for the file variables + ds = xr.open_dataset(str(fobj.files[0]), decode_times=False) + coords = [c for c in ds.coords] + ['latitude_longitude'] + #pass next file in case of 1 timestep per file and no frq in name + fnext = str(fobj.files[1]) + if fobj.frequency == 'NAfrq' or fobj.realm == 'atmos': + frq_dict = get_file_frq(ds, fnext) + # if only one frequency detected empty dict + if len(frq_dict) == 1: + fobj.frequency = frq_dict.popitem()[1] + else: + fobj.multiple_frq = True + fobj.frequency = frq_dict['time'] + mopdb_log.debug(f"Multiple frq: {fobj.multiple_frq}") + if fobj.realm == "NArealm": + fobj.realm = get_realm(version, ds) + pattern_var_list = [] + for vname in ds.variables: + vobj = Variable(vname, fobj) + if vname not in coords and all(x not in vname for x in ['_bnds','_bounds']): + v = ds[vname] + 
mopdb_log.debug(f"Variable: {vobj.name}") + # get size in bytes of grid for 1 timestep and number of timesteps + vobj.size = v[0].nbytes + vobj.nsteps = nfiles * v.shape[0] + # assign time axis frequency if more than one is available + if fobj.multiple_frq: + if 'time' in v.dims[0]: + vobj._frequency = frq_dict[v.dims[0]] + else: + mopdb_log.info(f"Could not detect frequency for variable: {v}") + attrs = v.attrs + vobj.cell_methods, frqmod = get_cell_methods(attrs, v.dims) + vobj.frequency = vobj.frequency + frqmod + mopdb_log.debug(f"Frequency var: {vobj.frequency}") + # try to retrieve cmip name + vobj = get_cmorname(conn, vobj, version) + vobj.units = attrs.get('units', "") + vobj.long_name = attrs.get('long_name', "") + vobj.standard_name = attrs.get('standard_name', "") + vobj.dimensions = " ".join(v.dims) + vobj.vtype = v.dtype + line = [attrgetter(k)(vobj) for k in line_cols] + fwriter.writerow(line) + vobj_list.append(vobj) + pattern_var_list.append(vobj) + fobj.varlist = pattern_var_list + fobj_list.append(fobj) + mopdb_log.info(f"Variable list for {fpattern} successfully written") + fcsv.close() + return fname, vobj_list, fobj_list + +def match_stdname(conn, row, stdn): + """Returns an updated stdn list if finds one or more variables + in cmorvar table that match the standard name passed as input. + It also return a False/True found_match boolean. + """ + mopdb_log = logging.getLogger('mopdb_log') + found_match = False + sql = f"""SELECT name FROM cmorvar where + standard_name='{row['standard_name']}'""" + results = query(conn, sql, first=False, logname='mopdb_log') + matches = [x[0] for x in results] + if len(matches) > 0: + stdn = add_var(stdn, row, tuple([matches]+['']*7), stdnm=True) + found_match = True + + return stdn, found_match + + +def match_var(row, version, mode, conn, records): + """Returns match for variable if found after looping + variables already mapped in database + Parameters + + """ + mopdb_log = logging.getLogger('mopdb_log') + found_match = False + # build sql query based on mode + sql_base = f"""SELECT cmor_var,input_vars,calculation,frequency, + realm,model,cmor_table,positive,units FROM mapping where + input_vars='{row['name']}'""" + sql_frq = f" and frequency='{row['frequency']}'" + sql_ver = f" and model='{version}'" + if mode == 'full': + sql = sql_base + sql_frq + sql_ver + elif mode == 'no_frq': + sql = sql_base + sql_ver + elif mode == 'no_ver': + sql = sql_base + sql_frq + # execute query and process results + result = query(conn, sql, first=False, logname='mopdb_log') + mopdb_log.debug(f"match_var: {result}, sql: {sql[110:]}") + if result is not None and result != []: + for x in result: + mopdb_log.debug(f"match: {x}") + records = add_var(records, row, x) + found_match = True + + return records, found_match + +def parse_vars(conn, rows, version): + """Returns records of variables to include in template mapping file, + a list of all stash variables + frequency available in model output + and a list of variables already defined in db + + Parameters + ---------- + conn : connection object + rows : list(dict) + list of variables to match + version : str + model version to use to match variables + + Returns + ------- + stash_vars : list + varname-frequency for each listed variable, varname is from model output + """ + mopdb_log = logging.getLogger('mopdb_log') + full = [] + no_ver = [] + no_frq = [] + stdn = [] + no_match = [] + stash_vars = [] + + # looping through variables from file and attempt matches to db + for row in rows: + if row['name'][0] 
== "#" or row['name'] == 'name': + continue + else: + full, found = match_var(row, version, 'full', conn, full) + # if no match, ignore model version first and then frequency + mopdb_log.debug(f"found perfect match: {found}") + if not found: + no_ver, found = match_var(row, version, 'no_ver', conn, no_ver) + mopdb_log.debug(f"found no ver match: {found}") + if not found: + no_frq, found = match_var(row, version, 'no_frq', conn, no_frq) + mopdb_log.debug(f"found no frq match: {found}") + # make a last attempt to match using standard_name + if not found: + if row['standard_name'] != '': + stdn, found = match_stdname(conn, row, stdn) + mopdb_log.debug(f"found stdnm match: {found}") + if not found: + no_match = add_var(no_match, row, tuple([row['name']]+['']*8)) + stash_vars.append(f"{row['name']}-{row['frequency']}") + + return full, no_ver, no_frq, stdn, no_match, stash_vars + +def add_var(vlist, row, match, stdnm=False): + """Add information from match to variable list and re-order + fields so they correspond to final mapping output. + + Parameters + match : tuple + match values (cmor_var,input_vars,calculation,frequency, + realm,model(version),cmor_table,positive,units) + """ + mopdb_log = logging.getLogger('mopdb_log') + # assign cmor_var from match and swap place with input_vars + mopdb_log.debug(f"Assign cmor_var: {match}") + mopdb_log.debug(f"initial row: {row}") + var = row.copy() + var['cmor_var'] = match[0] + var['input_vars'] = match[1] + orig_name = var.pop('name') + # assign realm from match + var['realm'] = match[4] + # with stdn assign cmorvar and table if only 1 match returned + # otherwise assign table from match + if stdnm: + var['input_vars'] = orig_name + if len(var['cmor_var']) == 1: + cmor_var, table = var['cmor_var'][0].split("-") + var['cmor_var'] = cmor_var + var['cmor_table'] = table + else: + var['cmor_table'] = match[6] + # add calculation, positive and version + var['calculation'] = match[2] + var['positive'] = match[7] + var['version'] = match[5] + # maybe we should override units here rather than in check_realm_units + # if units missing get them from match + if var['units'] is None or var['units'] == '': + var['units'] = match[8] + vlist.append(var) + return vlist + +def potential_vars(conn, rows, stash_vars, version): + """Returns list of variables that can be potentially derived from + model output. 
+
+    Loop across all model variables to match
+    Select any mapping that contains the variable and if there's a calculation
+    NB rows modified by add_row when assigning cmorname and positive values
+
+    Parameters
+    ----------
+    conn : connection object
+    rows : list(dict)
+        list of variables to match
+    stash_vars : list
+        varname-frequency for each listed variable, varname is from model output
+    version : str
+        model version to use to match variables
+
+    Returns
+    -------
+    """
+    mopdb_log = logging.getLogger('mopdb_log')
+    pot_full = []
+    pot_part = []
+    pot_varnames = set()
+    for row in rows:
+        sql = f"""SELECT cmor_var,input_vars,calculation,frequency,
+            realm,model,cmor_table,positive,units FROM mapping
+            WHERE input_vars like '%{row['name']}%'"""
+        results = query(conn, sql, first=False, logname='mopdb_log')
+        mopdb_log.debug(f"In potential: var {row['name']}, db results {results}")
+        for r in results:
+            allinput = r[1].split(" ")
+            mopdb_log.debug(f"{len(allinput)> 1}")
+            mopdb_log.debug(all(f"{x}-{row['frequency']}" in stash_vars for x in allinput))
+            if len(allinput) > 1 and all(f"{x}-{row['frequency']}" in stash_vars for x in allinput):
+                # if both version and frequency of applied mapping match
+                # consider this a full matching potential var
+                if r[5] == version and r[3] == row['frequency']:
+                    pot_full = add_var(pot_full, row, r)
+                else:
+                    pot_part = add_var(pot_part, row, r)
+                pot_varnames.add(r[0])
+    return pot_full, pot_part, pot_varnames
+
+
+def write_map_template(conn, parsed, alias):
+    """Write mapping csv file template based on list of variables to define
+
+    Input varlist file order:
+    name, cmor_var, units, dimensions, frequency, realm, cell_methods,
+    cmor_table, vtype, size, nsteps, fpattern, long_name, standard_name
+    Mapping db order:
+    cmor_var, input_vars, calculation, units, dimensions, frequency, realm,
+    cell_methods, positive, cmor_table, model, notes, origin
+    for pot vars + vtype, size, nsteps, fpattern
+    Final template order:
+    cmor_var, input_vars, calculation, units, dimensions, frequency, realm,
+    cell_methods, positive, cmor_table, version, vtype, size, nsteps, fpattern,
+    long_name, standard_name
+    """
+
+    mopdb_log = logging.getLogger('mopdb_log')
+    full, no_ver, no_frq, stdn, no_match, pot_full, pot_part = parsed
+    keys = ['cmor_var', 'input_vars', 'calculation', 'units',
+            'dimensions', 'frequency', 'realm', 'cell_methods',
+            'positive', 'cmor_table', 'version', 'vtype', 'size',
+            'nsteps', 'fpattern', 'long_name', 'standard_name']
+
+    with open(f"map_{alias}.csv", 'w') as fcsv:
+        fwriter = csv.DictWriter(fcsv, keys, delimiter=';')
+        write_vars(full, fwriter, keys, conn=conn)
+        div = ("# Derived variables with matching version and " +
+            "frequency: Use with caution!")
+        write_vars(pot_full, fwriter, div, conn=conn)
+        div = ("# Variables definitions coming from different " +
+            "version")
+        write_vars(no_ver, fwriter, div, conn=conn)
+        div = ("# Variables with different frequency: Use with" +
+            " caution!")
+        write_vars(no_frq, fwriter, div, conn=conn)
+        div = ("# Variables matched using standard_name: Use " +
+            "with caution!")
+        write_vars(stdn, fwriter, div, sortby='input_vars')
+        div = "# Derived variables: Use with caution!" 
+ write_vars(pot_part, fwriter, div, conn=conn) + #pot=True, conn=conn, sortby=0) + div = "# Variables without mapping" + write_vars(no_match, fwriter, div) + mopdb_log.debug("Finished writing variables to mapping template") + fcsv.close() + return + +def write_vars(vlist, fwriter, div, conn=None, sortby='cmor_var'): + """ + """ + + mopdb_log = logging.getLogger('mopdb_log') + if len(vlist) > 0: + if type(div) is str: + divrow = {x:'' for x in vlist[0].keys()} + divrow['cmor_var'] = div + elif type(div) is list: + divrow = {x:x for x in div} + fwriter.writerow(divrow) + for var in sorted(vlist, key=itemgetter(sortby)): + if conn: + var = check_realm_units(conn, var) + fwriter.writerow(var) + return + +def map_variables(conn, rows, version): + """ + """ + mopdb_log = logging.getLogger('mopdb_log') + # return lists of fully/partially matching variables and stash_vars + # these are input_vars for calculation defined in already in mapping db + full, no_ver, no_frq, stdn, no_match, stash_vars = parse_vars(conn, + rows, version) + # remove duplicates from partially matched variables + no_ver = remove_duplicate(no_ver) + no_frq = remove_duplicate(no_frq, strict=False) + no_match = remove_duplicate(no_match, strict=False) + # check if more derived variables can be added based on all + # input_vars being available + pot_full, pot_part, pot_varnames = potential_vars(conn, rows, + stash_vars, version) + # potential vars have always duplicates: 1 for each input_var + pot_full = remove_duplicate(pot_full, strict=False) + pot_part = remove_duplicate(pot_part, extra=pot_full, strict=False) + mopdb_log.info(f"Derived variables: {pot_varnames}") + return full, no_ver, no_frq, stdn, no_match, pot_full, pot_part + +def write_catalogue(conn, parsed, vobjs, fobjs, alias): + """Write intake-esm catalogue and returns name + """ + mopdb_log = logging.getLogger('mopdb_log') + # read template json file + jfile = import_files('mopdata').joinpath('intake_cat_template.json') + with open(jfile, 'r') as f: + template = json.load(f) + # write updated json to file + for k,v in template.items(): + if type(v) == str: + template[k] = v.replace("", alias) + jout = f"intake_{alias}.json" + with open(jout, 'w') as f: + json.dump(template, f, indent=4) + # read template yaml file + yfile = import_files('mopdata').joinpath('intake_cat_template.yaml') + with open(yfile, "r") as f: + maincat = f.read() + maincat = maincat.replace("", alias) + mopdb_log.debug("Opened intake template files") + # write updated yaml to file + yout = f"intake_{alias}.yaml" + with open(yout, 'w') as f: + f.writelines(maincat) + # create a dictionary for each file to list + lines = create_file_dict(fobjs, alias) + # write csv file + cols = [x['column_name'] for x in template['attributes']] + cols = ['path'] + cols + csvname = template['catalog_file'] + with lzma.open(csvname, 'wt') as fcsv: + fwriter = csv.DictWriter(fcsv, cols) + fwriter.writeheader() + for fd in lines: + fwriter.writerow(fd) + fcsv.close() + return jout, csvname + +def create_file_dict(fobjs, alias): + """ + """ + mopdb_log = logging.getLogger('mopdb_log') + lines = [] + for pat_obj in fobjs: + var_list = [v.name for v in pat_obj.varlist] + # set to remove '' duplicates + mapvar_list = list(set(v.cmor_var for v in pat_obj.varlist)) + mapvar_list.remove("") + stnm_list = list(set(v.standard_name for v in pat_obj.varlist)) + stnm_list.remove("") + base_dict = {'experiment': alias, + 'realm': pat_obj.realm, + 'frequency': pat_obj.frequency, + 'variable': str(var_list), + 'map_var': 
str(mapvar_list), + 'standard_name': str(stnm_list)} + # work out date_pattern in filename + fname = pat_obj.files[0].name + date_pattern = get_date_pattern(fname, pat_obj.fpattern) + # add date and path for each file + for fpath in pat_obj.files: + f = fpath.name + fd = base_dict.copy() + fd['path'] = str(fpath) + fd['date'] = ''.join(c for c in compress(f, date_pattern)) + lines.append(fd) + return lines diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index ac3f102..062e9dd 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -22,23 +22,12 @@ import sqlite3 import logging import sys -import os import csv import json -import stat -import lzma -import xarray as xr -import numpy as np -import math -from datetime import datetime, date +from datetime import date from collections import Counter -from operator import itemgetter, attrgetter -from pathlib import Path -from itertools import compress -from importlib.resources import files as import_files -from mopdb.mopdb_class import FPattern, Variable from mopdb.utils import * @@ -68,7 +57,6 @@ def mapping_sql(): ) WITHOUT ROWID;""") return sql - def cmorvar_sql(): """Returns sql definition of cmorvar table @@ -99,7 +87,6 @@ def cmorvar_sql(): ok_max_mean_abs TEXT);""") return sql - def map_update_sql(): """Returns sql needed to update mapping table @@ -117,7 +104,6 @@ def map_update_sql(): {', '.join(x+' = excluded.'+x for x in cols)}""" return sql - def cmor_update_sql(): """Returns sql needed to update cmorvar table @@ -136,7 +122,6 @@ def cmor_update_sql(): {', '.join(x+' = excluded.'+x for x in cols)}""" return sql - def create_table(conn, sql): """Creates table if database is empty @@ -154,7 +139,6 @@ def create_table(conn, sql): mopdb_log.error(e) return - def update_db(conn, table, rows_list): """Adds to table new variables definitions @@ -185,53 +169,6 @@ def update_db(conn, table, rows_list): mopdb_log.info('--- Done ---') return -def get_cmorname(conn, vobj, version): - """Queries mapping table for cmip name given variable name as output - by the model - """ - mopdb_log = logging.getLogger('mopdb_log') - sql = f"""SELECT cmor_var,model,cmor_table,frequency FROM mapping - WHERE input_vars='{vobj.name}' and (calculation='' - or calculation IS NULL)""" - results = query(conn, sql, first=False, logname='mopdb_log') - names = list(x[0] for x in results) - tables = list(x[2] for x in results) - mopdb_log.debug(f"In get_cmorname query results: {results}") - if len(names) == 0: - vobj.cmor_var = '' - vobj.cmor_table = '' - elif len(names) == 1: - vobj.cmor_var = names[0] - vobj.cmor_table = tables[0] - elif len(names) > 1: - mopdb_log.debug(f"Found more than 1 definition for {vobj.name}:\n" + - f"{results}") - match_found = False - for r in results: - if r[1] == version and r[3] == vobj.frequency: - vobj.cmor_var, vobj.cmor_table = r[0], r[2] - match_found = True - break - if not match_found: - for r in results: - if r[3] == vobj.frequency: - vobj.cmor_var, vobj.cmor_table = r[0], r[2] - match_found = True - break - if not match_found: - for r in results: - if r[1] == version: - vobj.cmor_var, vobj.cmor_table = r[0], r[2] - match_found = True - break - if not match_found: - vobj.cmor_var = names[0] - vobj.cmor_table = tables[0] - mopdb_log.info(f"Found more than 1 definition for {vobj.name}:\n"+ - f"{results}\n Using {vobj.cmor_var} from {vobj.cmor_table}") - return vobj - - def cmor_table_header(name, realm, frequency): """ """ @@ -255,7 +192,6 @@ def cmor_table_header(name, realm, frequency): } return 
header - def write_cmor_table(var_list, name): """ """ @@ -294,51 +230,6 @@ def write_cmor_table(var_list, name): json.dump(out, f, indent=4) return -def get_file_frq(ds, fnext): - """Return a dictionary with frequency for each time axis. - - Frequency is inferred by comparing interval between two consecutive - timesteps with expected interval at a given frequency. - Order time_axis so ones with only one step are last, so we can use - file frequency (interval_file) inferred from other time axes. - This is called if there are more than one time axis in file - (usually only UM) or if frequency can be guessed from filename. - """ - mopdb_log = logging.getLogger('mopdb_log') - frq = {} - int2frq = {'dec': 3652.0, 'yr': 365.0, 'mon': 30.0, - 'day': 1.0, '6hr': 0.25, '3hr': 0.125, - '1hr': 0.041667, '30min': 0.020833, '10min': 0.006944} - # retrieve all time axes - time_axs = [d for d in ds.dims if 'time' in d] - time_axs_len = set(len(ds[d]) for d in time_axs) - time_axs.sort(key=lambda x: len(ds[x]), reverse=True) - mopdb_log.debug(f"in get_file_frq, time_axs: {time_axs}") - max_len = len(ds[time_axs[0]]) - # if all time axes have only 1 timestep we cannot infer frequency - # so we open also next file but get only time axs - if max_len == 1: - dsnext = xr.open_dataset(fnext, decode_times = False) - time_axs2 = [d for d in dsnext.dims if 'time' in d] - ds = xr.concat([ds[time_axs], dsnext[time_axs2]], dim='time') - time_axs = [d for d in ds.dims if 'time' in d] - time_axs_len = set(len(ds[d]) for d in time_axs) - time_axs.sort(key=lambda x: len(ds[x]), reverse=True) - for t in time_axs: - mopdb_log.debug(f"len of time axis {t}: {len(ds[t])}") - if len(ds[t]) > 1: - interval = (ds[t][1]-ds[t][0]).values - interval_file = (ds[t][-1] -ds[t][0]).values - else: - interval = interval_file - mopdb_log.debug(f"interval 2 timesteps for {t}: {interval}") - for k,v in int2frq.items(): - if math.isclose(interval, v, rel_tol=0.05): - frq[t] = k - break - return frq - - def get_cell_methods(attrs, dims): """Get cell_methods from variable attributes. If cell_methods is not defined assumes values are instantaneous @@ -359,93 +250,6 @@ def get_cell_methods(attrs, dims): val = val.replace(time_axs[0], 'time') return val, frqmod - -def write_varlist(conn, indir, match, version, alias): - """Based on model output files create a variable list and save it - to a csv file. 
Main attributes needed to map output are provided - for each variable - """ - mopdb_log = logging.getLogger('mopdb_log') - line_cols = ['name','cmor_var','units','dimensions','_frequency', - '_realm','cell_methods','cmor_table','vtype','size', - 'nsteps','fpattern','long_name','standard_name'] - vobj_list = [] - fobj_list = [] - patterns = [] - files = FPattern.list_files(indir, match) - mopdb_log.debug(f"Files after sorting: {files}") - if alias == '': - alias = 'mopdb' - fname = f"varlist_{alias}.csv" - fcsv = open(fname, 'w') - fwriter = csv.writer(fcsv, delimiter=';') - fwriter.writerow(["name", "cmor_var", "units", "dimensions", - "frequency", "realm", "cell_methods", "cmor_table", "vtype", - "size", "nsteps", "fpattern", "long_name", "standard_name"]) - for fpath in files: - # get filename pattern until date match - mopdb_log.debug(f"Filename: {fpath.name}") - fpattern = fpath.name.split(match)[0] - if fpattern in patterns: - continue - patterns.append(fpattern) - fobj = FPattern(fpattern, Path(indir)) - #pattern_list = list_files(indir, f"{fpattern}*") - nfiles = len(fobj.files) - mopdb_log.debug(f"File pattern, number of files: {fpattern}, {nfiles}") - #fwriter.writerow([f"#{fpattern}"]) - # get attributes for the file variables - ds = xr.open_dataset(str(fobj.files[0]), decode_times=False) - coords = [c for c in ds.coords] + ['latitude_longitude'] - #pass next file in case of 1 timestep per file and no frq in name - fnext = str(fobj.files[1]) - if fobj.frequency == 'NAfrq' or fobj.realm == 'atmos': - frq_dict = get_file_frq(ds, fnext) - # if only one frequency detected empty dict - if len(frq_dict) == 1: - fobj.frequency = frq_dict.popitem()[1] - else: - fobj.multiple_frq = True - mopdb_log.debug(f"Multiple frq: {fobj.multiple_frq}") - if fobj.realm == "NArealm": - fobj.realm = get_realm(version, ds) - pattern_var_list = [] - for vname in ds.variables: - vobj = Variable(vname, fobj) - if vname not in coords and all(x not in vname for x in ['_bnds','_bounds']): - v = ds[vname] - mopdb_log.debug(f"Variable: {vobj.name}") - # get size in bytes of grid for 1 timestep and number of timesteps - vobj.size = v[0].nbytes - vobj.nsteps = nfiles * v.shape[0] - # assign time axis frequency if more than one is available - if fobj.multiple_frq: - if 'time' in v.dims[0]: - vobj._frequency = frq_dict[v.dims[0]] - else: - mopdb_log.info(f"Could not detect frequency for variable: {v}") - attrs = v.attrs - vobj.cell_methods, frqmod = get_cell_methods(attrs, v.dims) - vobj.frequency = vobj.frequency + frqmod - mopdb_log.debug(f"Frequency var: {vobj.frequency}") - # try to retrieve cmip name - vobj = get_cmorname(conn, vobj, version) - vobj.units = attrs.get('units', "") - vobj.long_name = attrs.get('long_name', "") - vobj.standard_name = attrs.get('standard_name', "") - vobj.dimensions = " ".join(v.dims) - vobj.vtype = v.dtype - line = [attrgetter(k)(vobj) for k in line_cols] - fwriter.writerow(line) - vobj_list.append(vobj) - pattern_var_list.append(vobj) - fobj.varlist = pattern_var_list - fobj_list.append(fobj) - mopdb_log.info(f"Variable list for {fpattern} successfully written") - fcsv.close() - return fname, vobj_list, fobj_list - - def read_map_app4(fname): """Reads APP4 style mapping """ mopdb_log = logging.getLogger('mopdb_log') @@ -503,150 +307,6 @@ def read_map(fname, alias): var_list.append(row[:11] + [notes, alias]) return var_list - -def match_stdname(conn, row, stdn): - """Returns an updated stdn list if finds one or more variables - in cmorvar table that match the standard name passed 
as input. - It also return a False/True found_match boolean. - """ - mopdb_log = logging.getLogger('mopdb_log') - found_match = False - sql = f"""SELECT name FROM cmorvar where - standard_name='{row['standard_name']}'""" - results = query(conn, sql, first=False, logname='mopdb_log') - matches = [x[0] for x in results] - if len(matches) > 0: - stdn = add_var(stdn, row, tuple([matches]+['']*7), stdnm=True) - found_match = True - - return stdn, found_match - - -def match_var(row, version, mode, conn, records): - """Returns match for variable if found after looping - variables already mapped in database - Parameters - - """ - mopdb_log = logging.getLogger('mopdb_log') - found_match = False - # build sql query based on mode - sql_base = f"""SELECT cmor_var,input_vars,calculation,frequency, - realm,model,cmor_table,positive,units FROM mapping where - input_vars='{row['name']}'""" - sql_frq = f" and frequency='{row['frequency']}'" - sql_ver = f" and model='{version}'" - if mode == 'full': - sql = sql_base + sql_frq + sql_ver - elif mode == 'no_frq': - sql = sql_base + sql_ver - elif mode == 'no_ver': - sql = sql_base + sql_frq - # execute query and process results - result = query(conn, sql, first=False, logname='mopdb_log') - mopdb_log.debug(f"match_var: {result}, sql: {sql[110:]}") - if result is not None and result != []: - for x in result: - mopdb_log.debug(f"match: {x}") - records = add_var(records, row, x) - found_match = True - - return records, found_match - - -def parse_vars(conn, rows, version): - """Returns records of variables to include in template mapping file, - a list of all stash variables + frequency available in model output - and a list of variables already defined in db - - Parameters - ---------- - conn : connection object - rows : list(dict) - list of variables to match - version : str - model version to use to match variables - - Returns - ------- - stash_vars : list - varname-frequency for each listed variable, varname is from model output - """ - mopdb_log = logging.getLogger('mopdb_log') - full = [] - no_ver = [] - no_frq = [] - stdn = [] - no_match = [] - stash_vars = [] - - # looping through variables from file and attempt matches to db - for row in rows: - if row['name'][0] == "#" or row['name'] == 'name': - continue - else: - full, found = match_var(row, version, 'full', conn, full) - # if no match, ignore model version first and then frequency - mopdb_log.debug(f"found perfect match: {found}") - if not found: - no_ver, found = match_var(row, version, 'no_ver', conn, no_ver) - mopdb_log.debug(f"found no ver match: {found}") - if not found: - no_frq, found = match_var(row, version, 'no_frq', conn, no_frq) - mopdb_log.debug(f"found no frq match: {found}") - # make a last attempt to match using standard_name - if not found: - if row['standard_name'] != '': - stdn, found = match_stdname(conn, row, stdn) - mopdb_log.debug(f"found stdnm match: {found}") - if not found: - no_match = add_var(no_match, row, tuple([row['name']]+['']*8)) - stash_vars.append(f"{row['name']}-{row['frequency']}") - - return full, no_ver, no_frq, stdn, no_match, stash_vars - - -def add_var(vlist, row, match, stdnm=False): - """Add information from match to variable list and re-order - fields so they correspond to final mapping output. 
- - Parameters - match : tuple - match values (cmor_var,input_vars,calculation,frequency, - realm,model(version),cmor_table,positive,units) - """ - mopdb_log = logging.getLogger('mopdb_log') - # assign cmor_var from match and swap place with input_vars - mopdb_log.debug(f"Assign cmor_var: {match}") - mopdb_log.debug(f"initial row: {row}") - var = row.copy() - var['cmor_var'] = match[0] - var['input_vars'] = match[1] - orig_name = var.pop('name') - # assign realm from match - var['realm'] = match[4] - # with stdn assign cmorvar and table if only 1 match returned - # otherwise assign table from match - if stdnm: - var['input_vars'] = orig_name - if len(var['cmor_var']) == 1: - cmor_var, table = var['cmor_var'][0].split("-") - var['cmor_var'] = cmor_var - var['cmor_table'] = table - else: - var['cmor_table'] = match[6] - # add calculation, positive and version - var['calculation'] = match[2] - var['positive'] = match[7] - var['version'] = match[5] - # maybe we should override units here rather than in check_realm_units - # if units missing get them from match - if var['units'] is None or var['units'] == '': - var['units'] = match[8] - vlist.append(var) - return vlist - - def remove_duplicate(vlist, extra=[], strict=True): """Returns list without duplicate variable definitions. @@ -674,121 +334,6 @@ def remove_duplicate(vlist, extra=[], strict=True): vid_list.append(vid) return final - -def potential_vars(conn, rows, stash_vars, version): - """Returns list of variables that can be potentially derived from - model output. - - Loop across all model variables to match - Select any mapping that contains the variable and if there's a calculation - NB rows modified by add_row when assigning cmorname and positive values - - Parameters - ---------- - conn : connection object - rows : list(dict) - list of variables to match - stash_vars : list - varname-frequency for each listed variable, varname is from model output - version : str - model version to use to match variables - - Returns - ------- - """ - mopdb_log = logging.getLogger('mopdb_log') - pot_full = [] - pot_part = [] - pot_varnames = set() - for row in rows: - sql = f"""SELECT cmor_var,input_vars,calculation,frequency, - realm,model,cmor_table,positive,units FROM mapping - WHERE input_vars like '%{row['name']}%'""" - results = query(conn, sql, first=False, logname='mopdb_log') - mopdb_log.debug(f"In potential: var {row['name']}, db results {results}") - for r in results: - allinput = r[1].split(" ") - mopdb_log.debug(f"{len(allinput)> 1}") - mopdb_log.debug(all(f"{x}-{row['frequency']}" in stash_vars for x in allinput)) - if len(allinput) > 1 and all(f"{x}-{row['frequency']}" in stash_vars for x in allinput): - # if both version and frequency of applied mapping match - # consider this a full matching potential var - if r[5] == version and r[3] == row['frequency']: - pot_full = add_var(pot_full, row, r) - else: - pot_part = add_var(pot_part, row, r) - pot_varnames.add(r[0]) - return pot_full, pot_part, pot_varnames - - -def write_map_template(conn, parsed, alias): - """Write mapping csv file template based on list of variables to define - - Input varlist file order: - name, cmor_var, units, dimensions, frequency, realm, cell_methods, - cmor_table, vtype, size, nsteps, fpattern, long_name, standard_name - Mapping db order: - cmor_var, input_vars, calculation, units, dimensions, frequency, realm, - cell_methods, positive, cmor_table, model, notes, origin - for pot vars + vtype, size, nsteps, fpattern - Final template order: - cmor_var, 
input_vars, calculation, units, dimensions, frequency, realm, - cell_methods, positive, cmor_table, version, vtype, size, nsteps, fpattern, - long_name, standard_name - """ - - mopdb_log = logging.getLogger('mopdb_log') - full, no_ver, no_frq, stdn, no_match, pot_full, pot_part = parsed - keys = ['cmor_var', 'input_vars', 'calculation', 'units', - 'dimensions', 'frequency', 'realm', 'cell_methods', - 'positive', 'cmor_table', 'version', 'vtype', 'size', - 'nsteps', 'fpattern', 'long_name', 'standard_name'] - - with open(f"map_{alias}.csv", 'w') as fcsv: - fwriter = csv.DictWriter(fcsv, keys, delimiter=';') - write_vars(full, fwriter, keys, conn=conn) - div = ("# Derived variables with matching version and " + - "frequency: Use with caution!") - write_vars(pot_full, fwriter, div, conn=conn) - div = ("# Variables definitions coming from different " + - "version") - write_vars(no_ver, fwriter, div, conn=conn) - div = ("# Variables with different frequency: Use with" - + " caution!") - write_vars(no_ver, fwriter, div, conn=conn) - div = ("# Variables matched using standard_name: Use " + - "with caution!") - write_vars(stdn, fwriter, div, sortby='input_vars') - div = "# Derived variables: Use with caution!" - write_vars(pot_part, fwriter, div, conn=conn) - #pot=True, conn=conn, sortby=0) - div = "# Variables without mapping" - write_vars(no_match, fwriter, div) - mopdb_log.debug("Finished writing variables to mapping template") - fcsv.close() - - return - - -def write_vars(vlist, fwriter, div, conn=None, sortby='cmor_var'): - """ - """ - - mopdb_log = logging.getLogger('mopdb_log') - if len(vlist) > 0: - if type(div) is str: - divrow = {x:'' for x in vlist[0].keys()} - divrow['cmor_var'] = div - elif type(div) is list: - divrow = {x:x for x in div} - fwriter.writerow(divrow) - for var in sorted(vlist, key=itemgetter(sortby)): - if conn: - var = check_realm_units(conn, var) - fwriter.writerow(var) - return - - def check_realm_units(conn, var): """Checks that realm and units are consistent with values in cmor table. @@ -818,7 +363,6 @@ def check_realm_units(conn, var): mopdb_log.warning(f"Variable {vname} not found in cmor table") return var - def get_realm(version, ds): '''Try to retrieve realm if using path failed''' @@ -830,7 +374,6 @@ def get_realm(version, ds): mopdb_log.debug(f"Realm is {realm}") return realm - def check_varlist(rows, fname): """Checks that varlist written to file has sensible information for frequency and realm to avoid incorrect mapping to be produced. 
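For reference, the boolean mask returned by get_date_pattern, which stays behind in mopdb_utils, is designed to be applied with itertools.compress, exactly as the tests added in an earlier patch do; a minimal sketch with an illustrative filename:

    from itertools import compress
    from mopdb.mopdb_utils import get_date_pattern

    fname = 'umnsa_cldrad_20160603T0000.nc'
    dp = get_date_pattern(fname, 'umnsa_cldrad_')
    # keep only the characters flagged True in the mask
    date = ''.join(compress(fname, dp))  # gives '201606030000'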
@@ -857,71 +400,6 @@ def check_varlist(rows, fname): sys.exit() return - -def map_variables(conn, rows, version): - """ - """ - mopdb_log = logging.getLogger('mopdb_log') - # return lists of fully/partially matching variables and stash_vars - # these are input_vars for calculation defined in already in mapping db - full, no_ver, no_frq, stdn, no_match, stash_vars = parse_vars(conn, - rows, version) - # remove duplicates from partially matched variables - no_ver = remove_duplicate(no_ver) - no_frq = remove_duplicate(no_frq, strict=False) - no_match = remove_duplicate(no_match, strict=False) - # check if more derived variables can be added based on all - # input_vars being available - pot_full, pot_part, pot_varnames = potential_vars(conn, rows, - stash_vars, version) - # potential vars have always duplicates: 1 for each input_var - pot_full = remove_duplicate(pot_full, strict=False) - pot_part = remove_duplicate(pot_part, extra=pot_full, strict=False) - mopdb_log.info(f"Derived variables: {pot_varnames}") - return full, no_ver, no_frq, stdn, no_match, pot_full, pot_part - - -def write_catalogue(conn, parsed, vobjs, fobjs, alias): - """Write intake-esm catalogue and returns name - """ - mopdb_log = logging.getLogger('mopdb_log') - # read template json file - jfile = import_files('mopdata').joinpath('intake_cat_template.json') - with open(jfile, 'r') as f: - template = json.load(f) - # read template yaml file - yfile = import_files('mopdata').joinpath('intake_cat_template.yaml') - maincat = read_yaml(yfile) - mopdb_log.debug("Opened intake template files") - # update json data with relevant information - # update title, description etc with experiment - for k,v in template.items(): - if type(v) == str: - template[k] = v.replace('', alias) - for k,v in maincat.items(): - if type(v) == str: - maincat[k] = v.replace('', alias) - # write updated json to file - jfile = f"intake_{alias}.json" - with open(jfile, 'w') as f: - json.dump(template, f, indent=4) - # write updated yaml to file - jfile = f"intake_{alias}.yaml" - write_yaml(maincat, jfile, 'mopdb_log') - # create a dictionary for each file to list - lines = create_file_dict(fobjs, alias) - # write csv file - cols = [x['column_name'] for x in template['attributes']] - cols = ['path'] + cols - csvname = template['catalog_file'] - with lzma.open(csvname, 'wt') as fcsv: - fwriter = csv.DictWriter(fcsv, cols, delimiter=';') - fwriter.writeheader() - for fd in lines: - fwriter.writerow(fd) - fcsv.close() - return jfile, csvname - def get_date_pattern(fname, fpattern): """Try to build a date range for each file pattern based on its filename @@ -933,30 +411,3 @@ def get_date_pattern(fname, fpattern): n = len(fpattern) date_pattern[:n] = [False] * n return date_pattern - -def create_file_dict(fobjs, alias): - """ - """ - mopdb_log = logging.getLogger('mopdb_log') - for pat_obj in fobjs: - var_list = [v.name for v in pat_obj.varlist] - # set to remove '' duplicates - mapvar_list = list(set(v.cmor_var for v in pat_obj.varlist)) - stnm_list = list(set(v.standard_name for v in pat_obj.varlist)) - base_dict = {'experiment': alias, - 'realm': pat_obj.realm, - 'frequency': pat_obj.frequency, - 'variable': var_list, - 'map_var': mapvar_list, - 'standard_name': stnm_list} - # work out date_pattern in filename - fname = pat_obj.files[0].name - date_pattern = get_date_pattern(fname, pat_obj.fpattern) - # add date and path for each file - for fpath in pat_obj.files: - f = fpath.name - fd = base_dict.copy() - fd['path'] = str(fpath) - fd['date'] = ''.join(c for 
c in compress(f, date_pattern)) - lines.append(fd) - return lines From 917582f23294c4e3db7e94ef9fea956da3d728d8 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 16 Jul 2024 11:32:31 +1000 Subject: [PATCH 063/137] fixed imports --- .gitignore | 9 ++++++++- src/mopdb/mopdb.py | 7 ++++++- src/mopdb/mopdb_map.py | 10 ++-------- tests/pytest.ini | 3 +++ 4 files changed, 19 insertions(+), 10 deletions(-) create mode 100644 tests/pytest.ini diff --git a/.gitignore b/.gitignore index 3ba2e33..c0e81d6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,10 @@ -custom_app4_*.sh __pycache__/ build/ +mopper_venv/ +*.csv +*.yaml +*.json +localdata/ +src/mopper.egg-info/ +extras/ +*.txt diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index aa16b2e..4d8a269 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -26,9 +26,14 @@ import json from importlib.resources import files as import_files +from pathlib import Path -from mopdb.mopdb_utils import * +from mopdb.mopdb_utils import (mapping_sql, cmorvar_sql, read_map, + read_map_app4, map_update_sql, create_table, write_cmor_table, + check_varlist) from mopdb.utils import * +from mopdb.mopdb_map import (write_varlist, write_map_template, + write_catalogue, map_variables) def mopdb_catch(): """ diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index a0e580c..11b9ca1 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -23,6 +23,7 @@ import csv import json import lzma +import math import xarray as xr from operator import itemgetter, attrgetter @@ -32,7 +33,7 @@ from mopdb.mopdb_class import FPattern, Variable from mopdb.utils import * -from mopdb.mopdb_utils import (get_cell_methods, remove_duplicates, +from mopdb.mopdb_utils import (get_cell_methods, remove_duplicate, get_realm, check_realm_units, get_date_pattern) @@ -81,10 +82,6 @@ def get_cmorname(conn, vobj, version): mopdb_log.info(f"Found more than 1 definition for {vobj.name}:\n"+ f"{results}\n Using {vobj.cmor_var} from {vobj.cmor_table}") return vobj - "mip_era": "", - "Conventions": "CF-1.7 ACDD1.3" - } - return header def get_file_frq(ds, fnext): """Return a dictionary with frequency for each time axis. 
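The rule get_file_frq applies is simple: the gap between two consecutive timesteps, expressed in days, is compared against the int2frq table with a 5% relative tolerance. A standalone sketch of that arithmetic (the interval value is illustrative):

    import math

    int2frq = {'day': 1.0, '6hr': 0.25, '3hr': 0.125, '1hr': 0.041667}
    interval = 0.125  # days between two consecutive timesteps
    frq = next((k for k, v in int2frq.items()
                if math.isclose(interval, v, rel_tol=0.05)), None)
    assert frq == '3hr'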
@@ -230,10 +227,8 @@ def match_stdname(conn, row, stdn): if len(matches) > 0: stdn = add_var(stdn, row, tuple([matches]+['']*7), stdnm=True) found_match = True - return stdn, found_match - def match_var(row, version, mode, conn, records): """Returns match for variable if found after looping variables already mapped in database @@ -262,7 +257,6 @@ def match_var(row, version, mode, conn, records): mopdb_log.debug(f"match: {x}") records = add_var(records, row, x) found_match = True - return records, found_match def parse_vars(conn, rows, version): diff --git a/tests/pytest.ini b/tests/pytest.ini new file mode 100644 index 0000000..389e88b --- /dev/null +++ b/tests/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +log_cli = true +log_cli_level = DEBUG From cdb24633755a6cabc4f29bfe8abf40c941c3ea18 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 17 Jul 2024 14:00:34 +1000 Subject: [PATCH 064/137] improved intake catalogue --- conda/environment.yaml | 12 +------ src/mopdata/intake_cat_template.json | 7 +++-- src/mopdata/intake_cat_template.yaml | 2 -- src/mopdb/mopdb.py | 1 + src/mopdb/mopdb_map.py | 47 ++++++++++++++++++++++------ src/mopper/calculations.py | 2 +- tests/conftest.py | 14 ++++----- tests/test_calculations.py | 6 ++-- 8 files changed, 54 insertions(+), 37 deletions(-) diff --git a/conda/environment.yaml b/conda/environment.yaml index 2f0d566..ae78e08 100644 --- a/conda/environment.yaml +++ b/conda/environment.yaml @@ -1,11 +1,8 @@ -name: test-env +#name: test-env channels: - defaults - conda-forge dependencies: - - python=3.10 - - pip - - pbr - click - cmor - xarray @@ -13,10 +10,3 @@ dependencies: - dask - pyyaml - cftime - - python-dateutil - - pytest - - coverage - - codecov - - importlib_resources - - pip: - - git+https://github.com/ACCESS-Community-Hub/ACCESS-MOPPeR@pytests_sam diff --git a/src/mopdata/intake_cat_template.json b/src/mopdata/intake_cat_template.json index 502bf2a..85c943c 100644 --- a/src/mopdata/intake_cat_template.json +++ b/src/mopdata/intake_cat_template.json @@ -7,10 +7,11 @@ "format": "netcdf" }, "aggregation_control": { - "variable_column_name": "frequency", + "variable_column_name": "variable", "groupby_attrs": [ "realm", - "frequency" + "frequency", + "mapvar" ], "aggregations": [ { @@ -38,7 +39,7 @@ "column_name": "variable" }, { - "column_name": "map_var" + "column_name": "mapvar" }, { "column_name": "standard_name" diff --git a/src/mopdata/intake_cat_template.yaml b/src/mopdata/intake_cat_template.yaml index 87fc7e4..0f94eeb 100644 --- a/src/mopdata/intake_cat_template.yaml +++ b/src/mopdata/intake_cat_template.yaml @@ -14,7 +14,5 @@ sources: args: columns_with_iterables: - variable - - map_var - - standard_name read_csv_kwargs: {"dtype": {"date": str}} obj: "{{CATALOG_DIR}}/intake_.json" diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 4d8a269..4f4e3cd 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -398,6 +398,7 @@ def write_intake(ctx, fpath, match, dbname, version, alias): fpath = Path(fpath) if fpath.is_file(): fname = fpath.name + vobjs, fobjs = load_vars(fname) else: mopdb_log.debug(f"Calling model_vars() from intake: {fpath}") fname, vobjs, fobjs = model_vars(fpath, match, conn, version, alias) diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index 11b9ca1..a96fd51 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -530,24 +530,53 @@ def create_file_dict(fobjs, alias): for pat_obj in fobjs: var_list = [v.name for v in pat_obj.varlist] # set to remove '' duplicates - mapvar_list = 
list(set(v.cmor_var for v in pat_obj.varlist))
-        mapvar_list.remove("")
-        stnm_list = list(set(v.standard_name for v in pat_obj.varlist))
-        stnm_list.remove("")
         base_dict = {'experiment': alias,
-                 'realm': pat_obj.realm,
-                 'frequency': pat_obj.frequency,
-                 'variable': str(var_list),
-                 'map_var': str(mapvar_list),
-                 'standard_name': str(stnm_list)}
+                     'realm': pat_obj.realm,
+                     'frequency': pat_obj.frequency,
+                     'variable': str(var_list),
+                     'mapvar': "NAV",
+                     'standard_name': "NAV"}
         # work out date_pattern in filename
         fname = pat_obj.files[0].name
         date_pattern = get_date_pattern(fname, pat_obj.fpattern)
         # add date and path for each file
+        path_list = []
         for fpath in pat_obj.files:
             f = fpath.name
             fd = base_dict.copy()
             fd['path'] = str(fpath)
             fd['date'] = ''.join(c for c in compress(f, date_pattern))
             lines.append(fd)
+            path_list.append((fd['path'],fd['date']))
+        lines = add_mapvars(pat_obj.varlist, lines, path_list, alias)
     return lines
+
+def add_mapvars(vobjs, lines, path_list, alias):
+    """Adds a catalogue line for each variable with a cmor name or
+    standard name, reusing path and date from path_list.
+    """
+    mopdb_log = logging.getLogger('mopdb_log')
+    for vobj in vobjs:
+        if vobj.cmor_var != "" or vobj.standard_name != "":
+            mapvar = vobj.cmor_var
+            stdname = vobj.standard_name
+            base_dict = {'experiment': alias,
+                     'realm': vobj.realm,
+                     'frequency': vobj.frequency,
+                     'variable': str([vobj.name]),
+                     'mapvar': mapvar if mapvar else "NAV",
+                     'standard_name': stdname if stdname else "NAV"}
+            # use path_list to add path and date for all files
+            for fpath, date in path_list:
+                fd = base_dict.copy()
+                fd['path'] = fpath
+                fd['date'] = date
+                lines.append(fd)
+    return lines
+
+def load_vars(fname):
+    """Returns Variable and FPattern objs from varlist or map file.
+    """
+    vobjs = []
+    fobjs = []
+    # distinguish between varlist and mapping file based on header
+    return vobjs, fobjs
diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py
index 9f34e35..ed9ae92 100644
--- a/src/mopper/calculations.py
+++ b/src/mopper/calculations.py
@@ -1167,7 +1167,7 @@ def calc_topsoil(soilvar):
     Returns
     -------
     topsoil : Xarray DataArray
-        Variable define don top 10cm of soil
+        Variable defined on top 10cm of soil
     """
     depth = soilvar.depth
     # find index of bottom depth level including the first 10cm of soil
diff --git a/tests/conftest.py b/tests/conftest.py
index f26c225..0dd6c56 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -25,7 +25,7 @@
 import logging
 import csv
 from mopdb.mopdb_utils import mapping_sql, cmorvar_sql
-#from mopper.setup_utils import filelist_sql
+from mopper.setup_utils import filelist_sql
 
 TESTS_HOME = os.path.abspath(os.path.dirname(__file__))
 
@@ -59,12 +59,12 @@ def setup_access_db(session):
     session.connection.commit()
 
 
-#@pytest.fixture
-#def setup_mopper_db(session):
-#    filelist_sql = mapping_sql()
-#    session.execute(filelist_sql)
-#    session.execute('''INSERT INTO filelist VALUES ("/testdata/atmos/umnsa_spec_*.nc", "/testdata/mjo-elnino/v1-0/A10min/", "tas_AUS2200_mjo-elnino_subhrPt_20160101001000-20160102000000.nc", "fld_s03i236", "tas", "AUS2200_A10min", "subhrPt", "atmos", "point", "20160101T0005", "20160102T0000", "201601010000", "201601012355", "unprocessed", "3027.83203125", "mjo-elnino", "K", "AUS2200", "AUS2200", "/testdata/mjo-elnino/mjo-elnino.json", "1970-01-01", "v1-0")''')
-#    session.connection.commit()
+@pytest.fixture
+def setup_mopper_db(session):
+    flist_sql = filelist_sql()
+    session.execute(flist_sql)
+    session.execute('''INSERT INTO filelist VALUES ("/testdata/atmos/umnsa_spec_*.nc", "/testdata/mjo-elnino/v1-0/A10min/", 
"tas_AUS2200_mjo-elnino_subhrPt_20160101001000-20160102000000.nc", "fld_s03i236", "tas", "AUS2200_A10min", "subhrPt", "atmos", "point", "20160101T0005", "20160102T0000", "201601010000", "201601012355", "unprocessed", "3027.83203125", "mjo-elnino", "K", "AUS2200", "AUS2200", "/testdata/mjo-elnino/mjo-elnino.json", "1970-01-01", "v1-0")''') + session.connection.commit() def test_check_timestamp(caplog): diff --git a/tests/test_calculations.py b/tests/test_calculations.py index dc8507b..692211f 100644 --- a/tests/test_calculations.py +++ b/tests/test_calculations.py @@ -58,15 +58,14 @@ def create_var(nlat, nlon, ntime=None, nlev=None, sdepth=False, seed=100): attrs={'name': 'random'}) return da -mrsol = create_var(2, 3, ntime=4, sdepth=True) def test_calc_topsoil(): - global mrsol + mrsol = create_var(2, 3, ntime=4, sdepth=True) + #print(mrsol) expected = mrsol.isel(depth=0) + mrsol.isel(depth=1)/3.0 out = calc_topsoil(mrsol) xrtest.assert_allclose(out, expected, rtol=1e-05) -''' def test_overturn_stream(caplog): global ctx caplog.set_level(logging.DEBUG, logger='varlog_1') @@ -118,4 +117,3 @@ def test_overturn_stream(caplog): with ctx: out4 = overturn_stream(varlist) nptest.assert_array_equal(res4, out4) -''' From c049d46af3e13101e815d98356ebfa126293de98 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 18 Jul 2024 17:09:05 +1000 Subject: [PATCH 065/137] modifying intake function to allow different workflows --- src/mopdata/intake_cat_template.json | 9 +++ src/mopdb/mopdb.py | 105 +++++++++++++++------------ src/mopdb/mopdb_class.py | 1 + src/mopdb/mopdb_map.py | 38 ++++++++-- 4 files changed, 102 insertions(+), 51 deletions(-) diff --git a/src/mopdata/intake_cat_template.json b/src/mopdata/intake_cat_template.json index 85c943c..f3395ba 100644 --- a/src/mopdata/intake_cat_template.json +++ b/src/mopdata/intake_cat_template.json @@ -46,6 +46,15 @@ }, { "column_name": "date" + }, + { + "column_name": "units" + }, + { + "column_name": "calculation" + }, + { + "column_name": "cell_methods" } ] } diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 4f4e3cd..85e6136 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -33,7 +33,7 @@ check_varlist) from mopdb.utils import * from mopdb.mopdb_map import (write_varlist, write_map_template, - write_catalogue, map_variables) + write_catalogue, map_variables, load_vars) def mopdb_catch(): """ @@ -49,11 +49,20 @@ def mopdb_catch(): def require_date(ctx, param, value): - """Changes match option in template/intake commands from optional to + """Changes match option in template command from optional to required if fpath is a directory. """ - if Path(value).is_dir(): - ctx.command.params[1].required = True + # this looks convoluted but pop() was necessary to retrieve the + # objetc rather than the string + names = [] + print(ix for x in ctx.command.params.keys()) + for i in range(len(ctx.command.params)): + opt = ctx.command.params.pop() + print(type(opt)) + names.append(opt.name) + idx = names.index('match') + if Path(value).is_dir() and 'filelist' not in names: + ctx.command.params[idx].required = True return value @@ -77,20 +86,20 @@ def map_args(f): constraints = [ click.option('--fpath', '-f', type=str, required=True, callback=require_date, - help=(''''Path for model output files. For "template" - command can also be file generated by varlist step''')), + help=("""Model output files path. 
For 'template' + command can also be file generated by varlist step""")), click.option('--match', '-m', type=str, required=False, - help=('''String to match output files. Most often - the timestamp from one of the output files''')), + help=("""String to match output files. Most often + the timestamp from one of the output files""")), click.option('--version', '-v', required=True, type=click.Choice(['ESM1.5', 'CM2', 'AUS2200', 'OM2']), show_default=True, - help='ACCESS version currently only CM2, ESM1.5, AUS2200, OM2'), + help="ACCESS version currently only CM2, ESM1.5, AUS2200, OM2"), click.option('--dbname', type=str, required=False, default='default', - help='Database relative path by default is package access.db'), + help="Database relative path by default is package access.db"), click.option('--alias', '-a', type=str, required=False, default='', - help='''Alias to use to keep track of variable definition origin. - If none passed uses input filename''')] + help="""Alias to use to keep track of variable definition origin. + If none passed uses input filename""")] for c in reversed(constraints): f = c(f) return f @@ -307,7 +316,7 @@ def map_template(ctx, fpath, match, dbname, version, alias): mapping table. If not tries to partially match them. It can get as input the directory containing the output in - which case it will first call model_vars() (varlist command) + which case it will first call write_varlist() or the file output of the same if already available. Parameters @@ -338,20 +347,25 @@ def map_template(ctx, fpath, match, dbname, version, alias): # work out if fpath is varlist or path to output fpath = Path(fpath) if fpath.is_file(): - fname = fpath.name + fobjs, vobjs = load_vars(fpath) + name = fpath.name else: - mopdb_log.debug(f"Calling model_vars() from template: {fpath}") - fname, vobjs, fobjs = model_vars(fpath, match, conn, version, alias) + mopdb_log.debug(f"Calling write_varlist() from template: {fpath}") + fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) if alias == '': alias = fname.split(".")[0] +# also from here on it should be called by separate function I can call from intake too +# without repeating steps # read list of vars from file - with open(fname, 'r') as csvfile: - reader = csv.DictReader(csvfile, delimiter=';') - rows = list(reader) - check_varlist(rows, fname) + # this should now spit out fobjs, vobjs to pass to template + #with open(fname, 'r') as csvfile: + # reader = csv.DictReader(csvfile, delimiter=';') + # rows = list(reader) + #check_varlist(rows, fname) # return lists of fully/partially matching variables and stash_vars # these are input_vars for calculation defined in already in mapping db - parsed = map_variables(conn, rows, version) + #parsed = map_variables(conn, rows, version) + parsed = map_variables(conn, vobjs, version) # potential vars have always duplicates: 1 for each input_var write_map_template(conn, parsed, alias) conn.close() @@ -361,12 +375,14 @@ def map_template(ctx, fpath, match, dbname, version, alias): @mopdb.command(name='intake') @map_args +@click.option('--filelist','-fl', type=str, required=False, default=None, + help='Map or varlist csv file relative path') @click.pass_context -def write_intake(ctx, fpath, match, dbname, version, alias): +def write_intake(ctx, fpath, match, filelist, dbname, version, alias): """Writes an intake-esm catalogue. 
It can get as input the directory containing the output in - which case it will first call model_vars() (varlist command) + which case it will first call write_varlist() (varlist command) or the file output of the same if already available. Parameters @@ -374,10 +390,11 @@ def write_intake(ctx, fpath, match, dbname, version, alias): ctx : obj Click context object fpath : str - Path of csv input file with output variables to map or - of directory containing output files to scan + Path of directory containing output files to scan match : str Date or other string to match to individuate one file per type + filelist : str + Map or varlist csv file path, optional (default is None) dbname : str Database relative path (default is data/access.db) version : str @@ -397,11 +414,15 @@ def write_intake(ctx, fpath, match, dbname, version, alias): # work out if fpath is varlist or path to output fpath = Path(fpath) if fpath.is_file(): - fname = fpath.name - vobjs, fobjs = load_vars(fname) + mopdb_log.error(f""" {fpath} + should be absolute or relative path to model output. + To pass a varlist or map file use --filelist/-f""") + elif filelist is None: + mopdb_log.debug(f"Calling write_varlist() from intake: {fpath}") + fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) else: - mopdb_log.debug(f"Calling model_vars() from intake: {fpath}") - fname, vobjs, fobjs = model_vars(fpath, match, conn, version, alias) + fname = filelist.name + vobjs, fobjs = load_vars(filelist) if alias == '': alias = fname.split(".")[0] # read list of vars from file @@ -470,23 +491,13 @@ def update_map(ctx, dbname, fname, alias): return None -@mopdb.command(name='varlist') -@map_args -@click.pass_context -def list_vars(ctx, fpath, match, dbname, version, alias): - """Calls model_vars to generate list of variables""" - # connect to db, check first if db exists or exit - if dbname == 'default': - dbname = import_files('mopdata').joinpath('access.db') - conn = db_connect(dbname) - conn = db_connect(dbname, logname='mopdb_log') - fname, vobjs, fobjs = model_vars(fpath, match, conn, version, alias) - conn.close() return None +@mopdb.command(name='varlist') +@map_args @click.pass_context -def model_vars(ctx, fpath, match, conn, version, alias): +def model_vars(ctx, fpath, match, version, alias): """Read variables from model output opens one file for each kind, save variable list as csv file @@ -508,13 +519,15 @@ def model_vars(ctx, fpath, match, conn, version, alias): Returns ------- - fname : str - Name of output varlist file """ - + # connect to db, check first if db exists or exit + if dbname == 'default': + dbname = import_files('mopdata').joinpath('access.db') + conn = db_connect(dbname, logname='mopdb_log') mopdb_log = logging.getLogger('mopdb_log') fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) - return fname, vobjs, fobjs + conn.close() + return None @mopdb.command(name='del') diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index 2fec511..864b5d1 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -96,6 +96,7 @@ def __init__(self, varname: str, fobj: FPattern): self._realm = fobj.realm self.cmor_var = '' self.cmor_table = '' + self.calculation = '' #self.version = self.fpattern.version self.match = False # descriptive attributes diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index a96fd51..dfd56c6 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -30,11 +30,12 @@ from pathlib import Path from itertools import 
compress from importlib.resources import files as import_files +from access_nri_intake.source.builders import AccessEsm15Builder from mopdb.mopdb_class import FPattern, Variable from mopdb.utils import * from mopdb.mopdb_utils import (get_cell_methods, remove_duplicate, - get_realm, check_realm_units, get_date_pattern) + get_realm, check_realm_units, get_date_pattern, check_varlist) def get_cmorname(conn, vobj, version): @@ -535,7 +536,10 @@ def create_file_dict(fobjs, alias): 'frequency': pat_obj.frequency, 'variable': str(var_list), 'mapvar': "NAV", - 'standard_name': "NAV"} + 'standard_name': "", + 'units': "", + 'calculation': "", + 'cell_methods': ""} # work out date_pattern in filename fname = pat_obj.files[0].name date_pattern = get_date_pattern(fname, pat_obj.fpattern) @@ -558,13 +562,15 @@ def add_mapvars(vobjs, lines, path_list, alias): for vobj in vobjs: if vobj.cmor_var != "" or vobj.standard_name != "": mapvar = vobj.cmor_var - stdname = vobj.standard_name base_dict = {'experiment': alias, 'realm': vobj.realm, 'frequency': vobj.frequency, 'variable': str([vobj.name]), 'mapvar': mapvar if mapvar else "NAV", - 'standard_name': stdname if stdname else "NAV"} + 'standard_name': vobj.standard_name, + 'units': vobj.units, + 'calculation': vobj.calculation, + 'cell_methods': vobj.cell_methods} # use path_list to add path and date for all files for fpath, date in path_list: fd = base_dict.copy() @@ -578,5 +584,27 @@ def load_vars(fname): """ vobjs = [] fobjs = [] - # distinguish between varlist and mapping file vbased on header + # distinguish between varlist and mapping file based on header + with open(fname, 'r') as csvfile: + reader = csv.DictReader(csvfile, delimiter=';') + rows = list(reader) + #check_varlist(rows, fname) + # set fobjs + patterns = list(set(x['fpattern'] for x in rows)) + print(patterns) + for pat in patterns: + if pat != "": + fo = FPattern(fpattern, Path(indir)) + fobjs.append(fo) + patterns2 = [x['fpattern'] for x in rows] + sys.exit() + + if 'calculation' in rows[0].keys(): + map_file = True + else: + map_file = False + for row in rows[1:]: + row['fpattern'] + v = Variable(row['name'], ) + #for field in row[0]: return vobjs, fobjs From 57d6d2466a073cfaabf46803d7efdf8174925ddb Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 23 Jul 2024 21:14:05 +1000 Subject: [PATCH 066/137] completed #147, introduced MapVaribale class --- src/mopdb/mopdb.py | 37 ++++----- src/mopdb/mopdb_class.py | 98 +++++++++++++++++----- src/mopdb/mopdb_map.py | 171 +++++++++++++++++++++++---------------- src/mopdb/mopdb_utils.py | 22 ++--- src/mopper/mop_setup.py | 2 +- src/mopper/mop_utils.py | 2 +- 6 files changed, 210 insertions(+), 122 deletions(-) diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 85e6136..fee8f45 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -33,7 +33,7 @@ check_varlist) from mopdb.utils import * from mopdb.mopdb_map import (write_varlist, write_map_template, - write_catalogue, map_variables, load_vars) + write_catalogue, map_variables, load_vars, get_map_obj) def mopdb_catch(): """ @@ -52,14 +52,9 @@ def require_date(ctx, param, value): """Changes match option in template command from optional to required if fpath is a directory. 
""" - # this looks convoluted but pop() was necessary to retrieve the - # objetc rather than the string names = [] - print(ix for x in ctx.command.params.keys()) for i in range(len(ctx.command.params)): - opt = ctx.command.params.pop() - print(type(opt)) - names.append(opt.name) + names.append(ctx.command.params[i].name) idx = names.index('match') if Path(value).is_dir() and 'filelist' not in names: ctx.command.params[idx].required = True @@ -347,8 +342,10 @@ def map_template(ctx, fpath, match, dbname, version, alias): # work out if fpath is varlist or path to output fpath = Path(fpath) if fpath.is_file(): - fobjs, vobjs = load_vars(fpath) - name = fpath.name + map_file, vobjs, fobjs = load_vars(fpath) + fname = fpath.name + mopdb_log.debug(f"Imported {len(vobjs)} objects from file {fpath}") + mopdb_log.debug(f"Is mapping file? {map_file}") else: mopdb_log.debug(f"Calling write_varlist() from template: {fpath}") fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) @@ -369,7 +366,6 @@ def map_template(ctx, fpath, match, dbname, version, alias): # potential vars have always duplicates: 1 for each input_var write_map_template(conn, parsed, alias) conn.close() - return @@ -420,21 +416,26 @@ def write_intake(ctx, fpath, match, filelist, dbname, version, alias): elif filelist is None: mopdb_log.debug(f"Calling write_varlist() from intake: {fpath}") fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) + map_file = False else: - fname = filelist.name - vobjs, fobjs = load_vars(filelist) + flist = Path(filelist) + fname = flist.name + map_file, vobjs, fobjs = load_vars(flist, indir=fpath) if alias == '': alias = fname.split(".")[0] # read list of vars from file - with open(fname, 'r') as csvfile: - reader = csv.DictReader(csvfile, delimiter=';') - rows = list(reader) - check_varlist(rows, fname) + #with open(fname, 'r') as csvfile: + # reader = csv.DictReader(csvfile, delimiter=';') + # rows = list(reader) + #check_varlist(rows, fname) # return lists of fully/partially matching variables and stash_vars # these are input_vars for calculation defined in already in mapping db - parsed = map_variables(conn, rows, version) + if map_file is False: + parsed = map_variables(conn, vobjs, version) + vobjs = get_map_obj(parsed) + write_map_template(conn, parsed, alias) # potential vars have always duplicates: 1 for each input_var - cat_name, fcsv = write_catalogue(conn, parsed, vobjs, fobjs, alias) + cat_name, fcsv = write_catalogue(conn, vobjs, fobjs, alias) mopdb_log.info(f"""Intake-esm and intake catalogues written to {cat_name} and {cat_name.replace('json','yaml')}. File list saved to {fcsv}""") conn.close() diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index 864b5d1..8b73805 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -25,7 +25,7 @@ class FPattern(): its attributes represents features of the variables which are shared. 
""" - def __init__(self, fpattern: str, fpath: Path): + def __init__(self, fpattern: str, fpath: Path | None) -> None: self.fpattern = fpattern self.fpath = fpath self.files = self.get_files() @@ -37,34 +37,39 @@ def __init__(self, fpattern: str, fpath: Path): def get_frequency(self): frequency = 'NAfrq' - fname = str(self.files[0]) - if self.realm == 'atmos': - fbits = fname.split("_") - frequency = fbits[-1].replace(".nc", "") - elif self.realm == 'ocean': - if any(x in fname for x in ['scalar', 'month']): - frequency = 'mon' - elif 'daily' in fname: - frequency = 'day' - elif self.realm == 'seaIce': - if '_m.' in fname: - frequency = 'mon' - elif '_d.' in fname: - frequency = 'day' + if len(self.files) > 0 and self.realm != 'NArealm': + fname = str(self.files[0]) + if self.realm == 'atmos': + fbits = fname.split("_") + frequency = fbits[-1].replace(".nc", "") + elif self.realm == 'ocean': + if any(x in fname for x in ['scalar', 'month']): + frequency = 'mon' + elif 'daily' in fname: + frequency = 'day' + elif self.realm == 'seaIce': + if '_m.' in fname: + frequency = 'mon' + elif '_d.' in fname: + frequency = 'day' return frequency - def get_realm(self): realm = 'NArealm' - realm = next((x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] - if x in self.fpath.parts), 'NArealm') + if self.fpath is not None: + realm = next((x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] + if x in self.fpath.parts), 'NArealm') fix_realm = {'atm': 'atmos', 'ice': 'seaIce', 'ocn': 'ocean'} if realm in fix_realm.keys(): realm = fix_realm[realm] return realm def get_files(self): - return self.list_files(self.fpath, self.fpattern) + if self.fpath is None: + files = [] + else: + files = self.list_files(self.fpath, self.fpattern) + return files @staticmethod def list_files(indir, match): @@ -96,8 +101,9 @@ def __init__(self, varname: str, fobj: FPattern): self._realm = fobj.realm self.cmor_var = '' self.cmor_table = '' + #self.input_vars = varname self.calculation = '' - #self.version = self.fpattern.version + self.version = fobj.version self.match = False # descriptive attributes self.units = '' @@ -136,3 +142,55 @@ def realm(self, value): ['atmos', 'seaIce', 'ocean', 'land', 'landIce']): value = 'NArealm' self.realm = value + + def get_match(self): + """Returns the attributes that mimic + a database match""" + if self.cmor_var != '': + cmor_var = self.cmor_var + else: + cmor_var = self.name + match = (self.cmor_var, self.name, '', self.frequency, + self.realm, self.version, '', self.positive, self.units) + return match + + +class MapVariable(): + """This class represent a mapping for variable + It's similar but from a cmor_name point of view + """ + + def __init__(self, match: list, vobj: Variable): + # path object + self.fpattern = vobj.fpattern + # mapping attributes + self.frequency = vobj.frequency + self.realm = match[4] + self.cmor_var = match[0] + self.cmor_table = match[6] + self.input_vars = match[1] + self.calculation = match[2] + self.version = match[5] + # could change this to nomatch found or + # kind of match + self.match = True + # descriptive attributes + self.units = vobj.units + if self.units == '': + self.units = match[8] + self.dimensions = vobj.dimensions + self.cell_methods = vobj.cell_methods + self.positive = match[7] + self.long_name = vobj.long_name + self.standard_name = vobj.standard_name + # type and size attributes + self.vtype = vobj.vtype + self.size = vobj.size + self.nsteps = vobj.nsteps + + def attrs(self): + attrs = [] + for k in self.__dict__.keys(): + if k not in 
['match']: + attrs.append(k) + return attrs diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index dfd56c6..4f098ba 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -32,7 +32,7 @@ from importlib.resources import files as import_files from access_nri_intake.source.builders import AccessEsm15Builder -from mopdb.mopdb_class import FPattern, Variable +from mopdb.mopdb_class import FPattern, Variable, MapVariable from mopdb.utils import * from mopdb.mopdb_utils import (get_cell_methods, remove_duplicate, get_realm, check_realm_units, get_date_pattern, check_varlist) @@ -214,7 +214,7 @@ def write_varlist(conn, indir, match, version, alias): fcsv.close() return fname, vobj_list, fobj_list -def match_stdname(conn, row, stdn): +def match_stdname(conn, vobj, stdn): """Returns an updated stdn list if finds one or more variables in cmorvar table that match the standard name passed as input. It also return a False/True found_match boolean. @@ -222,15 +222,15 @@ def match_stdname(conn, row, stdn): mopdb_log = logging.getLogger('mopdb_log') found_match = False sql = f"""SELECT name FROM cmorvar where - standard_name='{row['standard_name']}'""" + standard_name='{vobj.standard_name}'""" results = query(conn, sql, first=False, logname='mopdb_log') matches = [x[0] for x in results] if len(matches) > 0: - stdn = add_var(stdn, row, tuple([matches]+['']*7), stdnm=True) + stdn = add_var(stdn, vobj, tuple([matches]+['']*7), stdnm=True) found_match = True return stdn, found_match -def match_var(row, version, mode, conn, records): +def match_var(vobj, version, mode, conn, records): """Returns match for variable if found after looping variables already mapped in database Parameters @@ -241,8 +241,8 @@ def match_var(row, version, mode, conn, records): # build sql query based on mode sql_base = f"""SELECT cmor_var,input_vars,calculation,frequency, realm,model,cmor_table,positive,units FROM mapping where - input_vars='{row['name']}'""" - sql_frq = f" and frequency='{row['frequency']}'" + input_vars='{vobj.name}'""" + sql_frq = f" and frequency='{vobj.frequency}'" sql_ver = f" and model='{version}'" if mode == 'full': sql = sql_base + sql_frq + sql_ver @@ -252,15 +252,15 @@ def match_var(row, version, mode, conn, records): sql = sql_base + sql_frq # execute query and process results result = query(conn, sql, first=False, logname='mopdb_log') - mopdb_log.debug(f"match_var: {result}, sql: {sql[110:]}") + mopdb_log.debug(f"match_var: {result}, sql: {sql[114:]}") if result is not None and result != []: for x in result: mopdb_log.debug(f"match: {x}") - records = add_var(records, row, x) + records = add_var(records, vobj, x) found_match = True return records, found_match -def parse_vars(conn, rows, version): +def parse_vars(conn, vobjs, version): """Returns records of variables to include in template mapping file, a list of all stash variables + frequency available in model output and a list of variables already defined in db @@ -287,31 +287,33 @@ def parse_vars(conn, rows, version): stash_vars = [] # looping through variables from file and attempt matches to db - for row in rows: - if row['name'][0] == "#" or row['name'] == 'name': - continue - else: - full, found = match_var(row, version, 'full', conn, full) + for v in vobjs: + #if row['name'][0] == "#" or row['name'] == 'name': + # continue + #else: + full, found = match_var(v, version, 'full', conn, full) # if no match, ignore model version first and then frequency - mopdb_log.debug(f"found perfect match: {found}") + #mopdb_log.debug(f"found 
perfect match: {found}") if not found: - no_ver, found = match_var(row, version, 'no_ver', conn, no_ver) + no_ver, found = match_var(v, version, 'no_ver', conn, no_ver) mopdb_log.debug(f"found no ver match: {found}") if not found: - no_frq, found = match_var(row, version, 'no_frq', conn, no_frq) + no_frq, found = match_var(v, version, 'no_frq', conn, no_frq) mopdb_log.debug(f"found no frq match: {found}") # make a last attempt to match using standard_name if not found: - if row['standard_name'] != '': - stdn, found = match_stdname(conn, row, stdn) + if v.standard_name != '': + stdn, found = match_stdname(conn, v, stdn) mopdb_log.debug(f"found stdnm match: {found}") if not found: - no_match = add_var(no_match, row, tuple([row['name']]+['']*8)) - stash_vars.append(f"{row['name']}-{row['frequency']}") + # use original var values for match + match = v.get_match() + no_match = add_var(no_match, v, v.get_match()) + stash_vars.append(f"{v.name}-{v.frequency}") return full, no_ver, no_frq, stdn, no_match, stash_vars -def add_var(vlist, row, match, stdnm=False): +def add_var(vlist, vobj, match, stdnm=False): """Add information from match to variable list and re-order fields so they correspond to final mapping output. @@ -323,35 +325,36 @@ def add_var(vlist, row, match, stdnm=False): mopdb_log = logging.getLogger('mopdb_log') # assign cmor_var from match and swap place with input_vars mopdb_log.debug(f"Assign cmor_var: {match}") - mopdb_log.debug(f"initial row: {row}") - var = row.copy() - var['cmor_var'] = match[0] - var['input_vars'] = match[1] - orig_name = var.pop('name') + mopdb_log.debug(f"initial variable definition: {vobj}") + #var = vobj.__dict__.copy() + var = MapVariable(match, vobj) + #var.cmor_var = match[0] + #vobj.input_vars = match[1] + # orig_name = var.pop('name') # assign realm from match - var['realm'] = match[4] + #var['realm'] = match[4] # with stdn assign cmorvar and table if only 1 match returned # otherwise assign table from match if stdnm: - var['input_vars'] = orig_name - if len(var['cmor_var']) == 1: - cmor_var, table = var['cmor_var'][0].split("-") - var['cmor_var'] = cmor_var - var['cmor_table'] = table - else: - var['cmor_table'] = match[6] + var.input_vars = vobj.name + if len(var.cmor_var) == 1: + cmor_var, table = var.cmor_var[0].split("-") + var.cmor_var = cmor_var + var.cmor_table = table + #else: + # var['cmor_table'] = match[6] # add calculation, positive and version - var['calculation'] = match[2] - var['positive'] = match[7] - var['version'] = match[5] + #var['calculation'] = match[2] + #var['positive'] = match[7] + #var['version'] = match[5] # maybe we should override units here rather than in check_realm_units # if units missing get them from match - if var['units'] is None or var['units'] == '': - var['units'] = match[8] + #if var['units'] is None or var['units'] == '': + # var['units'] = match[8] vlist.append(var) return vlist -def potential_vars(conn, rows, stash_vars, version): +def potential_vars(conn, vobjs, stash_vars, version): """Returns list of variables that can be potentially derived from model output. 
@@ -376,23 +379,23 @@ def potential_vars(conn, rows, stash_vars, version): pot_full = [] pot_part = [] pot_varnames = set() - for row in rows: + for v in vobjs: sql = f"""SELECT cmor_var,input_vars,calculation,frequency, realm,model,cmor_table,positive,units FROM mapping - WHERE input_vars like '%{row['name']}%'""" + WHERE input_vars like '%{v.name}%'""" results = query(conn, sql, first=False, logname='mopdb_log') - mopdb_log.debug(f"In potential: var {row['name']}, db results {results}") + mopdb_log.debug(f"In potential: var {v.name}, db results {results}") for r in results: allinput = r[1].split(" ") mopdb_log.debug(f"{len(allinput)> 1}") - mopdb_log.debug(all(f"{x}-{row['frequency']}" in stash_vars for x in allinput)) - if len(allinput) > 1 and all(f"{x}-{row['frequency']}" in stash_vars for x in allinput): + mopdb_log.debug(all(f"{x}-{v.frequency}" in stash_vars for x in allinput)) + if len(allinput) > 1 and all(f"{x}-{v.frequency}" in stash_vars for x in allinput): # if both version and frequency of applied mapping match # consider this a full matching potential var - if r[5] == version and r[3] == row['frequency']: - pot_full = add_var(pot_full, row, r) + if r[5] == version and r[3] == v.frequency: + pot_full = add_var(pot_full, v, r) else: - pot_part = add_var(pot_part, row, r) + pot_part = add_var(pot_part, v, r) pot_varnames.add(r[0]) return pot_full, pot_part, pot_varnames @@ -437,7 +440,6 @@ def write_map_template(conn, parsed, alias): write_vars(stdn, fwriter, div, sortby='input_vars') div = "# Derived variables: Use with caution!" write_vars(pot_part, fwriter, div, conn=conn) - #pot=True, conn=conn, sortby=0) div = "# Variables without mapping" write_vars(no_match, fwriter, div) mopdb_log.debug("Finished writing variables to mapping template") @@ -451,32 +453,36 @@ def write_vars(vlist, fwriter, div, conn=None, sortby='cmor_var'): mopdb_log = logging.getLogger('mopdb_log') if len(vlist) > 0: if type(div) is str: - divrow = {x:'' for x in vlist[0].keys()} + divrow = {x:'' for x in vlist[0].attrs()} divrow['cmor_var'] = div elif type(div) is list: divrow = {x:x for x in div} fwriter.writerow(divrow) - for var in sorted(vlist, key=itemgetter(sortby)): + dlist = [] + for var in vlist: if conn: var = check_realm_units(conn, var) - fwriter.writerow(var) + dlist.append( var.__dict__ ) + for dvar in sorted(dlist, key=itemgetter(sortby)): + dvar.pop('match') + fwriter.writerow(dvar) return -def map_variables(conn, rows, version): +def map_variables(conn, vobjs, version): """ """ mopdb_log = logging.getLogger('mopdb_log') # return lists of fully/partially matching variables and stash_vars # these are input_vars for calculation defined in already in mapping db full, no_ver, no_frq, stdn, no_match, stash_vars = parse_vars(conn, - rows, version) + vobjs, version) # remove duplicates from partially matched variables no_ver = remove_duplicate(no_ver) no_frq = remove_duplicate(no_frq, strict=False) no_match = remove_duplicate(no_match, strict=False) # check if more derived variables can be added based on all # input_vars being available - pot_full, pot_part, pot_varnames = potential_vars(conn, rows, + pot_full, pot_part, pot_varnames = potential_vars(conn, vobjs, stash_vars, version) # potential vars have always duplicates: 1 for each input_var pot_full = remove_duplicate(pot_full, strict=False) @@ -484,7 +490,16 @@ def map_variables(conn, rows, version): mopdb_log.info(f"Derived variables: {pot_varnames}") return full, no_ver, no_frq, stdn, no_match, pot_full, pot_part -def 
write_catalogue(conn, parsed, vobjs, fobjs, alias): +def get_map_obj(parsed): + """Returns list of variable objects to pass to intake""" + full, no_ver, no_frq, stdn, no_match, pot_full, pot_part = parsed + vobjs = [] + select = full + no_ver + no_frq + for v in select: + vobjs.append(v) + return vobjs + +def write_catalogue(conn, vobjs, fobjs, alias): """Write intake-esm catalogue and returns name """ mopdb_log = logging.getLogger('mopdb_log') @@ -579,11 +594,14 @@ def add_mapvars(vobjs, lines, path_list, alias): lines.append(fd) return lines -def load_vars(fname): +def load_vars(fname, indir=None): """Returns Variable and FPattern objs from varlist or map file. """ + mopdb_log = logging.getLogger('mopdb_log') vobjs = [] - fobjs = [] + fobjs = {} + if indir is not None: + indir = Path(indir) # distinguish between varlist and mapping file based on header with open(fname, 'r') as csvfile: reader = csv.DictReader(csvfile, delimiter=';') @@ -591,20 +609,31 @@ def load_vars(fname): #check_varlist(rows, fname) # set fobjs patterns = list(set(x['fpattern'] for x in rows)) - print(patterns) for pat in patterns: if pat != "": - fo = FPattern(fpattern, Path(indir)) - fobjs.append(fo) - patterns2 = [x['fpattern'] for x in rows] - sys.exit() - + fo = FPattern(pat, indir) + fobjs[pat] = fo if 'calculation' in rows[0].keys(): map_file = True + colname = 'input_vars' else: map_file = False - for row in rows[1:]: - row['fpattern'] - v = Variable(row['name'], ) - #for field in row[0]: - return vobjs, fobjs + colname = 'name' + for row in rows: + fo = fobjs[row['fpattern']] + vo = Variable(row[colname], fo) + for k,v in row.items(): + if k in ['realm', 'frequency']: + k = '_' + k + vo.__dict__[k] = v + if fo.realm == 'NArealm': + fo.realm = vo.realm + if fo.frequency == 'NAfrq': + fo.frequency = vo.frequency + fo.varlist.append(vo) + if map_file is True: + mvo = MapVariable(list(vo.get_match()), vo) + vobjs.append(mvo) + else: + vobjs.append(vo) + return map_file, vobjs, [x for x in fobjs.values()] diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 062e9dd..0f80b6d 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -323,12 +323,12 @@ def remove_duplicate(vlist, extra=[], strict=True): if strict is True: keys += ['frequency', 'realm'] if extra: - vid_list = [tuple(x[k] for k in keys) for x in extra] + vid_list = [tuple(getattr(x,k) for k in keys) for x in extra] mopdb_log.debug(f"vid_list: {vid_list}") final = [] for v in vlist: - vid = tuple(v[k] for k in keys) - mopdb_log.debug(f"var and vid: {v['cmor_var']}, {vid}") + vid = tuple(getattr(v,k) for k in keys) + mopdb_log.debug(f"var and vid: {v.cmor_var}, {vid}") if vid not in vid_list: final.append(v) vid_list.append(vid) @@ -340,8 +340,8 @@ def check_realm_units(conn, var): """ mopdb_log = logging.getLogger('mopdb_log') - vname = f"{var['cmor_var']}-{var['cmor_table']}" - if var['cmor_table'] is None or var['cmor_table'] == "": + vname = f"{var.cmor_var}-{var.cmor_table}" + if var.cmor_table is None or var.cmor_table == "": mopdb_log.warning(f"Variable: {vname} has no associated cmor_table") else: # retrieve modeling_realm, units from db cmor table @@ -353,12 +353,12 @@ def check_realm_units(conn, var): dbrealm = result[0] dbunits = result[1] # dbrealm could have two realms - if var['realm'] not in [dbrealm] + dbrealm.split(): - mopdb_log.info(f"Changing {vname} realm from {var['realm']} to {dbrealm}") - var['realm'] = dbrealm - if var['units'] != dbunits : - mopdb_log.info(f"Changing {vname} units from 
{var['units']} to {dbunits}") - var['units'] = dbunits + if var.realm not in [dbrealm] + dbrealm.split(): + mopdb_log.info(f"Changing {vname} realm from {var.realm} to {dbrealm}") + var.realm = dbrealm + if var.units != dbunits : + mopdb_log.info(f"Changing {vname} units from {var.units} to {dbunits}") + var.units = dbunits else: mopdb_log.warning(f"Variable {vname} not found in cmor table") return var diff --git a/src/mopper/mop_setup.py b/src/mopper/mop_setup.py index 8831595..441d05b 100755 --- a/src/mopper/mop_setup.py +++ b/src/mopper/mop_setup.py @@ -103,7 +103,7 @@ def find_matches(table, var, realm, frequency, varlist): realmdir = 'atmos' else: realmdir = match['realm'] - in_fname = match['filename'].split() + in_fname = match['fpattern'].split() match['file_structure'] = '' for f in in_fname: #match['file_structure'] += f"/{realmdir}/{f}* " diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index a5d1423..01b29d9 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -39,7 +39,7 @@ from pathlib import Path from mopper.calculations import * -from mopper.utils import read_yaml +from mopdb.utils import read_yaml from importlib.resources import files as import_files From 3c5e8b48dfedf597abd936107840fbd088355a38 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 24 Jul 2024 10:22:00 +1000 Subject: [PATCH 067/137] updated docs --- docs/cmor_conf.yaml | 1 + docs/gettingstarted.rst | 10 ++++++---- docs/mopdb_command.rst | 30 ++++++++++++++++++++++++++++++ src/mopper/mopper.py | 2 +- 4 files changed, 38 insertions(+), 5 deletions(-) diff --git a/docs/cmor_conf.yaml b/docs/cmor_conf.yaml index 8ff286d..5c93d65 100755 --- a/docs/cmor_conf.yaml +++ b/docs/cmor_conf.yaml @@ -93,3 +93,4 @@ cmor: # walltime in "hh:mm:ss" walltime: '8:00:00' mode: custom + conda_env: /g/data/.../mopper_env/bin/activate diff --git a/docs/gettingstarted.rst b/docs/gettingstarted.rst index 2e7181e..62aabd1 100644 --- a/docs/gettingstarted.rst +++ b/docs/gettingstarted.rst @@ -1,13 +1,15 @@ Starting with MOPPeR ==================== -A typical workflow to post-process an ACCESS or UM model output requires three steps. +A typical workflow to post-process an ACCESS or UM model output requires two steps. +The first step is creating the mapping for a spcific simualtion and it is done only once for an experiment. +The second step is to setup and run the actual post-processing. Step1: create a template for a mapping file ------------------------------------------- - *mopdb template -f -v -a * + *mopdb template -f -m -v -a * .. code-block:: console @@ -53,11 +55,11 @@ It also provides an intermediate varlist_.csv file that shows the informa Step2: Set up the working environment ------------------------------------- - *mop -c setup* + *mop setup -c * .. code-block:: console - $ mop -c exp_conf.yaml setup + $ mop setup -c exp_conf.yaml Simulation to process: cy286 Setting environment and creating working directory Output directory '/scratch/v45/pxp581/MOPPER_output/cy286' exists. diff --git a/docs/mopdb_command.rst b/docs/mopdb_command.rst index 421f39c..5c24196 100644 --- a/docs/mopdb_command.rst +++ b/docs/mopdb_command.rst @@ -11,6 +11,7 @@ This module is used to manage the mapping of raw output to CMIP style variables. 
diff --git a/docs/mopdb_command.rst b/docs/mopdb_command.rst
index 421f39c..5c24196 100644
--- a/docs/mopdb_command.rst
+++ b/docs/mopdb_command.rst
@@ -11,6 +11,7 @@ This module is used to manage the mapping of raw output to CMIP style variables.
 - **varlist** creates an initial list of variables and attributes based on actual files
 - **template** uses the above list to generate a template of mappings to use in the processing
+- **intake** uses the mappings to create an intake catalogue of the raw model output
 - **cmor** populates the database cmor variables table
 - **map** populates the database mappings table
 - **check** checks a variable list against the cmor database table to individuate variables without a definition
@@ -107,6 +108,35 @@ The other groups of records require checking, as either the version or the frequ
 ...
 
+Create an intake catalogue
+--------------------------
+
+This represents an extra step on top of the mapping, so it can start either from an existing mapping or from scratch by providing the model output path and a match.
+
+From output path:
+
+   mopdb intake -f <model output path> -m <match> -v <version> { -a <alias>}
+
+From varlist file:
+
+   mopdb intake -f <model output path> -fl <varlist file> -v <version> { -a <alias>}
+
+From mapping file:
+
+   mopdb intake -f <model output path> -fl <mapping file> -v <version> { -a <alias>}
+
+NB the model output path is still needed even when passing an existing mapping or variable list.
+
+`intake` will generate:
+* intake_<alias>.yaml - the main intake catalogue;
+* intake_<alias>.json - the intake-esm catalogue;
+* catalogue.csv.xz - a csv file containing a list of the assets.
+
+The esm-catalogue is a multi-variable catalogue, which means that each file can have more than one variable, as is usual for raw model output. While each file contains a lot of variables, a user can select just one or a few, and only these will be loaded as an xarray dataset. This is helpful with the UM output, where variables with different dimensions can co-exist in a file. In such cases, it is necessary to use preprocess to select variables with consistent dimensions to avoid concatenation issues. As this is the standard behaviour for multi-variable intake-esm catalogues, the user doesn't need to worry about it.
+
+The esm-intake catalogue also lists separately each variable that can be mapped to a cmor name and/or standard_name. This allows the cmor names and/or the standard_names to be used more effectively to query the data.
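+
+As a minimal sketch (not part of the package), the generated intake-esm catalogue could then be queried along these lines, assuming intake and intake-esm are installed, using the ``intake_<alias>.json`` name from above, and taking ``tas`` as a stand-in for any variable listed in the catalogue:
+
+.. code-block:: python
+
+    import intake
+
+    # open the intake-esm catalogue written by `mopdb intake`
+    cat = intake.open_esm_datastore("intake_<alias>.json")
+    # select one variable; the mapvar/standard_name columns can be queried too
+    subset = cat.search(variable="tas")
+    # files hold many variables, but only the requested ones are loaded
+    dsets = subset.to_dataset_dict()
+
 Get a list of variables from the model output
 ---------------------------------------------
 .. 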
code-block:: diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 6313edd..7a136c1 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -288,7 +288,7 @@ def mop_process(ctx): var_log.debug(f"{ctx.obj['reference_date']}") t_ax_val = cftime.date2num(axes['t_ax'], units=ctx.obj['reference_date'], calendar=ctx.obj['attrs']['calendar']) - var_log.debug(f"t_ax[3] {t_ax_val[3]}") + #var_log.debug(f"t_ax[3] {t_ax_val[3]}") t_bounds = None if cmor_tName in bounds_list: t_bounds = get_bounds(dsin[var1], axes['t_ax'], cmor_tName, From 683962f406438931682c3218f4ecf1502d500b6e Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 24 Jul 2024 13:08:45 +1000 Subject: [PATCH 068/137] switch temporarily to analysis3 stable --- src/mopper/setup_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py index da3dc6b..ff1e082 100755 --- a/src/mopper/setup_utils.py +++ b/src/mopper/setup_utils.py @@ -683,7 +683,7 @@ def define_template(ctx, flag, nrows): # for a list of packages module use /g/data/hh5/public/modules -module load conda/analysis3-unstable +module load conda/analysis3 {ctx.obj['conda_env']} cd {ctx.obj['appdir']} From 47f4b1acf64da92995c5d80b94b3fec548b753a3 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 31 Jul 2024 10:16:37 +1000 Subject: [PATCH 069/137] fixed #161 --- mappings/map_AUS2200.csv | 2 +- mappings/map_aerosol_CM2.csv | 2 +- mappings/map_atmos_CM2.csv | 2 +- mappings/map_land_CM2.csv | 2 +- mappings/map_land_ESM1.5.csv | 2 +- mappings/map_ocean_OM2.csv | 2 +- src/mopper/mop_utils.py | 38 +++++++++++++++++++++++++++++++----- src/mopper/mopper.py | 2 ++ 8 files changed, 41 insertions(+), 11 deletions(-) diff --git a/mappings/map_AUS2200.csv b/mappings/map_AUS2200.csv index 06e3df1..a6c1ab9 100644 --- a/mappings/map_AUS2200.csv +++ b/mappings/map_AUS2200.csv @@ -1,4 +1,4 @@ -#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;filename;long_name;standard_name +#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;fpattern;long_name;standard_name amdry;fld_s30i403;;kg m-2;time lat lon;10minPt;atmos;area: time: point;;AUS2200_A10min;AUS2200;;float32;22048000;2304;umnsa_spec;TOTAL COLUMN DRY MASS RHO GRID; amwet;fld_s30i404;;kg m-2;time lat lon;10minPt;atmos;area: time: point;;AUS2200_A10min;AUS2200;;float32;22048000;2304;umnsa_spec;TOTAL COLUMN WET MASS RHO GRID;atmosphere_mass_per_unit_area cl;fld_s00i265;level_to_height(var[0],levs=(0,66));1;time model_theta_level_number lat lon;1hrPt;atmos;area: time: point;;AUS2200_A1hr;AUS2200;float32;1543360000;384;umnsa_cldrad;AREA CLOUD FRACTION IN EACH LAYER;cloud_area_fraction_in_atmosphere_layer diff --git a/mappings/map_aerosol_CM2.csv b/mappings/map_aerosol_CM2.csv index 4c7b1b3..18d671a 100644 --- a/mappings/map_aerosol_CM2.csv +++ b/mappings/map_aerosol_CM2.csv @@ -1,4 +1,4 @@ -#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;filename;long_name;standard_name +#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;fpattern;long_name;standard_name abs550aer;fld_s02i240 fld_s02i241 fld_s02i242 fld_s02i243 fld_s02i585;optical_depth(var,3);1;time pseudo_level_0 lat lon;mon;aerosol;area: time: mean;;CMIP6_AERmon;CM2;float32;663552;12;cw323a.pm;Ambient Aerosol 
Absorption Optical Thickness at 550nm;atmosphere_absorption_optical_thickness_due_to_ambient_aerosol_particles dryss;fld_s38i218 fld_s38i219;calc_depositions(var);kg m-2 s-1;time model_theta_level_number lat lon;mon;aerosol;area: time: mean;;CMIP6_AERmon;CM2;float32;9400320;12;cw323a.pm;Dry Deposition Rate of Sea-Salt Aerosol;minus_tendency_of_atmosphere_mass_content_of_sea_salt_dry_aerosol_particles_due_to_dry_deposition lwp;fld_s30i405;;kg m-2;time lat lon;mon;aerosol;area: time: mean;;CMIP6_AERmon;CM2;float32;110592;12;cw323a.pm;TOTAL COLUMN QCL RHO GRID;atmosphere_cloud_liquid_water_content diff --git a/mappings/map_atmos_CM2.csv b/mappings/map_atmos_CM2.csv index 5d6f369..b2cc1c3 100644 --- a/mappings/map_atmos_CM2.csv +++ b/mappings/map_atmos_CM2.csv @@ -1,4 +1,4 @@ -#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;filename;long_name;standard_name +#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;fpattern;long_name;standard_name amdry;fld_s30i403;;kg m-2;time lat lon;mon;atmos;area: time: mean;;CM2_mon;CM2;float32;110592;12;cw323a.pm;TOTAL COLUMN DRY MASS RHO GRID;atmosphere_mass_per_unit_area amwet;fld_s30i404;;kg m-2;time lat lon;mon;atmos;area: time: mean;;CM2_mon;CM2;float32;110592;12;cw323a.pm;TOTAL COLUMN WET MASS RHO GRID;atmosphere_mass_per_unit_area ci;fld_s05i269;;1;time lat lon;mon;atmos;area: time: mean;;CMIP6_Amon;CM2;float32;110592;12;cw323a.pm;deep convection indicator; diff --git a/mappings/map_land_CM2.csv b/mappings/map_land_CM2.csv index 92da281..af1f539 100644 --- a/mappings/map_land_CM2.csv +++ b/mappings/map_land_CM2.csv @@ -1,4 +1,4 @@ -#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;filename;long_name;standard_name +#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;fpattern;long_name;standard_name agesno;fld_s03i832 fld_s03i317;average_tile(var[0],tilefrac=var[1]);mon;time pseudo_level_1 lat lon;mon;landIce land;area: time: mean;;CMIP6_LImon;CM2;float32;1880064;12;cw323a.pm;CABLE SNOW AGE ON TILES;age_of_surface_snow baresoilFrac;fld_s03i317 fld_s03i395;extract_tilefrac(var[0],14,landfrac=var[1],lev='typebare');1;time lat lon;mon;land;area: mean where land over all_area_types time: mean;;CMIP6_Lmon;CM2;float32;110592;12;cw323a.pm;Bare Soil Percentage Area Coverage;area_fraction c3PftFrac;fld_s03i317 fld_s03i395;extract_tilefrac(var[0],[1,2,3,4,5,6,8,9,11],landfrac=var[1],lev='typec3pft');1;time pseudo_level_1 lat lon;mon;land;area: mean where land over all_area_types time: mean;;CMIP6_Lmon;CM2;float32;1880064;12;cw323a.pm;Percentage Cover by C3 Plant Functional Type;area_fraction diff --git a/mappings/map_land_ESM1.5.csv b/mappings/map_land_ESM1.5.csv index 23b129a..56b59dd 100644 --- a/mappings/map_land_ESM1.5.csv +++ b/mappings/map_land_ESM1.5.csv @@ -1,4 +1,4 @@ -#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;filename;long_name;standard_name +#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;fpattern;long_name;standard_name agesno;fld_s03i832 fld_s03i317;average_tile(var[0],tilefrac=var[1]);mon;time pseudo_level_1 lat lon;mon;landIce land;area: time: mean;;CMIP6_LImon;ESM1.5;float32;1880064;12;cw323a.pm;CABLE 
SNOW AGE ON TILES;age_of_surface_snow baresoilFrac;fld_s03i317 fld_s03i395;extract_tilefrac(var[0],14,landfrac=var[1],lev='typebare');1;time lat lon;mon;land;area: mean where land over all_area_types time: mean;;CMIP6_Lmon;ESM1.5;float32;110592;12;cw323a.pm;Bare Soil Percentage Area Coverage;area_fraction c3PftFrac;fld_s03i317 fld_s03i395;extract_tilefrac(var[0],[1,2,3,4,5,6,8,9,11],landfrac=var[1],lev='typec3pft');1;time pseudo_level_1 lat lon;mon;land;area: mean where land over all_area_types time: mean;;CMIP6_Lmon;ESM1.5;float32;1880064;12;cw323a.pm;Percentage Cover by C3 Plant Functional Type;area_fraction diff --git a/mappings/map_ocean_OM2.csv b/mappings/map_ocean_OM2.csv index 224677f..073e4b4 100644 --- a/mappings/map_ocean_OM2.csv +++ b/mappings/map_ocean_OM2.csv @@ -1,4 +1,4 @@ -#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;filename;long_name;standard_name +#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;fpattern;long_name;standard_name advectsweby;temp_sweby_advec;;W m-2;time st_ocean yt_ocean xt_ocean;mon;ocean;area: time: mean;;CM2_mon;OM2;float32;21600000;36;ocean_month.nc-;cp*rho*dzt*sweby advect tendency; agessc;age_global;;yr;time st_ocean yt_ocean xt_ocean;mon;ocean;area: time: mean;;CMIP6_Omon;OM2;float32;21600000;708;ocean_month.nc-;Age (global);sea_water_age_since_surface_contact areacello;dummy;get_areacello();1;time st_ocean yt_ocean xt_ocean;mon;ocean;area: sum;;CMIP6_Ofx;OM2;float32;21600000;708;ocean_month.nc-;t-cell thickness;cell_area diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index 01b29d9..ef41953 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -112,7 +112,8 @@ def _preselect(ds, varlist): if bounds is None: bounds = ds[c].attrs.get('edges', None) if bounds is not None: - bnds.extend(bounds.split()) + bnds.extend([b for b in bounds.split() if b in ds.variables]) + # check all bnds are in file varsel.extend(bnds) # remove attributes for boundaries for v in bnds: @@ -520,7 +521,7 @@ def ll_axis(ctx, ax, ax_name, ds, table, bounds_list): """ """ var_log = logging.getLogger(ctx.obj['var_log']) - var_log.debug(f"n ll_axis") + var_log.debug(f"in ll_axis") cmor.set_table(table) cmor_aName = get_cmorname(ax_name, ax) try: @@ -562,7 +563,6 @@ def define_grid(ctx, j_id, i_id, lat, lat_bnds, lon, lon_bnds): var_log.info("setup of lat,lon grid complete") return grid_id - @click.pass_context def get_coords(ctx, ovar, coords): """Get lat/lon and their boundaries from ancil file @@ -628,6 +628,9 @@ def get_axis_dim(ctx, var): axes['lat_ax'] = axis elif any(x in dim.lower() for x in ['nj', 'yu_ocean', 'yt_ocean']): axes['j_ax'] = axis + # have to add this because a simulation didn't have the dimenision variables + elif any(x in dim.lower() for x in ['nj', 'yu_ocean', 'yt_ocean']): + axes['j_ax'] = axis elif axis_name and 'X' in axis_name: if 'glon' in dim.lower(): axes['glon_ax'] = axis @@ -635,6 +638,9 @@ def get_axis_dim(ctx, var): axes['lon_ax'] = axis elif any(x in dim.lower() for x in ['ni', 'xu_ocean', 'xt_ocean']): axes['i_ax'] = axis + # have to add this because a simulation didn't have the dimenision variables + elif any(x in dim.lower() for x in ['ni', 'xu_ocean', 'xt_ocean']): + axes['i_ax'] = axis elif axis_name == 'Z' or any(x in dim for x in ['lev', 'heigth', 'depth']): axes['z_ax'] = axis #z_ax.attrs['axis'] = 'Z' @@ -723,10 +729,10 @@ def get_bounds(ctx, ds, axis, 
cmor_name, ax_val=None): if 'subhr' in frq: frq = ctx.obj['subhr'] + frq.split('subhr')[1] if 'bounds' in keys and not changed_bnds: - dim_bnds_val = ds[axis.bounds].values + calc, dim_bnds_val = get_bounds_values(ds, axis.bounds) var_log.info(f"Using dimension bounds: {axis.bounds}") elif 'edges' in keys and not changed_bnds: - dim_bnds_val = ds[axis.edges].values + calc, dim_bnds_val = get_bounds_values(ds, axis.edges) var_log.info(f"Using dimension edges as bounds: {axis.edges}") else: var_log.info(f"No bounds for {dim}") @@ -752,6 +758,7 @@ def get_bounds(ctx, ds, axis, cmor_name, ax_val=None): max_val = np.roll(min_val, -1) max_val[-1] = 1.5*ax_val[-1] - 0.5*ax_val[-2] dim_bnds_val = np.column_stack((min_val, max_val)) + var_log.debug(f"{axis.name} bnds: {dim_bnds_val}") except Exception as e: var_log.warning(f"dodgy bounds for dimension: {dim}") var_log.error(f"error: {e}") @@ -785,6 +792,27 @@ def get_bounds(ctx, ds, axis, cmor_name, ax_val=None): var_log.info(f"setting minimum {cmor_name} bound to 0") return dim_bnds_val +@click.pass_context +def get_bounds_values(ctx, ds, bname): + """Return values of axis bounds, if they're not in file + tries to get them from ancillary grid file instead. + """ + calc = False + var_log = logging.getLogger(ctx.obj['var_log']) + var_log.debug(f"Getting bounds values for {bname}") + ancil_file = ctx.obj[f"grid_{ctx.obj['realm']}"] + if bname in ds.variables: + bnds_val = ds[bname].values + elif ancil_file != "": + fname = f"{ctx.obj['ancils_path']}/{ancil_file}" + ancil = xr.open_dataset(fname) + if bname in ancil.variables: + bnds_val = ancil[bname].values + else: + var_log.info(f"Can't locate {bname} in data or ancil file") + bnds_val = None + calc = True + return calc, bnds_val @click.pass_context def get_attrs(ctx, infiles, var1): diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 7a136c1..1b6c769 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -318,6 +318,7 @@ def mop_process(ctx): axis_ids.append(z_ax_id) # if both i, j are defined setgrid if only one treat as lat/lon if axes['i_ax'] is not None and axes['j_ax'] is not None: + var_log.debug(f"Setting grid with {axes}") setgrid = True j_id = ij_axis(axes['j_ax'], 'j_index', tables[0]) i_id = ij_axis(axes['i_ax'], 'i_index', tables[0]) @@ -331,6 +332,7 @@ def mop_process(ctx): grid_id = define_grid(j_id, i_id, lat, lat_bnds, lon, lon_bnds) else: if axes['glat_ax'] is not None: + print("getting lat") lat_id = ll_axis(axes['glat_ax'], 'glat', dsin[var1], tables[1], bounds_list) axis_ids.append(lat_id) From 350b47080cae0ec5d8e423fa5d9305654a4a9bf7 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 31 Jul 2024 10:19:49 +1000 Subject: [PATCH 070/137] removed print --- src/mopper/mopper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 1b6c769..4b08c5a 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -332,7 +332,6 @@ def mop_process(ctx): grid_id = define_grid(j_id, i_id, lat, lat_bnds, lon, lon_bnds) else: if axes['glat_ax'] is not None: - print("getting lat") lat_id = ll_axis(axes['glat_ax'], 'glat', dsin[var1], tables[1], bounds_list) axis_ids.append(lat_id) From 1e31e25e425597ca2ef38367f3700b140de54850 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 31 Jul 2024 11:23:49 +1000 Subject: [PATCH 071/137] fixed missing dbname in model_vars arguments --- src/mopdb/mopdb.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 
fee8f45..c21d647 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -77,7 +77,9 @@ def db_args(f): def map_args(f): - """Define mapping click options for varlist and template commands""" + """Define mapping click options for varlist, template, intake + commands + """ constraints = [ click.option('--fpath', '-f', type=str, required=True, callback=require_date, @@ -498,7 +500,7 @@ def update_map(ctx, dbname, fname, alias): @mopdb.command(name='varlist') @map_args @click.pass_context -def model_vars(ctx, fpath, match, version, alias): +def model_vars(ctx, fpath, match, dbname, version, alias): """Read variables from model output opens one file for each kind, save variable list as csv file From e3c2bedc8681aa89fa279be10e023e5d68098c7f Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 31 Jul 2024 12:37:40 +1000 Subject: [PATCH 072/137] added import of update_db to mopdb.py --- src/mopdb/mopdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index c21d647..40757b2 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -30,7 +30,7 @@ from mopdb.mopdb_utils import (mapping_sql, cmorvar_sql, read_map, read_map_app4, map_update_sql, create_table, write_cmor_table, - check_varlist) + check_varlist, update_db) from mopdb.utils import * from mopdb.mopdb_map import (write_varlist, write_map_template, write_catalogue, map_variables, load_vars, get_map_obj) From f9c48a5fc9f45790989d4afbfb4852bb4e001f69 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 2 Aug 2024 15:55:38 +1000 Subject: [PATCH 073/137] attempt 1 actions --- .github/workflows/mopper-pytest.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 430693f..8a9825f 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -4,7 +4,8 @@ name: mopper-conda-install-test on: push: branches: - - prerelease + #- prerelease + - class pull_request: branches: - main @@ -38,6 +39,10 @@ jobs: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + # making sure we are testing installed package + - name: Install package + run: | + conda env install --name base - name: Test with pytest run: | conda install pytest coverage codecov --solver classic From 7c1a243758f69db53bd8974cb6c55e1e7263d821 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 2 Aug 2024 16:02:40 +1000 Subject: [PATCH 074/137] attempt 2 actions --- .github/workflows/mopper-pytest.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 8a9825f..1bed83d 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -40,9 +40,9 @@ jobs: # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics # making sure we are testing installed package - - name: Install package - run: | - conda env install --name base + #- name: Install package + # run: | + # conda env install --name base - name: Test with pytest run: | conda install pytest coverage codecov --solver classic From 8039925e93bd38f0882632b17f06d59643207bdd Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 2 Aug 2024 16:13:54 +1000 Subject: [PATCH 075/137] attempt 3 actions, sub flake8 with ruff --- .github/workflows/mopper-pytest.yaml | 19 +++++++++++-------- .github/workflows/mopper-test-calcs.yaml | 2 +- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 1bed83d..4ddcd48 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -1,4 +1,4 @@ -name: mopper-conda-install-test +name: mopper-all-tests #on: [push] on: @@ -6,10 +6,10 @@ on: branches: #- prerelease - class - pull_request: - branches: - - main - - prerelease + #pull_request: + # branches: + # - main + # - prerelease jobs: @@ -34,11 +34,14 @@ jobs: conda env update --file conda/environment.yaml --name base - name: Lint with flake8 run: | - conda install flake8 --solver classic + conda install -c conda-forge ruff + ruff check --output-format=github . + continue-on-error: true + #conda install flake8 --solver classic # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + #flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + #flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics # making sure we are testing installed package #- name: Install package # run: | diff --git a/.github/workflows/mopper-test-calcs.yaml b/.github/workflows/mopper-test-calcs.yaml index 50d2512..ee830d6 100644 --- a/.github/workflows/mopper-test-calcs.yaml +++ b/.github/workflows/mopper-test-calcs.yaml @@ -1,4 +1,4 @@ -name: mopper-conda-install-test +name: mopper-branch-test #on: [push] on: From 9898c91fbdc37e7ef2bb981a82168813d75b8896 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 2 Aug 2024 16:35:13 +1000 Subject: [PATCH 076/137] attempt 4 actions --- .github/workflows/mopper-pytest.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 4ddcd48..2cb0e6d 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -34,7 +34,7 @@ jobs: conda env update --file conda/environment.yaml --name base - name: Lint with flake8 run: | - conda install -c conda-forge ruff + conda install -c conda-forge ruff --solver classic ruff check --output-format=github . continue-on-error: true #conda install flake8 --solver classic @@ -43,9 +43,9 @@ jobs: # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide #flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics # making sure we are testing installed package - #- name: Install package - # run: | - # conda env install --name base + - name: Install package + run: | + pip install -e . 
- name: Test with pytest run: | conda install pytest coverage codecov --solver classic From 87c00b153d582c30adac5c6c959b882111c95ea8 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 2 Aug 2024 16:49:03 +1000 Subject: [PATCH 077/137] temporarily remove import of nri intake --- src/mopdb/mopdb_map.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index 4f098ba..3bef37c 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -30,7 +30,7 @@ from pathlib import Path from itertools import compress from importlib.resources import files as import_files -from access_nri_intake.source.builders import AccessEsm15Builder +#from access_nri_intake.source.builders import AccessEsm15Builder from mopdb.mopdb_class import FPattern, Variable, MapVariable from mopdb.utils import * From 6429a5697939d14b6e8a3489d4148512f78bcf0a Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 6 Aug 2024 12:07:15 +1000 Subject: [PATCH 078/137] do not try to get frequency if only 1 file, fixed getting match data if matched by standard_name --- .github/workflows/mopper-test-conda.yaml | 44 ++++++++++++++++++++++++ src/mopdb/mopdb_class.py | 1 + src/mopdb/mopdb_map.py | 30 +++++++++------- 3 files changed, 63 insertions(+), 12 deletions(-) create mode 100644 .github/workflows/mopper-test-conda.yaml diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml new file mode 100644 index 0000000..471e1fe --- /dev/null +++ b/.github/workflows/mopper-test-conda.yaml @@ -0,0 +1,44 @@ +name: Test Build of mopper conda package + +# Controls when the action will run. +on: + # Triggers the workflow on push or pull request events but only for the prerelase branch + push: + branches: [ prerelease ] + pull_request: + branches: [ prerelease ] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + conda_deployment_with_new_tag: + name: Test conda deployment of package with Python ${{ matrix.python-version }} + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11"] + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Conda environment creation and activation + uses: conda-incubator/setup-miniconda@v2 + with: + python-version: ${{ matrix.python-version }} + environment-file: devtools/conda-envs/build_env.yaml # Path to the build conda environment + auto-update-conda: false + auto-activate-base: false + show-channel-urls: true + - name: Build but do not upload the conda packages + uses: uibcdf/action-build-and-upload-conda-packages@v1.3.0 + with: + meta_yaml_dir: devtools/conda-build + python-version: ${{ matrix.python-version }} # Values previously defined in `matrix` + platform_linux-64: true + platform_osx-64: true + platform_win-64: true + user: uibcdf + label: auto + upload: false + token: ${{ secrets.ANACONDA_TOKEN }} # Replace with the right name of your secret diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index 8b73805..373903b 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -163,6 +163,7 @@ class MapVariable(): def __init__(self, match: list, vobj: Variable): # path object self.fpattern = vobj.fpattern + print(match) # mapping attributes self.frequency = vobj.frequency self.realm = match[4] diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index 3bef37c..e9b529b 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -166,15 +166,18 @@ def 
write_varlist(conn, indir, match, version, alias): ds = xr.open_dataset(str(fobj.files[0]), decode_times=False) coords = [c for c in ds.coords] + ['latitude_longitude'] #pass next file in case of 1 timestep per file and no frq in name - fnext = str(fobj.files[1]) - if fobj.frequency == 'NAfrq' or fobj.realm == 'atmos': - frq_dict = get_file_frq(ds, fnext) - # if only one frequency detected empty dict - if len(frq_dict) == 1: - fobj.frequency = frq_dict.popitem()[1] - else: - fobj.multiple_frq = True - fobj.frequency = frq_dict['time'] + if len(fobj.files) > 1: + fnext = str(fobj.files[1]) + if fobj.frequency == 'NAfrq' or fobj.realm == 'atmos': + frq_dict = get_file_frq(ds, fnext) + # if only one frequency detected empty dict + if len(frq_dict) == 1: + fobj.frequency = frq_dict.popitem()[1] + else: + fobj.multiple_frq = True + fobj.frequency = frq_dict['time'] + else: + mopdb_log.info(f"Only 1 file cannot determine frequency for: {fpattern}") mopdb_log.debug(f"Multiple frq: {fobj.multiple_frq}") if fobj.realm == "NArealm": fobj.realm = get_realm(version, ds) @@ -226,7 +229,9 @@ def match_stdname(conn, vobj, stdn): results = query(conn, sql, first=False, logname='mopdb_log') matches = [x[0] for x in results] if len(matches) > 0: - stdn = add_var(stdn, vobj, tuple([matches]+['']*7), stdnm=True) + vmatch = vobj.get_match() + stdn = add_var(stdn, vobj, tuple([matches]+list(vmatch[1:])), + stdnm=True) found_match = True return stdn, found_match @@ -307,8 +312,9 @@ def parse_vars(conn, vobjs, version): mopdb_log.debug(f"found stdnm match: {found}") if not found: # use original var values for match - match = v.get_match() - no_match = add_var(no_match, v, v.get_match()) + vmatch = v.get_match() + mopdb_log.debug(f"Getting match from variable: {vmatch}") + no_match = add_var(no_match, v, vmatch) stash_vars.append(f"{v.name}-{v.frequency}") return full, no_ver, no_frq, stdn, no_match, stash_vars From ffb234a20382ab1dc9de287b5a248c2db851f1ff Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 6 Aug 2024 12:08:05 +1000 Subject: [PATCH 079/137] removed print --- src/mopdb/mopdb_class.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index 373903b..8b73805 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -163,7 +163,6 @@ class MapVariable(): def __init__(self, match: list, vobj: Variable): # path object self.fpattern = vobj.fpattern - print(match) # mapping attributes self.frequency = vobj.frequency self.realm = match[4] From fdddbbdf937e7bd7b7b4a3683728e4ad9ec9f366 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 6 Aug 2024 15:38:28 +1000 Subject: [PATCH 080/137] switching to pyproject.toml --- pyproject.toml | 52 ++++++++++++++++++++++++++++ setup.cfg | 49 -------------------------- src/{mopper => mopdata}/update_db.py | 0 src/mopper/mop_setup.py | 2 +- 4 files changed, 53 insertions(+), 50 deletions(-) create mode 100644 pyproject.toml delete mode 100644 setup.cfg rename src/{mopper => mopdata}/update_db.py (100%) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..dc328d7 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,52 @@ +[build-system] +requires = ["setuptools>=64.0.0", "setuptools-scm", "pbr>=6.0.0"] +build-backend = "pbr.build" +#build-backend = "setuptools.build_meta" + +[project] +name = "ACCESS-MOPPeR" +url = https://github.com/ACCESS-Hive/ACCESS-MOPPeR + +authors = [ + {name = "Paola Petrelli", email = "paola.petrelli@utas.edu.au"}, + {name = "Sam Green", email = 
"sam.green@unsw.edu.au"}, +] +description = "ACCESS Model Output Post-Processor, maps raw model output to CMIP-style defined variables and produce post-processed output using CMOR3" +readme = "README.md" +requires-python = ">=3.8" +keywords = ["ACCESS", "post-processing"] +license = {text = "Apache-2.0"} +classifiers = [ + "Development Status :: 3 - Alpha", + "Environment :: Console", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", +] +dynamic = ["version", "dependencies"] + +[tool.setuptools-git-versioning] +enabled = true + +[project.scripts] +mop = "mopper.mopper:mop_catch" +mopdb = "mopdb.mopdb:mopdb_catch" + +[tool.setuptools.dynamic] +dependencies = {file = "requirements.txt"} + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.setuptools] +include_package_data = True + +[tool.setuptools.package-data] +mopdata = ["*.json", "*.yaml", "*.db", "*.csv", "update_db.py"] + + +# ... other project metadata fields as listed in: +# https://packaging.python.org/en/latest/guides/writing-pyproject-toml/ diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 677f9bc..0000000 --- a/setup.cfg +++ /dev/null @@ -1,49 +0,0 @@ -[metadata] -name = mopper -url = https://github.com/ACCESS-Hive/ACCESS-MOPPeR -author = Paola Petrelli, Sam Green -author_email = paola.petrelli@utas.edu.au, sam.green@unsw.edu.au -summary = 'ACCESS Model Output Post-Processor, maps raw model output to CMIP-style defined variables and produce post-processed output using CMOR3' -description_file = README.md -licence = 'Apache-2.0' -keywords = 'ACCESS model' -classifier = - Development Status :: 3 - Alpha - Environment :: Console - Intended Audience :: Science/Research - License :: OSI Approved :: Apache Software License - Operating System :: POSIX :: Linux - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - -[options] -packages = find_namespace: -package_dir = - = src -include_package_data = True - -[options.packages.find] -where = src - -[options.package_data] -mopdata = *.json, *.yaml, *.db, *.csv -mopper = update_db.py - -[pbr] -autodoc_tree_index_modules = True -autodoc_tree_excludes = - setup.py - test - docs/conf.py - -[entry_points] -console_scripts = - mop = mopper.mopper:mop_catch - mopdb = mopdb.mopdb:mopdb_catch - -[build_sphinx] -source_dir = docs -build_dir = docs/_build - -[tool:pytest] -addopts = --doctest-modules --doctest-glob='*.rst' --ignore setup.py --ignore docs/conf.py diff --git a/src/mopper/update_db.py b/src/mopdata/update_db.py similarity index 100% rename from src/mopper/update_db.py rename to src/mopdata/update_db.py diff --git a/src/mopper/mop_setup.py b/src/mopper/mop_setup.py index 441d05b..cc4b0e1 100755 --- a/src/mopper/mop_setup.py +++ b/src/mopper/mop_setup.py @@ -417,6 +417,6 @@ def manage_env(ctx): else: fname = ctx.obj[f] shutil.copyfile(fpath, ctx.obj['tpath'] / fname) - update_code = import_files('mopper').joinpath("update_db.py") + update_code = import_files('mopdata').joinpath("update_db.py") shutil.copyfile(update_code, ctx.obj['outpath'] / "update_db.py") return From b0e8497ce57012737795f87de7a83ec72e06fd21 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 6 Aug 2024 16:09:38 +1000 Subject: [PATCH 081/137] removed url form pyporject.toml --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git 
a/pyproject.toml b/pyproject.toml index dc328d7..682b28f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,6 @@ build-backend = "pbr.build" [project] name = "ACCESS-MOPPeR" -url = https://github.com/ACCESS-Hive/ACCESS-MOPPeR authors = [ {name = "Paola Petrelli", email = "paola.petrelli@utas.edu.au"}, From f805da2fea9fd16bf9e2dfc68f9b823c9f09566a Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 6 Aug 2024 16:24:14 +1000 Subject: [PATCH 082/137] try removing include_package_data --- pyproject.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 682b28f..c606a5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,9 +40,6 @@ dependencies = {file = "requirements.txt"} [tool.setuptools.packages.find] where = ["src"] -[tool.setuptools] -include_package_data = True - [tool.setuptools.package-data] mopdata = ["*.json", "*.yaml", "*.db", "*.csv", "update_db.py"] From f12d0cff510b9be1de20c868e3e120a92baae32f Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 6 Aug 2024 16:32:35 +1000 Subject: [PATCH 083/137] trying to re-introduce setup.cfg --- setup.cfg | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 setup.cfg diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..48b922f --- /dev/null +++ b/setup.cfg @@ -0,0 +1,13 @@ +[pbr] +autodoc_tree_index_modules = True +autodoc_tree_excludes = + setup.py + test + docs/conf.py + +[build_sphinx] +source_dir = docs +build_dir = docs/_build + +[tool:pytest] +addopts = --doctest-modules --doctest-glob='*.rst' --ignore setup.py --ignore docs/conf.py From 0d60f373948b2c5f2fe301c9e72c56c1289421ad Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 6 Aug 2024 16:37:14 +1000 Subject: [PATCH 084/137] removing pbr --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c606a5a..0bc0d99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [build-system] -requires = ["setuptools>=64.0.0", "setuptools-scm", "pbr>=6.0.0"] -build-backend = "pbr.build" -#build-backend = "setuptools.build_meta" +requires = ["setuptools>=64.0.0", "setuptools-scm"] +#build-backend = "pbr.build" +build-backend = "setuptools.build_meta" [project] name = "ACCESS-MOPPeR" From 668a61f89284971ab7a75ad892d74a8d081398e3 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 6 Aug 2024 16:44:56 +1000 Subject: [PATCH 085/137] removed pbr from setup.py --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 52f1ab9..ba5a4e2 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,6 @@ from setuptools import setup setup( - setup_requires=['pbr', 'setuptools'], - pbr=True, + setup_requires=['setuptools-scm', 'setuptools'], ) From bf491608604d7f407c0dd7f1839f73a5a5e3cfa2 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 7 Aug 2024 10:04:44 +1000 Subject: [PATCH 086/137] storing update.py as txt --- src/mopdata/{update_db.py => update_db.py.txt} | 0 src/mopper/mop_setup.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename src/mopdata/{update_db.py => update_db.py.txt} (100%) diff --git a/src/mopdata/update_db.py b/src/mopdata/update_db.py.txt similarity index 100% rename from src/mopdata/update_db.py rename to src/mopdata/update_db.py.txt diff --git a/src/mopper/mop_setup.py b/src/mopper/mop_setup.py index cc4b0e1..5c1e04b 100755 --- a/src/mopper/mop_setup.py +++ b/src/mopper/mop_setup.py @@ -417,6 +417,6 @@ def manage_env(ctx): else: fname = ctx.obj[f]
shutil.copyfile(fpath, ctx.obj['tpath'] / fname) - update_code = import_files('mopdata').joinpath("update_db.py") + update_code = import_files('mopdata').joinpath("update_db.py.txt") shutil.copyfile(update_code, ctx.obj['outpath'] / "update_db.py") return From 3fe92d72515ad69061f6bd5ab128fcefe57e660a Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 7 Aug 2024 14:28:33 +1000 Subject: [PATCH 087/137] Update requirements.txt added cftime --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 7b8bd45..233e9da 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ cmor xarray numpy pyyaml +cftime From a6dde1ae1b799967ebdad2ed590f4781b1cb587c Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 7 Aug 2024 14:30:29 +1000 Subject: [PATCH 088/137] Update environment.yaml removed unnecessary packages --- conda/environment.yaml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/conda/environment.yaml b/conda/environment.yaml index 2f0d566..d66b631 100644 --- a/conda/environment.yaml +++ b/conda/environment.yaml @@ -3,9 +3,6 @@ channels: - defaults - conda-forge dependencies: - - python=3.10 - - pip - - pbr - click - cmor - xarray @@ -13,10 +10,3 @@ dependencies: - dask - pyyaml - cftime - - python-dateutil - - pytest - - coverage - - codecov - - importlib_resources - - pip: - - git+https://github.com/ACCESS-Community-Hub/ACCESS-MOPPeR@pytests_sam From 6ad967fef47919ca2ec0969242ee36c9d715e5f9 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 7 Aug 2024 14:32:15 +1000 Subject: [PATCH 089/137] Update environment.yaml --- conda/environment.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/conda/environment.yaml b/conda/environment.yaml index d66b631..7c2904b 100644 --- a/conda/environment.yaml +++ b/conda/environment.yaml @@ -1,6 +1,5 @@ name: test-env channels: - - defaults - conda-forge dependencies: - click From 54cc2d89d45e1deecd82b9a2fbc3f96f947c59b7 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 7 Aug 2024 17:58:40 +1000 Subject: [PATCH 090/137] more tests, added fake fs --- .github/workflows/mopper-test-conda.yaml | 10 ++-- conda/enviroment.yaml | 9 ---- conda/environment.yaml | 2 +- src/mopdb/mopdb_map.py | 38 ++++++++------- tests/conftest.py | 62 +++++++++++++++++++----- tests/test_mop_utils.py | 8 +-- tests/test_mopdb_map.py | 59 ++++++++++++++++++++++ tests/test_mopdb_utils.py | 16 +----- tests/testdata/varlist_ex.csv | 2 +- 9 files changed, 142 insertions(+), 64 deletions(-) delete mode 100644 conda/enviroment.yaml create mode 100644 tests/test_mopdb_map.py diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index 471e1fe..fd46b01 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -26,19 +26,19 @@ jobs: uses: conda-incubator/setup-miniconda@v2 with: python-version: ${{ matrix.python-version }} - environment-file: devtools/conda-envs/build_env.yaml # Path to the build conda environment + environment-file: conda/environment.yaml # Path to the build conda environment auto-update-conda: false auto-activate-base: false - show-channel-urls: true + show-channel-urls: true # - name: Build but do not upload the conda packages - uses: uibcdf/action-build-and-upload-conda-packages@v1.3.0 + uses: coecms/action-build-and-upload-conda-packages@v1.3.0 with: - meta_yaml_dir: devtools/conda-build + meta_yaml_dir: conda python-version: ${{ matrix.python-version }} # Values previously defined in
`matrix` platform_linux-64: true platform_osx-64: true platform_win-64: true - user: uibcdf + user: coecms label: auto upload: false token: ${{ secrets.ANACONDA_TOKEN }} # Replace with the right name of your secret diff --git a/conda/enviroment.yaml b/conda/enviroment.yaml deleted file mode 100644 index 3856ac4..0000000 --- a/conda/enviroment.yaml +++ /dev/null @@ -1,9 +0,0 @@ -channels: - - conda-forge -dependencies: - - click - - cmor - - xarray - - numpy - - pyyaml - - dask diff --git a/conda/environment.yaml b/conda/environment.yaml index ae78e08..d66b631 100644 --- a/conda/environment.yaml +++ b/conda/environment.yaml @@ -1,4 +1,4 @@ -#name: test-env +name: test-env channels: - defaults - conda-forge diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index e9b529b..9db15eb 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -108,12 +108,16 @@ def get_file_frq(ds, fnext): # if all time axes have only 1 timestep we cannot infer frequency # so we open also next file but get only time axs if max_len == 1: - dsnext = xr.open_dataset(fnext, decode_times = False) - time_axs2 = [d for d in dsnext.dims if 'time' in d] - ds = xr.concat([ds[time_axs], dsnext[time_axs2]], dim='time') - time_axs = [d for d in ds.dims if 'time' in d] - time_axs_len = set(len(ds[d]) for d in time_axs) - time_axs.sort(key=lambda x: len(ds[x]), reverse=True) + if fnext is None: + mopdb_log.info(f"Only 1 file cannot determine frequency for: {fpattern}") + return frq + else: + dsnext = xr.open_dataset(fnext, decode_times = False) + time_axs2 = [d for d in dsnext.dims if 'time' in d] + ds = xr.concat([ds[time_axs], dsnext[time_axs2]], dim='time') + time_axs = [d for d in ds.dims if 'time' in d] + time_axs_len = set(len(ds[d]) for d in time_axs) + time_axs.sort(key=lambda x: len(ds[x]), reverse=True) for t in time_axs: mopdb_log.debug(f"len of time axis {t}: {len(ds[t])}") if len(ds[t]) > 1: @@ -166,18 +170,18 @@ def write_varlist(conn, indir, match, version, alias): ds = xr.open_dataset(str(fobj.files[0]), decode_times=False) coords = [c for c in ds.coords] + ['latitude_longitude'] #pass next file in case of 1 timestep per file and no frq in name - if len(fobj.files) > 1: - fnext = str(fobj.files[1]) - if fobj.frequency == 'NAfrq' or fobj.realm == 'atmos': - frq_dict = get_file_frq(ds, fnext) - # if only one frequency detected empty dict - if len(frq_dict) == 1: - fobj.frequency = frq_dict.popitem()[1] - else: - fobj.multiple_frq = True - fobj.frequency = frq_dict['time'] + if len(fobj.files) == 1: + fnext = None else: - mopdb_log.info(f"Only 1 file cannot determine frequency for: {fpattern}") + fnext = str(fobj.files[1]) + if fobj.frequency == 'NAfrq' or fobj.realm == 'atmos': + frq_dict = get_file_frq(ds, fnext) + # if only one frequency detected empty dict + if len(frq_dict) == 1: + fobj.frequency = frq_dict.popitem()[1] + else: + fobj.multiple_frq = True + fobj.frequency = frq_dict['time'] mopdb_log.debug(f"Multiple frq: {fobj.multiple_frq}") if fobj.realm == "NArealm": fobj.realm = get_realm(version, ds) diff --git a/tests/conftest.py b/tests/conftest.py index 0dd6c56..6123524 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,13 +24,29 @@ import datetime import logging import csv +import pyfakefs +from pathlib import Path + from mopdb.mopdb_utils import mapping_sql, cmorvar_sql +from mopdb.mopdb_class import MapVariable, Variable, FPattern from mopper.setup_utils import filelist_sql TESTS_HOME = os.path.abspath(os.path.dirname(__file__)) TESTS_DATA = os.path.join(TESTS_HOME, 
"testdata") +# consecutive files with multiple time axes +dsmulti = os.path.join(TESTS_DATA, "multitime.nc") +dsmulti2 = os.path.join(TESTS_DATA, "multitime_next.nc") +# consecutive files with a 1-time step time axis +dsonestep = os.path.join(TESTS_DATA, "onetstep.nc") +dsonestep2 = os.path.join(TESTS_DATA, "onetstep_next.nc") +@pytest.fixture +def fake_fs(fs): # pylint:disable=invalid-name + """Variable name 'fs' causes a pylint warning. Provide a longer name + acceptable to pylint for use in tests. + """ + yield fs # setting up fixtures for databases:a ccess.db and mopper.db @pytest.fixture @@ -40,6 +56,15 @@ def session(): yield db_session connection.close() +@pytest.fixture +def input_dir(fake_fs): + dfrq = {'d': 'dai', '8': '3h', '7': '6h', 'm': 'mon'} + for date in ['201312', '201401', '201402']: + for k,v in dfrq.items(): + filebase = f"cm000a.p{k}{date}_{v}.nc" + fake_fs.create_file("/raw/atmos/"+ filebase) + assert os.path.exists("/raw/atmos/cm000a.p8201402_3h.nc") + @pytest.fixture def setup_access_db(session): @@ -100,15 +125,28 @@ def map_rows(): return maps @pytest.fixture -def um_multi_time(): - '''Return a um stule file with multiple time axes''' - time1 = pd.date_range("2001-01-01", periods=1) - time2 = pd.date_range("2001-01-01", periods=24, freq='h') - time3 = pd.date_range("2001-01-01", periods=48, freq='30min') - var1 = xr.DataArray(name='var1', data=[1], - dims=["time"], coords={"time": time1}) - var2 = xr.DataArray(name='var2', data=np.arange(24), - dims=["time_0"], coords={"time_0": time2}) - var3 = xr.DataArray(name='var3', data=np.arange(48), dims=["time_1"], - coords={"time_1": time3}) - return xr.merge([var1, var2, var3]) +def fobj(input_dir): + fobj = FPattern("cm000a.", Path("/raw/atmos/")) + return fobj + +@pytest.fixture +def var_obj(fobj): + vobj = Variable('tas', fobj) + return vobj + +@pytest.fixture +def mapvar_obj(var_obj): + match = ('','','','','','','','','') + mvobj = MapVariable(match, var_obj) + return mvobj + +@pytest.fixture +def varobjs(mapvar_obj): + mvobj = mapvar_obj + vobjs = [] + vobjs.append(mvobj) + mvobj.name = 'siconca' + vobjs.append(mvobj) + mvobj.name = 'hfls' + vobjs.append(mvobj) + return vobjs diff --git a/tests/test_mop_utils.py b/tests/test_mop_utils.py index 4889274..b47e158 100644 --- a/tests/test_mop_utils.py +++ b/tests/test_mop_utils.py @@ -71,10 +71,10 @@ def test_get_cmorname(caplog): foo = xr.DataArray(data, coords=[levs, tdata, lats, lons], dims=["lev", "t", "lat", "lon"]) with ctx: - tname = get_cmorname('t', foo.t, caplog, z_len=None) - iname = get_cmorname('lon', foo.lon, caplog, z_len=None) - jname = get_cmorname('lat', foo.lat, caplog, z_len=None) - zname = get_cmorname('z', foo.lev, caplog, z_len=3) + tname = get_cmorname('t', foo.t, z_len=None) + iname = get_cmorname('lon', foo.lon, z_len=None) + jname = get_cmorname('lat', foo.lat, z_len=None) + zname = get_cmorname('z', foo.lev, z_len=3) assert tname == 'time' assert iname == 'longitude' assert jname == 'latitude' diff --git a/tests/test_mopdb_map.py b/tests/test_mopdb_map.py new file mode 100644 index 0000000..38ac29e --- /dev/null +++ b/tests/test_mopdb_map.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# Copyright 2023 ARC Centre of Excellence for Climate Extremes +# author: Paola Petrelli +# author: Sam Green +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +import os +import sqlite3 +import click +import logging +import itertools +from mopdb.mopdb_map import * +from mopdb.mopdb_class import MapVariable, Variable, FPattern +from conftest import * + + +TESTS_HOME = os.path.abspath(os.path.dirname(__file__)) +TESTS_DATA = os.path.join(TESTS_HOME, "testdata") +# consecutive files with multiple time axes +dsmulti = os.path.join(TESTS_DATA, "multitime.nc") +dsmulti2 = os.path.join(TESTS_DATA, "multitime_next.nc") +# consecutive files with a 1-time step time axis +dsonestep = os.path.join(TESTS_DATA, "onetstep.nc") +dsonestep2 = os.path.join(TESTS_DATA, "onetstep_next.nc") + +@pytest.mark.parametrize('idx', [0,1,2]) +def test_add_var(varobjs, matches, idx, caplog): + caplog.set_level(logging.DEBUG, logger='mopdb_log') + vlist = [] + vlist = add_var(vlist, varobjs[idx], matches[idx]) + assert vlist[0].cmor_var == matches[idx][0] + + +def test_get_file_frq(caplog): + global dsmulti, dsmulti2, dsonestep, dsonestep2 + caplog.set_level(logging.DEBUG, logger='mopdb_log') + umfrq = {'time': 'day', 'time_0': '1hr', 'time_1': '30min'} + # multi time axes in file + ds = xr.open_dataset(dsmulti, decode_times=False) + out = get_file_frq(ds, dsmulti2) + assert umfrq == out + # only one time axis in file with 1 value + ds = xr.open_dataset(dsonestep, decode_times=False) + out = get_file_frq(ds, dsonestep2) + umfrq = {'time': 'day'} + assert umfrq == out + diff --git a/tests/test_mopdb_utils.py b/tests/test_mopdb_utils.py index 858697e..0f872a9 100644 --- a/tests/test_mopdb_utils.py +++ b/tests/test_mopdb_utils.py @@ -22,25 +22,11 @@ import logging import itertools from mopdb.mopdb_utils import * -from conftest import um_multi_time +from mopdb.mopdb_class import MapVariable, Variable, FPattern #from click.testing import CliRunner -@pytest.mark.parametrize('idx', [0,1,2]) -def test_add_var(varlist_rows, matches, idx, caplog): - caplog.set_level(logging.DEBUG, logger='mopdb_log') - vlist = [] - vlist = add_var(vlist, varlist_rows[idx], matches[idx]) - assert vlist[0]['cmor_var'] == matches[idx][0] - - -def test_build_umfrq(um_multi_time, caplog): - caplog.set_level(logging.DEBUG, logger='mopdb_log') - time_axs = [d for d in um_multi_time.dims if 'time' in d] - umfrq = {'time': 'day', 'time_0': '1hr', 'time_1': '30min'} - out = build_umfrq(time_axs, um_multi_time) - assert umfrq == out #@pytest.mark.parametrize('fname', [0,1,2]) def test_get_date_pattern(caplog): diff --git a/tests/testdata/varlist_ex.csv b/tests/testdata/varlist_ex.csv index 154729f..780142d 100644 --- a/tests/testdata/varlist_ex.csv +++ b/tests/testdata/varlist_ex.csv @@ -1,3 +1,3 @@ -name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;filename;long_name;standard_name +name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;fpattern;long_name;standard_name fld_s03i236;tas;degC;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature fld_s00i031;siconca;%;time lat lon;mon;atmos;area: time: mean;;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN 
SEA AFTER TSTEP;sea_ice_area_fraction fld_s03i234;hfls;W m-2;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE LATENT HEAT FLUX W/M2;surface_upward_latent_heat_flu From 33025660e89138fa4268f7bef19057cc242d937c Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 14:45:54 +1000 Subject: [PATCH 091/137] added test-requirements.txt --- .github/workflows/mopper-pytest.yaml | 2 +- pyproject.toml | 3 ++- test-requirements.txt | 3 +++ 3 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 test-requirements.txt diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 2cb0e6d..f1d08c1 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -48,7 +48,7 @@ jobs: pip install -e . - name: Test with pytest run: | - conda install pytest coverage codecov --solver classic + conda install pytest pyfakefs coverage codecov --solver classic conda run python -m pytest conda run coverage run --source src -m py.test - name: Upload to codecov diff --git a/pyproject.toml b/pyproject.toml index 0bc0d99..51aac13 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] -dynamic = ["version", "dependencies"] +dynamic = ["version", "dependencies", "optional-dependencies"] [tool.setuptools-git-versioning] enabled = true @@ -36,6 +36,7 @@ mopdb = "mopdb.mopdb:mopdb_catch" [tool.setuptools.dynamic] dependencies = {file = "requirements.txt"} +optional-dependencies.test = { file = ["test-requirements.txt"] } [tool.setuptools.packages.find] where = ["src"] diff --git a/test-requirements.txt b/test-requirements.txt new file mode 100644 index 0000000..f6d89a6 --- /dev/null +++ b/test-requirements.txt @@ -0,0 +1,3 @@ +pytest +pyfakefs + From 136431441a67079ddc1b2c65e363b558c8dea818 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 15:14:35 +1000 Subject: [PATCH 092/137] improved test-conda action --- .github/workflows/mopper-test-conda.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index fd46b01..78c0e5a 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -19,11 +19,11 @@ jobs: matrix: python-version: ["3.9", "3.10", "3.11"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4.1.7 with: fetch-depth: 0 - name: Conda environment creation and activation - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: python-version: ${{ matrix.python-version }} environment-file: conda/environment.yaml # Path to the build conda environment @@ -31,7 +31,7 @@ jobs: auto-activate-base: false show-channel-urls: true # - name: Build but do not upload the conda packages - uses: coecms/action-build-and-upload-conda-packages@v1.3.0 + uses: uibcdf/action-build-and-upload-conda-packages@v1.3.0 with: meta_yaml_dir: conda python-version: ${{ matrix.python-version }} # Values previously defined in `matrix` From 2d66224ee130db2f2fd0b9c32cbba9cfa7ced1d2 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 16:06:16 +1000 Subject: [PATCH 093/137] more updates to conda and actions --- .github/workflows/mopper-pytest.yaml | 5 +++-- .github/workflows/mopper-test-conda.yaml | 4 ++-- conda/environment.yaml | 22 +++++++++++++--------- conda/meta.yaml | 19 
++++++++++++------- conda/test-env.yaml | 14 ++++++++++++++ requirements.txt | 5 ++++- 6 files changed, 48 insertions(+), 21 deletions(-) create mode 100644 conda/test-env.yaml diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index f1d08c1..9cf4e9f 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -31,7 +31,7 @@ jobs: echo $CONDA/bin >> $GITHUB_PATH - name: Install dependencies run: | - conda env update --file conda/environment.yaml --name base + conda env update --file conda/test-env.yaml --name base - name: Lint with flake8 run: | conda install -c conda-forge ruff --solver classic @@ -48,7 +48,8 @@ jobs: pip install -e . - name: Test with pytest run: | - conda install pytest pyfakefs coverage codecov --solver classic + conda install pytest coverage codecov --solver classic + conda install -c conda-forge pyfakefs conda run python -m pytest conda run coverage run --source src -m py.test - name: Upload to codecov diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index 78c0e5a..1132a54 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -36,8 +36,8 @@ jobs: meta_yaml_dir: conda python-version: ${{ matrix.python-version }} # Values previously defined in `matrix` platform_linux-64: true - platform_osx-64: true - platform_win-64: true + platform_osx-64: false + platform_win-64: false user: coecms label: auto upload: false diff --git a/conda/environment.yaml b/conda/environment.yaml index d66b631..ff12e08 100644 --- a/conda/environment.yaml +++ b/conda/environment.yaml @@ -1,12 +1,16 @@ -name: test-env channels: - - defaults - conda-forge + - coecms + - default + dependencies: - - click - - cmor - - xarray - - numpy - - dask - - pyyaml - - cftime + - anaconda-client + - conda-build + - conda-verify + #- click + #- cmor + #- xarray + #- numpy + #- dask + #- pyyaml + #- cftime diff --git a/conda/meta.yaml b/conda/meta.yaml index 05d58fc..c5fb0cf 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,18 +1,19 @@ package: name: mopper - version: 1.0.0 + version: "{{ environ['GIT_DESCRIBE_TAG'] }}" #source: # path: ./ source: #url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR/archive/refs/tags/{{version}}.tar.gz - git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git - git_rev: "{{ version }}" - git_depth: 1 # (Defaults to -1/not shallow) + #git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git + #git_rev: "{{ version }}" + #git_depth: 1 # (Defaults to -1/not shallow) + path: ../src build: - number: 0 + number: 1 noarch: python script: "{{ PYTHON }} -m pip install . 
--no-deps --ignore-installed" entry_points: @@ -23,7 +24,6 @@ requirements: host: - python - pip - - pbr run: - python - click @@ -34,12 +34,17 @@ requirements: - pyyaml - cftime - python-dateutil + test: source_files: + - tests/testdata/* - tests/testdata + requires: + - pytest + - pyfakefs about: home: https://github.com/ACCESS-Hive/ACCESS-MOPPeR license: Apache 2.0 #license_file: LICENSE.txt - summary: 'ACCESS-MOPPeR post-process ACCESS raw model output to ESGF data standards' + summary: 'ACCESS-MOPPeR post-process ACCESS raw model output using CMOR and pre-defined data standards' diff --git a/conda/test-env.yaml b/conda/test-env.yaml new file mode 100644 index 0000000..e7866c9 --- /dev/null +++ b/conda/test-env.yaml @@ -0,0 +1,14 @@ +channels: + - conda-forge + - coecms + - default + +dependencies: + - click + - cmor + - xarray + - numpy + - dask + - pyyaml + - cftime + - python-dateutil diff --git a/requirements.txt b/requirements.txt index 7b8bd45..0953f2f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,10 @@ # Add general dependencies here -# Optional dependencies e.g. [dev] are added in `setup.cfg` +# Optional dependencies e.g. [dev] are added in `test-requirements` click cmor xarray numpy pyyaml +dask +python-dateutil +cftime From 13ee445f99f0e564e3d686eb43612608132f3973 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 16:13:55 +1000 Subject: [PATCH 094/137] commenting codecov temporarily using solve classic --- .github/workflows/mopper-pytest.yaml | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 9cf4e9f..ff5072e 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -31,31 +31,27 @@ jobs: echo $CONDA/bin >> $GITHUB_PATH - name: Install dependencies run: | + conda config --set solver classic conda env update --file conda/test-env.yaml --name base - name: Lint with flake8 run: | - conda install -c conda-forge ruff --solver classic + conda install -c conda-forge ruff ruff check --output-format=github . continue-on-error: true - #conda install flake8 --solver classic - # stop the build if there are Python syntax errors or undefined names - #flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - #flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics # making sure we are testing installed package - name: Install package run: | pip install -e . 
- name: Test with pytest run: | - conda install pytest coverage codecov --solver classic + conda install pytest coverage codecov conda install -c conda-forge pyfakefs conda run python -m pytest - conda run coverage run --source src -m py.test - - name: Upload to codecov - if: steps.build.outcome == 'success' - run: | - curl -Os https://uploader.codecov.io/latest/linux/codecov - chmod +x codecov - ./codecov + #conda run coverage run --source src -m py.test + # - name: Upload to codecov + # if: steps.build.outcome == 'success' + # run: | + # curl -Os https://uploader.codecov.io/latest/linux/codecov + # chmod +x codecov + # ./codecov From 89058b8792a0f7a046298710ac7b0248d2b4cd09 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 16:29:54 +1000 Subject: [PATCH 095/137] more updates --- .github/workflows/mopper-pytest.yaml | 4 ++-- .github/workflows/mopper-test-conda.yaml | 2 +- conda/environment.yaml | 7 ------- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index ff5072e..dfe5f90 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -20,9 +20,9 @@ jobs: max-parallel: 5 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4.1.7 - name: Set up Python 3.10 - uses: actions/setup-python@v2 + uses: actions/setup-python@v5.1.1 with: python-version: '3.10' - name: Add conda to system path diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index 1132a54..0e89622 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -23,7 +23,7 @@ jobs: with: fetch-depth: 0 - name: Conda environment creation and activation - uses: conda-incubator/setup-miniconda@v3 + uses: conda-incubator/setup-miniconda@v3.0.4 with: python-version: ${{ matrix.python-version }} environment-file: conda/environment.yaml # Path to the build conda environment diff --git a/conda/environment.yaml b/conda/environment.yaml index ff12e08..069e0f9 100644 --- a/conda/environment.yaml +++ b/conda/environment.yaml @@ -7,10 +7,3 @@ dependencies: - anaconda-client - conda-build - conda-verify - #- click - #- cmor - #- xarray - #- numpy - #- dask - #- pyyaml - #- cftime From 52acabc8782e4dbe1379d876afb14cdb28a8f4b9 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 16:54:49 +1000 Subject: [PATCH 096/137] reverting solver as slows down conda install --- .github/workflows/mopper-pytest.yaml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index dfe5f90..4dee0cb 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -31,11 +31,12 @@ jobs: echo $CONDA/bin >> $GITHUB_PATH - name: Install dependencies run: | - conda config --set solver classic + #conda config --set solver classic + # this seems to slow it down!!! conda env update --file conda/test-env.yaml --name base - - name: Lint with flake8 + - name: Lint with ruff run: | - conda install -c conda-forge ruff + conda install -c conda-forge ruff --solver classic ruff check --output-format=github . continue-on-error: true # making sure we are testing installed package @@ -44,7 +45,7 @@ jobs: pip install -e . 
- name: Test with pytest run: | - conda install pytest coverage codecov + conda install pytest coverage codecov --solver classic conda install -c conda-forge pyfakefs conda run python -m pytest #conda run coverage run --source src -m py.test From 5a5c415812dbc65d9544ce90449e89eb60cde3f4 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 17:00:01 +1000 Subject: [PATCH 097/137] adopting miniconda for pytest action --- .github/workflows/mopper-pytest.yaml | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 4dee0cb..cb3fa56 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -25,10 +25,15 @@ jobs: uses: actions/setup-python@v5.1.1 with: python-version: '3.10' - - name: Add conda to system path - run: | - # $CONDA is an environment variable pointing to the root of the miniconda directory - echo $CONDA/bin >> $GITHUB_PATH + - name: Install Miniconda + uses: conda-incubator/setup-miniconda@v3.0.4 + with: + auto-update-conda: true + python-version: ${{ matrix.python-version }} + #- name: Add conda to system path + # run: | + # # $CONDA is an environment variable pointing to the root of the miniconda directory + # echo $CONDA/bin >> $GITHUB_PATH - name: Install dependencies run: | #conda config --set solver classic @@ -36,7 +41,7 @@ jobs: conda env update --file conda/test-env.yaml --name base - name: Lint with ruff run: | - conda install -c conda-forge ruff --solver classic + conda install -c conda-forge ruff #--solver classic ruff check --output-format=github . continue-on-error: true # making sure we are testing installed package @@ -45,7 +50,7 @@ jobs: pip install -e . - name: Test with pytest run: | - conda install pytest coverage codecov --solver classic + conda install pytest coverage codecov #--solver classic conda install -c conda-forge pyfakefs conda run python -m pytest #conda run coverage run --source src -m py.test From cb2055482076c988527fed5590bb7d2038e55083 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 17:03:59 +1000 Subject: [PATCH 098/137] fixed pytest action --- .github/workflows/mopper-pytest.yaml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index cb3fa56..14134cf 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -18,18 +18,20 @@ jobs: timeout-minutes: 60 strategy: max-parallel: 5 + matrix: + python-version: ["3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v4.1.7 - name: Set up Python 3.10 uses: actions/setup-python@v5.1.1 with: - python-version: '3.10' - - name: Install Miniconda - uses: conda-incubator/setup-miniconda@v3.0.4 - with: - auto-update-conda: true - python-version: ${{ matrix.python-version }} + python-version: ${{ matrix.python-version }} + - name: Install Miniconda + uses: conda-incubator/setup-miniconda@v3.0.4 + with: + auto-update-conda: true + python-version: ${{ matrix.python-version }} #- name: Add conda to system path # run: | # # $CONDA is an environment variable pointing to the root of the miniconda directory From 37a3096622156b314d9c8977522313d41b38b451 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 17:10:07 +1000 Subject: [PATCH 099/137] trying to install cmor --- conda/test-env.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/conda/test-env.yaml 
b/conda/test-env.yaml index e7866c9..525dd16 100644 --- a/conda/test-env.yaml +++ b/conda/test-env.yaml @@ -1,11 +1,9 @@ channels: - conda-forge - - coecms - - default dependencies: - - click - cmor + - click - xarray - numpy - dask From 9c47f708e800acdd4dd5b61174e25a9f7dac7ff5 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 17:16:05 +1000 Subject: [PATCH 100/137] trying to fix conda env --- .github/workflows/mopper-pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 14134cf..cec4d68 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -40,7 +40,7 @@ jobs: run: | #conda config --set solver classic # this seems to slow it down!!! - conda env update --file conda/test-env.yaml --name base + conda env update --file conda/test-env.yaml - name: Lint with ruff run: | conda install -c conda-forge ruff #--solver classic From 3c80278c60431e397d73a0f052ea5e0de256c7e5 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 17:24:15 +1000 Subject: [PATCH 101/137] attempt 1000 --- .github/workflows/mopper-pytest.yaml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index cec4d68..461565c 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -31,6 +31,7 @@ jobs: uses: conda-incubator/setup-miniconda@v3.0.4 with: auto-update-conda: true + activate-environment: testenv python-version: ${{ matrix.python-version }} #- name: Add conda to system path # run: | @@ -40,10 +41,10 @@ jobs: run: | #conda config --set solver classic # this seems to slow it down!!! - conda env update --file conda/test-env.yaml + conda env update --file conda/test-env.yaml --name testenv - name: Lint with ruff run: | - conda install -c conda-forge ruff #--solver classic + conda install -c conda-forge ruff --name testenv #--solver classic ruff check --output-format=github . continue-on-error: true # making sure we are testing installed package @@ -52,8 +53,8 @@ jobs: pip install -e . - name: Test with pytest run: | - conda install pytest coverage codecov #--solver classic - conda install -c conda-forge pyfakefs + conda install pytest coverage codecov --name testenv #--solver classic + conda install -c conda-forge pyfakefs --name testenv conda run python -m pytest #conda run coverage run --source src -m py.test # - name: Upload to codecov From cbe72d5793d8cb4dde686e186ec4479d20847837 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 17:35:43 +1000 Subject: [PATCH 102/137] attempt 1001 --- .github/workflows/mopper-pytest.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 461565c..21259ca 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -44,12 +44,14 @@ jobs: conda env update --file conda/test-env.yaml --name testenv - name: Lint with ruff run: | - conda install -c conda-forge ruff --name testenv #--solver classic + conda install -c conda-forge ruff #--solver classic ruff check --output-format=github . continue-on-error: true + # making sure we are testing installed package - name: Install package run: | + conda list | grep cmor pip install -e . 
- name: Test with pytest run: | From 322fadbb324972ac6c201192dfe57df2151e97b7 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 17:46:45 +1000 Subject: [PATCH 103/137] attempt 1002 --- .github/workflows/mopper-pytest.yaml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 21259ca..3d569a6 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -30,9 +30,11 @@ jobs: - name: Install Miniconda uses: conda-incubator/setup-miniconda@v3.0.4 with: - auto-update-conda: true - activate-environment: testenv + #auto-update-conda: true + activate-environment: "" + auto-activate-base: true python-version: ${{ matrix.python-version }} + channels: conda-forge #- name: Add conda to system path # run: | # # $CONDA is an environment variable pointing to the root of the miniconda directory @@ -41,7 +43,8 @@ jobs: run: | #conda config --set solver classic # this seems to slow it down!!! - conda env update --file conda/test-env.yaml --name testenv + conda env update --file conda/test-env.yaml --name base + conda list - name: Lint with ruff run: | conda install -c conda-forge ruff #--solver classic @@ -51,12 +54,11 @@ jobs: # making sure we are testing installed package - name: Install package run: | - conda list | grep cmor pip install -e . - name: Test with pytest run: | - conda install pytest coverage codecov --name testenv #--solver classic - conda install -c conda-forge pyfakefs --name testenv + conda install pytest coverage codecov #--name testenv #--solver classic + conda install -c conda-forge pyfakefs #--name testenv conda run python -m pytest #conda run coverage run --source src -m py.test # - name: Upload to codecov From 02b76c478686e1f28ab076e88ebede695500059f Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 17:50:14 +1000 Subject: [PATCH 104/137] attempt 1003 --- .github/workflows/mopper-pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 3d569a6..1fdc87f 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -31,8 +31,8 @@ jobs: uses: conda-incubator/setup-miniconda@v3.0.4 with: #auto-update-conda: true - activate-environment: "" auto-activate-base: true + activate-environment: "" python-version: ${{ matrix.python-version }} channels: conda-forge #- name: Add conda to system path From 430c327f57c6d602136e79827ca06218fd0a3ece Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 17:52:32 +1000 Subject: [PATCH 105/137] attempt 1004 --- .github/workflows/mopper-pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 1fdc87f..08458e0 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -32,7 +32,7 @@ jobs: with: #auto-update-conda: true auto-activate-base: true - activate-environment: "" + activate-environment: true python-version: ${{ matrix.python-version }} channels: conda-forge #- name: Add conda to system path From 86aad611e4cd1da7e6d5b0e0979351170766d9a3 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 13:37:37 +1000 Subject: [PATCH 106/137] solved issue #148 --- mappings/map_AUS2200.csv | 1 + src/mopdata/access.db | Bin 1081344 -> 1085440 bytes src/mopdata/access_dump.sql | 2 + 
src/mopdata/cmor_tables/AUS2200_A3hr.json | 18 ++++++ src/mopdata/interval2frq.yaml | 25 +++++++++ src/mopdb/mopdb_class.py | 4 +- src/mopdb/mopdb_map.py | 65 ++++++++++------------ 7 files changed, 75 insertions(+), 40 deletions(-) create mode 100644 src/mopdata/interval2frq.yaml diff --git a/mappings/map_AUS2200.csv b/mappings/map_AUS2200.csv index a6c1ab9..7f7aec5 100644 --- a/mappings/map_AUS2200.csv +++ b/mappings/map_AUS2200.csv @@ -100,5 +100,6 @@ wsgmax10m;fld_s03i463;;m s-1;time lat lon;10minPt;atmos;area: time: point;;AUS22 wsgmax10m_max;fld_s03i463_max;;m s-1;time_0 lat lon;10min;atmos;area: time: maximum;;AUS2200_A10min;AUS2200;;float32;22048000;2304;umnsa_spec;WIND GUST;wind_speed_of_gust z0;fld_s00i026;;m;time lat lon;1hrPt;atmos;area: time: point;;AUS2200_A1hr;AUS2200;;float32;22048000;384;umnsa_slv;ROUGHNESS LENGTH AFTER TIMESTEP;surface_roughness_length zfull;fld_s15i101;level_to_height(var[0],levs=(0,66));m;time_0 model_theta_level_number lat lon_0;1hrPt;atmos;area: time: point;;AUS2200_A1hr;AUS2200;float32;1543360000;384;umnsa_mdl;H OF THETA MODEL LEVS FROM SEA LEVEL;height_above_reference_ellipsoid +zg16;fld_s16i202;;m;time pressure lat lon;3hrPt;atmos;area: time: point;;AUS2200_A3hr;AUS2200;float32;352768000;114;flreduced_;GEOPOTENTIAL HEIGHT ON P LEV/P GRID;geopotential_height zmla;fld_s00i025;;m;time_0 lat lon;1hr;atmos;area: time: mean;;AUS2200_A1hr;AUS2200;float32;22048000;384;umnsa_slv;BOUNDARY LAYER DEPTH AFTER TIMESTEP;atmosphere_boundary_layer_thickness zmla;fld_s00i025;;m;time lat lon;10minPt;atmos;area: time: point;;AUS2200_A10min;AUS2200;float32;22048000;2304;umnsa_spec;BOUNDARY LAYER DEPTH AFTER TIMESTEP;atmosphere_boundary_layer_thickness diff --git a/src/mopdata/access.db b/src/mopdata/access.db index 22328690cd4064cdbbd0ece89367c33ccc995762..70be17f5931c78e7391cfaf84613d7bb09b1b68c 100644 GIT binary patch delta 470 zcmZo@aB5iKG(nn`gMopObE1MhBge*sCHx$>xn3}UL^ksXY++>Lda-?D1K$Gn%?=Hn zjLh#RMs9v5wfigMhUu#W7%#9eJzzB3Y}jDLIQ@hWqp+fJMiIwRab`oN~gLuc3#7d4UKtm-aI*PRa5Ml&kCLm@8Viq7~-Tp&}O`=zZaUT;u%Sx6hEVV3| zETJs+EE+6=%)gnRFl}PIvR!8Zn=X+3oX#&g z{pT!x>FL6H0>bQ2@$H(L0@uU1ctH`tdzyjwG$azXA86oHWMgSDoESCz{98Wd=^Qut UcqhALN=@!>GT!dxZW`U!TZfT0$Uij2MRKEGjC52WU2v^ zT1=ZiO6~Z{_+a`f0mci=K%LVS3s`hFJ2c2KPTwHJD9oCYSjlk(#FS|NA;bv8OhC*G z#4JF}y8VX`n?$b+lNA#`%Sx6hEVV3|ETJs+EE+6=%)gnRFl}NA*simH&6AOZJ%yoU zvSCBP^p^|S0;lg=$fn(%x`+*k*@2h?h&h3n3y8Uam}h(HBHsCP87E9KTc{h9+` z2otm4bRS1Px$Pc~d|J$mwv!bNET=cDWMgUH;KH|kgA0GiZAP)_tO^41)72CN?rvvP T6j;g5*aWn)Zo8(Y!1XWygiKvX diff --git a/src/mopdata/access_dump.sql b/src/mopdata/access_dump.sql index de52ba2..8d4a090 100644 --- a/src/mopdata/access_dump.sql +++ b/src/mopdata/access_dump.sql @@ -2279,6 +2279,7 @@ INSERT INTO cmorvar VALUES('lmask-AUS2200_fx','fx','land','land_binary_mask','%' INSERT INTO cmorvar VALUES('omldamax-CM2_mon','mon','ocean','ocean_mixed_layer_thickness_defined_by_mixing_scheme','m','area: mean time: maximum','area: areacello','Mean Monthly Maximum Ocean Mixed Layer Thickness Defined by Mixing Scheme','The ocean mixed layer is the upper part of the ocean, regarded as being well-mixed. The base of the mixed layer defined by the mixing scheme is a diagnostic of ocean models. 
''Thickness'' means the vertical extent of a layer.','longitude latitude time','omldamax','real','','','','','','',''); INSERT INTO cmorvar VALUES('difvho-CM2_mon','mon','ocean','ocean_vertical_heat_diffusivity','m2 s-1','area: mean time: mean','area: areacello volume: volcello','Ocean Vertical Heat Diffusivity','Vertical/dianeutral diffusivity applied to prognostic temperature field.','longitude latitude olevel time','difvho','real','','','','','','',''); INSERT INTO cmorvar VALUES('rho0-CM2_mon','mon','ocean','sea_water_potential_density','kg m-3','area: mean time: mean','area: areacello volume: volcello','Potential Density referenced to 0 dbar','','longitude latitude olevel time','rho0','real','','','','','','',''); +INSERT INTO cmorvar VALUES('zg16-AUS2200_A3hr','3hrPt','atmos','geopotential_height','m','area: mean time: point','area: areacella','Geopotential Height on pressure levels','Geopotential is the sum of the specific gravitational potential energy relative to the geoid and the specific centripetal potential energy. Geopotential height is the geopotential divided by the standard acceleration due to gravity. It is numerically similar to the altitude (or geometric height) and not to the quantity with standard name height, which is relative to the surface.','longitude latitude plev16 time1','zg','real','','','','','','',''); CREATE TABLE mapping ( cmor_var TEXT, input_vars TEXT, @@ -2734,6 +2735,7 @@ INSERT INTO mapping VALUES('zfull','fld_s15i101','','m','time model_theta_level_ INSERT INTO mapping VALUES('zg','fld_s30i297','','m','time pressure lat lon','mon','atmos','area: time: mean','','CMIP6_Amon','CM2','geopotential_height','map_atmos_CM2'); INSERT INTO mapping VALUES('zg','fld_s30i297','','m','time pressure lat lon','day','atmos','area: time: mean','','CMIP6_Eday','CM2','geopotential_height','map_atmos_CM2'); INSERT INTO mapping VALUES('zg','fld_s30i297','','m','time pressure lat lon','day','atmos','area: time: mean','','CMIP6_day','CM2','geopotential_height','map_atmos_CM2'); +INSERT INTO mapping VALUES('zg16','fld_s16i202','','m','time pressure lat lon','3hrPt','atmos','area: time: point','','AUS2200_A3hr','AUS2200','geopotential_height','AUS2200'); INSERT INTO mapping VALUES('zg500','fld_s30i297','','m','time pressure lat lon','6hrPt','atmos','area: time: point','','CMIP6_6hrPlevPt','CM2','geopotential_height','map_atmos_CM2'); INSERT INTO mapping VALUES('zg500','fld_s30i297','var[0].sel(pressure=500)','m','time pressure lat lon','day','aerosol','area: time: mean','','CMIP6_AERday','CM2','geopotential_height','map_aerosol_CM2'); INSERT INTO mapping VALUES('zguvgrid','fld_s30i207','','m','time pressure lat_v lon_u','mon','atmos','area: time: mean','','CM2_mon','CM2','geopotential_height','map_atmos_CM2'); diff --git a/src/mopdata/cmor_tables/AUS2200_A3hr.json b/src/mopdata/cmor_tables/AUS2200_A3hr.json index d9108a9..99e6833 100644 --- a/src/mopdata/cmor_tables/AUS2200_A3hr.json +++ b/src/mopdata/cmor_tables/AUS2200_A3hr.json @@ -103,6 +103,24 @@ "valid_max": "", "ok_min_mean_abs": "", "ok_max_mean_abs": "" + }, + "zg16": { + "frequency": "3hrPt", + "modeling_realm": "atmos", + "standard_name": "geopotential_height", + "units": "m", + "cell_methods": "area: mean time: point", + "cell_measures": "area: areacella", + "long_name": "Geopotential Height on pressure levels", + "comment": "Geopotential is the sum of the specific gravitational potential energy relative to the geoid and the specific centripetal potential energy. 
Geopotential height is the geopotential divided by the standard acceleration due to gravity. It is numerically similar to the altitude (or geometric height) and not to the quantity with standard name height, which is relative to the surface.", + "dimensions": "longitude latitude plev16 time1", + "out_name": "zg", + "type": "real", + "positive": "", + "valid_min": "", + "valid_max": "", + "ok_min_mean_abs": "", + "ok_max_mean_abs": "" } } } diff --git a/src/mopdata/interval2frq.yaml b/src/mopdata/interval2frq.yaml new file mode 100644 index 0000000..37189ec --- /dev/null +++ b/src/mopdata/interval2frq.yaml @@ -0,0 +1,25 @@ +# This file contains the dictionary neededto associate a time step interval +# to a frequency. There can be more than one depending on the units used by +# the time axis +days: + dec: 3652.0 + yr: 365.0 + mon: 30.0 + day: 1.0 + 6hr: 0.25 + 3hr: 0.125 + 1hr: 0.041667 + 30min: 0.020833 + 10min: 0.006944 + +hours: + dec: 87648.0 + yr: 8760.0 + mon: 720.0 + day: 24.0 + 6hr: 6.0 + 3hr: 3.0 + 1hr: 1.0 + 30min: 0.5 + 10min: 0.167 + diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index 8b73805..d044eaa 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -94,8 +94,6 @@ def __init__(self, varname: str, fobj: FPattern): self.name = varname # path object self.fpattern = fobj.fpattern - #self.fpath = fobj.fpath - #self.files = fobj.files # mapping attributes self._frequency = fobj.frequency self._realm = fobj.realm @@ -127,7 +125,7 @@ def frequency(self): def frequency(self, value): value = value.replace('hPt', 'hrPt') if not any(x in value for x in - ['min', 'hr', 'day', 'mon', 'yr']): + ['fx', 'min', 'hr', 'day', 'mon', 'yr']): value = 'NAfrq' self._frequency = value diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index 9db15eb..b88ea7c 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -84,7 +84,7 @@ def get_cmorname(conn, vobj, version): f"{results}\n Using {vobj.cmor_var} from {vobj.cmor_table}") return vobj -def get_file_frq(ds, fnext): +def get_file_frq(ds, fnext, int2frq): """Return a dictionary with frequency for each time axis. Frequency is inferred by comparing interval between two consecutive @@ -95,22 +95,23 @@ def get_file_frq(ds, fnext): (usually only UM) or if frequency can be guessed from filename. 
""" mopdb_log = logging.getLogger('mopdb_log') + mopdb_log.debug(f"in get_file_frq fnext: {fnext}") frq = {} - int2frq = {'dec': 3652.0, 'yr': 365.0, 'mon': 30.0, - 'day': 1.0, '6hr': 0.25, '3hr': 0.125, - '1hr': 0.041667, '30min': 0.020833, '10min': 0.006944} # retrieve all time axes time_axs = [d for d in ds.dims if 'time' in d] time_axs_len = set(len(ds[d]) for d in time_axs) time_axs.sort(key=lambda x: len(ds[x]), reverse=True) mopdb_log.debug(f"in get_file_frq, time_axs: {time_axs}") - max_len = len(ds[time_axs[0]]) + if len(time_axs) > 0: + max_len = len(ds[time_axs[0]]) + else: + max_len = 0 + frq = {'time': 'fx'} # if all time axes have only 1 timestep we cannot infer frequency # so we open also next file but get only time axs if max_len == 1: if fnext is None: mopdb_log.info(f"Only 1 file cannot determine frequency for: {fpattern}") - return frq else: dsnext = xr.open_dataset(fnext, decode_times = False) time_axs2 = [d for d in dsnext.dims if 'time' in d] @@ -118,18 +119,19 @@ def get_file_frq(ds, fnext): time_axs = [d for d in ds.dims if 'time' in d] time_axs_len = set(len(ds[d]) for d in time_axs) time_axs.sort(key=lambda x: len(ds[x]), reverse=True) - for t in time_axs: - mopdb_log.debug(f"len of time axis {t}: {len(ds[t])}") - if len(ds[t]) > 1: - interval = (ds[t][1]-ds[t][0]).values - interval_file = (ds[t][-1] -ds[t][0]).values - else: - interval = interval_file - mopdb_log.debug(f"interval 2 timesteps for {t}: {interval}") - for k,v in int2frq.items(): - if math.isclose(interval, v, rel_tol=0.05): - frq[t] = k - break + if max_len > 0: + for t in time_axs: + mopdb_log.debug(f"len of time axis {t}: {len(ds[t])}") + if len(ds[t]) > 1: + interval = (ds[t][1]-ds[t][0]).values + interval_file = (ds[t][-1] -ds[t][0]).values + else: + interval = interval_file + mopdb_log.debug(f"interval 2 timesteps for {t}: {interval}") + for k,v in int2frq.items(): + if math.isclose(interval, v, rel_tol=0.05): + frq[t] = k + break return frq def write_varlist(conn, indir, match, version, alias): @@ -168,6 +170,10 @@ def write_varlist(conn, indir, match, version, alias): #fwriter.writerow([f"#{fpattern}"]) # get attributes for the file variables ds = xr.open_dataset(str(fobj.files[0]), decode_times=False) + time_units = ds['time'].units.split()[0] + yfile = import_files('mopdata').joinpath('interval2frq.yaml') + fdata = read_yaml(yfile) + int2frq = fdata[time_units] coords = [c for c in ds.coords] + ['latitude_longitude'] #pass next file in case of 1 timestep per file and no frq in name if len(fobj.files) == 1: @@ -175,7 +181,7 @@ def write_varlist(conn, indir, match, version, alias): else: fnext = str(fobj.files[1]) if fobj.frequency == 'NAfrq' or fobj.realm == 'atmos': - frq_dict = get_file_frq(ds, fnext) + frq_dict = get_file_frq(ds, fnext, int2frq) # if only one frequency detected empty dict if len(frq_dict) == 1: fobj.frequency = frq_dict.popitem()[1] @@ -336,31 +342,13 @@ def add_var(vlist, vobj, match, stdnm=False): # assign cmor_var from match and swap place with input_vars mopdb_log.debug(f"Assign cmor_var: {match}") mopdb_log.debug(f"initial variable definition: {vobj}") - #var = vobj.__dict__.copy() var = MapVariable(match, vobj) - #var.cmor_var = match[0] - #vobj.input_vars = match[1] - # orig_name = var.pop('name') - # assign realm from match - #var['realm'] = match[4] - # with stdn assign cmorvar and table if only 1 match returned - # otherwise assign table from match if stdnm: var.input_vars = vobj.name if len(var.cmor_var) == 1: cmor_var, table = var.cmor_var[0].split("-") 
var.cmor_var = cmor_var var.cmor_table = table - #else: - # var['cmor_table'] = match[6] - # add calculation, positive and version - #var['calculation'] = match[2] - #var['positive'] = match[7] - #var['version'] = match[5] - # maybe we should override units here rather than in check_realm_units - # if units missing get them from match - #if var['units'] is None or var['units'] == '': - # var['units'] = match[8] vlist.append(var) return vlist @@ -436,6 +424,9 @@ def write_map_template(conn, parsed, alias): with open(f"map_{alias}.csv", 'w') as fcsv: fwriter = csv.DictWriter(fcsv, keys, delimiter=';') write_vars(full, fwriter, keys, conn=conn) + # write header as write_vars skips it if full is empty + if len(full) == 0: + fwriter.writerow({x:x for x in keys}) div = ("# Derived variables with matching version and " + "frequency: Use with caution!") write_vars(pot_full, fwriter, div, conn=conn) From 7547fb2835306a888ab4c7cffa0bbaead7ae069d Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 15:19:35 +1000 Subject: [PATCH 107/137] attempt 1005 to fix workflows --- .github/workflows/mopper-pytest.yaml | 14 +++++++------- conda/{test-env.yaml => testenv.yaml} | 5 +++++ 2 files changed, 12 insertions(+), 7 deletions(-) rename conda/{test-env.yaml => testenv.yaml} (68%) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 08458e0..f957e4e 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -30,9 +30,9 @@ jobs: - name: Install Miniconda uses: conda-incubator/setup-miniconda@v3.0.4 with: - #auto-update-conda: true - auto-activate-base: true - activate-environment: true + auto-update-conda: true + activate-environment: testenv + environment-file: conda/testenv.yml python-version: ${{ matrix.python-version }} channels: conda-forge #- name: Add conda to system path @@ -43,11 +43,11 @@ jobs: run: | #conda config --set solver classic # this seems to slow it down!!! - conda env update --file conda/test-env.yaml --name base + #conda env update --file conda/test-env.yaml --name testenv conda list - name: Lint with ruff run: | - conda install -c conda-forge ruff #--solver classic + #conda install -c conda-forge ruff #--solver classic ruff check --output-format=github . continue-on-error: true @@ -57,8 +57,8 @@ jobs: pip install -e . 
- name: Test with pytest run: | - conda install pytest coverage codecov #--name testenv #--solver classic - conda install -c conda-forge pyfakefs #--name testenv + #conda install pytest coverage codecov #--name testenv #--solver classic + #conda install -c conda-forge pyfakefs #--name testenv conda run python -m pytest #conda run coverage run --source src -m py.test # - name: Upload to codecov diff --git a/conda/test-env.yaml b/conda/testenv.yaml similarity index 68% rename from conda/test-env.yaml rename to conda/testenv.yaml index 525dd16..becd88a 100644 --- a/conda/test-env.yaml +++ b/conda/testenv.yaml @@ -10,3 +10,8 @@ dependencies: - pyyaml - cftime - python-dateutil + - pytest + - coverage + - codecov + - pyfakefs + - ruff From 2e92a8fcadfae40bc5c5785c19cf6993a149c8fc Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 15:42:16 +1000 Subject: [PATCH 108/137] fixed env name in workflow --- .github/workflows/mopper-pytest.yaml | 2 +- src/mopper/mop_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index f957e4e..732987c 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -32,7 +32,7 @@ jobs: with: auto-update-conda: true activate-environment: testenv - environment-file: conda/testenv.yml + environment-file: conda/testenv.yaml python-version: ${{ matrix.python-version }} channels: conda-forge #- name: Add conda to system path diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index ef41953..6ed8b60 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -800,10 +800,10 @@ def get_bounds_values(ctx, ds, bname): calc = False var_log = logging.getLogger(ctx.obj['var_log']) var_log.debug(f"Getting bounds values for {bname}") - ancil_file = ctx.obj[f"grid_{ctx.obj['realm']}"] if bname in ds.variables: bnds_val = ds[bname].values elif ancil_file != "": + ancil_file = ctx.obj[f"grid_{ctx.obj['realm']}"] fname = f"{ctx.obj['ancils_path']}/{ancil_file}" ancil = xr.open_dataset(fname) if bname in ancil.variables: From 4b16115468a8a1acf53819c3c4d579d210674f68 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 16:08:10 +1000 Subject: [PATCH 109/137] trying again by adding env name to env.yaml file --- .github/workflows/mopper-pytest.yaml | 5 +++++ conda/testenv.yaml | 1 + 2 files changed, 6 insertions(+) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 732987c..17bf218 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -44,26 +44,31 @@ jobs: #conda config --set solver classic # this seems to slow it down!!! #conda env update --file conda/test-env.yaml --name testenv + conda activate testenv conda list - name: Lint with ruff run: | #conda install -c conda-forge ruff #--solver classic + source ~/miniconda/bin/activate testenv ruff check --output-format=github . continue-on-error: true # making sure we are testing installed package - name: Install package run: | + source ~/miniconda/bin/activate testenv pip install -e . 
- name: Test with pytest run: | #conda install pytest coverage codecov #--name testenv #--solver classic #conda install -c conda-forge pyfakefs #--name testenv + source ~/miniconda/bin/activate testenv conda run python -m pytest #conda run coverage run --source src -m py.test # - name: Upload to codecov # if: steps.build.outcome == 'success' # run: | + source ~/miniconda/bin/activate testenv # curl -Os https://uploader.codecov.io/latest/linux/codecov # chmod +x codecov # ./codecov diff --git a/conda/testenv.yaml b/conda/testenv.yaml index becd88a..7fa8dd5 100644 --- a/conda/testenv.yaml +++ b/conda/testenv.yaml @@ -1,3 +1,4 @@ +name: testenv channels: - conda-forge From 801ed003ba8938f4141062365fe41322a01f1d57 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 16:11:07 +1000 Subject: [PATCH 110/137] trying again by adding env name to env.yaml file 2 --- .github/workflows/mopper-pytest.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 17bf218..9626530 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -30,7 +30,6 @@ jobs: - name: Install Miniconda uses: conda-incubator/setup-miniconda@v3.0.4 with: - auto-update-conda: true activate-environment: testenv environment-file: conda/testenv.yaml python-version: ${{ matrix.python-version }} @@ -44,31 +43,26 @@ jobs: #conda config --set solver classic # this seems to slow it down!!! #conda env update --file conda/test-env.yaml --name testenv - conda activate testenv conda list - name: Lint with ruff run: | #conda install -c conda-forge ruff #--solver classic - source ~/miniconda/bin/activate testenv ruff check --output-format=github . continue-on-error: true # making sure we are testing installed package - name: Install package run: | - source ~/miniconda/bin/activate testenv pip install -e . - name: Test with pytest run: | #conda install pytest coverage codecov #--name testenv #--solver classic #conda install -c conda-forge pyfakefs #--name testenv - source ~/miniconda/bin/activate testenv conda run python -m pytest #conda run coverage run --source src -m py.test # - name: Upload to codecov # if: steps.build.outcome == 'success' # run: | - source ~/miniconda/bin/activate testenv # curl -Os https://uploader.codecov.io/latest/linux/codecov # chmod +x codecov # ./codecov From 13aca2303e130072716dc071b93de02fbf475c45 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 16:28:51 +1000 Subject: [PATCH 111/137] found correct way to activate env? --- .github/workflows/mopper-pytest.yaml | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 9626530..d6f8643 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -38,29 +38,25 @@ jobs: # run: | # # $CONDA is an environment variable pointing to the root of the miniconda directory # echo $CONDA/bin >> $GITHUB_PATH - - name: Install dependencies - run: | - #conda config --set solver classic - # this seems to slow it down!!! - #conda env update --file conda/test-env.yaml --name testenv - conda list - name: Lint with ruff + shell: bash -el {0} run: | - #conda install -c conda-forge ruff #--solver classic ruff check --output-format=github . 
continue-on-error: true # making sure we are testing installed package - name: Install package + shell: bash -el {0} run: | + conda activate testenv pip install -e . - name: Test with pytest + shell: bash -el {0} run: | - #conda install pytest coverage codecov #--name testenv #--solver classic - #conda install -c conda-forge pyfakefs #--name testenv conda run python -m pytest #conda run coverage run --source src -m py.test # - name: Upload to codecov + # shell: bash -el {0} # if: steps.build.outcome == 'success' # run: | # curl -Os https://uploader.codecov.io/latest/linux/codecov From 5534961231c34652a37c2a401625653c263e9f3d Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 17:03:07 +1000 Subject: [PATCH 112/137] remove 3.9 added 3.12 for tests, added testdata files --- .github/workflows/mopper-pytest.yaml | 8 ++------ .github/workflows/mopper-test-conda.yaml | 6 +++--- conda/environment.yaml | 1 + tests/test_mopdb_map.py | 5 +++-- tests/testdata/multitime.nc | Bin 0 -> 9731 bytes tests/testdata/multitime_next.nc | Bin 0 -> 9731 bytes tests/testdata/onetstep.nc | Bin 0 -> 8908 bytes tests/testdata/onetstep_next.nc | Bin 0 -> 8908 bytes 8 files changed, 9 insertions(+), 11 deletions(-) create mode 100644 tests/testdata/multitime.nc create mode 100644 tests/testdata/multitime_next.nc create mode 100644 tests/testdata/onetstep.nc create mode 100644 tests/testdata/onetstep_next.nc diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index d6f8643..51d846c 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -19,11 +19,11 @@ jobs: strategy: max-parallel: 5 matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4.1.7 - - name: Set up Python 3.10 + - name: Set up Python 3.10/3.11 uses: actions/setup-python@v5.1.1 with: python-version: ${{ matrix.python-version }} @@ -34,10 +34,6 @@ jobs: environment-file: conda/testenv.yaml python-version: ${{ matrix.python-version }} channels: conda-forge - #- name: Add conda to system path - # run: | - # # $CONDA is an environment variable pointing to the root of the miniconda directory - # echo $CONDA/bin >> $GITHUB_PATH - name: Lint with ruff shell: bash -el {0} run: | diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index 0e89622..1ef10f9 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.10", "3.11"i, "3.12"] steps: - uses: actions/checkout@v4.1.7 with: @@ -26,12 +26,12 @@ jobs: uses: conda-incubator/setup-miniconda@v3.0.4 with: python-version: ${{ matrix.python-version }} + activate-environment: mopper_env environment-file: conda/environment.yaml # Path to the build conda environment - auto-update-conda: false - auto-activate-base: false show-channel-urls: true # - name: Build but do not upload the conda packages uses: uibcdf/action-build-and-upload-conda-packages@v1.3.0 + shell: bash -el {0} with: meta_yaml_dir: conda python-version: ${{ matrix.python-version }} # Values previously defined in `matrix` diff --git a/conda/environment.yaml b/conda/environment.yaml index 069e0f9..62adf87 100644 --- a/conda/environment.yaml +++ b/conda/environment.yaml @@ -1,3 +1,4 @@ +name: mopper_env channels: - conda-forge - coecms diff --git a/tests/test_mopdb_map.py 
b/tests/test_mopdb_map.py index 38ac29e..8b7a5e9 100644 --- a/tests/test_mopdb_map.py +++ b/tests/test_mopdb_map.py @@ -47,13 +47,14 @@ def test_get_file_frq(caplog): global dsmulti, dsmulti2, dsonestep, dsonestep2 caplog.set_level(logging.DEBUG, logger='mopdb_log') umfrq = {'time': 'day', 'time_0': '1hr', 'time_1': '30min'} + int2frq = {'day': 1.0, '1hr': 0.041667, '30min': 0.020833} # multi time axes in file ds = xr.open_dataset(dsmulti, decode_times=False) - out = get_file_frq(ds, dsmulti2) + out = get_file_frq(ds, dsmulti2, int2frq) assert umfrq == out # only one time axis in file with 1 value ds = xr.open_dataset(dsonestep, decode_times=False) - out = get_file_frq(ds, dsonestep2) + out = get_file_frq(ds, dsonestep2, int2frq) umfrq = {'time': 'day'} assert umfrq == out diff --git a/tests/testdata/multitime.nc b/tests/testdata/multitime.nc new file mode 100644 index 0000000000000000000000000000000000000000..fbee02705ad3153d0188ed463ae8f1fda684de1f GIT binary patch literal 9731 zcmeHNU2IfE6rSz2rM6or0#%^u76BDX?bZr4fQMzf1!B9E)(9%5S=a@Zl$N#&SWM-| zK*|G1zy}fpA3&^7LJml93?|NlJa%o!-PlGad2%suJuxifd> z%$aY#xpU6#?p>Od9UnU-HX`v9NHMHmy+&sHbNJ(q;^u0qULs+8a$LK>wcAE zb%({$4^`W~zs|7x@gLc26RAGJh9V7&wmZn<1e}(1T1Kr#l}M z8^*cle|65`rlxi^Q91CV3Vp%wn!wr&Pf1BxAXMxR2g`YAjfitd2g!hAHo}HxqR~lg z>&bB!6sQS!gJ32fvzFz0eFe+%e1!!bcaB#%o}+rZiqai??p!aorv;(*1Uy@{W23R? z99lKbWra3|0{6RxJxCnLjL3U#Y%UIk%h~KCyh&yQ{)%#Ed8l|z(3zf=mNqBNg?~zg zu6u3v-qgyrt&ivp6eBMRpE3=im(6q zVaUI_D46OL$;jkEhfOE)a@CSK?=cU(WRrNv+o{m!d9$o!Q?M)?!u?3JnT8+guq9YV zOS~{WrI8-~sp*D#;wMwjRT__dQA$oHO~*B4F|K}J4>FBKqKTU<1QW?!0CW+bJX*|xn!Tx?vPwN6_hUuJiFSw<2yu&nEUFQr zTo-m7UeyRoD%TyqbgmJqu1~^SP4EM$)lCqGty~wHfV(F&!~U)t*_G>>;Ya*FeyJG} z4$a0cEs%fvR&{+*3vlsj3)F0#duRUCR!HQ5T44n@Yy}?P*#^I1>zb`?FtzpQzOEZ> zKwdd#w!<+jWmmQXo2zLDbQ*hh0K2iP15R8zxBT|44q*3Poxr!NI$=F}zx;M5d_`vG zb^#5JmLY{F)CC*ap>7})5)O4k12wzT4Lp5!56~=k9qxhiXuZ0=2iUFbUf@|A?S&8c zq8A7)cTOKL49ELm0T0s$4Bzs8VE3y3Km2)GNW>=+UQdzoRMAPIpAr46XgrWvoT8IO z)1!vud9BoR(KAF#Or~Ct@{6Kh5a9<3}XqUVTC75%bkOIZk52s}Cj{u$_*lhf$o zWgsSet&oG&oTDbhvyWzoM{wx~^BXyj-xUOPZG}VuuNhp;>21mw=q=b#!-1aUfF?em zSUufD$lBVc66Tvnp2QlNwVm@hddtzt&^U|*@U*{|`f$Kb&t(e8O`JYd&PT|XN`OGu z745NAWZ7U`ItXJ*5u!#lMKBQ~I>VAbW6bB8IX@3b5zN_R^nXba+GME)=dh!iA`r+U zP7xf*I$Io^!%`$klh8Tf$g)^=Bui(pamXIzPhobbsOW8f(dOWg$<|Q(;U!WX+bJI= zUPxL#?0ndF-K+<#?y%lW7|yyS(7vplQ;+1iz;Hg)?MMqdqSplE!k%BQY@q$C zA0MR>IvIu!i$|`J@rd1g>^yq^`(MA-Jen|oN9F~B(IY!wcFMuBgMdXbJc=02^M&Dj zs63){H1GAEPk0WG^J|GGFNdL|LkUG-m6tk7)$3~WKICq_dADWqYIEM3HzLwaRhz`v zR9M|?rwp&-AZc&6w@s10!x;b{s0zJ z`AHz<0c^kr5(FPWtdc+iyzl@*BwBpHD2)*l%Zom=(bNY(Bqlg>&Ky{3C9R>5m^ zN(of39J1>>e>*14&wbA@#!klBG@gK$S}hzj3XMap z+PH9PO)7D>Stvo|#mI2oF=JhQs-;<~9gjCjZ6e;<>}yWdFHibP{eJ%pe*pi4NOgVD zTOFGJDlLPo8uEmTPW^P21nmhlz2Q4$8CCMDnIl9~w1U_{l{zk;+I-Bo>O)MHke*E@tAg2;@Y(CElED5rRUP zA|rto)HkOZ>O~gazn2GhjKKjJ^l`!JFI8ZQjKD-}C24Mrqc9xfd*yaumgR`DTb8&h zK$XmmO)5Yy%QXsdgLUZu^^9bIlH-RRpfX%guzF(d;!F?QY`OAqpdFV>RDa>ucT@3Y z>12tIsjOU1I^=L7E0@lj@s^Ck2F&H=LSjwr6r|0^w>@rdV$7VltpSdS}>W^5TP$K3APs}3UihS z$XU`=k$+r3$2RIPApbaB8Sr7x)x)z+8&D(%nDEK0)4f#BlYM# z#St&vM*Z?ZkDMM!MEQ!zean}`z&CYK-k#|KDxyd6Qoyn%ci_}k0bBPj4MvX&ICSR3 z!Yh{roc}o+ZyE3lv48DvCCCgjM8(2sJU33B>83)bN1zS$P+x_Pm(ZKVZdRo)&8zQNYPGZy?# z>iR4|qfycY(u|-BYOY-F>`%Kun!nct^7eTboJ8w8`_b>Dc9I)#a@6dGe6=8;-fR2a zKq$0daKk&Y!wquzJ{xG2!Ke*CqW10$Hb}D$+CZzgdC`VB^1=o}%Qw{n5{88yI6%|1 zc|gL~=>ch8_x~q;9_JvEClX#ylekR|z;7cwtm{xtK+%%?EtoJ>5! 
z?Pr-k$9x)dJX$3cGoQh{g!%K#9p)h5An@Q2_-CT0GXc$qR)OgBwUrN6eU9oB&l<@T zkKj@n=I4AKzsU%y-d4^6UedVg(_6PO(VMfnTPAv31M2*Ma`jT3AuDYkaGI|ld7L|y z_xw3qW^Ykd42{NI08jfnrH@S5>A6e^xz5uEtN94|Motjuy7iCjcG9ddZXAFyl?d(; zEfI9ah{i~&nd13KMb38_C4xSCH2*Ip!gY4lz#4W$O9TRW*d>CuP*sZqYgo!8X%VUh z99|VG4GF6%HV2~z%#6g>DHJP3XPop~>Bi)Hn)cu;0&^O867X6F4;$iI2kZf!1<&X)9I z5tRMNY`Q9Etjuxn{`sWWZuNEZjV@h=p1IVL`LU#fMj?gMbkuTmB+I4IUieL(!^$}z zhoQmza&=v)Q0OG9=4b0vQc(fta{I%vJuPKkGaC=!#;zT{RI$N&icY5BycbZ)n%=0vFKwd8J znp>`j&u8&;J499kgBGzl%&MAeMSD^3Yg@RA%*0YvtLy;iqSjku7F2`~XBzyz286Zp>wNO8AxY;jjSNuUhv zGf59LrHqxl)Bn1>>q>c7Mm>iraVF-v9seer!Intpk)#%y^avG85@;&6L74y^#4lfv z;!<0i&TY-S1E>uRD1${6`|hUOcRj})h>DjnnkojvbeM$ctaN~~F(zdOMe_eiwjSzo zj3Ybs;r2aw#z!W=1egF5U;@V?aLws;#eaW#7NM#40P6Xz-Zx1vz?ZT*`O*M%ty@(f4 zuX_?a`UiN>qh37~QSjjZ;6d;o(3$sRi)Hn)cu;0&^O867X6AiT$hUdc?QAYo&Q|nt znJN1rZMrIEEa^CS|9mp&c80q7Mi(wamoBwreyr$B!;sQxJZc3zq80LZFZw3V*~&SF z4nt$}%hh$QhM^N#&6n#`QeFY(3j0B7Pb<=E%JFcx_3`^--u}WvOc$W>Nc#2MoPUds+1iH5$O5Q`6F4z-ceZ#QDLG1Oqo%!Dr+c~QO{?rb>YaT>-+ zkVhFWEjZPWY!DNeYXB3{l%Pq7b^2}JXG`EfPS|6ox8-_%Z{Ur5r`>m%c^>BtRul`o z_Lj@zi!vT>2hVCi&>}PkSygkLcrOlrZ3|YRnefXf%-Xo0#CgWku7DHU2!lyh&*Ee| zsy8jmx@g((VeIXbcTWdS_qyp~7A(lOG4h-n>G*Dx<}q->kD~T1w98UaVF;aJ^v;<1I?k%LrKjw=@BXxCD4>_L&%{A$;%h0 zxYX9Bb6Yd-Fwlkut1=5K4&6<6=z5Mj;uSALG*%3QJjkMaRyqXPn4mIaW%U0^wjSzo zh(kN|;r2aoMn@t*1c(3;AOgoCaLpO?`G0?W7J;ev0P6Xz-Zu#^V_&N3j7B0r1c(3; YAOb{y2oM1xKm>>Y5g-CY;9nu|6Z2-;;Q#;t literal 0 HcmV?d00001 From 4f1cfabe1aa00eda9bdcd5ae2264cab2c821701c Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 19:06:23 +1000 Subject: [PATCH 113/137] cleaned code following ruff report, except for tests --- docs/conf.py | 3 --- src/mopdb/__init__.py | 1 - src/mopdb/mopdb.py | 29 ++++------------------- src/mopdb/mopdb_class.py | 7 +----- src/mopdb/mopdb_map.py | 20 ++++++++-------- src/mopdb/mopdb_utils.py | 9 ++++---- src/mopdb/utils.py | 13 +++++------ src/mopper/__init__.py | 1 - src/mopper/calculations.py | 30 ++++-------------------- src/mopper/cmip_utils.py | 7 +++--- src/mopper/mop_setup.py | 19 +++++++-------- src/mopper/mop_utils.py | 47 ++++++++++++++++---------------------- src/mopper/mopper.py | 33 ++++++++++++++------------ src/mopper/setup_utils.py | 29 ++++++----------------- 14 files changed, 88 insertions(+), 160 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 1f21a6f..bd47e1c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,9 +12,6 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys -import os - # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. 
If the directory is relative to the diff --git a/src/mopdb/__init__.py b/src/mopdb/__init__.py index 2a413df..e69de29 100644 --- a/src/mopdb/__init__.py +++ b/src/mopdb/__init__.py @@ -1 +0,0 @@ -from mopdb import * diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 40757b2..561e728 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -19,19 +19,16 @@ # last updated 08/04/2024 import click -import sqlite3 import logging import sys -import csv import json from importlib.resources import files as import_files from pathlib import Path from mopdb.mopdb_utils import (mapping_sql, cmorvar_sql, read_map, - read_map_app4, map_update_sql, create_table, write_cmor_table, - check_varlist, update_db) -from mopdb.utils import * + read_map_app4, create_table, write_cmor_table, update_db) +from mopdb.utils import (config_log, db_connect, query, delete_record) from mopdb.mopdb_map import (write_varlist, write_map_template, write_catalogue, map_variables, load_vars, get_map_obj) @@ -112,7 +109,7 @@ def mopdb(ctx, debug): ctx.obj={} # set up a default value for flow if none selected for logging ctx.obj['debug'] = debug - mopdb_log = config_log(debug, logname='mopdb_log') + #mopdb_log = config_log(debug, logname='mopdb_log') @mopdb.command(name='check') @@ -200,7 +197,7 @@ def cmor_table(ctx, dbname, fname, alias, label): # extract cmor_var,units,dimensions,frequency,realm,cell_methods var_list = [] for v in vlist[1:]: - vid = (v[0], v[5], v[6]) + #vid = (v[0], v[5], v[6]) # This was adding variables to the table just if they didn't exists in other tables if v[0][:4] != 'fld_': if v[0] not in cmor_vars: @@ -353,17 +350,6 @@ def map_template(ctx, fpath, match, dbname, version, alias): fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) if alias == '': alias = fname.split(".")[0] -# also from here on it should be called by separate function I can call from intake too -# without repeating steps - # read list of vars from file - # this should now spit out fobjs, vobjs to pass to template - #with open(fname, 'r') as csvfile: - # reader = csv.DictReader(csvfile, delimiter=';') - # rows = list(reader) - #check_varlist(rows, fname) - # return lists of fully/partially matching variables and stash_vars - # these are input_vars for calculation defined in already in mapping db - #parsed = map_variables(conn, rows, version) parsed = map_variables(conn, vobjs, version) # potential vars have always duplicates: 1 for each input_var write_map_template(conn, parsed, alias) @@ -425,11 +411,6 @@ def write_intake(ctx, fpath, match, filelist, dbname, version, alias): map_file, vobjs, fobjs = load_vars(flist, indir=fpath) if alias == '': alias = fname.split(".")[0] - # read list of vars from file - #with open(fname, 'r') as csvfile: - # reader = csv.DictReader(csvfile, delimiter=';') - # rows = list(reader) - #check_varlist(rows, fname) # return lists of fully/partially matching variables and stash_vars # these are input_vars for calculation defined in already in mapping db if map_file is False: @@ -527,7 +508,7 @@ def model_vars(ctx, fpath, match, dbname, version, alias): if dbname == 'default': dbname = import_files('mopdata').joinpath('access.db') conn = db_connect(dbname, logname='mopdb_log') - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) conn.close() return None diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index d044eaa..a36a6a4 100644 --- 
a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -85,11 +85,6 @@ class Variable(): and the one added by mapping. """ - # __slots__ = ('name', 'pattern', 'files', 'frequency', 'realm', - # 'cmor_var', 'cmor_table', 'version', 'units', 'dimensions', - # 'cell_methods', 'positive', 'long_name', 'standard_name', - # 'vtype', 'size', 'nsteps') - def __init__(self, varname: str, fobj: FPattern): self.name = varname # path object @@ -148,7 +143,7 @@ def get_match(self): cmor_var = self.cmor_var else: cmor_var = self.name - match = (self.cmor_var, self.name, '', self.frequency, + match = (cmor_var, self.name, '', self.frequency, self.realm, self.version, '', self.positive, self.units) return match diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index b88ea7c..135a960 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -33,9 +33,9 @@ #from access_nri_intake.source.builders import AccessEsm15Builder from mopdb.mopdb_class import FPattern, Variable, MapVariable -from mopdb.utils import * +from mopdb.utils import query, read_yaml from mopdb.mopdb_utils import (get_cell_methods, remove_duplicate, - get_realm, check_realm_units, get_date_pattern, check_varlist) + get_realm, check_realm_units, get_date_pattern) def get_cmorname(conn, vobj, version): @@ -111,13 +111,12 @@ def get_file_frq(ds, fnext, int2frq): # so we open also next file but get only time axs if max_len == 1: if fnext is None: - mopdb_log.info(f"Only 1 file cannot determine frequency for: {fpattern}") + mopdb_log.info(f"Only 1 file with 1 tstep cannot determine frequency") else: dsnext = xr.open_dataset(fnext, decode_times = False) time_axs2 = [d for d in dsnext.dims if 'time' in d] ds = xr.concat([ds[time_axs], dsnext[time_axs2]], dim='time') time_axs = [d for d in ds.dims if 'time' in d] - time_axs_len = set(len(ds[d]) for d in time_axs) time_axs.sort(key=lambda x: len(ds[x]), reverse=True) if max_len > 0: for t in time_axs: @@ -232,7 +231,7 @@ def match_stdname(conn, vobj, stdn): in cmorvar table that match the standard name passed as input. It also return a False/True found_match boolean. 
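 
     Note that a standard_name is generally not unique: for example
     'air_temperature' is shared by several cmor variables (ta, tas, ...),
     so all matching names are kept as candidates.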
""" - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') found_match = False sql = f"""SELECT name FROM cmorvar where standard_name='{vobj.standard_name}'""" @@ -451,7 +450,7 @@ def write_vars(vlist, fwriter, div, conn=None, sortby='cmor_var'): """ """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') if len(vlist) > 0: if type(div) is str: divrow = {x:'' for x in vlist[0].attrs()} @@ -503,6 +502,7 @@ def get_map_obj(parsed): def write_catalogue(conn, vobjs, fobjs, alias): """Write intake-esm catalogue and returns name """ + mopdb_log = logging.getLogger('mopdb_log') # read template json file jfile = import_files('mopdata').joinpath('intake_cat_template.json') @@ -510,7 +510,7 @@ def write_catalogue(conn, vobjs, fobjs, alias): template = json.load(f) # write updated json to file for k,v in template.items(): - if type(v) == str: + if type(v) is str: template[k] = v.replace("", alias) jout = f"intake_{alias}.json" with open(jout, 'w') as f: @@ -542,7 +542,7 @@ def write_catalogue(conn, vobjs, fobjs, alias): def create_file_dict(fobjs, alias): """ """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') lines = [] for pat_obj in fobjs: var_list = [v.name for v in pat_obj.varlist] @@ -574,7 +574,7 @@ def create_file_dict(fobjs, alias): def add_mapvars(vobjs, lines, path_list, alias): """ """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') for vobj in vobjs: if vobj.cmor_var != "" or vobj.standard_name != "": mapvar = vobj.cmor_var @@ -598,7 +598,7 @@ def add_mapvars(vobjs, lines, path_list, alias): def load_vars(fname, indir=None): """Returns Variable and FPattern objs from varlist or map file. """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') vobjs = [] fobjs = {} if indir is not None: diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 0f80b6d..6de2ddf 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -19,7 +19,6 @@ # last updated 10/04/2024 # -import sqlite3 import logging import sys import csv @@ -28,7 +27,7 @@ from datetime import date from collections import Counter -from mopdb.utils import * +from mopdb.utils import query def mapping_sql(): @@ -236,7 +235,7 @@ def get_cell_methods(attrs, dims): `time: point` If `area` not specified is added at start of string as `area: ` """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') frqmod = '' val = attrs.get('cell_methods', "") if 'area' not in val: @@ -252,7 +251,7 @@ def get_cell_methods(attrs, dims): def read_map_app4(fname): """Reads APP4 style mapping """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') # old order #cmor_var,definable,input_vars,calculation,units,axes_mod,positive,ACCESS_ver[CM2/ESM/both],realm,notes var_list = [] @@ -404,7 +403,7 @@ def get_date_pattern(fname, fpattern): """Try to build a date range for each file pattern based on its filename """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') # assign False to any character which is not a digit date_pattern = [True if c.isdigit() else False for c in fname] # assign False to fpattern diff --git a/src/mopdb/utils.py b/src/mopdb/utils.py index 1a6ff11..c71dc71 100644 --- a/src/mopdb/utils.py +++ b/src/mopdb/utils.py @@ -22,8 +22,6 @@ import sqlite3 import logging import os -import csv -import json import 
stat import yaml @@ -57,7 +55,7 @@ def config_log(debug, logname): logname = f"{logname}_{day}.txt" flog = logging.FileHandler(logname) try: - os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO); + os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) except OSError: pass flog.setLevel(flevel) @@ -112,7 +110,7 @@ def query(conn, sql, tup=(), first=True, logname='__name__'): result : tuple/list(tuple) tuple or a list of, representing row/s returned by query """ - log = logging.getLogger(logname) + #log = logging.getLogger(logname) with conn: c = conn.cursor() c.execute(sql, tup) @@ -127,7 +125,7 @@ def query(conn, sql, tup=(), first=True, logname='__name__'): def get_columns(conn, table, logname='__name__'): """Gets list of columns from db table """ - log = logging.getLogger(logname) + #log = logging.getLogger(logname) sql = f'PRAGMA table_info({table});' table_data = query(conn, sql, first=False, logname=logname) columns = [x[1] for x in table_data] @@ -205,6 +203,7 @@ def write_yaml(data, fname, logname='__name__'): try: with open(fname, 'w') as f: yaml.dump(data, f) - except: - log.error(f"Check that {data} exists and it is an object compatible with yaml") + except Exception as e: + log.error(f"Exception: {e}") + log.error(f"Check {data} exists and is yaml object") return diff --git a/src/mopper/__init__.py b/src/mopper/__init__.py index 2c52bb9..e69de29 100644 --- a/src/mopper/__init__.py +++ b/src/mopper/__init__.py @@ -1 +0,0 @@ -from mopper import * diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py index ed9ae92..ab8462c 100644 --- a/src/mopper/calculations.py +++ b/src/mopper/calculations.py @@ -33,7 +33,6 @@ import click import xarray as xr import os -import yaml import json import numpy as np import dask @@ -233,7 +232,7 @@ def transAcrossLine(self, var, i_start, i_end, j_start, j_end): #sum each axis apart from time (3d) #trans = var.isel(yu_ocean=slice(271, 271+1), xt_ocean=slice(292, 300+1)) trans = var[..., j_start:j_end+1, i_start:i_end+1].sum(dim=['st_ocean', f'{y_ocean}', f'{x_ocean}']) #4D - except: + except Exception as e: trans = var[..., j_start:j_end+1, i_start:i_end+1].sum(dim=[f'{y_ocean}', f'{x_ocean}']) #3D return trans @@ -665,25 +664,6 @@ def calc_hemi_seaice_extent(self, hemi): return vout.item() - -def ocean_floor(var): - """Not sure.. - - Parameters - ---------- - var : Xarray dataset - pot_temp variable - - Returns - ------- - vout : Xarray dataset - ocean floor temperature? - """ - lv = (~var.isnull()).sum(dim='st_ocean') - 1 - vout = var.take(lv, dim='st_ocean').squeeze() - return vout - - def maskSeaIce(var, sic): """Mask seaice. @@ -702,7 +682,6 @@ def maskSeaIce(var, sic): vout = var.where(sic != 0) return vout - def sithick(hi, aice): """Calculate seaice thickness. @@ -722,7 +701,6 @@ def sithick(hi, aice): vout = hi / aice return vout - def sisnconc(sisnthick): """Calculate seas ice? @@ -807,7 +785,7 @@ def calc_global_ave_ocean(var, rho_dzt, area_t): try: vnew = var.weighted(mass).mean(dim=('st_ocean', 'yt_ocean', 'xt_ocean'), skipna=True) - except: + except Exception as e: vnew = var.weighted(mass[:, 0, :, :]).mean(dim=('x', 'y'), skipna=True) return vnew @@ -1267,7 +1245,7 @@ def calc_global_ave_ocean(ctx, var, rho_dzt): mass = rho_dzt * area_t try: vnew=np.average(var,axis=(1,2,3),weights=mass) - except: + except Exception as e: vnew=np.average(var,axis=(1,2),weights=mass[:,0,:,:]) return vnew @@ -1437,7 +1415,7 @@ def calc_depositions(ctx, var, weight=None): (personal communication from M. 
Woodhouse) """ - var_log = logging.getLogger(ctx.obj['var_log']) + #var_log = logging.getLogger(ctx.obj['var_log']) varlist = [] for v in var: v0 = v.sel(model_theta_level_number=1).squeeze(dim='model_theta_level_number') diff --git a/src/mopper/cmip_utils.py b/src/mopper/cmip_utils.py index dd7674c..161e55f 100755 --- a/src/mopper/cmip_utils.py +++ b/src/mopper/cmip_utils.py @@ -25,7 +25,6 @@ import json import csv import ast -import copy import click from collections import OrderedDict @@ -39,7 +38,7 @@ def find_cmip_tables(dreq): with dreq.open(mode='r') as f: reader = csv.reader(f, delimiter='\t') for row in reader: - if not row[0] in tables: + if row[0] not in tables: if (row[0] != 'Notes') and (row[0] != 'MIP table') and (row[0] != '0'): tables.append(f"CMIP6_{row[0]}") f.close() @@ -137,9 +136,9 @@ def read_dreq_vars(ctx, table_id, activity_id): years = ast.literal_eval(row[31]) years = reallocate_years(years, ctx.obj['reference_date']) years = f'"{years}"' - except: + except Exception as e: years = 'all' - except: + except Exception as e: years = 'all' dreq_variables[cmorname] = years f.close() diff --git a/src/mopper/mop_setup.py b/src/mopper/mop_setup.py index 5c1e04b..02132bb 100755 --- a/src/mopper/mop_setup.py +++ b/src/mopper/mop_setup.py @@ -24,7 +24,6 @@ import os import sys import shutil -import yaml import json import csv import click @@ -33,7 +32,9 @@ from json.decoder import JSONDecodeError from importlib.resources import files as import_files -from mopper.setup_utils import * +from mopper.setup_utils import (define_timeshot, adjust_nsteps, + find_custom_tables, write_var_map, write_table) +from mopper.cmip_utils import find_cmip_tables, read_dreq_vars from mopdb.utils import read_yaml @@ -99,10 +100,10 @@ def find_matches(table, var, realm, frequency, varlist): match['timeshot'] = timeshot match['table'] = table match['frequency'] = frequency - if match['realm'] == 'land': - realmdir = 'atmos' - else: - realmdir = match['realm'] + #if match['realm'] == 'land': + # realmdir = 'atmos' + #else: + # realmdir = match['realm'] in_fname = match['fpattern'].split() match['file_structure'] = '' for f in in_fname: @@ -253,8 +254,8 @@ def var_map(ctx, activity_id=None): else: sublist = ctx.obj['appdir'] / sublist # Custom mode vars - if ctx.obj['mode'].lower() == 'custom': - access_version = ctx.obj['access_version'] + #if ctx.obj['mode'].lower() == 'custom': + # access_version = ctx.obj['access_version'] if ctx.obj['force_dreq'] is True: if ctx.obj['dreq'] == 'default': ctx.obj['dreq'] = import_files('mopdata').joinpath( @@ -272,7 +273,7 @@ def var_map(ctx, activity_id=None): create_var_map(table, masters, selection=selection[table]) elif tables.lower() == 'all': mop_log.info(f"Experiment {ctx.obj['exp']}: processing all tables") - if ctx.obj['force_dreq'] == True: + if ctx.obj['force_dreq']: tables = find_cmip_tables(ctx.obj['dreq']) else: tables = find_custom_tables() diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index 6ed8b60..315afec 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -22,19 +22,17 @@ # last updated 15/05/2024 import numpy as np -import glob import re -import os,sys +import os import stat import yaml import xarray as xr import cmor -import calendar import click import logging import cftime -import itertools import copy +import json from functools import partial from pathlib import Path @@ -71,7 +69,7 @@ def config_log(debug, path, stream_level=logging.WARNING): logname = f"{path}/mopper_log.txt" flog = 
logging.FileHandler(logname) try: - os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO); + os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) except OSError: pass flog.setLevel(level) @@ -93,7 +91,7 @@ def config_varlog(debug, logname, pid): logger.setLevel(level) flog = logging.FileHandler(logname) try: - os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO); + os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) except OSError: pass flog.setLevel(level) @@ -147,7 +145,7 @@ def get_files(ctx): inrange_files.append( check_in_range(paths, time_dim) ) else: inrange_files.append( check_timestamp(paths) ) - except: + except Exception as e: for i,paths in enumerate(all_files): inrange_files.append( check_in_range(paths, time_dim) ) @@ -205,7 +203,7 @@ def check_vars_in_file(ctx, invars, fname): """Check that all variables needed for calculation are in file else return extra filenames """ - var_log = logging.getLogger(ctx.obj['var_log']) + #var_log = logging.getLogger(ctx.obj['var_log']) ds = xr.open_dataset(fname, decode_times=False) tofind = [v for v in invars if v not in ds.variables] found = [v for v in invars if v not in tofind] @@ -457,7 +455,6 @@ def pseudo_axis(ctx, axis): cmor_name = 'vegtype' return cmor_name, p_vals, p_len - #PP this should eventually just be generated directly by defining the dimension using the same terms # in calculation for meridional overturning @click.pass_context @@ -480,11 +477,13 @@ def create_axis(ctx, axis, table): var_log.info(f"setup of {axis.name} axis complete") return axis_id - -def hybrid_axis(lev, z_ax_id, z_ids): +@click.pass_context +def hybrid_axis(ctx, lev, z_ax_id, z_ids): """Setting up additional hybrid axis information + PP this needs fixing can't possible work now without b_vals, b_bnds?? + lev is cmor_zName? """ - var_log = logging.getLogger(ctx.obj['var_log']) + #var_log = logging.getLogger(ctx.obj['var_log']) hybrid_dict = {'hybrid_height': 'b', 'hybrid_height_half': 'b_half'} orog_vals = getOrog() @@ -503,31 +502,26 @@ def hybrid_axis(lev, z_ax_id, z_ids): zfactor_values=orog_vals) return zfactor_b_id, zfactor_orog_id - @click.pass_context def ij_axis(ctx, ax, ax_name, table): """ """ - var_log = logging.getLogger(ctx.obj['var_log']) + #var_log = logging.getLogger(ctx.obj['var_log']) cmor.set_table(table) ax_id = cmor.axis(table_entry=ax_name, units='1', coord_vals=ax.values) return ax_id - @click.pass_context def ll_axis(ctx, ax, ax_name, ds, table, bounds_list): """ """ var_log = logging.getLogger(ctx.obj['var_log']) - var_log.debug(f"in ll_axis") + var_log.debug("in ll_axis") cmor.set_table(table) cmor_aName = get_cmorname(ax_name, ax) - try: - ax_units = ax.units - except: - ax_units = 'degrees' + ax_units = ax.attrs.get('units', 'degrees') a_bnds = None var_log.debug(f"got cmor name: {cmor_aName}") if cmor_aName in bounds_list: @@ -606,10 +600,10 @@ def get_axis_dim(ctx, var): 'lat_ax': None, 'lon_ax': None, 'j_ax': None, 'i_ax': None, 'p_ax': None, 'e_ax': None} for dim in var.dims: - try: + if dim in var.coords: axis = var[dim] var_log.debug(f"axis found: {axis}") - except: + else: var_log.warning(f"No coordinate variable associated with the dimension {dim}") axis = None # need to file to give a value then??? @@ -694,7 +688,7 @@ def bnds_change(ctx, axis): """Returns True if calculation/resample changes bnds of specified dimension. 
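 
     For example, resampling 1hr data to daily means redefines the
     time bnds, so the original bounds cannot be reused as they are.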
""" - var_log = logging.getLogger(ctx.obj['var_log']) + #var_log = logging.getLogger(ctx.obj['var_log']) dim = axis.name calculation = ctx.obj['calculation'] changed_bnds = False @@ -707,7 +701,6 @@ def bnds_change(ctx, axis): changed_bnds = True return changed_bnds - @click.pass_context def get_bounds(ctx, ds, axis, cmor_name, ax_val=None): """Returns bounds for input dimension, if bounds are not available @@ -800,10 +793,10 @@ def get_bounds_values(ctx, ds, bname): calc = False var_log = logging.getLogger(ctx.obj['var_log']) var_log.debug(f"Getting bounds values for {bname}") + ancil_file = ctx.obj[f"grid_{ctx.obj['realm']}"] if bname in ds.variables: bnds_val = ds[bname].values elif ancil_file != "": - ancil_file = ctx.obj[f"grid_{ctx.obj['realm']}"] fname = f"{ctx.obj['ancils_path']}/{ancil_file}" ancil = xr.open_dataset(fname) if bname in ancil.variables: @@ -902,7 +895,7 @@ def extract_var(ctx, input_ds, tdim, in_missing): if array.dtype.kind == 'i': try: in_missing = int(in_missing) - except: + except Eception as e: in_missing = int(-999) else: array = array.fillna(in_missing) @@ -925,7 +918,7 @@ def define_attrs(ctx): listed in notes file, this is indicated by precending any function in file with a ~. For other fields it checks equality. """ - var_log = logging.getLogger(ctx.obj['var_log']) + #var_log = logging.getLogger(ctx.obj['var_log']) attrs = ctx.obj['attrs'] notes = attrs.get('notes', '') # open file containing notes diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 4b08c5a..2c155a3 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -25,17 +25,22 @@ import click import logging -import sqlite3 import concurrent.futures -import os,sys +import os +import subprocess +import sys import warnings import yaml import cmor -import numpy as np -import xarray as xr - -from mopper.mop_utils import * -from mopper.mop_setup import * +import cftime + +from mopper.mop_utils import (config_log, config_varlog, get_files, + load_data, get_cmorname, pseudo_axis, create_axis, hybrid_axis, + ij_axis, ll_axis, define_grid, get_coords, get_axis_dim, + require_bounds, get_bounds, get_attrs, extract_var, define_attrs) +from mopper.mop_setup import setup_env, var_map, manage_env +from mopper.setup_utils import create_exp_json, edit_json_cv, write_config, + populate_db, count_rows, sum_file_sizes, filelist_sql, write_job from mopdb.mopdb_utils import db_connect, create_table, query warnings.simplefilter(action='ignore', category=FutureWarning) @@ -359,7 +364,7 @@ def mop_process(ctx): # Set up additional hybrid coordinate information if (axes['z_ax'] is not None and cmor_zName in ['hybrid_height', 'hybrid_height_half']): - zfactor_b_id, zfactor_orog_id = hybrid_axis(lev_name, z_ax_id, z_ids) + zfactor_b_id, zfactor_orog_id = hybrid_axis(cmor_zName, z_ax_id, z_ids) # Freeing up memory del dsin @@ -382,11 +387,11 @@ def mop_process(ctx): mop_log.error(f"Unable to define the CMOR variable {ctx.obj['filename']}") var_log.error(f"Unable to define the CMOR variable {e}") return 2 - var_log.info('Writing...') + var_log.info("Writing...") var_log.info(f"Variable shape is {ovar.shape}") status = None # Write timesteps separately if variable potentially exceeding memory - if float(ctx.obj['file_size']) > 4000.0 and time_dim != None: + if float(ctx.obj['file_size']) > 4000.0 and time_dim is not None: for i in range(ovar.shape[0]): data = ovar.isel({time_dim: i}).values status = cmor.write(variable_id, data, ntimes_passed=1) @@ -395,10 +400,10 @@ def mop_process(ctx): status = 
cmor.write(variable_id, ovar.values) if status != 0: mop_log.error(f"Unable to write the CMOR variable: {ctx.obj['filename']}\n") - var_log.error(f"Unable to write the CMOR variable to file\n" + var_log.error("Unable to write the CMOR variable to file\n" + f"See cmor log, status: {status}") return 2 - var_log.info(f"Finished writing") + var_log.info("Finished writing") # Close the CMOR file. path = cmor.close(variable_id, file_name=True) @@ -508,14 +513,12 @@ def process_row(ctx, row): 'json_file_path', 'reference_date', 'version', 'rowid'] for i,val in enumerate(header): record[val] = row[i] - table = record['table'].split('_')[1] # call logging - trange = record['filename'].replace('.nc.','').split("_")[-1] varlog_file = (f"{ctx.obj['var_logs']}/{record['variable_id']}" + f"_{record['table']}_{record['tstart']}.txt") var_log = config_varlog(ctx.obj['debug'], varlog_file, pid) ctx.obj['var_log'] = var_log.name - var_log.info(f"Start processing") + var_log.info("Start processing") var_log.debug(f"Process id: {pid}") msg = process_file(record) var_log.handlers[0].close() diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py index ff1e082..2bc293d 100755 --- a/src/mopper/setup_utils.py +++ b/src/mopper/setup_utils.py @@ -21,28 +21,18 @@ # # last updated 08/04/2024 -import os import sys -import shutil -import calendar -import yaml import json -import csv import sqlite3 -import subprocess -import ast import copy -import re import click import pathlib import logging -from collections import OrderedDict from datetime import datetime#, timedelta from dateutil.relativedelta import relativedelta -from json.decoder import JSONDecodeError -from mopdb.utils import query, write_yaml, read_yaml +from mopdb.utils import query, write_yaml from mopper.cmip_utils import fix_years @@ -129,12 +119,11 @@ def write_config(ctx, fname='exp_config.yaml'): @click.pass_context -def find_custom_tables(ctx): +def find_custom_tables(ctx, cmip=False): """Returns list of tables files in custom table path """ mop_log = logging.getLogger('mop_log') tables = [] - path = ctx.obj['tables_path'] table_files = ctx.obj['tables_path'].rglob("*_*.json") for f in table_files: f = str(f).replace(".json", "") @@ -411,10 +400,10 @@ def adjust_size(opts, insize): resample = opts['resample'] grid_size = insize if 'plevinterp' in calc: - try: + if "," in calc: plevnum = calc.split(',')[-1] - except: - raise('check plevinterp calculation definition plev probably missing') + else: + raise('check plevinterp calculation def plev probably missing') plevnum = float(plevnum.replace(')','')) grid_size = float(insize)/float(opts['levnum'])*plevnum return grid_size @@ -437,7 +426,7 @@ def compute_fsize(ctx, opts, grid_size, frequency): Returns ------- """ - mop_log = logging.getLogger('mop_log') + #mop_log = logging.getLogger('mop_log') # set small number for fx frequency so it always create only one file nstep_day = {'10min': 144, '30min': 48, '1hr': 24, '3hr': 8, '6hr': 4, 'day': 1, '10day': 0.1, 'mon': 1/30, @@ -557,10 +546,6 @@ def process_vars(ctx, maps, opts, cursor): Returns ------- """ - tstep_dict = {'10min': 'minutes=10', '30min': 'minutes=30', - '1hr': 'hours=1', '3hr': 'hours=3', '6hr': 'hours=6', - 'day': 'days=1', '10day': 'days=10', 'mon': 'months=1', - 'yr': 'years=1', 'dec': 'years=10'} unchanged = ['frequency', 'realm', 'table', 'calculation', 'resample', 'positive', 'timeshot'] for mp in maps: @@ -620,7 +605,6 @@ def define_files(ctx, cursor, opts, mp): finish = start + relativedelta(days=1) tstep_dict['fx'] = 
tstep_dict['day'] while (start < finish): - tstep = eval(f"relativedelta({tstep_dict[frq][0]})") half_tstep = eval(f"relativedelta({tstep_dict[frq][1]})") delta = eval(f"relativedelta({interval})") newtime = min(start+delta, finish) @@ -633,6 +617,7 @@ def define_files(ctx, cursor, opts, mp): opts['filepath'], opts['filename'] = build_filename(opts, start, newtime, half_tstep) rowid = add_row(opts, cursor, update) + mop_log.debug(f"Last added row id: {rowid}") start = newtime return From 59015650b833a09932360eb56d80c0cca35e3f5a Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 19:15:43 +1000 Subject: [PATCH 114/137] fixed typowq --- src/mopdb/mopdb.py | 3 +-- src/mopper/mopper.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 561e728..f0ab45c 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -28,7 +28,7 @@ from mopdb.mopdb_utils import (mapping_sql, cmorvar_sql, read_map, read_map_app4, create_table, write_cmor_table, update_db) -from mopdb.utils import (config_log, db_connect, query, delete_record) +from mopdb.utils import (db_connect, query, delete_record) from mopdb.mopdb_map import (write_varlist, write_map_template, write_catalogue, map_variables, load_vars, get_map_obj) @@ -109,7 +109,6 @@ def mopdb(ctx, debug): ctx.obj={} # set up a default value for flow if none selected for logging ctx.obj['debug'] = debug - #mopdb_log = config_log(debug, logname='mopdb_log') @mopdb.command(name='check') diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 2c155a3..211a000 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -39,8 +39,8 @@ ij_axis, ll_axis, define_grid, get_coords, get_axis_dim, require_bounds, get_bounds, get_attrs, extract_var, define_attrs) from mopper.mop_setup import setup_env, var_map, manage_env -from mopper.setup_utils import create_exp_json, edit_json_cv, write_config, - populate_db, count_rows, sum_file_sizes, filelist_sql, write_job +from mopper.setup_utils import (create_exp_json, edit_json_cv, write_config, + populate_db, count_rows, sum_file_sizes, filelist_sql, write_job) from mopdb.mopdb_utils import db_connect, create_table, query warnings.simplefilter(action='ignore', category=FutureWarning) From 89200cfc954e0716b290c646e50ad29fe5766ac7 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 19:17:47 +1000 Subject: [PATCH 115/137] fixed typo in test-conda workflow --- .github/workflows/mopper-test-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index 1ef10f9..e664ad7 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.10", "3.11"i, "3.12"] + python-version: ["3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4.1.7 with: From c14d05e9c2622aaef7e734c2c3f2a9eac33348f2 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 19:21:28 +1000 Subject: [PATCH 116/137] fixed wrong import --- src/mopdb/mopdb_map.py | 2 +- src/mopper/mopper.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index 135a960..28530c5 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -111,7 +111,7 @@ def get_file_frq(ds, fnext, int2frq): # so we open also next file but get only time axs if max_len == 1: if fnext is 
None: - mopdb_log.info(f"Only 1 file with 1 tstep cannot determine frequency") + mopdb_log.info("Only 1 file with 1 tstep cannot determine frequency") else: dsnext = xr.open_dataset(fnext, decode_times = False) time_axs2 = [d for d in dsnext.dims if 'time' in d] diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 211a000..533090f 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -39,9 +39,10 @@ ij_axis, ll_axis, define_grid, get_coords, get_axis_dim, require_bounds, get_bounds, get_attrs, extract_var, define_attrs) from mopper.mop_setup import setup_env, var_map, manage_env -from mopper.setup_utils import (create_exp_json, edit_json_cv, write_config, +from mopper.setup_utils import (create_exp_json, write_config, populate_db, count_rows, sum_file_sizes, filelist_sql, write_job) from mopdb.mopdb_utils import db_connect, create_table, query +from cmip_utils import edit_json_cv warnings.simplefilter(action='ignore', category=FutureWarning) warnings.simplefilter(action='ignore', category=UserWarning) From 0109567919bdac15fb4473362bfda486bfa27be4 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 19:27:24 +1000 Subject: [PATCH 117/137] fixed wrong import 2 --- src/mopper/mopper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 533090f..dd22deb 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -41,7 +41,7 @@ from mopper.mop_setup import setup_env, var_map, manage_env from mopper.setup_utils import (create_exp_json, write_config, populate_db, count_rows, sum_file_sizes, filelist_sql, write_job) -from mopdb.mopdb_utils import db_connect, create_table, query +from mopdb.utils import db_connect, create_table, query from cmip_utils import edit_json_cv warnings.simplefilter(action='ignore', category=FutureWarning) From 87df3ada5dab3b672030be517266e72e41e7258f Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 19:30:17 +1000 Subject: [PATCH 118/137] fixed wrong import 3 --- src/mopper/mopper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index dd22deb..91bddca 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -42,7 +42,7 @@ from mopper.setup_utils import (create_exp_json, write_config, populate_db, count_rows, sum_file_sizes, filelist_sql, write_job) from mopdb.utils import db_connect, create_table, query -from cmip_utils import edit_json_cv +from mopper.cmip_utils import edit_json_cv warnings.simplefilter(action='ignore', category=FutureWarning) warnings.simplefilter(action='ignore', category=UserWarning) From f3b693ef0245d1be00a8f2e9778c3a7f592997fc Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 15 Aug 2024 13:34:58 +1000 Subject: [PATCH 119/137] updates and clean up of tests --- .github/workflows/mopper-test-calcs.yaml | 90 ++++++++++-------------- src/mopdb/mopdb.py | 22 +++--- src/mopdb/mopdb_map.py | 7 +- src/mopdb/mopdb_utils.py | 2 +- tests/conftest.py | 27 +++++-- tests/test_calculations.py | 2 - tests/test_mop_utils.py | 10 ++- tests/test_mopdb.py | 62 +++++++++------- tests/test_mopdb_map.py | 9 +-- tests/test_mopdb_utils.py | 10 +-- tests/testdata/varlist.csv | 6 ++ tests/testdata/varlist_ex.csv | 3 - 12 files changed, 127 insertions(+), 123 deletions(-) create mode 100644 tests/testdata/varlist.csv delete mode 100644 tests/testdata/varlist_ex.csv diff --git a/.github/workflows/mopper-test-calcs.yaml b/.github/workflows/mopper-test-calcs.yaml index 
ee830d6..0c34152 100644
--- a/.github/workflows/mopper-test-calcs.yaml
+++ b/.github/workflows/mopper-test-calcs.yaml
@@ -1,67 +1,49 @@
-name: mopper-branch-test
+# This workflow can be used as a template for a workflow that
+# automatically runs only specific tests when pushing
+# to a selected branch.
+name: mopper-specific-tests
 
-#on: [push]
 on:
   push:
     branches:
-      - pytests_sam
+      - class
 
 jobs:
-  build:
-
+  build-linux:
     runs-on: ubuntu-latest
+    timeout-minutes: 60
     strategy:
       max-parallel: 5
       matrix:
-        python-version: ["3.10"]
-
+        python-version: ["3.10", "3.11", "3.12"]
     steps:
-    - uses: actions/checkout@v4
-    #---------------------------------------------------
-    - name: Set up Python 3.10
-      uses: actions/setup-python@v5
-      with:
-        python-version: ${{ matrix.python-version }}
-    #---------------------------------------------------
-    # Install Miniconda
-    - name: Install Miniconda
-      uses: conda-incubator/setup-miniconda@v2
-      with:
-        auto-update-conda: true
-        python-version: ${{ matrix.python-version }}
-
-    # Create and activate conda environment
-    - name: Create and activate conda environment
-      run: |
-        conda env create --name test-env --file conda/environment.yaml
-
-    # Install dependencies from conda
-    - name: Install dependencies
-      run: conda env update --name test-env --file conda/environment.yaml
-    #---------------------------------------------------
-    #- name: Lint with flake8
-    #  run: |
-    #    conda install flake8
-        # stop the build if there are Python syntax errors or undefined names
-    #    flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-    #    flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    # - name: Install package
-    #   run: |
-    #     conda build conda/meta.yaml
-    #---------------------------------------------------
-    - name: Test with pytest
-      run: |
-        conda install -n test-env pytest coverage codecov
-        conda run -n test-env pytest -q tests/test_calculations.py
-    #    conda run -n test-env coverage run --source src -m pytest
-    #---------------------------------------------------
-    #- name: Upload to codecov
-    #  if: steps.build.outcome == 'success'
-    #  run: |
-    #    curl -Os https://uploader.codecov.io/latest/linux/codecov
-    #    chmod +x codecov
-    #    ./codecov
-    #---------------------------------------------------
+    - uses: actions/checkout@v4.1.7
+    - name: Set up Python 3.10/3.11
+      uses: actions/setup-python@v5.1.1
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install Miniconda
+      uses: conda-incubator/setup-miniconda@v3.0.4
+      with:
+        activate-environment: testenv
+        environment-file: conda/testenv.yaml
+        python-version: ${{ matrix.python-version }}
+        channels: conda-forge
+    - name: Lint with ruff
+      shell: bash -el {0}
+      run: |
+        ruff check --output-format=github .
+      continue-on-error: true
+
+    # making sure we are testing installed package
+    - name: Install package
+      shell: bash -el {0}
+      run: |
+        conda activate testenv
+        pip install -e . 
+ - name: Test with pytest + shell: bash -el {0} + run: | + conda run python -m pytest -q tests/test_calculations.py diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index f0ab45c..d9c267c 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -28,7 +28,7 @@ from mopdb.mopdb_utils import (mapping_sql, cmorvar_sql, read_map, read_map_app4, create_table, write_cmor_table, update_db) -from mopdb.utils import (db_connect, query, delete_record) +from mopdb.utils import (config_log, db_connect, query, delete_record) from mopdb.mopdb_map import (write_varlist, write_map_template, write_catalogue, map_variables, load_vars, get_map_obj) @@ -62,7 +62,8 @@ def db_args(f): """Define database click options """ constraints = [ - click.option('--fname', '-f', type=str, required=True, + click.option('--fname', '-f', type=click.Path(exists=True), + required=True, help='Input file: used to update db table (mapping/cmor)'), click.option('--dbname', type=str, required=False, default='default', help='Database relative path by default is package access.db'), @@ -78,8 +79,8 @@ def map_args(f): commands """ constraints = [ - click.option('--fpath', '-f', type=str, required=True, - callback=require_date, + click.option('--fpath', '-f', type=click.Path(exists=True), + required=True, callback=require_date, help=("""Model output files path. For 'template' command can also be file generated by varlist step""")), click.option('--match', '-m', type=str, required=False, @@ -109,6 +110,7 @@ def mopdb(ctx, debug): ctx.obj={} # set up a default value for flow if none selected for logging ctx.obj['debug'] = debug + mopdb_log = config_log(debug, logname='mopdb_log') @mopdb.command(name='check') @@ -340,7 +342,9 @@ def map_template(ctx, fpath, match, dbname, version, alias): # work out if fpath is varlist or path to output fpath = Path(fpath) if fpath.is_file(): + mopdb_log.debug(f"{fpath} is file") map_file, vobjs, fobjs = load_vars(fpath) + mopdb_log.debug(f"loaded data from file") fname = fpath.name mopdb_log.debug(f"Imported {len(vobjs)} objects from file {fpath}") mopdb_log.debug(f"Is mapping file? 
{map_file}") @@ -358,7 +362,8 @@ def map_template(ctx, fpath, match, dbname, version, alias): @mopdb.command(name='intake') @map_args -@click.option('--filelist','-fl', type=str, required=False, default=None, +@click.option('--filelist','-fl', type=click.Path(exists=True), + required=False, default=None, help='Map or varlist csv file relative path') @click.pass_context def write_intake(ctx, fpath, match, filelist, dbname, version, alias): @@ -474,9 +479,6 @@ def update_map(ctx, dbname, fname, alias): return None - return None - - @mopdb.command(name='varlist') @map_args @click.pass_context @@ -514,8 +516,8 @@ def model_vars(ctx, fpath, match, dbname, version, alias): @mopdb.command(name='del') -@click.option('--dbname', type=str, required=True, - help='Database relative path') +@click.option('--dbname', type=click.Path(exists=True), + required=True, help='Database relative path') @click.option('--table', '-t', type=str, required=True, help='DB table to remove records from') @click.option('--pair', '-p', type=(str, str), required=True, diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index 28530c5..4fb78df 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -464,7 +464,8 @@ def write_vars(vlist, fwriter, div, conn=None, sortby='cmor_var'): var = check_realm_units(conn, var) dlist.append( var.__dict__ ) for dvar in sorted(dlist, key=itemgetter(sortby)): - dvar.pop('match') + if 'match' in dvar.keys(): + dvar.pop('match') fwriter.writerow(dvar) return @@ -598,7 +599,7 @@ def add_mapvars(vobjs, lines, path_list, alias): def load_vars(fname, indir=None): """Returns Variable and FPattern objs from varlist or map file. """ - #mopdb_log = logging.getLogger('mopdb_log') + mopdb_log = logging.getLogger('mopdb_log') vobjs = [] fobjs = {} if indir is not None: @@ -607,7 +608,7 @@ def load_vars(fname, indir=None): with open(fname, 'r') as csvfile: reader = csv.DictReader(csvfile, delimiter=';') rows = list(reader) - #check_varlist(rows, fname) + #mopdb_log.debug(f"{rows}") # set fobjs patterns = list(set(x['fpattern'] for x in rows)) for pat in patterns: diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 6de2ddf..c1f436d 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -395,7 +395,7 @@ def check_varlist(rows, fname): elif (not any( x in row['frequency'] for x in frq_list) or row['realm'] not in realm_list): mopdb_log.error(f""" Check frequency and realm in {fname}. 
- Some values might be invalid and need fixing""") + Some values might be invalid and need fixing""") sys.exit() return diff --git a/tests/conftest.py b/tests/conftest.py index 6123524..e430ed8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,10 +18,9 @@ import pytest import os import sqlite3 -import xarray as xr -import numpy as np -import pandas as pd -import datetime +#import xarray as xr +#import numpy as np +#import pandas as pd import logging import csv import pyfakefs @@ -40,6 +39,7 @@ # consecutive files with a 1-time step time axis dsonestep = os.path.join(TESTS_DATA, "onetstep.nc") dsonestep2 = os.path.join(TESTS_DATA, "onetstep_next.nc") +# varlist, map file examples @pytest.fixture def fake_fs(fs): # pylint:disable=invalid-name @@ -48,6 +48,11 @@ def fake_fs(fs): # pylint:disable=invalid-name """ yield fs +@pytest.fixture +def vlistcsv(): + vlistcsv = os.path.join(TESTS_DATA, "varlist.csv") + return vlistcsv + # setting up fixtures for databases: access.db and mopper.db @pytest.fixture def session(): @@ -98,7 +103,7 @@ def test_check_timestamp(caplog): @pytest.fixture def varlist_rows(): - # read list of vars from iexample file + # read list of vars from example file with open('testdata/varlist_ex.csv', 'r') as csvfile: reader = csv.DictReader(csvfile, delimiter=';') rows = list(reader) @@ -150,3 +155,15 @@ def varobjs(mapvar_obj): mvobj.name = 'hfls' vobjs.append(mvobj) return vobjs + + +@pytest.fixture +def output_file(tmp_path): + # create your file manually here using the tmp_path fixture + # or just import a static pre-built mock file + # something like: + target_output = os.path.join(tmp_path,'mydoc.csv') + with open(target_output, 'w+'): + pass + # write stuff here + return target_output diff --git a/tests/test_calculations.py b/tests/test_calculations.py index 692211f..eedf5c0 100644 --- a/tests/test_calculations.py +++ b/tests/test_calculations.py @@ -15,7 +15,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import xarray.testing as xrtest import numpy.testing as nptest import xarray as xr import numpy as np @@ -61,7 +60,6 @@ def create_var(nlat, nlon, ntime=None, nlev=None, sdepth=False, seed=100): def test_calc_topsoil(): mrsol = create_var(2, 3, ntime=4, sdepth=True) - #print(mrsol) expected = mrsol.isel(depth=0) + mrsol.isel(depth=1)/3.0 out = calc_topsoil(mrsol) xrtest.assert_allclose(out, expected, rtol=1e-05) diff --git a/tests/test_mop_utils.py b/tests/test_mop_utils.py index b47e158..15fcd9d 100644 --- a/tests/test_mop_utils.py +++ b/tests/test_mop_utils.py @@ -15,15 +15,13 @@ # See the License for the specific language governing permissions and # limitations under the License.
-import pytest +#import pytest +import click +import xarray as xr import numpy as np import pandas as pd -from mopper.mop_utils import * +from mopper.mop_utils import (check_timestamp, get_cmorname,) -#try: -# import unittest.mock as mock -#except ImportError: -# import mock ctx = click.Context(click.Command('cmd'), obj={'sel_start': '198302170600', 'sel_end': '198302181300', diff --git a/tests/test_mopdb.py b/tests/test_mopdb.py index e570fdb..b6c1e50 100644 --- a/tests/test_mopdb.py +++ b/tests/test_mopdb.py @@ -17,13 +17,25 @@ import pytest import os -import sqlite3 -from mopdb.mopdb import * +import logging +#from mopdb.mopdb import * from click.testing import CliRunner +from conftest import vlistcsv +from pytest import CaptureFixture -@pytest.mark.parametrize('subcommand', ['varlist', 'template', 'check', 'cmor', 'table', 'map']) -def test_mopdb(command, subcommand, runner): - ctx = click.Context(click.Command('cmd'), obj={'prop': 'A Context'}) + +@pytest.fixture(scope='module') +def runner(): + return CliRunner() + +def test_command(runner): + result = runner.invoke(mopdb, ['--help']) + assert result.exit_code == 0 + +@pytest.mark.parametrize('subcommand', ['varlist', 'template', + 'intake', 'check', 'cmor', 'table', 'map', 'del']) +def test_subcmd(subcommand, runner): + ctx = click.Context(click.Command('mopdb'), obj={'prop': 'A Context'}) with ctx: result = runner.invoke(mopdb, ['--help']) assert result.exit_code == 0 @@ -31,29 +43,27 @@ def test_mopdb(command, subcommand, runner): assert result.exit_code == 0 @pytest.mark.usefixtures("setup_access_db") # 1 -def test_template(session): - - runner = CliRunner() - - with runner.isolated_filesystem(): - with open('varlist.txt', 'w') as f: - f.write('name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;dtype;size;nsteps;file_name;long_name;standard_name') - f.write('fld_s03i236;tas;K;time lat lon,mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;2081;cm000a.pm;TEMPERATURE AT 1.5M;air_temperature') - f.write('fld_s03i237;huss;1;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;2081;cm000a.pm;SPECIFIC HUMIDITY AT 1.5M;specific_humidity') - f.write('fld_s05i205;prrc;kg m-2 s-1;time_0 lat lon;3hr;atmos;area: time: mean;CMIP6_E3hr;float32;110592;578880;cm000a.p8;CONVECTIVE RAINFALL RATE KG/M2/S;convective_rainfall_flux') - f.write('fld_s03i236;tas;K;time lat lon;day;atmos;area: time: mean;CMIP6_day;float32;110592;74772;cm000a.pd;TEMPERATURE AT 1.5M;air_temperature') - - result = runner.invoke(mopdb, ['template', '-f varlist.txt', '-vCM2']) - #assert result.exit_code == 0 - assert 'Opened database ' in result.output - #assert 'Definable cmip var' in result.output -#Pass temp_dir to control where the temporary directory is created. The directory will not be removed by Click in this case. This is useful to integrate with a framework like Pytest that manages temporary files. 
+def test_template(session, runner, tmp_path, caplog, + capsys: CaptureFixture): -#def test_keep_dir(tmp_path): -# runner = CliRunner() + caplog.set_level(logging.DEBUG, logger='mopdb_log') + with capsys.disabled() as disabled: + with runner.isolated_filesystem(temp_dir=tmp_path) as td: + os.mkdir("myfiles") + with open('myfiles/varlist.csv', 'w') as f: + f.write('name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;dtype;size;nsteps;fpattern;long_name;standard_name\n') + f.write('fld_s03i236;tas;K;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;2081;cm000a.pm;TEMPERATURE AT 1.5M;air_temperature\n') + f.write('fld_s03i237;huss;1;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;2081;cm000a.pm;SPECIFIC HUMIDITY AT 1.5M;specific_humidity\n') + f.write('fld_s05i205;prrc;kg m-2 s-1;time_0 lat lon;3hr;atmos;area: time: mean;CMIP6_E3hr;float32;110592;578880;cm000a.p8;CONVECTIVE RAINFALL RATE KG/M2/S;convective_rainfall_flux\n') + f.write('fld_s03i236;tas;K;time lat lon;day;atmos;area: time: mean;CMIP6_day;float32;110592;74772;cm000a.pd;TEMPERATURE AT 1.5M;air_temperature\n') -# with runner.isolated_filesystem(temp_dir=tmp_path) as td: -# ... + args = ['--debug', 'template', '-f', 'myfiles/varlist.csv', '-v', 'CM2'] + result = runner.invoke(mopdb, args) + #assert result.exit_code == 0 + assert 'Opened database ' in caplog.messages[0] + assert 'myfiles/varlist.csv is file' in caplog.messages + #assert caplog.messages[-1] == 'Finished writing variables to mapping template' + #assert 'Definable cmip var' in result.output #def test_with_context(): # ctx = click.Context(click.Command('cmd'), obj={'prop': 'A Context'}) diff --git a/tests/test_mopdb_map.py b/tests/test_mopdb_map.py index 8b7a5e9..3372db6 100644 --- a/tests/test_mopdb_map.py +++ b/tests/test_mopdb_map.py @@ -17,13 +17,10 @@ import pytest import os -import sqlite3 -import click import logging -import itertools -from mopdb.mopdb_map import * -from mopdb.mopdb_class import MapVariable, Variable, FPattern -from conftest import * +from mopdb.mopdb_map import (add_var, get_file_frq) +#from mopdb.mopdb_class import MapVariable, Variable, FPattern +#from conftest import * TESTS_HOME = os.path.abspath(os.path.dirname(__file__)) diff --git a/tests/test_mopdb_utils.py b/tests/test_mopdb_utils.py index 0f872a9..b5409b3 100644 --- a/tests/test_mopdb_utils.py +++ b/tests/test_mopdb_utils.py @@ -15,16 +15,12 @@ # See the License for the specific language governing permissions and # limitations under the License.
-import pytest -import os -import sqlite3 -import click +#import pytest import logging import itertools -from mopdb.mopdb_utils import * -from mopdb.mopdb_class import MapVariable, Variable, FPattern +from mopdb.mopdb_utils import (get_date_pattern, ) +#from mopdb.mopdb_class import MapVariable, Variable, FPattern -#from click.testing import CliRunner diff --git a/tests/testdata/varlist.csv b/tests/testdata/varlist.csv new file mode 100644 index 0000000..f04cc58 --- /dev/null +++ b/tests/testdata/varlist.csv @@ -0,0 +1,6 @@ +name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;fpattern;long_name;standard_name +fld_s00i004;theta;K;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CM2_mon;float32;9400320;12;cw323a.pm;THETA AFTER TIMESTEP;air_potential_temperature +fld_s00i010;hus;1;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CMIP6_CFmon;float32;9400320;12;cw323a.pm;SPECIFIC HUMIDITY AFTER TIMESTEP;specific_humidity +fld_s00i024;ts;K;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE TEMPERATURE AFTER TIMESTEP;surface_temperature +fld_s00i031;siconca;%;time lat lon;mon;atmos;area: time: mean;;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction +fld_s03i234;hfls;W m-2;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE LATENT HEAT FLUX W/M2;surface_upward_latent_heat_flu diff --git a/tests/testdata/varlist_ex.csv b/tests/testdata/varlist_ex.csv deleted file mode 100644 index 780142d..0000000 --- a/tests/testdata/varlist_ex.csv +++ /dev/null @@ -1,3 +0,0 @@ -name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;fpattern;long_name;standard_name -fld_s03i236;tas;degC;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature -fld_s00i031;siconca;%;time lat lon;mon;atmos;area: time: mean;;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction fld_s03i234;hfls;W m-2;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE LATENT HEAT FLUX W/M2;surface_upward_latent_heat_flu From a1921865661dcd3d74950bf31d52387df318a62e Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 15 Aug 2024 14:47:55 +1000 Subject: [PATCH 120/137] fixed import in tests --- src/mopdb/mopdb.py | 5 ++--- src/mopdb/mopdb_map.py | 4 ++-- src/mopper/calculations.py | 11 +++++++---- src/mopper/mop_utils.py | 2 +- src/mopper/mopper.py | 3 +-- src/mopper/setup_utils.py | 2 +- tests/conftest.py | 16 +++++++++++----- tests/test_calculations.py | 16 ++++++++-------- tests/test_mop_utils.py | 1 + tests/test_mopdb.py | 5 +++-- tests/test_mopdb_map.py | 1 + 11 files changed, 38 insertions(+), 28 deletions(-) diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index d9c267c..d458811 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -110,7 +110,7 @@ def mopdb(ctx, debug): ctx.obj={} # set up a default value for flow if none selected for logging ctx.obj['debug'] = debug - mopdb_log = config_log(debug, logname='mopdb_log') + config_log(debug, logname='mopdb_log') @mopdb.command(name='check') @@ -344,10 +344,9 @@ def map_template(ctx, fpath, match, dbname, version, alias): if fpath.is_file(): mopdb_log.debug(f"{fpath} is file") map_file, vobjs, fobjs = load_vars(fpath) - mopdb_log.debug(f"loaded data from file") fname = fpath.name mopdb_log.debug(f"Imported {len(vobjs)} objects from file {fpath}") - 
mopdb_log.debug(f"Is mapping file? {map_file}") + mopdb_log.debug(f"File is mapping: {map_file}") else: mopdb_log.debug(f"Calling write_varlist() from template: {fpath}") fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index 4fb78df..885e3de 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -99,7 +99,7 @@ def get_file_frq(ds, fnext, int2frq): frq = {} # retrieve all time axes time_axs = [d for d in ds.dims if 'time' in d] - time_axs_len = set(len(ds[d]) for d in time_axs) + #time_axs_len = set(len(ds[d]) for d in time_axs) time_axs.sort(key=lambda x: len(ds[x]), reverse=True) mopdb_log.debug(f"in get_file_frq, time_axs: {time_axs}") if len(time_axs) > 0: @@ -608,7 +608,7 @@ def load_vars(fname, indir=None): with open(fname, 'r') as csvfile: reader = csv.DictReader(csvfile, delimiter=';') rows = list(reader) - #mopdb_log.debug(f"{rows}") + mopdb_log.debug(f"Loaded file with {len(rows)} rows") # set fobjs patterns = list(set(x['fpattern'] for x in rows)) for pat in patterns: diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py index ab8462c..568285c 100644 --- a/src/mopper/calculations.py +++ b/src/mopper/calculations.py @@ -917,7 +917,7 @@ def K_degC(ctx, var): def tos_3hr(var, landfrac): - """notes + """not sure this is needed?? Parameters ---------- @@ -928,7 +928,7 @@ def tos_3hr(var, landfrac): vout : Xarray dataset """ - v = K_degC(var) + var = K_degC(var) vout = xr.zeros_like(var) t = len(var.time) @@ -1132,7 +1132,8 @@ def average_tile(var, tilefrac=None, lfrac=1, landfrac=None, lev=None): return vout -def calc_topsoil(soilvar): +@click.pass_context +def calc_topsoil(ctx, soilvar): """Returns the variable over the first 10cm of soil. 
Parameters @@ -1147,9 +1148,11 @@ def calc_topsoil(soilvar): topsoil : Xarray DataArray Variable defined on top 10cm of soil """ + var_log = logging.getLogger(ctx.obj['var_log']) depth = soilvar.depth # find index of bottom depth level including the first 10cm of soil - maxlev = depth.where(depth >= 0.1).argmin().values + maxlev = np.nanargmin(depth.where(depth >= 0.1).values) + var_log.debug(f"Max level of soil used is {maxlev}") # calculate the fraction of maxlev which falls in first 10cm fraction = (0.1 - depth[maxlev -1])/(depth[maxlev] - depth[maxlev-1]) topsoil = soilvar.isel(depth=slice(0,maxlev)).sum(dim='depth') diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index 315afec..12ef93b 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -895,7 +895,7 @@ def extract_var(ctx, input_ds, tdim, in_missing): if array.dtype.kind == 'i': try: in_missing = int(in_missing) - except Eception as e: + except Exception as e: in_missing = int(-999) else: array = array.fillna(in_missing) diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 91bddca..a8f57d5 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -226,7 +226,6 @@ def mop_process(ctx): mop_log = logging.getLogger('mop_log') var_log = logging.getLogger(ctx.obj['var_log']) - default_cal = "gregorian" logname = f"{ctx.obj['variable_id']}_{ctx.obj['table']}_{ctx.obj['tstart']}" # Setup CMOR @@ -478,7 +477,7 @@ def process_file(ctx, row): #Check if output file matches what we expect var_log.info(f"Output file: {ret}") if ret == expected_file: - var_log.info(f"Expected and cmor file paths match") + var_log.info("Expected and cmor file paths match") msg = f"Successfully processed variable: {var_msg}\n" status = "processed" else : diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py index 2bc293d..05dfaca 100755 --- a/src/mopper/setup_utils.py +++ b/src/mopper/setup_utils.py @@ -397,7 +397,7 @@ def adjust_size(opts, insize): # volume,any vertical sum # resample will affect frequency but that should be already taken into account in mapping calc = opts['calculation'] - resample = opts['resample'] + #resample = opts['resample'] grid_size = insize if 'plevinterp' in calc: if "," in calc: diff --git a/tests/conftest.py b/tests/conftest.py index e430ed8..3770a94 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,9 +18,7 @@ import pytest import os import sqlite3 -#import xarray as xr -#import numpy as np -#import pandas as pd +import click import logging import csv import pyfakefs @@ -48,6 +46,13 @@ def fake_fs(fs): # pylint:disable=invalid-name """ yield fs +@pytest.fixture +def ctx(): + ctx = click.Context(click.Command('cmd'), + obj={'sel_start': '198302170600', 'sel_end': '198302181300', + 'realm': 'atmos', 'frequency': '1hr', 'var_log': 'varlog_1'}) + return ctx + @pytest.fixture def vlistcsv(): vlistcsv = os.path.join(TESTS_DATA, "varlist.csv") @@ -91,8 +96,8 @@ def setup_access_db(session): @pytest.fixture def setup_mopper_db(session): - filelist_sql = mapping_sql() - session.execute(filelist_sql) + flist_sql = filelist_sql() + session.execute(flist_sql) session.execute('''INSERT INTO filelist VALUES ("/testdata/atmos/umnsa_spec_*.nc", "/testdata/mjo-elnino/v1-0/A10min/", "tas_AUS2200_mjo-elnino_subhrPt_20160101001000-20160102000000.nc", "fld_s03i236", "tas", "AUS2200_A10min", "subhrPt", "atmos", "point", "20160101T0005", "20160102T0000", "201601010000", "201601012355", "unprocessed", "3027.83203125", "mjo-elnino", "K", "AUS2200", "AUS2200", 
"/testdata/mjo-elnino/mjo-elnino.json", "1970-01-01", "v1-0")''') session.connection.commit() @@ -121,6 +126,7 @@ def add_var_out(): vlist = [{'cmor_var': '', 'input_vars': '', 'calculation': '', 'units': '' ,'realm': '', 'positive': '', 'version': '', 'cmor_table': ''} ] + return vlist @pytest.fixture def map_rows(): diff --git a/tests/test_calculations.py b/tests/test_calculations.py index eedf5c0..8c70d28 100644 --- a/tests/test_calculations.py +++ b/tests/test_calculations.py @@ -17,14 +17,13 @@ import numpy.testing as nptest import xarray as xr +import xarray.testing as xrtest import numpy as np import pandas as pd import logging -from mopper.calculations import * +from mopper.calculations import (overturn_stream, calc_topsoil,) +from conftest import ctx -ctx = click.Context(click.Command('cmd'), - obj={'sel_start': '198302170600', 'sel_end': '198302181300', - 'realm': 'atmos', 'frequency': '1hr', 'var_log': 'varlog_1'}) def create_var(nlat, nlon, ntime=None, nlev=None, sdepth=False, seed=100): @@ -58,14 +57,15 @@ def create_var(nlat, nlon, ntime=None, nlev=None, sdepth=False, seed=100): return da -def test_calc_topsoil(): +def test_calc_topsoil(caplog, ctx): + caplog.set_level(logging.DEBUG, logger='varlog_1') mrsol = create_var(2, 3, ntime=4, sdepth=True) expected = mrsol.isel(depth=0) + mrsol.isel(depth=1)/3.0 - out = calc_topsoil(mrsol) + with ctx: + out = calc_topsoil(mrsol) xrtest.assert_allclose(out, expected, rtol=1e-05) -def test_overturn_stream(caplog): - global ctx +def test_overturn_stream(caplog, ctx): caplog.set_level(logging.DEBUG, logger='varlog_1') # set up input dims = ['time', 'depth', 'lat', 'lon'] diff --git a/tests/test_mop_utils.py b/tests/test_mop_utils.py index 15fcd9d..8cf28a2 100644 --- a/tests/test_mop_utils.py +++ b/tests/test_mop_utils.py @@ -20,6 +20,7 @@ import xarray as xr import numpy as np import pandas as pd +import logging from mopper.mop_utils import (check_timestamp, get_cmorname,) diff --git a/tests/test_mopdb.py b/tests/test_mopdb.py index b6c1e50..b377077 100644 --- a/tests/test_mopdb.py +++ b/tests/test_mopdb.py @@ -18,9 +18,10 @@ import pytest import os import logging -#from mopdb.mopdb import * +import click +from mopdb.mopdb import mopdb from click.testing import CliRunner -from conftest import vlistcsv +#from conftest import vlistcsv from pytest import CaptureFixture diff --git a/tests/test_mopdb_map.py b/tests/test_mopdb_map.py index 3372db6..9b66447 100644 --- a/tests/test_mopdb_map.py +++ b/tests/test_mopdb_map.py @@ -18,6 +18,7 @@ import pytest import os import logging +import xarray as xr from mopdb.mopdb_map import (add_var, get_file_frq) #from mopdb.mopdb_class import MapVariable, Variable, FPattern #from conftest import * From 1737476edd5ffc7d24bc94a18c168e736599739f Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 16 Aug 2024 13:33:08 +1000 Subject: [PATCH 121/137] moving shell commnad in conda action --- .github/workflows/mopper-test-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index e664ad7..6330e6c 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -30,8 +30,8 @@ jobs: environment-file: conda/environment.yaml # Path to the build conda environment show-channel-urls: true # - name: Build but do not upload the conda packages - uses: uibcdf/action-build-and-upload-conda-packages@v1.3.0 shell: bash -el {0} + uses: 
uibcdf/action-build-and-upload-conda-packages@v1.3.0 with: meta_yaml_dir: conda python-version: ${{ matrix.python-version }} # Values previously defined in `matrix` From f7e7cea7195c85024fe8752e40fb6910f25ed86a Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 16 Aug 2024 13:52:47 +1000 Subject: [PATCH 122/137] removing shell command in conda action --- .github/workflows/mopper-test-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index 6330e6c..102b423 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -30,9 +30,9 @@ jobs: environment-file: conda/environment.yaml # Path to the build conda environment show-channel-urls: true # - name: Build but do not upload the conda packages - shell: bash -el {0} uses: uibcdf/action-build-and-upload-conda-packages@v1.3.0 with: + #shell: bash -el {0} meta_yaml_dir: conda python-version: ${{ matrix.python-version }} # Values previously defined in `matrix` platform_linux-64: true From ae510e2d5e7e0eadd67a5ade63632b2a050959da Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 16 Aug 2024 14:05:34 +1000 Subject: [PATCH 123/137] trying alternative way of getting version for conda action --- .github/workflows/mopper-test-conda.yaml | 3 ++- conda/meta.yaml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index 102b423..76fb53f 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -22,6 +22,8 @@ jobs: - uses: actions/checkout@v4.1.7 with: fetch-depth: 0 + - name: Set env + run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV - name: Conda environment creation and activation uses: conda-incubator/setup-miniconda@v3.0.4 with: @@ -32,7 +34,6 @@ jobs: - name: Build but do not upload the conda packages uses: uibcdf/action-build-and-upload-conda-packages@v1.3.0 with: - #shell: bash -el {0} meta_yaml_dir: conda python-version: ${{ matrix.python-version }} # Values previously defined in `matrix` platform_linux-64: true diff --git a/conda/meta.yaml b/conda/meta.yaml index c5fb0cf..8cda7f3 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,6 +1,6 @@ package: name: mopper - version: "{{ environ['GIT_DESCRIBE_TAG'] }}" + version: "{{ env.RELEASE_VERSION }}" #source: # path: ./ From a5f24ee28b318c4b3457de2e8f5dbe7d3c88fd10 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 16 Aug 2024 15:45:56 +1000 Subject: [PATCH 124/137] hopefully fixed meta.yaml --- conda/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 8cda7f3..9ed9b45 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,6 +1,6 @@ package: name: mopper - version: "{{ env.RELEASE_VERSION }}" + version: "{{ environ.get('GIT_DESCRIBE_TAG', '0.9') }}" #source: # path: ./ From b50d34b4b37ec741b9a6263bcedadc86a44c49fe Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 16 Aug 2024 15:51:49 +1000 Subject: [PATCH 125/137] fixed source in meta.yaml --- conda/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 9ed9b45..88fdd13 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -7,10 +7,10 @@ package: source: #url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR/archive/refs/tags/{{version}}.tar.gz - #git_url: 
https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git + git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git#prerelease #git_rev: "{{ version }}" #git_depth: 1 # (Defaults to -1/not shallow) - path: ../src + #path: ../ build: number: 1 From 8c0b4ea185c20a426fc8b58f9461be24404d98f5 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 16 Aug 2024 16:01:13 +1000 Subject: [PATCH 126/137] fixed source in meta.yaml 2 --- conda/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 88fdd13..aadef38 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -7,9 +7,9 @@ package: source: #url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR/archive/refs/tags/{{version}}.tar.gz - git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git#prerelease + git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git@prerelease #git_rev: "{{ version }}" - #git_depth: 1 # (Defaults to -1/not shallow) + git_depth: 1 # (Defaults to -1/not shallow) #path: ../ build: From 492961d01d163acb5f76f0e74a0a95f33fd5441b Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 16 Aug 2024 16:09:19 +1000 Subject: [PATCH 127/137] fixed source in meta.yaml 3 --- conda/meta.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index aadef38..57da1b0 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -7,7 +7,8 @@ package: source: #url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR/archive/refs/tags/{{version}}.tar.gz - git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git@prerelease + git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git + git_tag: prerelease #git_rev: "{{ version }}" git_depth: 1 # (Defaults to -1/not shallow) #path: ../ From 11e1c09dfa8de790df7d2a2e7044d65468261e39 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 16 Aug 2024 16:28:03 +1000 Subject: [PATCH 128/137] fixing git_depth --- conda/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 57da1b0..a8b2f5a 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -10,7 +10,7 @@ source: git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git git_tag: prerelease #git_rev: "{{ version }}" - git_depth: 1 # (Defaults to -1/not shallow) + #git_depth: 1 # (Defaults to -1/not shallow) #path: ../ build: From 3cef82897c6adf5092d09acfa804398adc7d4eb3 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 21 Aug 2024 12:39:34 +1000 Subject: [PATCH 129/137] fixed bug when detecting variables from different realms at once --- src/mopdb/mopdb.py | 2 +- src/mopdb/mopdb_map.py | 2 +- src/mopdb/mopdb_utils.py | 2 +- src/mopper/calculations.py | 4 +++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index d458811..7e50bed 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -403,7 +403,7 @@ def write_intake(ctx, fpath, match, filelist, dbname, version, alias): if fpath.is_file(): mopdb_log.error(f""" {fpath} should be absolute or relative path to model output. 
- To pass a varlist or map file use --filelist/-f""") + To pass a varlist or map file use --filelist/-fl""") elif filelist is None: mopdb_log.debug(f"Calling write_varlist() from intake: {fpath}") fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index 885e3de..cd26112 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -162,7 +162,7 @@ def write_varlist(conn, indir, match, version, alias): if fpattern in patterns: continue patterns.append(fpattern) - fobj = FPattern(fpattern, Path(indir)) + fobj = FPattern(fpattern, fpath.parent) #pattern_list = list_files(indir, f"{fpattern}*") nfiles = len(fobj.files) mopdb_log.debug(f"File pattern, number of files: {fpattern}, {nfiles}") diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index c1f436d..b3f8bf4 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -364,7 +364,7 @@ def check_realm_units(conn, var): def get_realm(version, ds): '''Try to retrieve realm if using path failed''' - + realm = 'NArealm' mopdb_log = logging.getLogger('mopdb_log') if version == 'AUS2200': realm = 'atmos' diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py index 568285c..ba68dc4 100644 --- a/src/mopper/calculations.py +++ b/src/mopper/calculations.py @@ -813,9 +813,11 @@ def get_plev(ctx, levnum): @click.pass_context def plevinterp(ctx, var, pmod, levnum): """Interpolating var from model levels to pressure levels - + _extended_summary_ + Based on function from Dale Roberts (currently ANU) + Parameters ---------- var : Xarray DataArray From a376cc230ce1f06a5aa7f1e20736f54cf89c5160 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 23 Aug 2024 08:38:27 +1000 Subject: [PATCH 130/137] minor adjustment to docs, conf files and code after tests --- ACDD_conf.yaml | 2 +- CMIP6_conf.yaml | 4 +++- docs/mopdb_command.rst | 12 ++++++++++-- src/mopdata/update_db.py.txt | 9 ++++++++- src/mopper/mop_utils.py | 11 ++++++++--- 5 files changed, 30 insertions(+), 8 deletions(-) diff --git a/ACDD_conf.yaml b/ACDD_conf.yaml index f507aeb..a86f373 100755 --- a/ACDD_conf.yaml +++ b/ACDD_conf.yaml @@ -174,4 +174,4 @@ attrs: parent: !!bool false # CMOR will add a tracking_id if you want to define a prefix add here tracking_id_prefix: - comment: "post-processed using ACCESS-MOPPeR v0.6.0 https://doi.org/10.5281/zenodo.10346216" + comment: "post-processed using ACCESS-MOPPeR v1.0.0 https://doi.org/10.5281/zenodo.10346216" diff --git a/CMIP6_conf.yaml b/CMIP6_conf.yaml index 9ae85aa..c421df0 100755 --- a/CMIP6_conf.yaml +++ b/CMIP6_conf.yaml @@ -91,6 +91,8 @@ cmor: # walltime in "hh:mm:ss" walltime: '8:00:00' mode: cmip6 + # conda_env: /bin/activate + conda_env: default # Global attributes: these will be added to each files comment unwanted ones # the labels CMIP6/ACDD indicates which ones are necessary to comply with respective standards @@ -161,4 +163,4 @@ attrs: #CMOR will add a tracking_id if you want to define a prefix add here tracking_id_prefix: Conventions: "CF-1.7 CMIP-6.2" - comment: "post-processed using ACCESS-MOPPeR v0.6.0 https://doi.org/10.5281/zenodo.10346216" + comment: "post-processed using ACCESS-MOPPeR v1.0.0 https://doi.org/10.5281/zenodo.10346216" diff --git a/docs/mopdb_command.rst b/docs/mopdb_command.rst index 5c24196..f60d958 100644 --- a/docs/mopdb_command.rst +++ b/docs/mopdb_command.rst @@ -57,16 +57,19 @@ A user that wants to create a mapping table for another AUS2200 simulation can u Create a mapping file 
--------------------- -.. code-block:: This can be done by providing the model output path and a pattern to match, or directly a varlist file. From output path: +.. code-block:: + mopdb template -f <output path> -m <match> -v <version> From varlist file: +.. code-block:: + mopdb template -f <varlist file> -v <version> This will create a map_<alias>.csv file using, if available, information from the mapping table. @@ -110,20 +113,25 @@ The other groups of records require checking, as either the version or the frequ Create an intake catalogue -------------------------- -.. code-block:: This represents an extra step on top of the mapping, so it can start directly from an existing mapping or from scratch by providing the model output path and a match. From output path: +.. code-block:: + mopdb intake -f <output path> -m <match> -v <version> { -a <alias> } From varlist file: +.. code-block:: + mopdb intake -f <output path> -fl <varlist file> -v <version> { -a <alias> } From mapping file: +.. code-block:: + mopdb intake -f <output path> -fl <mapping file> -v <version> { -a <alias> } NB the model output path is still needed even when passing an existing mapping or variable list. diff --git a/src/mopdata/update_db.py.txt b/src/mopdata/update_db.py.txt index ddcd6a5..c1cb17d 100644 --- a/src/mopdata/update_db.py.txt +++ b/src/mopdata/update_db.py.txt @@ -37,7 +37,8 @@ def update_map(conn, varid, ctable): """Read mappings for variable from map file and update them in filelist """ - keys = ['frequency','realm','timeshot','calculation', 'positive', 'resample'] + keys = ['frequency','realm','timeshot','calculation', + 'positive', 'resample'] keys2 = {'vin': 'input_vars', 'in_units': 'units'} fname = f"maps/{ctable}.json" with open(fname, 'r') as f: @@ -48,6 +49,12 @@ args = {k: row[k] for k in keys} for k,v in keys2.items(): args[k] = row[v] + if 'datadir' in row.keys(): + paths = row['file_structure'].split() + infile = '' + for x in paths: + infile += f"{row['datadir']}/{x} " + args['infile'] = infile cur = conn.cursor() sql = f"UPDATE filelist SET" for k,v in args.items(): diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index 12ef93b..3577e25 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -564,9 +564,13 @@ def get_coords(ctx, ovar, coords): var_log = logging.getLogger(ctx.obj['var_log']) # open ancil grid file to read vertices #PP be careful this is currently hardcoded which is not ok!
- ancil_file = ctx.obj[f"grid_{ctx.obj['realm']}"] + ancil_dir = ctx.obj.get('ancils_path', '') + ancil_file = ancil_dir + "/" + ctx.obj.get(f"grid_{ctx.obj['realm']}", '') + if ancil_file == '' or not Path(ancil_file).exists(): + var_log.error(f"Ancil file {ancil_file} not set or inexistent") + sys.exit() var_log.debug(f"getting lat/lon and bnds from ancil file: {ancil_file}") - ds = xr.open_dataset(f"{ctx.obj['ancils_path']}/{ancil_file}") + ds = xr.open_dataset(ancil_file) var_log.debug(f"ancil ds: {ds}") # read lat/lon and vertices mapping cfile = import_files('mopdata').joinpath('latlon_vertices.yaml') @@ -793,8 +797,9 @@ def get_bounds_values(ctx, ds, bname): calc = False var_log = logging.getLogger(ctx.obj['var_log']) var_log.debug(f"Getting bounds values for {bname}") - ancil_file = ctx.obj[f"grid_{ctx.obj['realm']}"] + ancil_file = ctx.obj.get(f"grid_{ctx.obj['realm']}", '') if bname in ds.variables: + var_log.debug(f"Bounds for {bname} in file") bnds_val = ds[bname].values elif ancil_file != "": fname = f"{ctx.obj['ancils_path']}/{ancil_file}" From 0a4465f050968201df2965dc678ecf4849322a14 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 23 Aug 2024 09:08:36 +1000 Subject: [PATCH 131/137] moving to python -m pytest to solve conda test issue --- conda/run_test.sh | 2 +- conda/run_test_coverage.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conda/run_test.sh b/conda/run_test.sh index 21da92a..6b7b31c 100644 --- a/conda/run_test.sh +++ b/conda/run_test.sh @@ -1,2 +1,2 @@ #!/bin/bash -py.test +python -m pytest diff --git a/conda/run_test_coverage.sh b/conda/run_test_coverage.sh index 4da4a69..476fc38 100644 --- a/conda/run_test_coverage.sh +++ b/conda/run_test_coverage.sh @@ -1,5 +1,5 @@ #!/bin/bash pip install coverage pytest-cov -py.test --cov=mopper --cov-report xml:/tmp/artefacts/tests/pytest/coverage.xml --junit-xml /tmp/artefacts/tests/pytest/results.xml -py.test --cov=mopdb --cov-report xml:/tmp/artefacts/tests/pytest/coverage.xml --junit-xml /tmp/artefacts/tests/pytest/results.xml +python -m pytest --cov=mopper --cov-report xml:/tmp/artefacts/tests/pytest/coverage.xml --junit-xml /tmp/artefacts/tests/pytest/results.xml +python -m pytest --cov=mopdb --cov-report xml:/tmp/artefacts/tests/pytest/coverage.xml --junit-xml /tmp/artefacts/tests/pytest/results.xml From c8d6d744e7b1d370dd4fdca5a85790377a773849 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 23 Aug 2024 09:19:40 +1000 Subject: [PATCH 132/137] adding echo to run_test to see if it gets called --- conda/run_test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/conda/run_test.sh b/conda/run_test.sh index 6b7b31c..1c3c29b 100644 --- a/conda/run_test.sh +++ b/conda/run_test.sh @@ -1,2 +1,3 @@ #!/bin/bash +echo 'calling run_test' python -m pytest From 6ca07ed20a29d6cd7e55b826976e1f5f5df4d445 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 23 Aug 2024 09:37:25 +1000 Subject: [PATCH 133/137] trying to fix conda tests --- conda/meta.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index a8b2f5a..9af645d 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -37,12 +37,17 @@ requirements: - python-dateutil test: + imports: + - mopdb + - mopper source_files: - - tests/testdata/* + - tests/*.py - tests/testdata requires: - pytest - pyfakefs + - coverage + - codecov about: home: https://github.com/ACCESS-Hive/ACCESS-MOPPeR From d448bcd03cec84fed61ee3141ce8175c9d7998a3 Mon Sep 17 00:00:00 2001 From: Paola Petrelli 
Date: Fri, 23 Aug 2024 09:43:48 +1000 Subject: [PATCH 134/137] trying to fix conda tests 2 --- conda/meta.yaml | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 9af645d..bf5e2cd 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -37,13 +37,20 @@ requirements: - python-dateutil test: - imports: - - mopdb - - mopper + #imports: + # - mopdb + # - mopper source_files: - - tests/*.py + - tests - tests/testdata requires: + - cmor + - xarray + - numpy + - dask + - pyyaml + - cftime + - python-dateutil - pytest - pyfakefs - coverage From 1da6fee86e259acfdd7563f331ad002eb42ccbd0 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 23 Aug 2024 09:57:37 +1000 Subject: [PATCH 135/137] fixing conda action --- .github/workflows/mopper-test-conda.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index 76fb53f..6ab53d2 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -36,9 +36,6 @@ jobs: with: meta_yaml_dir: conda python-version: ${{ matrix.python-version }} # Values previously defined in `matrix` - platform_linux-64: true - platform_osx-64: false - platform_win-64: false user: coecms label: auto upload: false From ceb0710701e419c98871b08880bf170bee8c2e04 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 23 Aug 2024 12:03:48 +1000 Subject: [PATCH 136/137] last fixes before releasing 1.0.0 --- .github/workflows/mopper-conda-release.yaml | 36 +++++++++++++++++++++ .github/workflows/mopper-pytest.yaml | 12 +++---- README.md | 7 ++-- conda/meta.yaml | 8 ++--- src/mopdata/cmor_tables/CM2_3hr.json | 5 +-- 5 files changed, 51 insertions(+), 17 deletions(-) create mode 100644 .github/workflows/mopper-conda-release.yaml diff --git a/.github/workflows/mopper-conda-release.yaml b/.github/workflows/mopper-conda-release.yaml new file mode 100644 index 0000000..7e070d2 --- /dev/null +++ b/.github/workflows/mopper-conda-release.yaml @@ -0,0 +1,36 @@ +name: Build of mopper conda package for new release + +# Controls when the action will run. 
+on: + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + conda_deployment_with_new_tag: + name: Conda deployment of package with Python ${{ matrix.python-version }} + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v4.1.7 + with: + fetch-depth: 0 + - name: Set env + run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV + - name: Conda environment creation and activation + uses: conda-incubator/setup-miniconda@v3.0.4 + with: + python-version: ${{ matrix.python-version }} + activate-environment: mopper_env + environment-file: conda/environment.yaml # Path to the build conda environment + show-channel-urls: true # + - name: Build and upload the conda packages + uses: uibcdf/action-build-and-upload-conda-packages@v1.3.0 + with: + meta_yaml_dir: conda + python-version: ${{ matrix.python-version }} # Values previously defined in `matrix` + user: coecms + label: auto + upload: true + token: ${{ secrets.ANACONDA_TOKEN }} # Replace with the right name of your secret diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 51d846c..44e2b5e 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -4,12 +4,12 @@ name: mopper-all-tests on: push: branches: - #- prerelease - - class - #pull_request: - # branches: - # - main - # - prerelease + - prerelease + - main + pull_request: + branches: + - main + - prerelease jobs: diff --git a/README.md b/README.md index 159a3c5..12b0d94 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # [ACCESS Model Output Post-Processor (MOPPeR)](https://access-mopper.readthedocs.io/en/latest) [![Read the docs](https://readthedocs.org/projects/access-mopper/badge/?version=latest)](https://access-mopper.readthedocs.io/en/latest/) -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10976467.svg)](https://doi.org/10.5281/zenodo.10976467) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.12747219.svg)](https://doi.org/10.5281/zenodo.12747219) This code is derived from the [APP4](https://doi.org/10.5281/zenodo.7703469), initially created by Peter Uhe for CMIP5, and further developed for CMIP6-era by Chloe Mackallah from CSIRO, O&A Aspendale. @@ -15,12 +15,13 @@ Designed for use on ACCESS model output that has been archived using the [ACCESS Although we retained a differentiation between `custom` and `cmip` mode the main workflow is the same and `mode` is now only another field in the main configuration file. +See [MOPPeR ReadtheDocs](https://access-mopper.readthedocs.io/en/stable/) for the full documentation. ### Install -You can install the latest version of `mopper` directly from conda (accessnri channel):: +You can install the latest version of `mopper` directly from conda (coecms channel):: - conda install -c accessnri mopper + conda install -c coecms mopper If you want to install an unstable version or a different branch: @@ -35,6 +36,6 @@ If you want to install an unstable version or a different branch: MOPPeR is pre-installed into a Conda environment at NCI. Load it with:: module use /g/data3/hh5/public/modules - module load conda/analysis3-unstable + module load conda/analysis3 NB. You need to be a member of the hh5 project to load the modules.
diff --git a/conda/meta.yaml b/conda/meta.yaml index bf5e2cd..479c1f7 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,6 +1,6 @@ package: name: mopper - version: "{{ environ.get('GIT_DESCRIBE_TAG', '0.9') }}" + version: "{{ environ.get('GIT_DESCRIBE_TAG', '1.0') }}" #source: # path: ./ @@ -8,9 +8,9 @@ package: source: #url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR/archive/refs/tags/{{version}}.tar.gz git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git - git_tag: prerelease - #git_rev: "{{ version }}" - #git_depth: 1 # (Defaults to -1/not shallow) + #git_tag: prerelease + git_rev: "{{ version }}" + git_depth: 1 # (Defaults to -1/not shallow) #path: ../ build: diff --git a/src/mopdata/cmor_tables/CM2_3hr.json b/src/mopdata/cmor_tables/CM2_3hr.json index dd13bab..34584c2 100644 --- a/src/mopdata/cmor_tables/CM2_3hr.json +++ b/src/mopdata/cmor_tables/CM2_3hr.json @@ -104,10 +104,7 @@ "valid_max": "", "ok_min_mean_abs": "", "ok_max_mean_abs": "" - }, - - - + } } } From d22f93310c974d8f212eb7b344df36dd72b48277 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 23 Aug 2024 12:16:01 +1000 Subject: [PATCH 137/137] attempt to fix version from git --- conda/meta.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 479c1f7..fff9987 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -10,8 +10,7 @@ source: git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git #git_tag: prerelease git_rev: "{{ version }}" - git_depth: 1 # (Defaults to -1/not shallow) - #path: ../ + #git_depth: 1 # (Defaults to -1/not shallow) build: number: 1
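A note on the two version-detection mechanisms the release patches alternate between, sketched here for clarity (the tag name v1.0.0 below is hypothetical): the workflows (PATCH 123 and the release workflow in PATCH 136) derive the version from the git ref, while conda/meta.yaml (PATCH 124 onwards) relies on conda-build's own git metadata.

# Workflow route: ${GITHUB_REF#refs/*/} strips the shortest leading
# match of "refs/*/" from the ref, leaving the bare tag or branch name.
GITHUB_REF=refs/tags/v1.0.0
RELEASE_VERSION=${GITHUB_REF#refs/*/}
echo "$RELEASE_VERSION"   # prints: v1.0.0

# meta.yaml route: conda-build exports GIT_DESCRIBE_TAG when the source is
# a git checkout, so the Jinja expression
#   {{ environ.get('GIT_DESCRIBE_TAG', '1.0') }}
# resolves to the most recent tag and falls back to "1.0" when it is unset.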