From bb306ed1caf788c5ade40c2efc9383eae325138b Mon Sep 17 00:00:00 2001
From: Paola Petrelli
Date: Tue, 2 Jul 2024 12:47:28 +1000
Subject: [PATCH 001/137] added ci github action #146, updated tests, restructured build-umfrq #148

---
 .github/workflows/mopper-conda.yaml | 54 +++++++++++++++++++
 src/mopdb/mopdb_utils.py            | 80 +++++++++++++++--------------
 tests/conftest.py                   | 29 +++++++++++
 tests/test_mop_utils.py             | 26 +++++-----
 tests/test_mopdb.py                 | 20 ++++----
 tests/test_mopdb_utils.py           | 10 ++++
 6 files changed, 158 insertions(+), 61 deletions(-)
 create mode 100644 .github/workflows/mopper-conda.yaml

diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml
new file mode 100644
index 0000000..98ea1a7
--- /dev/null
+++ b/.github/workflows/mopper-conda.yaml
@@ -0,0 +1,54 @@
+name: mopper-conda-install-test
+
+#on: [push]
+on:
+  push:
+    branches:
+      - main
+      - newrelease
+  pull_request:
+    branches:
+      - main
+
+
+jobs:
+  build-linux:
+    runs-on: ubuntu-latest
+    strategy:
+      max-parallel: 5
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v2
+      with:
+        # quoted so YAML reads 3.10, not the float 3.1
+        python-version: "3.10"
+    - name: Add conda to system path
+      run: |
+        # $CONDA is an environment variable pointing to the root of the miniconda directory
+        echo $CONDA/bin >> $GITHUB_PATH
+    - name: Install dependencies
+      run: |
+        conda env update --file conda/environment.yml --name base
+    - name: Lint with flake8
+      run: |
+        conda install flake8
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+#    - name: Install package
+#      run: |
+#        conda run python setup.py install
+    - name: Test with pytest
+      run: |
+        conda install pytest coverage codecov
+        conda run python -m pytest
+        conda run coverage run --source src -m py.test
+    - name: Upload to codecov
+      # no step has id "build", so plain success() is used here
+      if: success()
+      run: |
+        curl -Os https://uploader.codecov.io/latest/linux/codecov
+        chmod +x codecov
+        ./codecov

diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py
index 7d4ab62..85eb922 100644
--- a/src/mopdb/mopdb_utils.py
+++ b/src/mopdb/mopdb_utils.py
@@ -426,32 +426,32 @@ def list_files(indir, match, db_log):
 
 
 def build_umfrq(time_axs, ds, db_log):
-    """
+    """Return a dictionary with frequency for each time axis.
+
+    Frequency is inferred by comparing interval between two consecutive
+    timesteps with expected interval at a given frequency.
+    Order time_axis so ones with only one step are last, so we can use
+    file frequency (interval_file) inferred from other time axes.
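+
+    For example, with daily timesteps the first interval
+    (ds[t][1] - ds[t][0]).values is close to 1.0 days, so
+    math.isclose(interval, int2frq['day'], rel_tol=0.05) is True
+    and the axis is assigned 'day'.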
""" umfrq = {} - #PPfirst_step = {} int2frq = {'dec': 3652.0, 'yr': 365.0, 'mon': 30.0, 'day': 1.0, '6hr': 0.25, '3hr': 0.125, '1hr': 0.041667, '10min': 0.006944} - for t in time_axs: - #PPfirst_step[t] = ds[t][0].values + time_axs.sort(key=lambda x: len(ds[x]), reverse=True) + db_log.debug(f"in build_umfrq, time_axs: {time_axs}") + for t in time_axs: + db_log.debug(f"len of time axis {t}: {len(ds[t])}") if len(ds[t]) > 1: interval = (ds[t][1]-ds[t][0]).values interval_file = (ds[t][-1] -ds[t][0]).values - for k,v in int2frq.items(): - if math.isclose(interval, v, rel_tol=0.05): - umfrq[t] = k - break else: - umfrq[t] = 'file' - # use other time_axis info to work out frq of time axis with 1 step - db_log.debug(f"umfrq in function {umfrq}") - for t,frq in umfrq.items(): - if frq == 'file': - for k,v in int2frq.items(): - if math.isclose(interval_file, v, rel_tol=0.05): - umfrq[t] = k - break + interval = interval_file + db_log.debug(f"interval 2 timesteps for {t}: {interval}") + db_log.debug(f"interval entire file {t}: {interval_file}") + for k,v in int2frq.items(): + if math.isclose(interval, v, rel_tol=0.05): + umfrq[t] = k + break return umfrq @@ -461,24 +461,23 @@ def get_frequency(realm, fname, ds, db_log): returns dictionary with frequency: variable list """ umfrq = {} - frequency = 'NA' + frequency = 'NAfrq' if realm == 'atmos': fbits = fname.split("_") frequency = fbits[-1].replace(".nc", "") - if frequency == 'dai': - frequency = 'day' - elif frequency == '3h': - frequency = '3hr' - elif frequency == '6h': - frequency = '6hr' + fix_frq = {'dai': 'day', '3h': '3hr', '6h': '6hr'} + if frequency in fix_frq.keys(): + frequency = fix_frq[frequency] else: frequency = frequency.replace('hPt', 'hrPt') + # retrieve all time axes and check their frequency time_axs = [d for d in ds.dims if 'time' in d] time_axs_len = set(len(ds[d]) for d in time_axs) if len(time_axs_len) == 1: umfrq = {} else: umfrq = build_umfrq(time_axs, ds, db_log) + db_log.debug(f"umfrq: {umfrq}") elif realm == 'ocean': # if I found scalar or monthly in any of fbits if any(x in fname for x in ['scalar', 'month']): @@ -544,24 +543,10 @@ def write_varlist(conn, indir, startdate, version, db_log): "vtype", "size", "nsteps", "filename", "long_name", "standard_name"]) # get attributes for the file variables - try: - if version == 'AUS2200': - realm = '/atmos/' - else: - realm = [x for x in ['/atmos/', '/ocean/', '/ice/'] if x in str(fpath)][0] - except: - realm = [x for x in ['/atm/', '/ocn/', '/ice/'] if x in str(fpath)][0] - realm = realm[1:-1] - if realm == 'atm': - realm = 'atmos' - elif realm == 'ocn': - realm = 'ocean' - db_log.debug(realm) + realm = get_realm(fpath, version, db_log) ds = xr.open_dataset(fpath, decode_times=False) coords = [c for c in ds.coords] + ['latitude_longitude'] frequency, umfrq = get_frequency(realm, fpath.name, ds, db_log) - db_log.debug(f"Frequency: {frequency}") - db_log.debug(f"umfrq: {umfrq}") multiple_frq = False if umfrq != {}: multiple_frq = True @@ -961,3 +946,20 @@ def check_realm_units(conn, var, db_log): db_log.warning(f"Variable {vname} not found in cmor table") return var + + def get_realm(fpath, version, db_log): + '''Return realm for variable in files or NArealm''' + if version == 'AUS2200': + realm = 'atmos' + else: + realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] + if x in fpath.parts][0] + if realm == 'atm': + realm = 'atmos' + elif realm == 'ocn': + realm = 'ocean' + elif realm is None: + realm = 'NArealm' + db_log.info(f"Couldn't detect realm from path, setting 
to NArealm") + db_log.debug(f"Realm is {realm}") + return realm diff --git a/tests/conftest.py b/tests/conftest.py index 2f7fcbf..7f544ac 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,6 +22,7 @@ import numpy as np import pandas as pd import datetime +import logging from mopdb.mopdb_utils import mapping_sql, cmorvar_sql from mopper.setup_utils import filelist_sql @@ -29,7 +30,17 @@ TESTS_HOME = os.path.abspath(os.path.dirname(__file__)) TESTS_DATA = os.path.join(TESTS_HOME, "testdata") +# setting up loggers for both mopdb and mop +@pytest.fixture +def moplog(): + return logging.getLogger('mop_log') + + +@pytest.fixture +def mopdblog(): + return logging.getLogger('mopdb_log') +# setting up fixtures for databases:a ccess.db and mopper.db @pytest.fixture def session(): connection = sqlite3.connect(':memory:') @@ -64,6 +75,10 @@ def setup_mopper_db(session): session.connection.commit() +def test_check_timestamp(caplog): + global ctx, logger + caplog.set_level(logging.DEBUG, logger='mop_log') + @pytest.fixture def varlist_rows(): lines = ["fld_s03i236;tas;K;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature", @@ -71,3 +86,17 @@ def varlist_rows(): "fld_s03i236;tas;;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature"] rows = [l.split(";") for l in lines] return rows + +@pytest.fixture +def um_multi_time(): + '''Return a um stule file with multiple time axes''' + time1 = pd.date_range("2001-01-01", periods=1) + time2 = pd.date_range("2001-01-01", periods=24, freq='h') + time3 = pd.date_range("2001-01-01", periods=48, freq='30min') + var1 = xr.DataArray(name='var1', data=[1], + dims=["time"], coords={"time": time1}) + var2 = xr.DataArray(name='var2', data=np.arange(24), + dims=["time_0"], coords={"time_0": time2}) + var3 = xr.DataArray(name='var3', data=np.arange(48), dims=["time_1"], + coords={"time_1": time3}) + return xr.merge([var1, var2, var3]) diff --git a/tests/test_mop_utils.py b/tests/test_mop_utils.py index d006ca1..f177f21 100644 --- a/tests/test_mop_utils.py +++ b/tests/test_mop_utils.py @@ -19,6 +19,7 @@ import numpy as np import pandas as pd from mopper.mop_utils import * +from conftest import moplog #try: # import unittest.mock as mock @@ -28,14 +29,14 @@ ctx = click.Context(click.Command('cmd'), obj={'sel_start': '198302170600', 'sel_end': '198302181300', 'realm': 'atmos', 'frequency': '1hr'}) -logger = logging.getLogger('mop_log') +#logger = logging.getLogger('mop_log') -def test_check_timestamp(caplog): - global ctx, logger - caplog.set_level(logging.DEBUG, logger='mop_log') +def test_check_timestamp(caplog, ctx): + moplog.set_level(logging.DEBUG)#, logger='mop_log') # test atmos files files = [f'obj_198302{d}T{str(h).zfill(2)}01_1hr.nc' for d in ['17','18','19'] for h in range(24)] + print(files) inrange = files[6:37] with ctx: out1 = check_timestamp(files, logger) @@ -47,7 +48,7 @@ def test_check_timestamp(caplog): out2 = check_timestamp(files, logger) assert out2 == inrange # test ocn files - ctx.obj['frequency'] = 'mon' + ctx.obj['frequency'] = 'day' ctx.obj['realm'] = 'ocean' files = [f'ocn_daily.nc-198302{str(d).zfill(2)}' for d in range(1,29)] inrange = files[16:18] @@ -56,10 +57,9 @@ def test_check_timestamp(caplog): assert out3 == inrange -def test_get_cmorname(caplog): - global ctx, logger - caplog.set_level(logging.DEBUG, logger='mop_log') - # axiis_name t +def test_get_cmorname(caplog, ctx): + 
caplog.set_level(logging.DEBUG)#, logger='mop_log') + # axis_name t ctx.obj['calculation'] = "plevinterp(var[0], var[1], 24)" ctx.obj['variable_id'] = "ta24" ctx.obj['timeshot'] = 'mean' @@ -71,10 +71,10 @@ def test_get_cmorname(caplog): foo = xr.DataArray(data, coords=[levs, tdata, lats, lons], dims=["lev", "t", "lat", "lon"]) with ctx: - tname = get_cmorname('t', foo.t, logger, z_len=None) - iname = get_cmorname('i_index', foo.lon, logger, z_len=None) - jname = get_cmorname('j_index', foo.lat, logger, z_len=None) - zname = get_cmorname('z', foo.lev, logger, z_len=3) + tname = get_cmorname('t', foo.t, caplog, z_len=None) + iname = get_cmorname('lon', foo.lon, caplog, z_len=None) + jname = get_cmorname('lat', foo.lat, caplog, z_len=None) + zname = get_cmorname('z', foo.lev, caplog, z_len=3) assert tname == 'time' assert iname == 'longitude' assert jname == 'latitude' diff --git a/tests/test_mopdb.py b/tests/test_mopdb.py index 37f4232..0eddc58 100644 --- a/tests/test_mopdb.py +++ b/tests/test_mopdb.py @@ -22,11 +22,13 @@ from click.testing import CliRunner @pytest.mark.parametrize('subcommand', ['varlist', 'template', 'check', 'cmor', 'table', 'map']) -def test_cmip(command, runner): - result = runner.invoke(mopdb, ['--help']) - assert result.exit_code == 0 - result = runner.invoke(mopdb, [subcommand, '--help']) - assert result.exit_code == 0 +def test_mopdb(command, subcommand, runner): + ctx = click.Context(click.Command('cmd'), obj={'prop': 'A Context'}) + with ctx: + result = runner.invoke(mopdb, ['--help']) + assert result.exit_code == 0 + result = runner.invoke(mopdb, [subcommand, '--help']) + assert result.exit_code == 0 @pytest.mark.usefixtures("setup_db") # 1 def test_template(session): @@ -53,7 +55,7 @@ def test_template(session): # with runner.isolated_filesystem(temp_dir=tmp_path) as td: # ... 
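
A note on the pattern used in these tests: mopper's commands read shared
state from click's context object, so the tests build a click.Context and
run code inside "with ctx:". A minimal, self-contained sketch of the same
idea (the command and object names here are placeholders, not mopper's
real ones):

    import click
    from click.testing import CliRunner

    @click.command()
    @click.pass_context
    def cmd(ctx):
        # echo whatever the caller stored in the context object
        click.echo(ctx.obj['prop'])

    def test_cmd():
        runner = CliRunner()
        # obj= seeds ctx.obj for the invoked command
        result = runner.invoke(cmd, obj={'prop': 'A Context'})
        assert result.exit_code == 0
        assert 'A Context' in result.output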
diff --git a/tests/test_mopdb_utils.py b/tests/test_mopdb_utils.py
index 48aa87b..103f75e 100644
--- a/tests/test_mopdb_utils.py
+++ b/tests/test_mopdb_utils.py
@@ -21,6 +21,7 @@
 import click
 import logging
 from mopdb.mopdb_utils import *
+from conftest import um_multi_time
 
 #from click.testing import CliRunner
 
@@ -43,3 +44,12 @@ def test_add_var(varlist_rows, idx, db_log):
     match = ("tas", "", "K")
     vlist = add_var(vlist, varlist_rows[idx], match, db_log)
     assert vlist == vlistout
+
+
+def test_build_umfrq(um_multi_time, caplog):
+    caplog.set_level(logging.DEBUG)
+    time_axs = [d for d in um_multi_time.dims if 'time' in d]
+    print(time_axs)
+    umfrq = {'time': 'day', 'time_0': '1hr', 'time_1': '30min'}
+    assert umfrq == build_umfrq(time_axs, um_multi_time, caplog)
+

From 300927e1881099283b7cf9f09371093aba1d14ce Mon Sep 17 00:00:00 2001
From: Paola Petrelli
Date: Wed, 3 Jul 2024 17:41:22 +1000
Subject: [PATCH 002/137] partial work on tests, issues #147 and #146

---
 ACDD_conf.yaml             |  13 +-
 CMIP6_conf.yaml            |   2 +
 src/mopdb/mopdb.py         |  95 +++++++------
 src/mopdb/mopdb_utils.py   | 275 ++++++++++++++++++++-----------------
 src/mopper/calculations.py |  14 +-
 src/mopper/mop_setup.py    |  23 ++--
 src/mopper/mop_utils.py    | 101 +++++++++-----
 src/mopper/mopper.py       |  84 +++++------
 src/mopper/setup_utils.py  |  29 ++--
 tests/conftest.py          |  17 ++-
 tests/test_mopdb_utils.py  |  27 ++--
 11 files changed, 369 insertions(+), 311 deletions(-)

diff --git a/ACDD_conf.yaml b/ACDD_conf.yaml
index d75312a..f507aeb 100755
--- a/ACDD_conf.yaml
+++ b/ACDD_conf.yaml
@@ -83,16 +83,25 @@ cmor:
     grids: "ACDD_grids.json"
 # Additional NCI information:
     # NCI project to charge compute; $PROJECT = your default project
-    # NCI queue to use; hugemem is recommended
     project: v45
     # additional NCI projects to be included in the storage flags
    addprojs: []
-    # queue and memory (GB) per CPU (depends on queue)
+    # queue and memory (GB) per CPU (depends on queue),
+    # hugemem is recommended for high resolution data and/or derived variables
+    # hugemem requires a minimum of 6 cpus; this is handled by the code
     queue: hugemem
     mem_per_cpu: 32
     # walltime in "hh:mm:ss"
     walltime: '8:00:00'
     mode: custom
+    # conda_env to use; by default hh5 analysis3-unstable,
+    # as this has the code and all dependencies installed.
+    # You can override that by supplying the env to pass to "source"
+    # Ex
+    # conda_env: /bin/activate
+    # or you can set "test: true" and modify mopper_job.sh manually
+    conda_env: default
+    #
 # Global attributes: these will be added to each file; comment unwanted ones
 # Using ACDD CV vocab to check validity of global attributes

diff --git a/CMIP6_conf.yaml b/CMIP6_conf.yaml
index fd5f14b..9ae85aa 100755
--- a/CMIP6_conf.yaml
+++ b/CMIP6_conf.yaml
@@ -84,6 +84,8 @@ cmor:
     # additional NCI projects to be included in the storage flags
     addprojs: []
     # queue and memory (GB) per CPU (depends on queue)
+    # hugemem is recommended for high resolution data and/or derived variables
+    # hugemem requires a minimum of 6 cpus; this is handled by the code
     queue: hugemem
     mem_per_cpu: 30
     # walltime in "hh:mm:ss"
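
The mopdb.py and mopdb_utils.py changes below all apply the same refactor:
instead of threading a db_log argument through every call, each function
now fetches the shared logger by name. A minimal sketch of the pattern
(the helper name here is illustrative, not mopper's):

    import logging

    def config_log(debug):
        # configure the named logger once, at startup
        logger = logging.getLogger('mopdb_log')
        logger.setLevel(logging.DEBUG if debug else logging.INFO)
        logger.addHandler(logging.StreamHandler())
        return logger

    def some_helper():
        # any later call retrieves the same logger object by name
        mopdb_log = logging.getLogger('mopdb_log')
        mopdb_log.info("no logger argument needed")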
diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py
index fbc5df8..892b4cb 100644
--- a/src/mopdb/mopdb.py
+++ b/src/mopdb/mopdb.py
@@ -70,7 +70,7 @@ def mopdb(ctx, debug):
     ctx.obj={}
     # set up a default value for logging if none selected
     ctx.obj['debug'] = debug
-    ctx.obj['log'] = config_log(debug)
+    mopdb_log = config_log(debug)
 
 
 @mopdb.command(name='check')
@@ -89,11 +89,11 @@ def check_cmor(ctx, dbname):
     dbname : str
         Database relative path (default is data/access.db)
     """
-    db_log = ctx.obj['log']
+    mopdb_log = logging.getLogger('mopdb_log')
     # connect to db, this will create one if not existing
     if dbname == 'default':
         dbname = import_files('data').joinpath('access.db')
-    conn = db_connect(dbname, db_log)
+    conn = db_connect(dbname)
     # get list of variables already in db
     sql = 'SELECT name, out_name FROM cmorvar'
     results = query(conn, sql, first=False)
@@ -108,9 +108,9 @@ def check_cmor(ctx, dbname):
     results = query(conn, sql, first=False)
     map_vars = [x[0] for x in results]
     missing = set(map_vars) - set(cmor_vars)
-    db_log.info("Variables not yet defined in cmorvar table:")
+    mopdb_log.info("Variables not yet defined in cmorvar table:")
     for v in missing:
-        db_log.info(f"{v}")
+        mopdb_log.info(f"{v}")
     conn.close()
     return
 
@@ -140,11 +140,11 @@ def cmor_table(ctx, dbname, fname, alias, label):
     label : str
         Label indicating preferred cmor variable definitions
     """
-    db_log = ctx.obj['log']
+    mopdb_log = logging.getLogger('mopdb_log')
     # connect to db, this will create one if not existing
     if dbname == 'default':
         dbname = import_files('data').joinpath('access.db')
-    conn = db_connect(dbname, db_log)
+    conn = db_connect(dbname)
     # get list of variables already in db
     sql = "SELECT out_name, frequency, modeling_realm FROM cmorvar"
     results = query(conn, sql, first=False)
     # cmor_var,units,dimensions,frequency,realm,cell_methods
     # this sometimes differs from name used in tables that can distinguish different dims/freq
     cmor_vars = set(x[0] for x in results)
     # read variable list from map_ file
-    vlist = read_map(fname, alias, db_log)
+    vlist = read_map(fname, alias)
     # extract cmor_var,units,dimensions,frequency,realm,cell_methods
     var_list = []
     for v in vlist[1:]:
         # This was adding variables to the table only if they didn't exist in other tables
         if v[0][:4] != 'fld_':
             if v[0] not in cmor_vars:
-                db_log.warning(f"Variable {v[0]} not defined in cmorvar table")
+                mopdb_log.warning(f"Variable {v[0]} not defined in cmorvar table")
             else:
                 sql = f"SELECT * FROM cmorvar WHERE out_name='{v[0]}'"
@@ -178,13 +178,13 @@ def cmor_table(ctx, dbname, fname, alias, label):
             definition[2] = v[6]
             # if units are different print warning!
if v[3] != record[4]: - db_log.warning(f"Variable {v[0]} units orig/table are different: {v[3]}/{record[4]}") + mopdb_log.warning(f"Variable {v[0]} units orig/table are different: {v[3]}/{record[4]}") if v[7] != '' and v[7] != record[5]: - db_log.warning(f"Variable {v[0]} cell_methods orig/table are different: {v[7]}/{record[5]}") + mopdb_log.warning(f"Variable {v[0]} cell_methods orig/table are different: {v[7]}/{record[5]}") if len(v[4].split()) != len(record[9].split()): - db_log.warning(f"Variable {v[0]} number of dims orig/table are different: {v[4]}/{record[9]}") + mopdb_log.warning(f"Variable {v[0]} number of dims orig/table are different: {v[4]}/{record[9]}") var_list.append(definition) - write_cmor_table(var_list, alias, db_log) + write_cmor_table(var_list, alias, mopdb_log) conn.close() return @@ -213,26 +213,26 @@ def update_cmor(ctx, dbname, fname, alias): ------- """ - db_log = ctx.obj['log'] + mopdb_log = logging.getLogger('mopdb_log') if alias is None: alias = fname.split("/")[-1] alias = alias.replace('.json', '') - db_log.info(f"Adding {alias} to variable name to track origin") + mopdb_log.info(f"Adding {alias} to variable name to track origin") # connect to db, this will create one if not existing dbcentral = import_files('data').joinpath('access.db') if dbname in [dbcentral, 'default']: - db_log.error("The package database cannot be updated") + mopdb_log.error("The package database cannot be updated") sys.exit() - conn = db_connect(dbname, db_log) + conn = db_connect(dbname) # create table if not existing table_sql = cmorvar_sql() - create_table(conn, table_sql, db_log) + create_table(conn, table_sql) # get list of variables already in db in debug mode if ctx.obj['debug']: sql = 'SELECT name FROM cmorvar' results = query(conn, sql, first=False) existing_vars = [x[0] for x in results] - db_log.debug(f"Variables already in db: {existing_vars}") + mopdb_log.debug(f"Variables already in db: {existing_vars}") # read list of vars from file with open(fname, 'r') as fj: @@ -247,14 +247,14 @@ def update_cmor(ctx, dbname, fname, alias): if 'flag_values' not in row.keys(): values = values[:-2] + ['',''] + values[-2:] vars_list.append(tuple([name] + values)) - db_log.debug(f"Variables list: {vars_list}") + mopdb_log.debug(f"Variables list: {vars_list}") # check that all tuples have len == 19 for r in vars_list: if len(r) != 19: - db_log.error(r) + mopdb_log.error(r) sys.exit() # insert new vars and update existing ones - update_db(conn, 'cmorvar', vars_list, db_log) + update_db(conn, 'cmorvar', vars_list) return @@ -287,13 +287,13 @@ def map_template(ctx, dbname, fname, alias, version): Returns ------- """ - db_log = ctx.obj['log'] + mopdb_log = logging.getLogger('mopdb_log') if alias is None: alias = fname.split(".")[0] # connect to db, check first if db exists or exit if dbname == 'default': dbname = import_files('data').joinpath('access.db') - conn = db_connect(dbname, db_log) + conn = db_connect(dbname) # read list of vars from file with open(fname, 'r') as csvfile: reader = csv.DictReader(csvfile, delimiter=';') @@ -301,24 +301,23 @@ def map_template(ctx, dbname, fname, alias, version): # return lists of fully/partially matching variables and stash_vars # these are input_vars for calculation defined in already in mapping db full, no_ver, no_frq, stdn, no_match, stash_vars = parse_vars(conn, - rows, version, db_log) + rows, version) # remove duplicates from partially matched variables - no_ver = remove_duplicate(no_ver, db_log) - no_frq = remove_duplicate(no_frq, db_log, 
strict=False) - no_match = remove_duplicate(no_match, db_log, strict=False) + no_ver = remove_duplicate(no_ver) + no_frq = remove_duplicate(no_frq, strict=False) + no_match = remove_duplicate(no_match, strict=False) # check if more derived variables can be added based on all # input_vars being available pot_full, pot_part, pot_varnames = potential_vars(conn, rows, - stash_vars, version, db_log) + stash_vars, version) # potential vars have always duplicates: 1 for each input_var - pot_full = remove_duplicate(pot_full, db_log, strict=False) - pot_part = remove_duplicate(pot_part, db_log, extra=pot_full, - strict=False) - db_log.info(f"Derived variables: {pot_varnames}") + pot_full = remove_duplicate(pot_full, strict=False) + pot_part = remove_duplicate(pot_part, extra=pot_full, strict=False) + mopdb_log.info(f"Derived variables: {pot_varnames}") write_map_template(conn, full, no_ver, no_frq, stdn, - no_match, pot_full, pot_part, alias, db_log) + no_match, pot_full, pot_part, alias) conn.close() return @@ -347,29 +346,29 @@ def update_map(ctx, dbname, fname, alias): Returns ------- """ - db_log = ctx.obj['log'] + mopdb_log = logging.getLogger('mopdb_log') # connect to db, this will create one if not existing dbcentral = import_files('data').joinpath('access.db') if dbname in [dbcentral, 'default']: - db_log.error("The package database cannot be updated") + mopdb_log.error("The package database cannot be updated") sys.exit() - conn = db_connect(dbname, db_log) + conn = db_connect(dbname) # create table if not existing table_sql = mapping_sql() - create_table(conn, table_sql, db_log) + create_table(conn, table_sql) # get list of variables already in db in debug mode if ctx.obj['debug']: sql = 'SELECT cmor_var FROM mapping' results = query(conn, sql, first=False) existing_vars = [x[0] for x in results] - db_log.debug(f"Variables already in db: {existing_vars}") + mopdb_log.debug(f"Variables already in db: {existing_vars}") # read list of vars from file if alias == 'app4': var_list = read_map_app4(fname) else: - var_list = read_map(fname, alias, db_log) + var_list = read_map(fname, alias) # update mapping table - update_db(conn, 'mapping', var_list, db_log) + update_db(conn, 'mapping', var_list) return @@ -405,12 +404,12 @@ def model_vars(ctx, indir, startdate, dbname, version): Returns ------- """ - db_log = ctx.obj['log'] + mopdb_log = logging.getLogger('mopdb_log') # connect to db, this will create one if not existing if dbname == 'default': dbname = import_files('data').joinpath('access.db') - conn = db_connect(dbname, db_log) - write_varlist(conn, indir, startdate, version, db_log) + conn = db_connect(dbname) + write_varlist(conn, indir, startdate, version) conn.close() return @@ -442,19 +441,19 @@ def remove_record(ctx, dbname, table, pair): Returns ------- """ - db_log = ctx.obj['log'] + mopdb_log = logging.getLogger('mopdb_log') # connect to db, this will create one if not existing dbcentral = import_files('data').joinpath('access.db') if dbname == dbcentral: - db_log.error("The package database cannot be updated") + mopdb_log.error("The package database cannot be updated") sys.exit() - conn = db_connect(dbname, db_log) + conn = db_connect(dbname) # set which columns to show based on table if table == 'cmorvar': col = "name" elif table == 'mapping': col = "cmor_var,frequency,realm,cmor_table" # select, confirm, delete record/s - delete_record(conn, table, col, pair, db_log) + delete_record(conn, table, col, pair) return diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py 
index 85eb922..d4de94f 100644
--- a/src/mopdb/mopdb_utils.py
+++ b/src/mopdb/mopdb_utils.py
@@ -28,6 +28,7 @@
 import json
 import stat
 import xarray as xr
+import numpy as np
 import math
 from datetime import datetime, date
 from collections import Counter
@@ -38,7 +39,7 @@
 def config_log(debug):
     """Configures log file"""
     # start a logger
-    logger = logging.getLogger('db_log')
+    logger = logging.getLogger('mopdb_log')
     # set a formatter to manage the output format of our handler
     formatter = logging.Formatter('%(asctime)s; %(message)s',"%Y-%m-%d %H:%M:%S")
     # set the level for the logger, has to be logging.LEVEL not a string
@@ -72,11 +73,12 @@ def config_log(debug):
     return logger
 
 
-def db_connect(db, db_log):
+def db_connect(db):
     """Connects to ACCESS mapping sqlite database"""
+    mopdb_log = logging.getLogger('mopdb_log')
     conn = sqlite3.connect(db, timeout=10, isolation_level=None)
     if conn.total_changes == 0:
-        db_log.info(f"Opened database {db} successfully")
+        mopdb_log.info(f"Opened database {db} successfully")
     return conn
 
 
@@ -175,7 +177,7 @@ def cmor_update_sql():
     return sql
 
 
-def create_table(conn, sql, db_log):
+def create_table(conn, sql):
     """Creates table if database is empty
 
     Parameters
@@ -183,17 +185,17 @@ def create_table(conn, sql, db_log):
     conn : connection object
     sql : str
         SQL style string defining table to create
-    db_log: logger obj
     """
+    mopdb_log = logging.getLogger('mopdb_log')
     try:
         c = conn.cursor()
         c.execute(sql)
     except Exception as e:
-        db_log.error(e)
+        mopdb_log.error(e)
     return
 
 
-def update_db(conn, table, rows_list, db_log):
+def update_db(conn, table, rows_list):
     """Adds to table new variables definitions
 
     Parameters
@@ -203,25 +205,25 @@ def update_db(conn, table, rows_list, db_log):
         Name of database table to use
     rows_list : list
         List of str representing rows to add to table
-    db_log: logger obj
     """
+    mopdb_log = logging.getLogger('mopdb_log')
     # insert into db
     if table == 'cmorvar':
         sql = cmor_update_sql()
     elif table == 'mapping':
         sql = map_update_sql()
     else:
-        db_log.error("Provide an insert sql statement for table: {table}")
+        mopdb_log.error(f"Provide an insert sql statement for table: {table}")
     if len(rows_list) > 0:
-        db_log.info('Updating db ...')
+        mopdb_log.info('Updating db ...')
         with conn:
             c = conn.cursor()
-            db_log.debug(sql)
+            mopdb_log.debug(sql)
             c.executemany(sql, rows_list)
             nmodified = c.rowcount
-            db_log.info(f"Rows modified: {nmodified}")
+            mopdb_log.info(f"Rows modified: {nmodified}")
     conn.close()
-    db_log.info('--- Done ---')
+    mopdb_log.info('--- Done ---')
     return
 
 
@@ -246,6 +248,7 @@ def query(conn, sql, tup=(), first=True):
     result : tuple/list(tuple)
         tuple or a list of, representing row/s returned by query
     """
+    mopdb_log = logging.getLogger('mopdb_log')
     with conn:
         c = conn.cursor()
         c.execute(sql, tup)
@@ -260,16 +263,18 @@
 def get_columns(conn, table):
     """Gets list of columns from db table
     """
+    mopdb_log = logging.getLogger('mopdb_log')
     sql = f'PRAGMA table_info({table});'
     table_data = query(conn, sql, first=False)
     columns = [x[1] for x in table_data]
     return columns
 
 
-def get_cmorname(conn, varname, version, frequency, db_log):
+def get_cmorname(conn, varname, version, frequency):
     """Queries mapping table for cmip name given variable name as output
        by the model
     """
+    mopdb_log = logging.getLogger('mopdb_log')
     sql = f"""SELECT cmor_var,model,cmor_table,frequency FROM mapping
         WHERE input_vars='{varname}' and (calculation=''
         or calculation IS NULL)"""
@@ -283,7 +288,7 @@ def get_cmorname(conn, varname, version, 
frequency, db_log): cmor_var = names[0] cmor_table = tables[0] elif len(names) > 1: - db_log.debug(f"Found more than 1 definition for {varname}:\n" + + mopdb_log.debug(f"Found more than 1 definition for {varname}:\n" + f"{results}") match_found = False for r in results: @@ -306,7 +311,7 @@ def get_cmorname(conn, varname, version, frequency, db_log): if not match_found: cmor_var = names[0] cmor_table = tables[0] - db_log.info(f"Found more than 1 definition for {varname}:\n"+ + mopdb_log.info(f"Found more than 1 definition for {varname}:\n"+ f"{results}\n Using {cmor_var} from {cmor_table}") return cmor_var, cmor_table @@ -335,23 +340,24 @@ def cmor_table_header(name, realm, frequency): return header -def write_cmor_table(var_list, name, db_log): +def write_cmor_table(var_list, name): """ """ + mopdb_log = logging.getLogger('mopdb_log') realms = [v[2] for v in var_list] setr = set(realms) if len(setr) > 1: realm = Counter(realms).most_common(1)[0][0] - db_log.info(f"More than one realms found for variables: {setr}") - db_log.info(f"Using: {realm}") + mopdb_log.info(f"More than one realms found for variables: {setr}") + mopdb_log.info(f"Using: {realm}") else: realm = realms[0] freqs = [v[1] for v in var_list] setf = set(freqs) if len(setf) > 1: frequency = Counter(freqs).most_common(1)[0][0] - db_log.info(f"More than one freqs found for variables: {setf}") - db_log.info(f"Using: {frequency}") + mopdb_log.info(f"More than one freqs found for variables: {setf}") + mopdb_log.info(f"Using: {frequency}") else: frequency = freqs[0] header = cmor_table_header(name, realm, frequency) @@ -373,7 +379,7 @@ def write_cmor_table(var_list, name, db_log): return -def delete_record(conn, table, col, pairs, db_log): +def delete_record(conn, table, col, pairs): """Deletes record from table based on pairs of column and value passed for selection @@ -387,45 +393,45 @@ def delete_record(conn, table, col, pairs, db_log): name of column to return with query pairs : list[tuple(str, str)] pairs of columns, values to select record/s - db_log: logger obj - logger connection """ + mopdb_log = logging.getLogger('mopdb_log') # Set up query sqlwhere = f"FROM {table} WHERE " for c,v in pairs: sqlwhere += f"{c}='{v}' AND " sql = f"SELECT {col} " + sqlwhere[:-4] - db_log.debug(f"Delete query: {sql}") + mopdb_log.debug(f"Delete query: {sql}") xl = query(conn, sql, first=False) # Delete from db if xl is not None: - db_log.info(f"Found {len(xl)} records") + mopdb_log.info(f"Found {len(xl)} records") for x in xl: - db_log.info(f"{x}") + mopdb_log.info(f"{x}") confirm = input('Confirm deletion from database: Y/N ') if confirm == 'Y': - db_log.info('Updating db ...') + mopdb_log.info('Updating db ...') with conn: c = conn.cursor() sql = "DELETE " + sqlwhere[:-4] - db_log.debug(f"Delete sql: {sql}") + mopdb_log.debug(f"Delete sql: {sql}") c.execute(sql) c.execute('select total_changes()') - db_log.info(f"Rows modified: {c.fetchall()[0][0]}") + mopdb_log.info(f"Rows modified: {c.fetchall()[0][0]}") else: - db_log.info("The query did not return any records") + mopdb_log.info("The query did not return any records") conn.close() return -def list_files(indir, match, db_log): +def list_files(indir, match): """Returns list of files matching input directory and match""" + mopdb_log = logging.getLogger('mopdb_log') files = [x for x in Path(indir).rglob(f"{match}") if x.is_file()] - db_log.debug(f"{indir}/**/*{match}*") + mopdb_log.debug(f"{indir}/**/*{match}*") return files -def build_umfrq(time_axs, ds, db_log): +def build_umfrq(time_axs, 
ds): """Return a dictionary with frequency for each time axis. Frequency is inferred by comparing interval between two consecutive @@ -433,21 +439,23 @@ def build_umfrq(time_axs, ds, db_log): Order time_axis so ones with only one step are last, so we can use file frequency (interval_file) inferred from other time axes. """ + mopdb_log = logging.getLogger('mopdb_log') umfrq = {} int2frq = {'dec': 3652.0, 'yr': 365.0, 'mon': 30.0, 'day': 1.0, '6hr': 0.25, '3hr': 0.125, - '1hr': 0.041667, '10min': 0.006944} + '1hr': 0.041667, '30min': 0.020833, '10min': 0.006944} time_axs.sort(key=lambda x: len(ds[x]), reverse=True) - db_log.debug(f"in build_umfrq, time_axs: {time_axs}") + mopdb_log.debug(f"in build_umfrq, time_axs: {time_axs}") for t in time_axs: - db_log.debug(f"len of time axis {t}: {len(ds[t])}") + mopdb_log.debug(f"len of time axis {t}: {len(ds[t])}") if len(ds[t]) > 1: - interval = (ds[t][1]-ds[t][0]).values - interval_file = (ds[t][-1] -ds[t][0]).values + interval = (ds[t][1]-ds[t][0]).values / np.timedelta64(1, 'D') +#astype('timedelta64[m]') / 1440.0 + interval_file = (ds[t][-1] -ds[t][0]).values / np.timedelta64(1, 'D') else: interval = interval_file - db_log.debug(f"interval 2 timesteps for {t}: {interval}") - db_log.debug(f"interval entire file {t}: {interval_file}") + mopdb_log.debug(f"interval 2 timesteps for {t}: {interval}") + mopdb_log.debug(f"interval entire file {t}: {interval_file}") for k,v in int2frq.items(): if math.isclose(interval, v, rel_tol=0.05): umfrq[t] = k @@ -455,11 +463,12 @@ def build_umfrq(time_axs, ds, db_log): return umfrq -def get_frequency(realm, fname, ds, db_log): +def get_frequency(realm, fname, ds): """Return frequency based on realm and filename For UM files checks if more than one time axis is present and if so returns dictionary with frequency: variable list """ + mopdb_log = logging.getLogger('mopdb_log') umfrq = {} frequency = 'NAfrq' if realm == 'atmos': @@ -476,8 +485,8 @@ def get_frequency(realm, fname, ds, db_log): if len(time_axs_len) == 1: umfrq = {} else: - umfrq = build_umfrq(time_axs, ds, db_log) - db_log.debug(f"umfrq: {umfrq}") + umfrq = build_umfrq(time_axs, ds) + mopdb_log.debug(f"umfrq: {umfrq}") elif realm == 'ocean': # if I found scalar or monthly in any of fbits if any(x in fname for x in ['scalar', 'month']): @@ -489,7 +498,7 @@ def get_frequency(realm, fname, ds, db_log): frequency = 'mon' elif '_d.' in fname: frequency = 'day' - db_log.debug(f"Frequency: {frequency}") + mopdb_log.debug(f"Frequency: {frequency}") return frequency, umfrq @@ -499,6 +508,7 @@ def get_cell_methods(attrs, dims): `time: point` If `area` not specified is added at start of string as `area: ` """ + mopdb_log = logging.getLogger('mopdb_log') frqmod = '' val = attrs.get('cell_methods', "") if 'area' not in val: @@ -513,29 +523,28 @@ def get_cell_methods(attrs, dims): return val, frqmod -def write_varlist(conn, indir, startdate, version, db_log): +def write_varlist(conn, indir, startdate, version): """Based on model output files create a variable list and save it to a csv file. 
Main attributes needed to map output are provided for each variable """ - #PP temporarily remove .nc as ocean files sometimes have pattern.nc-datestamp - #sdate = f"*{startdate}*.nc" + mopdb_log = logging.getLogger('mopdb_log') sdate = f"*{startdate}*" - files = list_files(indir, sdate, db_log) - db_log.debug(f"Found files: {files}") + files = list_files(indir, sdate) + mopdb_log.debug(f"Found files: {files}") patterns = [] for fpath in files: # get filename pattern until date match - db_log.debug(f"Filename: {fpath.name}") + mopdb_log.debug(f"Filename: {fpath.name}") fpattern = fpath.name.split(startdate)[0] # adding this in case we have a mix of yyyy/yyyymn date stamps # as then a user would have to pass yyyy only and would get 12 files for some of the patterns if fpattern in patterns: continue patterns.append(fpattern) - pattern_list = list_files(indir, f"{fpattern}*", db_log) + pattern_list = list_files(indir, f"{fpattern}*") nfiles = len(pattern_list) - db_log.debug(f"File pattern: {fpattern}") + mopdb_log.debug(f"File pattern: {fpattern}") fcsv = open(f"{fpattern}.csv", 'w') fwriter = csv.writer(fcsv, delimiter=';') fwriter.writerow(["name", "cmor_var", "units", "dimensions", @@ -543,18 +552,18 @@ def write_varlist(conn, indir, startdate, version, db_log): "vtype", "size", "nsteps", "filename", "long_name", "standard_name"]) # get attributes for the file variables - realm = get_realm(fpath, version, db_log) + realm = get_realm(fpath, version) ds = xr.open_dataset(fpath, decode_times=False) coords = [c for c in ds.coords] + ['latitude_longitude'] - frequency, umfrq = get_frequency(realm, fpath.name, ds, db_log) + frequency, umfrq = get_frequency(realm, fpath.name, ds) multiple_frq = False if umfrq != {}: multiple_frq = True - db_log.debug(f"Multiple frq: {multiple_frq}") + mopdb_log.debug(f"Multiple frq: {multiple_frq}") for vname in ds.variables: if vname not in coords and all(x not in vname for x in ['_bnds','_bounds']): v = ds[vname] - db_log.debug(f"Variable: {v.name}") + mopdb_log.debug(f"Variable: {v.name}") # get size in bytes of grid for 1 timestep and number of timesteps vsize = v[0].nbytes nsteps = nfiles * v.shape[0] @@ -564,14 +573,14 @@ def write_varlist(conn, indir, startdate, version, db_log): frequency = umfrq[v.dims[0]] else: frequency = 'NA' - db_log.info(f"Could not detect frequency for variable: {v}") + mopdb_log.info(f"Could not detect frequency for variable: {v}") attrs = v.attrs cell_methods, frqmod = get_cell_methods(attrs, v.dims) varfrq = frequency + frqmod - db_log.debug(f"Frequency x var: {varfrq}") + mopdb_log.debug(f"Frequency x var: {varfrq}") # try to retrieve cmip name cmor_var, cmor_table = get_cmorname(conn, vname, - version, varfrq, db_log) + version, varfrq) line = [v.name, cmor_var, attrs.get('units', ""), " ".join(v.dims), varfrq, realm, cell_methods, cmor_table, v.dtype, vsize, @@ -579,12 +588,13 @@ def write_varlist(conn, indir, startdate, version, db_log): attrs.get('standard_name', "")] fwriter.writerow(line) fcsv.close() - db_log.info(f"Variable list for {fpattern} successfully written") + mopdb_log.info(f"Variable list for {fpattern} successfully written") return def read_map_app4(fname): """Reads APP4 style mapping """ + mopdb_log = logging.getLogger('mopdb_log') # old order #cmor_var,definable,input_vars,calculation,units,axes_mod,positive,ACCESS_ver[CM2/ESM/both],realm,notes var_list = [] @@ -607,7 +617,7 @@ def read_map_app4(fname): return var_list -def read_map(fname, alias, db_log): +def read_map(fname, alias): """Reads complete 
mapping csv file and extract info necessary to create new records for the mapping table in access.db Fields from file: @@ -619,6 +629,7 @@ def read_map(fname, alias, db_log): realm, cell_methods, positive, model, notes, origin NB model and version are often the same but version should eventually be defined in a CV """ + mopdb_log = logging.getLogger('mopdb_log') var_list = [] with open(fname, 'r') as csvfile: reader = csv.reader(csvfile, delimiter=';') @@ -627,8 +638,8 @@ def read_map(fname, alias, db_log): if row[0][0] == "#": continue else: - db_log.debug(f"In read_map: {row[0]}") - db_log.debug(f"In read_map row length: {len(row)}") + mopdb_log.debug(f"In read_map: {row[0]}") + mopdb_log.debug(f"In read_map row length: {len(row)}") if row[16] != '': notes = row[16] else: @@ -639,30 +650,31 @@ def read_map(fname, alias, db_log): return var_list -def match_stdname(conn, row, stdn, db_log): +def match_stdname(conn, row, stdn): """Returns an updated stdn list if finds one or more variables in cmorvar table that match the standard name passed as input. It also return a False/True found_match boolean. """ + mopdb_log = logging.getLogger('mopdb_log') found_match = False sql = f"""SELECT name FROM cmorvar where standard_name='{row['standard_name']}'""" results = query(conn, sql, first=False) matches = [x[0] for x in results] if len(matches) > 0: - stdn = add_var(stdn, row, tuple([matches]+['']*7), db_log, - stdnm=True) + stdn = add_var(stdn, row, tuple([matches]+['']*7), stdnm=True) found_match = True return stdn, found_match -def match_var(row, version, mode, conn, records, db_log): +def match_var(row, version, mode, conn, records): """Returns match for variable if found after looping variables already mapped in database Parameters """ + mopdb_log = logging.getLogger('mopdb_log') found_match = False # build sql query based on mode sql_base = f"""SELECT cmor_var,input_vars,calculation,frequency, @@ -678,17 +690,17 @@ def match_var(row, version, mode, conn, records, db_log): sql = sql_base + sql_frq # execute query and process results result = query(conn, sql, first=False) - db_log.debug(f"match_var: {result}, sql: {sql[110:]}") + mopdb_log.debug(f"match_var: {result}, sql: {sql[110:]}") if result is not None and result != []: for x in result: - db_log.debug(f"match: {x}") - records = add_var(records, row, x, db_log) + mopdb_log.debug(f"match: {x}") + records = add_var(records, row, x) found_match = True return records, found_match -def parse_vars(conn, rows, version, db_log): +def parse_vars(conn, rows, version): """Returns records of variables to include in template mapping file, a list of all stash variables + frequency available in model output and a list of variables already defined in db @@ -700,13 +712,13 @@ def parse_vars(conn, rows, version, db_log): list of variables to match version : str model version to use to match variables - db_log: logger obj Returns ------- stash_vars : list varname-frequency for each listed variable, varname is from model output """ + mopdb_log = logging.getLogger('mopdb_log') full = [] no_ver = [] no_frq = [] @@ -719,29 +731,28 @@ def parse_vars(conn, rows, version, db_log): if row['name'][0] == "#" or row['name'] == 'name': continue else: - full, found = match_var(row, version, 'full', conn, full, db_log) + full, found = match_var(row, version, 'full', conn, full) # if no match, ignore model version first and then frequency - db_log.debug(f"found perfect match: {found}") + mopdb_log.debug(f"found perfect match: {found}") if not found: - no_ver, found = 
match_var(row, version, 'no_ver', conn, no_ver, db_log) - db_log.debug(f"found no ver match: {found}") + no_ver, found = match_var(row, version, 'no_ver', conn, no_ver) + mopdb_log.debug(f"found no ver match: {found}") if not found: - no_frq, found = match_var(row, version, 'no_frq', conn, no_frq, db_log) - db_log.debug(f"found no frq match: {found}") + no_frq, found = match_var(row, version, 'no_frq', conn, no_frq) + mopdb_log.debug(f"found no frq match: {found}") # make a last attempt to match using standard_name if not found: if row['standard_name'] != '': - stdn, found = match_stdname(conn, row, stdn, db_log) - db_log.debug(f"found stdnm match: {found}") + stdn, found = match_stdname(conn, row, stdn) + mopdb_log.debug(f"found stdnm match: {found}") if not found: - no_match = add_var(no_match, row, tuple([row['name']]+['']*8), - db_log) + no_match = add_var(no_match, row, tuple([row['name']]+['']*8)) stash_vars.append(f"{row['name']}-{row['frequency']}") return full, no_ver, no_frq, stdn, no_match, stash_vars -def add_var(vlist, row, match, db_log, stdnm=False): +def add_var(vlist, row, match, stdnm=False): """Add information from match to variable list and re-order fields so they correspond to final mapping output. @@ -750,9 +761,10 @@ def add_var(vlist, row, match, db_log, stdnm=False): match values (cmor_var,input_vars,calculation,frequency, realm,model(version),cmor_table,positive,units) """ + mopdb_log = logging.getLogger('mopdb_log') # assign cmor_var from match and swap place with input_vars - db_log.debug(f"Assign cmor_var: {match}") - db_log.debug(f"initial row: {row}") + mopdb_log.debug(f"Assign cmor_var: {match}") + mopdb_log.debug(f"initial row: {row}") var = row.copy() var['cmor_var'] = match[0] var['input_vars'] = match[1] @@ -781,7 +793,7 @@ def add_var(vlist, row, match, db_log, stdnm=False): return vlist -def remove_duplicate(vlist, db_log, extra=[], strict=True): +def remove_duplicate(vlist, extra=[], strict=True): """Returns list without duplicate variable definitions. Define unique definition for variable as tuple (cmor_var, input_vars, @@ -790,25 +802,26 @@ def remove_duplicate(vlist, db_log, extra=[], strict=True): If extra is defined if a variable exists in this additional set it is a duplicate """ - db_log.debug(f'in duplicate, vlist {vlist}') + mopdb_log = logging.getLogger('mopdb_log') + mopdb_log.debug(f'in duplicate, vlist {vlist}') vid_list = [] keys = ['cmor_var', 'input_vars', 'calculation'] if strict is True: keys += ['frequency', 'realm'] if extra: vid_list = [tuple(x[k] for k in keys) for x in extra] - db_log.debug(f"vid_list: {vid_list}") + mopdb_log.debug(f"vid_list: {vid_list}") final = [] for v in vlist: vid = tuple(v[k] for k in keys) - db_log.debug(f"var and vid: {v['cmor_var']}, {vid}") + mopdb_log.debug(f"var and vid: {v['cmor_var']}, {vid}") if vid not in vid_list: final.append(v) vid_list.append(vid) return final -def potential_vars(conn, rows, stash_vars, version, db_log): +def potential_vars(conn, rows, stash_vars, version): """Returns list of variables that can be potentially derived from model output. 
@@ -825,11 +838,11 @@ def potential_vars(conn, rows, stash_vars, version, db_log):
         varname-frequency for each listed variable, varname is from model output
     version : str
         model version to use to match variables
-    db_log: logger obj
 
     Returns
     -------
     """
+    mopdb_log = logging.getLogger('mopdb_log')
     pot_full = []
     pot_part = []
     pot_varnames = set()
@@ -838,24 +851,24 @@ def potential_vars(conn, rows, stash_vars, version, db_log):
         realm,model,cmor_table,positive,units FROM mapping
         WHERE input_vars like '%{row['name']}%'"""
         results = query(conn, sql, first=False)
-        db_log.debug(f"In potential: var {row['name']}, db results {results}")
+        mopdb_log.debug(f"In potential: var {row['name']}, db results {results}")
         for r in results:
             allinput = r[1].split(" ")
-            db_log.debug(f"{len(allinput)> 1}")
-            db_log.debug(all(f"{x}-{row['frequency']}" in stash_vars for x in allinput))
+            mopdb_log.debug(f"{len(allinput)> 1}")
+            mopdb_log.debug(all(f"{x}-{row['frequency']}" in stash_vars for x in allinput))
             if len(allinput) > 1 and all(f"{x}-{row['frequency']}" in stash_vars for x in allinput):
             # if both version and frequency of applied mapping match
             # consider this a full matching potential var
                 if r[5] == version and r[3] == row['frequency']:
-                    pot_full = add_var(pot_full, row, r, db_log)
+                    pot_full = add_var(pot_full, row, r)
                 else:
-                    pot_part = add_var(pot_part, row, r, db_log)
+                    pot_part = add_var(pot_part, row, r)
                 pot_varnames.add(r[0])
     return pot_full, pot_part, pot_varnames
 
 
 def write_map_template(conn, full, no_ver, no_frq, stdn,
-                       no_match, pot_full, pot_part, alias, db_log):
+                       no_match, pot_full, pot_part, alias):
     """Write mapping csv file template based on list of variables to define
 
     Input varlist file order:
     name, cmor_var, units, dimensions, frequency, realm, cell_methods,
     cmor_table, vtype, size, nsteps, filename, long_name, standard_name
     Mapping db order:
     cmor_var, input_vars, calculation, units, dimensions, frequency, realm,
     cell_methods, positive, cmor_table, version, vtype, size, nsteps,
     filename, long_name, standard_name
     """
+    mopdb_log = logging.getLogger('mopdb_log')
     keys = ['cmor_var', 'input_vars', 'calculation', 'units',
             'dimensions', 'frequency', 'realm', 'cell_methods',
             'positive', 'cmor_table', 'version', 'vtype', 'size',
 
     with open(f"map_{alias}.csv", 'w') as fcsv:
         fwriter = csv.DictWriter(fcsv, keys, delimiter=';')
-        write_vars(full, fwriter, keys, db_log, conn=conn)
+        write_vars(full, fwriter, keys, conn=conn)
         div = ("# Derived variables with matching version and " +
             "frequency: Use with caution!")
-        write_vars(pot_full, fwriter, div, db_log, conn=conn)
+        write_vars(pot_full, fwriter, div, conn=conn)
         #pot=True, conn=conn, sortby=0)
         div = ("# Variables definitions coming from different " +
             "version")
-        write_vars(no_ver, fwriter, div, db_log, conn=conn)
+        write_vars(no_ver, fwriter, div, conn=conn)
         div = ("# Variables with different frequency: Use with" +
             " caution!")
-        write_vars(no_ver, fwriter, div, db_log, conn=conn)
+        write_vars(no_frq, fwriter, div, conn=conn)
         div = ("# Variables matched using standard_name: Use " +
             "with caution!")
-        write_vars(stdn, fwriter, div, db_log, sortby='input_vars')
+        write_vars(stdn, fwriter, div, sortby='input_vars')
         div = "# Derived variables: Use with caution!"
-        write_vars(pot_part, fwriter, div, db_log, conn=conn)
+        write_vars(pot_part, fwriter, div, conn=conn)
         #pot=True, conn=conn, sortby=0)
         div = "# Variables without mapping"
-        write_vars(no_match, fwriter, div, db_log)
-    db_log.debug("Finished writing variables to mapping template")
+        write_vars(no_match, fwriter, div)
+    mopdb_log.debug("Finished writing variables to mapping template")
     fcsv.close()
 
     return
 
 
-def write_vars(vlist, fwriter, div, db_log, conn=None, sortby='cmor_var'):
+def write_vars(vlist, fwriter, div, conn=None, sortby='cmor_var'):
     """
     """
+    mopdb_log = logging.getLogger('mopdb_log')
     if len(vlist) > 0:
         if type(div) is str:
             divrow = {x:'' for x in vlist[0].keys()}
@@ -914,52 +929,54 @@
         fwriter.writerow(divrow)
         for var in sorted(vlist, key=itemgetter(sortby)):
             if conn:
-                var = check_realm_units(conn, var, db_log)
+                var = check_realm_units(conn, var)
             fwriter.writerow(var)
     return
 
 
-def check_realm_units(conn, var, db_log):
+def check_realm_units(conn, var):
     """Checks that realm and units are consistent with values in
     cmor table.
     """
+    mopdb_log = logging.getLogger('mopdb_log')
     vname = f"{var['cmor_var']}-{var['cmor_table']}"
     if var['cmor_table'] is None or var['cmor_table'] == "":
-        db_log.warning(f"Variable: {vname} has no associated cmor_table")
+        mopdb_log.warning(f"Variable: {vname} has no associated cmor_table")
     else:
     # retrieve modeling_realm, units from db cmor table
         sql = f"""SELECT modeling_realm, units FROM cmorvar
             WHERE name='{vname}' """
         result = query(conn, sql)
-        db_log.debug(f"In check_realm_units: {vname}, {result}")
+        mopdb_log.debug(f"In check_realm_units: {vname}, {result}")
         if result is not None:
             dbrealm = result[0]
             dbunits = result[1]
             # dbrealm could have two realms
             if var['realm'] not in [dbrealm] + dbrealm.split():
-                db_log.info(f"Changing {vname} realm from {var['realm']} to {dbrealm}")
+                mopdb_log.info(f"Changing {vname} realm from {var['realm']} to {dbrealm}")
                 var['realm'] = dbrealm
             if var['units'] != dbunits :
-                db_log.info(f"Changing {vname} units from {var['units']} to {dbunits}")
+                mopdb_log.info(f"Changing {vname} units from {var['units']} to {dbunits}")
                 var['units'] = dbunits
         else:
-            db_log.warning(f"Variable {vname} not found in cmor table")
+            mopdb_log.warning(f"Variable {vname} not found in cmor table")
     return var
 
 
-    def get_realm(fpath, version, db_log):
-        '''Return realm for variable in files or NArealm'''
-        if version == 'AUS2200':
-            realm = 'atmos'
-        else:
-            realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm']
-                if x in fpath.parts][0]
-        if realm == 'atm':
-            realm = 'atmos'
-        elif realm == 'ocn':
-            realm = 'ocean'
-        elif realm is None:
-            realm = 'NArealm'
-            db_log.info(f"Couldn't detect realm from path, setting to NArealm")
-        db_log.debug(f"Realm is {realm}")
+def get_realm(fpath, version):
+    '''Return realm for variable in files or NArealm'''
+    mopdb_log = logging.getLogger('mopdb_log')
+    if version == 'AUS2200':
+        realm = 'atmos'
+    else:
+        # next() yields None when nothing matches, instead of the IndexError
+        # a bare [0] would raise, so the NArealm branch below is reachable
+        realm = next((x for x in ['atmos', 'ocean', 'ice', 'ocn', 'atm']
+            if x in fpath.parts), None)
+    if realm == 'atm':
+        realm = 'atmos'
+    elif realm == 'ocn':
+        realm = 'ocean'
+    elif realm is None:
+        realm = 'NArealm'
+        mopdb_log.info("Couldn't detect realm from path, setting to NArealm")
+    mopdb_log.debug(f"Realm is {realm}")
     return realm

diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py
index b8af723..1adf216 100644
--- a/src/mopper/calculations.py
+++ b/src/mopper/calculations.py
@@ -37,6 +37,7 @@
 import json
 import 
numpy as np import dask +import logging from importlib_resources import files as import_files from mopper.setup_utils import read_yaml @@ -852,9 +853,10 @@ def plevinterp(ctx, var, pmod, levnum): interp : Xarray DataArray The variable interpolated on pressure levels """ + + var_log = logging.getLogger(ctx.obj['var_log']) # avoid dask warning dask.config.set(**{'array.slicing.split_large_chunks': True}) - var_log = ctx.obj['var_log'] plev = get_plev(levnum) lev = var.dims[1] # if pmod is pressure on rho_level_0 and variable is on rho_level @@ -928,7 +930,7 @@ def K_degC(ctx, var): vout : Xarray DataArray temperature array in degrees Celsius """ - var_log = ctx.obj['var_log'] + var_log = logging.getLogger(ctx.obj['var_log']) if 'K' in var.units: var_log.info("temp in K, converting to degC") vout = var - 273.15 @@ -1199,7 +1201,7 @@ def level_to_height(ctx, var, levs=None): vout : Xarray DataArray Same variable defined on model levels height """ - var_log = ctx.obj['var_log'] + var_log = logging.getLogger(ctx.obj['var_log']) if levs is not None and type(levs) not in [tuple, list]: var_log.error(f"level_to_height function: levs {levs} should be a tuple or list") zdim = var.dims[1] @@ -1293,7 +1295,7 @@ def calc_overt(ctx, varlist, sv=False): overt: DataArray overturning mass streamfunction (time, basin, depth, gridlat) variable """ - var_log = ctx.obj['var_log'] + var_log = logging.getLogger(ctx.obj['var_log']) var1 = varlist[0] vlat, vlon = var1.dims[2:] mask = get_basin_mask(vlat, vlon) @@ -1381,7 +1383,7 @@ def overturn_stream(ctx, varlist, sv=False): stream: DataArray The ocean overturning mass streamfunction in kg s-1 """ - var_log = ctx.obj['var_log'] + var_log = logging.getLogger(ctx.obj['var_log']) londim = varlist[0].dims[3] depdim = varlist[0].dims[1] var_log.debug(f"Streamfunct lon, dep dims: {londim}, {depdim}") @@ -1434,7 +1436,7 @@ def calc_depositions(ctx, var, weight=None): (personal communication from M. Woodhouse) """ - var_log = ctx.obj['var_log'] + var_log = logging.getLogger(ctx.obj['var_log']) varlist = [] for v in var: v0 = v.sel(model_theta_level_number=1).squeeze(dim='model_theta_level_number') diff --git a/src/mopper/mop_setup.py b/src/mopper/mop_setup.py index a639075..7040270 100755 --- a/src/mopper/mop_setup.py +++ b/src/mopper/mop_setup.py @@ -28,6 +28,7 @@ import json import csv import click +import logging from pathlib import Path from json.decoder import JSONDecodeError from importlib.resources import files as import_files @@ -35,7 +36,7 @@ from mopper.setup_utils import * -def find_matches(table, var, realm, frequency, varlist, mop_log): +def find_matches(table, var, realm, frequency, varlist): """Finds variable matching constraints given by table and config settings and returns a dictionary with the variable specifications. 
@@ -59,14 +60,13 @@ def find_matches(table, var, realm, frequency, varlist, mop_log): varlist : list List of variables, each represented by a dictionary with mappings used to find a match to "var" passed - mop_log : logging object - Log Returns ------- match : dict Dictionary containing matched variable specifications or None if not matches """ + mop_log = logging.getLogger('mop_log') near_matches = [] found = False match = None @@ -83,7 +83,7 @@ def find_matches(table, var, realm, frequency, varlist, mop_log): and v['realm'] in realm.split()): near_matches.append(v) if found is False and frequency != 'fx': - v = find_nearest(near_matches, frequency, mop_log) + v = find_nearest(near_matches, frequency) if v is not None: match = v found = True @@ -110,7 +110,7 @@ def find_matches(table, var, realm, frequency, varlist, mop_log): return match -def find_nearest(varlist, frequency, mop_log): +def find_nearest(varlist, frequency): """If variable is present in file at different frequencies, finds the one with higher frequency nearest to desired frequency. Adds frequency to variable resample field. @@ -124,8 +124,6 @@ def find_nearest(varlist, frequency, mop_log): frequency frequency : str Variable frequency to match - mop_log : logging object - Log Returns ------- @@ -133,6 +131,7 @@ def find_nearest(varlist, frequency, mop_log): Dictionary containing matched variable specifications or None if not matches """ + mop_log = logging.getLogger('mop_log') var = None found = False freq = frequency @@ -178,7 +177,7 @@ def setup_env(ctx): attributes for experiment """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') cdict = ctx.obj cdict['appdir'] = Path(cdict['appdir']) appdir = cdict['appdir'] @@ -231,7 +230,7 @@ def setup_env(ctx): def var_map(ctx, activity_id=None): """ """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') tables = ctx.obj.get('tables', 'all') subset = ctx.obj.get('var_subset', False) sublist = ctx.obj.get('var_subset_list', None) @@ -289,7 +288,7 @@ def create_var_map(ctx, table, mappings, activity_id=None, Returns ------- """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') matches = [] fpath = ctx.obj['tables_path'] / f"{table}.json" if not fpath.exists(): @@ -325,7 +324,7 @@ def create_var_map(ctx, table, mappings, activity_id=None, years = dreq_years[var] if 'subhr' in frq: frq = ctx.obj['subhr'] + frq.split('subhr')[1] - match = find_matches(table, var, realm, frq, mappings, mop_log) + match = find_matches(table, var, realm, frq, mappings) if match is not None: match['years'] = years matches.append(match) @@ -367,7 +366,7 @@ def archive_workdir(ctx): def manage_env(ctx): """Prepare output directories and removes pre-existing ones """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') # check if output path already exists outpath = ctx.obj['outpath'] if outpath.exists() and ctx.obj['update'] is False: diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index 0f0fd42..52f01bf 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -81,9 +81,9 @@ def config_log(debug, path, stream_level=logging.WARNING): return logger -def config_varlog(debug, logname): +def config_varlog(debug, logname, pid): """Configure varlog file: use this for specific var information""" - logger = logging.getLogger('var_log') + logger = logging.getLogger(f'{pid}_log') formatter = logging.Formatter('%(asctime)s; %(message)s',"%Y-%m-%d %H:%M:%S") if debug is True: level = logging.DEBUG @@ -99,6 +99,8 @@ def 
config_varlog(debug, logname): flog.setLevel(level) flog.setFormatter(formatter) logger.addHandler(flog) + # Stop propagation + logger.propagate = False return logger @@ -118,9 +120,8 @@ def _preselect(ds, varlist): return ds[varsel] - @click.pass_context -def get_files(ctx, var_log): +def get_files(ctx): """Returns all files in time range First identifies all files with pattern/s defined for invars Then retrieve time dimension and if multiple time axis are present @@ -129,40 +130,41 @@ def get_files(ctx, var_log): last timestep from each file """ # Returns file list for each input var and list of vars for each file pattern - all_files, path_vars = find_all_files(var_log) + var_log = logging.getLogger(ctx.obj['var_log']) + all_files, path_vars = find_all_files() # PP FUNCTION END return all_files, extra_files var_log.debug(f"access files from: {os.path.basename(all_files[0][0])}" + f"to {os.path.basename(all_files[0][-1])}") ds = xr.open_dataset(all_files[0][0], decode_times=False) - time_dim, units, multiple_times = get_time_dim(ds, var_log) + time_dim, units, multiple_times = get_time_dim(ds) del ds try: inrange_files = [] for i,paths in enumerate(all_files): if multiple_times is True: - inrange_files.append( check_in_range(paths, time_dim, var_log) ) + inrange_files.append( check_in_range(paths, time_dim) ) else: - inrange_files.append( check_timestamp(paths, var_log) ) + inrange_files.append( check_timestamp(paths) ) except: for i,paths in enumerate(all_files): - inrange_files.append( check_in_range(paths, time_dim, var_log) ) + inrange_files.append( check_in_range(paths, time_dim) ) for i,paths in enumerate(inrange_files): if paths == []: - mop_log.error(f"No data in requested time range for: {ctx.obj['filename']}") var_log.error(f"No data in requested time range for: {ctx.obj['filename']}") return inrange_files, path_vars, time_dim, units @click.pass_context -def find_all_files(ctx, var_log): +def find_all_files(ctx): """Find all the ACCESS file names which match the pattern/s associated with invars. Sort the filenames, assuming that the sorted filenames will be in chronological order because there is usually some sort of date and/or time information in the filename. 
Check that all variables needed are in file, otherwise add extra file pattern """ + var_log = logging.getLogger(ctx.obj['var_log']) var_log.debug(f"Input file structure: {ctx.obj['infile']}") patterns = ctx.obj['infile'].split() var_log.debug(f"Input file patterns: {patterns}") @@ -186,7 +188,7 @@ def find_all_files(ctx, var_log): while len(missing) > 0 and i < len(patterns): path_vars[i] = [] f = files[i][0] - missing, found = check_vars_in_file(missing, f, var_log) + missing, found = check_vars_in_file(missing, f) if len(found) > 0: for v in found: path_vars[i].append(v) @@ -198,10 +200,11 @@ def find_all_files(ctx, var_log): @click.pass_context -def check_vars_in_file(ctx, invars, fname, var_log): +def check_vars_in_file(ctx, invars, fname): """Check that all variables needed for calculation are in file else return extra filenames """ + var_log = logging.getLogger(ctx.obj['var_log']) ds = xr.open_dataset(fname, decode_times=False) tofind = [v for v in invars if v not in ds.variables] found = [v for v in invars if v not in tofind] @@ -209,10 +212,11 @@ def check_vars_in_file(ctx, invars, fname, var_log): @click.pass_context -def get_time_dim(ctx, ds, var_log): +def get_time_dim(ctx, ds): """Find time info: time axis, reference time and set tstart and tend also return mutlitple_times True if more than one time axis """ + var_log = logging.getLogger(ctx.obj['var_log']) time_dim = None multiple_times = False varname = [ctx.obj['vin'][0]] @@ -236,11 +240,12 @@ def get_time_dim(ctx, ds, var_log): @click.pass_context -def check_timestamp(ctx, all_files, var_log): +def check_timestamp(ctx, all_files): """This function tries to guess the time coverage of a file based on its timestamp and return the files in range. At the moment it does a lot of checks based on the realm and real examples eventually it would make sense to make sure all files generated are consistent in naming """ + var_log = logging.getLogger(ctx.obj['var_log']) inrange_files = [] realm = ctx.obj['realm'] var_log.info("checking files timestamp ...") @@ -305,11 +310,12 @@ def check_timestamp(ctx, all_files, var_log): @click.pass_context -def check_in_range(ctx, all_files, tdim, var_log): +def check_in_range(ctx, all_files, tdim): """Return a list of files in time range Open each file and check based on time axis Use this function only if check_timestamp fails """ + var_log = logging.getLogger(ctx.obj['var_log']) inrange_files = [] var_log.info("loading files...") var_log.debug(f"time dimension: {tdim}") @@ -337,7 +343,7 @@ def check_in_range(ctx, all_files, tdim, var_log): @click.pass_context -def load_data(ctx, inrange_files, path_vars, time_dim, var_log): +def load_data(ctx, inrange_files, path_vars, time_dim): """Returns a dictionary of input var: xarray dataset """ # preprocessing to select only variables we need to avoid @@ -345,6 +351,7 @@ def load_data(ctx, inrange_files, path_vars, time_dim, var_log): # temporarily opening file without decoding times, fixing # faulty time bounds units and decoding times # this is to prevent issues with ocean files + var_log = logging.getLogger(ctx.obj['var_log']) input_ds = {} for i, paths in enumerate(inrange_files): preselect = partial(_preselect, varlist=path_vars[i]) @@ -361,9 +368,10 @@ def load_data(ctx, inrange_files, path_vars, time_dim, var_log): @click.pass_context -def get_cmorname(ctx, axis_name, axis, var_log, z_len=None): +def get_cmorname(ctx, axis_name, axis, z_len=None): """Get time cmor name based on timeshot option """ + var_log = logging.getLogger(ctx.obj['var_log']) 
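+    # ctx.obj['var_log'] stores the logger *name* set by process_row(),
+    # so getLogger() returns the same per-file logger here without it
+    # being passed down as an argument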
var_log.debug(f'axis_name, axis.name: {axis_name}, {axis.name}')
     ctx.obj['axes_modifier'] = []
     if axis_name == 't':
@@ -418,10 +426,11 @@ def get_cmorname(ctx, axis_name, axis, var_log, z_len=None):
 
 #PP this should eventually just be generated directly by defining the dimension using the same terms
 # in related calculation
 @click.pass_context
-def pseudo_axis(axis, var_log):
+def pseudo_axis(ctx, axis):
     """coordinates with axis_identifier other than X,Y,Z,T
     PP not sure if axis can be used to remove axes_mod
     """
+    var_log = logging.getLogger(ctx.obj['var_log'])
     cmor_name = None
     p_vals = None
     p_len = None
@@ -450,9 +459,11 @@ def pseudo_axis(axis, var_log):
 
 #PP this should eventually just be generated directly by defining the dimension using the same terms
 # in calculation for meridional overturning
-def create_axis(axis, table, var_log):
+@click.pass_context
+def create_axis(ctx, axis, table):
     """
     """
+    var_log = logging.getLogger(ctx.obj['var_log'])
     # maybe we can just create these axis as they're meant in calculations
     var_log.info(f"creating {axis.name} axis...")
     #func_dict = {'oline': getTransportLines(),
@@ -469,9 +480,10 @@ def create_axis(ctx, axis, table):
     return axis_id
 
 
-def hybrid_axis(lev, z_ax_id, z_ids, var_log):
+@click.pass_context
+def hybrid_axis(ctx, lev, z_ax_id, z_ids):
     """Setting up additional hybrid axis information
     """
+    var_log = logging.getLogger(ctx.obj['var_log'])
     hybrid_dict = {'hybrid_height': 'b',
                    'hybrid_height_half': 'b_half'}
     orog_vals = getOrog()
@@ -492,9 +504,10 @@ def hybrid_axis(ctx, lev, z_ax_id, z_ids):
 
 @click.pass_context
-def ij_axis(ctx, ax, ax_name, table, var_log):
+def ij_axis(ctx, ax, ax_name, table):
     """
     """
+    var_log = logging.getLogger(ctx.obj['var_log'])
     cmor.set_table(table)
     ax_id = cmor.axis(table_entry=ax_name,
         units='1',
@@ -503,12 +516,13 @@ def ij_axis(ctx, ax, ax_name, table):
 
 @click.pass_context
-def ll_axis(ctx, ax, ax_name, ds, table, bounds_list, var_log):
+def ll_axis(ctx, ax, ax_name, ds, table, bounds_list):
     """
     """
+    var_log = logging.getLogger(ctx.obj['var_log'])
     var_log.debug("in ll_axis")
     cmor.set_table(table)
-    cmor_aName = get_cmorname(ax_name, ax, var_log)
+    cmor_aName = get_cmorname(ax_name, ax)
     try:
         ax_units = ax.units
     except:
@@ -516,7 +530,7 @@ def ll_axis(ctx, ax, ax_name, ds, table, bounds_list):
     a_bnds = None
     var_log.debug(f"got cmor name: {cmor_aName}")
     if cmor_aName in bounds_list:
-        a_bnds = get_bounds(ds, ax, cmor_aName, var_log)
+        a_bnds = get_bounds(ds, ax, cmor_aName)
     a_vals = ax.values
     var_log.debug(f"a_bnds: {a_bnds.shape}")
     var_log.debug(f"a_vals: {a_vals.shape}")
@@ -533,10 +547,10 @@ def ll_axis(ctx, ax, ax_name, ds, table, bounds_list):
     return ax_id
 
 @click.pass_context
-def define_grid(ctx, j_id, i_id, lat, lat_bnds, lon, lon_bnds,
-    var_log):
+def define_grid(ctx, j_id, i_id, lat, lat_bnds, lon, lon_bnds):
     """If we are on a non-cartesian grid, Define the spatial grid
     """
+    var_log = logging.getLogger(ctx.obj['var_log'])
     grid_id=None
     var_log.info("setting up grid")
     #Set grid id and append to axis and z ids
@@ -550,9 +564,10 @@ def define_grid(ctx, j_id, i_id, lat, lat_bnds, lon, lon_bnds):
 
 @click.pass_context
-def get_coords(ctx, ovar, coords, var_log):
+def get_coords(ctx, ovar, coords):
     """Get lat/lon and their boundaries from ancil file
     """
+    var_log = logging.getLogger(ctx.obj['var_log'])
    # open ancil grid file to read vertices
    #PP be careful this is currently hardcoded which is not ok! 
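+    # the ancil file path below comes from the 'grid_<realm>' entry of the
+    # experiment configuration, which is the hardcoding flagged above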
ancil_file = ctx.obj[f"grid_{ctx.obj['realm']}"] @@ -583,9 +598,10 @@ def get_coords(ctx, ovar, coords, var_log): @click.pass_context -def get_axis_dim(ctx, var, var_log): +def get_axis_dim(ctx, var): """ """ + var_log = logging.getLogger(ctx.obj['var_log']) axes = {'t_ax': None, 'z_ax': None, 'glat_ax': None, 'lat_ax': None, 'lon_ax': None, 'j_ax': None, 'i_ax': None, 'p_ax': None, 'e_ax': None} @@ -631,8 +647,10 @@ def get_axis_dim(ctx, var, var_log): return axes -def check_time_bnds(bnds, frequency, var_log): +@click.pass_context +def check_time_bnds(ictx, bnds, frequency): """Checks if dimension boundaries from file are wrong""" + var_log = logging.getLogger(ctx.obj['var_log']) var_log.debug(f"Time bnds 1,0: {bnds[:,1], bnds[:,0]}") diff = bnds[:,1] - bnds[:,0] #approx_int = [np.timedelta64(x, 'D').astype(float) for x in diff] @@ -650,10 +668,11 @@ def check_time_bnds(bnds, frequency, var_log): @click.pass_context -def require_bounds(ctx, var_log): +def require_bounds(ctx): """Returns list of coordinates that require bounds. Reads the requirement directly from .._coordinate.json file """ + var_log.debug(f"Time bnds 1,0: {bnds[:,1], bnds[:,0]}") fpath = f"{ctx.obj['tpath']}/{ctx.obj['_AXIS_ENTRY_FILE']}" with open(fpath, 'r') as jfile: data = json.load(jfile) @@ -665,10 +684,11 @@ def require_bounds(ctx, var_log): @click.pass_context -def bnds_change(ctx, axis, var_log): +def bnds_change(ctx, axis): """Returns True if calculation/resample changes bnds of specified dimension. """ + var_log.debug(f"Time bnds 1,0: {bnds[:,1], bnds[:,0]}") dim = axis.name calculation = ctx.obj['calculation'] changed_bnds = False @@ -683,16 +703,17 @@ def bnds_change(ctx, axis, var_log): @click.pass_context -def get_bounds(ctx, ds, axis, cmor_name, var_log, ax_val=None): +def get_bounds(ctx, ds, axis, cmor_name, ax_val=None): """Returns bounds for input dimension, if bounds are not available uses edges or tries to calculate them. If variable goes through calculation potentially bounds are different from input file and forces re-calculating them """ + var_log = logging.getLogger(ctx.obj['var_log']) var_log.debug(f'in getting bounds: {axis}') dim = axis.name var_log.info(f"Getting bounds for axis: {dim}") - changed_bnds = bnds_change(axis, var_log) + changed_bnds = bnds_change(axis) var_log.debug(f"Bounds has changed: {changed_bnds}") #The default bounds assume that the grid cells are centred on #each grid point specified by the coordinate variable. 
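The two comment lines above state the default assumption used below; as an illustration, the midpoint construction they describe amounts to the following sketch (not the package's exact code):

    import numpy as np

    def midpoint_bounds(vals):
        # interior bounds sit halfway between coordinate points; the two end
        # bounds are extrapolated by half of the first/last spacing
        mid = (vals[:-1] + vals[1:]) / 2.0
        first = vals[0] - (vals[1] - vals[0]) / 2.0
        last = vals[-1] + (vals[-1] - vals[-2]) / 2.0
        edges = np.concatenate([[first], mid, [last]])
        # returned as (n, 2) [lower, upper] pairs, the layout CMOR expects
        return np.stack([edges[:-1], edges[1:]], axis=-1)

    print(midpoint_bounds(np.array([0.5, 1.5, 2.5])))
    # [[0. 1.] [1. 2.] [2. 3.]]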
@@ -716,7 +737,7 @@ def get_bounds(ctx, ds, axis, cmor_name, var_log, ax_val=None): dim_bnds_val = cftime.date2num(dim_bnds_val, units=ctx.obj['reference_date'], calendar=ctx.obj['attrs']['calendar']) - inrange = check_time_bnds(dim_bnds_val, frq, var_log) + inrange = check_time_bnds(dim_bnds_val, frq) if not inrange: calc = True var_log.info(f"Inherited bounds for {dim} are incorrect") @@ -735,7 +756,7 @@ def get_bounds(ctx, ds, axis, cmor_name, var_log, ax_val=None): var_log.warning(f"dodgy bounds for dimension: {dim}") var_log.error(f"error: {e}") if 'time' in cmor_name: - inrange = check_time_bnds(dim_bnds_val, frq, var_log) + inrange = check_time_bnds(dim_bnds_val, frq) if inrange is False: var_log.error(f"Boundaries for {cmor_name} are " + "wrong even after calculation") @@ -766,9 +787,10 @@ def get_bounds(ctx, ds, axis, cmor_name, var_log, ax_val=None): @click.pass_context -def get_attrs(ctx, infiles, var1, var_log): +def get_attrs(ctx, infiles, var1): """ """ + var_log = logging.getLogger(ctx.obj['var_log']) # open only first file so we can access encoding ds = xr.open_dataset(infiles[0][0]) var_attrs = ds[var1].attrs @@ -803,7 +825,7 @@ def get_attrs(ctx, infiles, var1, var_log): @click.pass_context -def extract_var(ctx, input_ds, tdim, in_missing, mop_log, var_log): +def extract_var(ctx, input_ds, tdim, in_missing): """ This function pulls the required variables from the Xarray dataset. If a calculation isn't needed then it just returns the variables to be saved. @@ -814,6 +836,8 @@ def extract_var(ctx, input_ds, tdim, in_missing, mop_log, var_log): input_ds - dict dictionary of input datasets for each variable """ + mop_log = logging.getLogger('mop_log') + var_log = logging.getLogger(ctx.obj['var_log']) failed = False # Save the variables if ctx.obj['calculation'] == '': @@ -873,6 +897,7 @@ def define_attrs(ctx): listed in notes file, this is indicated by precending any function in file with a ~. For other fields it checks equality. """ + var_log = logging.getLogger(ctx.obj['var_log']) attrs = ctx.obj['attrs'] notes = attrs.get('notes', '') # open file containing notes diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 554c7c7..5418309 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -80,11 +80,10 @@ def mop(ctx, cfile, debug): ctx.obj['attrs'] = cfg['attrs'] # set up main mop log if ctx.invoked_subcommand == 'setup': - ctx.obj['log'] = config_log(debug, ctx.obj['appdir'], stream_level=logging.INFO) + mop_log = config_log(debug, ctx.obj['appdir'], stream_level=logging.INFO) else: - ctx.obj['log'] = config_log(debug, ctx.obj['appdir']) + mop_log = config_log(debug, ctx.obj['appdir']) ctx.obj['debug'] = debug - mop_log = ctx.obj['log'] mop_log.info(f"Simulation to process: {ctx.obj['exp']}") @@ -95,9 +94,9 @@ def mop_run(ctx): Use the configuration yaml file created in setup step as input. 
""" - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') # Open database and retrieve list of files to create - conn = db_connect(ctx.obj['database'], mop_log) + conn = db_connect(ctx.obj['database']) c = conn.cursor() sql = f"""select *,ROWID from filelist where status=='unprocessed' and exp_id=='{ctx.obj['exp']}'""" @@ -133,7 +132,7 @@ def mop_setup(ctx, update): * finalises configuration and save in new yaml file * writes job executable file and submits (optional) to queue """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') # then add setup_env to config mop_log.info("Setting environment and creating working directory") ctx.obj['update'] = update @@ -152,11 +151,11 @@ def mop_setup(ctx, update): # setup database table database = ctx.obj['database'] mop_log.info(f"creating & using database: {database}") - conn = db_connect(database, mop_log) + conn = db_connect(database) table_sql = filelist_sql() - create_table(conn, table_sql, mop_log) + create_table(conn, table_sql) populate_db(conn) - nrows = count_rows(conn, ctx.obj['exp'], mop_log) + nrows = count_rows(conn, ctx.obj['exp']) tot_size = sum_file_sizes(conn) mop_log.info(f"Estimated total files size before compression is: {tot_size} GB") #write app_job.sh @@ -177,14 +176,16 @@ def mop_setup(ctx, update): @click.pass_context -def mop_process(ctx, mop_log, var_log): +def mop_process(ctx): """Main processing workflow Sets up CMOR dataset, tables and axis. Extracts and/or calculates variable and write to file using CMOR. Returns path of created file if successful or error code if not. """ - + + mop_log = logging.getLogger('mop_log') + var_log = logging.getLogger(ctx.obj['var_log']) default_cal = "gregorian" logname = f"{ctx.obj['variable_id']}_{ctx.obj['table']}_{ctx.obj['tstart']}" @@ -210,15 +211,15 @@ def mop_process(ctx, mop_log, var_log): # Select files to use and associate a path to each input variable #P I might not need this! - inrange_files, path_vars, time_dim, t_units = get_files(var_log) + inrange_files, path_vars, time_dim, t_units = get_files() # Open input datasets based on input files, return dict= {var: ds} - dsin = load_data(inrange_files, path_vars, time_dim, var_log) + dsin = load_data(inrange_files, path_vars, time_dim) #Get the units and other attrs of first variable. 
var1 = ctx.obj['vin'][0] in_units, in_missing, positive, coords = get_attrs(inrange_files, - var1, var_log) + var1) var_log.debug(f"var just after reading {dsin[var1][var1]}") # Extract variable and calculation: @@ -226,7 +227,7 @@ def mop_process(ctx, mop_log, var_log): var_log.info(f"calculation: {ctx.obj['calculation']}") var_log.info(f"resample: {ctx.obj['resample']}") try: - ovar, failed = extract_var(dsin, time_dim, in_missing, mop_log, var_log) + ovar, failed = extract_var(dsin, time_dim, in_missing) var_log.info("Calculation completed.") except Exception as e: mop_log.error(f"E: Unable to retrieve/calculate var for {ctx.obj['filename']}") @@ -239,16 +240,16 @@ def mop_process(ctx, mop_log, var_log): # Define axis and variable for CMOR var_log.info("Defining axes...") # get list of coordinates that require bounds - bounds_list = require_bounds(var_log) + bounds_list = require_bounds() # get axis of each dimension - axes = get_axis_dim(ovar, var_log) + axes = get_axis_dim(ovar) var_log.debug(f"detected axes: {axes}") cmor.set_table(tables[1]) axis_ids = [] z_ids = [] setgrid = False if axes['t_ax'] is not None: - cmor_tName = get_cmorname('t', axes['t_ax'], var_log) + cmor_tName = get_cmorname('t', axes['t_ax']) ctx.obj['reference_date'] = f"days since {ctx.obj['reference_date']}" var_log.debug(f"{ctx.obj['reference_date']}") t_ax_val = cftime.date2num(axes['t_ax'], units=ctx.obj['reference_date'], @@ -257,7 +258,7 @@ def mop_process(ctx, mop_log, var_log): t_bounds = None if cmor_tName in bounds_list: t_bounds = get_bounds(dsin[var1], axes['t_ax'], cmor_tName, - var_log, ax_val=t_ax_val) + ax_val=t_ax_val) t_ax_id = cmor.axis(table_entry=cmor_tName, units=ctx.obj['reference_date'], length=len(t_ax_val), @@ -266,14 +267,14 @@ def mop_process(ctx, mop_log, var_log): interval=None) axis_ids.append(t_ax_id) if axes['e_ax'] is not None: - e_ax_id = create_axis(axes['e_ax'], tables[1], var_log) + e_ax_id = create_axis(axes['e_ax'], tables[1]) axis_ids.append(e_ax_id) if axes['z_ax'] is not None: zlen = len(axes['z_ax']) - cmor_zName = get_cmorname('z', axes['z_ax'], var_log, z_len=zlen) + cmor_zName = get_cmorname('z', axes['z_ax'], z_len=zlen) z_bounds = None if cmor_zName in bounds_list: - z_bounds = get_bounds(dsin[var1], axes['z_ax'], cmor_zName, var_log) + z_bounds = get_bounds(dsin[var1], axes['z_ax'], cmor_zName) z_ax_id = cmor.axis(table_entry=cmor_zName, units=axes['z_ax'].units, length=zlen, @@ -284,31 +285,30 @@ def mop_process(ctx, mop_log, var_log): # if both i, j are defined setgrid if only one treat as lat/lon if axes['i_ax'] is not None and axes['j_ax'] is not None: setgrid = True - j_id = ij_axis(axes['j_ax'], 'j_index', tables[0], var_log) - i_id = ij_axis(axes['i_ax'], 'i_index', tables[0], var_log) + j_id = ij_axis(axes['j_ax'], 'j_index', tables[0]) + i_id = ij_axis(axes['i_ax'], 'i_index', tables[0]) elif axes['j_ax'] is not None: axes['lat_ax'] = axes['j_ax'] elif axes['i_ax'] is not None: axes['lon_ax'] = axes['i_ax'] # Define the spatial grid if non-cartesian grid if setgrid: - lat, lat_bnds, lon, lon_bnds = get_coords(ovar, coords, var_log) - grid_id = define_grid(j_id, i_id, lat, lat_bnds, lon, - lon_bnds, var_log) + lat, lat_bnds, lon, lon_bnds = get_coords(ovar, coords) + grid_id = define_grid(j_id, i_id, lat, lat_bnds, lon, lon_bnds) else: if axes['glat_ax'] is not None: - lat_id = ll_axis(axes['glat_ax'], 'glat', dsin[var1], tables[1], - bounds_list, var_log) + lat_id = ll_axis(axes['glat_ax'], 'glat', dsin[var1], + tables[1], bounds_list) 
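+            # axis_ids collects the axes the output variable is defined on;
+            # z_ids is kept separately for the hybrid-height zfactors set up
+            # further down via hybrid_axis()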
axis_ids.append(lat_id) #z_ids.append(lat_id) elif axes['lat_ax'] is not None: lat_id = ll_axis(axes['lat_ax'], 'lat', dsin[var1], tables[1], - bounds_list, var_log) + bounds_list) axis_ids.append(lat_id) z_ids.append(lat_id) if axes['lon_ax'] is not None: lon_id = ll_axis(axes['lon_ax'], 'lon', dsin[var1], tables[1], - bounds_list, var_log) + bounds_list) axis_ids.append(lon_id) z_ids.append(lon_id) if axes['p_ax'] is not None: @@ -324,7 +324,7 @@ def mop_process(ctx, mop_log, var_log): # Set up additional hybrid coordinate information if (axes['z_ax'] is not None and cmor_zName in ['hybrid_height', 'hybrid_height_half']): - zfactor_b_id, zfactor_orog_id = hybrid_axis(lev_name, z_ax_id, z_ids, var_log) + zfactor_b_id, zfactor_orog_id = hybrid_axis(lev_name, z_ax_id, z_ids) # Freeing up memory del dsin @@ -371,7 +371,7 @@ def mop_process(ctx, mop_log, var_log): @click.pass_context -def process_file(ctx, row, var_log): +def process_file(ctx, row): """Processes file from database if status is unprocessed. If override is true, re-writes existing files. Called by process_row() and calls mop_process() to extract and write variable. @@ -382,15 +382,14 @@ def process_file(ctx, row, var_log): Click context object row : dict row from filelist db table describing one output file - var_log : logging handler - Logging file handler specific to the file to process Returns ------- out : tuple Output status message and code and db rowid for processed file """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') + var_log = logging.getLogger(ctx.obj['var_log']) row['vin'] = row['vin'].split() # Check that calculation is defined if more than one variable is passed as input if len(row['vin']) > 1 and row['calculation'] == '': @@ -411,7 +410,7 @@ def process_file(ctx, row, var_log): var_msg = f"{row['table']},{row['variable_id']},{row['tstart']},{row['tend']}" if ctx.obj['override'] or not os.path.exists(expected_file): try: - ret = mop_process(mop_log, var_log) + ret = mop_process() except Exception as e: #something has gone wrong in the processing ret = -1 mop_log.error(e) @@ -464,6 +463,7 @@ def process_row(ctx, row): Sets up variable log file, prepares dictionary with file details and calls process_file """ + pid = os.getpid() record = {} header = ['infile', 'filepath', 'filename', 'vin', 'variable_id', 'table', 'frequency', 'realm', 'timeshot', 'tstart', @@ -478,11 +478,11 @@ def process_row(ctx, row): trange = record['filename'].replace('.nc.','').split("_")[-1] varlog_file = (f"{ctx.obj['var_logs']}/{record['variable_id']}" + f"_{record['table']}_{record['tstart']}.txt") - var_log = config_varlog(ctx.obj['debug'], varlog_file) - ctx.obj['var_log'] = var_log + var_log = config_varlog(ctx.obj['debug'], varlog_file, pid) + ctx.obj['var_log'] = var_log.name var_log.info(f"Start processing") - var_log.debug(f"Process id: {os.getpid()}") - msg = process_file(record, var_log) + var_log.debug(f"Process id: {pid}") + msg = process_file(record) var_log.handlers[0].close() var_log.removeHandler(var_log.handlers[0]) return msg @@ -500,7 +500,7 @@ def pool_handler(ctx, rows, ncpus): list of process_row() outputs returned by futures, these are tuples with status message and code, and rowid """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') executor = concurrent.futures.ProcessPoolExecutor(max_workers=ncpus) futures = [] for row in rows: diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py index 7981e0d..68c60dd 100755 --- a/src/mopper/setup_utils.py +++ 
b/src/mopper/setup_utils.py @@ -35,6 +35,7 @@ import re import click import pathlib +import logging from collections import OrderedDict from datetime import datetime#, timedelta @@ -111,19 +112,20 @@ def read_yaml(fname): return data -def write_yaml(data, fname, logger): +def write_yaml(data, fname, log_name='__name__'): """Write data to a yaml file Parameters ---------- data : dict - The file content as a dictioinary + The file content as a dictionary fname : str Yaml filename Returns ------- """ + logger = logging.getLogger(log_name) try: with open(fname, 'w') as f: yaml.dump(data, f) @@ -153,8 +155,7 @@ def write_config(ctx, fname='exp_config.yaml'): else: config['cmor'][k] = v config['attrs'] = config['cmor'].pop('attrs') - mop_log = config['cmor'].pop('log') - write_yaml(config, fname, mop_log) + write_yaml(config, fname, 'mop_log') return @@ -162,7 +163,7 @@ def write_config(ctx, fname='exp_config.yaml'): def find_custom_tables(ctx): """Returns list of tables files in custom table path """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') tables = [] path = ctx.obj['tables_path'] tables = ctx.obj['tables_path'].rglob("*_*.json") @@ -237,7 +238,7 @@ def filelist_sql(): def write_job(ctx, nrows): """ """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') # define storage flag flag = "storage=gdata/hh5" projects = ctx.obj['addprojs'] + [ctx.obj['project']] @@ -282,7 +283,7 @@ def create_exp_json(ctx, json_cv): fname : str Name of created experiment json file """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') fname = ctx.obj['outpath'] / f"{ctx.obj['exp']}.json" attrs = ctx.obj['attrs'] with json_cv.open(mode='r') as f: @@ -353,7 +354,7 @@ def populate_db(ctx, conn): conn : obj DB connection object """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') cursor = conn.cursor() # process experiment information opts = {} @@ -388,7 +389,7 @@ def populate_db(ctx, conn): return -def add_row(values, cursor, update, mop_log): +def add_row(values, cursor, update): """Add a row to the filelist database table one row specifies the information to produce one output cmip5 file @@ -404,6 +405,7 @@ def add_row(values, cursor, update, mop_log): Returns ------- """ + mop_log = logging.getLogger('mop_log') sql = '''insert into filelist (infile, filepath, filename, vin, variable_id, ctable, frequency, realm, timeshot, tstart, tend, sel_start, sel_end, @@ -466,7 +468,7 @@ def compute_fsize(ctx, opts, grid_size, frequency): Returns ------- """ - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') # set small number for fx frequency so it always create only one file nstep_day = {'10min': 144, '30min': 48, '1hr': 24, '3hr': 8, '6hr': 4, 'day': 1, '10day': 0.1, 'mon': 1/30, @@ -617,7 +619,7 @@ def define_files(ctx, cursor, opts, mp): time interval for each file. This last is determined by maximum file size. These and other files details are saved in filelist db table. 
""" - mop_log = ctx.obj['log'] + mop_log = logging.getLogger('mop_log') update = ctx.obj['update'] exp_start = opts['exp_start'] exp_end = opts['exp_end'] @@ -662,14 +664,15 @@ def define_files(ctx, cursor, opts, mp): opts['sel_end'] = (newtime - half_tstep).strftime('%4Y%m%d%H%M') opts['filepath'], opts['filename'] = build_filename(opts, start, newtime, half_tstep) - rowid = add_row(opts, cursor, update, mop_log) + rowid = add_row(opts, cursor, update) start = newtime return -def count_rows(conn, exp, mop_log): +def count_rows(conn, exp): """Returns number of files to process """ + mop_log = logging.getLogger('mop_log') sql = f"select * from filelist where status=='unprocessed' and exp_id=='{exp}'" rows = query(conn, sql, first=False) mop_log.info(f"Number of rows in filelist: {len(rows)}") diff --git a/tests/conftest.py b/tests/conftest.py index 7f544ac..9f2f190 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -82,11 +82,24 @@ def test_check_timestamp(caplog): @pytest.fixture def varlist_rows(): lines = ["fld_s03i236;tas;K;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature", - "fld_s03i236;;K;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature", - "fld_s03i236;tas;;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature"] + "fld_s00i031;siconca;1;time lat lon;mon;atmos;area: time: mean;AUS2200_A1hr;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction", +"fld_s03i234;hfls;W m-2;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE LATENT HEAT FLUX W/M2;surface_upward_latent_heat_flu"] rows = [l.split(";") for l in lines] return rows +@pytest.fixture +def add_var_out(): + vlist = [{'cmor_var': '', 'input_vars': '', 'calculation': '', 'units': '' + ,'realm': '', 'positive': '', 'version': '', 'cmor_table': ''} + ] + +@pytest.fixture +def map_rows(): + maps = [["fld_s03i236","tas","K","time_0 lat lon","1hr","atmos", + "area: time: mean","","AUS2200_A1hr","float32","22048000","96", + "umnsa_slv_","TEMPERATURE AT 1.5M","air_temperature"]] + return maps + @pytest.fixture def um_multi_time(): '''Return a um stule file with multiple time axes''' diff --git a/tests/test_mopdb_utils.py b/tests/test_mopdb_utils.py index 103f75e..9737c52 100644 --- a/tests/test_mopdb_utils.py +++ b/tests/test_mopdb_utils.py @@ -25,31 +25,20 @@ #from click.testing import CliRunner -@pytest.fixture -def db_log(): - return config_log(False) - - -@pytest.fixture -def db_log_debug(): - return config_log(True) - @pytest.mark.parametrize('idx', [0,1,2]) -def test_add_var(varlist_rows, idx, db_log): +def test_add_var(varlist_rows, idx, caplog): + caplog.set_level(logging.DEBUG, logger='mopdb_log') vlist = [] - vlistout = [["fld_s03i236","tas","K","time_0 lat lon","1hr","atmos", - "area: time: mean","","AUS2200_A1hr","float32","22048000","96", - "umnsa_slv_","TEMPERATURE AT 1.5M","air_temperature"]] - match = ("tas", "", "K") - vlist = add_var(vlist, varlist_rows[idx], match, db_log) - assert vlist == vlistout + match = [("tas", "", "K"), ("siconca", "", ""), ("hfls", "", "")] + vlist = add_var(vlist, varlist_rows[idx], match[idx]) + assert vlist[idx]['cmor_var'] == match[idx][0] def test_build_umfrq(um_multi_time, caplog): - caplog.set_level(logging.DEBUG) + caplog.set_level(logging.DEBUG, logger='mopdb_log') time_axs = [d for d in 
um_multi_time.dims if 'time' in d] - print(time_axs) umfrq = {'time': 'day', 'time_0': '1hr', 'time_1': '30min'} - assert umfrq == build_umfrq(time_axs, um_multi_time, caplog) + out = build_umfrq(time_axs, um_multi_time) + assert umfrq == out From 5d0ab2407d8ed7b8a362acb4b3a1974b7556e91b Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 4 Jul 2024 09:52:24 +1000 Subject: [PATCH 003/137] fixed #147 --- src/mopper/mop_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index 52f01bf..cdb78db 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -648,7 +648,7 @@ def get_axis_dim(ctx, var): @click.pass_context -def check_time_bnds(ictx, bnds, frequency): +def check_time_bnds(ctx, bnds, frequency): """Checks if dimension boundaries from file are wrong""" var_log = logging.getLogger(ctx.obj['var_log']) var_log.debug(f"Time bnds 1,0: {bnds[:,1], bnds[:,0]}") @@ -672,7 +672,7 @@ def require_bounds(ctx): """Returns list of coordinates that require bounds. Reads the requirement directly from .._coordinate.json file """ - var_log.debug(f"Time bnds 1,0: {bnds[:,1], bnds[:,0]}") + var_log = logging.getLogger(ctx.obj['var_log']) fpath = f"{ctx.obj['tpath']}/{ctx.obj['_AXIS_ENTRY_FILE']}" with open(fpath, 'r') as jfile: data = json.load(jfile) @@ -688,7 +688,7 @@ def bnds_change(ctx, axis): """Returns True if calculation/resample changes bnds of specified dimension. """ - var_log.debug(f"Time bnds 1,0: {bnds[:,1], bnds[:,0]}") + var_log = logging.getLogger(ctx.obj['var_log']) dim = axis.name calculation = ctx.obj['calculation'] changed_bnds = False From 9904d0f1f62e4288fdcd1420552e210a9b303b37 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 4 Jul 2024 18:20:38 +1000 Subject: [PATCH 004/137] now varlist and template are 1 step see #150 --- src/mopdb/mopdb.py | 125 ++++++++++++++++++++++++++------------- src/mopdb/mopdb_utils.py | 54 +++++++++++++---- 2 files changed, 129 insertions(+), 50 deletions(-) diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 892b4cb..b0935ff 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -43,18 +43,47 @@ def mopdb_catch(): sys.exit(1) +def require_date(ctx, param, value): + """Changes startdate option in template command from optional to + required if fpath is a directory. + """ + if Path(value).is_dir(): + ctx.command.params[1].required = True + return value + + def db_args(f): - """Define database click arguments + """Define database click options """ constraints = [ click.option('--fname', '-f', type=str, required=True, - help='Input file: used to update db table (mapping/cmor),' + - 'or to pass output model variables (list)'), + help='Input file: used to update db table (mapping/cmor)'), click.option('--dbname', type=str, required=False, default='default', help='Database relative path by default is package access.db'), - click.option('--alias', '-a', type=str, required=False, default=None, - help='Table alias to use when updating cmor var table or creating map template with list' + - ' to keep track of variable definition origin. 
If none passed uses input filename')] + click.option('--alias', '-a', type=str, required=False, default='', + help='Table alias to track definitions origin in cmorvar table.')] + for c in reversed(constraints): + f = c(f) + return f + + +def map_args(f): + """Define mapping click options for varlist and template commands""" + constraints = [ + click.option('--fpath', '-f', type=str, required=True, + callback=require_date, + help='Model output directory or varlist for the same'), + click.option('--startdate', '-d', type=str, required=False, + help='Start date of model run as YYYYMMDD'), + click.option('--version', '-v', required=True, + type=click.Choice(['ESM1.5', 'CM2', 'AUS2200', 'OM2']), + show_default=True, + help='ACCESS version currently only CM2, ESM1.5, AUS2200, OM2'), + click.option('--dbname', type=str, required=False, default='default', + help='Database relative path by default is package access.db'), + click.option('--alias', '-a', type=str, required=False, default='', + help='''Alias to use to keep track of variable definition origin. + If none passed uses input filename''')] for c in reversed(constraints): f = c(f) return f @@ -118,8 +147,10 @@ def check_cmor(ctx, dbname): @mopdb.command(name='table') @db_args @click.option('--label', '-l', required=False, default='CMIP6', - type=click.Choice(['CMIP6', 'AUS2200', 'CM2']), show_default=True, - help='Label indicating origin of CMOR variable definitions. Currently only CMIP6, AUS2200 and CM2') + type=click.Choice(['CMIP6', 'AUS2200', 'CM2', 'OM2']), + show_default=True, + help='''Label indicating origin of CMOR variable definitions. + Currently only CMIP6, AUS2200, CM2 and OM2''') @click.pass_context def cmor_table(ctx, dbname, fname, alias, label): """Create CMIP style table containing new variable definitions @@ -136,7 +167,7 @@ def cmor_table(ctx, dbname, fname, alias, label): fname : str Mapping file??? alias : str - not used here + ??? it is used so what's ahppenw hen not passed? label : str Label indicating preferred cmor variable definitions """ @@ -184,7 +215,7 @@ def cmor_table(ctx, dbname, fname, alias, label): if len(v[4].split()) != len(record[9].split()): mopdb_log.warning(f"Variable {v[0]} number of dims orig/table are different: {v[4]}/{record[9]}") var_list.append(definition) - write_cmor_table(var_list, alias, mopdb_log) + write_cmor_table(var_list, alias) conn.close() return @@ -206,15 +237,15 @@ def update_cmor(ctx, dbname, fname, alias): fname : str Name of json input file with records to add alias : str - Indicates origin of records to add, if None json filename - base is used instead + Indicates origin of records to add, if '' (default) json + filename base is used instead Returns ------- """ mopdb_log = logging.getLogger('mopdb_log') - if alias is None: + if alias == '': alias = fname.split("/")[-1] alias = alias.replace('.json', '') mopdb_log.info(f"Adding {alias} to variable name to track origin") @@ -260,35 +291,46 @@ def update_cmor(ctx, dbname, fname, alias): @mopdb.command(name='template') -@db_args -@click.option('--version', '-v', required=True, - type=click.Choice(['ESM1.5', 'CM2', 'AUS2200', 'OM2']), show_default=True, - help='ACCESS version currently only CM2, ESM1.5, AUS2200, OM2') +@map_args @click.pass_context -def map_template(ctx, dbname, fname, alias, version): +def map_template(ctx, fpath, startdate, dbname, version, alias): """Writes a template of mapping file needed to run setup. First opens database and check if variables match any in mapping table. 
If not tries to partially match them. + It can get as input the directory containing the output in + which case it will first call model_vars() (varlist command) + or the file output of the same if already available. + Parameters ---------- ctx : obj Click context object + fpath : str + Path of csv input file with output variables to map or + of directory containing output files to scan + startdate : str + Date or other string to match to individuate one file per type dbname : str Database relative path (default is data/access.db) - fname : str - Name of csv input file with output variables to map - alias : str - Indicates origin of records to add, if None csv filename - base is used instead version : str Version of ACCESS model used to generate variables + alias : str + Indicates origin of records to add, if '' csv filename + base is used instead Returns ------- """ mopdb_log = logging.getLogger('mopdb_log') - if alias is None: + # work out if fpath is varlist or path to output + fpath = Path(fpath) + if fpath.is_file(): + fname = fpath.name + else: + mopdb_log.debug(f"Calling model_vars() from template: {fpath}") + fname = model_vars(fpath, startdate, dbname, version, alias) + if alias == '': alias = fname.split(".")[0] # connect to db, check first if db exists or exit if dbname == 'default': @@ -298,6 +340,7 @@ def map_template(ctx, dbname, fname, alias, version): with open(fname, 'r') as csvfile: reader = csv.DictReader(csvfile, delimiter=';') rows = list(reader) + check_varlist(rows, fname) # return lists of fully/partially matching variables and stash_vars # these are input_vars for calculation defined in already in mapping db full, no_ver, no_frq, stdn, no_match, stash_vars = parse_vars(conn, @@ -340,7 +383,7 @@ def update_map(ctx, dbname, fname, alias): fname : str Name of csv input file with mapping records alias : str - Indicates origin of records to add, if None csv filename + Indicates origin of records to add, if '' csv filename base is used instead Returns @@ -373,26 +416,23 @@ def update_map(ctx, dbname, fname, alias): @mopdb.command(name='varlist') -@click.option('--indir', '-i', type=str, required=True, - help='Converted model output directory') -@click.option('--startdate', '-d', type=str, required=True, - help='Start date of model run as YYYYMMDD') -@click.option('--dbname', type=str, required=False, default='default', - help='Database relative path by default is package access.db') -@click.option('--version', '-v', required=False, default='CM2', - type=click.Choice(['ESM1.5', 'CM2', 'AUS2200', 'OM2']), show_default=True, - help='ACCESS version currently only CM2, ESM1.5, AUS2200, OM2') +@map_args @click.pass_context -def model_vars(ctx, indir, startdate, dbname, version): +def list_vars(ctx, fpath, startdate, dbname, version, alias): + """Calls model_vars to generate list of variables""" + fname = model_vars(fpath, startdate, dbname, version, alias) + + +@click.pass_context +def model_vars(ctx, fpath, startdate, dbname, version, alias): """Read variables from model output opens one file for each kind, save variable list as csv file - alias is not used so far Parameters ---------- ctx : obj Click context object - indir : str + fpath : str Path for model output files startdate : str Date or other string to match to individuate one file per type @@ -400,18 +440,24 @@ def model_vars(ctx, indir, startdate, dbname, version): Database relative path (default is data/access.db) version : str Version of ACCESS model to use as preferred mapping + alias : str + Used for output 
filename: 'varlist_'. If '', + 'varlist_mopdb' is used instead Returns ------- + fname : str + Name of output varlist file """ + mopdb_log = logging.getLogger('mopdb_log') # connect to db, this will create one if not existing if dbname == 'default': dbname = import_files('data').joinpath('access.db') conn = db_connect(dbname) - write_varlist(conn, indir, startdate, version) + fname = write_varlist(conn, fpath, startdate, version, alias) conn.close() - return + return fname @mopdb.command(name='del') @@ -456,4 +502,3 @@ def remove_record(ctx, dbname, table, pair): # select, confirm, delete record/s delete_record(conn, table, col, pair) return - diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index d4de94f..b9e0c4d 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -523,7 +523,7 @@ def get_cell_methods(attrs, dims): return val, frqmod -def write_varlist(conn, indir, startdate, version): +def write_varlist(conn, indir, startdate, version, alias): """Based on model output files create a variable list and save it to a csv file. Main attributes needed to map output are provided for each variable @@ -533,6 +533,14 @@ def write_varlist(conn, indir, startdate, version): files = list_files(indir, sdate) mopdb_log.debug(f"Found files: {files}") patterns = [] + if alias == '': + alias = 'mopdb' + fname = f"varlist_{alias}.csv" + fcsv = open(fname, 'w') + fwriter = csv.writer(fcsv, delimiter=';') + fwriter.writerow(["name", "cmor_var", "units", "dimensions", + "frequency", "realm", "cell_methods", "cmor_table", "vtype", + "size", "nsteps", "filename", "long_name", "standard_name"]) for fpath in files: # get filename pattern until date match mopdb_log.debug(f"Filename: {fpath.name}") @@ -545,12 +553,7 @@ def write_varlist(conn, indir, startdate, version): pattern_list = list_files(indir, f"{fpattern}*") nfiles = len(pattern_list) mopdb_log.debug(f"File pattern: {fpattern}") - fcsv = open(f"{fpattern}.csv", 'w') - fwriter = csv.writer(fcsv, delimiter=';') - fwriter.writerow(["name", "cmor_var", "units", "dimensions", - "frequency", "realm", "cell_methods", "cmor_table", - "vtype", "size", "nsteps", "filename", "long_name", - "standard_name"]) + fwriter.writerow([f"#{fpattern}"]) # get attributes for the file variables realm = get_realm(fpath, version) ds = xr.open_dataset(fpath, decode_times=False) @@ -587,9 +590,9 @@ def write_varlist(conn, indir, startdate, version): nsteps, fpattern, attrs.get('long_name', ""), attrs.get('standard_name', "")] fwriter.writerow(line) - fcsv.close() mopdb_log.info(f"Variable list for {fpattern} successfully written") - return + fcsv.close() + return fname def read_map_app4(fname): @@ -644,7 +647,7 @@ def read_map(fname, alias): notes = row[16] else: notes = row[15] - if alias is None: + if alias is '': alias = fname.replace(".csv","") var_list.append(row[:11] + [notes, alias]) return var_list @@ -883,6 +886,7 @@ def write_map_template(conn, full, no_ver, no_frq, stdn, cell_methods, positive, cmor_table, version, vtype, size, nsteps, filename, long_name, standard_name """ + mopdb_log = logging.getLogger('mopdb_log') keys = ['cmor_var', 'input_vars', 'calculation', 'units', 'dimensions', 'frequency', 'realm', 'cell_methods', @@ -919,6 +923,7 @@ def write_map_template(conn, full, no_ver, no_frq, stdn, def write_vars(vlist, fwriter, div, conn=None, sortby='cmor_var'): """ """ + mopdb_log = logging.getLogger('mopdb_log') if len(vlist) > 0: if type(div) is str: @@ -938,6 +943,7 @@ def check_realm_units(conn, var): """Checks that realm and 
units are consistent with values in cmor table. """ + mopdb_log = logging.getLogger('mopdb_log') vname = f"{var['cmor_var']}-{var['cmor_table']}" if var['cmor_table'] is None or var['cmor_table'] == "": @@ -965,6 +971,7 @@ def check_realm_units(conn, var): def get_realm(fpath, version): '''Return realm for variable in files or NArealm''' + mopdb_log = logging.getLogger('mopdb_log') if version == 'AUS2200': realm = 'atmos' @@ -980,3 +987,30 @@ def get_realm(fpath, version): mopdb_log.info(f"Couldn't detect realm from path, setting to NArealm") mopdb_log.debug(f"Realm is {realm}") return realm + + +def check_varlist(rows, fname): + """Checks that varlist written to file has sensible information for frequency and realm + to avoid incorrect mapping to be produced. + + At the moment we're checking only frequency and realm as they can be missed or wrong + depending on the file structure. + + Parameters + ---------- + rows : list(dict) + list of variables to match + """ + + mopdb_log = logging.getLogger('mopdb_log') + frq_list = ['min', 'hr', 'day', 'mon', 'yr'] + realm_list = ['ice', 'ocean', 'atmos', 'land'] + for row in rows: + if row['name'][0] == "#" or row['name'] == 'name': + continue + elif (not any( x in row['frequency'] for x in frq_list) + or row['realm'] not in realm_list): + mopdb_log.error(f""" Check frequency and realm in {fname}. + Some values might be invalid and need fixing""") + sys.exit() + return From 29567a35e821a59f9a7a861f65e2ea9198600129 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 5 Jul 2024 15:08:39 +1000 Subject: [PATCH 005/137] moved options #151, #113 --- conda/meta.yaml | 2 +- docs/gettingstarted.rst | 62 ++++++++++++++---------------- docs/mopdb_command.rst | 49 ++++++++++++++---------- src/mopdb/mopdb.py | 8 ++-- src/mopper/mop_setup.py | 8 ++++ src/mopper/mopper.py | 80 ++++++++++++++++++++++++++++----------- src/mopper/setup_utils.py | 7 ++-- 7 files changed, 132 insertions(+), 84 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index b0cb321..f20a79a 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,4 +1,4 @@ -{% set version = "0.6.1" %} +{% set version = "1.0.0" %} package: name: mopper version: {{ version }} diff --git a/docs/gettingstarted.rst b/docs/gettingstarted.rst index 397652e..666e89a 100644 --- a/docs/gettingstarted.rst +++ b/docs/gettingstarted.rst @@ -3,46 +3,38 @@ Starting with MOPPeR A typical workflow to post-process an ACCESS or UM model output requires three steps. -Step1: get a list of variables from the raw output --------------------------------------------------- - - *mopdb varlist -i -d * - -`mopdb varlist` will output one or more `csv` files with a detailed list of variables, one list for each pattern of output files. - -.. code-block:: console - - $ mopdb varlist -i /scratch/../exp -d 20120101 - Opened database ~/.local/lib/python3.10/site-packages/data/access.db successfully - Variable list for ocean_scalar.nc- successfully written - Variable list for ocean_month.nc- successfully written - Variable list for ocean_daily.nc- successfully written - -.. csv-table:: Example of varlist output - :file: varlist_example.csv - :delim: ; - -The argument is used to reduce the number of files to check. The tool will recognise anyway a repeated pattern and only add a list of variable for the same pattern once. - -Step2: create a template for a mapping file +Step1: create a template for a mapping file ------------------------------------------- - *mopdb template -i -v -a * + *mopdb template -f -v -a * .. 
code-block:: console - $ mopdb template -f ocean.csv -v OM2 -a ocnmon - Opened database ~/.local/lib/python3.10/site-packages/data/access.db successfully - Derived variables: {'msftyrho', 'msftmrho', 'hfds', 'msftmz', 'msftyz'} - Changing advectsweby-CM2_mon units from Watts/m^2 to W m-2 - Changing areacello-CMIP6_Ofx units from m^2 to m2 - Variable difvho-CM2_Omon not found in cmor table + $ mopdb template -f /scratch/.../exp1/atmos -m 095101 -v CM2 -a exp1 + Opened database /home/581/pxp581/.local/lib/python3.10/site-packages/data/access.db successfully + Found more than 1 definition for fld_s16i222: + [('psl', 'AUS2200', 'AUS2200_A10min', '10minPt'), ('psl', 'AUS2200', 'AUS2200_A1hr', '1hr')] + Using psl from AUS2200_A10min + Variable list for cw323a.pm successfully written + Opened database /home/581/pxp581/.local/lib/python3.10/site-packages/data/access.db successfully + Derived variables: {'treeFracBdlEvg', 'grassFracC4', 'shrubFrac', 'prc', 'mrsfl', 'landCoverFrac', 'mmrbc', 'mmrso4', 'theta24', 'sftgif', 'treeFracNdlEvg', 'snw', 'rtmt', 'nwdFracLut', 'sifllatstop', 'prw', 'mrfso', 'rlus', 'mrsll', 'baresoilFrac', 'c4PftFrac', 'wetlandFrac', 'mrro', 'c3PftFrac', 'treeFracBdlDcd', 'od550lt1aer', 'treeFracNdlDcd', 'residualFrac', 'wetss', 'sbl', 'vegFrac', 'rsus', 'cropFrac', 'mmrdust', 'grassFrac', 'mmrss', 'od550aer', 'hus24', 'dryss', 'fracLut', 'mrlso', 'mc', 'od440aer', 'grassFracC3', 'nep', 'mmroa', 'cropFracC3', 'snm', 'agesno'} + Changing cl-CMIP6_Amon units from 1 to % + Changing cli-CMIP6_Amon units from 1 to kg kg-1 + Changing clt-CMIP6_Amon units from 1 to % + Changing clw-CMIP6_Amon units from 1 to kg kg-1 + Variable husuvgrid-CM2_mon not found in cmor table + ... `mopdb template` takes as input: - * the output/s of `varlist` - To get one template for the all variable concatenate the output on `varlist` into one file first. - * the access version to use as preferred - * an optional alias, if omitted the varlist filename will be used. Based on the example: `map_ocnmon.csv` or `map_ocean.csv` if omitted. + * -f/--fpath : the path to the model output + * -m/--match : used to identify files' patterns. The tool will only add a list of variables for the same pattern once. + * -v/--version : the access version to use as preferred mapping. ESM1.5, CM2, OM2 and AUS2200 are currently available. + * -a/--alias : an optional alias, if omitted default names will be used for the output files. + +Alternatively a list of variables can be created separately using the *varlist* command and this can be passed directly to template using the *fpath* option. + + *mopdb template -f -v -a * It produces a csv file with a list of all the variables from raw output mapped to cmip style variables. These mappings also take into account the frequency and include variables that can be potentially calculated with the listed fields. The console output lists these, as shown above. @@ -51,18 +43,20 @@ The mappings can be different between different version and/or configurations of Starting with version 0.6 the list includes matches based on the standard_name, as these rows often list more than one option per field, it's important to either edit or remove these rows before using the mapping file. The :doc:`Customing section ` covers what to do for an experiment using a new configuration which is substantially different from the ones which are available. +It also provides an intermediate varlist_.csv file that shows the information derived directly from the files. 
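+For example, a quick way to eyeball the generated file before mapping (a sketch: `varlist_exp1.csv` assumes the `exp1` alias used above, and the ';' delimiter mirrors how `template` itself reads the file):
+
+.. code-block:: python
+
+    import csv
+
+    with open('varlist_exp1.csv') as csvfile:
+        for row in csv.DictReader(csvfile, delimiter=';'):
+            # skip the '#<file-pattern>' marker lines
+            if not row['name'].startswith('#'):
+                print(row['name'], row['frequency'], row['realm'])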
This can be useful to debug in case of issues with the mapping. This file is checked before the mapping step to make sure the tool has detected sensible frequency and realm, if the check fails the mapping won't proceed but the varlist file can be edited appropriately. .. warning:: Always check that the resulting template is mapping the variables correctly. This is particularly true for derived variables. Comment lines are inserted to give some information on what assumptions were done for each group of mappings. + The se -Step3: Set up the working environment +Step2: Set up the working environment ------------------------------------- *mop -c setup* .. code-block:: console - +https://climate-cms.org/posts/2023-05-31-vscode-are.html $ mop -c exp_conf.yaml setup Simulation to process: cy286 Setting environment and creating working directory diff --git a/docs/mopdb_command.rst b/docs/mopdb_command.rst index 32d712c..421f39c 100644 --- a/docs/mopdb_command.rst +++ b/docs/mopdb_command.rst @@ -54,29 +54,17 @@ e.g. use aus2200 for mappings related to the AUS2200 configuration: A user that wants to create a mapping table for another AUS2200 simulation can use this value to select appropriate mappings (see how to do that below). -Get a list of variables from the model output ---------------------------------------------- +Create a mapping file +--------------------- .. code-block:: - mopdb varlist -i -d - -this will create for each output file a list of variables with useful attributes -These can be concatenated into one or used to create separate mappings. - -.. _varlist example: -.. dropdown:: Example output of varlist +This can be done by providing the model output path and a pattern to match or directly a varlist file - name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;filename;long_name;standard_name - fld_s00i004;theta;K;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CM2_mon;float32;9400320;12;cw323a.pm;THETA AFTER TIMESTEP;air_potential_temperature - fld_s00i010;hus;1;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;9400320;12;cw323a.pm;SPECIFIC HUMIDITY AFTER TIMESTEP;specific_humidity - fld_s00i024;ts;K;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE TEMPERATURE AFTER TIMESTEP;surface_temperature - fld_s00i030;;1;time lat lon;mon;atmos;area: time: mean;;float32;110592;12;cw323a.pm;LAND MASK (No halo) (LAND=TRUE);land_binary_mask - fld_s00i031;siconca;1;time lat lon;mon;atmos;area: time: mean;CMIP6_SImon;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction - ... +From output path: + + mopdb template -f -m -v -Create a mapping file starting from variable list -------------------------------------------------- -.. code-block:: +From varlist file: mopdb template -f -v @@ -119,6 +107,29 @@ The other groups of records require checking, as either the version or the frequ ... +Get a list of variables from the model output +--------------------------------------------- +.. code-block:: + + mopdb varlist -f -m + +this will create a list of variables with useful attributes + +.. _varlist example: +.. 
dropdown:: Example output of varlist

    name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;filename;long_name;standard_name
    #cw323a.pm
    fld_s00i004;theta;K;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CM2_mon;float32;9400320;12;cw323a.pm;THETA AFTER TIMESTEP;air_potential_temperature
    fld_s00i010;hus;1;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;9400320;12;cw323a.pm;SPECIFIC HUMIDITY AFTER TIMESTEP;specific_humidity
    fld_s00i024;ts;K;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE TEMPERATURE AFTER TIMESTEP;surface_temperature
    fld_s00i030;;1;time lat lon;mon;atmos;area: time: mean;;float32;110592;12;cw323a.pm;LAND MASK (No halo) (LAND=TRUE);land_binary_mask
    fld_s00i031;siconca;1;time lat lon;mon;atmos;area: time: mean;CMIP6_SImon;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction
    ...

Doing this step separately can be useful if the model output uses an unusual directory structure, as important attributes like frequency and realm, which drive the mapping, are then more likely to be missing or incorrect. In that case it can be more efficient to process the different kinds of files separately first, make sure frequency and realm are correct, and then combine them into one file to pass to template.
The template command will stop execution if it detects potentially invalid values for these fields, so that the varlist file can be fixed before a mapping is produced.

Check which variables aren't yet defined
----------------------------------------
.. code-block:: console
diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py
index b0935ff..7a2e744 100644
--- a/src/mopdb/mopdb.py
+++ b/src/mopdb/mopdb.py
@@ -72,9 +72,11 @@ def map_args(f):
     constraints = [
         click.option('--fpath', '-f', type=str, required=True,
             callback=require_date,
-            help='Model output directory or varlist for the same'),
-        click.option('--startdate', '-d', type=str, required=False,
-            help='Start date of model run as YYYYMMDD'),
+            help=('''Path for model output files. For "template"
+                command can also be file generated by varlist step''')),
+        click.option('--match', '-m', type=str, required=False,
+            help=('''String to match output files. 
Most often + the timestamp from one of the output files''')), click.option('--version', '-v', required=True, type=click.Choice(['ESM1.5', 'CM2', 'AUS2200', 'OM2']), show_default=True, diff --git a/src/mopper/mop_setup.py b/src/mopper/mop_setup.py index 7040270..90ba47e 100755 --- a/src/mopper/mop_setup.py +++ b/src/mopper/mop_setup.py @@ -196,6 +196,14 @@ def setup_env(ctx): else: cdict['tables_path'] = appdir / cdict['tables_path'] cdict['ancils_path'] = appdir / cdict['ancils_path'] + # conda env to run job + if cdict['conda_env'] == 'default': + cdict['conda_env'] = '' + else: + path = Path(cdict['conda_env']) + if not path.is_absolute(): + path = appdir / path + cdict['conda_env'] = f"source {str(path)}" # Output subdirectories outpath = cdict['outpath'] cdict['maps'] = outpath / "maps" diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 5418309..6313edd 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -53,13 +53,22 @@ def mop_catch(): sys.exit(1) +def mop_args(f): + """Define common click options + """ + constraints = [ + click.option('--debug', is_flag=True, default=False, + help="Show debug info"), + click.option('--cfile', '-c', type=str, required=True, + help='Experiment configuration as yaml file')] + for c in reversed(constraints): + f = c(f) + return f + + @click.group(context_settings=dict(help_option_names=['-h', '--help'])) -@click.option('--cfile', '-c', type=str, required=True, - help='Experiment configuration as yaml file') -@click.option('--debug', is_flag=True, default=False, - help="Show debug info") @click.pass_context -def mop(ctx, cfile, debug): +def mop(ctx): """Main command with 2 sub-commands: - setup to setup the job to run - run to execute the post-processing @@ -68,33 +77,39 @@ def mop(ctx, cfile, debug): ---------- ctx : obj Click context object + """ + #ctx.obj = {} + pass + + +@mop.command(name='run') +@mop_args +#@click.option('--cfile', '-c', type=str, required=True, +# help='Experiment configuration as yaml file') +@click.pass_context +def mop_run(ctx, cfile, debug): + """Subcommand that executes the processing. + + Use the configuration yaml file created in setup step as input. + + Parameters + ---------- cfile : str Name of yaml configuration file, run sub-command uses the configuration created by setup debug : bool If true set logging level to debug """ + + # load config file with open(cfile, 'r') as yfile: cfg = yaml.safe_load(yfile) ctx.obj = cfg['cmor'] ctx.obj['attrs'] = cfg['attrs'] - # set up main mop log - if ctx.invoked_subcommand == 'setup': - mop_log = config_log(debug, ctx.obj['appdir'], stream_level=logging.INFO) - else: - mop_log = config_log(debug, ctx.obj['appdir']) + # set up logger + mop_log = config_log(debug, ctx.obj['appdir']) ctx.obj['debug'] = debug mop_log.info(f"Simulation to process: {ctx.obj['exp']}") - - -@mop.command(name='run') -@click.pass_context -def mop_run(ctx): - """Subcommand that executes the processing. - - Use the configuration yaml file created in setup step as input. 
- """ - mop_log = logging.getLogger('mop_log') # Open database and retrieve list of files to create conn = db_connect(ctx.obj['database']) c = conn.cursor() @@ -117,11 +132,12 @@ def mop_run(ctx): return +@mop.command(name='setup') +@mop_args @click.option('--update', is_flag=True, default=False, help="Update current settings, keeping db and logs") -@mop.command(name='setup') @click.pass_context -def mop_setup(ctx, update): +def mop_setup(ctx, cfile, debug, update): """Setup of mopper processing job and working environment. * Defines and creates paths @@ -131,8 +147,26 @@ def mop_setup(ctx, update): * creates/updates database filelist table to list files to create * finalises configuration and save in new yaml file * writes job executable file and submits (optional) to queue + + Parameters + ---------- + cfile : str + Name of yaml configuration file, run sub-command uses the + configuration created by setup + debug : bool + If True set logging level to debug + update : bool + If True update current workding directory (default is False) """ - mop_log = logging.getLogger('mop_log') + + # load config file + with open(cfile, 'r') as yfile: + cfg = yaml.safe_load(yfile) + ctx.obj = cfg['cmor'] + ctx.obj['attrs'] = cfg['attrs'] + ctx.obj['debug'] = debug + # set up logger + mop_log = config_log(debug, ctx.obj['appdir'], stream_level=logging.INFO) # then add setup_env to config mop_log.info("Setting environment and creating working directory") ctx.obj['update'] = update diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py index 68c60dd..e0341fa 100755 --- a/src/mopper/setup_utils.py +++ b/src/mopper/setup_utils.py @@ -700,8 +700,6 @@ def define_template(ctx, flag, nrows): cdict : dict Dictionary with cmor settings for experiment """ - # temporarily removing this as it only works for conda envs - #{os.path.dirname(sys.executable)}/mop -c {ctx.obj['exp']}_config.yaml run template = f"""#!/bin/bash #PBS -P {ctx.obj['project']} #PBS -q {ctx.obj['queue']} @@ -717,9 +715,10 @@ def define_template(ctx, flag, nrows): # for a list of packages module use /g/data/hh5/public/modules -module load conda/analysis3 +module load conda/analysis3-unstable +{ctx.obj['conda_env']} cd {ctx.obj['appdir']} -mop -c {ctx.obj['exp']}_config.yaml run +mop run -c {ctx.obj['exp']}_config.yaml echo 'APP completed for exp {ctx.obj['exp']}.'""" return template From f8b1a24a4b96d781f4fd5c33bf1a4a3d5d8e76ac Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 5 Jul 2024 16:40:28 +1000 Subject: [PATCH 006/137] minor fix to actions and solved #150 --- .github/workflows/mopper-conda.yaml | 8 ++++---- src/mopdb/mopdb.py | 18 +++++++++--------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 98ea1a7..b4ecaa5 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -1,11 +1,11 @@ -name: xmhw-conda-install-test +name: mopper-conda-install-test #on: [push] on: push: branches: - main - - newrelease + - prerelease pull_request: branches: - main @@ -38,8 +38,8 @@ jobs: # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics # - name: Install package - # run: | - # conda run python setup.py install + run: | + conda build conda/meta.yaml - name: Test with pytest run: | conda install pytest coverage codecov diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 7a2e744..4a63bba 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -44,7 +44,7 @@ def mopdb_catch(): def require_date(ctx, param, value): - """Changes startdate option in template command from optional to + """Changes match option in template command from optional to required if fpath is a directory. """ if Path(value).is_dir(): @@ -295,7 +295,7 @@ def update_cmor(ctx, dbname, fname, alias): @mopdb.command(name='template') @map_args @click.pass_context -def map_template(ctx, fpath, startdate, dbname, version, alias): +def map_template(ctx, fpath, match, dbname, version, alias): """Writes a template of mapping file needed to run setup. First opens database and check if variables match any in mapping table. If not tries to partially match them. @@ -311,7 +311,7 @@ def map_template(ctx, fpath, startdate, dbname, version, alias): fpath : str Path of csv input file with output variables to map or of directory containing output files to scan - startdate : str + match : str Date or other string to match to individuate one file per type dbname : str Database relative path (default is data/access.db) @@ -331,7 +331,7 @@ def map_template(ctx, fpath, startdate, dbname, version, alias): fname = fpath.name else: mopdb_log.debug(f"Calling model_vars() from template: {fpath}") - fname = model_vars(fpath, startdate, dbname, version, alias) + fname = model_vars(fpath, match, dbname, version, alias) if alias == '': alias = fname.split(".")[0] # connect to db, check first if db exists or exit @@ -420,13 +420,13 @@ def update_map(ctx, dbname, fname, alias): @mopdb.command(name='varlist') @map_args @click.pass_context -def list_vars(ctx, fpath, startdate, dbname, version, alias): +def list_vars(ctx, fpath, match, dbname, version, alias): """Calls model_vars to generate list of variables""" - fname = model_vars(fpath, startdate, dbname, version, alias) + fname = model_vars(fpath, match, dbname, version, alias) @click.pass_context -def model_vars(ctx, fpath, startdate, dbname, version, alias): +def model_vars(ctx, fpath, match, dbname, version, alias): """Read variables from model output opens one file for each kind, save variable list as csv file @@ -436,7 +436,7 @@ def model_vars(ctx, fpath, startdate, dbname, version, alias): Click context object fpath : str Path for model output files - startdate : str + match : str Date or other string to match to individuate one file per type dbname : str Database relative path (default is data/access.db) @@ -457,7 +457,7 @@ def model_vars(ctx, fpath, startdate, dbname, version, alias): if dbname == 'default': dbname = import_files('data').joinpath('access.db') conn = db_connect(dbname) - fname = write_varlist(conn, fpath, startdate, version, alias) + fname = write_varlist(conn, fpath, match, version, alias) conn.close() return fname From ab3cd299d3604d3d3fe559905363cfe4f254cd52 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Sat, 6 Jul 2024 18:31:26 +1000 Subject: [PATCH 007/137] progress in detecting relam and frequency --- src/mopdb/mopdb_utils.py | 84 ++++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index b9e0c4d..f723025 100644 --- 
a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -426,50 +426,67 @@ def delete_record(conn, table, col, pairs): def list_files(indir, match): """Returns list of files matching input directory and match""" mopdb_log = logging.getLogger('mopdb_log') - files = [x for x in Path(indir).rglob(f"{match}") if x.is_file()] - mopdb_log.debug(f"{indir}/**/*{match}*") + mopdb_log.debug(f"Pattern to list files: {indir}/**/*{match}*") + files = [x for x in Path(indir).rglob(f"{match}") if x.is_file() + and '.nc' in str(x)] + files.sort(key=lambda x:x.name) + mopdb_log.debug(f"Files after sorting: {files}") return files -def build_umfrq(time_axs, ds): +def get_file_frq(ds, fnext): """Return a dictionary with frequency for each time axis. Frequency is inferred by comparing interval between two consecutive timesteps with expected interval at a given frequency. Order time_axis so ones with only one step are last, so we can use file frequency (interval_file) inferred from other time axes. + This is called if there are more than one time axis in file + (usually only UM) or if frequency can be guessed from filename. """ mopdb_log = logging.getLogger('mopdb_log') - umfrq = {} + frq = {} int2frq = {'dec': 3652.0, 'yr': 365.0, 'mon': 30.0, 'day': 1.0, '6hr': 0.25, '3hr': 0.125, '1hr': 0.041667, '30min': 0.020833, '10min': 0.006944} + # retrieve all time axes + time_axs = [d for d in ds.dims if 'time' in d] + time_axs_len = set(len(ds[d]) for d in time_axs) time_axs.sort(key=lambda x: len(ds[x]), reverse=True) - mopdb_log.debug(f"in build_umfrq, time_axs: {time_axs}") + mopdb_log.debug(f"in get_file_frq, time_axs: {time_axs}") + max_len = len(ds[time_axs[0]]) + # if all time axes have only 1 timestep we cannot infer frequency + # so we open also next file but get only time axs + if max_len == 1: + dsnext = xr.open_dataset(fnext, decode_times = False) + time_axs2 = [d for d in dsnext.dims if 'time' in d] + ds = xr.concat([ds[time_axs], dsnext[time_axs2]], dim='time') + time_axs = [d for d in ds.dims if 'time' in d] + time_axs_len = set(len(ds[d]) for d in time_axs) + time_axs.sort(key=lambda x: len(ds[x]), reverse=True) for t in time_axs: mopdb_log.debug(f"len of time axis {t}: {len(ds[t])}") if len(ds[t]) > 1: - interval = (ds[t][1]-ds[t][0]).values / np.timedelta64(1, 'D') -#astype('timedelta64[m]') / 1440.0 - interval_file = (ds[t][-1] -ds[t][0]).values / np.timedelta64(1, 'D') + interval = (ds[t][1]-ds[t][0]).values #/ np.timedelta64(1, 'D') + interval_file = (ds[t][-1] -ds[t][0]).values #/ np.timedelta64(1, 'D') else: interval = interval_file mopdb_log.debug(f"interval 2 timesteps for {t}: {interval}") - mopdb_log.debug(f"interval entire file {t}: {interval_file}") + #mopdb_log.debug(f"interval entire file {t}: {interval_file}") for k,v in int2frq.items(): if math.isclose(interval, v, rel_tol=0.05): - umfrq[t] = k + frq[t] = k break - return umfrq + return frq -def get_frequency(realm, fname, ds): +def get_frequency(realm, fname, ds, fnext): """Return frequency based on realm and filename For UM files checks if more than one time axis is present and if so returns dictionary with frequency: variable list """ mopdb_log = logging.getLogger('mopdb_log') - umfrq = {} + frq_dict = {} frequency = 'NAfrq' if realm == 'atmos': fbits = fname.split("_") @@ -479,14 +496,8 @@ def get_frequency(realm, fname, ds): frequency = fix_frq[frequency] else: frequency = frequency.replace('hPt', 'hrPt') - # retrieve all time axes and check their frequency - time_axs = [d for d in ds.dims if 'time' in d] - time_axs_len = 
set(len(ds[d]) for d in time_axs) - if len(time_axs_len) == 1: - umfrq = {} - else: - umfrq = build_umfrq(time_axs, ds) - mopdb_log.debug(f"umfrq: {umfrq}") + frq_dict = get_file_frq(ds, fnext) + mopdb_log.debug(f"frq_dict: {frq_dict}") elif realm == 'ocean': # if I found scalar or monthly in any of fbits if any(x in fname for x in ['scalar', 'month']): @@ -498,8 +509,13 @@ def get_frequency(realm, fname, ds): frequency = 'mon' elif '_d.' in fname: frequency = 'day' + if frequency == 'NAfrq': + frq_dict = get_file_frq(ds, fnext) + # if only one frequency detected empty dict + if len(frq_dict) == 1: + frequency = frq_dict.popitem()[1] mopdb_log.debug(f"Frequency: {frequency}") - return frequency, umfrq + return frequency, frq_dict def get_cell_methods(attrs, dims): @@ -523,15 +539,13 @@ def get_cell_methods(attrs, dims): return val, frqmod -def write_varlist(conn, indir, startdate, version, alias): +def write_varlist(conn, indir, match, version, alias): """Based on model output files create a variable list and save it to a csv file. Main attributes needed to map output are provided for each variable """ mopdb_log = logging.getLogger('mopdb_log') - sdate = f"*{startdate}*" - files = list_files(indir, sdate) - mopdb_log.debug(f"Found files: {files}") + files = list_files(indir, f"*{match}*") patterns = [] if alias == '': alias = 'mopdb' @@ -541,10 +555,10 @@ def write_varlist(conn, indir, startdate, version, alias): fwriter.writerow(["name", "cmor_var", "units", "dimensions", "frequency", "realm", "cell_methods", "cmor_table", "vtype", "size", "nsteps", "filename", "long_name", "standard_name"]) - for fpath in files: + for i, fpath in enumerate(files): # get filename pattern until date match mopdb_log.debug(f"Filename: {fpath.name}") - fpattern = fpath.name.split(startdate)[0] + fpattern = fpath.name.split(match)[0] # adding this in case we have a mix of yyyy/yyyymn date stamps # as then a user would have to pass yyyy only and would get 12 files for some of the patterns if fpattern in patterns: @@ -555,10 +569,12 @@ def write_varlist(conn, indir, startdate, version, alias): mopdb_log.debug(f"File pattern: {fpattern}") fwriter.writerow([f"#{fpattern}"]) # get attributes for the file variables - realm = get_realm(fpath, version) - ds = xr.open_dataset(fpath, decode_times=False) + ds = xr.open_dataset(str(pattern_list[0]), decode_times=False) + realm = get_realm(fpath, version, ds) coords = [c for c in ds.coords] + ['latitude_longitude'] - frequency, umfrq = get_frequency(realm, fpath.name, ds) + #pass next file in case of 1 timestep per file and no frq in name + fnext = str(pattern_list[1]) + frequency, umfrq = get_frequency(realm, fpath.name, ds, fnext) multiple_frq = False if umfrq != {}: multiple_frq = True @@ -569,7 +585,7 @@ def write_varlist(conn, indir, startdate, version, alias): mopdb_log.debug(f"Variable: {v.name}") # get size in bytes of grid for 1 timestep and number of timesteps vsize = v[0].nbytes - nsteps = nfiles * v.shape[0] + nsteps = nfiles * v.shape[0]/2 # assign specific frequency if more than one is available if multiple_frq: if 'time' in v.dims[0]: @@ -969,7 +985,7 @@ def check_realm_units(conn, var): return var -def get_realm(fpath, version): +def get_realm(fpath, version, ds): '''Return realm for variable in files or NArealm''' mopdb_log = logging.getLogger('mopdb_log') @@ -978,7 +994,7 @@ def get_realm(fpath, version): else: realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] if x in fpath.parts][0] - if realm == 'atm': + if realm == 'atm' or 'um_version' in 
ds.attrs.keys(): realm = 'atmos' elif realm == 'ocn': realm = 'ocean' From d24bea219d88ca30cb3b0f2be9675154215b9be6 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Mon, 8 Jul 2024 12:36:52 +1000 Subject: [PATCH 008/137] removed unneccessary adjustment to variable size from mopdb_utils.py --- src/mopdb/mopdb_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index f723025..18bc48f 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -585,7 +585,7 @@ def write_varlist(conn, indir, match, version, alias): mopdb_log.debug(f"Variable: {v.name}") # get size in bytes of grid for 1 timestep and number of timesteps vsize = v[0].nbytes - nsteps = nfiles * v.shape[0]/2 + nsteps = nfiles * v.shape[0] # assign specific frequency if more than one is available if multiple_frq: if 'time' in v.dims[0]: From 704d607e104abd0e6a2f630107e8e9dff515b4e6 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 9 Jul 2024 15:12:23 +1000 Subject: [PATCH 009/137] minor adjustment to conftest and action --- .github/workflows/mopper-conda.yaml | 2 +- src/mopdb/mopdb_utils.py | 10 ++++++++++ tests/conftest.py | 9 --------- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index b4ecaa5..c232518 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -4,11 +4,11 @@ name: mopper-conda-install-test on: push: branches: - - main - prerelease pull_request: branches: - main + - prerelease jobs: diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 18bc48f..a14ca5e 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -539,6 +539,15 @@ def get_cell_methods(attrs, dims): return val, frqmod +def identify_patterns(files): + """Return patterns of files + """ + i = 0 + while present is True: + + + return patterns + def write_varlist(conn, indir, match, version, alias): """Based on model output files create a variable list and save it to a csv file. 
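A minimal sketch of the interval-to-frequency matching that `get_file_frq` relies on: observed timestep intervals are expressed in days, and the first expected value within a 5% relative tolerance wins. The `infer_frequency` helper below is illustrative, not part of the patch.

.. code-block:: python

   import math

   # expected interval in days for each frequency, mirroring int2frq above
   INT2FRQ = {'dec': 3652.0, 'yr': 365.0, 'mon': 30.0, 'day': 1.0,
              '6hr': 0.25, '3hr': 0.125, '1hr': 0.041667,
              '30min': 0.020833, '10min': 0.006944}

   def infer_frequency(interval_days, rel_tol=0.05):
       """Return the frequency label matching an observed interval in days."""
       for label, days in INT2FRQ.items():
           if math.isclose(interval_days, days, rel_tol=rel_tol):
               return label
       return 'NAfrq'

   assert infer_frequency(0.25) == '6hr'
   assert infer_frequency(30.4) == 'mon'   # within 5% of 30 days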
Main attributes needed to map output are provided
@@ -546,6 +555,7 @@ def write_varlist(conn, indir, match, version, alias):
     """
     mopdb_log = logging.getLogger('mopdb_log')
     files = list_files(indir, f"*{match}*")
+    patterns = identify_patterns(files)
     patterns = []
     if alias == '':
         alias = 'mopdb'
diff --git a/tests/conftest.py b/tests/conftest.py
index 9f2f190..9a60849 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -30,15 +30,6 @@
 TESTS_HOME = os.path.abspath(os.path.dirname(__file__))
 TESTS_DATA = os.path.join(TESTS_HOME, "testdata")
 
-# setting up loggers for both mopdb and mop
-@pytest.fixture
-def moplog():
-    return logging.getLogger('mop_log')
-
-
-@pytest.fixture
-def mopdblog():
-    return logging.getLogger('mopdb_log')
 
 # setting up fixtures for databases: access.db and mopper.db
 @pytest.fixture

From 7c45897c8403591e1449289ae522a967a938c61 Mon Sep 17 00:00:00 2001
From: Paola Petrelli 
Date: Tue, 9 Jul 2024 16:17:25 +1000
Subject: [PATCH 010/137] some improvements to tests

---
 tests/conftest.py             | 16 ++++++++++++----
 tests/test_calculations.py    |  8 ++++----
 tests/test_mop_utils.py       | 22 +++++++++++-----------
 tests/test_mopdb.py           |  6 +++---
 tests/test_mopdb_utils.py     |  7 +++----
 tests/testdata/varlist_ex.csv |  4 ++++
 6 files changed, 37 insertions(+), 26 deletions(-)
 create mode 100644 tests/testdata/varlist_ex.csv

diff --git a/tests/conftest.py b/tests/conftest.py
index 9a60849..0dd6c56 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -23,6 +23,7 @@
 import pandas as pd
 import datetime
 import logging
+import csv
 
 from mopdb.mopdb_utils import mapping_sql, cmorvar_sql
 from mopper.setup_utils import filelist_sql
@@ -72,12 +73,19 @@ def test_check_timestamp(caplog):
 
 @pytest.fixture
 def varlist_rows():
-    lines = ["fld_s03i236;tas;K;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature",
-        "fld_s00i031;siconca;1;time lat lon;mon;atmos;area: time: mean;AUS2200_A1hr;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction",
-"fld_s03i234;hfls;W m-2;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE LATENT HEAT FLUX W/M2;surface_upward_latent_heat_flu"]
-    rows = [l.split(";") for l in lines]
+    # read list of vars from example file
+    with open('testdata/varlist_ex.csv', 'r') as csvfile:
+        reader = csv.DictReader(csvfile, delimiter=';')
+        rows = list(reader)
     return rows
 
+@pytest.fixture
+def matches():
+    matches = [("tas", "fld_s03i236", "", "1hr", "atmos", "AUS2200", "AUS2200_A1hr", "", "K"),
+        ("siconca", "fld_s00i031", "", "mon", "ocean", "CM2", "CMIP6_OImon", "", "1"),
+        ("hfls", "fld_s03i234", "", "mon", "atmos", "CM2", "CMIP6_Amon", "up", "W/m2")]
+    return matches
+
 @pytest.fixture
 def add_var_out():
     vlist = [{'cmor_var': '', 'input_vars': '', 'calculation': '', 'units': ''
diff --git a/tests/test_calculations.py b/tests/test_calculations.py
index dcd6398..9037b53 100644
--- a/tests/test_calculations.py
+++ b/tests/test_calculations.py
@@ -23,10 +23,9 @@
 import logging
 
 from mopper.calculations import *
 
-logger = logging.getLogger('var_log')
 ctx = click.Context(click.Command('cmd'),
         obj={'sel_start': '198302170600', 'sel_end': '198302181300',
-        'realm': 'atmos', 'frequency': '1hr', 'var_log': logger})
+        'realm': 'atmos', 'frequency': '1hr', 'var_log': 'varlog_1'})
 
 
 def create_var(nlat, nlon, ntime=None, nlev=None, sdepth=False, seed=100):
@@ -68,8 +67,9 @@ def test_calc_topsoil():
     xrtest.assert_allclose(out, expected, rtol=1e-05)
 
 
-def 
test_overturn_stream(): - global ctx, logger +def test_overturn_stream(caplog): + global ctx + caplog.set_level(logging.DEBUG, logger='varlog_1') # set up input dims = ['time', 'depth', 'lat', 'lon'] time = pd.date_range("2014-09-06", periods=1) diff --git a/tests/test_mop_utils.py b/tests/test_mop_utils.py index f177f21..4889274 100644 --- a/tests/test_mop_utils.py +++ b/tests/test_mop_utils.py @@ -19,7 +19,6 @@ import numpy as np import pandas as pd from mopper.mop_utils import * -from conftest import moplog #try: # import unittest.mock as mock @@ -28,24 +27,24 @@ ctx = click.Context(click.Command('cmd'), obj={'sel_start': '198302170600', 'sel_end': '198302181300', - 'realm': 'atmos', 'frequency': '1hr'}) -#logger = logging.getLogger('mop_log') + 'realm': 'atmos', 'frequency': '1hr', 'var_log': 'varlog_1'}) -def test_check_timestamp(caplog, ctx): - moplog.set_level(logging.DEBUG)#, logger='mop_log') +def test_check_timestamp(caplog): + global ctx + caplog.set_level(logging.DEBUG, logger='mop_log') + caplog.set_level(logging.DEBUG, logger='varlog_1') # test atmos files files = [f'obj_198302{d}T{str(h).zfill(2)}01_1hr.nc' for d in ['17','18','19'] for h in range(24)] - print(files) inrange = files[6:37] with ctx: - out1 = check_timestamp(files, logger) + out1 = check_timestamp(files) assert out1 == inrange # get only first file is frequency is fx ctx.obj['frequency'] = 'fx' inrange = [files[0]] with ctx: - out2 = check_timestamp(files, logger) + out2 = check_timestamp(files) assert out2 == inrange # test ocn files ctx.obj['frequency'] = 'day' @@ -53,12 +52,13 @@ def test_check_timestamp(caplog, ctx): files = [f'ocn_daily.nc-198302{str(d).zfill(2)}' for d in range(1,29)] inrange = files[16:18] with ctx: - out3 = check_timestamp(files, logger) + out3 = check_timestamp(files) assert out3 == inrange -def test_get_cmorname(caplog, ctx): - caplog.set_level(logging.DEBUG)#, logger='mop_log') +def test_get_cmorname(caplog): + global ctx + caplog.set_level(logging.DEBUG, logger='mop_log') # axis_name t ctx.obj['calculation'] = "plevinterp(var[0], var[1], 24)" ctx.obj['variable_id'] = "ta24" diff --git a/tests/test_mopdb.py b/tests/test_mopdb.py index 0eddc58..e570fdb 100644 --- a/tests/test_mopdb.py +++ b/tests/test_mopdb.py @@ -30,7 +30,7 @@ def test_mopdb(command, subcommand, runner): result = runner.invoke(mopdb, [subcommand, '--help']) assert result.exit_code == 0 -@pytest.mark.usefixtures("setup_db") # 1 +@pytest.mark.usefixtures("setup_access_db") # 1 def test_template(session): runner = CliRunner() @@ -45,8 +45,8 @@ def test_template(session): result = runner.invoke(mopdb, ['template', '-f varlist.txt', '-vCM2']) #assert result.exit_code == 0 - assert 'Opened database successfully' in result.output - assert 'Definable cmip var' in result.output + assert 'Opened database ' in result.output + #assert 'Definable cmip var' in result.output #Pass temp_dir to control where the temporary directory is created. The directory will not be removed by Click in this case. This is useful to integrate with a framework like Pytest that manages temporary files. 
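# A minimal sketch of the CliRunner pattern used in these tests: invoke a
# click command in-process and assert on the exit code and captured output.
# The `hello` command below is illustrative, not part of the mopdb CLI.
import click
from click.testing import CliRunner

@click.command()
@click.option('--name', default='world')
def hello(name):
    click.echo(f"Hello {name}")

def test_hello_output():
    runner = CliRunner()
    result = runner.invoke(hello, ['--name', 'mopper'])
    assert result.exit_code == 0
    assert 'Hello mopper' in result.output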
#def test_keep_dir(tmp_path):

diff --git a/tests/test_mopdb_utils.py b/tests/test_mopdb_utils.py
index 9737c52..ebc8be0 100644
--- a/tests/test_mopdb_utils.py
+++ b/tests/test_mopdb_utils.py
@@ -27,12 +27,11 @@
 
 @pytest.mark.parametrize('idx', [0,1,2])
-def test_add_var(varlist_rows, idx, caplog):
+def test_add_var(varlist_rows, matches, idx, caplog):
     caplog.set_level(logging.DEBUG, logger='mopdb_log')
     vlist = []
-    match = [("tas", "", "K"), ("siconca", "", ""), ("hfls", "", "")]
-    vlist = add_var(vlist, varlist_rows[idx], match[idx])
-    assert vlist[idx]['cmor_var'] == match[idx][0]
+    vlist = add_var(vlist, varlist_rows[idx], matches[idx])
+    assert vlist[0]['cmor_var'] == matches[idx][0]
 
 
 def test_build_umfrq(um_multi_time, caplog):
diff --git a/tests/testdata/varlist_ex.csv b/tests/testdata/varlist_ex.csv
new file mode 100644
index 0000000..154729f
--- /dev/null
+++ b/tests/testdata/varlist_ex.csv
@@ -0,0 +1,4 @@
+name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;filename;long_name;standard_name
+fld_s03i236;tas;degC;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature
+fld_s00i031;siconca;%;time lat lon;mon;atmos;area: time: mean;;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction
+fld_s03i234;hfls;W m-2;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE LATENT HEAT FLUX W/M2;surface_upward_latent_heat_flux

From 63f3b380ede007c88b08b646d4f0a9eae73a6122 Mon Sep 17 00:00:00 2001
From: Paola Petrelli 
Date: Tue, 9 Jul 2024 16:30:57 +1000
Subject: [PATCH 011/137] updated install instructions in docs to reflect the current situation

---
 docs/overview.rst | 29 ++++++++++-------------------
 1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/docs/overview.rst b/docs/overview.rst
index 908db06..f074224 100644
--- a/docs/overview.rst
+++ b/docs/overview.rst
@@ -1,25 +1,16 @@
 Install
 =======
 
-You can install the latest version of `mopper` directly from conda (accessnri channel)::
+We are planning to release ACCESS-MOPPeR on conda soon; it will then be available at NCI in our conda environments.
+In the meantime, you can create a custom environment and install mopper with the following steps:
 
-    conda install -c accessnri mopper
+1. module load conda/analysis3
+2. python -m venv mopper_env --system-site-packages
+3. source /mopper_env/bin/activate
+4. pip install git+https://github.com/ACCESS-Community-Hub/ACCESS-MOPPeR@main
+
+The source command activates the environment you just created.
+Any time you want to use the tool in a new session, repeat the first and third steps.
 
-If you want to install an unstable version or a different branch:
+The `pip` command above installs from the main branch; you can also point it at a different branch.
 
-    * git clone
-    * git checkout (if installing a a different branch from master)
-    * cd mopper
-    * pip install ./
-      use --user flag if you want to install it in ~/.local
-
-Working on the NCI server
--------------------------
-
-MOPPeR is pre-installed into a Conda environment at NCI. Load it with::
-
-    module use /g/data3/hh5/public/modules
-    module load conda/analysis3-unstable
-
-.. note::
-    You need to be a member of the hh5 project to load the modules. 
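Since the new `varlist_rows` fixture switches from hand-split strings to reading the semicolon-delimited varlist file, here is a small, self-contained sketch of what `csv.DictReader` yields for that format (the two inline rows are abridged from `varlist_ex.csv`; only a few of its columns are kept):

.. code-block:: python

   import csv
   from io import StringIO

   sample = (
       "name;cmor_var;units;dimensions;frequency;realm\n"
       "fld_s03i236;tas;degC;time_0 lat lon;1hr;atmos\n"
       "fld_s00i031;siconca;%;time lat lon;mon;atmos\n"
   )
   reader = csv.DictReader(StringIO(sample), delimiter=';')
   rows = list(reader)
   assert rows[0]['cmor_var'] == 'tas'
   assert rows[1]['frequency'] == 'mon'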
From fc8bb02d7688e7b0671981bba099a73a0b81e320 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 9 Jul 2024 16:43:04 +1000 Subject: [PATCH 012/137] removed partial pattern function --- src/mopdb/mopdb_utils.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index a14ca5e..80565b4 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -261,7 +261,7 @@ def query(conn, sql, tup=(), first=True): def get_columns(conn, table): - """Gets list of columns form db table + """Gets list of columns from db table """ mopdb_log = logging.getLogger('mopdb_log') sql = f'PRAGMA table_info({table});' @@ -539,15 +539,6 @@ def get_cell_methods(attrs, dims): return val, frqmod -def identify_patterns(files): - """Return patterns of files - """ - i = 0 - while present is True: - - - return patterns - def write_varlist(conn, indir, match, version, alias): """Based on model output files create a variable list and save it to a csv file. Main attributes needed to map output are provided From cacdd9570acb0526012880c1c7348ab89665381d Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 9 Jul 2024 16:46:03 +1000 Subject: [PATCH 013/137] removed partial pattern function 2 --- src/mopdb/mopdb_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 80565b4..b9875c7 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -546,7 +546,6 @@ def write_varlist(conn, indir, match, version, alias): """ mopdb_log = logging.getLogger('mopdb_log') files = list_files(indir, f"*{match}*") - patterns = identify_patterns(files) patterns = [] if alias == '': alias = 'mopdb' From 1a3b63ceea2b325c28623fe4f092d351ff630ac0 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 9 Jul 2024 19:10:25 +1000 Subject: [PATCH 014/137] introduced intake sub-command, and class Variable for mopdb varlist --- src/mopdb/mopdb.py | 57 ++++++++++++++++++++++++--- src/mopdb/mopdb_class.py | 84 ++++++++++++++++++++++++++++++++++++++++ src/mopdb/mopdb_utils.py | 74 +++++++++++++++++++---------------- 3 files changed, 176 insertions(+), 39 deletions(-) create mode 100644 src/mopdb/mopdb_class.py diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 4a63bba..335a367 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -29,7 +29,6 @@ from mopdb.mopdb_utils import * - def mopdb_catch(): """ """ @@ -44,7 +43,7 @@ def mopdb_catch(): def require_date(ctx, param, value): - """Changes match option in template command from optional to + """Changes match option in template/intake commands from optional to required if fpath is a directory. """ if Path(value).is_dir(): @@ -331,7 +330,7 @@ def map_template(ctx, fpath, match, dbname, version, alias): fname = fpath.name else: mopdb_log.debug(f"Calling model_vars() from template: {fpath}") - fname = model_vars(fpath, match, dbname, version, alias) + fname, vobjs = model_vars(fpath, match, dbname, version, alias) if alias == '': alias = fname.split(".")[0] # connect to db, check first if db exists or exit @@ -368,6 +367,52 @@ def map_template(ctx, fpath, match, dbname, version, alias): return +@mopdb.command(name='intake') +@map_args +@click.pass_context +def write_catalogue(ctx, fpath, match, dbname, version, alias): + """Writes an intake-esm catalogue. + + It can get as input the directory containing the output in + which case it will first call model_vars() (varlist command) + or the file output of the same if already available. 
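The `intake` subcommand introduced above is still a stub; for orientation, an intake-esm datastore is typically just a CSV table of file assets plus a small JSON descriptor pointing at it. A rough sketch of that layout follows, under the assumption that the esm-collection-spec v0.1.0 format is the target; all column names and values here are invented for illustration and are not part of the patch.

.. code-block:: python

   import json
   import pandas as pd

   # one row per file; columns are whatever attributes you want to search on
   assets = pd.DataFrame([
       {"path": "exp/atmos/umnsa_slv_19830217.nc",
        "variable": "fld_s03i236", "frequency": "1hr", "realm": "atmos"},
   ])
   assets.to_csv("catalogue.csv", index=False)

   # minimal descriptor telling intake-esm where the csv is and how to read it
   descriptor = {
       "esmcat_version": "0.1.0",
       "id": "mopper_demo",
       "description": "demo catalogue of raw model output",
       "catalog_file": "catalogue.csv",
       "attributes": [{"column_name": c} for c in
                      ("variable", "frequency", "realm")],
       "assets": {"column_name": "path", "format": "netcdf"},
   }
   with open("catalogue.json", "w") as f:
       json.dump(descriptor, f, indent=2)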
+ + Parameters + ---------- + ctx : obj + Click context object + fpath : str + Path of csv input file with output variables to map or + of directory containing output files to scan + match : str + Date or other string to match to individuate one file per type + dbname : str + Database relative path (default is data/access.db) + version : str + Version of ACCESS model used to generate variables + alias : str + Indicates origin of records to add, if '' csv filename + base is used instead + + Returns + ------- + """ + mopdb_log = logging.getLogger('mopdb_log') + # work out if fpath is varlist or path to output + fpath = Path(fpath) + if fpath.is_file(): + fname = fpath.name + else: + mopdb_log.debug(f"Calling model_vars() from intake: {fpath}") + fname, vobjs = model_vars(fpath, match, dbname, version, alias) + if alias == '': + alias = fname.split(".")[0] + # connect to db, check first if db exists or exit + if dbname == 'default': + dbname = import_files('data').joinpath('access.db') + conn = db_connect(dbname) + + @mopdb.command(name='map') @db_args @click.pass_context @@ -422,7 +467,7 @@ def update_map(ctx, dbname, fname, alias): @click.pass_context def list_vars(ctx, fpath, match, dbname, version, alias): """Calls model_vars to generate list of variables""" - fname = model_vars(fpath, match, dbname, version, alias) + fname, vobjs = model_vars(fpath, match, dbname, version, alias) @click.pass_context @@ -457,9 +502,9 @@ def model_vars(ctx, fpath, match, dbname, version, alias): if dbname == 'default': dbname = import_files('data').joinpath('access.db') conn = db_connect(dbname) - fname = write_varlist(conn, fpath, match, version, alias) + fname, vobjs = write_varlist(conn, fpath, match, version, alias) conn.close() - return fname + return fname, vobjs @mopdb.command(name='del') diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py new file mode 100644 index 0000000..a554ee0 --- /dev/null +++ b/src/mopdb/mopdb_class.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python +# Copyright 2024 ARC Centre of Excellence for Climate Extremes (CLEX) +# Author: Paola Petrelli for CLEX +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# contact: paola.petrelli@utas.edu.au +# +# last updated 06/07/2024 + +class Variable(): + + # __slots__ = ('name', 'pattern', 'files', 'frequency', 'realm', + # 'cmor_var', 'cmor_table', 'version', 'units', 'dimensions', + # 'cell_methods', 'positive', 'long_name', 'standard_name', + # 'vtype', 'size', 'nsteps') + + def __init__(self, varname, fpattern): + self.name = varname + # path attributes + self.pattern = fpattern + self.files = [] + # mapping attributes + self.frequency = 'NAfrq' + self.realm = 'NArealm' + self.cmor_var = '' + self.cmor_table = '' + self.version = '' + # descriptive attributes + self.units = '' + self.dimensions = '' + self.cell_methods = '' + self.positive = '' + self.long_name = '' + self.standard_name = '' + # type and size attributes + self.vtype = '' + self.size = 0 + self.nsteps = 0 + + + @property + def frequency(self): + return self._frequency + + @frequency.setter + def frequency(self, value): + fix_frq = {'dCai': 'day', '3h': '3hr', '6h': '6hr'} + if value in fix_frq.keys(): + self._frequency = fix_frq[value] + value = value.replace('hPt', 'hrPt') + if not any(x in value for x in + ['min', 'hr', 'day', 'mon', 'yr']): + self._frequency = 'NAfrq' + self._frequency = value + + + @property + def realm(self): + return self._realm + + @realm.setter + def realm(self, value): + fix_realm = {'atm': 'atmos', 'ice': 'seaIce', 'ocn': 'ocean'} + if value in fix_realm.keys(): + self._realm = fix_realm[value] + if not any(x in value for x in + ['atmos', 'seaIce', 'ocean', 'land']): + self._realm = 'NArealm' + + def list_files(self): + """Returns list of files matching input directory and match""" + self.files = [x for x in Path(self.indir).rglob(f"{self.match}") if x.is_file()] + return files.sort(key=lambda x:x.name) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index b9875c7..295e1ab 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -35,6 +35,7 @@ from operator import itemgetter from pathlib import Path +from mopdb.mopdb_class import Variable def config_log(debug): """Configures log file""" @@ -270,50 +271,50 @@ def get_columns(conn, table): return columns -def get_cmorname(conn, varname, version, frequency): +def get_cmorname(conn, vobj, version): """Queries mapping table for cmip name given variable name as output by the model """ mopdb_log = logging.getLogger('mopdb_log') sql = f"""SELECT cmor_var,model,cmor_table,frequency FROM mapping - WHERE input_vars='{varname}' and (calculation='' + WHERE input_vars='{vobj.vname}' and (calculation='' or calculation IS NULL)""" results = query(conn, sql, first=False) names = list(x[0] for x in results) tables = list(x[2] for x in results) if len(names) == 0: - cmor_var = '' - cmor_table = '' + vobj.cmor_var = '' + vobj.cmor_table = '' elif len(names) == 1: - cmor_var = names[0] - cmor_table = tables[0] + vobj.cmor_var = names[0] + vobj.cmor_table = tables[0] elif len(names) > 1: - mopdb_log.debug(f"Found more than 1 definition for {varname}:\n" + + mopdb_log.debug(f"Found more than 1 definition for {vobj.name}:\n" + f"{results}") match_found = False for r in results: - if r[1] == version and r[3] == frequency: - cmor_var, cmor_table = r[0], r[2] + if r[1] == version and r[3] == vobj.frequency: + vobj.cmor_var, vobj.cmor_table = r[0], r[2] match_found = True break if not match_found: for r in results: - if r[3] == frequency: - cmor_var, cmor_table = r[0], r[2] + if r[3] == vobj.frequency: + vobj.cmor_var, vobj.cmor_table = r[0], r[2] match_found = True break if not match_found: for 
r in results: if r[1] == version: - cmor_var, cmor_table = r[0], r[2] + vobj.cmor_var, vobj.cmor_table = r[0], r[2] match_found = True break if not match_found: - cmor_var = names[0] - cmor_table = tables[0] - mopdb_log.info(f"Found more than 1 definition for {varname}:\n"+ - f"{results}\n Using {cmor_var} from {cmor_table}") - return cmor_var, cmor_table + vobj.cmor_var = names[0] + vobj.cmor_table = tables[0] + mopdb_log.info(f"Found more than 1 definition for {vobj.name}:\n"+ + f"{results}\n Using {vobj.cmor_var} from {vobj.cmor_table}") + return vobj def cmor_table_header(name, realm, frequency): @@ -545,6 +546,10 @@ def write_varlist(conn, indir, match, version, alias): for each variable """ mopdb_log = logging.getLogger('mopdb_log') + line_cols = ['name', 'cmor_var', 'units', 'dimensions', + 'frequency', 'realm', 'cell_methods', 'cmor_table', 'vtype', + 'size', 'nsteps', 'filename', 'long_name', 'standard_name'] + vobj_list = [] files = list_files(indir, f"*{match}*") patterns = [] if alias == '': @@ -580,35 +585,38 @@ def write_varlist(conn, indir, match, version, alias): multiple_frq = True mopdb_log.debug(f"Multiple frq: {multiple_frq}") for vname in ds.variables: + vobj = Variable(vname, fpattern) + vobj.realm = realm if vname not in coords and all(x not in vname for x in ['_bnds','_bounds']): v = ds[vname] - mopdb_log.debug(f"Variable: {v.name}") + mopdb_log.debug(f"Variable: {vobj.name}") # get size in bytes of grid for 1 timestep and number of timesteps - vsize = v[0].nbytes - nsteps = nfiles * v.shape[0] - # assign specific frequency if more than one is available + vobj.size = v[0].nbytes + vobj.nsteps = nfiles * v.shape[0] + # assign time axis frequency if more than one is available if multiple_frq: if 'time' in v.dims[0]: frequency = umfrq[v.dims[0]] else: - frequency = 'NA' mopdb_log.info(f"Could not detect frequency for variable: {v}") attrs = v.attrs - cell_methods, frqmod = get_cell_methods(attrs, v.dims) - varfrq = frequency + frqmod - mopdb_log.debug(f"Frequency x var: {varfrq}") + vobj.cell_methods, frqmod = get_cell_methods(attrs, v.dims) + vobj.frequency = frequency + frqmod + mopdb_log.debug(f"Frequency x var: {vobj.frequency}") # try to retrieve cmip name - cmor_var, cmor_table = get_cmorname(conn, vname, - version, varfrq) - line = [v.name, cmor_var, attrs.get('units', ""), - " ".join(v.dims), varfrq, realm, - cell_methods, cmor_table, v.dtype, vsize, - nsteps, fpattern, attrs.get('long_name', ""), - attrs.get('standard_name', "")] + cmor_var, cmor_table = get_cmorname(conn, vobj, + version) + vobj.units = attrs.get('units', "") + vobj.long_name = attrs.get('long_name', "") + vobj.standard_name = attrs.get('standard_name', "") + vobj.dimensions = " ".join(v.dims) + vobj.type = v.dtype + line = [vobj[k] for k in line_cols] fwriter.writerow(line) + vobj_list.append(vobj) mopdb_log.info(f"Variable list for {fpattern} successfully written") fcsv.close() - return fname + return fname, vobj_list def read_map_app4(fname): From d81cdbd1ec04de7d6fac74f2533050f9a16f6ac2 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 09:59:13 +1000 Subject: [PATCH 015/137] Update mopper-conda.yaml to fix python version --- .github/workflows/mopper-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index c232518..cbd7984 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -22,7 +22,7 @@ jobs: - name: Set up Python 3.10 uses: 
actions/setup-python@v2 with: - python-version: 3.10 + python-version: '3.10' - name: Add conda to system path run: | # $CONDA is an environment variable pointing to the root of the miniconda directory From d47081806ed3de27533365688eaf2ffb5ae0cba7 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 10:02:32 +1000 Subject: [PATCH 016/137] Update mopper-conda.yaml to fix conda file --- .github/workflows/mopper-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index cbd7984..d5868d9 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -29,7 +29,7 @@ jobs: echo $CONDA/bin >> $GITHUB_PATH - name: Install dependencies run: | - conda env update --file conda/environment.yml --name base + conda env update --file conda/meta.yml --name base - name: Lint with flake8 run: | conda install flake8 From a16bfee3a20a5269216632fd48cb33a532903802 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 10:03:41 +1000 Subject: [PATCH 017/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index d5868d9..a8d26a6 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -29,7 +29,7 @@ jobs: echo $CONDA/bin >> $GITHUB_PATH - name: Install dependencies run: | - conda env update --file conda/meta.yml --name base + conda env update --file conda/meta.yaml --name base - name: Lint with flake8 run: | conda install flake8 From 4c344cc1c711e91ab94098d867555026c9d76828 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 10 Jul 2024 10:12:26 +1000 Subject: [PATCH 018/137] adjustments to class --- src/mopdb/mopdb_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 295e1ab..3dfdc31 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -277,11 +277,12 @@ def get_cmorname(conn, vobj, version): """ mopdb_log = logging.getLogger('mopdb_log') sql = f"""SELECT cmor_var,model,cmor_table,frequency FROM mapping - WHERE input_vars='{vobj.vname}' and (calculation='' + WHERE input_vars='{vobj.name}' and (calculation='' or calculation IS NULL)""" results = query(conn, sql, first=False) names = list(x[0] for x in results) tables = list(x[2] for x in results) + mopdb_log.debug(f"In get_cmorname query results: {results}") if len(names) == 0: vobj.cmor_var = '' vobj.cmor_table = '' @@ -602,7 +603,7 @@ def write_varlist(conn, indir, match, version, alias): attrs = v.attrs vobj.cell_methods, frqmod = get_cell_methods(attrs, v.dims) vobj.frequency = frequency + frqmod - mopdb_log.debug(f"Frequency x var: {vobj.frequency}") + mopdb_log.debug(f"Frequency var: {vobj.frequency}") # try to retrieve cmip name cmor_var, cmor_table = get_cmorname(conn, vobj, version) From 6583c895ca2db0251913330204350628d0a0674d Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 10 Jul 2024 10:14:43 +1000 Subject: [PATCH 019/137] removed extra line from docs --- docs/gettingstarted.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/gettingstarted.rst b/docs/gettingstarted.rst index 666e89a..2e7181e 100644 --- a/docs/gettingstarted.rst +++ b/docs/gettingstarted.rst @@ -56,7 +56,7 @@ Step2: Set up the working environment *mop -c setup* .. 
code-block:: console -https://climate-cms.org/posts/2023-05-31-vscode-are.html + $ mop -c exp_conf.yaml setup Simulation to process: cy286 Setting environment and creating working directory From b0d04f1785dfc6f93fe0eb29e48ba078255f0244 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 10:51:33 +1000 Subject: [PATCH 020/137] Update meta.yaml --- conda/meta.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index f20a79a..b4f0932 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,7 +1,6 @@ -{% set version = "1.0.0" %} package: name: mopper - version: {{ version }} + version: {{ "1.0.0" }} #source: # path: ./ @@ -9,7 +8,7 @@ package: source: #url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR/archive/refs/tags/{{version}}.tar.gz git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git - git_rev: {{ version }} + git_rev: {{ "1.0.0" }} git_depth: 1 # (Defaults to -1/not shallow) build: From b8feb63b893215b30fa14890aa11b5ee1a8ad7f9 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 10:52:00 +1000 Subject: [PATCH 021/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 86 +++++++++++++++-------------- 1 file changed, 46 insertions(+), 40 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index c232518..0bb68f7 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -4,51 +4,57 @@ name: mopper-conda-install-test on: push: branches: - - prerelease - pull_request: - branches: - - main - - prerelease + - pytests_sam jobs: - build-linux: + build: + runs-on: ubuntu-latest strategy: max-parallel: 5 + matrix: + python-version: ["3.9", "3.10", "3.11"] + steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.10 - uses: actions/setup-python@v2 - with: - python-version: 3.10 - - name: Add conda to system path - run: | - # $CONDA is an environment variable pointing to the root of the miniconda directory - echo $CONDA/bin >> $GITHUB_PATH - - name: Install dependencies - run: | - conda env update --file conda/environment.yml --name base - - name: Lint with flake8 - run: | - conda install flake8 - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - # - name: Install package - run: | - conda build conda/meta.yaml - - name: Test with pytest - run: | - conda install pytest coverage codecov - conda run python -m pytest - conda run coverage run --source src -m py.test - - name: Upload to codecov - if: steps.build.outcome == 'success' - run: | - curl -Os https://uploader.codecov.io/latest/linux/codecov - chmod +x codecov - ./codecov - + - uses: actions/checkout@v4 + #--------------------------------------------------- + - name: Set up Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + #--------------------------------------------------- + - name: Add conda to system path + run: | + # $CONDA is an environment variable pointing to the root of the miniconda directory + echo $CONDA/bin >> $GITHUB_PATH + #--------------------------------------------------- + - name: Install dependencies + run: | + conda env update --file conda/meta.yaml --name base + #--------------------------------------------------- + - name: Lint with flake8 + run: | + conda install flake8 + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + # - name: Install package + run: | + conda build conda/meta.yaml + #--------------------------------------------------- + - name: Test with pytest + run: | + conda install pytest coverage codecov + conda run python -m pytest + conda run coverage run --source src -m py.test + #--------------------------------------------------- + - name: Upload to codecov + if: steps.build.outcome == 'success' + run: | + curl -Os https://uploader.codecov.io/latest/linux/codecov + chmod +x codecov + ./codecov + #--------------------------------------------------- From 84731f4954a91a74df8519e2feb0001af41c9ad9 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 10:58:29 +1000 Subject: [PATCH 022/137] Update meta.yaml --- conda/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index b4f0932..146929a 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,6 +1,6 @@ package: name: mopper - version: {{ "1.0.0" }} + version: "1.0.0" #source: # path: ./ @@ -8,7 +8,7 @@ package: source: #url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR/archive/refs/tags/{{version}}.tar.gz git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git - git_rev: {{ "1.0.0" }} + git_rev: "1.0.0" git_depth: 1 # (Defaults to -1/not shallow) build: From 15fbfa686eb0c1e6a7d9ca1d29b80f04b07a4daa Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 12:30:42 +1000 Subject: [PATCH 023/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 0bb68f7..d699f16 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -34,16 +34,16 @@ jobs: run: | conda env update --file conda/meta.yaml --name base #--------------------------------------------------- - - name: Lint with flake8 - run: | - conda install flake8 + #- name: Lint with flake8 + # run: | + # conda install flake8 # stop the build if there are Python syntax errors or 
undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics # - name: Install package - run: | - conda build conda/meta.yaml + # run: | + # conda build conda/meta.yaml #--------------------------------------------------- - name: Test with pytest run: | From 70f94252675f4cbad631dafe011edbc76b13f918 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 12:35:43 +1000 Subject: [PATCH 024/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index d699f16..0d8eade 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -14,7 +14,7 @@ jobs: strategy: max-parallel: 5 matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.10"] steps: From 45c5cab09b65618a89f0c83a8fa6e7a2bd5dc5f6 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 12:38:20 +1000 Subject: [PATCH 025/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 0d8eade..90f46d2 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -47,6 +47,7 @@ jobs: #--------------------------------------------------- - name: Test with pytest run: | + conda activate base conda install pytest coverage codecov conda run python -m pytest conda run coverage run --source src -m py.test From 85f96a7406ded480c72cd4437496c54326728caf Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 12:39:18 +1000 Subject: [PATCH 026/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 90f46d2..74eac53 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -47,6 +47,7 @@ jobs: #--------------------------------------------------- - name: Test with pytest run: | + conda init conda activate base conda install pytest coverage codecov conda run python -m pytest From 905b898d617500ec160dc1067b498f15b026342e Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 12:46:33 +1000 Subject: [PATCH 027/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 74eac53..f89c896 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -25,10 +25,12 @@ jobs: with: python-version: ${{ matrix.python-version }} #--------------------------------------------------- - - name: Add conda to system path - run: | - # $CONDA is an environment variable pointing to the root of the miniconda directory - echo $CONDA/bin >> $GITHUB_PATH + - name: Install Miniconda + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: ${{ matrix.python-version }} + activate-environment: base 
#--------------------------------------------------- - name: Install dependencies run: | @@ -47,11 +49,9 @@ jobs: #--------------------------------------------------- - name: Test with pytest run: | - conda init - conda activate base conda install pytest coverage codecov - conda run python -m pytest - conda run coverage run --source src -m py.test + conda run -n base python -m pytest + conda run -n base coverage run --source src -m pytest #--------------------------------------------------- - name: Upload to codecov if: steps.build.outcome == 'success' From 062ec2873a6f8a8a841787b9d050f6c7e7ef3786 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 12:50:21 +1000 Subject: [PATCH 028/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index f89c896..e776402 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -30,11 +30,12 @@ jobs: with: auto-update-conda: true python-version: ${{ matrix.python-version }} - activate-environment: base + activate-environment: test-env + environment-file: conda/meta.yaml #--------------------------------------------------- - name: Install dependencies run: | - conda env update --file conda/meta.yaml --name base + conda env update --file conda/meta.yaml --name test-env #--------------------------------------------------- #- name: Lint with flake8 # run: | @@ -50,8 +51,8 @@ jobs: - name: Test with pytest run: | conda install pytest coverage codecov - conda run -n base python -m pytest - conda run -n base coverage run --source src -m pytest + conda run -n test-env python -m pytest + conda run -n test-env coverage run --source src -m pytest #--------------------------------------------------- - name: Upload to codecov if: steps.build.outcome == 'success' From 44ddf3639e36f3613ede373e82900230df8f2f46 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 12:53:54 +1000 Subject: [PATCH 029/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index e776402..0b9cb76 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -25,17 +25,22 @@ jobs: with: python-version: ${{ matrix.python-version }} #--------------------------------------------------- + # Install Miniconda - name: Install Miniconda uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true python-version: ${{ matrix.python-version }} - activate-environment: test-env - environment-file: conda/meta.yaml - #--------------------------------------------------- - - name: Install dependencies + + # Create and activate conda environment + - name: Create and activate conda environment run: | - conda env update --file conda/meta.yaml --name test-env + conda env create --name test-env --file conda/meta.yaml + conda activate test-env + + # Install dependencies from conda + - name: Install dependencies + run: conda env update --name test-env --file conda/meta.yaml #--------------------------------------------------- #- name: Lint with flake8 # run: | From 0391714d8c4d119d5fd6b86f2885502e455666d2 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 12:55:35 +1000 Subject: [PATCH 030/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 1 - 1 file 
changed, 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 0b9cb76..27c56fc 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -36,7 +36,6 @@ jobs: - name: Create and activate conda environment run: | conda env create --name test-env --file conda/meta.yaml - conda activate test-env # Install dependencies from conda - name: Install dependencies From c373636921d7d20e468afc9918249ba11db08e12 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 12:58:09 +1000 Subject: [PATCH 031/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 27c56fc..0c6fe68 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -54,7 +54,7 @@ jobs: #--------------------------------------------------- - name: Test with pytest run: | - conda install pytest coverage codecov + conda install -n test-env pytest coverage codecov conda run -n test-env python -m pytest conda run -n test-env coverage run --source src -m pytest #--------------------------------------------------- From 5b578065e7afddcb406521e5dcc6632bf39f558b Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 10 Jul 2024 13:19:16 +1000 Subject: [PATCH 032/137] more validation for realm added to class --- src/mopdb/mopdb_class.py | 8 +++++--- src/mopdb/mopdb_utils.py | 27 +++++++++++++-------------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index a554ee0..5cf3b6b 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -28,14 +28,16 @@ class Variable(): def __init__(self, varname, fpattern): self.name = varname # path attributes - self.pattern = fpattern + self.fpattern = fpattern self.files = [] # mapping attributes - self.frequency = 'NAfrq' - self.realm = 'NArealm' + self._frequency = '' + self._realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] + if x in self.fpattern.parts][0] self.cmor_var = '' self.cmor_table = '' self.version = '' + self.match = False # descriptive attributes self.units = '' self.dimensions = '' diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 3dfdc31..dfca89c 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -548,8 +548,8 @@ def write_varlist(conn, indir, match, version, alias): """ mopdb_log = logging.getLogger('mopdb_log') line_cols = ['name', 'cmor_var', 'units', 'dimensions', - 'frequency', 'realm', 'cell_methods', 'cmor_table', 'vtype', - 'size', 'nsteps', 'filename', 'long_name', 'standard_name'] + '_frequency', '_realm', 'cell_methods', 'cmor_table', 'vtype', + 'size', 'nsteps', 'fpattern', 'long_name', 'standard_name'] vobj_list = [] files = list_files(indir, f"*{match}*") patterns = [] @@ -573,7 +573,7 @@ def write_varlist(conn, indir, match, version, alias): pattern_list = list_files(indir, f"{fpattern}*") nfiles = len(pattern_list) mopdb_log.debug(f"File pattern: {fpattern}") - fwriter.writerow([f"#{fpattern}"]) + #fwriter.writerow([f"#{fpattern}"]) # get attributes for the file variables ds = xr.open_dataset(str(pattern_list[0]), decode_times=False) realm = get_realm(fpath, version, ds) @@ -587,7 +587,6 @@ def write_varlist(conn, indir, match, version, alias): mopdb_log.debug(f"Multiple frq: {multiple_frq}") for vname in ds.variables: vobj = Variable(vname, fpattern) - 
vobj.realm = realm if vname not in coords and all(x not in vname for x in ['_bnds','_bounds']): v = ds[vname] mopdb_log.debug(f"Variable: {vobj.name}") @@ -605,14 +604,13 @@ def write_varlist(conn, indir, match, version, alias): vobj.frequency = frequency + frqmod mopdb_log.debug(f"Frequency var: {vobj.frequency}") # try to retrieve cmip name - cmor_var, cmor_table = get_cmorname(conn, vobj, - version) + vobj = get_cmorname(conn, vobj, version) vobj.units = attrs.get('units', "") vobj.long_name = attrs.get('long_name', "") vobj.standard_name = attrs.get('standard_name', "") vobj.dimensions = " ".join(v.dims) - vobj.type = v.dtype - line = [vobj[k] for k in line_cols] + vobj.vtype = v.dtype + line = [vobj.__dict__[k] for k in line_cols] fwriter.writerow(line) vobj_list.append(vobj) mopdb_log.info(f"Variable list for {fpattern} successfully written") @@ -998,18 +996,19 @@ def get_realm(fpath, version, ds): '''Return realm for variable in files or NArealm''' mopdb_log = logging.getLogger('mopdb_log') + #realm = None if version == 'AUS2200': realm = 'atmos' else: realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] if x in fpath.parts][0] - if realm == 'atm' or 'um_version' in ds.attrs.keys(): + if realm is None and 'um_version' in ds.attrs.keys(): realm = 'atmos' - elif realm == 'ocn': - realm = 'ocean' - elif realm is None: - realm = 'NArealm' - mopdb_log.info(f"Couldn't detect realm from path, setting to NArealm") + #elif realm == 'ocn': + # realm = 'ocean' + #elif realm is None: + # realm = 'NArealm' + # mopdb_log.info(f"Couldn't detect realm from path, setting to NArealm") mopdb_log.debug(f"Realm is {realm}") return realm From b6a424100fdf1d87f3e15cccc07defe22dd9b4ab Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 10 Jul 2024 13:28:59 +1000 Subject: [PATCH 033/137] attempt to fix meta.yaml --- conda/meta.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index f20a79a..6a68a4f 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,7 +1,6 @@ -{% set version = "1.0.0" %} package: name: mopper - version: {{ version }} + version: 1.0.0 #source: # path: ./ From be08654f5a1dda4fdb78b7cc232f23670140eed6 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 10 Jul 2024 13:39:27 +1000 Subject: [PATCH 034/137] more validation for realm added to class 2 --- src/mopdb/mopdb_class.py | 5 +++-- src/mopdb/mopdb_utils.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index 5cf3b6b..53c4e4c 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -25,11 +25,12 @@ class Variable(): # 'cell_methods', 'positive', 'long_name', 'standard_name', # 'vtype', 'size', 'nsteps') - def __init__(self, varname, fpattern): + def __init__(self, varname, fpattern, fpath, files): self.name = varname # path attributes self.fpattern = fpattern - self.files = [] + self.fpath = fpath + self.files = files # mapping attributes self._frequency = '' self._realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index dfca89c..06391c8 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -565,6 +565,7 @@ def write_varlist(conn, indir, match, version, alias): # get filename pattern until date match mopdb_log.debug(f"Filename: {fpath.name}") fpattern = fpath.name.split(match)[0] + print(fpattern) # adding this in case we have a mix of yyyy/yyyymn date stamps # as then a user would have 
to pass yyyy only and would get 12 files for some of the patterns if fpattern in patterns: @@ -586,7 +587,7 @@ def write_varlist(conn, indir, match, version, alias): multiple_frq = True mopdb_log.debug(f"Multiple frq: {multiple_frq}") for vname in ds.variables: - vobj = Variable(vname, fpattern) + vobj = Variable(vname, fpattern, fpath, pattern_list) if vname not in coords and all(x not in vname for x in ['_bnds','_bounds']): v = ds[vname] mopdb_log.debug(f"Variable: {vobj.name}") From 21d5ecfde8dbd68c55eb4bf7ba315af0dd3f5998 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 10 Jul 2024 13:40:31 +1000 Subject: [PATCH 035/137] attempt to fix meta.yaml 2 --- conda/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 6a68a4f..05d58fc 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -8,7 +8,7 @@ package: source: #url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR/archive/refs/tags/{{version}}.tar.gz git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git - git_rev: {{ version }} + git_rev: "{{ version }}" git_depth: 1 # (Defaults to -1/not shallow) build: From 56ec024bb2ebc13961496022ab6f758be14fe443 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 14:17:45 +1000 Subject: [PATCH 036/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 0c6fe68..3741c47 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -55,7 +55,7 @@ jobs: - name: Test with pytest run: | conda install -n test-env pytest coverage codecov - conda run -n test-env python -m pytest + conda run -n test-env pytest conda run -n test-env coverage run --source src -m pytest #--------------------------------------------------- - name: Upload to codecov From 2237ea2ce0bee3483321a9936bf277170009b478 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 14:25:53 +1000 Subject: [PATCH 037/137] Create environment.yaml --- conda/environment.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 conda/environment.yaml diff --git a/conda/environment.yaml b/conda/environment.yaml new file mode 100644 index 0000000..4cde7a9 --- /dev/null +++ b/conda/environment.yaml @@ -0,0 +1,21 @@ +name: test-env +channels: + - defaults + - conda-forge +dependencies: + - python=3.10 + - pip + - pbr + - click + - cmor + - xarray + - numpy + - dask + - pyyaml + - cftime + - python-dateutil + - pytest + - coverage + - codecov + - pip: + - mop==1.0.0 From 1c2f27f79ede2be0faae9d06cb7581e7ad8a68ff Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 14:26:33 +1000 Subject: [PATCH 038/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 3741c47..b2a6d22 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -35,11 +35,11 @@ jobs: # Create and activate conda environment - name: Create and activate conda environment run: | - conda env create --name test-env --file conda/meta.yaml + conda env create --name test-env --file conda/environment.yaml # Install dependencies from conda - name: Install dependencies - run: conda env update --name test-env --file conda/meta.yaml + run: conda env update --name test-env --file 
conda/environment.yaml #--------------------------------------------------- #- name: Lint with flake8 # run: | From 8a1ea8d99d6277f9d67b9188c68c2adfd9501262 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 14:30:57 +1000 Subject: [PATCH 039/137] Update environment.yaml --- conda/environment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/environment.yaml b/conda/environment.yaml index 4cde7a9..40390e1 100644 --- a/conda/environment.yaml +++ b/conda/environment.yaml @@ -18,4 +18,4 @@ dependencies: - coverage - codecov - pip: - - mop==1.0.0 + - git+https://github.com/ACCESS-Community-Hub/ACCESS-MOPPeR@pytests_sam From 05a6d09342ee5411e817f3f1eda0157ea2c80393 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 14:37:32 +1000 Subject: [PATCH 040/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index b2a6d22..c635674 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -55,13 +55,13 @@ jobs: - name: Test with pytest run: | conda install -n test-env pytest coverage codecov - conda run -n test-env pytest - conda run -n test-env coverage run --source src -m pytest + conda run -n test-env pytest -q test_calculations.py + # conda run -n test-env coverage run --source src -m pytest #--------------------------------------------------- - - name: Upload to codecov - if: steps.build.outcome == 'success' - run: | - curl -Os https://uploader.codecov.io/latest/linux/codecov - chmod +x codecov - ./codecov + #- name: Upload to codecov + # if: steps.build.outcome == 'success' + # run: | + # curl -Os https://uploader.codecov.io/latest/linux/codecov + # chmod +x codecov + # ./codecov #--------------------------------------------------- From 8ef5df0c4fc0bc50fa5d077ee8c7b575768e0ceb Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 14:43:22 +1000 Subject: [PATCH 041/137] Update conftest.py --- tests/conftest.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 0dd6c56..f26c225 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,7 +25,7 @@ import logging import csv from mopdb.mopdb_utils import mapping_sql, cmorvar_sql -from mopper.setup_utils import filelist_sql +#from mopper.setup_utils import filelist_sql TESTS_HOME = os.path.abspath(os.path.dirname(__file__)) @@ -59,12 +59,12 @@ def setup_access_db(session): session.connection.commit() -@pytest.fixture -def setup_mopper_db(session): - filelist_sql = mapping_sql() - session.execute(filelist_sql) - session.execute('''INSERT INTO filelist VALUES ("/testdata/atmos/umnsa_spec_*.nc", "/testdata/mjo-elnino/v1-0/A10min/", "tas_AUS2200_mjo-elnino_subhrPt_20160101001000-20160102000000.nc", "fld_s03i236", "tas", "AUS2200_A10min", "subhrPt", "atmos", "point", "20160101T0005", "20160102T0000", "201601010000", "201601012355", "unprocessed", "3027.83203125", "mjo-elnino", "K", "AUS2200", "AUS2200", "/testdata/mjo-elnino/mjo-elnino.json", "1970-01-01", "v1-0")''') - session.connection.commit() +#@pytest.fixture +#def setup_mopper_db(session): +# filelist_sql = mapping_sql() +# session.execute(filelist_sql) +# session.execute('''INSERT INTO filelist VALUES ("/testdata/atmos/umnsa_spec_*.nc", "/testdata/mjo-elnino/v1-0/A10min/", "tas_AUS2200_mjo-elnino_subhrPt_20160101001000-20160102000000.nc", "fld_s03i236", 
"tas", "AUS2200_A10min", "subhrPt", "atmos", "point", "20160101T0005", "20160102T0000", "201601010000", "201601012355", "unprocessed", "3027.83203125", "mjo-elnino", "K", "AUS2200", "AUS2200", "/testdata/mjo-elnino/mjo-elnino.json", "1970-01-01", "v1-0")''') +# session.connection.commit() def test_check_timestamp(caplog): From 05d618b44d2cb7ec37fabb5a631fa08576ad6d0d Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 14:47:44 +1000 Subject: [PATCH 042/137] Update mopper-conda.yaml --- .github/workflows/mopper-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index c635674..50d2512 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -55,7 +55,7 @@ jobs: - name: Test with pytest run: | conda install -n test-env pytest coverage codecov - conda run -n test-env pytest -q test_calculations.py + conda run -n test-env pytest -q tests/test_calculations.py # conda run -n test-env coverage run --source src -m pytest #--------------------------------------------------- #- name: Upload to codecov From 53067bf658a198d4d4d9718e228388e7b30aed33 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 16:14:28 +1000 Subject: [PATCH 043/137] Update environment.yaml --- conda/environment.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/conda/environment.yaml b/conda/environment.yaml index 40390e1..2f0d566 100644 --- a/conda/environment.yaml +++ b/conda/environment.yaml @@ -17,5 +17,6 @@ dependencies: - pytest - coverage - codecov + - importlib_resources - pip: - git+https://github.com/ACCESS-Community-Hub/ACCESS-MOPPeR@pytests_sam From 0572339cfb807496548cf715a1aa9fc33fee23e9 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 10 Jul 2024 16:18:33 +1000 Subject: [PATCH 044/137] moved frequency logic to class --- src/mopdb/mopdb_class.py | 28 +++++++++++++++++++++++---- src/mopdb/mopdb_utils.py | 41 ++++++++++++++++++++-------------------- 2 files changed, 44 insertions(+), 25 deletions(-) diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index 53c4e4c..f4b3e0b 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -18,6 +18,8 @@ # # last updated 06/07/2024 +from pathlib import Path + class Variable(): # __slots__ = ('name', 'pattern', 'files', 'frequency', 'realm', @@ -25,16 +27,16 @@ class Variable(): # 'cell_methods', 'positive', 'long_name', 'standard_name', # 'vtype', 'size', 'nsteps') - def __init__(self, varname, fpattern, fpath, files): + def __init__(self, varname: str, fpattern: str, fpath: Path, files: list): self.name = varname # path attributes self.fpattern = fpattern self.fpath = fpath self.files = files # mapping attributes - self._frequency = '' + self._frequency = None self._realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] - if x in self.fpattern.parts][0] + if x in self.fpath.parts][0] self.cmor_var = '' self.cmor_table = '' self.version = '' @@ -54,11 +56,29 @@ def __init__(self, varname, fpattern, fpath, files): @property def frequency(self): + if self._frequency is None: + fname = self.files[0] + if self._realm == 'atmos': + fbits = fname.split("_") + self._frequency = fbits[-1].replace(".nc", "") + elif self._realm == 'ocean': + if any(x in fname for x in ['scalar', 'month']): + self._frequency = 'mon' + elif 'daily' in fname: + self._frequency = 'day' + elif self._realm == 'seaIce': + if '_m.' in fname: + self._frequency = 'mon' + elif '_d.' 
in fname: + self._frequency = 'day' + else: + self._frequency = 'NAfrq' return self._frequency + @frequency.setter def frequency(self, value): - fix_frq = {'dCai': 'day', '3h': '3hr', '6h': '6hr'} + fix_frq = {'dai': 'day', '3h': '3hr', '6h': '6hr'} if value in fix_frq.keys(): self._frequency = fix_frq[value] value = value.replace('hPt', 'hrPt') diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 06391c8..17d1cba 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -577,17 +577,23 @@ def write_varlist(conn, indir, match, version, alias): #fwriter.writerow([f"#{fpattern}"]) # get attributes for the file variables ds = xr.open_dataset(str(pattern_list[0]), decode_times=False) - realm = get_realm(fpath, version, ds) coords = [c for c in ds.coords] + ['latitude_longitude'] #pass next file in case of 1 timestep per file and no frq in name fnext = str(pattern_list[1]) - frequency, umfrq = get_frequency(realm, fpath.name, ds, fnext) + #frequency, umfrq = get_frequency(realm, fpath.name, ds, fnext) multiple_frq = False - if umfrq != {}: - multiple_frq = True - mopdb_log.debug(f"Multiple frq: {multiple_frq}") - for vname in ds.variables: + for idx, vname in enumerate(ds.variables): vobj = Variable(vname, fpattern, fpath, pattern_list) + if vobj.frequency == 'NAfrq' or vobj.realm == 'atmos': + # if this is the first variable get frq from time axes + if idx == 0: + frq_dict = get_file_frq(ds, fnext) + # if only one frequency detected empty dict + if len(frq_dict) == 1: + vobj._frequency = frq_dict.popitem()[1] + else: + multiple_frq = True + mopdb_log.debug(f"Multiple frq: {multiple_frq}") if vname not in coords and all(x not in vname for x in ['_bnds','_bounds']): v = ds[vname] mopdb_log.debug(f"Variable: {vobj.name}") @@ -597,12 +603,12 @@ def write_varlist(conn, indir, match, version, alias): # assign time axis frequency if more than one is available if multiple_frq: if 'time' in v.dims[0]: - frequency = umfrq[v.dims[0]] + vobj._frequency = frq_dict[v.dims[0]] else: mopdb_log.info(f"Could not detect frequency for variable: {v}") attrs = v.attrs vobj.cell_methods, frqmod = get_cell_methods(attrs, v.dims) - vobj.frequency = frequency + frqmod + vobj.frequency = vobj.frequency + frqmod mopdb_log.debug(f"Frequency var: {vobj.frequency}") # try to retrieve cmip name vobj = get_cmorname(conn, vobj, version) @@ -611,6 +617,8 @@ def write_varlist(conn, indir, match, version, alias): vobj.standard_name = attrs.get('standard_name', "") vobj.dimensions = " ".join(v.dims) vobj.vtype = v.dtype + if vobj.realm == "NArealm": + vobj.realm = get_realm(version, ds) line = [vobj.__dict__[k] for k in line_cols] fwriter.writerow(line) vobj_list.append(vobj) @@ -671,7 +679,7 @@ def read_map(fname, alias): notes = row[16] else: notes = row[15] - if alias is '': + if alias == '': alias = fname.replace(".csv","") var_list.append(row[:11] + [notes, alias]) return var_list @@ -993,23 +1001,14 @@ def check_realm_units(conn, var): return var -def get_realm(fpath, version, ds): - '''Return realm for variable in files or NArealm''' +def get_realm(version, ds): + '''Try to retrieve realm if using path failed''' mopdb_log = logging.getLogger('mopdb_log') - #realm = None if version == 'AUS2200': realm = 'atmos' - else: - realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] - if x in fpath.parts][0] - if realm is None and 'um_version' in ds.attrs.keys(): + elif 'um_version' in ds.attrs.keys(): realm = 'atmos' - #elif realm == 'ocn': - # realm = 'ocean' - #elif realm is None: - # realm = 
'NArealm' - # mopdb_log.info(f"Couldn't detect realm from path, setting to NArealm") mopdb_log.debug(f"Realm is {realm}") return realm From 2e66b798232d6e8c19e818520076392955a332a3 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Wed, 10 Jul 2024 16:23:51 +1000 Subject: [PATCH 045/137] Update test_calculations.py --- tests/test_calculations.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_calculations.py b/tests/test_calculations.py index 9037b53..dc8507b 100644 --- a/tests/test_calculations.py +++ b/tests/test_calculations.py @@ -41,7 +41,7 @@ def create_var(nlat, nlon, ntime=None, nlev=None, sdepth=False, seed=100): dims.insert(0, 'lev') coords['lev'] = lev shape.insert(0, nlev) - elif sdepth is True: + if sdepth is True: depth = np.array([0.05, 0.2, 0.5, 1]) dims.insert(0, 'depth') coords['depth'] = depth @@ -66,7 +66,7 @@ def test_calc_topsoil(): out = calc_topsoil(mrsol) xrtest.assert_allclose(out, expected, rtol=1e-05) - +''' def test_overturn_stream(caplog): global ctx caplog.set_level(logging.DEBUG, logger='varlog_1') @@ -118,3 +118,4 @@ def test_overturn_stream(caplog): with ctx: out4 = overturn_stream(varlist) nptest.assert_array_equal(res4, out4) +''' From 79983531d461e83889b38a7e3bf2eec0d1f1eaac Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 08:54:25 +1000 Subject: [PATCH 046/137] fixed issues highlighted by flake --- .github/workflows/mopper-conda.yaml | 1 + src/mopdb/mopdb_utils.py | 2 +- src/mopper/calculations.py | 5 +++-- src/mopper/setup_utils.py | 8 ++++---- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index a8d26a6..f776b8f 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -14,6 +14,7 @@ jobs: build-linux: runs-on: ubuntu-latest + timeout-minutes: 60 strategy: max-parallel: 5 diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index b9875c7..32ceb81 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -663,7 +663,7 @@ def read_map(fname, alias): notes = row[16] else: notes = row[15] - if alias is '': + if alias == '': alias = fname.replace(".csv","") var_list.append(row[:11] + [notes, alias]) return var_list diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py index 1adf216..c87213a 100644 --- a/src/mopper/calculations.py +++ b/src/mopper/calculations.py @@ -950,7 +950,7 @@ def tos_3hr(var, landfrac): vout : Xarray dataset """ - v = tos_degC(var) + v = K_degC(var) vout = xr.zeros_like(var) t = len(var.time) @@ -1346,6 +1346,7 @@ def get_basin_mask(ctx, lat, lon): basin_mask: DataArray basin_mask(lat,lon) """ + var_log = logging.getLogger(ctx.obj['var_log']) coords = ['t', 't'] if 'xu' in lon: coords[0] = 'u' @@ -1443,6 +1444,6 @@ def calc_depositions(ctx, var, weight=None): varlist.append(v0) if weight is None: weight = 0.05844 - deps = sum_vars(varlist) * mole_weight + deps = sum_vars(varlist) * weight return deps diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py index e0341fa..192f642 100755 --- a/src/mopper/setup_utils.py +++ b/src/mopper/setup_utils.py @@ -44,6 +44,7 @@ from json.decoder import JSONDecodeError from mopdb.mopdb_utils import query +from mopdb.cmip_utils import fix_years def write_var_map(outpath, table, matches): @@ -166,7 +167,7 @@ def find_custom_tables(ctx): mop_log = logging.getLogger('mop_log') tables = [] path = ctx.obj['tables_path'] - tables = ctx.obj['tables_path'].rglob("*_*.json") +
table_files = ctx.obj['tables_path'].rglob("*_*.json") for f in table_files: f = str(f).replace(".json", "") tables.append(f) @@ -626,9 +627,8 @@ def define_files(ctx, cursor, opts, mp): if mp['years'] != 'all' and ctx.obj['dreq_years']: exp_start, exp_end = fix_years(mp['years'], exp_start[:4], exp_end[:4]) if exp_start is None: - mop_log.info("Years requested for variable are outside specified") - mop_log.info((f"period: {table_id}, {var},", - f"{match['tstart']}, {match['tend']}")) + mop_log.info(f"""Years requested for variable are outside + specified period: {mp['years']}""") return tstep_dict = {'10min': ['minutes=10', 'minutes=5'], '30min': ['minutes=30', 'minutes=15'], From 57894bd6e9c31e2ffde041350b766182a1424ecb Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 09:13:05 +1000 Subject: [PATCH 047/137] fixing conda package install in actions --- .github/workflows/mopper-conda.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index f776b8f..64c4906 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -38,9 +38,6 @@ jobs: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - # - name: Install package - run: | - conda build conda/meta.yaml - name: Test with pytest run: | conda install pytest coverage codecov From f044f04810c48c505b0626773fe93aa99dc83c51 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 09:30:03 +1000 Subject: [PATCH 048/137] fixing conda package install in actions 2 --- .github/workflows/mopper-conda.yaml | 2 +- env.yaml | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 env.yaml diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 64c4906..bed7062 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -30,7 +30,7 @@ jobs: echo $CONDA/bin >> $GITHUB_PATH - name: Install dependencies run: | - conda env update --file conda/meta.yaml --name base + conda env update --file env.yaml --name base - name: Lint with flake8 run: | conda install flake8 diff --git a/env.yaml b/env.yaml new file mode 100644 index 0000000..64e2483 --- /dev/null +++ b/env.yaml @@ -0,0 +1,7 @@ +name: mopenv +dependencies: + - click + - cmor + - xarray + - numpy + - pyyaml From 7b3460e85bf3739af5c6e7d3f2fd055ba2732e3d Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 09:34:10 +1000 Subject: [PATCH 049/137] fixing conda package install in actions 3 --- env.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/env.yaml b/env.yaml index 64e2483..10266f8 100644 --- a/env.yaml +++ b/env.yaml @@ -1,4 +1,6 @@ name: mopenv +channels: + - conda-forge dependencies: - click - cmor From e6b37e50096bbb3da99e6770a9133b4c1511eace Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 09:43:21 +1000 Subject: [PATCH 050/137] fixing conda package install in actions 4 --- .github/workflows/mopper-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index bed7062..f32f3a4 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -33,7 +33,7 @@ jobs: conda env update --file env.yaml --name base - name: Lint with flake8 
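# [editor's note] PATCH 028-031 and the fixes around this hunk keep circling
# one CI detail: every `run:` block in GitHub Actions starts a fresh,
# non-interactive shell, so `conda activate` (added in PATCH 029, removed in
# PATCH 030) does not persist, while `conda run -n <env>` resolves the
# environment for a single command. A minimal sketch of the pattern the
# workflow settles on (step and environment names are illustrative):
#   - name: Test with pytest
#     run: |
#       conda install -n test-env pytest coverage
#       conda run -n test-env python -m pytest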
run: | - conda install flake8 + conda install flake8 --solver classic # stop the build if there are Python syntax errors or undefined names flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide From 3c02c32fdd0b9c8bed8b2ccda34f1e1aeefedf49 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 09:46:23 +1000 Subject: [PATCH 051/137] fixing conda package install in actions 5 --- .github/workflows/mopper-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index f32f3a4..5ff395e 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -40,7 +40,7 @@ jobs: flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest run: | - conda install pytest coverage codecov + conda install pytest coverage codecov --solver classic conda run python -m pytest conda run coverage run --source src -m py.test - name: Upload to codecov From a36cd1395f20a899afdc42dfcfc0691030a1235c Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 10:07:14 +1000 Subject: [PATCH 052/137] fixing conda package install in actions 6 --- .github/workflows/mopper-conda.yaml | 3 +++ env.yaml | 1 + 2 files changed, 4 insertions(+) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 5ff395e..1b005b5 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -38,6 +38,9 @@ jobs: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: install package + source activate base + pip install ./ - name: Test with pytest run: | conda install pytest coverage codecov --solver classic diff --git a/env.yaml b/env.yaml index 10266f8..6e46467 100644 --- a/env.yaml +++ b/env.yaml @@ -7,3 +7,4 @@ dependencies: - xarray - numpy - pyyaml + - dask From a1f393e7e63dd55dda84d3cb41fcfc7057ded9e9 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 10:11:17 +1000 Subject: [PATCH 053/137] fixing conda package install in actions 7 --- .github/workflows/mopper-conda.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index 1b005b5..d08666f 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -39,6 +39,7 @@ jobs: # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: install package + run: | source activate base pip install ./ - name: Test with pytest From 9a746c2af438c566f886fdc2db91a99eae1cfa23 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 10:20:06 +1000 Subject: [PATCH 054/137] issue #155 --- src/mopper/calculations.py | 2 +- src/mopper/mop_utils.py | 2 +- src/mopper/setup_utils.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py index c87213a..d217aef 100644 --- a/src/mopper/calculations.py +++ b/src/mopper/calculations.py @@ -39,7 +39,7 @@ import dask import logging -from importlib_resources import files as import_files +from importlib.resources import files as import_files from mopper.setup_utils import read_yaml # Global Variables diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index cdb78db..6017b68 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -40,7 +40,7 @@ from mopper.calculations import * from mopper.setup_utils import read_yaml -from importlib_resources import files as import_files +from importlib.resources import files as import_files def config_log(debug, path, stream_level=logging.WARNING): diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py index 192f642..be5cae0 100755 --- a/src/mopper/setup_utils.py +++ b/src/mopper/setup_utils.py @@ -40,7 +40,7 @@ from collections import OrderedDict from datetime import datetime#, timedelta from dateutil.relativedelta import relativedelta -from importlib_resources import files as import_files +from importlib.resources import files as import_files from json.decoder import JSONDecodeError from mopdb.mopdb_utils import query From 357e4e9df24767541a5b85eb373b9c7ab9eb5488 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 10:23:55 +1000 Subject: [PATCH 055/137] fixed import --- src/mopper/setup_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py index be5cae0..ef2c1ec 100755 --- a/src/mopper/setup_utils.py +++ b/src/mopper/setup_utils.py @@ -44,7 +44,7 @@ from json.decoder import JSONDecodeError from mopdb.mopdb_utils import query -from mopdb.cmip_utils import fix_years +from mopper.cmip_utils import fix_years def write_var_map(outpath, table, matches): From 835a1c550069db7a5bcbdaf256525054bb99e7c6 Mon Sep 17 00:00:00 2001 From: Sam Green Date: Thu, 11 Jul 2024 16:39:36 +1000 Subject: [PATCH 056/137] Typo in topsoil calc --- src/mopper/calculations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py index 1adf216..8ce6783 100644 --- a/src/mopper/calculations.py +++ b/src/mopper/calculations.py @@ -1175,7 +1175,7 @@ def calc_topsoil(soilvar): # calculate the fraction of maxlev which falls in first 10cm fraction = (0.1 - depth[maxlev -1])/(depth[maxlev] - depth[maxlev-1]) topsoil = soilvar.isel(depth=slice(0,maxlev)).sum(dim='depth') - topsoil = topsoil + fraction * topsoil.isel(depth=maxlev) + topsoil = topsoil + fraction * soilvar.isel(depth=maxlev) return topsoil #---------------------------------------------------------------------- From b0e738e2de6f801fa92193960b34fadd5254d119 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 11 Jul 2024 17:52:21 +1000 Subject: [PATCH 057/137] moved to two classes for FPattern and Variable --- .github/workflows/mopper-conda.yaml | 2 +- env.yaml => 
conda/enviroment.yaml | 0 src/mopdb/mopdb_class.py | 109 +++++++++++++++---------- src/mopdb/mopdb_utils.py | 118 ++++++++-------------------- 4 files changed, 101 insertions(+), 128 deletions(-) rename env.yaml => conda/enviroment.yaml (100%) diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-conda.yaml index d08666f..4642d83 100644 --- a/.github/workflows/mopper-conda.yaml +++ b/.github/workflows/mopper-conda.yaml @@ -30,7 +30,7 @@ jobs: echo $CONDA/bin >> $GITHUB_PATH - name: Install dependencies run: | - conda env update --file env.yaml --name base + conda env update --file conda/environment.yaml --name base - name: Lint with flake8 run: | conda install flake8 --solver classic diff --git a/env.yaml b/conda/enviroment.yaml similarity index 100% rename from env.yaml rename to conda/enviroment.yaml diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index f4b3e0b..41381ef 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -20,26 +20,82 @@ from pathlib import Path +class FPattern(): + """This class represents a file pattern with a set list of variables; + its attributes represent features of the variables which are shared. + """ + + def __init__(self, fpattern: str, fpath: Path): + self.fpattern = fpattern + self.fpath = fpath + self.files = self.get_files() + self.realm = self.get_realm() + self.frequency = self.get_frequency() + self.version = '' + self.multiple_frq = False + + def get_frequency(self): + frequency = 'NAfrq' + fname = str(self.files[0]) + if self.realm == 'atmos': + fbits = fname.split("_") + frequency = fbits[-1].replace(".nc", "") + elif self.realm == 'ocean': + if any(x in fname for x in ['scalar', 'month']): + frequency = 'mon' + elif 'daily' in fname: + frequency = 'day' + elif self.realm == 'seaIce': + if '_m.' in fname: + frequency = 'mon' + elif '_d.' in fname: + frequency = 'day' + return frequency + + + def get_realm(self): + realm = 'NArealm' + realm = next((x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] + if x in self.fpath.parts), 'NArealm') + fix_realm = {'atm': 'atmos', 'ice': 'seaIce', 'ocn': 'ocean'} + if realm in fix_realm.keys(): + realm = fix_realm[realm] + return realm + + def get_files(self): + return self.list_files(self.fpath, self.fpattern) + + @staticmethod + def list_files(indir, match): + """Returns list of files matching input directory and match""" + files = [x for x in Path(indir).rglob(f"*{match}*") + if x.is_file() and '.nc' in str(x)] + files.sort(key=lambda x:x.name) + return files + +
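# [editor's note] A minimal sketch of how the two classes in this refactor
# are meant to compose; the directory and names below are hypothetical
# (borrowed from the test fixtures) and assume the pattern matches at least
# one .nc file, so that files[0] exists:
#
#   from pathlib import Path
#   fobj = FPattern("umnsa_spec_", Path("/testdata/atmos"))
#   # shared attributes are derived once per file pattern ...
#   print(fobj.realm, fobj.frequency, len(fobj.files))
#   # ... and each Variable (defined next) starts from those shared values
#   tas = Variable("fld_s03i236", fobj)
#   print(tas.realm, tas.frequency)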
+ """ + # __slots__ = ('name', 'pattern', 'files', 'frequency', 'realm', # 'cmor_var', 'cmor_table', 'version', 'units', 'dimensions', # 'cell_methods', 'positive', 'long_name', 'standard_name', # 'vtype', 'size', 'nsteps') - def __init__(self, varname: str, fpattern: str, fpath: Path, files: list): + def __init__(self, varname: str, fobj: FPattern): self.name = varname - # path attributes - self.fpattern = fpattern - self.fpath = fpath - self.files = files + # path object + self.fobj = fobj + #self.fpath = fobj.fpath + #self.files = fobj.files # mapping attributes - self._frequency = None - self._realm = [x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] - if x in self.fpath.parts][0] + self._frequency = fobj.frequency + self._realm = fobj.realm self.cmor_var = '' self.cmor_table = '' - self.version = '' + #self.version = self.fpattern.version self.match = False # descriptive attributes self.units = '' @@ -56,31 +112,11 @@ def __init__(self, varname: str, fpattern: str, fpath: Path, files: list): @property def frequency(self): - if self._frequency is None: - fname = self.files[0] - if self._realm == 'atmos': - fbits = fname.split("_") - self._frequency = fbits[-1].replace(".nc", "") - elif self._realm == 'ocean': - if any(x in fname for x in ['scalar', 'month']): - self._frequency = 'mon' - elif 'daily' in fname: - self._frequency = 'day' - elif self._realm == 'seaIce': - if '_m.' in fname: - self._frequency = 'mon' - elif '_d.' in fname: - self._frequency = 'day' - else: - self._frequency = 'NAfrq' return self._frequency @frequency.setter def frequency(self, value): - fix_frq = {'dai': 'day', '3h': '3hr', '6h': '6hr'} - if value in fix_frq.keys(): - self._frequency = fix_frq[value] value = value.replace('hPt', 'hrPt') if not any(x in value for x in ['min', 'hr', 'day', 'mon', 'yr']): @@ -94,14 +130,7 @@ def realm(self): @realm.setter def realm(self, value): - fix_realm = {'atm': 'atmos', 'ice': 'seaIce', 'ocn': 'ocean'} - if value in fix_realm.keys(): - self._realm = fix_realm[value] if not any(x in value for x in - ['atmos', 'seaIce', 'ocean', 'land']): - self._realm = 'NArealm' - - def list_files(self): - """Returns list of files matching input directory and match""" - self.files = [x for x in Path(self.indir).rglob(f"{self.match}") if x.is_file()] - return files.sort(key=lambda x:x.name) + ['atmos', 'seaIce', 'ocean', 'land', 'landIce']): + value = 'NArealm' + self.realm = value diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 17d1cba..ca8560a 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -24,7 +24,6 @@ import sys import os import csv -import glob import json import stat import xarray as xr @@ -32,10 +31,10 @@ import math from datetime import datetime, date from collections import Counter -from operator import itemgetter +from operator import itemgetter, attrgetter from pathlib import Path -from mopdb.mopdb_class import Variable +from mopdb.mopdb_class import FPattern, Variable def config_log(debug): """Configures log file""" @@ -425,17 +424,6 @@ def delete_record(conn, table, col, pairs): return -def list_files(indir, match): - """Returns list of files matching input directory and match""" - mopdb_log = logging.getLogger('mopdb_log') - mopdb_log.debug(f"Pattern to list files: {indir}/**/*{match}*") - files = [x for x in Path(indir).rglob(f"{match}") if x.is_file() - and '.nc' in str(x)] - files.sort(key=lambda x:x.name) - mopdb_log.debug(f"Files after sorting: {files}") - return files - - def get_file_frq(ds, fnext): """Return a 
dictionary with frequency for each time axis. @@ -469,12 +457,11 @@ def get_file_frq(ds, fnext): for t in time_axs: mopdb_log.debug(f"len of time axis {t}: {len(ds[t])}") if len(ds[t]) > 1: - interval = (ds[t][1]-ds[t][0]).values #/ np.timedelta64(1, 'D') - interval_file = (ds[t][-1] -ds[t][0]).values #/ np.timedelta64(1, 'D') + interval = (ds[t][1]-ds[t][0]).values + interval_file = (ds[t][-1] -ds[t][0]).values else: interval = interval_file mopdb_log.debug(f"interval 2 timesteps for {t}: {interval}") - #mopdb_log.debug(f"interval entire file {t}: {interval_file}") for k,v in int2frq.items(): if math.isclose(interval, v, rel_tol=0.05): frq[t] = k @@ -482,44 +469,6 @@ def get_file_frq(ds, fnext): return frq -def get_frequency(realm, fname, ds, fnext): - """Return frequency based on realm and filename - For UM files checks if more than one time axis is present and if so - returns dictionary with frequency: variable list - """ - mopdb_log = logging.getLogger('mopdb_log') - frq_dict = {} - frequency = 'NAfrq' - if realm == 'atmos': - fbits = fname.split("_") - frequency = fbits[-1].replace(".nc", "") - fix_frq = {'dai': 'day', '3h': '3hr', '6h': '6hr'} - if frequency in fix_frq.keys(): - frequency = fix_frq[frequency] - else: - frequency = frequency.replace('hPt', 'hrPt') - frq_dict = get_file_frq(ds, fnext) - mopdb_log.debug(f"frq_dict: {frq_dict}") - elif realm == 'ocean': - # if I found scalar or monthly in any of fbits - if any(x in fname for x in ['scalar', 'month']): - frequency = 'mon' - elif 'daily' in fname: - frequency = 'day' - elif realm == 'ice': - if '_m.' in fname: - frequency = 'mon' - elif '_d.' in fname: - frequency = 'day' - if frequency == 'NAfrq': - frq_dict = get_file_frq(ds, fnext) - # if only one frequency detected empty dict - if len(frq_dict) == 1: - frequency = frq_dict.popitem()[1] - mopdb_log.debug(f"Frequency: {frequency}") - return frequency, frq_dict - - def get_cell_methods(attrs, dims): """Get cell_methods from variable attributes. 
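# [editor's note] A self-contained sketch of the interval-matching idea in
# get_file_frq above: the gap between two consecutive time values (in days,
# since the files are opened with decode_times=False) is compared against
# the expected interval for each candidate frequency; the mapping below is
# an illustrative subset of int2frq:
#
#   import math
#   int2frq = {'day': 1.0, '6hr': 0.25, '1hr': 1 / 24}
#   interval = 0.0417  # two consecutive timesteps roughly one hour apart
#   frq = next((k for k, v in int2frq.items()
#               if math.isclose(interval, v, rel_tol=0.05)), 'NAfrq')
#   print(frq)  # -> '1hr'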
If cell_methods is not defined assumes values are instantaneous @@ -547,11 +496,12 @@ def write_varlist(conn, indir, match, version, alias): for each variable """ mopdb_log = logging.getLogger('mopdb_log') - line_cols = ['name', 'cmor_var', 'units', 'dimensions', - '_frequency', '_realm', 'cell_methods', 'cmor_table', 'vtype', - 'size', 'nsteps', 'fpattern', 'long_name', 'standard_name'] + line_cols = ['name','cmor_var','units','dimensions','_frequency', + '_realm','cell_methods','cmor_table','vtype','size', + 'nsteps','fobj.fpattern','long_name','standard_name'] vobj_list = [] - files = list_files(indir, f"*{match}*") + files = FPattern.list_files(indir, match) + mopdb_log.debug(f"Files after sorting: {files}") patterns = [] if alias == '': alias = 'mopdb' @@ -560,40 +510,36 @@ def write_varlist(conn, indir, match, version, alias): fwriter = csv.writer(fcsv, delimiter=';') fwriter.writerow(["name", "cmor_var", "units", "dimensions", "frequency", "realm", "cell_methods", "cmor_table", "vtype", - "size", "nsteps", "filename", "long_name", "standard_name"]) - for i, fpath in enumerate(files): + "size", "nsteps", "fpattern", "long_name", "standard_name"]) + for fpath in files: # get filename pattern until date match mopdb_log.debug(f"Filename: {fpath.name}") fpattern = fpath.name.split(match)[0] - print(fpattern) - # adding this in case we have a mix of yyyy/yyyymn date stamps - # as then a user would have to pass yyyy only and would get 12 files for some of the patterns if fpattern in patterns: continue patterns.append(fpattern) - pattern_list = list_files(indir, f"{fpattern}*") - nfiles = len(pattern_list) - mopdb_log.debug(f"File pattern: {fpattern}") + fobj = FPattern(fpattern, Path(indir)) + #pattern_list = list_files(indir, f"{fpattern}*") + nfiles = len(fobj.files) + mopdb_log.debug(f"File pattern, number of files: {fpattern}, {nfiles}") #fwriter.writerow([f"#{fpattern}"]) # get attributes for the file variables - ds = xr.open_dataset(str(pattern_list[0]), decode_times=False) + ds = xr.open_dataset(str(fobj.files[0]), decode_times=False) coords = [c for c in ds.coords] + ['latitude_longitude'] #pass next file in case of 1 timestep per file and no frq in name - fnext = str(pattern_list[1]) - #frequency, umfrq = get_frequency(realm, fpath.name, ds, fnext) - multiple_frq = False - for idx, vname in enumerate(ds.variables): - vobj = Variable(vname, fpattern, fpath, pattern_list) - if vobj.frequency == 'NAfrq' or vobj.realm == 'atmos': - # if this is the first variable get frq from time axes - if idx == 0: - frq_dict = get_file_frq(ds, fnext) - # if only one frequency detected empty dict - if len(frq_dict) == 1: - vobj._frequency = frq_dict.popitem()[1] - else: - multiple_frq = True - mopdb_log.debug(f"Multiple frq: {multiple_frq}") + fnext = str(fobj.files[1]) + if fobj.frequency == 'NAfrq' or fobj.realm == 'atmos': + frq_dict = get_file_frq(ds, fnext) + # if only one frequency detected empty dict + if len(frq_dict) == 1: + fobj.frequency = frq_dict.popitem()[1] + else: + fobj.multiple_frq = True + mopdb_log.debug(f"Multiple frq: {fobj.multiple_frq}") + if fobj.realm == "NArealm": + fobj.realm = get_realm(version, ds) + for vname in ds.variables: + vobj = Variable(vname, fobj) if vname not in coords and all(x not in vname for x in ['_bnds','_bounds']): v = ds[vname] mopdb_log.debug(f"Variable: {vobj.name}") @@ -601,7 +547,7 @@ def write_varlist(conn, indir, match, version, alias): vobj.size = v[0].nbytes vobj.nsteps = nfiles * v.shape[0] # assign time axis frequency if more than one is 
available - if multiple_frq: + if fobj.multiple_frq: if 'time' in v.dims[0]: vobj._frequency = frq_dict[v.dims[0]] else: @@ -617,9 +563,7 @@ def write_varlist(conn, indir, match, version, alias): vobj.standard_name = attrs.get('standard_name', "") vobj.dimensions = " ".join(v.dims) vobj.vtype = v.dtype - if vobj.realm == "NArealm": - vobj.realm = get_realm(version, ds) - line = [vobj.__dict__[k] for k in line_cols] + line = [attrgetter(k)(vobj) for k in line_cols] fwriter.writerow(line) vobj_list.append(vobj) mopdb_log.info(f"Variable list for {fpattern} successfully written") From 0195945b0e8ffb5591016b25b39706c338c04643 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 12 Jul 2024 15:14:24 +1000 Subject: [PATCH 058/137] progress on intake subcommand --- setup.cfg | 2 +- src/{data => mopdata}/access.db | Bin src/{data => mopdata}/access_dump.sql | 0 .../cmor_tables/ACDD_CV.json | 0 .../cmor_tables/ACDD_coordinate.json | 0 .../cmor_tables/ACDD_formula_terms.json | 0 .../cmor_tables/ACDD_grids.json | 0 .../cmor_tables/AUS2200_A10min.json | 0 .../cmor_tables/AUS2200_A1hr.json | 0 .../cmor_tables/AUS2200_A1hrPlev.json | 0 .../cmor_tables/AUS2200_A3hr.json | 0 .../cmor_tables/AUS2200_A6hr.json | 0 .../cmor_tables/AUS2200_Aday.json | 0 .../cmor_tables/AUS2200_fx.json | 0 .../cmor_tables/CM2_3hr.json | 0 .../cmor_tables/CM2_6hr.json | 0 .../cmor_tables/CM2_day.json | 0 .../cmor_tables/CM2_mon.json | 0 .../cmor_tables/CMIP6_3hr.json | 0 .../cmor_tables/CMIP6_6hrLev.json | 0 .../cmor_tables/CMIP6_6hrPlev.json | 0 .../cmor_tables/CMIP6_6hrPlevPt.json | 0 .../cmor_tables/CMIP6_AERday.json | 0 .../cmor_tables/CMIP6_AERhr.json | 0 .../cmor_tables/CMIP6_AERmon.json | 0 .../cmor_tables/CMIP6_AERmonZ.json | 0 .../cmor_tables/CMIP6_Amon.json | 0 .../cmor_tables/CMIP6_CF3hr.json | 0 .../cmor_tables/CMIP6_CFday.json | 0 .../cmor_tables/CMIP6_CFmon.json | 0 .../cmor_tables/CMIP6_CFsubhr.json | 0 .../cmor_tables/CMIP6_CV.json | 0 .../cmor_tables/CMIP6_E1hr.json | 0 .../cmor_tables/CMIP6_E1hrClimMon.json | 0 .../cmor_tables/CMIP6_E3hr.json | 0 .../cmor_tables/CMIP6_E3hrPt.json | 0 .../cmor_tables/CMIP6_E6hrZ.json | 0 .../cmor_tables/CMIP6_Eday.json | 0 .../cmor_tables/CMIP6_EdayZ.json | 0 .../cmor_tables/CMIP6_Efx.json | 0 .../cmor_tables/CMIP6_Emon.json | 0 .../cmor_tables/CMIP6_EmonZ.json | 0 .../cmor_tables/CMIP6_Esubhr.json | 0 .../cmor_tables/CMIP6_Eyr.json | 0 .../cmor_tables/CMIP6_IfxAnt.json | 0 .../cmor_tables/CMIP6_IfxGre.json | 0 .../cmor_tables/CMIP6_ImonAnt.json | 0 .../cmor_tables/CMIP6_ImonGre.json | 0 .../cmor_tables/CMIP6_IyrAnt.json | 0 .../cmor_tables/CMIP6_IyrGre.json | 0 .../cmor_tables/CMIP6_LImon.json | 0 .../cmor_tables/CMIP6_Lmon.json | 0 .../cmor_tables/CMIP6_Oclim.json | 0 .../cmor_tables/CMIP6_Oday.json | 0 .../cmor_tables/CMIP6_Odec.json | 0 .../cmor_tables/CMIP6_Ofx.json | 0 .../cmor_tables/CMIP6_Omon.json | 0 .../cmor_tables/CMIP6_Oyr.json | 0 .../cmor_tables/CMIP6_SIday.json | 0 .../cmor_tables/CMIP6_SImon.json | 0 .../cmor_tables/CMIP6_coordinate.json | 0 .../cmor_tables/CMIP6_day.json | 0 .../cmor_tables/CMIP6_formula_terms.json | 0 .../cmor_tables/CMIP6_fx.json | 0 .../cmor_tables/CMIP6_grids.json | 0 .../dreq/cmvme_all_piControl_3_3.csv | 0 src/{data => mopdata}/land_tiles.yaml | 0 src/{data => mopdata}/landtype.yaml | 0 src/{data => mopdata}/latlon_vertices.yaml | 0 src/{data => mopdata}/model_levels.yaml | 0 src/{data => mopdata}/notes.yaml | 0 src/{data => mopdata}/transport_lines.yaml | 0 src/mopdb/mopdb.py | 90 +++++++++-------- src/mopdb/mopdb_class.py | 1 + 
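[editor's note] The write_varlist hunk just above swaps the dict-style
lookup for operator.attrgetter so that a dotted entry such as
'fobj.fpattern' in line_cols can reach into the nested FPattern object.
A minimal, self-contained illustration of that standard-library behaviour
(the two stub classes are hypothetical stand-ins):

    from operator import attrgetter

    class Inner:  # stands in for FPattern
        fpattern = "umnsa_spec_"

    class Outer:  # stands in for Variable
        name = "tas"
        fobj = Inner()

    print([attrgetter(k)(Outer()) for k in ("name", "fobj.fpattern")])
    # prints: ['tas', 'umnsa_spec_']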
src/mopdb/mopdb_utils.py | 95 +++++++++++++++--- src/mopper/calculations.py | 8 +- src/mopper/mop_setup.py | 6 +- src/mopper/mop_utils.py | 4 +- src/mopper/setup_utils.py | 1 - 79 files changed, 140 insertions(+), 67 deletions(-) rename src/{data => mopdata}/access.db (100%) rename src/{data => mopdata}/access_dump.sql (100%) rename src/{data => mopdata}/cmor_tables/ACDD_CV.json (100%) rename src/{data => mopdata}/cmor_tables/ACDD_coordinate.json (100%) rename src/{data => mopdata}/cmor_tables/ACDD_formula_terms.json (100%) rename src/{data => mopdata}/cmor_tables/ACDD_grids.json (100%) rename src/{data => mopdata}/cmor_tables/AUS2200_A10min.json (100%) rename src/{data => mopdata}/cmor_tables/AUS2200_A1hr.json (100%) rename src/{data => mopdata}/cmor_tables/AUS2200_A1hrPlev.json (100%) rename src/{data => mopdata}/cmor_tables/AUS2200_A3hr.json (100%) rename src/{data => mopdata}/cmor_tables/AUS2200_A6hr.json (100%) rename src/{data => mopdata}/cmor_tables/AUS2200_Aday.json (100%) rename src/{data => mopdata}/cmor_tables/AUS2200_fx.json (100%) rename src/{data => mopdata}/cmor_tables/CM2_3hr.json (100%) rename src/{data => mopdata}/cmor_tables/CM2_6hr.json (100%) rename src/{data => mopdata}/cmor_tables/CM2_day.json (100%) rename src/{data => mopdata}/cmor_tables/CM2_mon.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_3hr.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_6hrLev.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_6hrPlev.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_6hrPlevPt.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_AERday.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_AERhr.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_AERmon.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_AERmonZ.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Amon.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_CF3hr.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_CFday.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_CFmon.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_CFsubhr.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_CV.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_E1hr.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_E1hrClimMon.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_E3hr.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_E3hrPt.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_E6hrZ.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Eday.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_EdayZ.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Efx.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Emon.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_EmonZ.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Esubhr.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Eyr.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_IfxAnt.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_IfxGre.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_ImonAnt.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_ImonGre.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_IyrAnt.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_IyrGre.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_LImon.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Lmon.json (100%) rename src/{data => 
mopdata}/cmor_tables/CMIP6_Oclim.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Oday.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Odec.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Ofx.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Omon.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_Oyr.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_SIday.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_SImon.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_coordinate.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_day.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_formula_terms.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_fx.json (100%) rename src/{data => mopdata}/cmor_tables/CMIP6_grids.json (100%) rename src/{data => mopdata}/dreq/cmvme_all_piControl_3_3.csv (100%) rename src/{data => mopdata}/land_tiles.yaml (100%) rename src/{data => mopdata}/landtype.yaml (100%) rename src/{data => mopdata}/latlon_vertices.yaml (100%) rename src/{data => mopdata}/model_levels.yaml (100%) rename src/{data => mopdata}/notes.yaml (100%) rename src/{data => mopdata}/transport_lines.yaml (100%) diff --git a/setup.cfg b/setup.cfg index e2d1814..677f9bc 100644 --- a/setup.cfg +++ b/setup.cfg @@ -26,7 +26,7 @@ include_package_data = True where = src [options.package_data] -data = *.json, *.yaml, *.db, *.csv +mopdata = *.json, *.yaml, *.db, *.csv mopper = update_db.py [pbr] diff --git a/src/data/access.db b/src/mopdata/access.db similarity index 100% rename from src/data/access.db rename to src/mopdata/access.db diff --git a/src/data/access_dump.sql b/src/mopdata/access_dump.sql similarity index 100% rename from src/data/access_dump.sql rename to src/mopdata/access_dump.sql diff --git a/src/data/cmor_tables/ACDD_CV.json b/src/mopdata/cmor_tables/ACDD_CV.json similarity index 100% rename from src/data/cmor_tables/ACDD_CV.json rename to src/mopdata/cmor_tables/ACDD_CV.json diff --git a/src/data/cmor_tables/ACDD_coordinate.json b/src/mopdata/cmor_tables/ACDD_coordinate.json similarity index 100% rename from src/data/cmor_tables/ACDD_coordinate.json rename to src/mopdata/cmor_tables/ACDD_coordinate.json diff --git a/src/data/cmor_tables/ACDD_formula_terms.json b/src/mopdata/cmor_tables/ACDD_formula_terms.json similarity index 100% rename from src/data/cmor_tables/ACDD_formula_terms.json rename to src/mopdata/cmor_tables/ACDD_formula_terms.json diff --git a/src/data/cmor_tables/ACDD_grids.json b/src/mopdata/cmor_tables/ACDD_grids.json similarity index 100% rename from src/data/cmor_tables/ACDD_grids.json rename to src/mopdata/cmor_tables/ACDD_grids.json diff --git a/src/data/cmor_tables/AUS2200_A10min.json b/src/mopdata/cmor_tables/AUS2200_A10min.json similarity index 100% rename from src/data/cmor_tables/AUS2200_A10min.json rename to src/mopdata/cmor_tables/AUS2200_A10min.json diff --git a/src/data/cmor_tables/AUS2200_A1hr.json b/src/mopdata/cmor_tables/AUS2200_A1hr.json similarity index 100% rename from src/data/cmor_tables/AUS2200_A1hr.json rename to src/mopdata/cmor_tables/AUS2200_A1hr.json diff --git a/src/data/cmor_tables/AUS2200_A1hrPlev.json b/src/mopdata/cmor_tables/AUS2200_A1hrPlev.json similarity index 100% rename from src/data/cmor_tables/AUS2200_A1hrPlev.json rename to src/mopdata/cmor_tables/AUS2200_A1hrPlev.json diff --git a/src/data/cmor_tables/AUS2200_A3hr.json b/src/mopdata/cmor_tables/AUS2200_A3hr.json similarity index 100% rename from src/data/cmor_tables/AUS2200_A3hr.json rename 
to src/mopdata/cmor_tables/AUS2200_A3hr.json diff --git a/src/data/cmor_tables/AUS2200_A6hr.json b/src/mopdata/cmor_tables/AUS2200_A6hr.json similarity index 100% rename from src/data/cmor_tables/AUS2200_A6hr.json rename to src/mopdata/cmor_tables/AUS2200_A6hr.json diff --git a/src/data/cmor_tables/AUS2200_Aday.json b/src/mopdata/cmor_tables/AUS2200_Aday.json similarity index 100% rename from src/data/cmor_tables/AUS2200_Aday.json rename to src/mopdata/cmor_tables/AUS2200_Aday.json diff --git a/src/data/cmor_tables/AUS2200_fx.json b/src/mopdata/cmor_tables/AUS2200_fx.json similarity index 100% rename from src/data/cmor_tables/AUS2200_fx.json rename to src/mopdata/cmor_tables/AUS2200_fx.json diff --git a/src/data/cmor_tables/CM2_3hr.json b/src/mopdata/cmor_tables/CM2_3hr.json similarity index 100% rename from src/data/cmor_tables/CM2_3hr.json rename to src/mopdata/cmor_tables/CM2_3hr.json diff --git a/src/data/cmor_tables/CM2_6hr.json b/src/mopdata/cmor_tables/CM2_6hr.json similarity index 100% rename from src/data/cmor_tables/CM2_6hr.json rename to src/mopdata/cmor_tables/CM2_6hr.json diff --git a/src/data/cmor_tables/CM2_day.json b/src/mopdata/cmor_tables/CM2_day.json similarity index 100% rename from src/data/cmor_tables/CM2_day.json rename to src/mopdata/cmor_tables/CM2_day.json diff --git a/src/data/cmor_tables/CM2_mon.json b/src/mopdata/cmor_tables/CM2_mon.json similarity index 100% rename from src/data/cmor_tables/CM2_mon.json rename to src/mopdata/cmor_tables/CM2_mon.json diff --git a/src/data/cmor_tables/CMIP6_3hr.json b/src/mopdata/cmor_tables/CMIP6_3hr.json similarity index 100% rename from src/data/cmor_tables/CMIP6_3hr.json rename to src/mopdata/cmor_tables/CMIP6_3hr.json diff --git a/src/data/cmor_tables/CMIP6_6hrLev.json b/src/mopdata/cmor_tables/CMIP6_6hrLev.json similarity index 100% rename from src/data/cmor_tables/CMIP6_6hrLev.json rename to src/mopdata/cmor_tables/CMIP6_6hrLev.json diff --git a/src/data/cmor_tables/CMIP6_6hrPlev.json b/src/mopdata/cmor_tables/CMIP6_6hrPlev.json similarity index 100% rename from src/data/cmor_tables/CMIP6_6hrPlev.json rename to src/mopdata/cmor_tables/CMIP6_6hrPlev.json diff --git a/src/data/cmor_tables/CMIP6_6hrPlevPt.json b/src/mopdata/cmor_tables/CMIP6_6hrPlevPt.json similarity index 100% rename from src/data/cmor_tables/CMIP6_6hrPlevPt.json rename to src/mopdata/cmor_tables/CMIP6_6hrPlevPt.json diff --git a/src/data/cmor_tables/CMIP6_AERday.json b/src/mopdata/cmor_tables/CMIP6_AERday.json similarity index 100% rename from src/data/cmor_tables/CMIP6_AERday.json rename to src/mopdata/cmor_tables/CMIP6_AERday.json diff --git a/src/data/cmor_tables/CMIP6_AERhr.json b/src/mopdata/cmor_tables/CMIP6_AERhr.json similarity index 100% rename from src/data/cmor_tables/CMIP6_AERhr.json rename to src/mopdata/cmor_tables/CMIP6_AERhr.json diff --git a/src/data/cmor_tables/CMIP6_AERmon.json b/src/mopdata/cmor_tables/CMIP6_AERmon.json similarity index 100% rename from src/data/cmor_tables/CMIP6_AERmon.json rename to src/mopdata/cmor_tables/CMIP6_AERmon.json diff --git a/src/data/cmor_tables/CMIP6_AERmonZ.json b/src/mopdata/cmor_tables/CMIP6_AERmonZ.json similarity index 100% rename from src/data/cmor_tables/CMIP6_AERmonZ.json rename to src/mopdata/cmor_tables/CMIP6_AERmonZ.json diff --git a/src/data/cmor_tables/CMIP6_Amon.json b/src/mopdata/cmor_tables/CMIP6_Amon.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Amon.json rename to src/mopdata/cmor_tables/CMIP6_Amon.json diff --git a/src/data/cmor_tables/CMIP6_CF3hr.json 
b/src/mopdata/cmor_tables/CMIP6_CF3hr.json similarity index 100% rename from src/data/cmor_tables/CMIP6_CF3hr.json rename to src/mopdata/cmor_tables/CMIP6_CF3hr.json diff --git a/src/data/cmor_tables/CMIP6_CFday.json b/src/mopdata/cmor_tables/CMIP6_CFday.json similarity index 100% rename from src/data/cmor_tables/CMIP6_CFday.json rename to src/mopdata/cmor_tables/CMIP6_CFday.json diff --git a/src/data/cmor_tables/CMIP6_CFmon.json b/src/mopdata/cmor_tables/CMIP6_CFmon.json similarity index 100% rename from src/data/cmor_tables/CMIP6_CFmon.json rename to src/mopdata/cmor_tables/CMIP6_CFmon.json diff --git a/src/data/cmor_tables/CMIP6_CFsubhr.json b/src/mopdata/cmor_tables/CMIP6_CFsubhr.json similarity index 100% rename from src/data/cmor_tables/CMIP6_CFsubhr.json rename to src/mopdata/cmor_tables/CMIP6_CFsubhr.json diff --git a/src/data/cmor_tables/CMIP6_CV.json b/src/mopdata/cmor_tables/CMIP6_CV.json similarity index 100% rename from src/data/cmor_tables/CMIP6_CV.json rename to src/mopdata/cmor_tables/CMIP6_CV.json diff --git a/src/data/cmor_tables/CMIP6_E1hr.json b/src/mopdata/cmor_tables/CMIP6_E1hr.json similarity index 100% rename from src/data/cmor_tables/CMIP6_E1hr.json rename to src/mopdata/cmor_tables/CMIP6_E1hr.json diff --git a/src/data/cmor_tables/CMIP6_E1hrClimMon.json b/src/mopdata/cmor_tables/CMIP6_E1hrClimMon.json similarity index 100% rename from src/data/cmor_tables/CMIP6_E1hrClimMon.json rename to src/mopdata/cmor_tables/CMIP6_E1hrClimMon.json diff --git a/src/data/cmor_tables/CMIP6_E3hr.json b/src/mopdata/cmor_tables/CMIP6_E3hr.json similarity index 100% rename from src/data/cmor_tables/CMIP6_E3hr.json rename to src/mopdata/cmor_tables/CMIP6_E3hr.json diff --git a/src/data/cmor_tables/CMIP6_E3hrPt.json b/src/mopdata/cmor_tables/CMIP6_E3hrPt.json similarity index 100% rename from src/data/cmor_tables/CMIP6_E3hrPt.json rename to src/mopdata/cmor_tables/CMIP6_E3hrPt.json diff --git a/src/data/cmor_tables/CMIP6_E6hrZ.json b/src/mopdata/cmor_tables/CMIP6_E6hrZ.json similarity index 100% rename from src/data/cmor_tables/CMIP6_E6hrZ.json rename to src/mopdata/cmor_tables/CMIP6_E6hrZ.json diff --git a/src/data/cmor_tables/CMIP6_Eday.json b/src/mopdata/cmor_tables/CMIP6_Eday.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Eday.json rename to src/mopdata/cmor_tables/CMIP6_Eday.json diff --git a/src/data/cmor_tables/CMIP6_EdayZ.json b/src/mopdata/cmor_tables/CMIP6_EdayZ.json similarity index 100% rename from src/data/cmor_tables/CMIP6_EdayZ.json rename to src/mopdata/cmor_tables/CMIP6_EdayZ.json diff --git a/src/data/cmor_tables/CMIP6_Efx.json b/src/mopdata/cmor_tables/CMIP6_Efx.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Efx.json rename to src/mopdata/cmor_tables/CMIP6_Efx.json diff --git a/src/data/cmor_tables/CMIP6_Emon.json b/src/mopdata/cmor_tables/CMIP6_Emon.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Emon.json rename to src/mopdata/cmor_tables/CMIP6_Emon.json diff --git a/src/data/cmor_tables/CMIP6_EmonZ.json b/src/mopdata/cmor_tables/CMIP6_EmonZ.json similarity index 100% rename from src/data/cmor_tables/CMIP6_EmonZ.json rename to src/mopdata/cmor_tables/CMIP6_EmonZ.json diff --git a/src/data/cmor_tables/CMIP6_Esubhr.json b/src/mopdata/cmor_tables/CMIP6_Esubhr.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Esubhr.json rename to src/mopdata/cmor_tables/CMIP6_Esubhr.json diff --git a/src/data/cmor_tables/CMIP6_Eyr.json b/src/mopdata/cmor_tables/CMIP6_Eyr.json similarity index 100% rename from 
src/data/cmor_tables/CMIP6_Eyr.json rename to src/mopdata/cmor_tables/CMIP6_Eyr.json diff --git a/src/data/cmor_tables/CMIP6_IfxAnt.json b/src/mopdata/cmor_tables/CMIP6_IfxAnt.json similarity index 100% rename from src/data/cmor_tables/CMIP6_IfxAnt.json rename to src/mopdata/cmor_tables/CMIP6_IfxAnt.json diff --git a/src/data/cmor_tables/CMIP6_IfxGre.json b/src/mopdata/cmor_tables/CMIP6_IfxGre.json similarity index 100% rename from src/data/cmor_tables/CMIP6_IfxGre.json rename to src/mopdata/cmor_tables/CMIP6_IfxGre.json diff --git a/src/data/cmor_tables/CMIP6_ImonAnt.json b/src/mopdata/cmor_tables/CMIP6_ImonAnt.json similarity index 100% rename from src/data/cmor_tables/CMIP6_ImonAnt.json rename to src/mopdata/cmor_tables/CMIP6_ImonAnt.json diff --git a/src/data/cmor_tables/CMIP6_ImonGre.json b/src/mopdata/cmor_tables/CMIP6_ImonGre.json similarity index 100% rename from src/data/cmor_tables/CMIP6_ImonGre.json rename to src/mopdata/cmor_tables/CMIP6_ImonGre.json diff --git a/src/data/cmor_tables/CMIP6_IyrAnt.json b/src/mopdata/cmor_tables/CMIP6_IyrAnt.json similarity index 100% rename from src/data/cmor_tables/CMIP6_IyrAnt.json rename to src/mopdata/cmor_tables/CMIP6_IyrAnt.json diff --git a/src/data/cmor_tables/CMIP6_IyrGre.json b/src/mopdata/cmor_tables/CMIP6_IyrGre.json similarity index 100% rename from src/data/cmor_tables/CMIP6_IyrGre.json rename to src/mopdata/cmor_tables/CMIP6_IyrGre.json diff --git a/src/data/cmor_tables/CMIP6_LImon.json b/src/mopdata/cmor_tables/CMIP6_LImon.json similarity index 100% rename from src/data/cmor_tables/CMIP6_LImon.json rename to src/mopdata/cmor_tables/CMIP6_LImon.json diff --git a/src/data/cmor_tables/CMIP6_Lmon.json b/src/mopdata/cmor_tables/CMIP6_Lmon.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Lmon.json rename to src/mopdata/cmor_tables/CMIP6_Lmon.json diff --git a/src/data/cmor_tables/CMIP6_Oclim.json b/src/mopdata/cmor_tables/CMIP6_Oclim.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Oclim.json rename to src/mopdata/cmor_tables/CMIP6_Oclim.json diff --git a/src/data/cmor_tables/CMIP6_Oday.json b/src/mopdata/cmor_tables/CMIP6_Oday.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Oday.json rename to src/mopdata/cmor_tables/CMIP6_Oday.json diff --git a/src/data/cmor_tables/CMIP6_Odec.json b/src/mopdata/cmor_tables/CMIP6_Odec.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Odec.json rename to src/mopdata/cmor_tables/CMIP6_Odec.json diff --git a/src/data/cmor_tables/CMIP6_Ofx.json b/src/mopdata/cmor_tables/CMIP6_Ofx.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Ofx.json rename to src/mopdata/cmor_tables/CMIP6_Ofx.json diff --git a/src/data/cmor_tables/CMIP6_Omon.json b/src/mopdata/cmor_tables/CMIP6_Omon.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Omon.json rename to src/mopdata/cmor_tables/CMIP6_Omon.json diff --git a/src/data/cmor_tables/CMIP6_Oyr.json b/src/mopdata/cmor_tables/CMIP6_Oyr.json similarity index 100% rename from src/data/cmor_tables/CMIP6_Oyr.json rename to src/mopdata/cmor_tables/CMIP6_Oyr.json diff --git a/src/data/cmor_tables/CMIP6_SIday.json b/src/mopdata/cmor_tables/CMIP6_SIday.json similarity index 100% rename from src/data/cmor_tables/CMIP6_SIday.json rename to src/mopdata/cmor_tables/CMIP6_SIday.json diff --git a/src/data/cmor_tables/CMIP6_SImon.json b/src/mopdata/cmor_tables/CMIP6_SImon.json similarity index 100% rename from src/data/cmor_tables/CMIP6_SImon.json rename to 
src/mopdata/cmor_tables/CMIP6_SImon.json diff --git a/src/data/cmor_tables/CMIP6_coordinate.json b/src/mopdata/cmor_tables/CMIP6_coordinate.json similarity index 100% rename from src/data/cmor_tables/CMIP6_coordinate.json rename to src/mopdata/cmor_tables/CMIP6_coordinate.json diff --git a/src/data/cmor_tables/CMIP6_day.json b/src/mopdata/cmor_tables/CMIP6_day.json similarity index 100% rename from src/data/cmor_tables/CMIP6_day.json rename to src/mopdata/cmor_tables/CMIP6_day.json diff --git a/src/data/cmor_tables/CMIP6_formula_terms.json b/src/mopdata/cmor_tables/CMIP6_formula_terms.json similarity index 100% rename from src/data/cmor_tables/CMIP6_formula_terms.json rename to src/mopdata/cmor_tables/CMIP6_formula_terms.json diff --git a/src/data/cmor_tables/CMIP6_fx.json b/src/mopdata/cmor_tables/CMIP6_fx.json similarity index 100% rename from src/data/cmor_tables/CMIP6_fx.json rename to src/mopdata/cmor_tables/CMIP6_fx.json diff --git a/src/data/cmor_tables/CMIP6_grids.json b/src/mopdata/cmor_tables/CMIP6_grids.json similarity index 100% rename from src/data/cmor_tables/CMIP6_grids.json rename to src/mopdata/cmor_tables/CMIP6_grids.json diff --git a/src/data/dreq/cmvme_all_piControl_3_3.csv b/src/mopdata/dreq/cmvme_all_piControl_3_3.csv similarity index 100% rename from src/data/dreq/cmvme_all_piControl_3_3.csv rename to src/mopdata/dreq/cmvme_all_piControl_3_3.csv diff --git a/src/data/land_tiles.yaml b/src/mopdata/land_tiles.yaml similarity index 100% rename from src/data/land_tiles.yaml rename to src/mopdata/land_tiles.yaml diff --git a/src/data/landtype.yaml b/src/mopdata/landtype.yaml similarity index 100% rename from src/data/landtype.yaml rename to src/mopdata/landtype.yaml diff --git a/src/data/latlon_vertices.yaml b/src/mopdata/latlon_vertices.yaml similarity index 100% rename from src/data/latlon_vertices.yaml rename to src/mopdata/latlon_vertices.yaml diff --git a/src/data/model_levels.yaml b/src/mopdata/model_levels.yaml similarity index 100% rename from src/data/model_levels.yaml rename to src/mopdata/model_levels.yaml diff --git a/src/data/notes.yaml b/src/mopdata/notes.yaml similarity index 100% rename from src/data/notes.yaml rename to src/mopdata/notes.yaml diff --git a/src/data/transport_lines.yaml b/src/mopdata/transport_lines.yaml similarity index 100% rename from src/data/transport_lines.yaml rename to src/mopdata/transport_lines.yaml diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 335a367..c4fdb38 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -122,7 +122,7 @@ def check_cmor(ctx, dbname): mopdb_log = logging.getLogger('mopdb_log') # connect to db, this will create one if not existing if dbname == 'default': - dbname = import_files('data').joinpath('access.db') + dbname = import_files('mopdata').joinpath('access.db') conn = db_connect(dbname) # get list of variables already in db sql = 'SELECT name, out_name FROM cmorvar' @@ -175,7 +175,7 @@ def cmor_table(ctx, dbname, fname, alias, label): mopdb_log = logging.getLogger('mopdb_log') # connect to db, this will create one if not existing if dbname == 'default': - dbname = import_files('data').joinpath('access.db') + dbname = import_files('mopdata').joinpath('access.db') conn = db_connect(dbname) # get list of variables already in db sql = "SELECT out_name, frequency, modeling_realm FROM cmorvar" @@ -251,7 +251,7 @@ def update_cmor(ctx, dbname, fname, alias): alias = alias.replace('.json', '') mopdb_log.info(f"Adding {alias} to variable name to track origin") # connect to db, this will create 
one if not existing - dbcentral = import_files('data').joinpath('access.db') + dbcentral = import_files('mopdata').joinpath('access.db') if dbname in [dbcentral, 'default']: mopdb_log.error("The package database cannot be updated") sys.exit() @@ -287,6 +287,7 @@ def update_cmor(ctx, dbname, fname, alias): sys.exit() # insert new vars and update existing ones update_db(conn, 'cmorvar', vars_list) + conn.close() return @@ -324,19 +325,19 @@ def map_template(ctx, fpath, match, dbname, version, alias): ------- """ mopdb_log = logging.getLogger('mopdb_log') + # connect to db, this will create one if not existing + if dbname == 'default': + dbname = import_files('mopdata').joinpath('access.db') + conn = db_connect(dbname) # work out if fpath is varlist or path to output fpath = Path(fpath) if fpath.is_file(): fname = fpath.name else: mopdb_log.debug(f"Calling model_vars() from template: {fpath}") - fname, vobjs = model_vars(fpath, match, dbname, version, alias) + fname, vobjs, fobjs = model_vars(fpath, match, conn, version, alias) if alias == '': alias = fname.split(".")[0] - # connect to db, check first if db exists or exit - if dbname == 'default': - dbname = import_files('data').joinpath('access.db') - conn = db_connect(dbname) # read list of vars from file with open(fname, 'r') as csvfile: reader = csv.DictReader(csvfile, delimiter=';') @@ -344,24 +345,9 @@ def map_template(ctx, fpath, match, dbname, version, alias): check_varlist(rows, fname) # return lists of fully/partially matching variables and stash_vars # these are input_vars for calculation defined in already in mapping db - full, no_ver, no_frq, stdn, no_match, stash_vars = parse_vars(conn, - rows, version) - - # remove duplicates from partially matched variables - no_ver = remove_duplicate(no_ver) - no_frq = remove_duplicate(no_frq, strict=False) - no_match = remove_duplicate(no_match, strict=False) - - # check if more derived variables can be added based on all - # input_vars being available - pot_full, pot_part, pot_varnames = potential_vars(conn, rows, - stash_vars, version) + parsed = map_variables(conn, rows, version) # potential vars have always duplicates: 1 for each input_var - pot_full = remove_duplicate(pot_full, strict=False) - pot_part = remove_duplicate(pot_part, extra=pot_full, strict=False) - mopdb_log.info(f"Derived variables: {pot_varnames}") - write_map_template(conn, full, no_ver, no_frq, stdn, - no_match, pot_full, pot_part, alias) + write_map_template(conn, parsed, alias) conn.close() return @@ -370,7 +356,7 @@ def map_template(ctx, fpath, match, dbname, version, alias): @mopdb.command(name='intake') @map_args @click.pass_context -def write_catalogue(ctx, fpath, match, dbname, version, alias): +def write_intake(ctx, fpath, match, dbname, version, alias): """Writes an intake-esm catalogue. 
It can get as input the directory containing the output in @@ -398,19 +384,32 @@ def write_catalogue(ctx, fpath, match, dbname, version, alias): ------- """ mopdb_log = logging.getLogger('mopdb_log') + # connect to db, check first if db exists or exit + if dbname == 'default': + dbname = import_files('mopdata').joinpath('access.db') + conn = db_connect(dbname) # work out if fpath is varlist or path to output fpath = Path(fpath) if fpath.is_file(): fname = fpath.name else: mopdb_log.debug(f"Calling model_vars() from intake: {fpath}") - fname, vobjs = model_vars(fpath, match, dbname, version, alias) - if alias == '': + fname, vobjs, fobjs = model_vars(fpath, match, conn, version, alias) + if alias == '' alias = fname.split(".")[0] - # connect to db, check first if db exists or exit - if dbname == 'default': - dbname = import_files('data').joinpath('access.db') - conn = db_connect(dbname) + # read list of vars from file + with open(fname, 'r') as csvfile: + reader = csv.DictReader(csvfile, delimiter=';') + rows = list(reader) + check_varlist(rows, fname) + # return lists of fully/partially matching variables and stash_vars + # these are input_vars for calculation defined in already in mapping db + parsed = map_variables(conn, rows, version) + # potential vars have always duplicates: 1 for each input_var + cat_name, fcsv = write_catalogue(conn, parsed, vobjs, fobjs, alias) + mopdb_log.info("Intake-esm catalogue written to {cat_name} and {fcsv}") + conn.close() + return None @mopdb.command(name='map') @@ -438,7 +437,7 @@ def update_map(ctx, dbname, fname, alias): """ mopdb_log = logging.getLogger('mopdb_log') # connect to db, this will create one if not existing - dbcentral = import_files('data').joinpath('access.db') + dbcentral = import_files('mopdata').joinpath('access.db') if dbname in [dbcentral, 'default']: mopdb_log.error("The package database cannot be updated") sys.exit() @@ -459,7 +458,8 @@ def update_map(ctx, dbname, fname, alias): var_list = read_map(fname, alias) # update mapping table update_db(conn, 'mapping', var_list) - return + conn.close() + return None @mopdb.command(name='varlist') @@ -467,11 +467,17 @@ def update_map(ctx, dbname, fname, alias): @click.pass_context def list_vars(ctx, fpath, match, dbname, version, alias): """Calls model_vars to generate list of variables""" - fname, vobjs = model_vars(fpath, match, dbname, version, alias) + # connect to db, check first if db exists or exit + if dbname == 'default': + dbname = import_files('mopdata').joinpath('access.db') + conn = db_connect(dbname) + fname, vobjs, fobjs = model_vars(fpath, match, conn, version, alias) + conn.close() + return None @click.pass_context -def model_vars(ctx, fpath, match, dbname, version, alias): +def model_vars(ctx, fpath, match, conn, version, alias): """Read variables from model output opens one file for each kind, save variable list as csv file @@ -498,13 +504,8 @@ def model_vars(ctx, fpath, match, dbname, version, alias): """ mopdb_log = logging.getLogger('mopdb_log') - # connect to db, this will create one if not existing - if dbname == 'default': - dbname = import_files('data').joinpath('access.db') - conn = db_connect(dbname) - fname, vobjs = write_varlist(conn, fpath, match, version, alias) - conn.close() - return fname, vobjs + fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) + return fname, vobjs, fobjs @mopdb.command(name='del') @@ -536,7 +537,7 @@ def remove_record(ctx, dbname, table, pair): """ mopdb_log = logging.getLogger('mopdb_log') # connect to db, this 
will create one if not existing - dbcentral = import_files('data').joinpath('access.db') + dbcentral = import_files('mopdata').joinpath('access.db') if dbname == dbcentral: mopdb_log.error("The package database cannot be updated") sys.exit() @@ -548,4 +549,5 @@ def remove_record(ctx, dbname, table, pair): col = "cmor_var,frequency,realm,cmor_table" # select, confirm, delete record/s delete_record(conn, table, col, pair) + conn.close() return diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index 41381ef..a592465 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -33,6 +33,7 @@ def __init__(self, fpattern: str, fpath: Path): self.frequency = self.get_frequency() self.version = '' self.multiple_frq = False + self.varlist = [] def get_frequency(self): frequency = 'NAfrq' diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index ca8560a..79161b7 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -26,13 +26,16 @@ import csv import json import stat +import lzma import xarray as xr import numpy as np import math + from datetime import datetime, date from collections import Counter from operator import itemgetter, attrgetter from pathlib import Path +from importlib.resources import files as import_files from mopdb.mopdb_class import FPattern, Variable @@ -222,7 +225,6 @@ def update_db(conn, table, rows_list): c.executemany(sql, rows_list) nmodified = c.rowcount mopdb_log.info(f"Rows modified: {nmodified}") - conn.close() mopdb_log.info('--- Done ---') return @@ -420,7 +422,6 @@ def delete_record(conn, table, col, pairs): mopdb_log.info(f"Rows modified: {c.fetchall()[0][0]}") else: mopdb_log.info("The query did not return any records") - conn.close() return @@ -500,9 +501,10 @@ def write_varlist(conn, indir, match, version, alias): '_realm','cell_methods','cmor_table','vtype','size', 'nsteps','fobj.fpattern','long_name','standard_name'] vobj_list = [] + fobj_list = [] + patterns = [] files = FPattern.list_files(indir, match) mopdb_log.debug(f"Files after sorting: {files}") - patterns = [] if alias == '': alias = 'mopdb' fname = f"varlist_{alias}.csv" @@ -538,6 +540,7 @@ def write_varlist(conn, indir, match, version, alias): mopdb_log.debug(f"Multiple frq: {fobj.multiple_frq}") if fobj.realm == "NArealm": fobj.realm = get_realm(version, ds) + pattern_var_list = [] for vname in ds.variables: vobj = Variable(vname, fobj) if vname not in coords and all(x not in vname for x in ['_bnds','_bounds']): @@ -566,9 +569,12 @@ def write_varlist(conn, indir, match, version, alias): line = [attrgetter(k)(vobj) for k in line_cols] fwriter.writerow(line) vobj_list.append(vobj) + pattern_var_list.append(vobj.name) + fjob.varlist = pattern_var_list + fjob_list.append(fobj) mopdb_log.info(f"Variable list for {fpattern} successfully written") fcsv.close() - return fname, vobj_list + return fname, vobj_list, fobj_list def read_map_app4(fname): @@ -602,7 +608,7 @@ def read_map(fname, alias): Fields from file: cmor_var, input_vars, calculation, units, dimensions, frequency, realm, cell_methods, positive, cmor_table, version, vtype, size, nsteps, - filename, long_name, standard_name + fpattern, long_name, standard_name Fields in table: cmor_var, input_vars, calculation, units, dimensions, frequency, realm, cell_methods, positive, model, notes, origin @@ -846,28 +852,28 @@ def potential_vars(conn, rows, stash_vars, version): return pot_full, pot_part, pot_varnames -def write_map_template(conn, full, no_ver, no_frq, stdn, - no_match, pot_full, pot_part, 
alias):
+def write_map_template(conn, parsed, alias):
     """Write mapping csv file template based on list of variables to define

     Input varlist file order:
     name, cmor_var, units, dimensions, frequency, realm, cell_methods,
-    cmor_table, vtype, size, nsteps, filename, long_name, standard_name
+    cmor_table, vtype, size, nsteps, fpattern, long_name, standard_name
     Mapping db order:
     cmor_var, input_vars, calculation, units, dimensions, frequency, realm,
     cell_methods, positive, cmor_table, model, notes, origin
-    for pot vars + vtype, size, nsteps, filename
+    for pot vars + vtype, size, nsteps, fpattern
     Final template order:
     cmor_var, input_vars, calculation, units, dimensions, frequency, realm,
-    cell_methods, positive, cmor_table, version, vtype, size, nsteps, filename,
+    cell_methods, positive, cmor_table, version, vtype, size, nsteps, fpattern,
     long_name, standard_name
     """
     mopdb_log = logging.getLogger('mopdb_log')
+    full, no_ver, no_frq, stdn, no_match, pot_full, pot_part = parsed
     keys = ['cmor_var', 'input_vars', 'calculation', 'units',
         'dimensions', 'frequency', 'realm', 'cell_methods',
         'positive', 'cmor_table', 'version', 'vtype', 'size',
-        'nsteps', 'filename', 'long_name', 'standard_name']
+        'nsteps', 'fpattern', 'long_name', 'standard_name']

     with open(f"map_{alias}.csv", 'w') as fcsv:
         fwriter = csv.DictWriter(fcsv, keys, delimiter=';')
@@ -875,7 +881,6 @@ def write_map_template(conn, full, no_ver, no_frq, stdn,
         div = ("# Derived variables with matching version and "
             + "frequency: Use with caution!")
         write_vars(pot_full, fwriter, div, conn=conn)
-        #pot=True, conn=conn, sortby=0)
         div = ("# Variables definitions coming from different "
             + "version")
         write_vars(no_ver, fwriter, div, conn=conn)
@@ -982,3 +987,69 @@ def check_varlist(rows, fname):
             Some values might be invalid and need fixing""")
         sys.exit()
     return
+
+
+def map_variables(conn, rows, version):
+    """
+    """
+    mopdb_log = logging.getLogger('mopdb_log')
+    # return lists of fully/partially matching variables and stash_vars
+    # these are input_vars for calculation defined in already in mapping db
+    full, no_ver, no_frq, stdn, no_match, stash_vars = parse_vars(conn,
+        rows, version)
+    # remove duplicates from partially matched variables
+    no_ver = remove_duplicate(no_ver)
+    no_frq = remove_duplicate(no_frq, strict=False)
+    no_match = remove_duplicate(no_match, strict=False)
+    # check if more derived variables can be added based on all
+    # input_vars being available
+    pot_full, pot_part, pot_varnames = potential_vars(conn, rows,
+        stash_vars, version)
+    # potential vars have always duplicates: 1 for each input_var
+    pot_full = remove_duplicate(pot_full, strict=False)
+    pot_part = remove_duplicate(pot_part, extra=pot_full, strict=False)
+    mopdb_log.info(f"Derived variables: {pot_varnames}")
+    return full, no_ver, no_frq, stdn, no_match, pot_full, pot_part
+
+
+def write_catalogue(conn, parsed, vobjs, fobjs, alias):
+    """Write intake-esm catalogue and returns name
+    """
+    mopdb_log = logging.getLogger('mopdb_log')
+    # read template json data
+    jfile = import_files('mopdata').joinpath('intake_cat_template.json')
+    with open(jfile, 'r') as f:
+        template = json.load(f)
+    mopdb_log.debug("Opened intake template file")
+    # update json data with relevant information
+    # update title, description etc with experiment
+    for k,v in template.items():
+        if type(v) == str:
+            template[k] = v.replace('<experiment>', alias)
+    # write updated json to file
+    jfile = f"intake_{alias}.json"
+    with open(jfile, 'w') as f:
+        json.dump(template, f, indent=4)
+    # create a
dictionary for each file to list + for pat_obj in fobjs: + var_list = get_pattern_vars. + base_dict = {'experiment': alias, + 'realm': = pat_obj.realm, + 'realm': = pat_obj.realm, + # write csv file + csvname = template['catalog_file'] + with lzma.open(csvname, 'wt') as fcsv: + fwriter = csv.DictWriter(fcsv, keys, delimiter=',') + for f in files_dict: + fwriter.writerow(f) + fcsv.close() + return jfile, csvname + +"experiment" + "column_name": "realm" + "column_name": "frequency" + "variable" + "column_name": "map_var" + "column_name": "map_table" + "column_name": "standard_name" + "column_name": "date_range" diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py index d217aef..954d72a 100644 --- a/src/mopper/calculations.py +++ b/src/mopper/calculations.py @@ -153,7 +153,7 @@ class IceTransportCalculations(): @click.pass_context def __init__(self, ctx): - fname = import_files('data').joinpath('transport_lines.yaml') + fname = import_files('mopdata').joinpath('transport_lines.yaml') self.yaml_data = read_yaml(fname)['lines'] self.gridfile = xr.open_dataset(f"{ctx.obj['ancils_path']}/"+ @@ -568,7 +568,7 @@ class SeaIceCalculations(): @click.pass_context def __init__(self, ctx): - fname = import_files('data').joinpath('transport_lines.yaml') + fname = import_files('mopdata').joinpath('transport_lines.yaml') self.yaml_data = read_yaml(fname)['lines'] self.gridfile = xr.open_dataset(f"{ctx.obj['ancil_path']}/" + @@ -1004,7 +1004,7 @@ def extract_tilefrac(ctx, tilefrac, tilenum, landfrac=None, lev=None): vout = vout * landfrac if lev: - fname = import_files('data').joinpath('landtype.yaml') + fname = import_files('mopdata').joinpath('landtype.yaml') data = read_yaml(fname) type_dict = data['mod_mapping'] vout = vout.expand_dims(dim={lev: type_dict[lev]}) @@ -1147,7 +1147,7 @@ def average_tile(var, tilefrac=None, lfrac=1, landfrac=None, lev=None): vout = vout * landfrac if lev: - fname = import_files('data').joinpath('landtype.yaml') + fname = import_files('mopdata').joinpath('landtype.yaml') data = read_yaml(fname) type_dict = data['mod_mapping'] vout = vout.expand_dims(dim={lev: type_dict[lev]}) diff --git a/src/mopper/mop_setup.py b/src/mopper/mop_setup.py index 90ba47e..5bc5634 100755 --- a/src/mopper/mop_setup.py +++ b/src/mopper/mop_setup.py @@ -256,7 +256,7 @@ def var_map(ctx, activity_id=None): access_version = ctx.obj['access_version'] if ctx.obj['force_dreq'] is True: if ctx.obj['dreq'] == 'default': - ctx.obj['dreq'] = import_files('data').joinpath( + ctx.obj['dreq'] = import_files('mopdata').joinpath( 'data/dreq/cmvme_all_piControl_3_3.csv' ) with ctx.obj['master_map'].open(mode='r') as f: reader = csv.DictReader(f, delimiter=';') @@ -300,7 +300,7 @@ def create_var_map(ctx, table, mappings, activity_id=None, matches = [] fpath = ctx.obj['tables_path'] / f"{table}.json" if not fpath.exists(): - fpath = import_files('data').joinpath( + fpath = import_files('mopdata').joinpath( f"cmor_tables/{table}.json") table_id = table.split('_')[1] mop_log.debug(f"Mappings: {mappings}") @@ -406,7 +406,7 @@ def manage_env(ctx): '_control_vocabulary_file']: fpath = ctx.obj['tables_path'] / ctx.obj[f] if not fpath.exists(): - fpath = import_files('data').joinpath( + fpath = import_files('mopdata').joinpath( f"cmor_tables/{ctx.obj[f]}") if f == '_control_vocabulary_file': fname = "CMIP6_CV.json" diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index 6017b68..2eb9695 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -575,7 +575,7 @@ def get_coords(ctx, 
ovar, coords):
     ds = xr.open_dataset(f"{ctx.obj['ancils_path']}/{ancil_file}")
     var_log.debug(f"ancil ds: {ds}")
     # read lat/lon and vertices mapping
-    cfile = import_files('data').joinpath('latlon_vertices.yaml')
+    cfile = import_files('mopdata').joinpath('latlon_vertices.yaml')
     with open(cfile, 'r') as yfile:
         data = yaml.safe_load(yfile)
     ll_dict = data[ctx.obj['realm']]
@@ -901,7 +901,7 @@ def define_attrs(ctx):
     attrs = ctx.obj['attrs']
     notes = attrs.get('notes', '')
     # open file containing notes
-    fname = import_files('data').joinpath('notes.yaml')
+    fname = import_files('mopdata').joinpath('notes.yaml')
     data = read_yaml(fname)['notes']
     # check all fields and if any of their keys (e.g. a specific variable)
     # match the field value for the file being processed
diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py
index ef2c1ec..5b1b36c 100755
--- a/src/mopper/setup_utils.py
+++ b/src/mopper/setup_utils.py
@@ -40,7 +40,6 @@ from collections import OrderedDict
 from datetime import datetime#, timedelta
 from dateutil.relativedelta import relativedelta
-from importlib.resources import files as import_files
 from json.decoder import JSONDecodeError

 from mopdb.mopdb_utils import query
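The data -> mopdata rename above also changes how every packaged file is located: each hard-coded 'data' lookup now goes through importlib.resources with the new package name. A minimal sketch of that lookup pattern, reusing the 'mopdata' package and 'access.db' names from the hunks above (the print is illustrative only):

    from importlib.resources import files as import_files

    # resolve a file shipped as package data inside src/mopdata;
    # files() needs an importable package name, which is why the
    # directory had to become a valid Python identifier
    dbname = import_files('mopdata').joinpath('access.db')
    print(dbname.is_file())  # True once the package is installed

Because files() returns a Traversable, the same joinpath() call resolves for wheel, conda and editable installs alike, with no assumptions about where the package lives on disk.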
From 8fa654cc26809672e338409170f5f8cb013bb06f Mon Sep 17 00:00:00 2001
From: Sam Green
Date: Fri, 12 Jul 2024 17:34:14 +1000
Subject: [PATCH 059/137] Rename mopper-conda.yaml to mopper-test-calcs.yaml

---
 .github/workflows/{mopper-conda.yaml => mopper-test-calcs.yaml} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename .github/workflows/{mopper-conda.yaml => mopper-test-calcs.yaml} (100%)

diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-test-calcs.yaml
similarity index 100%
rename from .github/workflows/mopper-conda.yaml
rename to .github/workflows/mopper-test-calcs.yaml

From c14581c98a2c24ada023ef25e3e1d5077381531b Mon Sep 17 00:00:00 2001
From: Paola Petrelli
Date: Fri, 12 Jul 2024 20:28:17 +1000
Subject: [PATCH 060/137] started reorganising mopdb_utils.py and intake now
 working

---
 .../{mopper-conda.yaml => mopper-pytest.yaml} |   4 -
 conda/enviroment.yaml                         |   1 -
 src/mopdata/intake_cat_template.json          |  51 ++++
 src/mopdata/intake_cat_template.yaml          |  19 ++
 src/mopdb/mopdb.py                            |  37 +--
 src/mopdb/mopdb_class.py                      |   2 +-
 src/mopdb/mopdb_utils.py                      | 227 ++++++------------
 src/mopdb/utils.py                            | 210 ++++++++++++++++
 src/mopper/calculations.py                    |   2 +-
 src/mopper/mop_setup.py                       |   1 +
 src/mopper/mop_utils.py                       |   2 +-
 src/mopper/setup_utils.py                     |  33 +--
 tests/test_mopdb_utils.py                     |  19 ++
 13 files changed, 391 insertions(+), 217 deletions(-)
 rename .github/workflows/{mopper-conda.yaml => mopper-pytest.yaml} (94%)
 create mode 100644 src/mopdata/intake_cat_template.json
 create mode 100644 src/mopdata/intake_cat_template.yaml
 create mode 100644 src/mopdb/utils.py

diff --git a/.github/workflows/mopper-conda.yaml b/.github/workflows/mopper-pytest.yaml
similarity index 94%
rename from .github/workflows/mopper-conda.yaml
rename to .github/workflows/mopper-pytest.yaml
index 4642d83..430693f 100644
--- a/.github/workflows/mopper-conda.yaml
+++ b/.github/workflows/mopper-pytest.yaml
@@ -38,10 +38,6 @@ jobs:
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    - name: install package
-      run: |
-        source activate base
-        pip install ./
     - name: Test with pytest
       run: |
         conda install pytest coverage codecov --solver classic
diff --git a/conda/enviroment.yaml b/conda/enviroment.yaml
index 6e46467..3856ac4 100644
--- a/conda/enviroment.yaml
+++ b/conda/enviroment.yaml
@@ -1,4 +1,3 @@
-name: mopenv
 channels:
   - conda-forge
 dependencies:
diff --git a/src/mopdata/intake_cat_template.json b/src/mopdata/intake_cat_template.json
new file mode 100644
index 0000000..b4549b4
--- /dev/null
+++ b/src/mopdata/intake_cat_template.json
@@ -0,0 +1,51 @@
+{
+    "id": "<experiment>",
+    "title": "<experiment> model output.",
+    "description": "<experiment> raw model output. \nProject: \nMaintained By: \nContact: \nDocumentation:\nLicense: https://creativecommons.org/licenses/by/4.0/\nCitation:\nReferences:\n",
+    "assets": {
+        "column_name": "path",
+        "format": "netcdf"
+    },
+    "aggregation_control": {
+        "variable_column_name": "variable",
+        "groupby_attrs": [
+            "realm",
+            "frequency",
+            "variable"
+        ],
+        "aggregations": [
+            {
+                "type": "join_existing",
+                "attribute_name": "date_range",
+                "options": {
+                    "dim": "time"
+                }
+            }
+        ]
+    },
+    "esmcat_version": "0.1.0",
+    "catalog_file": "catalogue.csv.xz",
+    "attributes": [
+        {
+            "column_name": "experiment"
+        },
+        {
+            "column_name": "realm"
+        },
+        {
+            "column_name": "frequency"
+        },
+        {
+            "column_name": "variable"
+        },
+        {
+            "column_name": "map_var"
+        },
+        {
+            "column_name": "standard_name"
+        },
+        {
+            "column_name": "date"
+        }
+    ]
+}
diff --git a/src/mopdata/intake_cat_template.yaml b/src/mopdata/intake_cat_template.yaml
new file mode 100644
index 0000000..1a04ac4
--- /dev/null
+++ b/src/mopdata/intake_cat_template.yaml
@@ -0,0 +1,19 @@
+metadata:
+  version: 1
+sources:
+  experiment:
+    description: "Intake catalogue to load ACCESS model output"
+    Project: ""
+    Maintained By: ""
+    Contact: ""
+    Documentation: ""
+    License: "https://creativecommons.org/licenses/by/4.0/"
+    Citation: ""
+    References: ""
+    driver: intake_esm.esm_datastore
+    args:
+      obj: "{{CATALOG_DIR}}/catalogue.json"
+      columns_with_iterables:
+      - variable
+      - map_var
+      - standard_name
diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py
index c4fdb38..d495d7b 100644
--- a/src/mopdb/mopdb.py
+++ b/src/mopdb/mopdb.py
@@ -28,6 +28,7 @@
 from importlib.resources import files as import_files

 from mopdb.mopdb_utils import *
+from mopdb.utils import *

 def mopdb_catch():
     """
@@ -100,7 +101,7 @@ def mopdb(ctx, debug):
     ctx.obj={}
     # set up a default value for flow if none selected for logging
     ctx.obj['debug'] = debug
-    mopdb_log = config_log(debug)
+    mopdb_log = config_log(debug, logname='mopdb_log')


 @mopdb.command(name='check')
@@ -123,10 +124,10 @@ def check_cmor(ctx, dbname):
     # connect to db, this will create one if not existing
     if dbname == 'default':
         dbname = import_files('mopdata').joinpath('access.db')
-    conn = db_connect(dbname)
+    conn = db_connect(dbname, logname='mopdb_log')
     # get list of variables already in db
     sql = 'SELECT name, out_name FROM cmorvar'
-    results = query(conn, sql, first=False)
+    results = query(conn, sql, first=False, logname='mopdb_log')
     # first set is the actual cmip variable name
     # second set is the name used in tables to distinguish different dims/freq
     # original maps files use the second style
     cmor_vars = set(x[0] for x in results)
     cmor_vars2 = set(x[1] for x in results)
     cmor_vars.update(cmor_vars2)

     sql = 'SELECT cmor_var FROM mapping'
-    results = query(conn, sql, first=False)
+    results = query(conn, sql, first=False, logname='mopdb_log')
     map_vars = [x[0] for x in results]
     missing = set(map_vars) - set(cmor_vars)
     mopdb_log.info("Variables not yet defined in cmorvar table:")
@@ -176,10 +177,10 @@ def cmor_table(ctx, dbname, fname, alias, label):
     # connect to db, this will create one if not existing
     if dbname == 'default':
         dbname = import_files('mopdata').joinpath('access.db')
-    conn = db_connect(dbname)
+    conn = db_connect(dbname, logname='mopdb_log')
     # get list of variables already in db
     sql = "SELECT out_name, frequency, modeling_realm FROM cmorvar"
-    results = query(conn, sql, first=False)
+    results = query(conn, sql, first=False, logname='mopdb_log')
     # cmor_vars is the actual cmip variable name
     # this sometimes differs from the name used in tables that can distinguish different dims/freq
     cmor_vars = set(x[0] for x in results)
@@ -196,7 +197,7 @@
         else:
             sql = f"SELECT * FROM cmorvar WHERE out_name='{v[0]}'"
-            records = query(conn, sql, first=False)
+            records = query(conn, sql, first=False, logname='mopdb_log')
             record = records[0]
             if len(records) > 1:
                 for r in records:
@@ -255,14 +256,14 @@ def update_cmor(ctx, dbname, fname, alias):
     if dbname in [dbcentral, 'default']:
         mopdb_log.error("The package database cannot be updated")
         sys.exit()
-    conn = db_connect(dbname)
+    conn = db_connect(dbname, logname='mopdb_log')
     # create table if not existing
     table_sql = cmorvar_sql()
-    create_table(conn, table_sql)
+    create_table(conn, table_sql, logname='mopdb_log')
     # get list of variables already in db in debug mode
     if ctx.obj['debug']:
         sql = 'SELECT name FROM cmorvar'
-        results = query(conn, sql, first=False)
+        results = query(conn, sql, first=False, logname='mopdb_log')
         existing_vars = [x[0] for x in results]
         mopdb_log.debug(f"Variables already in db: {existing_vars}")
@@ -328,7 +329,7 @@ def map_template(ctx, fpath, match, dbname, version, alias):
     # connect to db, this will create one if not existing
     if dbname == 'default':
         dbname = import_files('mopdata').joinpath('access.db')
-    conn = db_connect(dbname)
+    conn = db_connect(dbname, logname='mopdb_log')
     # work out if fpath is varlist or path to output
     fpath = Path(fpath)
     if fpath.is_file():
         fname = fpath.name
     else:
         mopdb_log.debug(f"Calling model_vars() from template: {fpath}")
in results] mopdb_log.debug(f"Variables already in db: {existing_vars}") # read list of vars from file @@ -471,6 +472,7 @@ def list_vars(ctx, fpath, match, dbname, version, alias): if dbname == 'default': dbname = import_files('mopdata').joinpath('access.db') conn = db_connect(dbname) + conn = db_connect(dbname, logname='mopdb_log') fname, vobjs, fobjs = model_vars(fpath, match, conn, version, alias) conn.close() return None @@ -542,12 +544,13 @@ def remove_record(ctx, dbname, table, pair): mopdb_log.error("The package database cannot be updated") sys.exit() conn = db_connect(dbname) + conn = db_connect(dbname, logname='mopdb_log') # set which columns to show based on table if table == 'cmorvar': col = "name" elif table == 'mapping': col = "cmor_var,frequency,realm,cmor_table" # select, confirm, delete record/s - delete_record(conn, table, col, pair) + delete_record(conn, table, col, pair, logname='mopdb_log') conn.close() return diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index a592465..9a9aa0e 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -88,7 +88,7 @@ class Variable(): def __init__(self, varname: str, fobj: FPattern): self.name = varname # path object - self.fobj = fobj + self.fpattern = fobj.fpattern #self.fpath = fobj.fpath #self.files = fobj.files # mapping attributes diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 79161b7..17f475e 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -35,54 +35,11 @@ from collections import Counter from operator import itemgetter, attrgetter from pathlib import Path +from itertools import compress from importlib.resources import files as import_files from mopdb.mopdb_class import FPattern, Variable - -def config_log(debug): - """Configures log file""" - # start a logger - logger = logging.getLogger('mopdb_log') - # set a formatter to manage the output format of our handler - formatter = logging.Formatter('%(asctime)s; %(message)s',"%Y-%m-%d %H:%M:%S") - # set the level for the logger, has to be logging.LEVEL not a string - level = logging.INFO - flevel = logging.WARNING - if debug: - level = logging.DEBUG - flevel = logging.DEBUG - logger.setLevel(level) - - # add a handler to send WARNING level messages to console - # or DEBUG level if debug is on - clog = logging.StreamHandler() - clog.setLevel(level) - logger.addHandler(clog) - - # add a handler to send INFO level messages to file - # the messagges will be appended to the same file - # create a new log file every month - day = date.today().strftime("%Y%m%d") - logname = 'mopdb_log_' + day + '.txt' - flog = logging.FileHandler(logname) - try: - os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO); - except OSError: - pass - flog.setLevel(flevel) - flog.setFormatter(formatter) - logger.addHandler(flog) - # return the logger object - return logger - - -def db_connect(db): - """Connects to ACCESS mapping sqlite database""" - mopdb_log = logging.getLogger('mopdb_log') - conn = sqlite3.connect(db, timeout=10, isolation_level=None) - if conn.total_changes == 0: - mopdb_log.info(f"Opened database {db} successfully") - return conn +from mopdb.utils import * def mapping_sql(): @@ -228,50 +185,6 @@ def update_db(conn, table, rows_list): mopdb_log.info('--- Done ---') return - -def query(conn, sql, tup=(), first=True): - """Executes generic sql query and returns row/s - - Parameters - ---------- - conn : connection object - Connection to sqlite database - sql : str - sql string representing query - tup : tuple - 
By default empty, used to pass values when placeholder ? is used - in sql string - first : boolean - By default True will return only first record found, set to False - to return all matching records - - Returns - ------- - result : tuple/list(tuple) - tuple or a list of, representing row/s returned by query - """ - mopdb_log = logging.getLogger('mopdb_log') - with conn: - c = conn.cursor() - c.execute(sql, tup) - if first: - result = c.fetchone() - else: - result = [ x for x in c.fetchall() ] - #columns = [description[0] for description in c.description] - return result - - -def get_columns(conn, table): - """Gets list of columns from db table - """ - mopdb_log = logging.getLogger('mopdb_log') - sql = f'PRAGMA table_info({table});' - table_data = query(conn, sql, first=False) - columns = [x[1] for x in table_data] - return columns - - def get_cmorname(conn, vobj, version): """Queries mapping table for cmip name given variable name as output by the model @@ -280,7 +193,7 @@ def get_cmorname(conn, vobj, version): sql = f"""SELECT cmor_var,model,cmor_table,frequency FROM mapping WHERE input_vars='{vobj.name}' and (calculation='' or calculation IS NULL)""" - results = query(conn, sql, first=False) + results = query(conn, sql, first=False, logname='mopdb_log') names = list(x[0] for x in results) tables = list(x[2] for x in results) mopdb_log.debug(f"In get_cmorname query results: {results}") @@ -381,50 +294,6 @@ def write_cmor_table(var_list, name): json.dump(out, f, indent=4) return - -def delete_record(conn, table, col, pairs): - """Deletes record from table based on pairs of column and - value passed for selection - - Parameters - ---------- - conn : connection object - connection to db - table: str - db table name - col: str - name of column to return with query - pairs : list[tuple(str, str)] - pairs of columns, values to select record/s - """ - mopdb_log = logging.getLogger('mopdb_log') - # Set up query - sqlwhere = f"FROM {table} WHERE " - for c,v in pairs: - sqlwhere += f"{c}='{v}' AND " - sql = f"SELECT {col} " + sqlwhere[:-4] - mopdb_log.debug(f"Delete query: {sql}") - xl = query(conn, sql, first=False) - # Delete from db - if xl is not None: - mopdb_log.info(f"Found {len(xl)} records") - for x in xl: - mopdb_log.info(f"{x}") - confirm = input('Confirm deletion from database: Y/N ') - if confirm == 'Y': - mopdb_log.info('Updating db ...') - with conn: - c = conn.cursor() - sql = "DELETE " + sqlwhere[:-4] - mopdb_log.debug(f"Delete sql: {sql}") - c.execute(sql) - c.execute('select total_changes()') - mopdb_log.info(f"Rows modified: {c.fetchall()[0][0]}") - else: - mopdb_log.info("The query did not return any records") - return - - def get_file_frq(ds, fnext): """Return a dictionary with frequency for each time axis. 
@@ -499,7 +368,7 @@ def write_varlist(conn, indir, match, version, alias):
     mopdb_log = logging.getLogger('mopdb_log')
     line_cols = ['name','cmor_var','units','dimensions','_frequency',
         '_realm','cell_methods','cmor_table','vtype','size',
-        'nsteps','fobj.fpattern','long_name','standard_name']
+        'nsteps','fpattern','long_name','standard_name']
     vobj_list = []
     fobj_list = []
     patterns = []
@@ -569,9 +438,9 @@
                 line = [attrgetter(k)(vobj) for k in line_cols]
                 fwriter.writerow(line)
                 vobj_list.append(vobj)
-                pattern_var_list.append(vobj.name)
-        fjob.varlist = pattern_var_list
-        fjob_list.append(fobj)
+                pattern_var_list.append(vobj)
+        fobj.varlist = pattern_var_list
+        fobj_list.append(fobj)
         mopdb_log.info(f"Variable list for {fpattern} successfully written")
     fcsv.close()
-    return fname, vobj_list
+    return fname, vobj_list, fobj_list
@@ -644,7 +513,7 @@ def match_stdname(conn, row, stdn):
     found_match = False
     sql = f"""SELECT name FROM cmorvar where
         standard_name='{row['standard_name']}'"""
-    results = query(conn, sql, first=False)
+    results = query(conn, sql, first=False, logname='mopdb_log')
     matches = [x[0] for x in results]
     if len(matches) > 0:
         stdn = add_var(stdn, row, tuple([matches]+['']*7), stdnm=True)
@@ -674,7 +543,7 @@ def match_var(row, version, mode, conn, records):
     elif mode == 'no_ver':
         sql = sql_base + sql_frq
     # execute query and process results
-    result = query(conn, sql, first=False)
+    result = query(conn, sql, first=False, logname='mopdb_log')
     mopdb_log.debug(f"match_var: {result}, sql: {sql[110:]}")
     if result is not None and result != []:
         for x in result:
@@ -835,7 +704,7 @@ def potential_vars(conn, rows, stash_vars, version):
         sql = f"""SELECT cmor_var,input_vars,calculation,frequency,
             realm,model,cmor_table,positive,units FROM mapping
             WHERE input_vars like '%{row['name']}%'"""
-        results = query(conn, sql, first=False)
+        results = query(conn, sql, first=False, logname='mopdb_log')
        mopdb_log.debug(f"In potential: var {row['name']}, db results {results}")
         for r in results:
             allinput = r[1].split(" ")
@@ -933,7 +802,7 @@ def check_realm_units(conn, var):
     # retrieve modeling_realm, units from db cmor table
     sql = f"""SELECT modeling_realm, units FROM cmorvar
         WHERE name='{vname}' """
-    result = query(conn, sql)
+    result = query(conn, sql, logname='mopdb_log')
     mopdb_log.debug(f"In check_realm_units: {vname}, {result}")
     if result is not None:
         dbrealm = result[0]
@@ -1016,40 +885,78 @@ def write_catalogue(conn, parsed, vobjs, fobjs, alias):
     """Write intake-esm catalogue and returns name
     """
     mopdb_log = logging.getLogger('mopdb_log')
-    # read template json data
+    # read template json file
     jfile = import_files('mopdata').joinpath('intake_cat_template.json')
     with open(jfile, 'r') as f:
         template = json.load(f)
-    mopdb_log.debug("Opened intake template file")
+    # read template yaml file
+    yfile = import_files('mopdata').joinpath('intake_cat_template.yaml')
+    maincat = read_yaml(yfile)
+    mopdb_log.debug("Opened intake template files")
     # update json data with relevant information
     # update title, description etc with experiment
     for k,v in template.items():
         if type(v) == str:
             template[k] = v.replace('<experiment>', alias)
+    for k,v in maincat.items():
+        if type(v) == str:
+            maincat[k] = v.replace('<experiment>', alias)
     # write updated json to file
     jfile = f"intake_{alias}.json"
     with open(jfile, 'w') as f:
         json.dump(template, f, indent=4)
+    # write updated yaml to file
+    jfile = f"intake_{alias}.yaml"
+    write_yaml(maincat, jfile, 'mopdb_log')
     # create a dictionary for each file to list
-    for pat_obj
in fobjs: - var_list = get_pattern_vars. - base_dict = {'experiment': alias, - 'realm': = pat_obj.realm, - 'realm': = pat_obj.realm, + lines = create_file_dict(fobjs) # write csv file + cols = [x['column_name'] for x in template['attributes']] + cols = ['path'] + cols csvname = template['catalog_file'] with lzma.open(csvname, 'wt') as fcsv: - fwriter = csv.DictWriter(fcsv, keys, delimiter=',') - for f in files_dict: - fwriter.writerow(f) + fwriter = csv.DictWriter(fcsv, cols, delimiter=';') + fwriter.writeheader() + for fd in lines: + fwriter.writerow(fd) fcsv.close() return jfile, csvname -"experiment" - "column_name": "realm" - "column_name": "frequency" - "variable" - "column_name": "map_var" - "column_name": "map_table" - "column_name": "standard_name" - "column_name": "date_range" +def get_date_pattern(fname, fpattern): + """Try to build a date range for each file pattern based + on its filename + """ + mopdb_log = logging.getLogger('mopdb_log') + # assign False to any character which is not a digit + date_pattern = [True if c.isdigit() else False for c in fname] + # assign False to fpattern + n = len(fpattern) + date_pattern[:n] = [False] * n + return date_pattern + +def create_file_dict(fobjs): + """ + """ + mopdb_log = logging.getLogger('mopdb_log') + for pat_obj in fobjs: + var_list = [v.name for v in pat_obj.varlist] + # set to remove '' duplicates + mapvar_list = list(set(v.cmor_var for v in pat_obj.varlist)) + stnm_list = list(set(v.standard_name for v in pat_obj.varlist)) + base_dict = {'experiment': alias, + 'realm': pat_obj.realm, + 'frequency': pat_obj.frequency, + 'variable': var_list, + 'map_var': mapvar_list, + 'standard_name': stnm_list} + # work out date_pattern in filename + fname = pat_obj.files[0].name + date_pattern = get_date_pattern(fname, pat_obj.fpattern) + # add date and path for each file + for fpath in pat_obj.files: + f = fpath.name + fd = base_dict.copy() + fd['path'] = str(fpath) + fd['date'] = ''.join(c for c in compress(f, date_pattern)) + lines.append(fd) + return lines diff --git a/src/mopdb/utils.py b/src/mopdb/utils.py new file mode 100644 index 0000000..1a6ff11 --- /dev/null +++ b/src/mopdb/utils.py @@ -0,0 +1,210 @@ +#!/usr/bin/env python +# Copyright 2024 ARC Centre of Excellence for Climate Extremes (CLEX) +# Author: Paola Petrelli for CLEX +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# contact: paola.petrelli@utas.edu.au +# +# last updated 12/07/2024 +# + +import sqlite3 +import logging +import os +import csv +import json +import stat +import yaml + +from datetime import date + + +def config_log(debug, logname): + """Configures log file""" + # start a logger + logger = logging.getLogger(logname) + # set a formatter to manage the output format of our handler + formatter = logging.Formatter('%(asctime)s; %(message)s',"%Y-%m-%d %H:%M:%S") + # set the level for the logger, has to be logging.LEVEL not a string + level = logging.INFO + flevel = logging.WARNING + if debug: + level = logging.DEBUG + flevel = logging.DEBUG + logger.setLevel(level) + + # add a handler to send WARNING level messages to console + # or DEBUG level if debug is on + clog = logging.StreamHandler() + clog.setLevel(level) + logger.addHandler(clog) + + # add a handler to send INFO level messages to file + # the messagges will be appended to the same file + # create a new log file every month + day = date.today().strftime("%Y%m%d") + logname = f"{logname}_{day}.txt" + flog = logging.FileHandler(logname) + try: + os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO); + except OSError: + pass + flog.setLevel(flevel) + flog.setFormatter(formatter) + logger.addHandler(flog) + # return the logger object + return logger + +def db_connect(db, logname='__name__'): + """Connects to ACCESS mapping sqlite database""" + log = logging.getLogger(logname) + conn = sqlite3.connect(db, timeout=10, isolation_level=None) + if conn.total_changes == 0: + log.info(f"Opened database {db} successfully") + return conn + +def create_table(conn, sql, logname='__name__'): + """Creates table if database is empty + + Parameters + ---------- + conn : connection object + sql : str + SQL style string defining table to create + """ + log = logging.getLogger(logname) + try: + c = conn.cursor() + c.execute(sql) + except Exception as e: + log.error(e) + return + +def query(conn, sql, tup=(), first=True, logname='__name__'): + """Executes generic sql query and returns row/s + + Parameters + ---------- + conn : connection object + Connection to sqlite database + sql : str + sql string representing query + tup : tuple + By default empty, used to pass values when placeholder ? 
is used + in sql string + first : boolean + By default True will return only first record found, set to False + to return all matching records + + Returns + ------- + result : tuple/list(tuple) + tuple or a list of, representing row/s returned by query + """ + log = logging.getLogger(logname) + with conn: + c = conn.cursor() + c.execute(sql, tup) + if first: + result = c.fetchone() + else: + result = [ x for x in c.fetchall() ] + #columns = [description[0] for description in c.description] + return result + + +def get_columns(conn, table, logname='__name__'): + """Gets list of columns from db table + """ + log = logging.getLogger(logname) + sql = f'PRAGMA table_info({table});' + table_data = query(conn, sql, first=False, logname=logname) + columns = [x[1] for x in table_data] + return columns + + +def delete_record(conn, table, col, pairs, logname='__name__'): + """Deletes record from table based on pairs of column and + value passed for selection + + Parameters + ---------- + conn : connection object + connection to db + table: str + db table name + col: str + name of column to return with query + pairs : list[tuple(str, str)] + pairs of columns, values to select record/s + """ + log = logging.getLogger(logname) + # Set up query + sqlwhere = f"FROM {table} WHERE " + for c,v in pairs: + sqlwhere += f"{c}='{v}' AND " + sql = f"SELECT {col} " + sqlwhere[:-4] + log.debug(f"Delete query: {sql}") + xl = query(conn, sql, first=False, logname=logname) + # Delete from db + if xl is not None: + log.info(f"Found {len(xl)} records") + for x in xl: + log.info(f"{x}") + confirm = input('Confirm deletion from database: Y/N ') + if confirm == 'Y': + log.info('Updating db ...') + with conn: + c = conn.cursor() + sql = "DELETE " + sqlwhere[:-4] + log.debug(f"Delete sql: {sql}") + c.execute(sql) + c.execute('select total_changes()') + log.info(f"Rows modified: {c.fetchall()[0][0]}") + else: + log.info("The query did not return any records") + return + +def read_yaml(fname, logname='__name__'): + """Read yaml file + """ + log = logging.getLogger(logname) + try: + with fname.open(mode='r') as yfile: + data = yaml.safe_load(yfile) + except Exception as e: + log.error(f"Check that {fname} exists and it is a valid yaml file") + log.error(f"Exception: {e}") + return data + +def write_yaml(data, fname, logname='__name__'): + """Write data to a yaml file + + Parameters + ---------- + data : dict + The file content as a dictionary + fname : str + Yaml filename + + Returns + ------- + """ + log = logging.getLogger(logname) + try: + with open(fname, 'w') as f: + yaml.dump(data, f) + except: + log.error(f"Check that {data} exists and it is an object compatible with yaml") + return diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py index 954d72a..c910c80 100644 --- a/src/mopper/calculations.py +++ b/src/mopper/calculations.py @@ -40,7 +40,7 @@ import logging from importlib.resources import files as import_files -from mopper.setup_utils import read_yaml +from mopdb.utils import read_yaml # Global Variables #---------------------------------------------------------------------- diff --git a/src/mopper/mop_setup.py b/src/mopper/mop_setup.py index 5bc5634..8831595 100755 --- a/src/mopper/mop_setup.py +++ b/src/mopper/mop_setup.py @@ -34,6 +34,7 @@ from importlib.resources import files as import_files from mopper.setup_utils import * +from mopdb.utils import read_yaml def find_matches(table, var, realm, frequency, varlist): diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index 
2eb9695..a5d1423 100755
--- a/src/mopper/mop_utils.py
+++ b/src/mopper/mop_utils.py
@@ -39,7 +39,7 @@ from pathlib import Path

 from mopper.calculations import *
-from mopper.setup_utils import read_yaml
+from mopper.utils import read_yaml

 from importlib.resources import files as import_files

diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py
index 5b1b36c..da3dc6b 100755
--- a/src/mopper/setup_utils.py
+++ b/src/mopper/setup_utils.py
@@ -42,7 +42,7 @@ from dateutil.relativedelta import relativedelta

 from json.decoder import JSONDecodeError

-from mopdb.mopdb_utils import query
+from mopdb.utils import query, write_yaml, read_yaml
 from mopper.cmip_utils import fix_years

@@ -103,37 +103,6 @@ def adjust_nsteps(v, frq):
     new_nsteps = tot_days * nstep_day[frq]
     return new_nsteps

-
-def read_yaml(fname):
-    """Read yaml file
-    """
-    with fname.open(mode='r') as yfile:
-        data = yaml.safe_load(yfile)
-    return data
-
-
-def write_yaml(data, fname, log_name='__name__'):
-    """Write data to a yaml file
-
-    Parameters
-    ----------
-    data : dict
-        The file content as a dictionary
-    fname : str
-        Yaml filename
-
-    Returns
-    -------
-    """
-    logger = logging.getLogger(log_name)
-    try:
-        with open(fname, 'w') as f:
-            yaml.dump(data, f)
-    except:
-        logger.error(f"Check that {data} exists and it is an object compatible with json")
-    return
-
-
 @click.pass_context
 def write_config(ctx, fname='exp_config.yaml'):
     """Write data to a yaml file
diff --git a/tests/test_mopdb_utils.py b/tests/test_mopdb_utils.py
index ebc8be0..858697e 100644
--- a/tests/test_mopdb_utils.py
+++ b/tests/test_mopdb_utils.py
@@ -20,6 +20,7 @@
 import sqlite3
 import click
 import logging
+import itertools

 from mopdb.mopdb_utils import *
 from conftest import um_multi_time
@@ -41,3 +42,21 @@ def test_build_umfrq(um_multi_time, caplog):
     out = build_umfrq(time_axs, um_multi_time)
     assert umfrq == out

+#@pytest.mark.parametrize('fname', [0,1,2])
+def test_get_date_pattern(caplog):
+    caplog.set_level(logging.DEBUG, logger='mopdb_log')
+    fname = 'ocean_month.nc-09961231'
+    fpattern = 'ocean_month.nc-'
+    dp = get_date_pattern(fname, fpattern)
+    date = ''.join(x for x in itertools.compress(fname,dp))
+    assert date == '09961231'
+    fname = 'umnsa_cldrad_20160603T0000.nc'
+    fpattern = 'umnsa_cldrad_'
+    dp = get_date_pattern(fname, fpattern)
+    date = ''.join(x for x in itertools.compress(fname,dp))
+    assert date == '201606030000'
+    fname = 'cw323a.pm095101_mon.nc'
+    fpattern = 'cw323a.pm'
+    dp = get_date_pattern(fname, fpattern)
+    date = ''.join(x for x in itertools.compress(fname,dp))
+    assert date == '095101'
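With the intake_cat_template.json/yaml files and write_catalogue in place, `mopdb intake` can now produce a queryable catalogue. A rough sketch of how the generated output might be opened, assuming intake and intake-esm are installed and the catalogue was written for a hypothetical alias "exp1" (file and attribute names taken from the templates above, the rest illustrative):

    import intake

    # the json datastore points at the compressed csv listed in its
    # catalog_file entry; keep the date column as strings so leading
    # zeros in dates like 09961231 (see tests above) are not lost
    cat = intake.open_esm_datastore(
        "intake_exp1.json",
        read_csv_kwargs={"dtype": {"date": str}})
    # search on the groupby attributes defined in the template
    subset = cat.search(realm="atmos", frequency="1hr")
    dsets = subset.to_dataset_dict()

The search keys mirror the template's groupby_attrs, so each entry of dsets aggregates all files of one realm/frequency combination along the time dimension.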
create_file_dict(fobjs, alias): """ """ mopdb_log = logging.getLogger('mopdb_log') From d20db0eed0634df6831c9893c2cd6b724471cb74 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 16 Jul 2024 09:44:06 +1000 Subject: [PATCH 062/137] now producing intake catalogue from scratch, re-organised mopdb code #158 --- src/mopdata/intake_cat_template.json | 7 +- src/mopdata/intake_cat_template.yaml | 7 +- src/mopdb/mopdb.py | 3 +- src/mopdb/mopdb_class.py | 2 +- src/mopdb/mopdb_map.py | 559 +++++++++++++++++++++++++++ src/mopdb/mopdb_utils.py | 551 +------------------------- 6 files changed, 570 insertions(+), 559 deletions(-) create mode 100644 src/mopdb/mopdb_map.py diff --git a/src/mopdata/intake_cat_template.json b/src/mopdata/intake_cat_template.json index b4549b4..502bf2a 100644 --- a/src/mopdata/intake_cat_template.json +++ b/src/mopdata/intake_cat_template.json @@ -7,16 +7,15 @@ "format": "netcdf" }, "aggregation_control": { - "variable_column_name": "variable", + "variable_column_name": "frequency", "groupby_attrs": [ "realm", - "frequency", - "variable" + "frequency" ], "aggregations": [ { "type": "join_existing", - "attribute_name": "date_range", + "attribute_name": "date", "options": { "dim": "time" } diff --git a/src/mopdata/intake_cat_template.yaml b/src/mopdata/intake_cat_template.yaml index 1a04ac4..87fc7e4 100644 --- a/src/mopdata/intake_cat_template.yaml +++ b/src/mopdata/intake_cat_template.yaml @@ -1,7 +1,7 @@ metadata: version: 1 sources: - experiment: + : description: "Intake catalogue to load ACCESS model output" Project: "" Maintained By: "" @@ -10,10 +10,11 @@ sources: License: "https://creativecommons.org/licenses/by/4.0/" Citation: "" References: "" - driver: intake_esm.esm_datastore + driver: intake_esm.core.esm_datastore args: - obj: "{{CATALOG_DIR}}/catalogue.json" columns_with_iterables: - variable - map_var - standard_name + read_csv_kwargs: {"dtype": {"date": str}} + obj: "{{CATALOG_DIR}}/intake_.json" diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index d495d7b..aa16b2e 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -408,7 +408,8 @@ def write_intake(ctx, fpath, match, dbname, version, alias): parsed = map_variables(conn, rows, version) # potential vars have always duplicates: 1 for each input_var cat_name, fcsv = write_catalogue(conn, parsed, vobjs, fobjs, alias) - mopdb_log.info("Intake-esm catalogue written to {cat_name} and {fcsv}") + mopdb_log.info(f"""Intake-esm and intake catalogues written to + {cat_name} and {cat_name.replace('json','yaml')}. File list saved to {fcsv}""") conn.close() return None diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index 9a9aa0e..2fec511 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -121,7 +121,7 @@ def frequency(self, value): value = value.replace('hPt', 'hrPt') if not any(x in value for x in ['min', 'hr', 'day', 'mon', 'yr']): - self._frequency = 'NAfrq' + value = 'NAfrq' self._frequency = value diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py new file mode 100644 index 0000000..a0e580c --- /dev/null +++ b/src/mopdb/mopdb_map.py @@ -0,0 +1,559 @@ +#!/usr/bin/env python +# Copyright 2023 ARC Centre of Excellence for Climate Extremes (CLEX) +# Author: Paola Petrelli for CLEX +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# contact: paola.petrelli@utas.edu.au +# +# last updated 10/04/2024 +# + +import logging +import csv +import json +import lzma +import xarray as xr + +from operator import itemgetter, attrgetter +from pathlib import Path +from itertools import compress +from importlib.resources import files as import_files + +from mopdb.mopdb_class import FPattern, Variable +from mopdb.utils import * +from mopdb.mopdb_utils import (get_cell_methods, remove_duplicates, + get_realm, check_realm_units, get_date_pattern) + + +def get_cmorname(conn, vobj, version): + """Queries mapping table for cmip name given variable name as output + by the model + """ + mopdb_log = logging.getLogger('mopdb_log') + sql = f"""SELECT cmor_var,model,cmor_table,frequency FROM mapping + WHERE input_vars='{vobj.name}' and (calculation='' + or calculation IS NULL)""" + results = query(conn, sql, first=False, logname='mopdb_log') + names = list(x[0] for x in results) + tables = list(x[2] for x in results) + mopdb_log.debug(f"In get_cmorname query results: {results}") + if len(names) == 0: + vobj.cmor_var = '' + vobj.cmor_table = '' + elif len(names) == 1: + vobj.cmor_var = names[0] + vobj.cmor_table = tables[0] + elif len(names) > 1: + mopdb_log.debug(f"Found more than 1 definition for {vobj.name}:\n" + + f"{results}") + match_found = False + for r in results: + if r[1] == version and r[3] == vobj.frequency: + vobj.cmor_var, vobj.cmor_table = r[0], r[2] + match_found = True + break + if not match_found: + for r in results: + if r[3] == vobj.frequency: + vobj.cmor_var, vobj.cmor_table = r[0], r[2] + match_found = True + break + if not match_found: + for r in results: + if r[1] == version: + vobj.cmor_var, vobj.cmor_table = r[0], r[2] + match_found = True + break + if not match_found: + vobj.cmor_var = names[0] + vobj.cmor_table = tables[0] + mopdb_log.info(f"Found more than 1 definition for {vobj.name}:\n"+ + f"{results}\n Using {vobj.cmor_var} from {vobj.cmor_table}") + return vobj + "mip_era": "", + "Conventions": "CF-1.7 ACDD1.3" + } + return header + +def get_file_frq(ds, fnext): + """Return a dictionary with frequency for each time axis. + + Frequency is inferred by comparing interval between two consecutive + timesteps with expected interval at a given frequency. + Order time_axis so ones with only one step are last, so we can use + file frequency (interval_file) inferred from other time axes. + This is called if there are more than one time axis in file + (usually only UM) or if frequency can be guessed from filename. 
+ """ + mopdb_log = logging.getLogger('mopdb_log') + frq = {} + int2frq = {'dec': 3652.0, 'yr': 365.0, 'mon': 30.0, + 'day': 1.0, '6hr': 0.25, '3hr': 0.125, + '1hr': 0.041667, '30min': 0.020833, '10min': 0.006944} + # retrieve all time axes + time_axs = [d for d in ds.dims if 'time' in d] + time_axs_len = set(len(ds[d]) for d in time_axs) + time_axs.sort(key=lambda x: len(ds[x]), reverse=True) + mopdb_log.debug(f"in get_file_frq, time_axs: {time_axs}") + max_len = len(ds[time_axs[0]]) + # if all time axes have only 1 timestep we cannot infer frequency + # so we open also next file but get only time axs + if max_len == 1: + dsnext = xr.open_dataset(fnext, decode_times = False) + time_axs2 = [d for d in dsnext.dims if 'time' in d] + ds = xr.concat([ds[time_axs], dsnext[time_axs2]], dim='time') + time_axs = [d for d in ds.dims if 'time' in d] + time_axs_len = set(len(ds[d]) for d in time_axs) + time_axs.sort(key=lambda x: len(ds[x]), reverse=True) + for t in time_axs: + mopdb_log.debug(f"len of time axis {t}: {len(ds[t])}") + if len(ds[t]) > 1: + interval = (ds[t][1]-ds[t][0]).values + interval_file = (ds[t][-1] -ds[t][0]).values + else: + interval = interval_file + mopdb_log.debug(f"interval 2 timesteps for {t}: {interval}") + for k,v in int2frq.items(): + if math.isclose(interval, v, rel_tol=0.05): + frq[t] = k + break + return frq + +def write_varlist(conn, indir, match, version, alias): + """Based on model output files create a variable list and save it + to a csv file. Main attributes needed to map output are provided + for each variable + """ + mopdb_log = logging.getLogger('mopdb_log') + line_cols = ['name','cmor_var','units','dimensions','_frequency', + '_realm','cell_methods','cmor_table','vtype','size', + 'nsteps','fpattern','long_name','standard_name'] + vobj_list = [] + fobj_list = [] + patterns = [] + files = FPattern.list_files(indir, match) + mopdb_log.debug(f"Files after sorting: {files}") + if alias == '': + alias = 'mopdb' + fname = f"varlist_{alias}.csv" + fcsv = open(fname, 'w') + fwriter = csv.writer(fcsv, delimiter=';') + fwriter.writerow(["name", "cmor_var", "units", "dimensions", + "frequency", "realm", "cell_methods", "cmor_table", "vtype", + "size", "nsteps", "fpattern", "long_name", "standard_name"]) + for fpath in files: + # get filename pattern until date match + mopdb_log.debug(f"Filename: {fpath.name}") + fpattern = fpath.name.split(match)[0] + if fpattern in patterns: + continue + patterns.append(fpattern) + fobj = FPattern(fpattern, Path(indir)) + #pattern_list = list_files(indir, f"{fpattern}*") + nfiles = len(fobj.files) + mopdb_log.debug(f"File pattern, number of files: {fpattern}, {nfiles}") + #fwriter.writerow([f"#{fpattern}"]) + # get attributes for the file variables + ds = xr.open_dataset(str(fobj.files[0]), decode_times=False) + coords = [c for c in ds.coords] + ['latitude_longitude'] + #pass next file in case of 1 timestep per file and no frq in name + fnext = str(fobj.files[1]) + if fobj.frequency == 'NAfrq' or fobj.realm == 'atmos': + frq_dict = get_file_frq(ds, fnext) + # if only one frequency detected empty dict + if len(frq_dict) == 1: + fobj.frequency = frq_dict.popitem()[1] + else: + fobj.multiple_frq = True + fobj.frequency = frq_dict['time'] + mopdb_log.debug(f"Multiple frq: {fobj.multiple_frq}") + if fobj.realm == "NArealm": + fobj.realm = get_realm(version, ds) + pattern_var_list = [] + for vname in ds.variables: + vobj = Variable(vname, fobj) + if vname not in coords and all(x not in vname for x in ['_bnds','_bounds']): + v = ds[vname] + 
mopdb_log.debug(f"Variable: {vobj.name}") + # get size in bytes of grid for 1 timestep and number of timesteps + vobj.size = v[0].nbytes + vobj.nsteps = nfiles * v.shape[0] + # assign time axis frequency if more than one is available + if fobj.multiple_frq: + if 'time' in v.dims[0]: + vobj._frequency = frq_dict[v.dims[0]] + else: + mopdb_log.info(f"Could not detect frequency for variable: {v}") + attrs = v.attrs + vobj.cell_methods, frqmod = get_cell_methods(attrs, v.dims) + vobj.frequency = vobj.frequency + frqmod + mopdb_log.debug(f"Frequency var: {vobj.frequency}") + # try to retrieve cmip name + vobj = get_cmorname(conn, vobj, version) + vobj.units = attrs.get('units', "") + vobj.long_name = attrs.get('long_name', "") + vobj.standard_name = attrs.get('standard_name', "") + vobj.dimensions = " ".join(v.dims) + vobj.vtype = v.dtype + line = [attrgetter(k)(vobj) for k in line_cols] + fwriter.writerow(line) + vobj_list.append(vobj) + pattern_var_list.append(vobj) + fobj.varlist = pattern_var_list + fobj_list.append(fobj) + mopdb_log.info(f"Variable list for {fpattern} successfully written") + fcsv.close() + return fname, vobj_list, fobj_list + +def match_stdname(conn, row, stdn): + """Returns an updated stdn list if finds one or more variables + in cmorvar table that match the standard name passed as input. + It also return a False/True found_match boolean. + """ + mopdb_log = logging.getLogger('mopdb_log') + found_match = False + sql = f"""SELECT name FROM cmorvar where + standard_name='{row['standard_name']}'""" + results = query(conn, sql, first=False, logname='mopdb_log') + matches = [x[0] for x in results] + if len(matches) > 0: + stdn = add_var(stdn, row, tuple([matches]+['']*7), stdnm=True) + found_match = True + + return stdn, found_match + + +def match_var(row, version, mode, conn, records): + """Returns match for variable if found after looping + variables already mapped in database + Parameters + + """ + mopdb_log = logging.getLogger('mopdb_log') + found_match = False + # build sql query based on mode + sql_base = f"""SELECT cmor_var,input_vars,calculation,frequency, + realm,model,cmor_table,positive,units FROM mapping where + input_vars='{row['name']}'""" + sql_frq = f" and frequency='{row['frequency']}'" + sql_ver = f" and model='{version}'" + if mode == 'full': + sql = sql_base + sql_frq + sql_ver + elif mode == 'no_frq': + sql = sql_base + sql_ver + elif mode == 'no_ver': + sql = sql_base + sql_frq + # execute query and process results + result = query(conn, sql, first=False, logname='mopdb_log') + mopdb_log.debug(f"match_var: {result}, sql: {sql[110:]}") + if result is not None and result != []: + for x in result: + mopdb_log.debug(f"match: {x}") + records = add_var(records, row, x) + found_match = True + + return records, found_match + +def parse_vars(conn, rows, version): + """Returns records of variables to include in template mapping file, + a list of all stash variables + frequency available in model output + and a list of variables already defined in db + + Parameters + ---------- + conn : connection object + rows : list(dict) + list of variables to match + version : str + model version to use to match variables + + Returns + ------- + stash_vars : list + varname-frequency for each listed variable, varname is from model output + """ + mopdb_log = logging.getLogger('mopdb_log') + full = [] + no_ver = [] + no_frq = [] + stdn = [] + no_match = [] + stash_vars = [] + + # looping through variables from file and attempt matches to db + for row in rows: + if row['name'][0] 
== "#" or row['name'] == 'name': + continue + else: + full, found = match_var(row, version, 'full', conn, full) + # if no match, ignore model version first and then frequency + mopdb_log.debug(f"found perfect match: {found}") + if not found: + no_ver, found = match_var(row, version, 'no_ver', conn, no_ver) + mopdb_log.debug(f"found no ver match: {found}") + if not found: + no_frq, found = match_var(row, version, 'no_frq', conn, no_frq) + mopdb_log.debug(f"found no frq match: {found}") + # make a last attempt to match using standard_name + if not found: + if row['standard_name'] != '': + stdn, found = match_stdname(conn, row, stdn) + mopdb_log.debug(f"found stdnm match: {found}") + if not found: + no_match = add_var(no_match, row, tuple([row['name']]+['']*8)) + stash_vars.append(f"{row['name']}-{row['frequency']}") + + return full, no_ver, no_frq, stdn, no_match, stash_vars + +def add_var(vlist, row, match, stdnm=False): + """Add information from match to variable list and re-order + fields so they correspond to final mapping output. + + Parameters + match : tuple + match values (cmor_var,input_vars,calculation,frequency, + realm,model(version),cmor_table,positive,units) + """ + mopdb_log = logging.getLogger('mopdb_log') + # assign cmor_var from match and swap place with input_vars + mopdb_log.debug(f"Assign cmor_var: {match}") + mopdb_log.debug(f"initial row: {row}") + var = row.copy() + var['cmor_var'] = match[0] + var['input_vars'] = match[1] + orig_name = var.pop('name') + # assign realm from match + var['realm'] = match[4] + # with stdn assign cmorvar and table if only 1 match returned + # otherwise assign table from match + if stdnm: + var['input_vars'] = orig_name + if len(var['cmor_var']) == 1: + cmor_var, table = var['cmor_var'][0].split("-") + var['cmor_var'] = cmor_var + var['cmor_table'] = table + else: + var['cmor_table'] = match[6] + # add calculation, positive and version + var['calculation'] = match[2] + var['positive'] = match[7] + var['version'] = match[5] + # maybe we should override units here rather than in check_realm_units + # if units missing get them from match + if var['units'] is None or var['units'] == '': + var['units'] = match[8] + vlist.append(var) + return vlist + +def potential_vars(conn, rows, stash_vars, version): + """Returns list of variables that can be potentially derived from + model output. 
+
+    Loop across all model variables to match
+    Select any mapping that contains the variable and if there's a calculation
+    NB rows modified by add_row when assigning cmorname and positive values
+
+    Parameters
+    ----------
+    conn : connection object
+    rows : list(dict)
+        list of variables to match
+    stash_vars : list
+        varname-frequency for each listed variable, varname is from model output
+    version : str
+        model version to use to match variables
+
+    Returns
+    -------
+    """
+    mopdb_log = logging.getLogger('mopdb_log')
+    pot_full = []
+    pot_part = []
+    pot_varnames = set()
+    for row in rows:
+        sql = f"""SELECT cmor_var,input_vars,calculation,frequency,
+            realm,model,cmor_table,positive,units FROM mapping
+            WHERE input_vars like '%{row['name']}%'"""
+        results = query(conn, sql, first=False, logname='mopdb_log')
+        mopdb_log.debug(f"In potential: var {row['name']}, db results {results}")
+        for r in results:
+            allinput = r[1].split(" ")
+            mopdb_log.debug(f"{len(allinput)> 1}")
+            mopdb_log.debug(all(f"{x}-{row['frequency']}" in stash_vars for x in allinput))
+            if len(allinput) > 1 and all(f"{x}-{row['frequency']}" in stash_vars for x in allinput):
+                # if both version and frequency of applied mapping match
+                # consider this a full matching potential var
+                if r[5] == version and r[3] == row['frequency']:
+                    pot_full = add_var(pot_full, row, r)
+                else:
+                    pot_part = add_var(pot_part, row, r)
+                pot_varnames.add(r[0])
+    return pot_full, pot_part, pot_varnames
+
+
+def write_map_template(conn, parsed, alias):
+    """Write mapping csv file template based on list of variables to define
+
+    Input varlist file order:
+    name, cmor_var, units, dimensions, frequency, realm, cell_methods,
+    cmor_table, vtype, size, nsteps, fpattern, long_name, standard_name
+    Mapping db order:
+    cmor_var, input_vars, calculation, units, dimensions, frequency, realm,
+    cell_methods, positive, cmor_table, model, notes, origin
+    for pot vars + vtype, size, nsteps, fpattern
+    Final template order:
+    cmor_var, input_vars, calculation, units, dimensions, frequency, realm,
+    cell_methods, positive, cmor_table, version, vtype, size, nsteps, fpattern,
+    long_name, standard_name
+    """
+
+    mopdb_log = logging.getLogger('mopdb_log')
+    full, no_ver, no_frq, stdn, no_match, pot_full, pot_part = parsed
+    keys = ['cmor_var', 'input_vars', 'calculation', 'units',
+            'dimensions', 'frequency', 'realm', 'cell_methods',
+            'positive', 'cmor_table', 'version', 'vtype', 'size',
+            'nsteps', 'fpattern', 'long_name', 'standard_name']
+
+    with open(f"map_{alias}.csv", 'w') as fcsv:
+        fwriter = csv.DictWriter(fcsv, keys, delimiter=';')
+        write_vars(full, fwriter, keys, conn=conn)
+        div = ("# Derived variables with matching version and " +
+            "frequency: Use with caution!")
+        write_vars(pot_full, fwriter, div, conn=conn)
+        div = ("# Variables definitions coming from different " +
+            "version")
+        write_vars(no_ver, fwriter, div, conn=conn)
+        div = ("# Variables with different frequency: Use with" +
+            " caution!")
+        write_vars(no_frq, fwriter, div, conn=conn)
+        div = ("# Variables matched using standard_name: Use " +
+            "with caution!")
+        write_vars(stdn, fwriter, div, sortby='input_vars')
+        div = "# Derived variables: Use with caution!" 
+ write_vars(pot_part, fwriter, div, conn=conn) + #pot=True, conn=conn, sortby=0) + div = "# Variables without mapping" + write_vars(no_match, fwriter, div) + mopdb_log.debug("Finished writing variables to mapping template") + fcsv.close() + return + +def write_vars(vlist, fwriter, div, conn=None, sortby='cmor_var'): + """ + """ + + mopdb_log = logging.getLogger('mopdb_log') + if len(vlist) > 0: + if type(div) is str: + divrow = {x:'' for x in vlist[0].keys()} + divrow['cmor_var'] = div + elif type(div) is list: + divrow = {x:x for x in div} + fwriter.writerow(divrow) + for var in sorted(vlist, key=itemgetter(sortby)): + if conn: + var = check_realm_units(conn, var) + fwriter.writerow(var) + return + +def map_variables(conn, rows, version): + """ + """ + mopdb_log = logging.getLogger('mopdb_log') + # return lists of fully/partially matching variables and stash_vars + # these are input_vars for calculation defined in already in mapping db + full, no_ver, no_frq, stdn, no_match, stash_vars = parse_vars(conn, + rows, version) + # remove duplicates from partially matched variables + no_ver = remove_duplicate(no_ver) + no_frq = remove_duplicate(no_frq, strict=False) + no_match = remove_duplicate(no_match, strict=False) + # check if more derived variables can be added based on all + # input_vars being available + pot_full, pot_part, pot_varnames = potential_vars(conn, rows, + stash_vars, version) + # potential vars have always duplicates: 1 for each input_var + pot_full = remove_duplicate(pot_full, strict=False) + pot_part = remove_duplicate(pot_part, extra=pot_full, strict=False) + mopdb_log.info(f"Derived variables: {pot_varnames}") + return full, no_ver, no_frq, stdn, no_match, pot_full, pot_part + +def write_catalogue(conn, parsed, vobjs, fobjs, alias): + """Write intake-esm catalogue and returns name + """ + mopdb_log = logging.getLogger('mopdb_log') + # read template json file + jfile = import_files('mopdata').joinpath('intake_cat_template.json') + with open(jfile, 'r') as f: + template = json.load(f) + # write updated json to file + for k,v in template.items(): + if type(v) == str: + template[k] = v.replace("", alias) + jout = f"intake_{alias}.json" + with open(jout, 'w') as f: + json.dump(template, f, indent=4) + # read template yaml file + yfile = import_files('mopdata').joinpath('intake_cat_template.yaml') + with open(yfile, "r") as f: + maincat = f.read() + maincat = maincat.replace("", alias) + mopdb_log.debug("Opened intake template files") + # write updated yaml to file + yout = f"intake_{alias}.yaml" + with open(yout, 'w') as f: + f.writelines(maincat) + # create a dictionary for each file to list + lines = create_file_dict(fobjs, alias) + # write csv file + cols = [x['column_name'] for x in template['attributes']] + cols = ['path'] + cols + csvname = template['catalog_file'] + with lzma.open(csvname, 'wt') as fcsv: + fwriter = csv.DictWriter(fcsv, cols) + fwriter.writeheader() + for fd in lines: + fwriter.writerow(fd) + fcsv.close() + return jout, csvname + +def create_file_dict(fobjs, alias): + """ + """ + mopdb_log = logging.getLogger('mopdb_log') + lines = [] + for pat_obj in fobjs: + var_list = [v.name for v in pat_obj.varlist] + # set to remove '' duplicates + mapvar_list = list(set(v.cmor_var for v in pat_obj.varlist)) + mapvar_list.remove("") + stnm_list = list(set(v.standard_name for v in pat_obj.varlist)) + stnm_list.remove("") + base_dict = {'experiment': alias, + 'realm': pat_obj.realm, + 'frequency': pat_obj.frequency, + 'variable': str(var_list), + 'map_var': 
str(mapvar_list), + 'standard_name': str(stnm_list)} + # work out date_pattern in filename + fname = pat_obj.files[0].name + date_pattern = get_date_pattern(fname, pat_obj.fpattern) + # add date and path for each file + for fpath in pat_obj.files: + f = fpath.name + fd = base_dict.copy() + fd['path'] = str(fpath) + fd['date'] = ''.join(c for c in compress(f, date_pattern)) + lines.append(fd) + return lines diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index ac3f102..062e9dd 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -22,23 +22,12 @@ import sqlite3 import logging import sys -import os import csv import json -import stat -import lzma -import xarray as xr -import numpy as np -import math -from datetime import datetime, date +from datetime import date from collections import Counter -from operator import itemgetter, attrgetter -from pathlib import Path -from itertools import compress -from importlib.resources import files as import_files -from mopdb.mopdb_class import FPattern, Variable from mopdb.utils import * @@ -68,7 +57,6 @@ def mapping_sql(): ) WITHOUT ROWID;""") return sql - def cmorvar_sql(): """Returns sql definition of cmorvar table @@ -99,7 +87,6 @@ def cmorvar_sql(): ok_max_mean_abs TEXT);""") return sql - def map_update_sql(): """Returns sql needed to update mapping table @@ -117,7 +104,6 @@ def map_update_sql(): {', '.join(x+' = excluded.'+x for x in cols)}""" return sql - def cmor_update_sql(): """Returns sql needed to update cmorvar table @@ -136,7 +122,6 @@ def cmor_update_sql(): {', '.join(x+' = excluded.'+x for x in cols)}""" return sql - def create_table(conn, sql): """Creates table if database is empty @@ -154,7 +139,6 @@ def create_table(conn, sql): mopdb_log.error(e) return - def update_db(conn, table, rows_list): """Adds to table new variables definitions @@ -185,53 +169,6 @@ def update_db(conn, table, rows_list): mopdb_log.info('--- Done ---') return -def get_cmorname(conn, vobj, version): - """Queries mapping table for cmip name given variable name as output - by the model - """ - mopdb_log = logging.getLogger('mopdb_log') - sql = f"""SELECT cmor_var,model,cmor_table,frequency FROM mapping - WHERE input_vars='{vobj.name}' and (calculation='' - or calculation IS NULL)""" - results = query(conn, sql, first=False, logname='mopdb_log') - names = list(x[0] for x in results) - tables = list(x[2] for x in results) - mopdb_log.debug(f"In get_cmorname query results: {results}") - if len(names) == 0: - vobj.cmor_var = '' - vobj.cmor_table = '' - elif len(names) == 1: - vobj.cmor_var = names[0] - vobj.cmor_table = tables[0] - elif len(names) > 1: - mopdb_log.debug(f"Found more than 1 definition for {vobj.name}:\n" + - f"{results}") - match_found = False - for r in results: - if r[1] == version and r[3] == vobj.frequency: - vobj.cmor_var, vobj.cmor_table = r[0], r[2] - match_found = True - break - if not match_found: - for r in results: - if r[3] == vobj.frequency: - vobj.cmor_var, vobj.cmor_table = r[0], r[2] - match_found = True - break - if not match_found: - for r in results: - if r[1] == version: - vobj.cmor_var, vobj.cmor_table = r[0], r[2] - match_found = True - break - if not match_found: - vobj.cmor_var = names[0] - vobj.cmor_table = tables[0] - mopdb_log.info(f"Found more than 1 definition for {vobj.name}:\n"+ - f"{results}\n Using {vobj.cmor_var} from {vobj.cmor_table}") - return vobj - - def cmor_table_header(name, realm, frequency): """ """ @@ -255,7 +192,6 @@ def cmor_table_header(name, realm, frequency): } return 
header - def write_cmor_table(var_list, name): """ """ @@ -294,51 +230,6 @@ def write_cmor_table(var_list, name): json.dump(out, f, indent=4) return -def get_file_frq(ds, fnext): - """Return a dictionary with frequency for each time axis. - - Frequency is inferred by comparing interval between two consecutive - timesteps with expected interval at a given frequency. - Order time_axis so ones with only one step are last, so we can use - file frequency (interval_file) inferred from other time axes. - This is called if there are more than one time axis in file - (usually only UM) or if frequency can be guessed from filename. - """ - mopdb_log = logging.getLogger('mopdb_log') - frq = {} - int2frq = {'dec': 3652.0, 'yr': 365.0, 'mon': 30.0, - 'day': 1.0, '6hr': 0.25, '3hr': 0.125, - '1hr': 0.041667, '30min': 0.020833, '10min': 0.006944} - # retrieve all time axes - time_axs = [d for d in ds.dims if 'time' in d] - time_axs_len = set(len(ds[d]) for d in time_axs) - time_axs.sort(key=lambda x: len(ds[x]), reverse=True) - mopdb_log.debug(f"in get_file_frq, time_axs: {time_axs}") - max_len = len(ds[time_axs[0]]) - # if all time axes have only 1 timestep we cannot infer frequency - # so we open also next file but get only time axs - if max_len == 1: - dsnext = xr.open_dataset(fnext, decode_times = False) - time_axs2 = [d for d in dsnext.dims if 'time' in d] - ds = xr.concat([ds[time_axs], dsnext[time_axs2]], dim='time') - time_axs = [d for d in ds.dims if 'time' in d] - time_axs_len = set(len(ds[d]) for d in time_axs) - time_axs.sort(key=lambda x: len(ds[x]), reverse=True) - for t in time_axs: - mopdb_log.debug(f"len of time axis {t}: {len(ds[t])}") - if len(ds[t]) > 1: - interval = (ds[t][1]-ds[t][0]).values - interval_file = (ds[t][-1] -ds[t][0]).values - else: - interval = interval_file - mopdb_log.debug(f"interval 2 timesteps for {t}: {interval}") - for k,v in int2frq.items(): - if math.isclose(interval, v, rel_tol=0.05): - frq[t] = k - break - return frq - - def get_cell_methods(attrs, dims): """Get cell_methods from variable attributes. If cell_methods is not defined assumes values are instantaneous @@ -359,93 +250,6 @@ def get_cell_methods(attrs, dims): val = val.replace(time_axs[0], 'time') return val, frqmod - -def write_varlist(conn, indir, match, version, alias): - """Based on model output files create a variable list and save it - to a csv file. 
Main attributes needed to map output are provided - for each variable - """ - mopdb_log = logging.getLogger('mopdb_log') - line_cols = ['name','cmor_var','units','dimensions','_frequency', - '_realm','cell_methods','cmor_table','vtype','size', - 'nsteps','fpattern','long_name','standard_name'] - vobj_list = [] - fobj_list = [] - patterns = [] - files = FPattern.list_files(indir, match) - mopdb_log.debug(f"Files after sorting: {files}") - if alias == '': - alias = 'mopdb' - fname = f"varlist_{alias}.csv" - fcsv = open(fname, 'w') - fwriter = csv.writer(fcsv, delimiter=';') - fwriter.writerow(["name", "cmor_var", "units", "dimensions", - "frequency", "realm", "cell_methods", "cmor_table", "vtype", - "size", "nsteps", "fpattern", "long_name", "standard_name"]) - for fpath in files: - # get filename pattern until date match - mopdb_log.debug(f"Filename: {fpath.name}") - fpattern = fpath.name.split(match)[0] - if fpattern in patterns: - continue - patterns.append(fpattern) - fobj = FPattern(fpattern, Path(indir)) - #pattern_list = list_files(indir, f"{fpattern}*") - nfiles = len(fobj.files) - mopdb_log.debug(f"File pattern, number of files: {fpattern}, {nfiles}") - #fwriter.writerow([f"#{fpattern}"]) - # get attributes for the file variables - ds = xr.open_dataset(str(fobj.files[0]), decode_times=False) - coords = [c for c in ds.coords] + ['latitude_longitude'] - #pass next file in case of 1 timestep per file and no frq in name - fnext = str(fobj.files[1]) - if fobj.frequency == 'NAfrq' or fobj.realm == 'atmos': - frq_dict = get_file_frq(ds, fnext) - # if only one frequency detected empty dict - if len(frq_dict) == 1: - fobj.frequency = frq_dict.popitem()[1] - else: - fobj.multiple_frq = True - mopdb_log.debug(f"Multiple frq: {fobj.multiple_frq}") - if fobj.realm == "NArealm": - fobj.realm = get_realm(version, ds) - pattern_var_list = [] - for vname in ds.variables: - vobj = Variable(vname, fobj) - if vname not in coords and all(x not in vname for x in ['_bnds','_bounds']): - v = ds[vname] - mopdb_log.debug(f"Variable: {vobj.name}") - # get size in bytes of grid for 1 timestep and number of timesteps - vobj.size = v[0].nbytes - vobj.nsteps = nfiles * v.shape[0] - # assign time axis frequency if more than one is available - if fobj.multiple_frq: - if 'time' in v.dims[0]: - vobj._frequency = frq_dict[v.dims[0]] - else: - mopdb_log.info(f"Could not detect frequency for variable: {v}") - attrs = v.attrs - vobj.cell_methods, frqmod = get_cell_methods(attrs, v.dims) - vobj.frequency = vobj.frequency + frqmod - mopdb_log.debug(f"Frequency var: {vobj.frequency}") - # try to retrieve cmip name - vobj = get_cmorname(conn, vobj, version) - vobj.units = attrs.get('units', "") - vobj.long_name = attrs.get('long_name', "") - vobj.standard_name = attrs.get('standard_name', "") - vobj.dimensions = " ".join(v.dims) - vobj.vtype = v.dtype - line = [attrgetter(k)(vobj) for k in line_cols] - fwriter.writerow(line) - vobj_list.append(vobj) - pattern_var_list.append(vobj) - fobj.varlist = pattern_var_list - fobj_list.append(fobj) - mopdb_log.info(f"Variable list for {fpattern} successfully written") - fcsv.close() - return fname, vobj_list, fobj_list - - def read_map_app4(fname): """Reads APP4 style mapping """ mopdb_log = logging.getLogger('mopdb_log') @@ -503,150 +307,6 @@ def read_map(fname, alias): var_list.append(row[:11] + [notes, alias]) return var_list - -def match_stdname(conn, row, stdn): - """Returns an updated stdn list if finds one or more variables - in cmorvar table that match the standard name passed 
as input. - It also return a False/True found_match boolean. - """ - mopdb_log = logging.getLogger('mopdb_log') - found_match = False - sql = f"""SELECT name FROM cmorvar where - standard_name='{row['standard_name']}'""" - results = query(conn, sql, first=False, logname='mopdb_log') - matches = [x[0] for x in results] - if len(matches) > 0: - stdn = add_var(stdn, row, tuple([matches]+['']*7), stdnm=True) - found_match = True - - return stdn, found_match - - -def match_var(row, version, mode, conn, records): - """Returns match for variable if found after looping - variables already mapped in database - Parameters - - """ - mopdb_log = logging.getLogger('mopdb_log') - found_match = False - # build sql query based on mode - sql_base = f"""SELECT cmor_var,input_vars,calculation,frequency, - realm,model,cmor_table,positive,units FROM mapping where - input_vars='{row['name']}'""" - sql_frq = f" and frequency='{row['frequency']}'" - sql_ver = f" and model='{version}'" - if mode == 'full': - sql = sql_base + sql_frq + sql_ver - elif mode == 'no_frq': - sql = sql_base + sql_ver - elif mode == 'no_ver': - sql = sql_base + sql_frq - # execute query and process results - result = query(conn, sql, first=False, logname='mopdb_log') - mopdb_log.debug(f"match_var: {result}, sql: {sql[110:]}") - if result is not None and result != []: - for x in result: - mopdb_log.debug(f"match: {x}") - records = add_var(records, row, x) - found_match = True - - return records, found_match - - -def parse_vars(conn, rows, version): - """Returns records of variables to include in template mapping file, - a list of all stash variables + frequency available in model output - and a list of variables already defined in db - - Parameters - ---------- - conn : connection object - rows : list(dict) - list of variables to match - version : str - model version to use to match variables - - Returns - ------- - stash_vars : list - varname-frequency for each listed variable, varname is from model output - """ - mopdb_log = logging.getLogger('mopdb_log') - full = [] - no_ver = [] - no_frq = [] - stdn = [] - no_match = [] - stash_vars = [] - - # looping through variables from file and attempt matches to db - for row in rows: - if row['name'][0] == "#" or row['name'] == 'name': - continue - else: - full, found = match_var(row, version, 'full', conn, full) - # if no match, ignore model version first and then frequency - mopdb_log.debug(f"found perfect match: {found}") - if not found: - no_ver, found = match_var(row, version, 'no_ver', conn, no_ver) - mopdb_log.debug(f"found no ver match: {found}") - if not found: - no_frq, found = match_var(row, version, 'no_frq', conn, no_frq) - mopdb_log.debug(f"found no frq match: {found}") - # make a last attempt to match using standard_name - if not found: - if row['standard_name'] != '': - stdn, found = match_stdname(conn, row, stdn) - mopdb_log.debug(f"found stdnm match: {found}") - if not found: - no_match = add_var(no_match, row, tuple([row['name']]+['']*8)) - stash_vars.append(f"{row['name']}-{row['frequency']}") - - return full, no_ver, no_frq, stdn, no_match, stash_vars - - -def add_var(vlist, row, match, stdnm=False): - """Add information from match to variable list and re-order - fields so they correspond to final mapping output. 
- - Parameters - match : tuple - match values (cmor_var,input_vars,calculation,frequency, - realm,model(version),cmor_table,positive,units) - """ - mopdb_log = logging.getLogger('mopdb_log') - # assign cmor_var from match and swap place with input_vars - mopdb_log.debug(f"Assign cmor_var: {match}") - mopdb_log.debug(f"initial row: {row}") - var = row.copy() - var['cmor_var'] = match[0] - var['input_vars'] = match[1] - orig_name = var.pop('name') - # assign realm from match - var['realm'] = match[4] - # with stdn assign cmorvar and table if only 1 match returned - # otherwise assign table from match - if stdnm: - var['input_vars'] = orig_name - if len(var['cmor_var']) == 1: - cmor_var, table = var['cmor_var'][0].split("-") - var['cmor_var'] = cmor_var - var['cmor_table'] = table - else: - var['cmor_table'] = match[6] - # add calculation, positive and version - var['calculation'] = match[2] - var['positive'] = match[7] - var['version'] = match[5] - # maybe we should override units here rather than in check_realm_units - # if units missing get them from match - if var['units'] is None or var['units'] == '': - var['units'] = match[8] - vlist.append(var) - return vlist - - def remove_duplicate(vlist, extra=[], strict=True): """Returns list without duplicate variable definitions. @@ -674,121 +334,6 @@ def remove_duplicate(vlist, extra=[], strict=True): vid_list.append(vid) return final - -def potential_vars(conn, rows, stash_vars, version): - """Returns list of variables that can be potentially derived from - model output. - - Loop across all model variables to match - Select any mapping that contains the variable and if there's a calculation - NB rows modified by add_row when assigning cmorname and positive values - - Parameters - ---------- - conn : connection object - rows : list(dict) - list of variables to match - stash_vars : list - varname-frequency for each listed variable, varname is from model output - version : str - model version to use to match variables - - Returns - ------- - """ - mopdb_log = logging.getLogger('mopdb_log') - pot_full = [] - pot_part = [] - pot_varnames = set() - for row in rows: - sql = f"""SELECT cmor_var,input_vars,calculation,frequency, - realm,model,cmor_table,positive,units FROM mapping - WHERE input_vars like '%{row['name']}%'""" - results = query(conn, sql, first=False, logname='mopdb_log') - mopdb_log.debug(f"In potential: var {row['name']}, db results {results}") - for r in results: - allinput = r[1].split(" ") - mopdb_log.debug(f"{len(allinput)> 1}") - mopdb_log.debug(all(f"{x}-{row['frequency']}" in stash_vars for x in allinput)) - if len(allinput) > 1 and all(f"{x}-{row['frequency']}" in stash_vars for x in allinput): - # if both version and frequency of applied mapping match - # consider this a full matching potential var - if r[5] == version and r[3] == row['frequency']: - pot_full = add_var(pot_full, row, r) - else: - pot_part = add_var(pot_part, row, r) - pot_varnames.add(r[0]) - return pot_full, pot_part, pot_varnames - - -def write_map_template(conn, parsed, alias): - """Write mapping csv file template based on list of variables to define - - Input varlist file order: - name, cmor_var, units, dimensions, frequency, realm, cell_methods, - cmor_table, vtype, size, nsteps, fpattern, long_name, standard_name - Mapping db order: - cmor_var, input_vars, calculation, units, dimensions, frequency, realm, - cell_methods, positive, cmor_table, model, notes, origin - for pot vars + vtype, size, nsteps, fpattern - Final template order: - cmor_var, 
input_vars, calculation, units, dimensions, frequency, realm, - cell_methods, positive, cmor_table, version, vtype, size, nsteps, fpattern, - long_name, standard_name - """ - - mopdb_log = logging.getLogger('mopdb_log') - full, no_ver, no_frq, stdn, no_match, pot_full, pot_part = parsed - keys = ['cmor_var', 'input_vars', 'calculation', 'units', - 'dimensions', 'frequency', 'realm', 'cell_methods', - 'positive', 'cmor_table', 'version', 'vtype', 'size', - 'nsteps', 'fpattern', 'long_name', 'standard_name'] - - with open(f"map_{alias}.csv", 'w') as fcsv: - fwriter = csv.DictWriter(fcsv, keys, delimiter=';') - write_vars(full, fwriter, keys, conn=conn) - div = ("# Derived variables with matching version and " + - "frequency: Use with caution!") - write_vars(pot_full, fwriter, div, conn=conn) - div = ("# Variables definitions coming from different " + - "version") - write_vars(no_ver, fwriter, div, conn=conn) - div = ("# Variables with different frequency: Use with" - + " caution!") - write_vars(no_ver, fwriter, div, conn=conn) - div = ("# Variables matched using standard_name: Use " + - "with caution!") - write_vars(stdn, fwriter, div, sortby='input_vars') - div = "# Derived variables: Use with caution!" - write_vars(pot_part, fwriter, div, conn=conn) - #pot=True, conn=conn, sortby=0) - div = "# Variables without mapping" - write_vars(no_match, fwriter, div) - mopdb_log.debug("Finished writing variables to mapping template") - fcsv.close() - - return - - -def write_vars(vlist, fwriter, div, conn=None, sortby='cmor_var'): - """ - """ - - mopdb_log = logging.getLogger('mopdb_log') - if len(vlist) > 0: - if type(div) is str: - divrow = {x:'' for x in vlist[0].keys()} - divrow['cmor_var'] = div - elif type(div) is list: - divrow = {x:x for x in div} - fwriter.writerow(divrow) - for var in sorted(vlist, key=itemgetter(sortby)): - if conn: - var = check_realm_units(conn, var) - fwriter.writerow(var) - return - - def check_realm_units(conn, var): """Checks that realm and units are consistent with values in cmor table. @@ -818,7 +363,6 @@ def check_realm_units(conn, var): mopdb_log.warning(f"Variable {vname} not found in cmor table") return var - def get_realm(version, ds): '''Try to retrieve realm if using path failed''' @@ -830,7 +374,6 @@ def get_realm(version, ds): mopdb_log.debug(f"Realm is {realm}") return realm - def check_varlist(rows, fname): """Checks that varlist written to file has sensible information for frequency and realm to avoid incorrect mapping to be produced. 
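For reference, the boolean mask returned by get_date_pattern, which stays behind in mopdb_utils, is designed to be applied with itertools.compress, exactly as the tests added in an earlier patch do; a minimal sketch with an illustrative filename:

    from itertools import compress
    from mopdb.mopdb_utils import get_date_pattern

    fname = 'umnsa_cldrad_20160603T0000.nc'
    dp = get_date_pattern(fname, 'umnsa_cldrad_')
    # keep only the characters flagged True in the mask
    date = ''.join(compress(fname, dp))  # gives '201606030000'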
@@ -857,71 +400,6 @@ def check_varlist(rows, fname): sys.exit() return - -def map_variables(conn, rows, version): - """ - """ - mopdb_log = logging.getLogger('mopdb_log') - # return lists of fully/partially matching variables and stash_vars - # these are input_vars for calculation defined in already in mapping db - full, no_ver, no_frq, stdn, no_match, stash_vars = parse_vars(conn, - rows, version) - # remove duplicates from partially matched variables - no_ver = remove_duplicate(no_ver) - no_frq = remove_duplicate(no_frq, strict=False) - no_match = remove_duplicate(no_match, strict=False) - # check if more derived variables can be added based on all - # input_vars being available - pot_full, pot_part, pot_varnames = potential_vars(conn, rows, - stash_vars, version) - # potential vars have always duplicates: 1 for each input_var - pot_full = remove_duplicate(pot_full, strict=False) - pot_part = remove_duplicate(pot_part, extra=pot_full, strict=False) - mopdb_log.info(f"Derived variables: {pot_varnames}") - return full, no_ver, no_frq, stdn, no_match, pot_full, pot_part - - -def write_catalogue(conn, parsed, vobjs, fobjs, alias): - """Write intake-esm catalogue and returns name - """ - mopdb_log = logging.getLogger('mopdb_log') - # read template json file - jfile = import_files('mopdata').joinpath('intake_cat_template.json') - with open(jfile, 'r') as f: - template = json.load(f) - # read template yaml file - yfile = import_files('mopdata').joinpath('intake_cat_template.yaml') - maincat = read_yaml(yfile) - mopdb_log.debug("Opened intake template files") - # update json data with relevant information - # update title, description etc with experiment - for k,v in template.items(): - if type(v) == str: - template[k] = v.replace('', alias) - for k,v in maincat.items(): - if type(v) == str: - maincat[k] = v.replace('', alias) - # write updated json to file - jfile = f"intake_{alias}.json" - with open(jfile, 'w') as f: - json.dump(template, f, indent=4) - # write updated yaml to file - jfile = f"intake_{alias}.yaml" - write_yaml(maincat, jfile, 'mopdb_log') - # create a dictionary for each file to list - lines = create_file_dict(fobjs, alias) - # write csv file - cols = [x['column_name'] for x in template['attributes']] - cols = ['path'] + cols - csvname = template['catalog_file'] - with lzma.open(csvname, 'wt') as fcsv: - fwriter = csv.DictWriter(fcsv, cols, delimiter=';') - fwriter.writeheader() - for fd in lines: - fwriter.writerow(fd) - fcsv.close() - return jfile, csvname - def get_date_pattern(fname, fpattern): """Try to build a date range for each file pattern based on its filename @@ -933,30 +411,3 @@ def get_date_pattern(fname, fpattern): n = len(fpattern) date_pattern[:n] = [False] * n return date_pattern - -def create_file_dict(fobjs, alias): - """ - """ - mopdb_log = logging.getLogger('mopdb_log') - for pat_obj in fobjs: - var_list = [v.name for v in pat_obj.varlist] - # set to remove '' duplicates - mapvar_list = list(set(v.cmor_var for v in pat_obj.varlist)) - stnm_list = list(set(v.standard_name for v in pat_obj.varlist)) - base_dict = {'experiment': alias, - 'realm': pat_obj.realm, - 'frequency': pat_obj.frequency, - 'variable': var_list, - 'map_var': mapvar_list, - 'standard_name': stnm_list} - # work out date_pattern in filename - fname = pat_obj.files[0].name - date_pattern = get_date_pattern(fname, pat_obj.fpattern) - # add date and path for each file - for fpath in pat_obj.files: - f = fpath.name - fd = base_dict.copy() - fd['path'] = str(fpath) - fd['date'] = ''.join(c for 
c in compress(f, date_pattern)) - lines.append(fd) - return lines From 917582f23294c4e3db7e94ef9fea956da3d728d8 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 16 Jul 2024 11:32:31 +1000 Subject: [PATCH 063/137] fixed imports --- .gitignore | 9 ++++++++- src/mopdb/mopdb.py | 7 ++++++- src/mopdb/mopdb_map.py | 10 ++-------- tests/pytest.ini | 3 +++ 4 files changed, 19 insertions(+), 10 deletions(-) create mode 100644 tests/pytest.ini diff --git a/.gitignore b/.gitignore index 3ba2e33..c0e81d6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,10 @@ -custom_app4_*.sh __pycache__/ build/ +mopper_venv/ +*.csv +*.yaml +*.json +localdata/ +src/mopper.egg-info/ +extras/ +*.txt diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index aa16b2e..4d8a269 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -26,9 +26,14 @@ import json from importlib.resources import files as import_files +from pathlib import Path -from mopdb.mopdb_utils import * +from mopdb.mopdb_utils import (mapping_sql, cmorvar_sql, read_map, + read_map_app4, map_update_sql, create_table, write_cmor_table, + check_varlist) from mopdb.utils import * +from mopdb.mopdb_map import (write_varlist, write_map_template, + write_catalogue, map_variables) def mopdb_catch(): """ diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index a0e580c..11b9ca1 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -23,6 +23,7 @@ import csv import json import lzma +import math import xarray as xr from operator import itemgetter, attrgetter @@ -32,7 +33,7 @@ from mopdb.mopdb_class import FPattern, Variable from mopdb.utils import * -from mopdb.mopdb_utils import (get_cell_methods, remove_duplicates, +from mopdb.mopdb_utils import (get_cell_methods, remove_duplicate, get_realm, check_realm_units, get_date_pattern) @@ -81,10 +82,6 @@ def get_cmorname(conn, vobj, version): mopdb_log.info(f"Found more than 1 definition for {vobj.name}:\n"+ f"{results}\n Using {vobj.cmor_var} from {vobj.cmor_table}") return vobj - "mip_era": "", - "Conventions": "CF-1.7 ACDD1.3" - } - return header def get_file_frq(ds, fnext): """Return a dictionary with frequency for each time axis. 
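The rule get_file_frq applies is simple: the gap between two consecutive timesteps, expressed in days, is compared against the int2frq table with a 5% relative tolerance. A standalone sketch of that arithmetic (the interval value is illustrative):

    import math

    int2frq = {'day': 1.0, '6hr': 0.25, '3hr': 0.125, '1hr': 0.041667}
    interval = 0.125  # days between two consecutive timesteps
    frq = next((k for k, v in int2frq.items()
                if math.isclose(interval, v, rel_tol=0.05)), None)
    assert frq == '3hr'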
@@ -230,10 +227,8 @@ def match_stdname(conn, row, stdn): if len(matches) > 0: stdn = add_var(stdn, row, tuple([matches]+['']*7), stdnm=True) found_match = True - return stdn, found_match - def match_var(row, version, mode, conn, records): """Returns match for variable if found after looping variables already mapped in database @@ -262,7 +257,6 @@ def match_var(row, version, mode, conn, records): mopdb_log.debug(f"match: {x}") records = add_var(records, row, x) found_match = True - return records, found_match def parse_vars(conn, rows, version): diff --git a/tests/pytest.ini b/tests/pytest.ini new file mode 100644 index 0000000..389e88b --- /dev/null +++ b/tests/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +log_cli = true +log_cli_level = DEBUG From cdb24633755a6cabc4f29bfe8abf40c941c3ea18 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 17 Jul 2024 14:00:34 +1000 Subject: [PATCH 064/137] improved intake catalogue --- conda/environment.yaml | 12 +------ src/mopdata/intake_cat_template.json | 7 +++-- src/mopdata/intake_cat_template.yaml | 2 -- src/mopdb/mopdb.py | 1 + src/mopdb/mopdb_map.py | 47 ++++++++++++++++++++++------ src/mopper/calculations.py | 2 +- tests/conftest.py | 14 ++++----- tests/test_calculations.py | 6 ++-- 8 files changed, 54 insertions(+), 37 deletions(-) diff --git a/conda/environment.yaml b/conda/environment.yaml index 2f0d566..ae78e08 100644 --- a/conda/environment.yaml +++ b/conda/environment.yaml @@ -1,11 +1,8 @@ -name: test-env +#name: test-env channels: - defaults - conda-forge dependencies: - - python=3.10 - - pip - - pbr - click - cmor - xarray @@ -13,10 +10,3 @@ dependencies: - dask - pyyaml - cftime - - python-dateutil - - pytest - - coverage - - codecov - - importlib_resources - - pip: - - git+https://github.com/ACCESS-Community-Hub/ACCESS-MOPPeR@pytests_sam diff --git a/src/mopdata/intake_cat_template.json b/src/mopdata/intake_cat_template.json index 502bf2a..85c943c 100644 --- a/src/mopdata/intake_cat_template.json +++ b/src/mopdata/intake_cat_template.json @@ -7,10 +7,11 @@ "format": "netcdf" }, "aggregation_control": { - "variable_column_name": "frequency", + "variable_column_name": "variable", "groupby_attrs": [ "realm", - "frequency" + "frequency", + "mapvar" ], "aggregations": [ { @@ -38,7 +39,7 @@ "column_name": "variable" }, { - "column_name": "map_var" + "column_name": "mapvar" }, { "column_name": "standard_name" diff --git a/src/mopdata/intake_cat_template.yaml b/src/mopdata/intake_cat_template.yaml index 87fc7e4..0f94eeb 100644 --- a/src/mopdata/intake_cat_template.yaml +++ b/src/mopdata/intake_cat_template.yaml @@ -14,7 +14,5 @@ sources: args: columns_with_iterables: - variable - - map_var - - standard_name read_csv_kwargs: {"dtype": {"date": str}} obj: "{{CATALOG_DIR}}/intake_.json" diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 4d8a269..4f4e3cd 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -398,6 +398,7 @@ def write_intake(ctx, fpath, match, dbname, version, alias): fpath = Path(fpath) if fpath.is_file(): fname = fpath.name + vobjs, fobjs = load_vars(fname) else: mopdb_log.debug(f"Calling model_vars() from intake: {fpath}") fname, vobjs, fobjs = model_vars(fpath, match, conn, version, alias) diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index 11b9ca1..a96fd51 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -530,24 +530,53 @@ def create_file_dict(fobjs, alias): for pat_obj in fobjs: var_list = [v.name for v in pat_obj.varlist] # set to remove '' duplicates - mapvar_list = 
list(set(v.cmor_var for v in pat_obj.varlist))
-        mapvar_list.remove("")
-        stnm_list = list(set(v.standard_name for v in pat_obj.varlist))
-        stnm_list.remove("")
         base_dict = {'experiment': alias,
-                 'realm': pat_obj.realm,
-                 'frequency': pat_obj.frequency,
-                 'variable': str(var_list),
-                 'map_var': str(mapvar_list),
-                 'standard_name': str(stnm_list)}
+                     'realm': pat_obj.realm,
+                     'frequency': pat_obj.frequency,
+                     'variable': str(var_list),
+                     'mapvar': "NAV",
+                     'standard_name': "NAV"}
         # work out date_pattern in filename
         fname = pat_obj.files[0].name
         date_pattern = get_date_pattern(fname, pat_obj.fpattern)
         # add date and path for each file
+        path_list = []
         for fpath in pat_obj.files:
             f = fpath.name
             fd = base_dict.copy()
             fd['path'] = str(fpath)
             fd['date'] = ''.join(c for c in compress(f, date_pattern))
             lines.append(fd)
+            path_list.append((fd['path'],fd['date']))
+        lines = add_mapvars(pat_obj.varlist, lines, path_list, alias)
     return lines
+
+def add_mapvars(vobjs, lines, path_list, alias):
+    """Adds a catalogue line for each variable with a cmor name or
+    standard name, reusing path and date from path_list.
+    """
+    mopdb_log = logging.getLogger('mopdb_log')
+    for vobj in vobjs:
+        if vobj.cmor_var != "" or vobj.standard_name != "":
+            mapvar = vobj.cmor_var
+            stdname = vobj.standard_name
+            base_dict = {'experiment': alias,
+                     'realm': vobj.realm,
+                     'frequency': vobj.frequency,
+                     'variable': str([vobj.name]),
+                     'mapvar': mapvar if mapvar else "NAV",
+                     'standard_name': stdname if stdname else "NAV"}
+            # use path_list to add path and date for all files
+            for fpath, date in path_list:
+                fd = base_dict.copy()
+                fd['path'] = fpath
+                fd['date'] = date
+                lines.append(fd)
+    return lines
+
+def load_vars(fname):
+    """Returns Variable and FPattern objs from varlist or map file.
+    """
+    vobjs = []
+    fobjs = []
+    # distinguish between varlist and mapping file based on header
+    return vobjs, fobjs
diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py
index 9f34e35..ed9ae92 100644
--- a/src/mopper/calculations.py
+++ b/src/mopper/calculations.py
@@ -1167,7 +1167,7 @@ def calc_topsoil(soilvar):
     Returns
     -------
     topsoil : Xarray DataArray
-        Variable define don top 10cm of soil
+        Variable defined on top 10cm of soil
     """
     depth = soilvar.depth
     # find index of bottom depth level including the first 10cm of soil
diff --git a/tests/conftest.py b/tests/conftest.py
index f26c225..0dd6c56 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -25,7 +25,7 @@
 import logging
 import csv
 from mopdb.mopdb_utils import mapping_sql, cmorvar_sql
-#from mopper.setup_utils import filelist_sql
+from mopper.setup_utils import filelist_sql
 
 TESTS_HOME = os.path.abspath(os.path.dirname(__file__))
 
@@ -59,12 +59,12 @@ def setup_access_db(session):
     session.connection.commit()
 
 
-#@pytest.fixture
-#def setup_mopper_db(session):
-#    filelist_sql = mapping_sql()
-#    session.execute(filelist_sql)
-#    session.execute('''INSERT INTO filelist VALUES ("/testdata/atmos/umnsa_spec_*.nc", "/testdata/mjo-elnino/v1-0/A10min/", "tas_AUS2200_mjo-elnino_subhrPt_20160101001000-20160102000000.nc", "fld_s03i236", "tas", "AUS2200_A10min", "subhrPt", "atmos", "point", "20160101T0005", "20160102T0000", "201601010000", "201601012355", "unprocessed", "3027.83203125", "mjo-elnino", "K", "AUS2200", "AUS2200", "/testdata/mjo-elnino/mjo-elnino.json", "1970-01-01", "v1-0")''')
-#    session.connection.commit()
+@pytest.fixture
+def setup_mopper_db(session):
+    flist_sql = filelist_sql()
+    session.execute(flist_sql)
+    session.execute('''INSERT INTO filelist VALUES ("/testdata/atmos/umnsa_spec_*.nc", "/testdata/mjo-elnino/v1-0/A10min/", 
"tas_AUS2200_mjo-elnino_subhrPt_20160101001000-20160102000000.nc", "fld_s03i236", "tas", "AUS2200_A10min", "subhrPt", "atmos", "point", "20160101T0005", "20160102T0000", "201601010000", "201601012355", "unprocessed", "3027.83203125", "mjo-elnino", "K", "AUS2200", "AUS2200", "/testdata/mjo-elnino/mjo-elnino.json", "1970-01-01", "v1-0")''') + session.connection.commit() def test_check_timestamp(caplog): diff --git a/tests/test_calculations.py b/tests/test_calculations.py index dc8507b..692211f 100644 --- a/tests/test_calculations.py +++ b/tests/test_calculations.py @@ -58,15 +58,14 @@ def create_var(nlat, nlon, ntime=None, nlev=None, sdepth=False, seed=100): attrs={'name': 'random'}) return da -mrsol = create_var(2, 3, ntime=4, sdepth=True) def test_calc_topsoil(): - global mrsol + mrsol = create_var(2, 3, ntime=4, sdepth=True) + #print(mrsol) expected = mrsol.isel(depth=0) + mrsol.isel(depth=1)/3.0 out = calc_topsoil(mrsol) xrtest.assert_allclose(out, expected, rtol=1e-05) -''' def test_overturn_stream(caplog): global ctx caplog.set_level(logging.DEBUG, logger='varlog_1') @@ -118,4 +117,3 @@ def test_overturn_stream(caplog): with ctx: out4 = overturn_stream(varlist) nptest.assert_array_equal(res4, out4) -''' From c049d46af3e13101e815d98356ebfa126293de98 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 18 Jul 2024 17:09:05 +1000 Subject: [PATCH 065/137] modifying intake function to allow different workflows --- src/mopdata/intake_cat_template.json | 9 +++ src/mopdb/mopdb.py | 105 +++++++++++++++------------ src/mopdb/mopdb_class.py | 1 + src/mopdb/mopdb_map.py | 38 ++++++++-- 4 files changed, 102 insertions(+), 51 deletions(-) diff --git a/src/mopdata/intake_cat_template.json b/src/mopdata/intake_cat_template.json index 85c943c..f3395ba 100644 --- a/src/mopdata/intake_cat_template.json +++ b/src/mopdata/intake_cat_template.json @@ -46,6 +46,15 @@ }, { "column_name": "date" + }, + { + "column_name": "units" + }, + { + "column_name": "calculation" + }, + { + "column_name": "cell_methods" } ] } diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 4f4e3cd..85e6136 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -33,7 +33,7 @@ check_varlist) from mopdb.utils import * from mopdb.mopdb_map import (write_varlist, write_map_template, - write_catalogue, map_variables) + write_catalogue, map_variables, load_vars) def mopdb_catch(): """ @@ -49,11 +49,20 @@ def mopdb_catch(): def require_date(ctx, param, value): - """Changes match option in template/intake commands from optional to + """Changes match option in template command from optional to required if fpath is a directory. """ - if Path(value).is_dir(): - ctx.command.params[1].required = True + # this looks convoluted but pop() was necessary to retrieve the + # objetc rather than the string + names = [] + print(ix for x in ctx.command.params.keys()) + for i in range(len(ctx.command.params)): + opt = ctx.command.params.pop() + print(type(opt)) + names.append(opt.name) + idx = names.index('match') + if Path(value).is_dir() and 'filelist' not in names: + ctx.command.params[idx].required = True return value @@ -77,20 +86,20 @@ def map_args(f): constraints = [ click.option('--fpath', '-f', type=str, required=True, callback=require_date, - help=(''''Path for model output files. For "template" - command can also be file generated by varlist step''')), + help=("""Model output files path. 
For 'template' + command can also be file generated by varlist step""")), click.option('--match', '-m', type=str, required=False, - help=('''String to match output files. Most often - the timestamp from one of the output files''')), + help=("""String to match output files. Most often + the timestamp from one of the output files""")), click.option('--version', '-v', required=True, type=click.Choice(['ESM1.5', 'CM2', 'AUS2200', 'OM2']), show_default=True, - help='ACCESS version currently only CM2, ESM1.5, AUS2200, OM2'), + help="ACCESS version currently only CM2, ESM1.5, AUS2200, OM2"), click.option('--dbname', type=str, required=False, default='default', - help='Database relative path by default is package access.db'), + help="Database relative path by default is package access.db"), click.option('--alias', '-a', type=str, required=False, default='', - help='''Alias to use to keep track of variable definition origin. - If none passed uses input filename''')] + help="""Alias to use to keep track of variable definition origin. + If none passed uses input filename""")] for c in reversed(constraints): f = c(f) return f @@ -307,7 +316,7 @@ def map_template(ctx, fpath, match, dbname, version, alias): mapping table. If not tries to partially match them. It can get as input the directory containing the output in - which case it will first call model_vars() (varlist command) + which case it will first call write_varlist() or the file output of the same if already available. Parameters @@ -338,20 +347,25 @@ def map_template(ctx, fpath, match, dbname, version, alias): # work out if fpath is varlist or path to output fpath = Path(fpath) if fpath.is_file(): - fname = fpath.name + fobjs, vobjs = load_vars(fpath) + name = fpath.name else: - mopdb_log.debug(f"Calling model_vars() from template: {fpath}") - fname, vobjs, fobjs = model_vars(fpath, match, conn, version, alias) + mopdb_log.debug(f"Calling write_varlist() from template: {fpath}") + fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) if alias == '': alias = fname.split(".")[0] +# also from here on it should be called by separate function I can call from intake too +# without repeating steps # read list of vars from file - with open(fname, 'r') as csvfile: - reader = csv.DictReader(csvfile, delimiter=';') - rows = list(reader) - check_varlist(rows, fname) + # this should now spit out fobjs, vobjs to pass to template + #with open(fname, 'r') as csvfile: + # reader = csv.DictReader(csvfile, delimiter=';') + # rows = list(reader) + #check_varlist(rows, fname) # return lists of fully/partially matching variables and stash_vars # these are input_vars for calculation defined in already in mapping db - parsed = map_variables(conn, rows, version) + #parsed = map_variables(conn, rows, version) + parsed = map_variables(conn, vobjs, version) # potential vars have always duplicates: 1 for each input_var write_map_template(conn, parsed, alias) conn.close() @@ -361,12 +375,14 @@ def map_template(ctx, fpath, match, dbname, version, alias): @mopdb.command(name='intake') @map_args +@click.option('--filelist','-fl', type=str, required=False, default=None, + help='Map or varlist csv file relative path') @click.pass_context -def write_intake(ctx, fpath, match, dbname, version, alias): +def write_intake(ctx, fpath, match, filelist, dbname, version, alias): """Writes an intake-esm catalogue. 
It can get as input the directory containing the output in - which case it will first call model_vars() (varlist command) + which case it will first call write_varlist() (varlist command) or the file output of the same if already available. Parameters @@ -374,10 +390,11 @@ def write_intake(ctx, fpath, match, dbname, version, alias): ctx : obj Click context object fpath : str - Path of csv input file with output variables to map or - of directory containing output files to scan + Path of directory containing output files to scan match : str Date or other string to match to individuate one file per type + filelist : str + Map or varlist csv file path, optional (default is None) dbname : str Database relative path (default is data/access.db) version : str @@ -397,11 +414,15 @@ def write_intake(ctx, fpath, match, dbname, version, alias): # work out if fpath is varlist or path to output fpath = Path(fpath) if fpath.is_file(): - fname = fpath.name - vobjs, fobjs = load_vars(fname) + mopdb_log.error(f""" {fpath} + should be absolute or relative path to model output. + To pass a varlist or map file use --filelist/-f""") + elif filelist is None: + mopdb_log.debug(f"Calling write_varlist() from intake: {fpath}") + fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) else: - mopdb_log.debug(f"Calling model_vars() from intake: {fpath}") - fname, vobjs, fobjs = model_vars(fpath, match, conn, version, alias) + fname = filelist.name + vobjs, fobjs = load_vars(filelist) if alias == '': alias = fname.split(".")[0] # read list of vars from file @@ -470,23 +491,13 @@ def update_map(ctx, dbname, fname, alias): return None -@mopdb.command(name='varlist') -@map_args -@click.pass_context -def list_vars(ctx, fpath, match, dbname, version, alias): - """Calls model_vars to generate list of variables""" - # connect to db, check first if db exists or exit - if dbname == 'default': - dbname = import_files('mopdata').joinpath('access.db') - conn = db_connect(dbname) - conn = db_connect(dbname, logname='mopdb_log') - fname, vobjs, fobjs = model_vars(fpath, match, conn, version, alias) - conn.close() return None +@mopdb.command(name='varlist') +@map_args @click.pass_context -def model_vars(ctx, fpath, match, conn, version, alias): +def model_vars(ctx, fpath, match, version, alias): """Read variables from model output opens one file for each kind, save variable list as csv file @@ -508,13 +519,15 @@ def model_vars(ctx, fpath, match, conn, version, alias): Returns ------- - fname : str - Name of output varlist file """ - + # connect to db, check first if db exists or exit + if dbname == 'default': + dbname = import_files('mopdata').joinpath('access.db') + conn = db_connect(dbname, logname='mopdb_log') mopdb_log = logging.getLogger('mopdb_log') fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) - return fname, vobjs, fobjs + conn.close() + return None @mopdb.command(name='del') diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index 2fec511..864b5d1 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -96,6 +96,7 @@ def __init__(self, varname: str, fobj: FPattern): self._realm = fobj.realm self.cmor_var = '' self.cmor_table = '' + self.calculation = '' #self.version = self.fpattern.version self.match = False # descriptive attributes diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index a96fd51..dfd56c6 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -30,11 +30,12 @@ from pathlib import Path from itertools import 
compress from importlib.resources import files as import_files +from access_nri_intake.source.builders import AccessEsm15Builder from mopdb.mopdb_class import FPattern, Variable from mopdb.utils import * from mopdb.mopdb_utils import (get_cell_methods, remove_duplicate, - get_realm, check_realm_units, get_date_pattern) + get_realm, check_realm_units, get_date_pattern, check_varlist) def get_cmorname(conn, vobj, version): @@ -535,7 +536,10 @@ def create_file_dict(fobjs, alias): 'frequency': pat_obj.frequency, 'variable': str(var_list), 'mapvar': "NAV", - 'standard_name': "NAV"} + 'standard_name': "", + 'units': "", + 'calculation': "", + 'cell_methods': ""} # work out date_pattern in filename fname = pat_obj.files[0].name date_pattern = get_date_pattern(fname, pat_obj.fpattern) @@ -558,13 +562,15 @@ def add_mapvars(vobjs, lines, path_list, alias): for vobj in vobjs: if vobj.cmor_var != "" or vobj.standard_name != "": mapvar = vobj.cmor_var - stdname = vobj.standard_name base_dict = {'experiment': alias, 'realm': vobj.realm, 'frequency': vobj.frequency, 'variable': str([vobj.name]), 'mapvar': mapvar if mapvar else "NAV", - 'standard_name': stdname if stdname else "NAV"} + 'standard_name': vobj.standard_name, + 'units': vobj.units, + 'calculation': vobj.calculation, + 'cell_methods': vobj.cell_methods} # use path_list to add path and date for all files for fpath, date in path_list: fd = base_dict.copy() @@ -578,5 +584,27 @@ def load_vars(fname): """ vobjs = [] fobjs = [] - # distinguish between varlist and mapping file vbased on header + # distinguish between varlist and mapping file based on header + with open(fname, 'r') as csvfile: + reader = csv.DictReader(csvfile, delimiter=';') + rows = list(reader) + #check_varlist(rows, fname) + # set fobjs + patterns = list(set(x['fpattern'] for x in rows)) + print(patterns) + for pat in patterns: + if pat != "": + fo = FPattern(fpattern, Path(indir)) + fobjs.append(fo) + patterns2 = [x['fpattern'] for x in rows] + sys.exit() + + if 'calculation' in rows[0].keys(): + map_file = True + else: + map_file = False + for row in rows[1:]: + row['fpattern'] + v = Variable(row['name'], ) + #for field in row[0]: return vobjs, fobjs From 57d6d2466a073cfaabf46803d7efdf8174925ddb Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 23 Jul 2024 21:14:05 +1000 Subject: [PATCH 066/137] completed #147, introduced MapVaribale class --- src/mopdb/mopdb.py | 37 ++++----- src/mopdb/mopdb_class.py | 98 +++++++++++++++++----- src/mopdb/mopdb_map.py | 171 +++++++++++++++++++++++---------------- src/mopdb/mopdb_utils.py | 22 ++--- src/mopper/mop_setup.py | 2 +- src/mopper/mop_utils.py | 2 +- 6 files changed, 210 insertions(+), 122 deletions(-) diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 85e6136..fee8f45 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -33,7 +33,7 @@ check_varlist) from mopdb.utils import * from mopdb.mopdb_map import (write_varlist, write_map_template, - write_catalogue, map_variables, load_vars) + write_catalogue, map_variables, load_vars, get_map_obj) def mopdb_catch(): """ @@ -52,14 +52,9 @@ def require_date(ctx, param, value): """Changes match option in template command from optional to required if fpath is a directory. 
""" - # this looks convoluted but pop() was necessary to retrieve the - # objetc rather than the string names = [] - print(ix for x in ctx.command.params.keys()) for i in range(len(ctx.command.params)): - opt = ctx.command.params.pop() - print(type(opt)) - names.append(opt.name) + names.append(ctx.command.params[i].name) idx = names.index('match') if Path(value).is_dir() and 'filelist' not in names: ctx.command.params[idx].required = True @@ -347,8 +342,10 @@ def map_template(ctx, fpath, match, dbname, version, alias): # work out if fpath is varlist or path to output fpath = Path(fpath) if fpath.is_file(): - fobjs, vobjs = load_vars(fpath) - name = fpath.name + map_file, vobjs, fobjs = load_vars(fpath) + fname = fpath.name + mopdb_log.debug(f"Imported {len(vobjs)} objects from file {fpath}") + mopdb_log.debug(f"Is mapping file? {map_file}") else: mopdb_log.debug(f"Calling write_varlist() from template: {fpath}") fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) @@ -369,7 +366,6 @@ def map_template(ctx, fpath, match, dbname, version, alias): # potential vars have always duplicates: 1 for each input_var write_map_template(conn, parsed, alias) conn.close() - return @@ -420,21 +416,26 @@ def write_intake(ctx, fpath, match, filelist, dbname, version, alias): elif filelist is None: mopdb_log.debug(f"Calling write_varlist() from intake: {fpath}") fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) + map_file = False else: - fname = filelist.name - vobjs, fobjs = load_vars(filelist) + flist = Path(filelist) + fname = flist.name + map_file, vobjs, fobjs = load_vars(flist, indir=fpath) if alias == '': alias = fname.split(".")[0] # read list of vars from file - with open(fname, 'r') as csvfile: - reader = csv.DictReader(csvfile, delimiter=';') - rows = list(reader) - check_varlist(rows, fname) + #with open(fname, 'r') as csvfile: + # reader = csv.DictReader(csvfile, delimiter=';') + # rows = list(reader) + #check_varlist(rows, fname) # return lists of fully/partially matching variables and stash_vars # these are input_vars for calculation defined in already in mapping db - parsed = map_variables(conn, rows, version) + if map_file is False: + parsed = map_variables(conn, vobjs, version) + vobjs = get_map_obj(parsed) + write_map_template(conn, parsed, alias) # potential vars have always duplicates: 1 for each input_var - cat_name, fcsv = write_catalogue(conn, parsed, vobjs, fobjs, alias) + cat_name, fcsv = write_catalogue(conn, vobjs, fobjs, alias) mopdb_log.info(f"""Intake-esm and intake catalogues written to {cat_name} and {cat_name.replace('json','yaml')}. File list saved to {fcsv}""") conn.close() diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index 864b5d1..8b73805 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -25,7 +25,7 @@ class FPattern(): its attributes represents features of the variables which are shared. 
""" - def __init__(self, fpattern: str, fpath: Path): + def __init__(self, fpattern: str, fpath: Path | None) -> None: self.fpattern = fpattern self.fpath = fpath self.files = self.get_files() @@ -37,34 +37,39 @@ def __init__(self, fpattern: str, fpath: Path): def get_frequency(self): frequency = 'NAfrq' - fname = str(self.files[0]) - if self.realm == 'atmos': - fbits = fname.split("_") - frequency = fbits[-1].replace(".nc", "") - elif self.realm == 'ocean': - if any(x in fname for x in ['scalar', 'month']): - frequency = 'mon' - elif 'daily' in fname: - frequency = 'day' - elif self.realm == 'seaIce': - if '_m.' in fname: - frequency = 'mon' - elif '_d.' in fname: - frequency = 'day' + if len(self.files) > 0 and self.realm != 'NArealm': + fname = str(self.files[0]) + if self.realm == 'atmos': + fbits = fname.split("_") + frequency = fbits[-1].replace(".nc", "") + elif self.realm == 'ocean': + if any(x in fname for x in ['scalar', 'month']): + frequency = 'mon' + elif 'daily' in fname: + frequency = 'day' + elif self.realm == 'seaIce': + if '_m.' in fname: + frequency = 'mon' + elif '_d.' in fname: + frequency = 'day' return frequency - def get_realm(self): realm = 'NArealm' - realm = next((x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] - if x in self.fpath.parts), 'NArealm') + if self.fpath is not None: + realm = next((x for x in ['atmos', 'ocean', 'ice', 'ocn','atm'] + if x in self.fpath.parts), 'NArealm') fix_realm = {'atm': 'atmos', 'ice': 'seaIce', 'ocn': 'ocean'} if realm in fix_realm.keys(): realm = fix_realm[realm] return realm def get_files(self): - return self.list_files(self.fpath, self.fpattern) + if self.fpath is None: + files = [] + else: + files = self.list_files(self.fpath, self.fpattern) + return files @staticmethod def list_files(indir, match): @@ -96,8 +101,9 @@ def __init__(self, varname: str, fobj: FPattern): self._realm = fobj.realm self.cmor_var = '' self.cmor_table = '' + #self.input_vars = varname self.calculation = '' - #self.version = self.fpattern.version + self.version = fobj.version self.match = False # descriptive attributes self.units = '' @@ -136,3 +142,55 @@ def realm(self, value): ['atmos', 'seaIce', 'ocean', 'land', 'landIce']): value = 'NArealm' self.realm = value + + def get_match(self): + """Returns the attributes that mimic + a database match""" + if self.cmor_var != '': + cmor_var = self.cmor_var + else: + cmor_var = self.name + match = (self.cmor_var, self.name, '', self.frequency, + self.realm, self.version, '', self.positive, self.units) + return match + + +class MapVariable(): + """This class represent a mapping for variable + It's similar but from a cmor_name point of view + """ + + def __init__(self, match: list, vobj: Variable): + # path object + self.fpattern = vobj.fpattern + # mapping attributes + self.frequency = vobj.frequency + self.realm = match[4] + self.cmor_var = match[0] + self.cmor_table = match[6] + self.input_vars = match[1] + self.calculation = match[2] + self.version = match[5] + # could change this to nomatch found or + # kind of match + self.match = True + # descriptive attributes + self.units = vobj.units + if self.units == '': + self.units = match[8] + self.dimensions = vobj.dimensions + self.cell_methods = vobj.cell_methods + self.positive = match[7] + self.long_name = vobj.long_name + self.standard_name = vobj.standard_name + # type and size attributes + self.vtype = vobj.vtype + self.size = vobj.size + self.nsteps = vobj.nsteps + + def attrs(self): + attrs = [] + for k in self.__dict__.keys(): + if k not in 
['match']: + attrs.append(k) + return attrs diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index dfd56c6..4f098ba 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -32,7 +32,7 @@ from importlib.resources import files as import_files from access_nri_intake.source.builders import AccessEsm15Builder -from mopdb.mopdb_class import FPattern, Variable +from mopdb.mopdb_class import FPattern, Variable, MapVariable from mopdb.utils import * from mopdb.mopdb_utils import (get_cell_methods, remove_duplicate, get_realm, check_realm_units, get_date_pattern, check_varlist) @@ -214,7 +214,7 @@ def write_varlist(conn, indir, match, version, alias): fcsv.close() return fname, vobj_list, fobj_list -def match_stdname(conn, row, stdn): +def match_stdname(conn, vobj, stdn): """Returns an updated stdn list if finds one or more variables in cmorvar table that match the standard name passed as input. It also return a False/True found_match boolean. @@ -222,15 +222,15 @@ def match_stdname(conn, row, stdn): mopdb_log = logging.getLogger('mopdb_log') found_match = False sql = f"""SELECT name FROM cmorvar where - standard_name='{row['standard_name']}'""" + standard_name='{vobj.standard_name}'""" results = query(conn, sql, first=False, logname='mopdb_log') matches = [x[0] for x in results] if len(matches) > 0: - stdn = add_var(stdn, row, tuple([matches]+['']*7), stdnm=True) + stdn = add_var(stdn, vobj, tuple([matches]+['']*7), stdnm=True) found_match = True return stdn, found_match -def match_var(row, version, mode, conn, records): +def match_var(vobj, version, mode, conn, records): """Returns match for variable if found after looping variables already mapped in database Parameters @@ -241,8 +241,8 @@ def match_var(row, version, mode, conn, records): # build sql query based on mode sql_base = f"""SELECT cmor_var,input_vars,calculation,frequency, realm,model,cmor_table,positive,units FROM mapping where - input_vars='{row['name']}'""" - sql_frq = f" and frequency='{row['frequency']}'" + input_vars='{vobj.name}'""" + sql_frq = f" and frequency='{vobj.frequency}'" sql_ver = f" and model='{version}'" if mode == 'full': sql = sql_base + sql_frq + sql_ver @@ -252,15 +252,15 @@ def match_var(row, version, mode, conn, records): sql = sql_base + sql_frq # execute query and process results result = query(conn, sql, first=False, logname='mopdb_log') - mopdb_log.debug(f"match_var: {result}, sql: {sql[110:]}") + mopdb_log.debug(f"match_var: {result}, sql: {sql[114:]}") if result is not None and result != []: for x in result: mopdb_log.debug(f"match: {x}") - records = add_var(records, row, x) + records = add_var(records, vobj, x) found_match = True return records, found_match -def parse_vars(conn, rows, version): +def parse_vars(conn, vobjs, version): """Returns records of variables to include in template mapping file, a list of all stash variables + frequency available in model output and a list of variables already defined in db @@ -287,31 +287,33 @@ def parse_vars(conn, rows, version): stash_vars = [] # looping through variables from file and attempt matches to db - for row in rows: - if row['name'][0] == "#" or row['name'] == 'name': - continue - else: - full, found = match_var(row, version, 'full', conn, full) + for v in vobjs: + #if row['name'][0] == "#" or row['name'] == 'name': + # continue + #else: + full, found = match_var(v, version, 'full', conn, full) # if no match, ignore model version first and then frequency - mopdb_log.debug(f"found perfect match: {found}") + #mopdb_log.debug(f"found 
perfect match: {found}") if not found: - no_ver, found = match_var(row, version, 'no_ver', conn, no_ver) + no_ver, found = match_var(v, version, 'no_ver', conn, no_ver) mopdb_log.debug(f"found no ver match: {found}") if not found: - no_frq, found = match_var(row, version, 'no_frq', conn, no_frq) + no_frq, found = match_var(v, version, 'no_frq', conn, no_frq) mopdb_log.debug(f"found no frq match: {found}") # make a last attempt to match using standard_name if not found: - if row['standard_name'] != '': - stdn, found = match_stdname(conn, row, stdn) + if v.standard_name != '': + stdn, found = match_stdname(conn, v, stdn) mopdb_log.debug(f"found stdnm match: {found}") if not found: - no_match = add_var(no_match, row, tuple([row['name']]+['']*8)) - stash_vars.append(f"{row['name']}-{row['frequency']}") + # use original var values for match + match = v.get_match() + no_match = add_var(no_match, v, v.get_match()) + stash_vars.append(f"{v.name}-{v.frequency}") return full, no_ver, no_frq, stdn, no_match, stash_vars -def add_var(vlist, row, match, stdnm=False): +def add_var(vlist, vobj, match, stdnm=False): """Add information from match to variable list and re-order fields so they correspond to final mapping output. @@ -323,35 +325,36 @@ def add_var(vlist, row, match, stdnm=False): mopdb_log = logging.getLogger('mopdb_log') # assign cmor_var from match and swap place with input_vars mopdb_log.debug(f"Assign cmor_var: {match}") - mopdb_log.debug(f"initial row: {row}") - var = row.copy() - var['cmor_var'] = match[0] - var['input_vars'] = match[1] - orig_name = var.pop('name') + mopdb_log.debug(f"initial variable definition: {vobj}") + #var = vobj.__dict__.copy() + var = MapVariable(match, vobj) + #var.cmor_var = match[0] + #vobj.input_vars = match[1] + # orig_name = var.pop('name') # assign realm from match - var['realm'] = match[4] + #var['realm'] = match[4] # with stdn assign cmorvar and table if only 1 match returned # otherwise assign table from match if stdnm: - var['input_vars'] = orig_name - if len(var['cmor_var']) == 1: - cmor_var, table = var['cmor_var'][0].split("-") - var['cmor_var'] = cmor_var - var['cmor_table'] = table - else: - var['cmor_table'] = match[6] + var.input_vars = vobj.name + if len(var.cmor_var) == 1: + cmor_var, table = var.cmor_var[0].split("-") + var.cmor_var = cmor_var + var.cmor_table = table + #else: + # var['cmor_table'] = match[6] # add calculation, positive and version - var['calculation'] = match[2] - var['positive'] = match[7] - var['version'] = match[5] + #var['calculation'] = match[2] + #var['positive'] = match[7] + #var['version'] = match[5] # maybe we should override units here rather than in check_realm_units # if units missing get them from match - if var['units'] is None or var['units'] == '': - var['units'] = match[8] + #if var['units'] is None or var['units'] == '': + # var['units'] = match[8] vlist.append(var) return vlist -def potential_vars(conn, rows, stash_vars, version): +def potential_vars(conn, vobjs, stash_vars, version): """Returns list of variables that can be potentially derived from model output. 
@@ -376,23 +379,23 @@ def potential_vars(conn, rows, stash_vars, version): pot_full = [] pot_part = [] pot_varnames = set() - for row in rows: + for v in vobjs: sql = f"""SELECT cmor_var,input_vars,calculation,frequency, realm,model,cmor_table,positive,units FROM mapping - WHERE input_vars like '%{row['name']}%'""" + WHERE input_vars like '%{v.name}%'""" results = query(conn, sql, first=False, logname='mopdb_log') - mopdb_log.debug(f"In potential: var {row['name']}, db results {results}") + mopdb_log.debug(f"In potential: var {v.name}, db results {results}") for r in results: allinput = r[1].split(" ") mopdb_log.debug(f"{len(allinput)> 1}") - mopdb_log.debug(all(f"{x}-{row['frequency']}" in stash_vars for x in allinput)) - if len(allinput) > 1 and all(f"{x}-{row['frequency']}" in stash_vars for x in allinput): + mopdb_log.debug(all(f"{x}-{v.frequency}" in stash_vars for x in allinput)) + if len(allinput) > 1 and all(f"{x}-{v.frequency}" in stash_vars for x in allinput): # if both version and frequency of applied mapping match # consider this a full matching potential var - if r[5] == version and r[3] == row['frequency']: - pot_full = add_var(pot_full, row, r) + if r[5] == version and r[3] == v.frequency: + pot_full = add_var(pot_full, v, r) else: - pot_part = add_var(pot_part, row, r) + pot_part = add_var(pot_part, v, r) pot_varnames.add(r[0]) return pot_full, pot_part, pot_varnames @@ -437,7 +440,6 @@ def write_map_template(conn, parsed, alias): write_vars(stdn, fwriter, div, sortby='input_vars') div = "# Derived variables: Use with caution!" write_vars(pot_part, fwriter, div, conn=conn) - #pot=True, conn=conn, sortby=0) div = "# Variables without mapping" write_vars(no_match, fwriter, div) mopdb_log.debug("Finished writing variables to mapping template") @@ -451,32 +453,36 @@ def write_vars(vlist, fwriter, div, conn=None, sortby='cmor_var'): mopdb_log = logging.getLogger('mopdb_log') if len(vlist) > 0: if type(div) is str: - divrow = {x:'' for x in vlist[0].keys()} + divrow = {x:'' for x in vlist[0].attrs()} divrow['cmor_var'] = div elif type(div) is list: divrow = {x:x for x in div} fwriter.writerow(divrow) - for var in sorted(vlist, key=itemgetter(sortby)): + dlist = [] + for var in vlist: if conn: var = check_realm_units(conn, var) - fwriter.writerow(var) + dlist.append( var.__dict__ ) + for dvar in sorted(dlist, key=itemgetter(sortby)): + dvar.pop('match') + fwriter.writerow(dvar) return -def map_variables(conn, rows, version): +def map_variables(conn, vobjs, version): """ """ mopdb_log = logging.getLogger('mopdb_log') # return lists of fully/partially matching variables and stash_vars # these are input_vars for calculation defined in already in mapping db full, no_ver, no_frq, stdn, no_match, stash_vars = parse_vars(conn, - rows, version) + vobjs, version) # remove duplicates from partially matched variables no_ver = remove_duplicate(no_ver) no_frq = remove_duplicate(no_frq, strict=False) no_match = remove_duplicate(no_match, strict=False) # check if more derived variables can be added based on all # input_vars being available - pot_full, pot_part, pot_varnames = potential_vars(conn, rows, + pot_full, pot_part, pot_varnames = potential_vars(conn, vobjs, stash_vars, version) # potential vars have always duplicates: 1 for each input_var pot_full = remove_duplicate(pot_full, strict=False) @@ -484,7 +490,16 @@ def map_variables(conn, rows, version): mopdb_log.info(f"Derived variables: {pot_varnames}") return full, no_ver, no_frq, stdn, no_match, pot_full, pot_part -def 
write_catalogue(conn, parsed, vobjs, fobjs, alias): +def get_map_obj(parsed): + """Returns list of variable objects to pass to intake""" + full, no_ver, no_frq, stdn, no_match, pot_full, pot_part = parsed + vobjs = [] + select = full + no_ver + no_frq + for v in select: + vobjs.append(v) + return vobjs + +def write_catalogue(conn, vobjs, fobjs, alias): """Write intake-esm catalogue and returns name """ mopdb_log = logging.getLogger('mopdb_log') @@ -579,11 +594,14 @@ def add_mapvars(vobjs, lines, path_list, alias): lines.append(fd) return lines -def load_vars(fname): +def load_vars(fname, indir=None): """Returns Variable and FPattern objs from varlist or map file. """ + mopdb_log = logging.getLogger('mopdb_log') vobjs = [] - fobjs = [] + fobjs = {} + if indir is not None: + indir = Path(indir) # distinguish between varlist and mapping file based on header with open(fname, 'r') as csvfile: reader = csv.DictReader(csvfile, delimiter=';') @@ -591,20 +609,31 @@ def load_vars(fname): #check_varlist(rows, fname) # set fobjs patterns = list(set(x['fpattern'] for x in rows)) - print(patterns) for pat in patterns: if pat != "": - fo = FPattern(fpattern, Path(indir)) - fobjs.append(fo) - patterns2 = [x['fpattern'] for x in rows] - sys.exit() - + fo = FPattern(pat, indir) + fobjs[pat] = fo if 'calculation' in rows[0].keys(): map_file = True + colname = 'input_vars' else: map_file = False - for row in rows[1:]: - row['fpattern'] - v = Variable(row['name'], ) - #for field in row[0]: - return vobjs, fobjs + colname = 'name' + for row in rows: + fo = fobjs[row['fpattern']] + vo = Variable(row[colname], fo) + for k,v in row.items(): + if k in ['realm', 'frequency']: + k = '_' + k + vo.__dict__[k] = v + if fo.realm == 'NArealm': + fo.realm = vo.realm + if fo.frequency == 'NAfrq': + fo.frequency = vo.frequency + fo.varlist.append(vo) + if map_file is True: + mvo = MapVariable(list(vo.get_match()), vo) + vobjs.append(mvo) + else: + vobjs.append(vo) + return map_file, vobjs, [x for x in fobjs.values()] diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 062e9dd..0f80b6d 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -323,12 +323,12 @@ def remove_duplicate(vlist, extra=[], strict=True): if strict is True: keys += ['frequency', 'realm'] if extra: - vid_list = [tuple(x[k] for k in keys) for x in extra] + vid_list = [tuple(getattr(x,k) for k in keys) for x in extra] mopdb_log.debug(f"vid_list: {vid_list}") final = [] for v in vlist: - vid = tuple(v[k] for k in keys) - mopdb_log.debug(f"var and vid: {v['cmor_var']}, {vid}") + vid = tuple(getattr(v,k) for k in keys) + mopdb_log.debug(f"var and vid: {v.cmor_var}, {vid}") if vid not in vid_list: final.append(v) vid_list.append(vid) @@ -340,8 +340,8 @@ def check_realm_units(conn, var): """ mopdb_log = logging.getLogger('mopdb_log') - vname = f"{var['cmor_var']}-{var['cmor_table']}" - if var['cmor_table'] is None or var['cmor_table'] == "": + vname = f"{var.cmor_var}-{var.cmor_table}" + if var.cmor_table is None or var.cmor_table == "": mopdb_log.warning(f"Variable: {vname} has no associated cmor_table") else: # retrieve modeling_realm, units from db cmor table @@ -353,12 +353,12 @@ def check_realm_units(conn, var): dbrealm = result[0] dbunits = result[1] # dbrealm could have two realms - if var['realm'] not in [dbrealm] + dbrealm.split(): - mopdb_log.info(f"Changing {vname} realm from {var['realm']} to {dbrealm}") - var['realm'] = dbrealm - if var['units'] != dbunits : - mopdb_log.info(f"Changing {vname} units from 
{var['units']} to {dbunits}") - var['units'] = dbunits + if var.realm not in [dbrealm] + dbrealm.split(): + mopdb_log.info(f"Changing {vname} realm from {var.realm} to {dbrealm}") + var.realm = dbrealm + if var.units != dbunits : + mopdb_log.info(f"Changing {vname} units from {var.units} to {dbunits}") + var.units = dbunits else: mopdb_log.warning(f"Variable {vname} not found in cmor table") return var diff --git a/src/mopper/mop_setup.py b/src/mopper/mop_setup.py index 8831595..441d05b 100755 --- a/src/mopper/mop_setup.py +++ b/src/mopper/mop_setup.py @@ -103,7 +103,7 @@ def find_matches(table, var, realm, frequency, varlist): realmdir = 'atmos' else: realmdir = match['realm'] - in_fname = match['filename'].split() + in_fname = match['fpattern'].split() match['file_structure'] = '' for f in in_fname: #match['file_structure'] += f"/{realmdir}/{f}* " diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index a5d1423..01b29d9 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -39,7 +39,7 @@ from pathlib import Path from mopper.calculations import * -from mopper.utils import read_yaml +from mopdb.utils import read_yaml from importlib.resources import files as import_files From 3c5e8b48dfedf597abd936107840fbd088355a38 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 24 Jul 2024 10:22:00 +1000 Subject: [PATCH 067/137] updated docs --- docs/cmor_conf.yaml | 1 + docs/gettingstarted.rst | 10 ++++++---- docs/mopdb_command.rst | 30 ++++++++++++++++++++++++++++++ src/mopper/mopper.py | 2 +- 4 files changed, 38 insertions(+), 5 deletions(-) diff --git a/docs/cmor_conf.yaml b/docs/cmor_conf.yaml index 8ff286d..5c93d65 100755 --- a/docs/cmor_conf.yaml +++ b/docs/cmor_conf.yaml @@ -93,3 +93,4 @@ cmor: # walltime in "hh:mm:ss" walltime: '8:00:00' mode: custom + conda_env: /g/data/.../mopper_env/bin/activate diff --git a/docs/gettingstarted.rst b/docs/gettingstarted.rst index 2e7181e..62aabd1 100644 --- a/docs/gettingstarted.rst +++ b/docs/gettingstarted.rst @@ -1,13 +1,15 @@ Starting with MOPPeR ==================== -A typical workflow to post-process an ACCESS or UM model output requires three steps. +A typical workflow to post-process an ACCESS or UM model output requires two steps. +The first step is creating the mapping for a spcific simualtion and it is done only once for an experiment. +The second step is to setup and run the actual post-processing. Step1: create a template for a mapping file ------------------------------------------- - *mopdb template -f -v -a * + *mopdb template -f -m -v -a * .. code-block:: console @@ -53,11 +55,11 @@ It also provides an intermediate varlist_.csv file that shows the informa Step2: Set up the working environment ------------------------------------- - *mop -c setup* + *mop setup -c * .. code-block:: console - $ mop -c exp_conf.yaml setup + $ mop setup -c exp_conf.yaml Simulation to process: cy286 Setting environment and creating working directory Output directory '/scratch/v45/pxp581/MOPPER_output/cy286' exists. diff --git a/docs/mopdb_command.rst b/docs/mopdb_command.rst index 421f39c..5c24196 100644 --- a/docs/mopdb_command.rst +++ b/docs/mopdb_command.rst @@ -11,6 +11,7 @@ This module is used to manage the mapping of raw output to CMIP style variables. 
diff --git a/docs/mopdb_command.rst b/docs/mopdb_command.rst
index 421f39c..5c24196 100644
--- a/docs/mopdb_command.rst
+++ b/docs/mopdb_command.rst
@@ -11,6 +11,7 @@ This module is used to manage the mapping of raw output to CMIP style variables.
 - **varlist** creates an initial list of variables and attributes based on actual files
 - **template** uses the above list to generate a template of mappings to use in the processing
+- **intake** uses the mappings to create an intake catalogue of the raw model output
 - **cmor** populates the database cmor variables table
 - **map** populates the database mappings table
 - **check** checks a variable list against the cmor database table to individuate variables without a definition
@@ -107,6 +108,35 @@ The other groups of records require checking, as either the version or the frequ
 ...
 
+Create an intake catalogue
+--------------------------
+
+This represents an extra step on top of the mapping, so it can start either from an existing mapping or from scratch by providing the model output path and a match.
+
+From output path:
+
+   mopdb intake -f <model output path> -m <match> -v <version> { -a <alias>}
+
+From varlist file:
+
+   mopdb intake -f <model output path> -fl <varlist file> -v <version> { -a <alias>}
+
+From mapping file:
+
+   mopdb intake -f <model output path> -fl <mapping file> -v <version> { -a <alias>}
+
+NB the model output path is still needed even when passing an existing mapping or variable list.
+
+`intake` will generate:
+* intake_<alias>.yaml - the main intake catalogue;
+* intake_<alias>.json - the intake-esm catalogue;
+* catalogue.csv.xz - a csv file containing a list of the assets.
+
+The esm-catalogue is a multi-variable catalogue, which means that each file can have more than one variable, as is usual for raw model output. While each file contains a lot of variables, a user can select just one or a few, and only these will be loaded as an xarray dataset. This is helpful with the UM output, where variables with different dimensions can co-exist in a file. In such cases, it is necessary to use preprocess to select variables with consistent dimensions to avoid concatenation issues. As this is the standard behaviour for multi-variable intake-esm catalogues, the user doesn't need to worry about it.
+
+The esm-intake catalogue also lists separately each variable that can be mapped to a cmor name and/or standard_name. This allows the cmor names and/or the standard_names to be used more effectively to query the data.
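+
+As a minimal sketch (not part of the package), the generated intake-esm catalogue could then be queried along these lines, assuming intake and intake-esm are installed, using the ``intake_<alias>.json`` name from above, and taking ``tas`` as a stand-in for any variable listed in the catalogue:
+
+.. code-block:: python
+
+    import intake
+
+    # open the intake-esm catalogue written by `mopdb intake`
+    cat = intake.open_esm_datastore("intake_<alias>.json")
+    # select one variable; the mapvar/standard_name columns can be queried too
+    subset = cat.search(variable="tas")
+    # files hold many variables, but only the requested ones are loaded
+    dsets = subset.to_dataset_dict()
+
 Get a list of variables from the model output
 ---------------------------------------------
 .. 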
code-block:: diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 6313edd..7a136c1 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -288,7 +288,7 @@ def mop_process(ctx): var_log.debug(f"{ctx.obj['reference_date']}") t_ax_val = cftime.date2num(axes['t_ax'], units=ctx.obj['reference_date'], calendar=ctx.obj['attrs']['calendar']) - var_log.debug(f"t_ax[3] {t_ax_val[3]}") + #var_log.debug(f"t_ax[3] {t_ax_val[3]}") t_bounds = None if cmor_tName in bounds_list: t_bounds = get_bounds(dsin[var1], axes['t_ax'], cmor_tName, From 683962f406438931682c3218f4ecf1502d500b6e Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 24 Jul 2024 13:08:45 +1000 Subject: [PATCH 068/137] switch temporarily to analysis3 stable --- src/mopper/setup_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py index da3dc6b..ff1e082 100755 --- a/src/mopper/setup_utils.py +++ b/src/mopper/setup_utils.py @@ -683,7 +683,7 @@ def define_template(ctx, flag, nrows): # for a list of packages module use /g/data/hh5/public/modules -module load conda/analysis3-unstable +module load conda/analysis3 {ctx.obj['conda_env']} cd {ctx.obj['appdir']} From 47f4b1acf64da92995c5d80b94b3fec548b753a3 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 31 Jul 2024 10:16:37 +1000 Subject: [PATCH 069/137] fixed #161 --- mappings/map_AUS2200.csv | 2 +- mappings/map_aerosol_CM2.csv | 2 +- mappings/map_atmos_CM2.csv | 2 +- mappings/map_land_CM2.csv | 2 +- mappings/map_land_ESM1.5.csv | 2 +- mappings/map_ocean_OM2.csv | 2 +- src/mopper/mop_utils.py | 38 +++++++++++++++++++++++++++++++----- src/mopper/mopper.py | 2 ++ 8 files changed, 41 insertions(+), 11 deletions(-) diff --git a/mappings/map_AUS2200.csv b/mappings/map_AUS2200.csv index 06e3df1..a6c1ab9 100644 --- a/mappings/map_AUS2200.csv +++ b/mappings/map_AUS2200.csv @@ -1,4 +1,4 @@ -#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;filename;long_name;standard_name +#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;fpattern;long_name;standard_name amdry;fld_s30i403;;kg m-2;time lat lon;10minPt;atmos;area: time: point;;AUS2200_A10min;AUS2200;;float32;22048000;2304;umnsa_spec;TOTAL COLUMN DRY MASS RHO GRID; amwet;fld_s30i404;;kg m-2;time lat lon;10minPt;atmos;area: time: point;;AUS2200_A10min;AUS2200;;float32;22048000;2304;umnsa_spec;TOTAL COLUMN WET MASS RHO GRID;atmosphere_mass_per_unit_area cl;fld_s00i265;level_to_height(var[0],levs=(0,66));1;time model_theta_level_number lat lon;1hrPt;atmos;area: time: point;;AUS2200_A1hr;AUS2200;float32;1543360000;384;umnsa_cldrad;AREA CLOUD FRACTION IN EACH LAYER;cloud_area_fraction_in_atmosphere_layer diff --git a/mappings/map_aerosol_CM2.csv b/mappings/map_aerosol_CM2.csv index 4c7b1b3..18d671a 100644 --- a/mappings/map_aerosol_CM2.csv +++ b/mappings/map_aerosol_CM2.csv @@ -1,4 +1,4 @@ -#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;filename;long_name;standard_name +#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;fpattern;long_name;standard_name abs550aer;fld_s02i240 fld_s02i241 fld_s02i242 fld_s02i243 fld_s02i585;optical_depth(var,3);1;time pseudo_level_0 lat lon;mon;aerosol;area: time: mean;;CMIP6_AERmon;CM2;float32;663552;12;cw323a.pm;Ambient Aerosol 
Absorption Optical Thickness at 550nm;atmosphere_absorption_optical_thickness_due_to_ambient_aerosol_particles dryss;fld_s38i218 fld_s38i219;calc_depositions(var);kg m-2 s-1;time model_theta_level_number lat lon;mon;aerosol;area: time: mean;;CMIP6_AERmon;CM2;float32;9400320;12;cw323a.pm;Dry Deposition Rate of Sea-Salt Aerosol;minus_tendency_of_atmosphere_mass_content_of_sea_salt_dry_aerosol_particles_due_to_dry_deposition lwp;fld_s30i405;;kg m-2;time lat lon;mon;aerosol;area: time: mean;;CMIP6_AERmon;CM2;float32;110592;12;cw323a.pm;TOTAL COLUMN QCL RHO GRID;atmosphere_cloud_liquid_water_content diff --git a/mappings/map_atmos_CM2.csv b/mappings/map_atmos_CM2.csv index 5d6f369..b2cc1c3 100644 --- a/mappings/map_atmos_CM2.csv +++ b/mappings/map_atmos_CM2.csv @@ -1,4 +1,4 @@ -#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;filename;long_name;standard_name +#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;fpattern;long_name;standard_name amdry;fld_s30i403;;kg m-2;time lat lon;mon;atmos;area: time: mean;;CM2_mon;CM2;float32;110592;12;cw323a.pm;TOTAL COLUMN DRY MASS RHO GRID;atmosphere_mass_per_unit_area amwet;fld_s30i404;;kg m-2;time lat lon;mon;atmos;area: time: mean;;CM2_mon;CM2;float32;110592;12;cw323a.pm;TOTAL COLUMN WET MASS RHO GRID;atmosphere_mass_per_unit_area ci;fld_s05i269;;1;time lat lon;mon;atmos;area: time: mean;;CMIP6_Amon;CM2;float32;110592;12;cw323a.pm;deep convection indicator; diff --git a/mappings/map_land_CM2.csv b/mappings/map_land_CM2.csv index 92da281..af1f539 100644 --- a/mappings/map_land_CM2.csv +++ b/mappings/map_land_CM2.csv @@ -1,4 +1,4 @@ -#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;filename;long_name;standard_name +#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;fpattern;long_name;standard_name agesno;fld_s03i832 fld_s03i317;average_tile(var[0],tilefrac=var[1]);mon;time pseudo_level_1 lat lon;mon;landIce land;area: time: mean;;CMIP6_LImon;CM2;float32;1880064;12;cw323a.pm;CABLE SNOW AGE ON TILES;age_of_surface_snow baresoilFrac;fld_s03i317 fld_s03i395;extract_tilefrac(var[0],14,landfrac=var[1],lev='typebare');1;time lat lon;mon;land;area: mean where land over all_area_types time: mean;;CMIP6_Lmon;CM2;float32;110592;12;cw323a.pm;Bare Soil Percentage Area Coverage;area_fraction c3PftFrac;fld_s03i317 fld_s03i395;extract_tilefrac(var[0],[1,2,3,4,5,6,8,9,11],landfrac=var[1],lev='typec3pft');1;time pseudo_level_1 lat lon;mon;land;area: mean where land over all_area_types time: mean;;CMIP6_Lmon;CM2;float32;1880064;12;cw323a.pm;Percentage Cover by C3 Plant Functional Type;area_fraction diff --git a/mappings/map_land_ESM1.5.csv b/mappings/map_land_ESM1.5.csv index 23b129a..56b59dd 100644 --- a/mappings/map_land_ESM1.5.csv +++ b/mappings/map_land_ESM1.5.csv @@ -1,4 +1,4 @@ -#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;filename;long_name;standard_name +#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;fpattern;long_name;standard_name agesno;fld_s03i832 fld_s03i317;average_tile(var[0],tilefrac=var[1]);mon;time pseudo_level_1 lat lon;mon;landIce land;area: time: mean;;CMIP6_LImon;ESM1.5;float32;1880064;12;cw323a.pm;CABLE 
SNOW AGE ON TILES;age_of_surface_snow baresoilFrac;fld_s03i317 fld_s03i395;extract_tilefrac(var[0],14,landfrac=var[1],lev='typebare');1;time lat lon;mon;land;area: mean where land over all_area_types time: mean;;CMIP6_Lmon;ESM1.5;float32;110592;12;cw323a.pm;Bare Soil Percentage Area Coverage;area_fraction c3PftFrac;fld_s03i317 fld_s03i395;extract_tilefrac(var[0],[1,2,3,4,5,6,8,9,11],landfrac=var[1],lev='typec3pft');1;time pseudo_level_1 lat lon;mon;land;area: mean where land over all_area_types time: mean;;CMIP6_Lmon;ESM1.5;float32;1880064;12;cw323a.pm;Percentage Cover by C3 Plant Functional Type;area_fraction diff --git a/mappings/map_ocean_OM2.csv b/mappings/map_ocean_OM2.csv index 224677f..073e4b4 100644 --- a/mappings/map_ocean_OM2.csv +++ b/mappings/map_ocean_OM2.csv @@ -1,4 +1,4 @@ -#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;filename;long_name;standard_name +#cmor_var;input_vars;calculation;units;dimensions;frequency;realm;cell_methods;positive;cmor_table;version;vtype;size;nsteps;fpattern;long_name;standard_name advectsweby;temp_sweby_advec;;W m-2;time st_ocean yt_ocean xt_ocean;mon;ocean;area: time: mean;;CM2_mon;OM2;float32;21600000;36;ocean_month.nc-;cp*rho*dzt*sweby advect tendency; agessc;age_global;;yr;time st_ocean yt_ocean xt_ocean;mon;ocean;area: time: mean;;CMIP6_Omon;OM2;float32;21600000;708;ocean_month.nc-;Age (global);sea_water_age_since_surface_contact areacello;dummy;get_areacello();1;time st_ocean yt_ocean xt_ocean;mon;ocean;area: sum;;CMIP6_Ofx;OM2;float32;21600000;708;ocean_month.nc-;t-cell thickness;cell_area diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index 01b29d9..ef41953 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -112,7 +112,8 @@ def _preselect(ds, varlist): if bounds is None: bounds = ds[c].attrs.get('edges', None) if bounds is not None: - bnds.extend(bounds.split()) + bnds.extend([b for b in bounds.split() if b in ds.variables]) + # check all bnds are in file varsel.extend(bnds) # remove attributes for boundaries for v in bnds: @@ -520,7 +521,7 @@ def ll_axis(ctx, ax, ax_name, ds, table, bounds_list): """ """ var_log = logging.getLogger(ctx.obj['var_log']) - var_log.debug(f"n ll_axis") + var_log.debug(f"in ll_axis") cmor.set_table(table) cmor_aName = get_cmorname(ax_name, ax) try: @@ -562,7 +563,6 @@ def define_grid(ctx, j_id, i_id, lat, lat_bnds, lon, lon_bnds): var_log.info("setup of lat,lon grid complete") return grid_id - @click.pass_context def get_coords(ctx, ovar, coords): """Get lat/lon and their boundaries from ancil file @@ -628,6 +628,9 @@ def get_axis_dim(ctx, var): axes['lat_ax'] = axis elif any(x in dim.lower() for x in ['nj', 'yu_ocean', 'yt_ocean']): axes['j_ax'] = axis + # have to add this because a simulation didn't have the dimenision variables + elif any(x in dim.lower() for x in ['nj', 'yu_ocean', 'yt_ocean']): + axes['j_ax'] = axis elif axis_name and 'X' in axis_name: if 'glon' in dim.lower(): axes['glon_ax'] = axis @@ -635,6 +638,9 @@ def get_axis_dim(ctx, var): axes['lon_ax'] = axis elif any(x in dim.lower() for x in ['ni', 'xu_ocean', 'xt_ocean']): axes['i_ax'] = axis + # have to add this because a simulation didn't have the dimenision variables + elif any(x in dim.lower() for x in ['ni', 'xu_ocean', 'xt_ocean']): + axes['i_ax'] = axis elif axis_name == 'Z' or any(x in dim for x in ['lev', 'heigth', 'depth']): axes['z_ax'] = axis #z_ax.attrs['axis'] = 'Z' @@ -723,10 +729,10 @@ def get_bounds(ctx, ds, axis, 
cmor_name, ax_val=None): if 'subhr' in frq: frq = ctx.obj['subhr'] + frq.split('subhr')[1] if 'bounds' in keys and not changed_bnds: - dim_bnds_val = ds[axis.bounds].values + calc, dim_bnds_val = get_bounds_values(ds, axis.bounds) var_log.info(f"Using dimension bounds: {axis.bounds}") elif 'edges' in keys and not changed_bnds: - dim_bnds_val = ds[axis.edges].values + calc, dim_bnds_val = get_bounds_values(ds, axis.edges) var_log.info(f"Using dimension edges as bounds: {axis.edges}") else: var_log.info(f"No bounds for {dim}") @@ -752,6 +758,7 @@ def get_bounds(ctx, ds, axis, cmor_name, ax_val=None): max_val = np.roll(min_val, -1) max_val[-1] = 1.5*ax_val[-1] - 0.5*ax_val[-2] dim_bnds_val = np.column_stack((min_val, max_val)) + var_log.debug(f"{axis.name} bnds: {dim_bnds_val}") except Exception as e: var_log.warning(f"dodgy bounds for dimension: {dim}") var_log.error(f"error: {e}") @@ -785,6 +792,27 @@ def get_bounds(ctx, ds, axis, cmor_name, ax_val=None): var_log.info(f"setting minimum {cmor_name} bound to 0") return dim_bnds_val +@click.pass_context +def get_bounds_values(ctx, ds, bname): + """Return values of axis bounds, if they're not in file + tries to get them from ancillary grid file instead. + """ + calc = False + var_log = logging.getLogger(ctx.obj['var_log']) + var_log.debug(f"Getting bounds values for {bname}") + ancil_file = ctx.obj[f"grid_{ctx.obj['realm']}"] + if bname in ds.variables: + bnds_val = ds[bname].values + elif ancil_file != "": + fname = f"{ctx.obj['ancils_path']}/{ancil_file}" + ancil = xr.open_dataset(fname) + if bname in ancil.variables: + bnds_val = ancil[bname].values + else: + var_log.info(f"Can't locate {bname} in data or ancil file") + bnds_val = None + calc = True + return calc, bnds_val @click.pass_context def get_attrs(ctx, infiles, var1): diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 7a136c1..1b6c769 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -318,6 +318,7 @@ def mop_process(ctx): axis_ids.append(z_ax_id) # if both i, j are defined setgrid if only one treat as lat/lon if axes['i_ax'] is not None and axes['j_ax'] is not None: + var_log.debug(f"Setting grid with {axes}") setgrid = True j_id = ij_axis(axes['j_ax'], 'j_index', tables[0]) i_id = ij_axis(axes['i_ax'], 'i_index', tables[0]) @@ -331,6 +332,7 @@ def mop_process(ctx): grid_id = define_grid(j_id, i_id, lat, lat_bnds, lon, lon_bnds) else: if axes['glat_ax'] is not None: + print("getting lat") lat_id = ll_axis(axes['glat_ax'], 'glat', dsin[var1], tables[1], bounds_list) axis_ids.append(lat_id) From 350b47080cae0ec5d8e423fa5d9305654a4a9bf7 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 31 Jul 2024 10:19:49 +1000 Subject: [PATCH 070/137] removed print --- src/mopper/mopper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 1b6c769..4b08c5a 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -332,7 +332,6 @@ def mop_process(ctx): grid_id = define_grid(j_id, i_id, lat, lat_bnds, lon, lon_bnds) else: if axes['glat_ax'] is not None: - print("getting lat") lat_id = ll_axis(axes['glat_ax'], 'glat', dsin[var1], tables[1], bounds_list) axis_ids.append(lat_id) From 1e31e25e425597ca2ef38367f3700b140de54850 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 31 Jul 2024 11:23:49 +1000 Subject: [PATCH 071/137] fixed missing dbname in model_vars arguments --- src/mopdb/mopdb.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 
fee8f45..c21d647 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -77,7 +77,9 @@ def db_args(f): def map_args(f): - """Define mapping click options for varlist and template commands""" + """Define mapping click options for varlist, template, intake + commands + """ constraints = [ click.option('--fpath', '-f', type=str, required=True, callback=require_date, @@ -498,7 +500,7 @@ def update_map(ctx, dbname, fname, alias): @mopdb.command(name='varlist') @map_args @click.pass_context -def model_vars(ctx, fpath, match, version, alias): +def model_vars(ctx, fpath, match, dbname, version, alias): """Read variables from model output opens one file for each kind, save variable list as csv file From e3c2bedc8681aa89fa279be10e023e5d68098c7f Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 31 Jul 2024 12:37:40 +1000 Subject: [PATCH 072/137] added import of update_db to mopdb.py --- src/mopdb/mopdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index c21d647..40757b2 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -30,7 +30,7 @@ from mopdb.mopdb_utils import (mapping_sql, cmorvar_sql, read_map, read_map_app4, map_update_sql, create_table, write_cmor_table, - check_varlist) + check_varlist, update_db) from mopdb.utils import * from mopdb.mopdb_map import (write_varlist, write_map_template, write_catalogue, map_variables, load_vars, get_map_obj) From f9c48a5fc9f45790989d4afbfb4852bb4e001f69 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 2 Aug 2024 15:55:38 +1000 Subject: [PATCH 073/137] attempt 1 actions --- .github/workflows/mopper-pytest.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 430693f..8a9825f 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -4,7 +4,8 @@ name: mopper-conda-install-test on: push: branches: - - prerelease + #- prerelease + - class pull_request: branches: - main @@ -38,6 +39,10 @@ jobs: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + # making sure we are testing installed package + - name: Install package + run: | + conda env install --name base - name: Test with pytest run: | conda install pytest coverage codecov --solver classic From 7c1a243758f69db53bd8974cb6c55e1e7263d821 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 2 Aug 2024 16:02:40 +1000 Subject: [PATCH 074/137] attempt 2 actions --- .github/workflows/mopper-pytest.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 8a9825f..1bed83d 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -40,9 +40,9 @@ jobs: # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics # making sure we are testing installed package - - name: Install package - run: | - conda env install --name base + #- name: Install package + # run: | + # conda env install --name base - name: Test with pytest run: | conda install pytest coverage codecov --solver classic From 8039925e93bd38f0882632b17f06d59643207bdd Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 2 Aug 2024 16:13:54 +1000 Subject: [PATCH 075/137] attempt 3 actions, sub flake8 with ruff --- .github/workflows/mopper-pytest.yaml | 19 +++++++++++-------- .github/workflows/mopper-test-calcs.yaml | 2 +- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 1bed83d..4ddcd48 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -1,4 +1,4 @@ -name: mopper-conda-install-test +name: mopper-all-tests #on: [push] on: @@ -6,10 +6,10 @@ on: branches: #- prerelease - class - pull_request: - branches: - - main - - prerelease + #pull_request: + # branches: + # - main + # - prerelease jobs: @@ -34,11 +34,14 @@ jobs: conda env update --file conda/environment.yaml --name base - name: Lint with flake8 run: | - conda install flake8 --solver classic + conda install -c conda-forge ruff + ruff check --output-format=github . + continue-on-error: true + #conda install flake8 --solver classic # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + #flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + #flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics # making sure we are testing installed package #- name: Install package # run: | diff --git a/.github/workflows/mopper-test-calcs.yaml b/.github/workflows/mopper-test-calcs.yaml index 50d2512..ee830d6 100644 --- a/.github/workflows/mopper-test-calcs.yaml +++ b/.github/workflows/mopper-test-calcs.yaml @@ -1,4 +1,4 @@ -name: mopper-conda-install-test +name: mopper-branch-test #on: [push] on: From 9898c91fbdc37e7ef2bb981a82168813d75b8896 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 2 Aug 2024 16:35:13 +1000 Subject: [PATCH 076/137] attempt 4 actions --- .github/workflows/mopper-pytest.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 4ddcd48..2cb0e6d 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -34,7 +34,7 @@ jobs: conda env update --file conda/environment.yaml --name base - name: Lint with flake8 run: | - conda install -c conda-forge ruff + conda install -c conda-forge ruff --solver classic ruff check --output-format=github . continue-on-error: true #conda install flake8 --solver classic @@ -43,9 +43,9 @@ jobs: # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide #flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics # making sure we are testing installed package - #- name: Install package - # run: | - # conda env install --name base + - name: Install package + run: | + pip install -e . 
- name: Test with pytest run: | conda install pytest coverage codecov --solver classic From 87c00b153d582c30adac5c6c959b882111c95ea8 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 2 Aug 2024 16:49:03 +1000 Subject: [PATCH 077/137] temporarily remove import of nri intake --- src/mopdb/mopdb_map.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index 4f098ba..3bef37c 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -30,7 +30,7 @@ from pathlib import Path from itertools import compress from importlib.resources import files as import_files -from access_nri_intake.source.builders import AccessEsm15Builder +#from access_nri_intake.source.builders import AccessEsm15Builder from mopdb.mopdb_class import FPattern, Variable, MapVariable from mopdb.utils import * From 6429a5697939d14b6e8a3489d4148512f78bcf0a Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 6 Aug 2024 12:07:15 +1000 Subject: [PATCH 078/137] do not try to get frequency if only 1 file, fixed getting match data if matched by standard_name --- .github/workflows/mopper-test-conda.yaml | 44 ++++++++++++++++++++++++ src/mopdb/mopdb_class.py | 1 + src/mopdb/mopdb_map.py | 30 +++++++++------- 3 files changed, 63 insertions(+), 12 deletions(-) create mode 100644 .github/workflows/mopper-test-conda.yaml diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml new file mode 100644 index 0000000..471e1fe --- /dev/null +++ b/.github/workflows/mopper-test-conda.yaml @@ -0,0 +1,44 @@ +name: Test Build of mopper conda package + +# Controls when the action will run. +on: + # Triggers the workflow on push or pull request events but only for the prerelase branch + push: + branches: [ prerelease ] + pull_request: + branches: [ prerelease ] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + conda_deployment_with_new_tag: + name: Test conda deployment of package with Python ${{ matrix.python-version }} + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9", "3.10", "3.11"] + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Conda environment creation and activation + uses: conda-incubator/setup-miniconda@v2 + with: + python-version: ${{ matrix.python-version }} + environment-file: devtools/conda-envs/build_env.yaml # Path to the build conda environment + auto-update-conda: false + auto-activate-base: false + show-channel-urls: true + - name: Build but do not upload the conda packages + uses: uibcdf/action-build-and-upload-conda-packages@v1.3.0 + with: + meta_yaml_dir: devtools/conda-build + python-version: ${{ matrix.python-version }} # Values previously defined in `matrix` + platform_linux-64: true + platform_osx-64: true + platform_win-64: true + user: uibcdf + label: auto + upload: false + token: ${{ secrets.ANACONDA_TOKEN }} # Replace with the right name of your secret diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index 8b73805..373903b 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -163,6 +163,7 @@ class MapVariable(): def __init__(self, match: list, vobj: Variable): # path object self.fpattern = vobj.fpattern + print(match) # mapping attributes self.frequency = vobj.frequency self.realm = match[4] diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index 3bef37c..e9b529b 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -166,15 +166,18 @@ def 
write_varlist(conn, indir, match, version, alias): ds = xr.open_dataset(str(fobj.files[0]), decode_times=False) coords = [c for c in ds.coords] + ['latitude_longitude'] #pass next file in case of 1 timestep per file and no frq in name - fnext = str(fobj.files[1]) - if fobj.frequency == 'NAfrq' or fobj.realm == 'atmos': - frq_dict = get_file_frq(ds, fnext) - # if only one frequency detected empty dict - if len(frq_dict) == 1: - fobj.frequency = frq_dict.popitem()[1] - else: - fobj.multiple_frq = True - fobj.frequency = frq_dict['time'] + if len(fobj.files) > 1: + fnext = str(fobj.files[1]) + if fobj.frequency == 'NAfrq' or fobj.realm == 'atmos': + frq_dict = get_file_frq(ds, fnext) + # if only one frequency detected empty dict + if len(frq_dict) == 1: + fobj.frequency = frq_dict.popitem()[1] + else: + fobj.multiple_frq = True + fobj.frequency = frq_dict['time'] + else: + mopdb_log.info(f"Only 1 file cannot determine frequency for: {fpattern}") mopdb_log.debug(f"Multiple frq: {fobj.multiple_frq}") if fobj.realm == "NArealm": fobj.realm = get_realm(version, ds) @@ -226,7 +229,9 @@ def match_stdname(conn, vobj, stdn): results = query(conn, sql, first=False, logname='mopdb_log') matches = [x[0] for x in results] if len(matches) > 0: - stdn = add_var(stdn, vobj, tuple([matches]+['']*7), stdnm=True) + vmatch = vobj.get_match() + stdn = add_var(stdn, vobj, tuple([matches]+list(vmatch[1:])), + stdnm=True) found_match = True return stdn, found_match @@ -307,8 +312,9 @@ def parse_vars(conn, vobjs, version): mopdb_log.debug(f"found stdnm match: {found}") if not found: # use original var values for match - match = v.get_match() - no_match = add_var(no_match, v, v.get_match()) + vmatch = v.get_match() + mopdb_log.debug(f"Getting match from variable: {vmatch}") + no_match = add_var(no_match, v, vmatch) stash_vars.append(f"{v.name}-{v.frequency}") return full, no_ver, no_frq, stdn, no_match, stash_vars From ffb234a20382ab1dc9de287b5a248c2db851f1ff Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 6 Aug 2024 12:08:05 +1000 Subject: [PATCH 079/137] removed print --- src/mopdb/mopdb_class.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index 373903b..8b73805 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -163,7 +163,6 @@ class MapVariable(): def __init__(self, match: list, vobj: Variable): # path object self.fpattern = vobj.fpattern - print(match) # mapping attributes self.frequency = vobj.frequency self.realm = match[4] From fdddbbdf937e7bd7b7b4a3683728e4ad9ec9f366 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 6 Aug 2024 15:38:28 +1000 Subject: [PATCH 080/137] switching to pyproject.toml --- pyproject.toml | 52 ++++++++++++++++++++++++++++ setup.cfg | 49 -------------------------- src/{mopper => mopdata}/update_db.py | 0 src/mopper/mop_setup.py | 2 +- 4 files changed, 53 insertions(+), 50 deletions(-) create mode 100644 pyproject.toml delete mode 100644 setup.cfg rename src/{mopper => mopdata}/update_db.py (100%) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..dc328d7 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,52 @@ +[build-system] +requires = ["setuptools>=64.0.0", "setuptools-scm", "pbr>=6.0.0"] +build-backend = "pbr.build" +#build-backend = "setuptools.build_meta" + +[project] +name = "ACCESS-MOPPeR" +url = https://github.com/ACCESS-Hive/ACCESS-MOPPeR + +authors = [ + {name = "Paola Petrelli", email = "paola.petrelli@utas.edu.au"}, + {name = "Sam Green", email = 
"sam.green@unsw.edu.au"}, +] +description = "ACCESS Model Output Post-Processor, maps raw model output to CMIP-style defined variables and produce post-processed output using CMOR3" +readme = "README.md" +requires-python = ">=3.8" +keywords = ["ACCESS", "post-processing"] +license = {text = "Apache-2.0"} +classifiers = [ + "Development Status :: 3 - Alpha", + "Environment :: Console", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: Apache Software License", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", +] +dynamic = ["version", "dependencies"] + +[tool.setuptools-git-versioning] +enabled = true + +[project.scripts] +mop = "mopper.mopper:mop_catch" +mopdb = "mopdb.mopdb:mopdb_catch" + +[tool.setuptools.dynamic] +dependencies = {file = "requirements.txt"} + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.setuptools] +include_package_data = True + +[tool.setuptools.package-data] +mopdata = ["*.json", "*.yaml", "*.db", "*.csv", "update_db.py"] + + +# ... other project metadata fields as listed in: +# https://packaging.python.org/en/latest/guides/writing-pyproject-toml/ diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 677f9bc..0000000 --- a/setup.cfg +++ /dev/null @@ -1,49 +0,0 @@ -[metadata] -name = mopper -url = https://github.com/ACCESS-Hive/ACCESS-MOPPeR -author = Paola Petrelli, Sam Green -author_email = paola.petrelli@utas.edu.au, sam.green@unsw.edu.au -summary = 'ACCESS Model Output Post-Processor, maps raw model output to CMIP-style defined variables and produce post-processed output using CMOR3' -description_file = README.md -licence = 'Apache-2.0' -keywords = 'ACCESS model' -classifier = - Development Status :: 3 - Alpha - Environment :: Console - Intended Audience :: Science/Research - License :: OSI Approved :: Apache Software License - Operating System :: POSIX :: Linux - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - -[options] -packages = find_namespace: -package_dir = - = src -include_package_data = True - -[options.packages.find] -where = src - -[options.package_data] -mopdata = *.json, *.yaml, *.db, *.csv -mopper = update_db.py - -[pbr] -autodoc_tree_index_modules = True -autodoc_tree_excludes = - setup.py - test - docs/conf.py - -[entry_points] -console_scripts = - mop = mopper.mopper:mop_catch - mopdb = mopdb.mopdb:mopdb_catch - -[build_sphinx] -source_dir = docs -build_dir = docs/_build - -[tool:pytest] -addopts = --doctest-modules --doctest-glob='*.rst' --ignore setup.py --ignore docs/conf.py diff --git a/src/mopper/update_db.py b/src/mopdata/update_db.py similarity index 100% rename from src/mopper/update_db.py rename to src/mopdata/update_db.py diff --git a/src/mopper/mop_setup.py b/src/mopper/mop_setup.py index 441d05b..cc4b0e1 100755 --- a/src/mopper/mop_setup.py +++ b/src/mopper/mop_setup.py @@ -417,6 +417,6 @@ def manage_env(ctx): else: fname = ctx.obj[f] shutil.copyfile(fpath, ctx.obj['tpath'] / fname) - update_code = import_files('mopper').joinpath("update_db.py") + update_code = import_files('mopdata').joinpath("update_db.py") shutil.copyfile(update_code, ctx.obj['outpath'] / "update_db.py") return From b0e8497ce57012737795f87de7a83ec72e06fd21 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 6 Aug 2024 16:09:38 +1000 Subject: [PATCH 081/137] removed url form pyporject.toml --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git 
a/pyproject.toml b/pyproject.toml index dc328d7..682b28f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,6 @@ build-backend = "pbr.build" [project] name = "ACCESS-MOPPeR" -url = https://github.com/ACCESS-Hive/ACCESS-MOPPeR authors = [ {name = "Paola Petrelli", email = "paola.petrelli@utas.edu.au"}, From f805da2fea9fd16bf9e2dfc68f9b823c9f09566a Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 6 Aug 2024 16:24:14 +1000 Subject: [PATCH 082/137] try removing include_package_data --- pyproject.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 682b28f..c606a5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,9 +40,6 @@ dependencies = {file = "requirements.txt"} [tool.setuptools.packages.find] where = ["src"] -[tool.setuptools] -include_package_data = True - [tool.setuptools.package-data] mopdata = ["*.json", "*.yaml", "*.db", "*.csv", "update_db.py"] From f12d0cff510b9be1de20c868e3e120a92baae32f Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 6 Aug 2024 16:32:35 +1000 Subject: [PATCH 083/137] trying to re-introduce setup.cfg --- setup.cfg | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 setup.cfg diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..48b922f --- /dev/null +++ b/setup.cfg @@ -0,0 +1,13 @@ +[pbr] +autodoc_tree_index_modules = True +autodoc_tree_excludes = + setup.py + test + docs/conf.py + +[build_sphinx] +source_dir = docs +build_dir = docs/_build + +[tool:pytest] +addopts = --doctest-modules --doctest-glob='*.rst' --ignore setup.py --ignore docs/conf.py From 0d60f373948b2c5f2fe301c9e72c56c1289421ad Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 6 Aug 2024 16:37:14 +1000 Subject: [PATCH 084/137] removing pbr --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c606a5a..0bc0d99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [build-system] -requires = ["setuptools>=64.0.0", "setuptools-scm", "pbr>=6.0.0"] -build-backend = "pbr.build" -#build-backend = "setuptools.build_meta" +requires = ["setuptools>=64.0.0", "setuptools-scm"] +#build-backend = "pbr.build" +build-backend = "setuptools.build_meta" [project] name = "ACCESS-MOPPeR" From 668a61f89284971ab7a75ad892d74a8d081398e3 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 6 Aug 2024 16:44:56 +1000 Subject: [PATCH 085/137] removed pbr from setup.py --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 52f1ab9..ba5a4e2 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,6 @@ from setuptools import setup setup( - setup_requires=['pbr', 'setuptools'], - pbr=True, + setup_requires=['setuptools-scm', 'setuptools'], ) From bf491608604d7f407c0dd7f1839f73a5a5e3cfa2 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 7 Aug 2024 10:04:44 +1000 Subject: [PATCH 086/137] storing update.py as txt --- src/mopdata/{update_db.py => update_db.py.txt} | 0 src/mopper/mop_setup.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename src/mopdata/{update_db.py => update_db.py.txt} (100%) diff --git a/src/mopdata/update_db.py b/src/mopdata/update_db.py.txt similarity index 100% rename from src/mopdata/update_db.py rename to src/mopdata/update_db.py.txt diff --git a/src/mopper/mop_setup.py b/src/mopper/mop_setup.py index cc4b0e1..5c1e04b 100755 --- a/src/mopper/mop_setup.py +++ b/src/mopper/mop_setup.py @@ -417,6 +417,6 @@ def manage_env(ctx): else: fname = ctx.obj[f]
shutil.copyfile(fpath, ctx.obj['tpath'] / fname) - update_code = import_files('mopdata').joinpath("update_db.py") + update_code = import_files('mopdata').joinpath("update_db.py.txt") shutil.copyfile(update_code, ctx.obj['outpath'] / "update_db.py") return From 3fe92d72515ad69061f6bd5ab128fcefe57e660a Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 7 Aug 2024 14:28:33 +1000 Subject: [PATCH 087/137] Update requirements.txt added cftime --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 7b8bd45..233e9da 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ cmor xarray numpy pyyaml +cftime From a6dde1ae1b799967ebdad2ed590f4781b1cb587c Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 7 Aug 2024 14:30:29 +1000 Subject: [PATCH 088/137] Update environment.yaml removed unnecessary packages --- conda/environment.yaml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/conda/environment.yaml b/conda/environment.yaml index 2f0d566..d66b631 100644 --- a/conda/environment.yaml +++ b/conda/environment.yaml @@ -3,9 +3,6 @@ channels: - defaults - conda-forge dependencies: - - python=3.10 - - pip - - pbr - click - cmor - xarray @@ -13,10 +10,3 @@ dependencies: - dask - pyyaml - cftime - - python-dateutil - - pytest - - coverage - - codecov - - importlib_resources - - pip: - - git+https://github.com/ACCESS-Community-Hub/ACCESS-MOPPeR@pytests_sam From 6ad967fef47919ca2ec0969242ee36c9d715e5f9 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 7 Aug 2024 14:32:15 +1000 Subject: [PATCH 089/137] Update environment.yaml --- conda/environment.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/conda/environment.yaml b/conda/environment.yaml index d66b631..7c2904b 100644 --- a/conda/environment.yaml +++ b/conda/environment.yaml @@ -1,6 +1,5 @@ name: test-env channels: - - defaults - conda-forge dependencies: - click From 54cc2d89d45e1deecd82b9a2fbc3f96f947c59b7 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 7 Aug 2024 17:58:40 +1000 Subject: [PATCH 090/137] more tests, added fake fs --- .github/workflows/mopper-test-conda.yaml | 10 ++-- conda/enviroment.yaml | 9 ---- conda/environment.yaml | 2 +- src/mopdb/mopdb_map.py | 38 ++++++++------- tests/conftest.py | 62 +++++++++++++++++++----- tests/test_mop_utils.py | 8 +-- tests/test_mopdb_map.py | 59 ++++++++++++++++++++++ tests/test_mopdb_utils.py | 16 +----- tests/testdata/varlist_ex.csv | 2 +- 9 files changed, 142 insertions(+), 64 deletions(-) delete mode 100644 conda/enviroment.yaml create mode 100644 tests/test_mopdb_map.py diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index 471e1fe..fd46b01 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -26,19 +26,19 @@ jobs: uses: conda-incubator/setup-miniconda@v2 with: python-version: ${{ matrix.python-version }} - environment-file: devtools/conda-envs/build_env.yaml # Path to the build conda environment + environment-file: conda/environment.yaml # Path to the build conda environment auto-update-conda: false auto-activate-base: false - show-channel-urls: true + show-channel-urls: true # - name: Build but do not upload the conda packages - uses: uibcdf/action-build-and-upload-conda-packages@v1.3.0 + uses: coecms/action-build-and-upload-conda-packages@v1.3.0 with: - meta_yaml_dir: devtools/conda-build + meta_yaml_dir: conda python-version: ${{ matrix.python-version }} # Values previously defined in
`matrix` platform_linux-64: true platform_osx-64: true platform_win-64: true - user: uibcdf + user: coecms label: auto upload: false token: ${{ secrets.ANACONDA_TOKEN }} # Replace with the right name of your secret diff --git a/conda/enviroment.yaml b/conda/enviroment.yaml deleted file mode 100644 index 3856ac4..0000000 --- a/conda/enviroment.yaml +++ /dev/null @@ -1,9 +0,0 @@ -channels: - - conda-forge -dependencies: - - click - - cmor - - xarray - - numpy - - pyyaml - - dask diff --git a/conda/environment.yaml b/conda/environment.yaml index ae78e08..d66b631 100644 --- a/conda/environment.yaml +++ b/conda/environment.yaml @@ -1,4 +1,4 @@ -#name: test-env +name: test-env channels: - defaults - conda-forge diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index e9b529b..9db15eb 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -108,12 +108,16 @@ def get_file_frq(ds, fnext): # if all time axes have only 1 timestep we cannot infer frequency # so we open also next file but get only time axs if max_len == 1: - dsnext = xr.open_dataset(fnext, decode_times = False) - time_axs2 = [d for d in dsnext.dims if 'time' in d] - ds = xr.concat([ds[time_axs], dsnext[time_axs2]], dim='time') - time_axs = [d for d in ds.dims if 'time' in d] - time_axs_len = set(len(ds[d]) for d in time_axs) - time_axs.sort(key=lambda x: len(ds[x]), reverse=True) + if fnext is None: + mopdb_log.info(f"Only 1 file cannot determine frequency for: {fpattern}") + return frq + else: + dsnext = xr.open_dataset(fnext, decode_times = False) + time_axs2 = [d for d in dsnext.dims if 'time' in d] + ds = xr.concat([ds[time_axs], dsnext[time_axs2]], dim='time') + time_axs = [d for d in ds.dims if 'time' in d] + time_axs_len = set(len(ds[d]) for d in time_axs) + time_axs.sort(key=lambda x: len(ds[x]), reverse=True) for t in time_axs: mopdb_log.debug(f"len of time axis {t}: {len(ds[t])}") if len(ds[t]) > 1: @@ -166,18 +170,18 @@ def write_varlist(conn, indir, match, version, alias): ds = xr.open_dataset(str(fobj.files[0]), decode_times=False) coords = [c for c in ds.coords] + ['latitude_longitude'] #pass next file in case of 1 timestep per file and no frq in name - if len(fobj.files) > 1: - fnext = str(fobj.files[1]) - if fobj.frequency == 'NAfrq' or fobj.realm == 'atmos': - frq_dict = get_file_frq(ds, fnext) - # if only one frequency detected empty dict - if len(frq_dict) == 1: - fobj.frequency = frq_dict.popitem()[1] - else: - fobj.multiple_frq = True - fobj.frequency = frq_dict['time'] + if len(fobj.files) == 1: + fnext = None else: - mopdb_log.info(f"Only 1 file cannot determine frequency for: {fpattern}") + fnext = str(fobj.files[1]) + if fobj.frequency == 'NAfrq' or fobj.realm == 'atmos': + frq_dict = get_file_frq(ds, fnext) + # if only one frequency detected empty dict + if len(frq_dict) == 1: + fobj.frequency = frq_dict.popitem()[1] + else: + fobj.multiple_frq = True + fobj.frequency = frq_dict['time'] mopdb_log.debug(f"Multiple frq: {fobj.multiple_frq}") if fobj.realm == "NArealm": fobj.realm = get_realm(version, ds) diff --git a/tests/conftest.py b/tests/conftest.py index 0dd6c56..6123524 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,13 +24,29 @@ import datetime import logging import csv +import pyfakefs +from pathlib import Path + from mopdb.mopdb_utils import mapping_sql, cmorvar_sql +from mopdb.mopdb_class import MapVariable, Variable, FPattern from mopper.setup_utils import filelist_sql TESTS_HOME = os.path.abspath(os.path.dirname(__file__)) TESTS_DATA = os.path.join(TESTS_HOME, 
"testdata") +# consecutive files with multiple time axes +dsmulti = os.path.join(TESTS_DATA, "multitime.nc") +dsmulti2 = os.path.join(TESTS_DATA, "multitime_next.nc") +# consecutive files with a 1-time step time axis +dsonestep = os.path.join(TESTS_DATA, "onetstep.nc") +dsonestep2 = os.path.join(TESTS_DATA, "onetstep_next.nc") +@pytest.fixture +def fake_fs(fs): # pylint:disable=invalid-name + """Variable name 'fs' causes a pylint warning. Provide a longer name + acceptable to pylint for use in tests. + """ + yield fs # setting up fixtures for databases:a ccess.db and mopper.db @pytest.fixture @@ -40,6 +56,15 @@ def session(): yield db_session connection.close() +@pytest.fixture +def input_dir(fake_fs): + dfrq = {'d': 'dai', '8': '3h', '7': '6h', 'm': 'mon'} + for date in ['201312', '201401', '201402']: + for k,v in dfrq.items(): + filebase = f"cm000a.p{k}{date}_{v}.nc" + fake_fs.create_file("/raw/atmos/"+ filebase) + assert os.path.exists("/raw/atmos/cm000a.p8201402_3h.nc") + @pytest.fixture def setup_access_db(session): @@ -100,15 +125,28 @@ def map_rows(): return maps @pytest.fixture -def um_multi_time(): - '''Return a um stule file with multiple time axes''' - time1 = pd.date_range("2001-01-01", periods=1) - time2 = pd.date_range("2001-01-01", periods=24, freq='h') - time3 = pd.date_range("2001-01-01", periods=48, freq='30min') - var1 = xr.DataArray(name='var1', data=[1], - dims=["time"], coords={"time": time1}) - var2 = xr.DataArray(name='var2', data=np.arange(24), - dims=["time_0"], coords={"time_0": time2}) - var3 = xr.DataArray(name='var3', data=np.arange(48), dims=["time_1"], - coords={"time_1": time3}) - return xr.merge([var1, var2, var3]) +def fobj(input_dir): + fobj = FPattern("cm000a.", Path("/raw/atmos/")) + return fobj + +@pytest.fixture +def var_obj(fobj): + vobj = Variable('tas', fobj) + return vobj + +@pytest.fixture +def mapvar_obj(var_obj): + match = ('','','','','','','','','') + mvobj = MapVariable(match, var_obj) + return mvobj + +@pytest.fixture +def varobjs(mapvar_obj): + mvobj = mapvar_obj + vobjs = [] + vobjs.append(mvobj) + mvobj.name = 'siconca' + vobjs.append(mvobj) + mvobj.name = 'hfls' + vobjs.append(mvobj) + return vobjs diff --git a/tests/test_mop_utils.py b/tests/test_mop_utils.py index 4889274..b47e158 100644 --- a/tests/test_mop_utils.py +++ b/tests/test_mop_utils.py @@ -71,10 +71,10 @@ def test_get_cmorname(caplog): foo = xr.DataArray(data, coords=[levs, tdata, lats, lons], dims=["lev", "t", "lat", "lon"]) with ctx: - tname = get_cmorname('t', foo.t, caplog, z_len=None) - iname = get_cmorname('lon', foo.lon, caplog, z_len=None) - jname = get_cmorname('lat', foo.lat, caplog, z_len=None) - zname = get_cmorname('z', foo.lev, caplog, z_len=3) + tname = get_cmorname('t', foo.t, z_len=None) + iname = get_cmorname('lon', foo.lon, z_len=None) + jname = get_cmorname('lat', foo.lat, z_len=None) + zname = get_cmorname('z', foo.lev, z_len=3) assert tname == 'time' assert iname == 'longitude' assert jname == 'latitude' diff --git a/tests/test_mopdb_map.py b/tests/test_mopdb_map.py new file mode 100644 index 0000000..38ac29e --- /dev/null +++ b/tests/test_mopdb_map.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# Copyright 2023 ARC Centre of Excellence for Climate Extremes +# author: Paola Petrelli +# author: Sam Green +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +import os +import sqlite3 +import click +import logging +import itertools +from mopdb.mopdb_map import * +from mopdb.mopdb_class import MapVariable, Variable, FPattern +from conftest import * + + +TESTS_HOME = os.path.abspath(os.path.dirname(__file__)) +TESTS_DATA = os.path.join(TESTS_HOME, "testdata") +# consecutive files with multiple time axes +dsmulti = os.path.join(TESTS_DATA, "multitime.nc") +dsmulti2 = os.path.join(TESTS_DATA, "multitime_next.nc") +# consecutive files with a 1-time step time axis +dsonestep = os.path.join(TESTS_DATA, "onetstep.nc") +dsonestep2 = os.path.join(TESTS_DATA, "onetstep_next.nc") + +@pytest.mark.parametrize('idx', [0,1,2]) +def test_add_var(varobjs, matches, idx, caplog): + caplog.set_level(logging.DEBUG, logger='mopdb_log') + vlist = [] + vlist = add_var(vlist, varobjs[idx], matches[idx]) + assert vlist[0].cmor_var == matches[idx][0] + + +def test_get_file_frq(caplog): + global dsmulti, dsmulti2, dsonestep, dsonestep2 + caplog.set_level(logging.DEBUG, logger='mopdb_log') + umfrq = {'time': 'day', 'time_0': '1hr', 'time_1': '30min'} + # multi time axes in file + ds = xr.open_dataset(dsmulti, decode_times=False) + out = get_file_frq(ds, dsmulti2) + assert umfrq == out + # only one time axis in file with 1 value + ds = xr.open_dataset(dsonestep, decode_times=False) + out = get_file_frq(ds, dsonestep2) + umfrq = {'time': 'day'} + assert umfrq == out + diff --git a/tests/test_mopdb_utils.py b/tests/test_mopdb_utils.py index 858697e..0f872a9 100644 --- a/tests/test_mopdb_utils.py +++ b/tests/test_mopdb_utils.py @@ -22,25 +22,11 @@ import logging import itertools from mopdb.mopdb_utils import * -from conftest import um_multi_time +from mopdb.mopdb_class import MapVariable, Variable, FPattern #from click.testing import CliRunner -@pytest.mark.parametrize('idx', [0,1,2]) -def test_add_var(varlist_rows, matches, idx, caplog): - caplog.set_level(logging.DEBUG, logger='mopdb_log') - vlist = [] - vlist = add_var(vlist, varlist_rows[idx], matches[idx]) - assert vlist[0]['cmor_var'] == matches[idx][0] - - -def test_build_umfrq(um_multi_time, caplog): - caplog.set_level(logging.DEBUG, logger='mopdb_log') - time_axs = [d for d in um_multi_time.dims if 'time' in d] - umfrq = {'time': 'day', 'time_0': '1hr', 'time_1': '30min'} - out = build_umfrq(time_axs, um_multi_time) - assert umfrq == out #@pytest.mark.parametrize('fname', [0,1,2]) def test_get_date_pattern(caplog): diff --git a/tests/testdata/varlist_ex.csv b/tests/testdata/varlist_ex.csv index 154729f..780142d 100644 --- a/tests/testdata/varlist_ex.csv +++ b/tests/testdata/varlist_ex.csv @@ -1,3 +1,3 @@ -name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;filename;long_name;standard_name +name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;fpattern;long_name;standard_name fld_s03i236;tas;degC;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature fld_s00i031;siconca;%;time lat lon;mon;atmos;area: time: mean;;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN 
SEA AFTER TSTEP;sea_ice_area_fraction fld_s03i234;hfls;W m-2;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE LATENT HEAT FLUX W/M2;surface_upward_latent_heat_flu From 33025660e89138fa4268f7bef19057cc242d937c Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 14:45:54 +1000 Subject: [PATCH 091/137] added test-requirements.txt --- .github/workflows/mopper-pytest.yaml | 2 +- pyproject.toml | 3 ++- test-requirements.txt | 3 +++ 3 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 test-requirements.txt diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 2cb0e6d..f1d08c1 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -48,7 +48,7 @@ jobs: pip install -e . - name: Test with pytest run: | - conda install pytest coverage codecov --solver classic + conda install pytest pyfakefs coverage codecov --solver classic conda run python -m pytest conda run coverage run --source src -m py.test - name: Upload to codecov diff --git a/pyproject.toml b/pyproject.toml index 0bc0d99..51aac13 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ] -dynamic = ["version", "dependencies"] +dynamic = ["version", "dependencies", "optional-dependencies"] [tool.setuptools-git-versioning] enabled = true @@ -36,6 +36,7 @@ mopdb = "mopdb.mopdb:mopdb_catch" [tool.setuptools.dynamic] dependencies = {file = "requirements.txt"} +optional-dependencies.test = { file = ["test-requirements.txt"] } [tool.setuptools.packages.find] where = ["src"] diff --git a/test-requirements.txt b/test-requirements.txt new file mode 100644 index 0000000..f6d89a6 --- /dev/null +++ b/test-requirements.txt @@ -0,0 +1,3 @@ +pytest +pyfakefs + From 136431441a67079ddc1b2c65e363b558c8dea818 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 15:14:35 +1000 Subject: [PATCH 092/137] improved test-conda action --- .github/workflows/mopper-test-conda.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index fd46b01..78c0e5a 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -19,11 +19,11 @@ jobs: matrix: python-version: ["3.9", "3.10", "3.11"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4.1.7 with: fetch-depth: 0 - name: Conda environment creation and activation - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: python-version: ${{ matrix.python-version }} environment-file: conda/environment.yaml # Path to the build conda environment @@ -31,7 +31,7 @@ jobs: auto-activate-base: false show-channel-urls: true # - name: Build but do not upload the conda packages - uses: coecms/action-build-and-upload-conda-packages@v1.3.0 + uses: uibcdf/action-build-and-upload-conda-packages@v1.3.0 with: meta_yaml_dir: conda python-version: ${{ matrix.python-version }} # Values previously defined in `matrix` From 2d66224ee130db2f2fd0b9c32cbba9cfa7ced1d2 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 16:06:16 +1000 Subject: [PATCH 093/137] more updates to conda and actions --- .github/workflows/mopper-pytest.yaml | 5 +++-- .github/workflows/mopper-test-conda.yaml | 4 ++-- conda/environment.yaml | 22 +++++++++++++--------- conda/meta.yaml | 19 
++++++++++++------- conda/test-env.yaml | 14 ++++++++++++++ requirements.txt | 5 ++++- 6 files changed, 48 insertions(+), 21 deletions(-) create mode 100644 conda/test-env.yaml diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index f1d08c1..9cf4e9f 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -31,7 +31,7 @@ jobs: echo $CONDA/bin >> $GITHUB_PATH - name: Install dependencies run: | - conda env update --file conda/environment.yaml --name base + conda env update --file conda/test-env.yaml --name base - name: Lint with flake8 run: | conda install -c conda-forge ruff --solver classic @@ -48,7 +48,8 @@ jobs: pip install -e . - name: Test with pytest run: | - conda install pytest pyfakefs coverage codecov --solver classic + conda install pytest coverage codecov --solver classic + conda install -c conda-forge pyfakefs conda run python -m pytest conda run coverage run --source src -m py.test - name: Upload to codecov diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index 78c0e5a..1132a54 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -36,8 +36,8 @@ jobs: meta_yaml_dir: conda python-version: ${{ matrix.python-version }} # Values previously defined in `matrix` platform_linux-64: true - platform_osx-64: true - platform_win-64: true + platform_osx-64: false + platform_win-64: false user: coecms label: auto upload: false diff --git a/conda/environment.yaml b/conda/environment.yaml index d66b631..ff12e08 100644 --- a/conda/environment.yaml +++ b/conda/environment.yaml @@ -1,12 +1,16 @@ -name: test-env channels: - - defaults - conda-forge + - coecms + - default + dependencies: - - click - - cmor - - xarray - - numpy - - dask - - pyyaml - - cftime + - anaconda-client + - conda-build + - conda-verify + #- click + #- cmor + #- xarray + #- numpy + #- dask + #- pyyaml + #- cftime diff --git a/conda/meta.yaml b/conda/meta.yaml index 05d58fc..c5fb0cf 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,18 +1,19 @@ package: name: mopper - version: 1.0.0 + version: "{{ environ['GIT_DESCRIBE_TAG'] }}" #source: # path: ./ source: #url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR/archive/refs/tags/{{version}}.tar.gz - git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git - git_rev: "{{ version }}" - git_depth: 1 # (Defaults to -1/not shallow) + #git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git + #git_rev: "{{ version }}" + #git_depth: 1 # (Defaults to -1/not shallow) + path: ../src build: - number: 0 + number: 1 noarch: python script: "{{ PYTHON }} -m pip install . 
--no-deps --ignore-installed" entry_points: @@ -23,7 +24,6 @@ requirements: host: - python - pip - - pbr run: - python - click @@ -34,12 +34,17 @@ requirements: - pyyaml - cftime - python-dateutil + test: source_files: + - tests/testdata/* - tests/testdata + requires: + - pytest + - pyfakefs about: home: https://github.com/ACCESS-Hive/ACCESS-MOPPeR license: Apache 2.0 #license_file: LICENSE.txt - summary: 'ACCESS-MOPPeR post-process ACCESS raw model output to ESGF data standards' + summary: 'ACCESS-MOPPeR post-process ACCESS raw model output using CMOR and pre-defined data standards' diff --git a/conda/test-env.yaml b/conda/test-env.yaml new file mode 100644 index 0000000..e7866c9 --- /dev/null +++ b/conda/test-env.yaml @@ -0,0 +1,14 @@ +channels: + - conda-forge + - coecms + - default + +dependencies: + - click + - cmor + - xarray + - numpy + - dask + - pyyaml + - cftime + - python-dateutil diff --git a/requirements.txt b/requirements.txt index 7b8bd45..0953f2f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,10 @@ # Add general dependencies here -# Optional dependencies e.g. [dev] are added in `setup.cfg` +# Optional dependencies e.g. [dev] are added in `test-requirements` click cmor xarray numpy pyyaml +dask +python-dateutil +cftime From 13ee445f99f0e564e3d686eb43612608132f3973 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 16:13:55 +1000 Subject: [PATCH 094/137] commenting codecov temporarily using solve classic --- .github/workflows/mopper-pytest.yaml | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 9cf4e9f..ff5072e 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -31,31 +31,27 @@ jobs: echo $CONDA/bin >> $GITHUB_PATH - name: Install dependencies run: | + conda config --set solver classic conda env update --file conda/test-env.yaml --name base - name: Lint with flake8 run: | - conda install -c conda-forge ruff --solver classic + conda install -c conda-forge ruff ruff check --output-format=github . continue-on-error: true - #conda install flake8 --solver classic - # stop the build if there are Python syntax errors or undefined names - #flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - #flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics # making sure we are testing installed package - name: Install package run: | pip install -e . 
- name: Test with pytest run: | - conda install pytest coverage codecov --solver classic + conda install pytest coverage codecov conda install -c conda-forge pyfakefs conda run python -m pytest - conda run coverage run --source src -m py.test - - name: Upload to codecov - if: steps.build.outcome == 'success' - run: | - curl -Os https://uploader.codecov.io/latest/linux/codecov - chmod +x codecov - ./codecov + #conda run coverage run --source src -m py.test + # - name: Upload to codecov + # if: steps.build.outcome == 'success' + # run: | + # curl -Os https://uploader.codecov.io/latest/linux/codecov + # chmod +x codecov + # ./codecov From 89058b8792a0f7a046298710ac7b0248d2b4cd09 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 16:29:54 +1000 Subject: [PATCH 095/137] more updates --- .github/workflows/mopper-pytest.yaml | 4 ++-- .github/workflows/mopper-test-conda.yaml | 2 +- conda/environment.yaml | 7 ------- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index ff5072e..dfe5f90 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -20,9 +20,9 @@ jobs: max-parallel: 5 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4.1.7 - name: Set up Python 3.10 - uses: actions/setup-python@v2 + uses: actions/setup-python@v5.1.1 with: python-version: '3.10' - name: Add conda to system path diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index 1132a54..0e89622 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -23,7 +23,7 @@ jobs: with: fetch-depth: 0 - name: Conda environment creation and activation - uses: conda-incubator/setup-miniconda@v3 + uses: conda-incubator/setup-miniconda@v3.0.4 with: python-version: ${{ matrix.python-version }} environment-file: conda/environment.yaml # Path to the build conda environment diff --git a/conda/environment.yaml b/conda/environment.yaml index ff12e08..069e0f9 100644 --- a/conda/environment.yaml +++ b/conda/environment.yaml @@ -7,10 +7,3 @@ dependencies: - anaconda-client - conda-build - conda-verify - #- click - #- cmor - #- xarray - #- numpy - #- dask - #- pyyaml - #- cftime From 52acabc8782e4dbe1379d876afb14cdb28a8f4b9 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 16:54:49 +1000 Subject: [PATCH 096/137] reverting solver as slows down conda install --- .github/workflows/mopper-pytest.yaml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index dfe5f90..4dee0cb 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -31,11 +31,12 @@ jobs: echo $CONDA/bin >> $GITHUB_PATH - name: Install dependencies run: | - conda config --set solver classic + #conda config --set solver classic + # this seems to slow it down!!! conda env update --file conda/test-env.yaml --name base - - name: Lint with flake8 + - name: Lint with ruff run: | - conda install -c conda-forge ruff + conda install -c conda-forge ruff --solver classic ruff check --output-format=github . continue-on-error: true # making sure we are testing installed package @@ -44,7 +45,7 @@ jobs: pip install -e . 
- name: Test with pytest run: | - conda install pytest coverage codecov + conda install pytest coverage codecov --solver classic conda install -c conda-forge pyfakefs conda run python -m pytest #conda run coverage run --source src -m py.test From 5a5c415812dbc65d9544ce90449e89eb60cde3f4 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 17:00:01 +1000 Subject: [PATCH 097/137] adopting miniconda for pytest action --- .github/workflows/mopper-pytest.yaml | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 4dee0cb..cb3fa56 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -25,10 +25,15 @@ jobs: uses: actions/setup-python@v5.1.1 with: python-version: '3.10' - - name: Add conda to system path - run: | - # $CONDA is an environment variable pointing to the root of the miniconda directory - echo $CONDA/bin >> $GITHUB_PATH + - name: Install Miniconda + uses: conda-incubator/setup-miniconda@v3.0.4 + with: + auto-update-conda: true + python-version: ${{ matrix.python-version }} + #- name: Add conda to system path + # run: | + # # $CONDA is an environment variable pointing to the root of the miniconda directory + # echo $CONDA/bin >> $GITHUB_PATH - name: Install dependencies run: | #conda config --set solver classic @@ -36,7 +41,7 @@ jobs: conda env update --file conda/test-env.yaml --name base - name: Lint with ruff run: | - conda install -c conda-forge ruff --solver classic + conda install -c conda-forge ruff #--solver classic ruff check --output-format=github . continue-on-error: true # making sure we are testing installed package @@ -45,7 +50,7 @@ jobs: pip install -e . - name: Test with pytest run: | - conda install pytest coverage codecov --solver classic + conda install pytest coverage codecov #--solver classic conda install -c conda-forge pyfakefs conda run python -m pytest #conda run coverage run --source src -m py.test From cb2055482076c988527fed5590bb7d2038e55083 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 17:03:59 +1000 Subject: [PATCH 098/137] fixed pytest action --- .github/workflows/mopper-pytest.yaml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index cb3fa56..14134cf 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -18,18 +18,20 @@ jobs: timeout-minutes: 60 strategy: max-parallel: 5 + matrix: + python-version: ["3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v4.1.7 - name: Set up Python 3.10 uses: actions/setup-python@v5.1.1 with: - python-version: '3.10' - - name: Install Miniconda - uses: conda-incubator/setup-miniconda@v3.0.4 - with: - auto-update-conda: true - python-version: ${{ matrix.python-version }} + python-version: ${{ matrix.python-version }} + - name: Install Miniconda + uses: conda-incubator/setup-miniconda@v3.0.4 + with: + auto-update-conda: true + python-version: ${{ matrix.python-version }} #- name: Add conda to system path # run: | # # $CONDA is an environment variable pointing to the root of the miniconda directory From 37a3096622156b314d9c8977522313d41b38b451 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 17:10:07 +1000 Subject: [PATCH 099/137] trying to install cmor --- conda/test-env.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/conda/test-env.yaml 
b/conda/test-env.yaml index e7866c9..525dd16 100644 --- a/conda/test-env.yaml +++ b/conda/test-env.yaml @@ -1,11 +1,9 @@ channels: - conda-forge - - coecms - - default dependencies: - - click - cmor + - click - xarray - numpy - dask From 9c47f708e800acdd4dd5b61174e25a9f7dac7ff5 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 17:16:05 +1000 Subject: [PATCH 100/137] trying to fix conda env --- .github/workflows/mopper-pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 14134cf..cec4d68 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -40,7 +40,7 @@ jobs: run: | #conda config --set solver classic # this seems to slow it down!!! - conda env update --file conda/test-env.yaml --name base + conda env update --file conda/test-env.yaml - name: Lint with ruff run: | conda install -c conda-forge ruff #--solver classic From 3c80278c60431e397d73a0f052ea5e0de256c7e5 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 17:24:15 +1000 Subject: [PATCH 101/137] attempt 1000 --- .github/workflows/mopper-pytest.yaml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index cec4d68..461565c 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -31,6 +31,7 @@ jobs: uses: conda-incubator/setup-miniconda@v3.0.4 with: auto-update-conda: true + activate-environment: testenv python-version: ${{ matrix.python-version }} #- name: Add conda to system path # run: | @@ -40,10 +41,10 @@ jobs: run: | #conda config --set solver classic # this seems to slow it down!!! - conda env update --file conda/test-env.yaml + conda env update --file conda/test-env.yaml --name testenv - name: Lint with ruff run: | - conda install -c conda-forge ruff #--solver classic + conda install -c conda-forge ruff --name testenv #--solver classic ruff check --output-format=github . continue-on-error: true # making sure we are testing installed package @@ -52,8 +53,8 @@ jobs: pip install -e . - name: Test with pytest run: | - conda install pytest coverage codecov #--solver classic - conda install -c conda-forge pyfakefs + conda install pytest coverage codecov --name testenv #--solver classic + conda install -c conda-forge pyfakefs --name testenv conda run python -m pytest #conda run coverage run --source src -m py.test # - name: Upload to codecov From cbe72d5793d8cb4dde686e186ec4479d20847837 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 17:35:43 +1000 Subject: [PATCH 102/137] attempt 1001 --- .github/workflows/mopper-pytest.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 461565c..21259ca 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -44,12 +44,14 @@ jobs: conda env update --file conda/test-env.yaml --name testenv - name: Lint with ruff run: | - conda install -c conda-forge ruff --name testenv #--solver classic + conda install -c conda-forge ruff #--solver classic ruff check --output-format=github . continue-on-error: true + # making sure we are testing installed package - name: Install package run: | + conda list | grep cmor pip install -e . 
- name: Test with pytest run: | From 322fadbb324972ac6c201192dfe57df2151e97b7 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 17:46:45 +1000 Subject: [PATCH 103/137] attempt 1002 --- .github/workflows/mopper-pytest.yaml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 21259ca..3d569a6 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -30,9 +30,11 @@ jobs: - name: Install Miniconda uses: conda-incubator/setup-miniconda@v3.0.4 with: - auto-update-conda: true - activate-environment: testenv + #auto-update-conda: true + activate-environment: "" + auto-activate-base: true python-version: ${{ matrix.python-version }} + channels: conda-forge #- name: Add conda to system path # run: | # # $CONDA is an environment variable pointing to the root of the miniconda directory @@ -41,7 +43,8 @@ jobs: run: | #conda config --set solver classic # this seems to slow it down!!! - conda env update --file conda/test-env.yaml --name testenv + conda env update --file conda/test-env.yaml --name base + conda list - name: Lint with ruff run: | conda install -c conda-forge ruff #--solver classic @@ -51,12 +54,11 @@ jobs: # making sure we are testing installed package - name: Install package run: | - conda list | grep cmor pip install -e . - name: Test with pytest run: | - conda install pytest coverage codecov --name testenv #--solver classic - conda install -c conda-forge pyfakefs --name testenv + conda install pytest coverage codecov #--name testenv #--solver classic + conda install -c conda-forge pyfakefs #--name testenv conda run python -m pytest #conda run coverage run --source src -m py.test # - name: Upload to codecov From 02b76c478686e1f28ab076e88ebede695500059f Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 17:50:14 +1000 Subject: [PATCH 104/137] attempt 1003 --- .github/workflows/mopper-pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 3d569a6..1fdc87f 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -31,8 +31,8 @@ jobs: uses: conda-incubator/setup-miniconda@v3.0.4 with: #auto-update-conda: true - activate-environment: "" auto-activate-base: true + activate-environment: "" python-version: ${{ matrix.python-version }} channels: conda-forge #- name: Add conda to system path From 430c327f57c6d602136e79827ca06218fd0a3ece Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 8 Aug 2024 17:52:32 +1000 Subject: [PATCH 105/137] attempt 1004 --- .github/workflows/mopper-pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 1fdc87f..08458e0 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -32,7 +32,7 @@ jobs: with: #auto-update-conda: true auto-activate-base: true - activate-environment: "" + activate-environment: true python-version: ${{ matrix.python-version }} channels: conda-forge #- name: Add conda to system path From 86aad611e4cd1da7e6d5b0e0979351170766d9a3 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 13:37:37 +1000 Subject: [PATCH 106/137] solved issue #148 --- mappings/map_AUS2200.csv | 1 + src/mopdata/access.db | Bin 1081344 -> 1085440 bytes src/mopdata/access_dump.sql | 2 + 
src/mopdata/cmor_tables/AUS2200_A3hr.json | 18 ++++++ src/mopdata/interval2frq.yaml | 25 +++++++++ src/mopdb/mopdb_class.py | 4 +- src/mopdb/mopdb_map.py | 65 ++++++++++------------ 7 files changed, 75 insertions(+), 40 deletions(-) create mode 100644 src/mopdata/interval2frq.yaml diff --git a/mappings/map_AUS2200.csv b/mappings/map_AUS2200.csv index a6c1ab9..7f7aec5 100644 --- a/mappings/map_AUS2200.csv +++ b/mappings/map_AUS2200.csv @@ -100,5 +100,6 @@ wsgmax10m;fld_s03i463;;m s-1;time lat lon;10minPt;atmos;area: time: point;;AUS22 wsgmax10m_max;fld_s03i463_max;;m s-1;time_0 lat lon;10min;atmos;area: time: maximum;;AUS2200_A10min;AUS2200;;float32;22048000;2304;umnsa_spec;WIND GUST;wind_speed_of_gust z0;fld_s00i026;;m;time lat lon;1hrPt;atmos;area: time: point;;AUS2200_A1hr;AUS2200;;float32;22048000;384;umnsa_slv;ROUGHNESS LENGTH AFTER TIMESTEP;surface_roughness_length zfull;fld_s15i101;level_to_height(var[0],levs=(0,66));m;time_0 model_theta_level_number lat lon_0;1hrPt;atmos;area: time: point;;AUS2200_A1hr;AUS2200;float32;1543360000;384;umnsa_mdl;H OF THETA MODEL LEVS FROM SEA LEVEL;height_above_reference_ellipsoid +zg16;fld_s16i202;;m;time pressure lat lon;3hrPt;atmos;area: time: point;;AUS2200_A3hr;AUS2200;float32;352768000;114;flreduced_;GEOPOTENTIAL HEIGHT ON P LEV/P GRID;geopotential_height zmla;fld_s00i025;;m;time_0 lat lon;1hr;atmos;area: time: mean;;AUS2200_A1hr;AUS2200;float32;22048000;384;umnsa_slv;BOUNDARY LAYER DEPTH AFTER TIMESTEP;atmosphere_boundary_layer_thickness zmla;fld_s00i025;;m;time lat lon;10minPt;atmos;area: time: point;;AUS2200_A10min;AUS2200;float32;22048000;2304;umnsa_spec;BOUNDARY LAYER DEPTH AFTER TIMESTEP;atmosphere_boundary_layer_thickness diff --git a/src/mopdata/access.db b/src/mopdata/access.db index 22328690cd4064cdbbd0ece89367c33ccc995762..70be17f5931c78e7391cfaf84613d7bb09b1b68c 100644 GIT binary patch delta 470 zcmZo@aB5iKG(nn`gMopObE1MhBge*sCHx$>xn3}UL^ksXY++>Lda-?D1K$Gn%?=Hn zjLh#RMs9v5wfigMhUu#W7%#9eJzzB3Y}jDLIQ@hWqp+fJMiIwRab`oN~gLuc3#7d4UKtm-aI*PRa5Ml&kCLm@8Viq7~-Tp&}O`=zZaUT;u%Sx6hEVV3| zETJs+EE+6=%)gnRFl}PIvR!8Zn=X+3oX#&g z{pT!x>FL6H0>bQ2@$H(L0@uU1ctH`tdzyjwG$azXA86oHWMgSDoESCz{98Wd=^Qut UcqhALN=@!>GT!dxZW`U!TZfT0$Uij2MRKEGjC52WU2v^ zT1=ZiO6~Z{_+a`f0mci=K%LVS3s`hFJ2c2KPTwHJD9oCYSjlk(#FS|NA;bv8OhC*G z#4JF}y8VX`n?$b+lNA#`%Sx6hEVV3|ETJs+EE+6=%)gnRFl}NA*simH&6AOZJ%yoU zvSCBP^p^|S0;lg=$fn(%x`+*k*@2h?h&h3n3y8Uam}h(HBHsCP87E9KTc{h9+` z2otm4bRS1Px$Pc~d|J$mwv!bNET=cDWMgUH;KH|kgA0GiZAP)_tO^41)72CN?rvvP T6j;g5*aWn)Zo8(Y!1XWygiKvX diff --git a/src/mopdata/access_dump.sql b/src/mopdata/access_dump.sql index de52ba2..8d4a090 100644 --- a/src/mopdata/access_dump.sql +++ b/src/mopdata/access_dump.sql @@ -2279,6 +2279,7 @@ INSERT INTO cmorvar VALUES('lmask-AUS2200_fx','fx','land','land_binary_mask','%' INSERT INTO cmorvar VALUES('omldamax-CM2_mon','mon','ocean','ocean_mixed_layer_thickness_defined_by_mixing_scheme','m','area: mean time: maximum','area: areacello','Mean Monthly Maximum Ocean Mixed Layer Thickness Defined by Mixing Scheme','The ocean mixed layer is the upper part of the ocean, regarded as being well-mixed. The base of the mixed layer defined by the mixing scheme is a diagnostic of ocean models. 
''Thickness'' means the vertical extent of a layer.','longitude latitude time','omldamax','real','','','','','','',''); INSERT INTO cmorvar VALUES('difvho-CM2_mon','mon','ocean','ocean_vertical_heat_diffusivity','m2 s-1','area: mean time: mean','area: areacello volume: volcello','Ocean Vertical Heat Diffusivity','Vertical/dianeutral diffusivity applied to prognostic temperature field.','longitude latitude olevel time','difvho','real','','','','','','',''); INSERT INTO cmorvar VALUES('rho0-CM2_mon','mon','ocean','sea_water_potential_density','kg m-3','area: mean time: mean','area: areacello volume: volcello','Potential Density referenced to 0 dbar','','longitude latitude olevel time','rho0','real','','','','','','',''); +INSERT INTO cmorvar VALUES('zg16-AUS2200_A3hr','3hrPt','atmos','geopotential_height','m','area: mean time: point','area: areacella','Geopotential Height on pressure levels','Geopotential is the sum of the specific gravitational potential energy relative to the geoid and the specific centripetal potential energy. Geopotential height is the geopotential divided by the standard acceleration due to gravity. It is numerically similar to the altitude (or geometric height) and not to the quantity with standard name height, which is relative to the surface.','longitude latitude plev16 time1','zg','real','','','','','','',''); CREATE TABLE mapping ( cmor_var TEXT, input_vars TEXT, @@ -2734,6 +2735,7 @@ INSERT INTO mapping VALUES('zfull','fld_s15i101','','m','time model_theta_level_ INSERT INTO mapping VALUES('zg','fld_s30i297','','m','time pressure lat lon','mon','atmos','area: time: mean','','CMIP6_Amon','CM2','geopotential_height','map_atmos_CM2'); INSERT INTO mapping VALUES('zg','fld_s30i297','','m','time pressure lat lon','day','atmos','area: time: mean','','CMIP6_Eday','CM2','geopotential_height','map_atmos_CM2'); INSERT INTO mapping VALUES('zg','fld_s30i297','','m','time pressure lat lon','day','atmos','area: time: mean','','CMIP6_day','CM2','geopotential_height','map_atmos_CM2'); +INSERT INTO mapping VALUES('zg16','fld_s16i202','','m','time pressure lat lon','3hrPt','atmos','area: time: point','','AUS2200_A3hr','AUS2200','geopotential_height','AUS2200'); INSERT INTO mapping VALUES('zg500','fld_s30i297','','m','time pressure lat lon','6hrPt','atmos','area: time: point','','CMIP6_6hrPlevPt','CM2','geopotential_height','map_atmos_CM2'); INSERT INTO mapping VALUES('zg500','fld_s30i297','var[0].sel(pressure=500)','m','time pressure lat lon','day','aerosol','area: time: mean','','CMIP6_AERday','CM2','geopotential_height','map_aerosol_CM2'); INSERT INTO mapping VALUES('zguvgrid','fld_s30i207','','m','time pressure lat_v lon_u','mon','atmos','area: time: mean','','CM2_mon','CM2','geopotential_height','map_atmos_CM2'); diff --git a/src/mopdata/cmor_tables/AUS2200_A3hr.json b/src/mopdata/cmor_tables/AUS2200_A3hr.json index d9108a9..99e6833 100644 --- a/src/mopdata/cmor_tables/AUS2200_A3hr.json +++ b/src/mopdata/cmor_tables/AUS2200_A3hr.json @@ -103,6 +103,24 @@ "valid_max": "", "ok_min_mean_abs": "", "ok_max_mean_abs": "" + }, + "zg16": { + "frequency": "3hrPt", + "modeling_realm": "atmos", + "standard_name": "geopotential_height", + "units": "m", + "cell_methods": "area: mean time: point", + "cell_measures": "area: areacella", + "long_name": "Geopotential Height on pressure levels", + "comment": "Geopotential is the sum of the specific gravitational potential energy relative to the geoid and the specific centripetal potential energy. 
Geopotential height is the geopotential divided by the standard acceleration due to gravity. It is numerically similar to the altitude (or geometric height) and not to the quantity with standard name height, which is relative to the surface.", + "dimensions": "longitude latitude plev16 time1", + "out_name": "zg", + "type": "real", + "positive": "", + "valid_min": "", + "valid_max": "", + "ok_min_mean_abs": "", + "ok_max_mean_abs": "" } } } diff --git a/src/mopdata/interval2frq.yaml b/src/mopdata/interval2frq.yaml new file mode 100644 index 0000000..37189ec --- /dev/null +++ b/src/mopdata/interval2frq.yaml @@ -0,0 +1,25 @@ +# This file contains the dictionary neededto associate a time step interval +# to a frequency. There can be more than one depending on the units used by +# the time axis +days: + dec: 3652.0 + yr: 365.0 + mon: 30.0 + day: 1.0 + 6hr: 0.25 + 3hr: 0.125 + 1hr: 0.041667 + 30min: 0.020833 + 10min: 0.006944 + +hours: + dec: 87648.0 + yr: 8760.0 + mon: 720.0 + day: 24.0 + 6hr: 6.0 + 3hr: 3.0 + 1hr: 1.0 + 30min: 0.5 + 10min: 0.167 + diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index 8b73805..d044eaa 100644 --- a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -94,8 +94,6 @@ def __init__(self, varname: str, fobj: FPattern): self.name = varname # path object self.fpattern = fobj.fpattern - #self.fpath = fobj.fpath - #self.files = fobj.files # mapping attributes self._frequency = fobj.frequency self._realm = fobj.realm @@ -127,7 +125,7 @@ def frequency(self): def frequency(self, value): value = value.replace('hPt', 'hrPt') if not any(x in value for x in - ['min', 'hr', 'day', 'mon', 'yr']): + ['fx', 'min', 'hr', 'day', 'mon', 'yr']): value = 'NAfrq' self._frequency = value diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index 9db15eb..b88ea7c 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -84,7 +84,7 @@ def get_cmorname(conn, vobj, version): f"{results}\n Using {vobj.cmor_var} from {vobj.cmor_table}") return vobj -def get_file_frq(ds, fnext): +def get_file_frq(ds, fnext, int2frq): """Return a dictionary with frequency for each time axis. Frequency is inferred by comparing interval between two consecutive @@ -95,22 +95,23 @@ def get_file_frq(ds, fnext): (usually only UM) or if frequency can be guessed from filename. 
""" mopdb_log = logging.getLogger('mopdb_log') + mopdb_log.debug(f"in get_file_frq fnext: {fnext}") frq = {} - int2frq = {'dec': 3652.0, 'yr': 365.0, 'mon': 30.0, - 'day': 1.0, '6hr': 0.25, '3hr': 0.125, - '1hr': 0.041667, '30min': 0.020833, '10min': 0.006944} # retrieve all time axes time_axs = [d for d in ds.dims if 'time' in d] time_axs_len = set(len(ds[d]) for d in time_axs) time_axs.sort(key=lambda x: len(ds[x]), reverse=True) mopdb_log.debug(f"in get_file_frq, time_axs: {time_axs}") - max_len = len(ds[time_axs[0]]) + if len(time_axs) > 0: + max_len = len(ds[time_axs[0]]) + else: + max_len = 0 + frq = {'time': 'fx'} # if all time axes have only 1 timestep we cannot infer frequency # so we open also next file but get only time axs if max_len == 1: if fnext is None: mopdb_log.info(f"Only 1 file cannot determine frequency for: {fpattern}") - return frq else: dsnext = xr.open_dataset(fnext, decode_times = False) time_axs2 = [d for d in dsnext.dims if 'time' in d] @@ -118,18 +119,19 @@ def get_file_frq(ds, fnext): time_axs = [d for d in ds.dims if 'time' in d] time_axs_len = set(len(ds[d]) for d in time_axs) time_axs.sort(key=lambda x: len(ds[x]), reverse=True) - for t in time_axs: - mopdb_log.debug(f"len of time axis {t}: {len(ds[t])}") - if len(ds[t]) > 1: - interval = (ds[t][1]-ds[t][0]).values - interval_file = (ds[t][-1] -ds[t][0]).values - else: - interval = interval_file - mopdb_log.debug(f"interval 2 timesteps for {t}: {interval}") - for k,v in int2frq.items(): - if math.isclose(interval, v, rel_tol=0.05): - frq[t] = k - break + if max_len > 0: + for t in time_axs: + mopdb_log.debug(f"len of time axis {t}: {len(ds[t])}") + if len(ds[t]) > 1: + interval = (ds[t][1]-ds[t][0]).values + interval_file = (ds[t][-1] -ds[t][0]).values + else: + interval = interval_file + mopdb_log.debug(f"interval 2 timesteps for {t}: {interval}") + for k,v in int2frq.items(): + if math.isclose(interval, v, rel_tol=0.05): + frq[t] = k + break return frq def write_varlist(conn, indir, match, version, alias): @@ -168,6 +170,10 @@ def write_varlist(conn, indir, match, version, alias): #fwriter.writerow([f"#{fpattern}"]) # get attributes for the file variables ds = xr.open_dataset(str(fobj.files[0]), decode_times=False) + time_units = ds['time'].units.split()[0] + yfile = import_files('mopdata').joinpath('interval2frq.yaml') + fdata = read_yaml(yfile) + int2frq = fdata[time_units] coords = [c for c in ds.coords] + ['latitude_longitude'] #pass next file in case of 1 timestep per file and no frq in name if len(fobj.files) == 1: @@ -175,7 +181,7 @@ def write_varlist(conn, indir, match, version, alias): else: fnext = str(fobj.files[1]) if fobj.frequency == 'NAfrq' or fobj.realm == 'atmos': - frq_dict = get_file_frq(ds, fnext) + frq_dict = get_file_frq(ds, fnext, int2frq) # if only one frequency detected empty dict if len(frq_dict) == 1: fobj.frequency = frq_dict.popitem()[1] @@ -336,31 +342,13 @@ def add_var(vlist, vobj, match, stdnm=False): # assign cmor_var from match and swap place with input_vars mopdb_log.debug(f"Assign cmor_var: {match}") mopdb_log.debug(f"initial variable definition: {vobj}") - #var = vobj.__dict__.copy() var = MapVariable(match, vobj) - #var.cmor_var = match[0] - #vobj.input_vars = match[1] - # orig_name = var.pop('name') - # assign realm from match - #var['realm'] = match[4] - # with stdn assign cmorvar and table if only 1 match returned - # otherwise assign table from match if stdnm: var.input_vars = vobj.name if len(var.cmor_var) == 1: cmor_var, table = var.cmor_var[0].split("-") 
var.cmor_var = cmor_var var.cmor_table = table - #else: - # var['cmor_table'] = match[6] - # add calculation, positive and version - #var['calculation'] = match[2] - #var['positive'] = match[7] - #var['version'] = match[5] - # maybe we should override units here rather than in check_realm_units - # if units missing get them from match - #if var['units'] is None or var['units'] == '': - # var['units'] = match[8] vlist.append(var) return vlist @@ -436,6 +424,9 @@ def write_map_template(conn, parsed, alias): with open(f"map_{alias}.csv", 'w') as fcsv: fwriter = csv.DictWriter(fcsv, keys, delimiter=';') write_vars(full, fwriter, keys, conn=conn) + # write header as write_vars skips it if full is empty + if len(full) == 0: + fwriter.writerow({x:x for x in keys}) div = ("# Derived variables with matching version and " + "frequency: Use with caution!") write_vars(pot_full, fwriter, div, conn=conn) From 7547fb2835306a888ab4c7cffa0bbaead7ae069d Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 15:19:35 +1000 Subject: [PATCH 107/137] attempt 1005 to fix workflows --- .github/workflows/mopper-pytest.yaml | 14 +++++++------- conda/{test-env.yaml => testenv.yaml} | 5 +++++ 2 files changed, 12 insertions(+), 7 deletions(-) rename conda/{test-env.yaml => testenv.yaml} (68%) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 08458e0..f957e4e 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -30,9 +30,9 @@ jobs: - name: Install Miniconda uses: conda-incubator/setup-miniconda@v3.0.4 with: - #auto-update-conda: true - auto-activate-base: true - activate-environment: true + auto-update-conda: true + activate-environment: testenv + environment-file: conda/testenv.yml python-version: ${{ matrix.python-version }} channels: conda-forge #- name: Add conda to system path @@ -43,11 +43,11 @@ jobs: run: | #conda config --set solver classic # this seems to slow it down!!! - conda env update --file conda/test-env.yaml --name base + #conda env update --file conda/test-env.yaml --name testenv conda list - name: Lint with ruff run: | - conda install -c conda-forge ruff #--solver classic + #conda install -c conda-forge ruff #--solver classic ruff check --output-format=github . continue-on-error: true @@ -57,8 +57,8 @@ jobs: pip install -e . 
- name: Test with pytest run: | - conda install pytest coverage codecov #--name testenv #--solver classic - conda install -c conda-forge pyfakefs #--name testenv + #conda install pytest coverage codecov #--name testenv #--solver classic + #conda install -c conda-forge pyfakefs #--name testenv conda run python -m pytest #conda run coverage run --source src -m py.test # - name: Upload to codecov diff --git a/conda/test-env.yaml b/conda/testenv.yaml similarity index 68% rename from conda/test-env.yaml rename to conda/testenv.yaml index 525dd16..becd88a 100644 --- a/conda/test-env.yaml +++ b/conda/testenv.yaml @@ -10,3 +10,8 @@ dependencies: - pyyaml - cftime - python-dateutil + - pytest + - coverage + - codecov + - pyfakefs + - ruff From 2e92a8fcadfae40bc5c5785c19cf6993a149c8fc Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 15:42:16 +1000 Subject: [PATCH 108/137] fixed env name in workflow --- .github/workflows/mopper-pytest.yaml | 2 +- src/mopper/mop_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index f957e4e..732987c 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -32,7 +32,7 @@ jobs: with: auto-update-conda: true activate-environment: testenv - environment-file: conda/testenv.yml + environment-file: conda/testenv.yaml python-version: ${{ matrix.python-version }} channels: conda-forge #- name: Add conda to system path diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index ef41953..6ed8b60 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -800,10 +800,10 @@ def get_bounds_values(ctx, ds, bname): calc = False var_log = logging.getLogger(ctx.obj['var_log']) var_log.debug(f"Getting bounds values for {bname}") - ancil_file = ctx.obj[f"grid_{ctx.obj['realm']}"] if bname in ds.variables: bnds_val = ds[bname].values elif ancil_file != "": + ancil_file = ctx.obj[f"grid_{ctx.obj['realm']}"] fname = f"{ctx.obj['ancils_path']}/{ancil_file}" ancil = xr.open_dataset(fname) if bname in ancil.variables: From 4b16115468a8a1acf53819c3c4d579d210674f68 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 16:08:10 +1000 Subject: [PATCH 109/137] trying again by adding env name to env.yaml file --- .github/workflows/mopper-pytest.yaml | 5 +++++ conda/testenv.yaml | 1 + 2 files changed, 6 insertions(+) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 732987c..17bf218 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -44,26 +44,31 @@ jobs: #conda config --set solver classic # this seems to slow it down!!! #conda env update --file conda/test-env.yaml --name testenv + conda activate testenv conda list - name: Lint with ruff run: | #conda install -c conda-forge ruff #--solver classic + source ~/miniconda/bin/activate testenv ruff check --output-format=github . continue-on-error: true # making sure we are testing installed package - name: Install package run: | + source ~/miniconda/bin/activate testenv pip install -e . 
- name: Test with pytest run: | #conda install pytest coverage codecov #--name testenv #--solver classic #conda install -c conda-forge pyfakefs #--name testenv + source ~/miniconda/bin/activate testenv conda run python -m pytest #conda run coverage run --source src -m py.test # - name: Upload to codecov # if: steps.build.outcome == 'success' # run: | + source ~/miniconda/bin/activate testenv # curl -Os https://uploader.codecov.io/latest/linux/codecov # chmod +x codecov # ./codecov diff --git a/conda/testenv.yaml b/conda/testenv.yaml index becd88a..7fa8dd5 100644 --- a/conda/testenv.yaml +++ b/conda/testenv.yaml @@ -1,3 +1,4 @@ +name: testenv channels: - conda-forge From 801ed003ba8938f4141062365fe41322a01f1d57 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 16:11:07 +1000 Subject: [PATCH 110/137] trying again by adding env name to env.yaml file 2 --- .github/workflows/mopper-pytest.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 17bf218..9626530 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -30,7 +30,6 @@ jobs: - name: Install Miniconda uses: conda-incubator/setup-miniconda@v3.0.4 with: - auto-update-conda: true activate-environment: testenv environment-file: conda/testenv.yaml python-version: ${{ matrix.python-version }} @@ -44,31 +43,26 @@ jobs: #conda config --set solver classic # this seems to slow it down!!! #conda env update --file conda/test-env.yaml --name testenv - conda activate testenv conda list - name: Lint with ruff run: | #conda install -c conda-forge ruff #--solver classic - source ~/miniconda/bin/activate testenv ruff check --output-format=github . continue-on-error: true # making sure we are testing installed package - name: Install package run: | - source ~/miniconda/bin/activate testenv pip install -e . - name: Test with pytest run: | #conda install pytest coverage codecov #--name testenv #--solver classic #conda install -c conda-forge pyfakefs #--name testenv - source ~/miniconda/bin/activate testenv conda run python -m pytest #conda run coverage run --source src -m py.test # - name: Upload to codecov # if: steps.build.outcome == 'success' # run: | - source ~/miniconda/bin/activate testenv # curl -Os https://uploader.codecov.io/latest/linux/codecov # chmod +x codecov # ./codecov From 13aca2303e130072716dc071b93de02fbf475c45 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 16:28:51 +1000 Subject: [PATCH 111/137] found correct way to activate env? --- .github/workflows/mopper-pytest.yaml | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 9626530..d6f8643 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -38,29 +38,25 @@ jobs: # run: | # # $CONDA is an environment variable pointing to the root of the miniconda directory # echo $CONDA/bin >> $GITHUB_PATH - - name: Install dependencies - run: | - #conda config --set solver classic - # this seems to slow it down!!! - #conda env update --file conda/test-env.yaml --name testenv - conda list - name: Lint with ruff + shell: bash -el {0} run: | - #conda install -c conda-forge ruff #--solver classic ruff check --output-format=github . 
continue-on-error: true # making sure we are testing installed package - name: Install package + shell: bash -el {0} run: | + conda activate testenv pip install -e . - name: Test with pytest + shell: bash -el {0} run: | - #conda install pytest coverage codecov #--name testenv #--solver classic - #conda install -c conda-forge pyfakefs #--name testenv conda run python -m pytest #conda run coverage run --source src -m py.test # - name: Upload to codecov + # shell: bash -el {0} # if: steps.build.outcome == 'success' # run: | # curl -Os https://uploader.codecov.io/latest/linux/codecov From 5534961231c34652a37c2a401625653c263e9f3d Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 17:03:07 +1000 Subject: [PATCH 112/137] remove 3.9 added 3.12 for tests, added testdata files --- .github/workflows/mopper-pytest.yaml | 8 ++------ .github/workflows/mopper-test-conda.yaml | 6 +++--- conda/environment.yaml | 1 + tests/test_mopdb_map.py | 5 +++-- tests/testdata/multitime.nc | Bin 0 -> 9731 bytes tests/testdata/multitime_next.nc | Bin 0 -> 9731 bytes tests/testdata/onetstep.nc | Bin 0 -> 8908 bytes tests/testdata/onetstep_next.nc | Bin 0 -> 8908 bytes 8 files changed, 9 insertions(+), 11 deletions(-) create mode 100644 tests/testdata/multitime.nc create mode 100644 tests/testdata/multitime_next.nc create mode 100644 tests/testdata/onetstep.nc create mode 100644 tests/testdata/onetstep_next.nc diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index d6f8643..51d846c 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -19,11 +19,11 @@ jobs: strategy: max-parallel: 5 matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4.1.7 - - name: Set up Python 3.10 + - name: Set up Python 3.10/3.11 uses: actions/setup-python@v5.1.1 with: python-version: ${{ matrix.python-version }} @@ -34,10 +34,6 @@ jobs: environment-file: conda/testenv.yaml python-version: ${{ matrix.python-version }} channels: conda-forge - #- name: Add conda to system path - # run: | - # # $CONDA is an environment variable pointing to the root of the miniconda directory - # echo $CONDA/bin >> $GITHUB_PATH - name: Lint with ruff shell: bash -el {0} run: | diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index 0e89622..1ef10f9 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.10", "3.11"i, "3.12"] steps: - uses: actions/checkout@v4.1.7 with: @@ -26,12 +26,12 @@ jobs: uses: conda-incubator/setup-miniconda@v3.0.4 with: python-version: ${{ matrix.python-version }} + activate-environment: mopper_env environment-file: conda/environment.yaml # Path to the build conda environment - auto-update-conda: false - auto-activate-base: false show-channel-urls: true # - name: Build but do not upload the conda packages uses: uibcdf/action-build-and-upload-conda-packages@v1.3.0 + shell: bash -el {0} with: meta_yaml_dir: conda python-version: ${{ matrix.python-version }} # Values previously defined in `matrix` diff --git a/conda/environment.yaml b/conda/environment.yaml index 069e0f9..62adf87 100644 --- a/conda/environment.yaml +++ b/conda/environment.yaml @@ -1,3 +1,4 @@ +name: mopper_env channels: - conda-forge - coecms diff --git a/tests/test_mopdb_map.py 
b/tests/test_mopdb_map.py index 38ac29e..8b7a5e9 100644 --- a/tests/test_mopdb_map.py +++ b/tests/test_mopdb_map.py @@ -47,13 +47,14 @@ def test_get_file_frq(caplog): global dsmulti, dsmulti2, dsonestep, dsonestep2 caplog.set_level(logging.DEBUG, logger='mopdb_log') umfrq = {'time': 'day', 'time_0': '1hr', 'time_1': '30min'} + int2frq = {'day': 1.0, '1hr': 0.041667, '30min': 0.020833} # multi time axes in file ds = xr.open_dataset(dsmulti, decode_times=False) - out = get_file_frq(ds, dsmulti2) + out = get_file_frq(ds, dsmulti2, int2frq) assert umfrq == out # only one time axis in file with 1 value ds = xr.open_dataset(dsonestep, decode_times=False) - out = get_file_frq(ds, dsonestep2) + out = get_file_frq(ds, dsonestep2, int2frq) umfrq = {'time': 'day'} assert umfrq == out diff --git a/tests/testdata/multitime.nc b/tests/testdata/multitime.nc new file mode 100644 index 0000000000000000000000000000000000000000..fbee02705ad3153d0188ed463ae8f1fda684de1f GIT binary patch literal 9731 zcmeHNU2IfE6rSz2rM6or0#%^u76BDX?bZr4fQMzf1!B9E)(9%5S=a@Zl$N#&SWM-| zK*|G1zy}fpA3&^7LJml93?|NlJa%o!-PlGad2%suJuxifd> z%$aY#xpU6#?p>Od9UnU-HX`v9NHMHmy+&sHbNJ(q;^u0qULs+8a$LK>wcAE zb%({$4^`W~zs|7x@gLc26RAGJh9V7&wmZn<1e}(1T1Kr#l}M z8^*cle|65`rlxi^Q91CV3Vp%wn!wr&Pf1BxAXMxR2g`YAjfitd2g!hAHo}HxqR~lg z>&bB!6sQS!gJ32fvzFz0eFe+%e1!!bcaB#%o}+rZiqai??p!aorv;(*1Uy@{W23R? z99lKbWra3|0{6RxJxCnLjL3U#Y%UIk%h~KCyh&yQ{)%#Ed8l|z(3zf=mNqBNg?~zg zu6u3v-qgyrt&ivp6eBMRpE3=im(6q zVaUI_D46OL$;jkEhfOE)a@CSK?=cU(WRrNv+o{m!d9$o!Q?M)?!u?3JnT8+guq9YV zOS~{WrI8-~sp*D#;wMwjRT__dQA$oHO~*B4F|K}J4>FBKqKTU<1QW?!0CW+bJX*|xn!Tx?vPwN6_hUuJiFSw<2yu&nEUFQr zTo-m7UeyRoD%TyqbgmJqu1~^SP4EM$)lCqGty~wHfV(F&!~U)t*_G>>;Ya*FeyJG} z4$a0cEs%fvR&{+*3vlsj3)F0#duRUCR!HQ5T44n@Yy}?P*#^I1>zb`?FtzpQzOEZ> zKwdd#w!<+jWmmQXo2zLDbQ*hh0K2iP15R8zxBT|44q*3Poxr!NI$=F}zx;M5d_`vG zb^#5JmLY{F)CC*ap>7})5)O4k12wzT4Lp5!56~=k9qxhiXuZ0=2iUFbUf@|A?S&8c zq8A7)cTOKL49ELm0T0s$4Bzs8VE3y3Km2)GNW>=+UQdzoRMAPIpAr46XgrWvoT8IO z)1!vud9BoR(KAF#Or~Ct@{6Kh5a9<3}XqUVTC75%bkOIZk52s}Cj{u$_*lhf$o zWgsSet&oG&oTDbhvyWzoM{wx~^BXyj-xUOPZG}VuuNhp;>21mw=q=b#!-1aUfF?em zSUufD$lBVc66Tvnp2QlNwVm@hddtzt&^U|*@U*{|`f$Kb&t(e8O`JYd&PT|XN`OGu z745NAWZ7U`ItXJ*5u!#lMKBQ~I>VAbW6bB8IX@3b5zN_R^nXba+GME)=dh!iA`r+U zP7xf*I$Io^!%`$klh8Tf$g)^=Bui(pamXIzPhobbsOW8f(dOWg$<|Q(;U!WX+bJI= zUPxL#?0ndF-K+<#?y%lW7|yyS(7vplQ;+1iz;Hg)?MMqdqSplE!k%BQY@q$C zA0MR>IvIu!i$|`J@rd1g>^yq^`(MA-Jen|oN9F~B(IY!wcFMuBgMdXbJc=02^M&Dj zs63){H1GAEPk0WG^J|GGFNdL|LkUG-m6tk7)$3~WKICq_dADWqYIEM3HzLwaRhz`v zR9M|?rwp&-AZc&6w@s10!x;b{s0zJ z`AHz<0c^kr5(FPWtdc+iyzl@*BwBpHD2)*l%Zom=(bNY(Bqlg>&Ky{3C9R>5m^ zN(of39J1>>e>*14&wbA@#!klBG@gK$S}hzj3XMap z+PH9PO)7D>Stvo|#mI2oF=JhQs-;<~9gjCjZ6e;<>}yWdFHibP{eJ%pe*pi4NOgVD zTOFGJDlLPo8uEmTPW^P21nmhlz2Q4$8CCMDnIl9~w1U_{l{zk;+I-Bo>O)MHke*E@tAg2;@Y(CElED5rRUP zA|rto)HkOZ>O~gazn2GhjKKjJ^l`!JFI8ZQjKD-}C24Mrqc9xfd*yaumgR`DTb8&h zK$XmmO)5Yy%QXsdgLUZu^^9bIlH-RRpfX%guzF(d;!F?QY`OAqpdFV>RDa>ucT@3Y z>12tIsjOU1I^=L7E0@lj@s^Ck2F&H=LSjwr6r|0^w>@rdV$7VltpSdS}>W^5TP$K3APs}3UihS z$XU`=k$+r3$2RIPApbaB8Sr7x)x)z+8&D(%nDEK0)4f#BlYM# z#St&vM*Z?ZkDMM!MEQ!zean}`z&CYK-k#|KDxyd6Qoyn%ci_}k0bBPj4MvX&ICSR3 z!Yh{roc}o+ZyE3lv48DvCCCgjM8(2sJU33B>83)bN1zS$P+x_Pm(ZKVZdRo)&8zQNYPGZy?# z>iR4|qfycY(u|-BYOY-F>`%Kun!nct^7eTboJ8w8`_b>Dc9I)#a@6dGe6=8;-fR2a zKq$0daKk&Y!wquzJ{xG2!Ke*CqW10$Hb}D$+CZzgdC`VB^1=o}%Qw{n5{88yI6%|1 zc|gL~=>ch8_x~q;9_JvEClX#ylekR|z;7cwtm{xtK+%%?EtoJ>5! 
z?Pr-k$9x)dJX$3cGoQh{g!%K#9p)h5An@Q2_-CT0GXc$qR)OgBwUrN6eU9oB&l<@T zkKj@n=I4AKzsU%y-d4^6UedVg(_6PO(VMfnTPAv31M2*Ma`jT3AuDYkaGI|ld7L|y z_xw3qW^Ykd42{NI08jfnrH@S5>A6e^xz5uEtN94|Motjuy7iCjcG9ddZXAFyl?d(; zEfI9ah{i~&nd13KMb38_C4xSCH2*Ip!gY4lz#4W$O9TRW*d>CuP*sZqYgo!8X%VUh z99|VG4GF6%HV2~z%#6g>DHJP3XPop~>Bi)Hn)cu;0&^O867X6F4;$iI2kZf!1<&X)9I z5tRMNY`Q9Etjuxn{`sWWZuNEZjV@h=p1IVL`LU#fMj?gMbkuTmB+I4IUieL(!^$}z zhoQmza&=v)Q0OG9=4b0vQc(fta{I%vJuPKkGaC=!#;zT{RI$N&icY5BycbZ)n%=0vFKwd8J znp>`j&u8&;J499kgBGzl%&MAeMSD^3Yg@RA%*0YvtLy;iqSjku7F2`~XBzyz286Zp>wNO8AxY;jjSNuUhv zGf59LrHqxl)Bn1>>q>c7Mm>iraVF-v9seer!Intpk)#%y^avG85@;&6L74y^#4lfv z;!<0i&TY-S1E>uRD1${6`|hUOcRj})h>DjnnkojvbeM$ctaN~~F(zdOMe_eiwjSzo zj3Ybs;r2aw#z!W=1egF5U;@V?aLws;#eaW#7NM#40P6Xz-Zx1vz?ZT*`O*M%ty@(f4 zuX_?a`UiN>qh37~QSjjZ;6d;o(3$sRi)Hn)cu;0&^O867X6AiT$hUdc?QAYo&Q|nt znJN1rZMrIEEa^CS|9mp&c80q7Mi(wamoBwreyr$B!;sQxJZc3zq80LZFZw3V*~&SF z4nt$}%hh$QhM^N#&6n#`QeFY(3j0B7Pb<=E%JFcx_3`^--u}WvOc$W>Nc#2MoPUds+1iH5$O5Q`6F4z-ceZ#QDLG1Oqo%!Dr+c~QO{?rb>YaT>-+ zkVhFWEjZPWY!DNeYXB3{l%Pq7b^2}JXG`EfPS|6ox8-_%Z{Ur5r`>m%c^>BtRul`o z_Lj@zi!vT>2hVCi&>}PkSygkLcrOlrZ3|YRnefXf%-Xo0#CgWku7DHU2!lyh&*Ee| zsy8jmx@g((VeIXbcTWdS_qyp~7A(lOG4h-n>G*Dx<}q->kD~T1w98UaVF;aJ^v;<1I?k%LrKjw=@BXxCD4>_L&%{A$;%h0 zxYX9Bb6Yd-Fwlkut1=5K4&6<6=z5Mj;uSALG*%3QJjkMaRyqXPn4mIaW%U0^wjSzo zh(kN|;r2aoMn@t*1c(3;AOgoCaLpO?`G0?W7J;ev0P6Xz-Zu#^V_&N3j7B0r1c(3; YAOb{y2oM1xKm>>Y5g-CY;9nu|6Z2-;;Q#;t literal 0 HcmV?d00001 From 4f1cfabe1aa00eda9bdcd5ae2264cab2c821701c Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 19:06:23 +1000 Subject: [PATCH 113/137] cleaned code following ruff report, except for tests --- docs/conf.py | 3 --- src/mopdb/__init__.py | 1 - src/mopdb/mopdb.py | 29 ++++------------------- src/mopdb/mopdb_class.py | 7 +----- src/mopdb/mopdb_map.py | 20 ++++++++-------- src/mopdb/mopdb_utils.py | 9 ++++---- src/mopdb/utils.py | 13 +++++------ src/mopper/__init__.py | 1 - src/mopper/calculations.py | 30 ++++-------------------- src/mopper/cmip_utils.py | 7 +++--- src/mopper/mop_setup.py | 19 +++++++-------- src/mopper/mop_utils.py | 47 ++++++++++++++++---------------------- src/mopper/mopper.py | 33 ++++++++++++++------------ src/mopper/setup_utils.py | 29 ++++++----------------- 14 files changed, 88 insertions(+), 160 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 1f21a6f..bd47e1c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,9 +12,6 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys -import os - # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. 
If the directory is relative to the diff --git a/src/mopdb/__init__.py b/src/mopdb/__init__.py index 2a413df..e69de29 100644 --- a/src/mopdb/__init__.py +++ b/src/mopdb/__init__.py @@ -1 +0,0 @@ -from mopdb import * diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 40757b2..561e728 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -19,19 +19,16 @@ # last updated 08/04/2024 import click -import sqlite3 import logging import sys -import csv import json from importlib.resources import files as import_files from pathlib import Path from mopdb.mopdb_utils import (mapping_sql, cmorvar_sql, read_map, - read_map_app4, map_update_sql, create_table, write_cmor_table, - check_varlist, update_db) -from mopdb.utils import * + read_map_app4, create_table, write_cmor_table, update_db) +from mopdb.utils import (config_log, db_connect, query, delete_record) from mopdb.mopdb_map import (write_varlist, write_map_template, write_catalogue, map_variables, load_vars, get_map_obj) @@ -112,7 +109,7 @@ def mopdb(ctx, debug): ctx.obj={} # set up a default value for flow if none selected for logging ctx.obj['debug'] = debug - mopdb_log = config_log(debug, logname='mopdb_log') + #mopdb_log = config_log(debug, logname='mopdb_log') @mopdb.command(name='check') @@ -200,7 +197,7 @@ def cmor_table(ctx, dbname, fname, alias, label): # extract cmor_var,units,dimensions,frequency,realm,cell_methods var_list = [] for v in vlist[1:]: - vid = (v[0], v[5], v[6]) + #vid = (v[0], v[5], v[6]) # This was adding variables to the table just if they didn't exists in other tables if v[0][:4] != 'fld_': if v[0] not in cmor_vars: @@ -353,17 +350,6 @@ def map_template(ctx, fpath, match, dbname, version, alias): fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) if alias == '': alias = fname.split(".")[0] -# also from here on it should be called by separate function I can call from intake too -# without repeating steps - # read list of vars from file - # this should now spit out fobjs, vobjs to pass to template - #with open(fname, 'r') as csvfile: - # reader = csv.DictReader(csvfile, delimiter=';') - # rows = list(reader) - #check_varlist(rows, fname) - # return lists of fully/partially matching variables and stash_vars - # these are input_vars for calculation defined in already in mapping db - #parsed = map_variables(conn, rows, version) parsed = map_variables(conn, vobjs, version) # potential vars have always duplicates: 1 for each input_var write_map_template(conn, parsed, alias) @@ -425,11 +411,6 @@ def write_intake(ctx, fpath, match, filelist, dbname, version, alias): map_file, vobjs, fobjs = load_vars(flist, indir=fpath) if alias == '': alias = fname.split(".")[0] - # read list of vars from file - #with open(fname, 'r') as csvfile: - # reader = csv.DictReader(csvfile, delimiter=';') - # rows = list(reader) - #check_varlist(rows, fname) # return lists of fully/partially matching variables and stash_vars # these are input_vars for calculation defined in already in mapping db if map_file is False: @@ -527,7 +508,7 @@ def model_vars(ctx, fpath, match, dbname, version, alias): if dbname == 'default': dbname = import_files('mopdata').joinpath('access.db') conn = db_connect(dbname, logname='mopdb_log') - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) conn.close() return None diff --git a/src/mopdb/mopdb_class.py b/src/mopdb/mopdb_class.py index d044eaa..a36a6a4 100644 --- 
a/src/mopdb/mopdb_class.py +++ b/src/mopdb/mopdb_class.py @@ -85,11 +85,6 @@ class Variable(): and the one added by mapping. """ - # __slots__ = ('name', 'pattern', 'files', 'frequency', 'realm', - # 'cmor_var', 'cmor_table', 'version', 'units', 'dimensions', - # 'cell_methods', 'positive', 'long_name', 'standard_name', - # 'vtype', 'size', 'nsteps') - def __init__(self, varname: str, fobj: FPattern): self.name = varname # path object @@ -148,7 +143,7 @@ def get_match(self): cmor_var = self.cmor_var else: cmor_var = self.name - match = (self.cmor_var, self.name, '', self.frequency, + match = (cmor_var, self.name, '', self.frequency, self.realm, self.version, '', self.positive, self.units) return match diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index b88ea7c..135a960 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -33,9 +33,9 @@ #from access_nri_intake.source.builders import AccessEsm15Builder from mopdb.mopdb_class import FPattern, Variable, MapVariable -from mopdb.utils import * +from mopdb.utils import query, read_yaml from mopdb.mopdb_utils import (get_cell_methods, remove_duplicate, - get_realm, check_realm_units, get_date_pattern, check_varlist) + get_realm, check_realm_units, get_date_pattern) def get_cmorname(conn, vobj, version): @@ -111,13 +111,12 @@ def get_file_frq(ds, fnext, int2frq): # so we open also next file but get only time axs if max_len == 1: if fnext is None: - mopdb_log.info(f"Only 1 file cannot determine frequency for: {fpattern}") + mopdb_log.info(f"Only 1 file with 1 tstep cannot determine frequency") else: dsnext = xr.open_dataset(fnext, decode_times = False) time_axs2 = [d for d in dsnext.dims if 'time' in d] ds = xr.concat([ds[time_axs], dsnext[time_axs2]], dim='time') time_axs = [d for d in ds.dims if 'time' in d] - time_axs_len = set(len(ds[d]) for d in time_axs) time_axs.sort(key=lambda x: len(ds[x]), reverse=True) if max_len > 0: for t in time_axs: @@ -232,7 +231,7 @@ def match_stdname(conn, vobj, stdn): in cmorvar table that match the standard name passed as input. It also return a False/True found_match boolean. 
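 
     Note that a standard_name is generally not unique: for example
     'air_temperature' is shared by several cmor variables (ta, tas, ...),
     so all matching names are kept as candidates.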
""" - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') found_match = False sql = f"""SELECT name FROM cmorvar where standard_name='{vobj.standard_name}'""" @@ -451,7 +450,7 @@ def write_vars(vlist, fwriter, div, conn=None, sortby='cmor_var'): """ """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') if len(vlist) > 0: if type(div) is str: divrow = {x:'' for x in vlist[0].attrs()} @@ -503,6 +502,7 @@ def get_map_obj(parsed): def write_catalogue(conn, vobjs, fobjs, alias): """Write intake-esm catalogue and returns name """ + mopdb_log = logging.getLogger('mopdb_log') # read template json file jfile = import_files('mopdata').joinpath('intake_cat_template.json') @@ -510,7 +510,7 @@ def write_catalogue(conn, vobjs, fobjs, alias): template = json.load(f) # write updated json to file for k,v in template.items(): - if type(v) == str: + if type(v) is str: template[k] = v.replace("", alias) jout = f"intake_{alias}.json" with open(jout, 'w') as f: @@ -542,7 +542,7 @@ def write_catalogue(conn, vobjs, fobjs, alias): def create_file_dict(fobjs, alias): """ """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') lines = [] for pat_obj in fobjs: var_list = [v.name for v in pat_obj.varlist] @@ -574,7 +574,7 @@ def create_file_dict(fobjs, alias): def add_mapvars(vobjs, lines, path_list, alias): """ """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') for vobj in vobjs: if vobj.cmor_var != "" or vobj.standard_name != "": mapvar = vobj.cmor_var @@ -598,7 +598,7 @@ def add_mapvars(vobjs, lines, path_list, alias): def load_vars(fname, indir=None): """Returns Variable and FPattern objs from varlist or map file. """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') vobjs = [] fobjs = {} if indir is not None: diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 0f80b6d..6de2ddf 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -19,7 +19,6 @@ # last updated 10/04/2024 # -import sqlite3 import logging import sys import csv @@ -28,7 +27,7 @@ from datetime import date from collections import Counter -from mopdb.utils import * +from mopdb.utils import query def mapping_sql(): @@ -236,7 +235,7 @@ def get_cell_methods(attrs, dims): `time: point` If `area` not specified is added at start of string as `area: ` """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') frqmod = '' val = attrs.get('cell_methods', "") if 'area' not in val: @@ -252,7 +251,7 @@ def get_cell_methods(attrs, dims): def read_map_app4(fname): """Reads APP4 style mapping """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') # old order #cmor_var,definable,input_vars,calculation,units,axes_mod,positive,ACCESS_ver[CM2/ESM/both],realm,notes var_list = [] @@ -404,7 +403,7 @@ def get_date_pattern(fname, fpattern): """Try to build a date range for each file pattern based on its filename """ - mopdb_log = logging.getLogger('mopdb_log') + #mopdb_log = logging.getLogger('mopdb_log') # assign False to any character which is not a digit date_pattern = [True if c.isdigit() else False for c in fname] # assign False to fpattern diff --git a/src/mopdb/utils.py b/src/mopdb/utils.py index 1a6ff11..c71dc71 100644 --- a/src/mopdb/utils.py +++ b/src/mopdb/utils.py @@ -22,8 +22,6 @@ import sqlite3 import logging import os -import csv -import json import 
stat import yaml @@ -57,7 +55,7 @@ def config_log(debug, logname): logname = f"{logname}_{day}.txt" flog = logging.FileHandler(logname) try: - os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO); + os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) except OSError: pass flog.setLevel(flevel) @@ -112,7 +110,7 @@ def query(conn, sql, tup=(), first=True, logname='__name__'): result : tuple/list(tuple) tuple or a list of, representing row/s returned by query """ - log = logging.getLogger(logname) + #log = logging.getLogger(logname) with conn: c = conn.cursor() c.execute(sql, tup) @@ -127,7 +125,7 @@ def query(conn, sql, tup=(), first=True, logname='__name__'): def get_columns(conn, table, logname='__name__'): """Gets list of columns from db table """ - log = logging.getLogger(logname) + #log = logging.getLogger(logname) sql = f'PRAGMA table_info({table});' table_data = query(conn, sql, first=False, logname=logname) columns = [x[1] for x in table_data] @@ -205,6 +203,7 @@ def write_yaml(data, fname, logname='__name__'): try: with open(fname, 'w') as f: yaml.dump(data, f) - except: - log.error(f"Check that {data} exists and it is an object compatible with yaml") + except Exception as e: + log.error(f"Exception: {e}") + log.error(f"Check {data} exists and is yaml object") return diff --git a/src/mopper/__init__.py b/src/mopper/__init__.py index 2c52bb9..e69de29 100644 --- a/src/mopper/__init__.py +++ b/src/mopper/__init__.py @@ -1 +0,0 @@ -from mopper import * diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py index ed9ae92..ab8462c 100644 --- a/src/mopper/calculations.py +++ b/src/mopper/calculations.py @@ -33,7 +33,6 @@ import click import xarray as xr import os -import yaml import json import numpy as np import dask @@ -233,7 +232,7 @@ def transAcrossLine(self, var, i_start, i_end, j_start, j_end): #sum each axis apart from time (3d) #trans = var.isel(yu_ocean=slice(271, 271+1), xt_ocean=slice(292, 300+1)) trans = var[..., j_start:j_end+1, i_start:i_end+1].sum(dim=['st_ocean', f'{y_ocean}', f'{x_ocean}']) #4D - except: + except Exception as e: trans = var[..., j_start:j_end+1, i_start:i_end+1].sum(dim=[f'{y_ocean}', f'{x_ocean}']) #3D return trans @@ -665,25 +664,6 @@ def calc_hemi_seaice_extent(self, hemi): return vout.item() - -def ocean_floor(var): - """Not sure.. - - Parameters - ---------- - var : Xarray dataset - pot_temp variable - - Returns - ------- - vout : Xarray dataset - ocean floor temperature? - """ - lv = (~var.isnull()).sum(dim='st_ocean') - 1 - vout = var.take(lv, dim='st_ocean').squeeze() - return vout - - def maskSeaIce(var, sic): """Mask seaice. @@ -702,7 +682,6 @@ def maskSeaIce(var, sic): vout = var.where(sic != 0) return vout - def sithick(hi, aice): """Calculate seaice thickness. @@ -722,7 +701,6 @@ def sithick(hi, aice): vout = hi / aice return vout - def sisnconc(sisnthick): """Calculate seas ice? @@ -807,7 +785,7 @@ def calc_global_ave_ocean(var, rho_dzt, area_t): try: vnew = var.weighted(mass).mean(dim=('st_ocean', 'yt_ocean', 'xt_ocean'), skipna=True) - except: + except Exception as e: vnew = var.weighted(mass[:, 0, :, :]).mean(dim=('x', 'y'), skipna=True) return vnew @@ -1267,7 +1245,7 @@ def calc_global_ave_ocean(ctx, var, rho_dzt): mass = rho_dzt * area_t try: vnew=np.average(var,axis=(1,2,3),weights=mass) - except: + except Exception as e: vnew=np.average(var,axis=(1,2),weights=mass[:,0,:,:]) return vnew @@ -1437,7 +1415,7 @@ def calc_depositions(ctx, var, weight=None): (personal communication from M. 
Woodhouse) """ - var_log = logging.getLogger(ctx.obj['var_log']) + #var_log = logging.getLogger(ctx.obj['var_log']) varlist = [] for v in var: v0 = v.sel(model_theta_level_number=1).squeeze(dim='model_theta_level_number') diff --git a/src/mopper/cmip_utils.py b/src/mopper/cmip_utils.py index dd7674c..161e55f 100755 --- a/src/mopper/cmip_utils.py +++ b/src/mopper/cmip_utils.py @@ -25,7 +25,6 @@ import json import csv import ast -import copy import click from collections import OrderedDict @@ -39,7 +38,7 @@ def find_cmip_tables(dreq): with dreq.open(mode='r') as f: reader = csv.reader(f, delimiter='\t') for row in reader: - if not row[0] in tables: + if row[0] not in tables: if (row[0] != 'Notes') and (row[0] != 'MIP table') and (row[0] != '0'): tables.append(f"CMIP6_{row[0]}") f.close() @@ -137,9 +136,9 @@ def read_dreq_vars(ctx, table_id, activity_id): years = ast.literal_eval(row[31]) years = reallocate_years(years, ctx.obj['reference_date']) years = f'"{years}"' - except: + except Exception as e: years = 'all' - except: + except Exception as e: years = 'all' dreq_variables[cmorname] = years f.close() diff --git a/src/mopper/mop_setup.py b/src/mopper/mop_setup.py index 5c1e04b..02132bb 100755 --- a/src/mopper/mop_setup.py +++ b/src/mopper/mop_setup.py @@ -24,7 +24,6 @@ import os import sys import shutil -import yaml import json import csv import click @@ -33,7 +32,9 @@ from json.decoder import JSONDecodeError from importlib.resources import files as import_files -from mopper.setup_utils import * +from mopper.setup_utils import (define_timeshot, adjust_nsteps, + find_custom_tables, write_var_map, write_table) +from mopper.cmip_utils import find_cmip_tables, read_dreq_vars from mopdb.utils import read_yaml @@ -99,10 +100,10 @@ def find_matches(table, var, realm, frequency, varlist): match['timeshot'] = timeshot match['table'] = table match['frequency'] = frequency - if match['realm'] == 'land': - realmdir = 'atmos' - else: - realmdir = match['realm'] + #if match['realm'] == 'land': + # realmdir = 'atmos' + #else: + # realmdir = match['realm'] in_fname = match['fpattern'].split() match['file_structure'] = '' for f in in_fname: @@ -253,8 +254,8 @@ def var_map(ctx, activity_id=None): else: sublist = ctx.obj['appdir'] / sublist # Custom mode vars - if ctx.obj['mode'].lower() == 'custom': - access_version = ctx.obj['access_version'] + #if ctx.obj['mode'].lower() == 'custom': + # access_version = ctx.obj['access_version'] if ctx.obj['force_dreq'] is True: if ctx.obj['dreq'] == 'default': ctx.obj['dreq'] = import_files('mopdata').joinpath( @@ -272,7 +273,7 @@ def var_map(ctx, activity_id=None): create_var_map(table, masters, selection=selection[table]) elif tables.lower() == 'all': mop_log.info(f"Experiment {ctx.obj['exp']}: processing all tables") - if ctx.obj['force_dreq'] == True: + if ctx.obj['force_dreq']: tables = find_cmip_tables(ctx.obj['dreq']) else: tables = find_custom_tables() diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index 6ed8b60..315afec 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -22,19 +22,17 @@ # last updated 15/05/2024 import numpy as np -import glob import re -import os,sys +import os import stat import yaml import xarray as xr import cmor -import calendar import click import logging import cftime -import itertools import copy +import json from functools import partial from pathlib import Path @@ -71,7 +69,7 @@ def config_log(debug, path, stream_level=logging.WARNING): logname = f"{path}/mopper_log.txt" flog = 
logging.FileHandler(logname) try: - os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO); + os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) except OSError: pass flog.setLevel(level) @@ -93,7 +91,7 @@ def config_varlog(debug, logname, pid): logger.setLevel(level) flog = logging.FileHandler(logname) try: - os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO); + os.chmod(logname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO) except OSError: pass flog.setLevel(level) @@ -147,7 +145,7 @@ def get_files(ctx): inrange_files.append( check_in_range(paths, time_dim) ) else: inrange_files.append( check_timestamp(paths) ) - except: + except Exception as e: for i,paths in enumerate(all_files): inrange_files.append( check_in_range(paths, time_dim) ) @@ -205,7 +203,7 @@ def check_vars_in_file(ctx, invars, fname): """Check that all variables needed for calculation are in file else return extra filenames """ - var_log = logging.getLogger(ctx.obj['var_log']) + #var_log = logging.getLogger(ctx.obj['var_log']) ds = xr.open_dataset(fname, decode_times=False) tofind = [v for v in invars if v not in ds.variables] found = [v for v in invars if v not in tofind] @@ -457,7 +455,6 @@ def pseudo_axis(ctx, axis): cmor_name = 'vegtype' return cmor_name, p_vals, p_len - #PP this should eventually just be generated directly by defining the dimension using the same terms # in calculation for meridional overturning @click.pass_context @@ -480,11 +477,13 @@ def create_axis(ctx, axis, table): var_log.info(f"setup of {axis.name} axis complete") return axis_id - -def hybrid_axis(lev, z_ax_id, z_ids): +@click.pass_context +def hybrid_axis(ctx, lev, z_ax_id, z_ids): """Setting up additional hybrid axis information + PP this needs fixing can't possible work now without b_vals, b_bnds?? + lev is cmor_zName? """ - var_log = logging.getLogger(ctx.obj['var_log']) + #var_log = logging.getLogger(ctx.obj['var_log']) hybrid_dict = {'hybrid_height': 'b', 'hybrid_height_half': 'b_half'} orog_vals = getOrog() @@ -503,31 +502,26 @@ def hybrid_axis(lev, z_ax_id, z_ids): zfactor_values=orog_vals) return zfactor_b_id, zfactor_orog_id - @click.pass_context def ij_axis(ctx, ax, ax_name, table): """ """ - var_log = logging.getLogger(ctx.obj['var_log']) + #var_log = logging.getLogger(ctx.obj['var_log']) cmor.set_table(table) ax_id = cmor.axis(table_entry=ax_name, units='1', coord_vals=ax.values) return ax_id - @click.pass_context def ll_axis(ctx, ax, ax_name, ds, table, bounds_list): """ """ var_log = logging.getLogger(ctx.obj['var_log']) - var_log.debug(f"in ll_axis") + var_log.debug("in ll_axis") cmor.set_table(table) cmor_aName = get_cmorname(ax_name, ax) - try: - ax_units = ax.units - except: - ax_units = 'degrees' + ax_units = ax.attrs.get('units', 'degrees') a_bnds = None var_log.debug(f"got cmor name: {cmor_aName}") if cmor_aName in bounds_list: @@ -606,10 +600,10 @@ def get_axis_dim(ctx, var): 'lat_ax': None, 'lon_ax': None, 'j_ax': None, 'i_ax': None, 'p_ax': None, 'e_ax': None} for dim in var.dims: - try: + if dim in var.coords: axis = var[dim] var_log.debug(f"axis found: {axis}") - except: + else: var_log.warning(f"No coordinate variable associated with the dimension {dim}") axis = None # need to file to give a value then??? @@ -694,7 +688,7 @@ def bnds_change(ctx, axis): """Returns True if calculation/resample changes bnds of specified dimension. 
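 
     For example, resampling 1hr data to daily means redefines the
     time bnds, so the original bounds cannot be reused as they are.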
""" - var_log = logging.getLogger(ctx.obj['var_log']) + #var_log = logging.getLogger(ctx.obj['var_log']) dim = axis.name calculation = ctx.obj['calculation'] changed_bnds = False @@ -707,7 +701,6 @@ def bnds_change(ctx, axis): changed_bnds = True return changed_bnds - @click.pass_context def get_bounds(ctx, ds, axis, cmor_name, ax_val=None): """Returns bounds for input dimension, if bounds are not available @@ -800,10 +793,10 @@ def get_bounds_values(ctx, ds, bname): calc = False var_log = logging.getLogger(ctx.obj['var_log']) var_log.debug(f"Getting bounds values for {bname}") + ancil_file = ctx.obj[f"grid_{ctx.obj['realm']}"] if bname in ds.variables: bnds_val = ds[bname].values elif ancil_file != "": - ancil_file = ctx.obj[f"grid_{ctx.obj['realm']}"] fname = f"{ctx.obj['ancils_path']}/{ancil_file}" ancil = xr.open_dataset(fname) if bname in ancil.variables: @@ -902,7 +895,7 @@ def extract_var(ctx, input_ds, tdim, in_missing): if array.dtype.kind == 'i': try: in_missing = int(in_missing) - except: + except Eception as e: in_missing = int(-999) else: array = array.fillna(in_missing) @@ -925,7 +918,7 @@ def define_attrs(ctx): listed in notes file, this is indicated by precending any function in file with a ~. For other fields it checks equality. """ - var_log = logging.getLogger(ctx.obj['var_log']) + #var_log = logging.getLogger(ctx.obj['var_log']) attrs = ctx.obj['attrs'] notes = attrs.get('notes', '') # open file containing notes diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 4b08c5a..2c155a3 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -25,17 +25,22 @@ import click import logging -import sqlite3 import concurrent.futures -import os,sys +import os +import subprocess +import sys import warnings import yaml import cmor -import numpy as np -import xarray as xr - -from mopper.mop_utils import * -from mopper.mop_setup import * +import cftime + +from mopper.mop_utils import (config_log, config_varlog, get_files, + load_data, get_cmorname, pseudo_axis, create_axis, hybrid_axis, + ij_axis, ll_axis, define_grid, get_coords, get_axis_dim, + require_bounds, get_bounds, get_attrs, extract_var, define_attrs) +from mopper.mop_setup import setup_env, var_map, manage_env +from mopper.setup_utils import create_exp_json, edit_json_cv, write_config, + populate_db, count_rows, sum_file_sizes, filelist_sql, write_job from mopdb.mopdb_utils import db_connect, create_table, query warnings.simplefilter(action='ignore', category=FutureWarning) @@ -359,7 +364,7 @@ def mop_process(ctx): # Set up additional hybrid coordinate information if (axes['z_ax'] is not None and cmor_zName in ['hybrid_height', 'hybrid_height_half']): - zfactor_b_id, zfactor_orog_id = hybrid_axis(lev_name, z_ax_id, z_ids) + zfactor_b_id, zfactor_orog_id = hybrid_axis(cmor_zName, z_ax_id, z_ids) # Freeing up memory del dsin @@ -382,11 +387,11 @@ def mop_process(ctx): mop_log.error(f"Unable to define the CMOR variable {ctx.obj['filename']}") var_log.error(f"Unable to define the CMOR variable {e}") return 2 - var_log.info('Writing...') + var_log.info("Writing...") var_log.info(f"Variable shape is {ovar.shape}") status = None # Write timesteps separately if variable potentially exceeding memory - if float(ctx.obj['file_size']) > 4000.0 and time_dim != None: + if float(ctx.obj['file_size']) > 4000.0 and time_dim is not None: for i in range(ovar.shape[0]): data = ovar.isel({time_dim: i}).values status = cmor.write(variable_id, data, ntimes_passed=1) @@ -395,10 +400,10 @@ def mop_process(ctx): status = 
cmor.write(variable_id, ovar.values) if status != 0: mop_log.error(f"Unable to write the CMOR variable: {ctx.obj['filename']}\n") - var_log.error(f"Unable to write the CMOR variable to file\n" + var_log.error("Unable to write the CMOR variable to file\n" + f"See cmor log, status: {status}") return 2 - var_log.info(f"Finished writing") + var_log.info("Finished writing") # Close the CMOR file. path = cmor.close(variable_id, file_name=True) @@ -508,14 +513,12 @@ def process_row(ctx, row): 'json_file_path', 'reference_date', 'version', 'rowid'] for i,val in enumerate(header): record[val] = row[i] - table = record['table'].split('_')[1] # call logging - trange = record['filename'].replace('.nc.','').split("_")[-1] varlog_file = (f"{ctx.obj['var_logs']}/{record['variable_id']}" + f"_{record['table']}_{record['tstart']}.txt") var_log = config_varlog(ctx.obj['debug'], varlog_file, pid) ctx.obj['var_log'] = var_log.name - var_log.info(f"Start processing") + var_log.info("Start processing") var_log.debug(f"Process id: {pid}") msg = process_file(record) var_log.handlers[0].close() diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py index ff1e082..2bc293d 100755 --- a/src/mopper/setup_utils.py +++ b/src/mopper/setup_utils.py @@ -21,28 +21,18 @@ # # last updated 08/04/2024 -import os import sys -import shutil -import calendar -import yaml import json -import csv import sqlite3 -import subprocess -import ast import copy -import re import click import pathlib import logging -from collections import OrderedDict from datetime import datetime#, timedelta from dateutil.relativedelta import relativedelta -from json.decoder import JSONDecodeError -from mopdb.utils import query, write_yaml, read_yaml +from mopdb.utils import query, write_yaml from mopper.cmip_utils import fix_years @@ -129,12 +119,11 @@ def write_config(ctx, fname='exp_config.yaml'): @click.pass_context -def find_custom_tables(ctx): +def find_custom_tables(ctx, cmip=False): """Returns list of tables files in custom table path """ mop_log = logging.getLogger('mop_log') tables = [] - path = ctx.obj['tables_path'] table_files = ctx.obj['tables_path'].rglob("*_*.json") for f in table_files: f = str(f).replace(".json", "") @@ -411,10 +400,10 @@ def adjust_size(opts, insize): resample = opts['resample'] grid_size = insize if 'plevinterp' in calc: - try: + if "," in calc: plevnum = calc.split(',')[-1] - except: - raise('check plevinterp calculation definition plev probably missing') + else: + raise('check plevinterp calculation def plev probably missing') plevnum = float(plevnum.replace(')','')) grid_size = float(insize)/float(opts['levnum'])*plevnum return grid_size @@ -437,7 +426,7 @@ def compute_fsize(ctx, opts, grid_size, frequency): Returns ------- """ - mop_log = logging.getLogger('mop_log') + #mop_log = logging.getLogger('mop_log') # set small number for fx frequency so it always create only one file nstep_day = {'10min': 144, '30min': 48, '1hr': 24, '3hr': 8, '6hr': 4, 'day': 1, '10day': 0.1, 'mon': 1/30, @@ -557,10 +546,6 @@ def process_vars(ctx, maps, opts, cursor): Returns ------- """ - tstep_dict = {'10min': 'minutes=10', '30min': 'minutes=30', - '1hr': 'hours=1', '3hr': 'hours=3', '6hr': 'hours=6', - 'day': 'days=1', '10day': 'days=10', 'mon': 'months=1', - 'yr': 'years=1', 'dec': 'years=10'} unchanged = ['frequency', 'realm', 'table', 'calculation', 'resample', 'positive', 'timeshot'] for mp in maps: @@ -620,7 +605,6 @@ def define_files(ctx, cursor, opts, mp): finish = start + relativedelta(days=1) tstep_dict['fx'] = 
tstep_dict['day'] while (start < finish): - tstep = eval(f"relativedelta({tstep_dict[frq][0]})") half_tstep = eval(f"relativedelta({tstep_dict[frq][1]})") delta = eval(f"relativedelta({interval})") newtime = min(start+delta, finish) @@ -633,6 +617,7 @@ def define_files(ctx, cursor, opts, mp): opts['filepath'], opts['filename'] = build_filename(opts, start, newtime, half_tstep) rowid = add_row(opts, cursor, update) + mop_log.debug(f"Last added row id: {rowid}") start = newtime return From 59015650b833a09932360eb56d80c0cca35e3f5a Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 19:15:43 +1000 Subject: [PATCH 114/137] fixed typowq --- src/mopdb/mopdb.py | 3 +-- src/mopper/mopper.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index 561e728..f0ab45c 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -28,7 +28,7 @@ from mopdb.mopdb_utils import (mapping_sql, cmorvar_sql, read_map, read_map_app4, create_table, write_cmor_table, update_db) -from mopdb.utils import (config_log, db_connect, query, delete_record) +from mopdb.utils import (db_connect, query, delete_record) from mopdb.mopdb_map import (write_varlist, write_map_template, write_catalogue, map_variables, load_vars, get_map_obj) @@ -109,7 +109,6 @@ def mopdb(ctx, debug): ctx.obj={} # set up a default value for flow if none selected for logging ctx.obj['debug'] = debug - #mopdb_log = config_log(debug, logname='mopdb_log') @mopdb.command(name='check') diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 2c155a3..211a000 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -39,8 +39,8 @@ ij_axis, ll_axis, define_grid, get_coords, get_axis_dim, require_bounds, get_bounds, get_attrs, extract_var, define_attrs) from mopper.mop_setup import setup_env, var_map, manage_env -from mopper.setup_utils import create_exp_json, edit_json_cv, write_config, - populate_db, count_rows, sum_file_sizes, filelist_sql, write_job +from mopper.setup_utils import (create_exp_json, edit_json_cv, write_config, + populate_db, count_rows, sum_file_sizes, filelist_sql, write_job) from mopdb.mopdb_utils import db_connect, create_table, query warnings.simplefilter(action='ignore', category=FutureWarning) From 89200cfc954e0716b290c646e50ad29fe5766ac7 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 19:17:47 +1000 Subject: [PATCH 115/137] fixed typo in test-conda workflow --- .github/workflows/mopper-test-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index 1ef10f9..e664ad7 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.10", "3.11"i, "3.12"] + python-version: ["3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4.1.7 with: From c14d05e9c2622aaef7e734c2c3f2a9eac33348f2 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 19:21:28 +1000 Subject: [PATCH 116/137] fixed wrong import --- src/mopdb/mopdb_map.py | 2 +- src/mopper/mopper.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index 135a960..28530c5 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -111,7 +111,7 @@ def get_file_frq(ds, fnext, int2frq): # so we open also next file but get only time axs if max_len == 1: if fnext is 
None: - mopdb_log.info(f"Only 1 file with 1 tstep cannot determine frequency") + mopdb_log.info("Only 1 file with 1 tstep cannot determine frequency") else: dsnext = xr.open_dataset(fnext, decode_times = False) time_axs2 = [d for d in dsnext.dims if 'time' in d] diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 211a000..533090f 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -39,9 +39,10 @@ ij_axis, ll_axis, define_grid, get_coords, get_axis_dim, require_bounds, get_bounds, get_attrs, extract_var, define_attrs) from mopper.mop_setup import setup_env, var_map, manage_env -from mopper.setup_utils import (create_exp_json, edit_json_cv, write_config, +from mopper.setup_utils import (create_exp_json, write_config, populate_db, count_rows, sum_file_sizes, filelist_sql, write_job) from mopdb.mopdb_utils import db_connect, create_table, query +from cmip_utils import edit_json_cv warnings.simplefilter(action='ignore', category=FutureWarning) warnings.simplefilter(action='ignore', category=UserWarning) From 0109567919bdac15fb4473362bfda486bfa27be4 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 19:27:24 +1000 Subject: [PATCH 117/137] fixed wrong import 2 --- src/mopper/mopper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 533090f..dd22deb 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -41,7 +41,7 @@ from mopper.mop_setup import setup_env, var_map, manage_env from mopper.setup_utils import (create_exp_json, write_config, populate_db, count_rows, sum_file_sizes, filelist_sql, write_job) -from mopdb.mopdb_utils import db_connect, create_table, query +from mopdb.utils import db_connect, create_table, query from cmip_utils import edit_json_cv warnings.simplefilter(action='ignore', category=FutureWarning) From 87df3ada5dab3b672030be517266e72e41e7258f Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Tue, 13 Aug 2024 19:30:17 +1000 Subject: [PATCH 118/137] fixed wrong import 3 --- src/mopper/mopper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index dd22deb..91bddca 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -42,7 +42,7 @@ from mopper.setup_utils import (create_exp_json, write_config, populate_db, count_rows, sum_file_sizes, filelist_sql, write_job) from mopdb.utils import db_connect, create_table, query -from cmip_utils import edit_json_cv +from mopper.cmip_utils import edit_json_cv warnings.simplefilter(action='ignore', category=FutureWarning) warnings.simplefilter(action='ignore', category=UserWarning) From f3b693ef0245d1be00a8f2e9778c3a7f592997fc Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 15 Aug 2024 13:34:58 +1000 Subject: [PATCH 119/137] updates and clean up of tests --- .github/workflows/mopper-test-calcs.yaml | 90 ++++++++++-------------- src/mopdb/mopdb.py | 22 +++--- src/mopdb/mopdb_map.py | 7 +- src/mopdb/mopdb_utils.py | 2 +- tests/conftest.py | 27 +++++-- tests/test_calculations.py | 2 - tests/test_mop_utils.py | 10 ++- tests/test_mopdb.py | 62 +++++++++------- tests/test_mopdb_map.py | 9 +-- tests/test_mopdb_utils.py | 10 +-- tests/testdata/varlist.csv | 6 ++ tests/testdata/varlist_ex.csv | 3 - 12 files changed, 127 insertions(+), 123 deletions(-) create mode 100644 tests/testdata/varlist.csv delete mode 100644 tests/testdata/varlist_ex.csv diff --git a/.github/workflows/mopper-test-calcs.yaml b/.github/workflows/mopper-test-calcs.yaml index 
ee830d6..0c34152 100644
--- a/.github/workflows/mopper-test-calcs.yaml
+++ b/.github/workflows/mopper-test-calcs.yaml
@@ -1,67 +1,49 @@
-name: mopper-branch-test
+# This workflow can be used as a template for a workflow that
+# automatically runs only specific tests when pushing
+# to a selected branch.
+name: mopper-specific-tests
 
-#on: [push]
 on:
   push:
     branches:
-      - pytests_sam
+      - class
 
 jobs:
-  build:
-
+  build-linux:
     runs-on: ubuntu-latest
+    timeout-minutes: 60
     strategy:
       max-parallel: 5
       matrix:
-        python-version: ["3.10"]
-
+        python-version: ["3.10", "3.11", "3.12"]
     steps:
-    - uses: actions/checkout@v4
-    #---------------------------------------------------
-    - name: Set up Python 3.10
-      uses: actions/setup-python@v5
-      with:
-        python-version: ${{ matrix.python-version }}
-    #---------------------------------------------------
-    # Install Miniconda
-    - name: Install Miniconda
-      uses: conda-incubator/setup-miniconda@v2
-      with:
-        auto-update-conda: true
-        python-version: ${{ matrix.python-version }}
-
-    # Create and activate conda environment
-    - name: Create and activate conda environment
-      run: |
-        conda env create --name test-env --file conda/environment.yaml
-
-    # Install dependencies from conda
-    - name: Install dependencies
-      run: conda env update --name test-env --file conda/environment.yaml
-    #---------------------------------------------------
-    #- name: Lint with flake8
-    #  run: |
-    #    conda install flake8
-        # stop the build if there are Python syntax errors or undefined names
-    #    flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-    #    flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    # - name: Install package
-    #   run: |
-    #     conda build conda/meta.yaml
-    #---------------------------------------------------
-    - name: Test with pytest
-      run: |
-        conda install -n test-env pytest coverage codecov
-        conda run -n test-env pytest -q tests/test_calculations.py
-    #    conda run -n test-env coverage run --source src -m pytest
-    #---------------------------------------------------
-    #- name: Upload to codecov
-    #  if: steps.build.outcome == 'success'
-    #  run: |
-    #    curl -Os https://uploader.codecov.io/latest/linux/codecov
-    #    chmod +x codecov
-    #    ./codecov
-    #---------------------------------------------------
+    - uses: actions/checkout@v4.1.7
+    - name: Set up Python 3.10/3.11
+      uses: actions/setup-python@v5.1.1
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install Miniconda
+      uses: conda-incubator/setup-miniconda@v3.0.4
+      with:
+        activate-environment: testenv
+        environment-file: conda/testenv.yaml
+        python-version: ${{ matrix.python-version }}
+        channels: conda-forge
+    - name: Lint with ruff
+      shell: bash -el {0}
+      run: |
+        ruff check --output-format=github .
+      continue-on-error: true
+
+    # making sure we are testing installed package
+    - name: Install package
+      shell: bash -el {0}
+      run: |
+        conda activate testenv
+        pip install -e . 
+ - name: Test with pytest + shell: bash -el {0} + run: | + conda run python -m pytest -q tests/test_calculations.py diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index f0ab45c..d9c267c 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -28,7 +28,7 @@ from mopdb.mopdb_utils import (mapping_sql, cmorvar_sql, read_map, read_map_app4, create_table, write_cmor_table, update_db) -from mopdb.utils import (db_connect, query, delete_record) +from mopdb.utils import (config_log, db_connect, query, delete_record) from mopdb.mopdb_map import (write_varlist, write_map_template, write_catalogue, map_variables, load_vars, get_map_obj) @@ -62,7 +62,8 @@ def db_args(f): """Define database click options """ constraints = [ - click.option('--fname', '-f', type=str, required=True, + click.option('--fname', '-f', type=click.Path(exists=True), + required=True, help='Input file: used to update db table (mapping/cmor)'), click.option('--dbname', type=str, required=False, default='default', help='Database relative path by default is package access.db'), @@ -78,8 +79,8 @@ def map_args(f): commands """ constraints = [ - click.option('--fpath', '-f', type=str, required=True, - callback=require_date, + click.option('--fpath', '-f', type=click.Path(exists=True), + required=True, callback=require_date, help=("""Model output files path. For 'template' command can also be file generated by varlist step""")), click.option('--match', '-m', type=str, required=False, @@ -109,6 +110,7 @@ def mopdb(ctx, debug): ctx.obj={} # set up a default value for flow if none selected for logging ctx.obj['debug'] = debug + mopdb_log = config_log(debug, logname='mopdb_log') @mopdb.command(name='check') @@ -340,7 +342,9 @@ def map_template(ctx, fpath, match, dbname, version, alias): # work out if fpath is varlist or path to output fpath = Path(fpath) if fpath.is_file(): + mopdb_log.debug(f"{fpath} is file") map_file, vobjs, fobjs = load_vars(fpath) + mopdb_log.debug(f"loaded data from file") fname = fpath.name mopdb_log.debug(f"Imported {len(vobjs)} objects from file {fpath}") mopdb_log.debug(f"Is mapping file? 
{map_file}") @@ -358,7 +362,8 @@ def map_template(ctx, fpath, match, dbname, version, alias): @mopdb.command(name='intake') @map_args -@click.option('--filelist','-fl', type=str, required=False, default=None, +@click.option('--filelist','-fl', type=click.Path(exists=True), + required=False, default=None, help='Map or varlist csv file relative path') @click.pass_context def write_intake(ctx, fpath, match, filelist, dbname, version, alias): @@ -474,9 +479,6 @@ def update_map(ctx, dbname, fname, alias): return None - return None - - @mopdb.command(name='varlist') @map_args @click.pass_context @@ -514,8 +516,8 @@ def model_vars(ctx, fpath, match, dbname, version, alias): @mopdb.command(name='del') -@click.option('--dbname', type=str, required=True, - help='Database relative path') +@click.option('--dbname', type=click.Path(exists=True), + required=True, help='Database relative path') @click.option('--table', '-t', type=str, required=True, help='DB table to remove records from') @click.option('--pair', '-p', type=(str, str), required=True, diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index 28530c5..4fb78df 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -464,7 +464,8 @@ def write_vars(vlist, fwriter, div, conn=None, sortby='cmor_var'): var = check_realm_units(conn, var) dlist.append( var.__dict__ ) for dvar in sorted(dlist, key=itemgetter(sortby)): - dvar.pop('match') + if 'match' in dvar.keys(): + dvar.pop('match') fwriter.writerow(dvar) return @@ -598,7 +599,7 @@ def add_mapvars(vobjs, lines, path_list, alias): def load_vars(fname, indir=None): """Returns Variable and FPattern objs from varlist or map file. """ - #mopdb_log = logging.getLogger('mopdb_log') + mopdb_log = logging.getLogger('mopdb_log') vobjs = [] fobjs = {} if indir is not None: @@ -607,7 +608,7 @@ def load_vars(fname, indir=None): with open(fname, 'r') as csvfile: reader = csv.DictReader(csvfile, delimiter=';') rows = list(reader) - #check_varlist(rows, fname) + #mopdb_log.debug(f"{rows}") # set fobjs patterns = list(set(x['fpattern'] for x in rows)) for pat in patterns: diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index 6de2ddf..c1f436d 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -395,7 +395,7 @@ def check_varlist(rows, fname): elif (not any( x in row['frequency'] for x in frq_list) or row['realm'] not in realm_list): mopdb_log.error(f""" Check frequency and realm in {fname}. 
- Some values might be invalid and need fixing""") + Some values might be invalid and need fixing""") sys.exit() return diff --git a/tests/conftest.py b/tests/conftest.py index 6123524..e430ed8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,10 +18,9 @@ import pytest import os import sqlite3 -import xarray as xr -import numpy as np -import pandas as pd -import datetime +#import xarray as xr +#import numpy as np +#import pandas as pd import logging import csv import pyfakefs @@ -40,6 +39,7 @@ # consecutive files with a 1-time step time axis dsonestep = os.path.join(TESTS_DATA, "onetstep.nc") dsonestep2 = os.path.join(TESTS_DATA, "onetstep_next.nc") +# varlist, map file examples @pytest.fixture def fake_fs(fs): # pylint:disable=invalid-name @@ -48,6 +48,11 @@ def fake_fs(fs): # pylint:disable=invalid-name """ yield fs +@pytest.fixture +def vlistcsv(): + vlistcsv = os.path.join(TESTS_DATA, "varlist.csv") + return vlistcsv + # setting up fixtures for databases: access.db and mopper.db @pytest.fixture def session(): @@ -98,7 +103,7 @@ def test_check_timestamp(caplog): @pytest.fixture def varlist_rows(): - # read list of vars from iexample file + # read list of vars from example file with open('testdata/varlist_ex.csv', 'r') as csvfile: reader = csv.DictReader(csvfile, delimiter=';') rows = list(reader) @@ -150,3 +155,15 @@ def varobjs(mapvar_obj): mvobj.name = 'hfls' vobjs.append(mvobj) return vobjs + + +@pytest.fixture +def output_file(tmp_path): + # create your file manually here using the tmp_path fixture + # or just import a static pre-built mock file + # something like: + target_output = os.path.join(tmp_path,'mydoc.csv') + with open(target_output, 'w+'): + pass + # write stuff here + return target_output diff --git a/tests/test_calculations.py b/tests/test_calculations.py index 692211f..eedf5c0 100644 --- a/tests/test_calculations.py +++ b/tests/test_calculations.py @@ -15,7 +15,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import xarray.testing as xrtest import numpy.testing as nptest import xarray as xr import numpy as np @@ -61,7 +60,6 @@ def create_var(nlat, nlon, ntime=None, nlev=None, sdepth=False, seed=100): def test_calc_topsoil(): mrsol = create_var(2, 3, ntime=4, sdepth=True) - #print(mrsol) expected = mrsol.isel(depth=0) + mrsol.isel(depth=1)/3.0 out = calc_topsoil(mrsol) xrtest.assert_allclose(out, expected, rtol=1e-05) diff --git a/tests/test_mop_utils.py b/tests/test_mop_utils.py index b47e158..15fcd9d 100644 --- a/tests/test_mop_utils.py +++ b/tests/test_mop_utils.py @@ -15,15 +15,13 @@ # See the License for the specific language governing permissions and # limitations under the License.
-import pytest +#import pytest +import click +import xarray as xr import numpy as np import pandas as pd -from mopper.mop_utils import * +from mopper.mop_utils import (check_timestamp, get_cmorname,) -#try: -# import unittest.mock as mock -#except ImportError: -# import mock ctx = click.Context(click.Command('cmd'), obj={'sel_start': '198302170600', 'sel_end': '198302181300', diff --git a/tests/test_mopdb.py b/tests/test_mopdb.py index e570fdb..b6c1e50 100644 --- a/tests/test_mopdb.py +++ b/tests/test_mopdb.py @@ -17,13 +17,25 @@ import pytest import os -import sqlite3 -from mopdb.mopdb import * +import logging +#from mopdb.mopdb import * from click.testing import CliRunner +from conftest import vlistcsv +from pytest import CaptureFixture -@pytest.mark.parametrize('subcommand', ['varlist', 'template', 'check', 'cmor', 'table', 'map']) -def test_mopdb(command, subcommand, runner): - ctx = click.Context(click.Command('cmd'), obj={'prop': 'A Context'}) + +@pytest.fixture(scope='module') +def runner(): + return CliRunner() + +def test_command(runner): + result = runner.invoke(mopdb, ['--help']) + assert result.exit_code == 0 + +@pytest.mark.parametrize('subcommand', ['varlist', 'template', + 'intake', 'check', 'cmor', 'table', 'map', 'del']) +def test_subcmd(subcommand, runner): + ctx = click.Context(click.Command('mopdb'), obj={'prop': 'A Context'}) with ctx: result = runner.invoke(mopdb, ['--help']) assert result.exit_code == 0 @@ -31,29 +43,27 @@ def test_mopdb(command, subcommand, runner): assert result.exit_code == 0 @pytest.mark.usefixtures("setup_access_db") # 1 -def test_template(session): - - runner = CliRunner() - - with runner.isolated_filesystem(): - with open('varlist.txt', 'w') as f: - f.write('name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;dtype;size;nsteps;file_name;long_name;standard_name') - f.write('fld_s03i236;tas;K;time lat lon,mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;2081;cm000a.pm;TEMPERATURE AT 1.5M;air_temperature') - f.write('fld_s03i237;huss;1;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;2081;cm000a.pm;SPECIFIC HUMIDITY AT 1.5M;specific_humidity') - f.write('fld_s05i205;prrc;kg m-2 s-1;time_0 lat lon;3hr;atmos;area: time: mean;CMIP6_E3hr;float32;110592;578880;cm000a.p8;CONVECTIVE RAINFALL RATE KG/M2/S;convective_rainfall_flux') - f.write('fld_s03i236;tas;K;time lat lon;day;atmos;area: time: mean;CMIP6_day;float32;110592;74772;cm000a.pd;TEMPERATURE AT 1.5M;air_temperature') - - result = runner.invoke(mopdb, ['template', '-f varlist.txt', '-vCM2']) - #assert result.exit_code == 0 - assert 'Opened database ' in result.output - #assert 'Definable cmip var' in result.output -#Pass temp_dir to control where the temporary directory is created. The directory will not be removed by Click in this case. This is useful to integrate with a framework like Pytest that manages temporary files. 
+def test_template(session, runner, tmp_path, caplog, + capsys: CaptureFixture): -#def test_keep_dir(tmp_path): -# runner = CliRunner() + caplog.set_level(logging.DEBUG, logger='mopdb_log') + with capsys.disabled() as disabled: + with runner.isolated_filesystem(temp_dir=tmp_path) as td: + os.mkdir("myfiles") + with open('myfiles/varlist.csv', 'w') as f: + f.write('name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;dtype;size;nsteps;fpattern;long_name;standard_name\n') + f.write('fld_s03i236;tas;K;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;2081;cm000a.pm;TEMPERATURE AT 1.5M;air_temperature\n') + f.write('fld_s03i237;huss;1;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;2081;cm000a.pm;SPECIFIC HUMIDITY AT 1.5M;specific_humidity\n') + f.write('fld_s05i205;prrc;kg m-2 s-1;time_0 lat lon;3hr;atmos;area: time: mean;CMIP6_E3hr;float32;110592;578880;cm000a.p8;CONVECTIVE RAINFALL RATE KG/M2/S;convective_rainfall_flux\n') + f.write('fld_s03i236;tas;K;time lat lon;day;atmos;area: time: mean;CMIP6_day;float32;110592;74772;cm000a.pd;TEMPERATURE AT 1.5M;air_temperature\n') -# with runner.isolated_filesystem(temp_dir=tmp_path) as td: -# ... + args = ['--debug', 'template', '-f', 'myfiles/varlist.csv', '-v', 'CM2'] + result = runner.invoke(mopdb, args) + #assert result.exit_code == 0 + assert 'Opened database ' in caplog.messages[0] + assert 'myfiles/varlist.csv is file' in caplog.messages + #assert caplog.messages[-1] == 'Finished writing variables to mapping template' + #assert 'Definable cmip var' in result.output #def test_with_context(): # ctx = click.Context(click.Command('cmd'), obj={'prop': 'A Context'}) diff --git a/tests/test_mopdb_map.py b/tests/test_mopdb_map.py index 8b7a5e9..3372db6 100644 --- a/tests/test_mopdb_map.py +++ b/tests/test_mopdb_map.py @@ -17,13 +17,10 @@ import pytest import os -import sqlite3 -import click import logging -import itertools -from mopdb.mopdb_map import * -from mopdb.mopdb_class import MapVariable, Variable, FPattern -from conftest import * +from mopdb.mopdb_map import (add_var, get_file_frq) +#from mopdb.mopdb_class import MapVariable, Variable, FPattern +#from conftest import * TESTS_HOME = os.path.abspath(os.path.dirname(__file__)) diff --git a/tests/test_mopdb_utils.py b/tests/test_mopdb_utils.py index 0f872a9..b5409b3 100644 --- a/tests/test_mopdb_utils.py +++ b/tests/test_mopdb_utils.py @@ -15,16 +15,12 @@ # See the License for the specific language governing permissions and # limitations under the License.
-import pytest -import os -import sqlite3 -import click +#import pytest import logging import itertools -from mopdb.mopdb_utils import * -from mopdb.mopdb_class import MapVariable, Variable, FPattern +from mopdb.mopdb_utils import (get_date_pattern, ) +#from mopdb.mopdb_class import MapVariable, Variable, FPattern -#from click.testing import CliRunner diff --git a/tests/testdata/varlist.csv b/tests/testdata/varlist.csv new file mode 100644 index 0000000..f04cc58 --- /dev/null +++ b/tests/testdata/varlist.csv @@ -0,0 +1,6 @@ +name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;fpattern;long_name;standard_name +fld_s00i004;theta;K;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CM2_mon;float32;9400320;12;cw323a.pm;THETA AFTER TIMESTEP;air_potential_temperature +fld_s00i010;hus;1;time model_theta_level_number lat lon;mon;atmos;area: time: mean;CMIP6_CFmon;float32;9400320;12;cw323a.pm;SPECIFIC HUMIDITY AFTER TIMESTEP;specific_humidity +fld_s00i024;ts;K;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE TEMPERATURE AFTER TIMESTEP;surface_temperature +fld_s00i031;siconca;%;time lat lon;mon;atmos;area: time: mean;;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction +fld_s03i234;hfls;W m-2;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE LATENT HEAT FLUX W/M2;surface_upward_latent_heat_flu diff --git a/tests/testdata/varlist_ex.csv b/tests/testdata/varlist_ex.csv deleted file mode 100644 index 780142d..0000000 --- a/tests/testdata/varlist_ex.csv +++ /dev/null @@ -1,3 +0,0 @@ -name;cmor_var;units;dimensions;frequency;realm;cell_methods;cmor_table;vtype;size;nsteps;fpattern;long_name;standard_name -fld_s03i236;tas;degC;time_0 lat lon;1hr;atmos;area: time: mean;AUS2200_A1hr;float32;22048000;96;umnsa_slv_;TEMPERATURE AT 1.5M;air_temperature -fld_s00i031;siconca;%;time lat lon;mon;atmos;area: time: mean;;float32;110592;12;cw323a.pm;FRAC OF SEA ICE IN SEA AFTER TSTEP;sea_ice_area_fraction fld_s03i234;hfls;W m-2;time lat lon;mon;atmos;area: time: mean;CMIP6_Amon;float32;110592;12;cw323a.pm;SURFACE LATENT HEAT FLUX W/M2;surface_upward_latent_heat_flu From a1921865661dcd3d74950bf31d52387df318a62e Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Thu, 15 Aug 2024 14:47:55 +1000 Subject: [PATCH 120/137] fixed import in tests --- src/mopdb/mopdb.py | 5 ++--- src/mopdb/mopdb_map.py | 4 ++-- src/mopper/calculations.py | 11 +++++++---- src/mopper/mop_utils.py | 2 +- src/mopper/mopper.py | 3 +-- src/mopper/setup_utils.py | 2 +- tests/conftest.py | 16 +++++++++++----- tests/test_calculations.py | 16 ++++++++-------- tests/test_mop_utils.py | 1 + tests/test_mopdb.py | 5 +++-- tests/test_mopdb_map.py | 1 + 11 files changed, 38 insertions(+), 28 deletions(-) diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index d9c267c..d458811 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -110,7 +110,7 @@ def mopdb(ctx, debug): ctx.obj={} # set up a default value for flow if none selected for logging ctx.obj['debug'] = debug - mopdb_log = config_log(debug, logname='mopdb_log') + config_log(debug, logname='mopdb_log') @mopdb.command(name='check') @@ -344,10 +344,9 @@ def map_template(ctx, fpath, match, dbname, version, alias): if fpath.is_file(): mopdb_log.debug(f"{fpath} is file") map_file, vobjs, fobjs = load_vars(fpath) - mopdb_log.debug(f"loaded data from file") fname = fpath.name mopdb_log.debug(f"Imported {len(vobjs)} objects from file {fpath}") - 
mopdb_log.debug(f"Is mapping file? {map_file}") + mopdb_log.debug(f"File is mapping: {map_file}") else: mopdb_log.debug(f"Calling write_varlist() from template: {fpath}") fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index 4fb78df..885e3de 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -99,7 +99,7 @@ def get_file_frq(ds, fnext, int2frq): frq = {} # retrieve all time axes time_axs = [d for d in ds.dims if 'time' in d] - time_axs_len = set(len(ds[d]) for d in time_axs) + #time_axs_len = set(len(ds[d]) for d in time_axs) time_axs.sort(key=lambda x: len(ds[x]), reverse=True) mopdb_log.debug(f"in get_file_frq, time_axs: {time_axs}") if len(time_axs) > 0: @@ -608,7 +608,7 @@ def load_vars(fname, indir=None): with open(fname, 'r') as csvfile: reader = csv.DictReader(csvfile, delimiter=';') rows = list(reader) - #mopdb_log.debug(f"{rows}") + mopdb_log.debug(f"Loaded file with {len(rows)} rows") # set fobjs patterns = list(set(x['fpattern'] for x in rows)) for pat in patterns: diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py index ab8462c..568285c 100644 --- a/src/mopper/calculations.py +++ b/src/mopper/calculations.py @@ -917,7 +917,7 @@ def K_degC(ctx, var): def tos_3hr(var, landfrac): - """notes + """not sure this is needed?? Parameters ---------- @@ -928,7 +928,7 @@ def tos_3hr(var, landfrac): vout : Xarray dataset """ - v = K_degC(var) + var = K_degC(var) vout = xr.zeros_like(var) t = len(var.time) @@ -1132,7 +1132,8 @@ def average_tile(var, tilefrac=None, lfrac=1, landfrac=None, lev=None): return vout -def calc_topsoil(soilvar): +@click.pass_context +def calc_topsoil(ctx, soilvar): """Returns the variable over the first 10cm of soil. 
Parameters @@ -1147,9 +1148,11 @@ def calc_topsoil(soilvar): topsoil : Xarray DataArray Variable defined on top 10cm of soil """ + var_log = logging.getLogger(ctx.obj['var_log']) depth = soilvar.depth # find index of bottom depth level including the first 10cm of soil - maxlev = depth.where(depth >= 0.1).argmin().values + maxlev = np.nanargmin(depth.where(depth >= 0.1).values) + var_log.debug(f"Max level of soil used is {maxlev}") # calculate the fraction of maxlev which falls in first 10cm fraction = (0.1 - depth[maxlev -1])/(depth[maxlev] - depth[maxlev-1]) topsoil = soilvar.isel(depth=slice(0,maxlev)).sum(dim='depth') diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index 315afec..12ef93b 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -895,7 +895,7 @@ def extract_var(ctx, input_ds, tdim, in_missing): if array.dtype.kind == 'i': try: in_missing = int(in_missing) - except Eception as e: + except Exception as e: in_missing = int(-999) else: array = array.fillna(in_missing) diff --git a/src/mopper/mopper.py b/src/mopper/mopper.py index 91bddca..a8f57d5 100644 --- a/src/mopper/mopper.py +++ b/src/mopper/mopper.py @@ -226,7 +226,6 @@ def mop_process(ctx): mop_log = logging.getLogger('mop_log') var_log = logging.getLogger(ctx.obj['var_log']) - default_cal = "gregorian" logname = f"{ctx.obj['variable_id']}_{ctx.obj['table']}_{ctx.obj['tstart']}" # Setup CMOR @@ -478,7 +477,7 @@ def process_file(ctx, row): #Check if output file matches what we expect var_log.info(f"Output file: {ret}") if ret == expected_file: - var_log.info(f"Expected and cmor file paths match") + var_log.info("Expected and cmor file paths match") msg = f"Successfully processed variable: {var_msg}\n" status = "processed" else : diff --git a/src/mopper/setup_utils.py b/src/mopper/setup_utils.py index 2bc293d..05dfaca 100755 --- a/src/mopper/setup_utils.py +++ b/src/mopper/setup_utils.py @@ -397,7 +397,7 @@ def adjust_size(opts, insize): # volume,any vertical sum # resample will affect frequency but that should be already taken into account in mapping calc = opts['calculation'] - resample = opts['resample'] + #resample = opts['resample'] grid_size = insize if 'plevinterp' in calc: if "," in calc: diff --git a/tests/conftest.py b/tests/conftest.py index e430ed8..3770a94 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,9 +18,7 @@ import pytest import os import sqlite3 -#import xarray as xr -#import numpy as np -#import pandas as pd +import click import logging import csv import pyfakefs @@ -48,6 +46,13 @@ def fake_fs(fs): # pylint:disable=invalid-name """ yield fs +@pytest.fixture +def ctx(): + ctx = click.Context(click.Command('cmd'), + obj={'sel_start': '198302170600', 'sel_end': '198302181300', + 'realm': 'atmos', 'frequency': '1hr', 'var_log': 'varlog_1'}) + return ctx + @pytest.fixture def vlistcsv(): vlistcsv = os.path.join(TESTS_DATA, "varlist.csv") @@ -91,8 +96,8 @@ def setup_access_db(session): @pytest.fixture def setup_mopper_db(session): - filelist_sql = mapping_sql() - session.execute(filelist_sql) + flist_sql = filelist_sql() + session.execute(flist_sql) session.execute('''INSERT INTO filelist VALUES ("/testdata/atmos/umnsa_spec_*.nc", "/testdata/mjo-elnino/v1-0/A10min/", "tas_AUS2200_mjo-elnino_subhrPt_20160101001000-20160102000000.nc", "fld_s03i236", "tas", "AUS2200_A10min", "subhrPt", "atmos", "point", "20160101T0005", "20160102T0000", "201601010000", "201601012355", "unprocessed", "3027.83203125", "mjo-elnino", "K", "AUS2200", "AUS2200", 
"/testdata/mjo-elnino/mjo-elnino.json", "1970-01-01", "v1-0")''') session.connection.commit() @@ -121,6 +126,7 @@ def add_var_out(): vlist = [{'cmor_var': '', 'input_vars': '', 'calculation': '', 'units': '' ,'realm': '', 'positive': '', 'version': '', 'cmor_table': ''} ] + return vlist @pytest.fixture def map_rows(): diff --git a/tests/test_calculations.py b/tests/test_calculations.py index eedf5c0..8c70d28 100644 --- a/tests/test_calculations.py +++ b/tests/test_calculations.py @@ -17,14 +17,13 @@ import numpy.testing as nptest import xarray as xr +import xarray.testing as xrtest import numpy as np import pandas as pd import logging -from mopper.calculations import * +from mopper.calculations import (overturn_stream, calc_topsoil,) +from conftest import ctx -ctx = click.Context(click.Command('cmd'), - obj={'sel_start': '198302170600', 'sel_end': '198302181300', - 'realm': 'atmos', 'frequency': '1hr', 'var_log': 'varlog_1'}) def create_var(nlat, nlon, ntime=None, nlev=None, sdepth=False, seed=100): @@ -58,14 +57,15 @@ def create_var(nlat, nlon, ntime=None, nlev=None, sdepth=False, seed=100): return da -def test_calc_topsoil(): +def test_calc_topsoil(caplog, ctx): + caplog.set_level(logging.DEBUG, logger='varlog_1') mrsol = create_var(2, 3, ntime=4, sdepth=True) expected = mrsol.isel(depth=0) + mrsol.isel(depth=1)/3.0 - out = calc_topsoil(mrsol) + with ctx: + out = calc_topsoil(mrsol) xrtest.assert_allclose(out, expected, rtol=1e-05) -def test_overturn_stream(caplog): - global ctx +def test_overturn_stream(caplog, ctx): caplog.set_level(logging.DEBUG, logger='varlog_1') # set up input dims = ['time', 'depth', 'lat', 'lon'] diff --git a/tests/test_mop_utils.py b/tests/test_mop_utils.py index 15fcd9d..8cf28a2 100644 --- a/tests/test_mop_utils.py +++ b/tests/test_mop_utils.py @@ -20,6 +20,7 @@ import xarray as xr import numpy as np import pandas as pd +import logging from mopper.mop_utils import (check_timestamp, get_cmorname,) diff --git a/tests/test_mopdb.py b/tests/test_mopdb.py index b6c1e50..b377077 100644 --- a/tests/test_mopdb.py +++ b/tests/test_mopdb.py @@ -18,9 +18,10 @@ import pytest import os import logging -#from mopdb.mopdb import * +import click +from mopdb.mopdb import mopdb from click.testing import CliRunner -from conftest import vlistcsv +#from conftest import vlistcsv from pytest import CaptureFixture diff --git a/tests/test_mopdb_map.py b/tests/test_mopdb_map.py index 3372db6..9b66447 100644 --- a/tests/test_mopdb_map.py +++ b/tests/test_mopdb_map.py @@ -18,6 +18,7 @@ import pytest import os import logging +import xarray as xr from mopdb.mopdb_map import (add_var, get_file_frq) #from mopdb.mopdb_class import MapVariable, Variable, FPattern #from conftest import * From 1737476edd5ffc7d24bc94a18c168e736599739f Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 16 Aug 2024 13:33:08 +1000 Subject: [PATCH 121/137] moving shell commnad in conda action --- .github/workflows/mopper-test-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index e664ad7..6330e6c 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -30,8 +30,8 @@ jobs: environment-file: conda/environment.yaml # Path to the build conda environment show-channel-urls: true # - name: Build but do not upload the conda packages - uses: uibcdf/action-build-and-upload-conda-packages@v1.3.0 shell: bash -el {0} + uses: 
uibcdf/action-build-and-upload-conda-packages@v1.3.0 with: meta_yaml_dir: conda python-version: ${{ matrix.python-version }} # Values previously defined in `matrix` From f7e7cea7195c85024fe8752e40fb6910f25ed86a Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 16 Aug 2024 13:52:47 +1000 Subject: [PATCH 122/137] removing shell command in conda action --- .github/workflows/mopper-test-conda.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index 6330e6c..102b423 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -30,9 +30,9 @@ jobs: environment-file: conda/environment.yaml # Path to the build conda environment show-channel-urls: true # - name: Build but do not upload the conda packages - shell: bash -el {0} uses: uibcdf/action-build-and-upload-conda-packages@v1.3.0 with: + #shell: bash -el {0} meta_yaml_dir: conda python-version: ${{ matrix.python-version }} # Values previously defined in `matrix` platform_linux-64: true From ae510e2d5e7e0eadd67a5ade63632b2a050959da Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 16 Aug 2024 14:05:34 +1000 Subject: [PATCH 123/137] trying alternative way of getting version for conda action --- .github/workflows/mopper-test-conda.yaml | 3 ++- conda/meta.yaml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index 102b423..76fb53f 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -22,6 +22,8 @@ jobs: - uses: actions/checkout@v4.1.7 with: fetch-depth: 0 + - name: Set env + run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV - name: Conda environment creation and activation uses: conda-incubator/setup-miniconda@v3.0.4 with: @@ -32,7 +34,6 @@ jobs: - name: Build but do not upload the conda packages uses: uibcdf/action-build-and-upload-conda-packages@v1.3.0 with: - #shell: bash -el {0} meta_yaml_dir: conda python-version: ${{ matrix.python-version }} # Values previously defined in `matrix` platform_linux-64: true diff --git a/conda/meta.yaml b/conda/meta.yaml index c5fb0cf..8cda7f3 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,6 +1,6 @@ package: name: mopper - version: "{{ environ['GIT_DESCRIBE_TAG'] }}" + version: "{{ env.RELEASE_VERSION }}" #source: # path: ./ From a5f24ee28b318c4b3457de2e8f5dbe7d3c88fd10 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 16 Aug 2024 15:45:56 +1000 Subject: [PATCH 124/137] hopefully fixed meta.yaml --- conda/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 8cda7f3..9ed9b45 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,6 +1,6 @@ package: name: mopper - version: "{{ env.RELEASE_VERSION }}" + version: "{{ environ.get('GIT_DESCRIBE_TAG', '0.9') }}" #source: # path: ./ From b50d34b4b37ec741b9a6263bcedadc86a44c49fe Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 16 Aug 2024 15:51:49 +1000 Subject: [PATCH 125/137] fixed source in meta.yaml --- conda/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 9ed9b45..88fdd13 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -7,10 +7,10 @@ package: source: #url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR/archive/refs/tags/{{version}}.tar.gz - #git_url: 
https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git + git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git#prerelease #git_rev: "{{ version }}" #git_depth: 1 # (Defaults to -1/not shallow) - path: ../src + #path: ../ build: number: 1 From 8c0b4ea185c20a426fc8b58f9461be24404d98f5 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 16 Aug 2024 16:01:13 +1000 Subject: [PATCH 126/137] fixed source in meta.yaml 2 --- conda/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 88fdd13..aadef38 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -7,9 +7,9 @@ package: source: #url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR/archive/refs/tags/{{version}}.tar.gz - git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git#prerelease + git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git@prerelease #git_rev: "{{ version }}" - #git_depth: 1 # (Defaults to -1/not shallow) + git_depth: 1 # (Defaults to -1/not shallow) #path: ../ build: From 492961d01d163acb5f76f0e74a0a95f33fd5441b Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 16 Aug 2024 16:09:19 +1000 Subject: [PATCH 127/137] fixed source in meta.yaml 3 --- conda/meta.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index aadef38..57da1b0 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -7,7 +7,8 @@ package: source: #url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR/archive/refs/tags/{{version}}.tar.gz - git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git@prerelease + git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git + git_tag: prerelease #git_rev: "{{ version }}" git_depth: 1 # (Defaults to -1/not shallow) #path: ../ From 11e1c09dfa8de790df7d2a2e7044d65468261e39 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 16 Aug 2024 16:28:03 +1000 Subject: [PATCH 128/137] fixing git_depth --- conda/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 57da1b0..a8b2f5a 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -10,7 +10,7 @@ source: git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git git_tag: prerelease #git_rev: "{{ version }}" - git_depth: 1 # (Defaults to -1/not shallow) + #git_depth: 1 # (Defaults to -1/not shallow) #path: ../ build: From 3cef82897c6adf5092d09acfa804398adc7d4eb3 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Wed, 21 Aug 2024 12:39:34 +1000 Subject: [PATCH 129/137] fixed bug when detecting variables from different realms at once --- src/mopdb/mopdb.py | 2 +- src/mopdb/mopdb_map.py | 2 +- src/mopdb/mopdb_utils.py | 2 +- src/mopper/calculations.py | 4 +++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/mopdb/mopdb.py b/src/mopdb/mopdb.py index d458811..7e50bed 100644 --- a/src/mopdb/mopdb.py +++ b/src/mopdb/mopdb.py @@ -403,7 +403,7 @@ def write_intake(ctx, fpath, match, filelist, dbname, version, alias): if fpath.is_file(): mopdb_log.error(f""" {fpath} should be absolute or relative path to model output. 
- To pass a varlist or map file use --filelist/-f""") + To pass a varlist or map file use --filelist/-fl""") elif filelist is None: mopdb_log.debug(f"Calling write_varlist() from intake: {fpath}") fname, vobjs, fobjs = write_varlist(conn, fpath, match, version, alias) diff --git a/src/mopdb/mopdb_map.py b/src/mopdb/mopdb_map.py index 885e3de..cd26112 100644 --- a/src/mopdb/mopdb_map.py +++ b/src/mopdb/mopdb_map.py @@ -162,7 +162,7 @@ def write_varlist(conn, indir, match, version, alias): if fpattern in patterns: continue patterns.append(fpattern) - fobj = FPattern(fpattern, Path(indir)) + fobj = FPattern(fpattern, fpath.parent) #pattern_list = list_files(indir, f"{fpattern}*") nfiles = len(fobj.files) mopdb_log.debug(f"File pattern, number of files: {fpattern}, {nfiles}") diff --git a/src/mopdb/mopdb_utils.py b/src/mopdb/mopdb_utils.py index c1f436d..b3f8bf4 100644 --- a/src/mopdb/mopdb_utils.py +++ b/src/mopdb/mopdb_utils.py @@ -364,7 +364,7 @@ def check_realm_units(conn, var): def get_realm(version, ds): '''Try to retrieve realm if using path failed''' - + realm = 'NArealm' mopdb_log = logging.getLogger('mopdb_log') if version == 'AUS2200': realm = 'atmos' diff --git a/src/mopper/calculations.py b/src/mopper/calculations.py index 568285c..ba68dc4 100644 --- a/src/mopper/calculations.py +++ b/src/mopper/calculations.py @@ -813,9 +813,11 @@ def get_plev(ctx, levnum): @click.pass_context def plevinterp(ctx, var, pmod, levnum): """Interpolating var from model levels to pressure levels - + _extended_summary_ + Based on function from Dale Roberts (currently ANU) + Parameters ---------- var : Xarray DataArray From a376cc230ce1f06a5aa7f1e20736f54cf89c5160 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 23 Aug 2024 08:38:27 +1000 Subject: [PATCH 130/137] minor adjustment to docs, conf files and code after tests --- ACDD_conf.yaml | 2 +- CMIP6_conf.yaml | 4 +++- docs/mopdb_command.rst | 12 ++++++++++-- src/mopdata/update_db.py.txt | 9 ++++++++- src/mopper/mop_utils.py | 11 ++++++++--- 5 files changed, 30 insertions(+), 8 deletions(-) diff --git a/ACDD_conf.yaml b/ACDD_conf.yaml index f507aeb..a86f373 100755 --- a/ACDD_conf.yaml +++ b/ACDD_conf.yaml @@ -174,4 +174,4 @@ attrs: parent: !!bool false # CMOR will add a tracking_id if you want to define a prefix add here tracking_id_prefix: - comment: "post-processed using ACCESS-MOPPeR v0.6.0 https://doi.org/10.5281/zenodo.10346216" + comment: "post-processed using ACCESS-MOPPeR v1.0.0 https://doi.org/10.5281/zenodo.10346216" diff --git a/CMIP6_conf.yaml b/CMIP6_conf.yaml index 9ae85aa..c421df0 100755 --- a/CMIP6_conf.yaml +++ b/CMIP6_conf.yaml @@ -91,6 +91,8 @@ cmor: # walltime in "hh:mm:ss" walltime: '8:00:00' mode: cmip6 + # conda_env: /bin/activate + conda_env: default # Global attributes: these will be added to each files comment unwanted ones # the labels CMIP6/ACDD indicates which ones are necessary to comply with respective standards @@ -161,4 +163,4 @@ attrs: #CMOR will add a tracking_id if you want to define a prefix add here tracking_id_prefix: Conventions: "CF-1.7 CMIP-6.2" - comment: "post-processed using ACCESS-MOPPeR v0.6.0 https://doi.org/10.5281/zenodo.10346216" + comment: "post-processed using ACCESS-MOPPeR v1.0.0 https://doi.org/10.5281/zenodo.10346216" diff --git a/docs/mopdb_command.rst b/docs/mopdb_command.rst index 5c24196..f60d958 100644 --- a/docs/mopdb_command.rst +++ b/docs/mopdb_command.rst @@ -57,16 +57,19 @@ A user that wants to create a mapping table for another AUS2200 simulation can u Create a mapping file 
--------------------- -.. code-block:: This can be done by providing the model output path and a pattern to match, or directly a varlist file. From output path: +.. code-block:: + mopdb template -f <output path> -m <match> -v <version> From varlist file: +.. code-block:: + mopdb template -f <varlist file> -v <version> This will create a map_<alias>.csv file using, if available, information from the mapping table. @@ -110,20 +113,25 @@ The other groups of records require checking, as either the version or the frequ Create an intake catalogue -------------------------- -.. code-block:: This represents an extra step on top of the mapping, so it can start directly from an existing mapping or from scratch by providing the model output path and a match. From output path: +.. code-block:: + mopdb intake -f <output path> -m <match> -v <version> { -a <alias> } From varlist file: +.. code-block:: + mopdb intake -f <output path> -fl <varlist file> -v <version> { -a <alias> } From mapping file: +.. code-block:: + mopdb intake -f <output path> -fl <mapping file> -v <version> { -a <alias> } NB the model output path is still needed even when passing an existing mapping or variable list. diff --git a/src/mopdata/update_db.py.txt b/src/mopdata/update_db.py.txt index ddcd6a5..c1cb17d 100644 --- a/src/mopdata/update_db.py.txt +++ b/src/mopdata/update_db.py.txt @@ -37,7 +37,8 @@ def update_map(conn, varid, ctable): """Read mappings for variable from map file and update them in filelist """ - keys = ['frequency','realm','timeshot','calculation', 'positive', 'resample'] + keys = ['frequency','realm','timeshot','calculation', + 'positive', 'resample'] keys2 = {'vin': 'input_vars', 'in_units': 'units'} fname = f"maps/{ctable}.json" with open(fname, 'r') as f: @@ -48,6 +49,12 @@ args = {k: row[k] for k in keys} for k,v in keys2.items(): args[k] = row[v] + if 'datadir' in row.keys(): + paths = row['file_structure'].split() + infile = '' + for x in paths: + infile += f"{row['datadir']}/{x} " + args['infile'] = infile cur = conn.cursor() sql = f"UPDATE filelist SET" for k,v in args.items(): diff --git a/src/mopper/mop_utils.py b/src/mopper/mop_utils.py index 12ef93b..3577e25 100755 --- a/src/mopper/mop_utils.py +++ b/src/mopper/mop_utils.py @@ -564,9 +564,13 @@ def get_coords(ctx, ovar, coords): var_log = logging.getLogger(ctx.obj['var_log']) # open ancil grid file to read vertices #PP be careful this is currently hardcoded which is not ok!
- ancil_file = ctx.obj[f"grid_{ctx.obj['realm']}"] + ancil_dir = ctx.obj.get('ancils_path', '') + ancil_file = ancil_dir + "/" + ctx.obj.get(f"grid_{ctx.obj['realm']}", '') + if ancil_file == '' or not Path(ancil_file).exists(): + var_log.error(f"Ancil file {ancil_file} not set or inexistent") + sys.exit() var_log.debug(f"getting lat/lon and bnds from ancil file: {ancil_file}") - ds = xr.open_dataset(f"{ctx.obj['ancils_path']}/{ancil_file}") + ds = xr.open_dataset(ancil_file) var_log.debug(f"ancil ds: {ds}") # read lat/lon and vertices mapping cfile = import_files('mopdata').joinpath('latlon_vertices.yaml') @@ -793,8 +797,9 @@ def get_bounds_values(ctx, ds, bname): calc = False var_log = logging.getLogger(ctx.obj['var_log']) var_log.debug(f"Getting bounds values for {bname}") - ancil_file = ctx.obj[f"grid_{ctx.obj['realm']}"] + ancil_file = ctx.obj.get(f"grid_{ctx.obj['realm']}", '') if bname in ds.variables: + var_log.debug(f"Bounds for {bname} in file") bnds_val = ds[bname].values elif ancil_file != "": fname = f"{ctx.obj['ancils_path']}/{ancil_file}" From 0a4465f050968201df2965dc678ecf4849322a14 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 23 Aug 2024 09:08:36 +1000 Subject: [PATCH 131/137] moving to python -m pytest to solve conda test issue --- conda/run_test.sh | 2 +- conda/run_test_coverage.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conda/run_test.sh b/conda/run_test.sh index 21da92a..6b7b31c 100644 --- a/conda/run_test.sh +++ b/conda/run_test.sh @@ -1,2 +1,2 @@ #!/bin/bash -py.test +python -m pytest diff --git a/conda/run_test_coverage.sh b/conda/run_test_coverage.sh index 4da4a69..476fc38 100644 --- a/conda/run_test_coverage.sh +++ b/conda/run_test_coverage.sh @@ -1,5 +1,5 @@ #!/bin/bash pip install coverage pytest-cov -py.test --cov=mopper --cov-report xml:/tmp/artefacts/tests/pytest/coverage.xml --junit-xml /tmp/artefacts/tests/pytest/results.xml -py.test --cov=mopdb --cov-report xml:/tmp/artefacts/tests/pytest/coverage.xml --junit-xml /tmp/artefacts/tests/pytest/results.xml +python -m pytest --cov=mopper --cov-report xml:/tmp/artefacts/tests/pytest/coverage.xml --junit-xml /tmp/artefacts/tests/pytest/results.xml +python -m pytest --cov=mopdb --cov-report xml:/tmp/artefacts/tests/pytest/coverage.xml --junit-xml /tmp/artefacts/tests/pytest/results.xml From c8d6d744e7b1d370dd4fdca5a85790377a773849 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 23 Aug 2024 09:19:40 +1000 Subject: [PATCH 132/137] adding echo to run_test to see if it gets called --- conda/run_test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/conda/run_test.sh b/conda/run_test.sh index 6b7b31c..1c3c29b 100644 --- a/conda/run_test.sh +++ b/conda/run_test.sh @@ -1,2 +1,3 @@ #!/bin/bash +echo 'calling run_test' python -m pytest From 6ca07ed20a29d6cd7e55b826976e1f5f5df4d445 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 23 Aug 2024 09:37:25 +1000 Subject: [PATCH 133/137] trying to fix conda tests --- conda/meta.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index a8b2f5a..9af645d 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -37,12 +37,17 @@ requirements: - python-dateutil test: + imports: + - mopdb + - mopper source_files: - - tests/testdata/* + - tests/*.py - tests/testdata requires: - pytest - pyfakefs + - coverage + - codecov about: home: https://github.com/ACCESS-Hive/ACCESS-MOPPeR From d448bcd03cec84fed61ee3141ce8175c9d7998a3 Mon Sep 17 00:00:00 2001 From: Paola Petrelli 
Date: Fri, 23 Aug 2024 09:43:48 +1000 Subject: [PATCH 134/137] trying to fix conda tests 2 --- conda/meta.yaml | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 9af645d..bf5e2cd 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -37,13 +37,20 @@ requirements: - python-dateutil test: - imports: - - mopdb - - mopper + #imports: + # - mopdb + # - mopper source_files: - - tests/*.py + - tests - tests/testdata requires: + - cmor + - xarray + - numpy + - dask + - pyyaml + - cftime + - python-dateutil - pytest - pyfakefs - coverage From 1da6fee86e259acfdd7563f331ad002eb42ccbd0 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 23 Aug 2024 09:57:37 +1000 Subject: [PATCH 135/137] fixing conda action --- .github/workflows/mopper-test-conda.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/mopper-test-conda.yaml b/.github/workflows/mopper-test-conda.yaml index 76fb53f..6ab53d2 100644 --- a/.github/workflows/mopper-test-conda.yaml +++ b/.github/workflows/mopper-test-conda.yaml @@ -36,9 +36,6 @@ jobs: with: meta_yaml_dir: conda python-version: ${{ matrix.python-version }} # Values previously defined in `matrix` - platform_linux-64: true - platform_osx-64: false - platform_win-64: false user: coecms label: auto upload: false From ceb0710701e419c98871b08880bf170bee8c2e04 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 23 Aug 2024 12:03:48 +1000 Subject: [PATCH 136/137] last fixes before releasing 1.0.0 --- .github/workflows/mopper-conda-release.yaml | 36 +++++++++++++++++++++ .github/workflows/mopper-pytest.yaml | 12 +++---- README.md | 7 ++-- conda/meta.yaml | 8 ++--- src/mopdata/cmor_tables/CM2_3hr.json | 5 +-- 5 files changed, 51 insertions(+), 17 deletions(-) create mode 100644 .github/workflows/mopper-conda-release.yaml diff --git a/.github/workflows/mopper-conda-release.yaml b/.github/workflows/mopper-conda-release.yaml new file mode 100644 index 0000000..7e070d2 --- /dev/null +++ b/.github/workflows/mopper-conda-release.yaml @@ -0,0 +1,36 @@ +name: Build of mopper conda package for new release + +# Controls when the action will run. 
+on: + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + conda_deployment_with_new_tag: + name: Conda deployment of package with Python ${{ matrix.python-version }} + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12"] + steps: + - uses: actions/checkout@v4.1.7 + with: + fetch-depth: 0 + - name: Set env + run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV + - name: Conda environment creation and activation + uses: conda-incubator/setup-miniconda@v3.0.4 + with: + python-version: ${{ matrix.python-version }} + activate-environment: mopper_env + environment-file: conda/environment.yaml # Path to the build conda environment + show-channel-urls: true # + - name: Build and upload the conda packages + uses: uibcdf/action-build-and-upload-conda-packages@v1.3.0 + with: + meta_yaml_dir: conda + python-version: ${{ matrix.python-version }} # Values previously defined in `matrix` + user: coecms + label: auto + upload: true + token: ${{ secrets.ANACONDA_TOKEN }} # Replace with the right name of your secret diff --git a/.github/workflows/mopper-pytest.yaml b/.github/workflows/mopper-pytest.yaml index 51d846c..44e2b5e 100644 --- a/.github/workflows/mopper-pytest.yaml +++ b/.github/workflows/mopper-pytest.yaml @@ -4,12 +4,12 @@ name: mopper-all-tests on: push: branches: - #- prerelease - - class - #pull_request: - # branches: - # - main - # - prerelease + - prerelease + - main + pull_request: + branches: + - main + - prerelease jobs: diff --git a/README.md b/README.md index 159a3c5..12b0d94 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # [ACCESS Model Output Post-Processor (MOPPeR)](https://access-mopper.readthedocs.io/en/latest) [![Read the docs](https://readthedocs.org/projects/access-mopper/badge/?version=latest)](https://access-mopper.readthedocs.io/en/latest/) -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10976467.svg)](https://doi.org/10.5281/zenodo.10976467) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.12747219.svg)](https://doi.org/10.5281/zenodo.12747219) This code is derived from the [APP4](https://doi.org/10.5281/zenodo.7703469), initially created by Peter Uhe for CMIP5, and further developed for CMIP6-era by Chloe Mackallah from CSIRO, O&A Aspendale. @@ -15,12 +15,13 @@ Designed for use on ACCESS model output that has been archived using the [ACCESS Although we retained a differentiation between `custom` and `cmip` mode the main workflow is the same and `mode` is now only another field in the main configuration file. +See [MOPPeR ReadtheDocs](https://access-mopper.readthedocs.io/en/stable/) for the full documentation. ### Install -You can install the latest version of `mopper` directly from conda (accessnri channel):: +You can install the latest version of `mopper` directly from conda (coecms channel):: - conda install -c accessnri mopper + conda install -c coecms mopper If you want to install an unstable version or a different branch: @@ -35,6 +36,6 @@ If you want to install an unstable version or a different branch: MOPPeR is pre-installed into a Conda environment at NCI. Load it with:: module use /g/data3/hh5/public/modules - module load conda/analysis3-unstable + module load conda/analysis3 NB. You need to be a member of the hh5 project to load the modules.
diff --git a/conda/meta.yaml b/conda/meta.yaml index bf5e2cd..479c1f7 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -1,6 +1,6 @@ package: name: mopper - version: "{{ environ.get('GIT_DESCRIBE_TAG', '0.9') }}" + version: "{{ environ.get('GIT_DESCRIBE_TAG', '1.0') }}" #source: # path: ./ @@ -8,9 +8,9 @@ package: source: #url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR/archive/refs/tags/{{version}}.tar.gz git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git - git_tag: prerelease - #git_rev: "{{ version }}" - #git_depth: 1 # (Defaults to -1/not shallow) + #git_tag: prerelease + git_rev: "{{ version }}" + git_depth: 1 # (Defaults to -1/not shallow) #path: ../ build: diff --git a/src/mopdata/cmor_tables/CM2_3hr.json b/src/mopdata/cmor_tables/CM2_3hr.json index dd13bab..34584c2 100644 --- a/src/mopdata/cmor_tables/CM2_3hr.json +++ b/src/mopdata/cmor_tables/CM2_3hr.json @@ -104,10 +104,7 @@ "valid_max": "", "ok_min_mean_abs": "", "ok_max_mean_abs": "" - }, - - - + } } } From d22f93310c974d8f212eb7b344df36dd72b48277 Mon Sep 17 00:00:00 2001 From: Paola Petrelli Date: Fri, 23 Aug 2024 12:16:01 +1000 Subject: [PATCH 137/137] attempt to fix version from git --- conda/meta.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/conda/meta.yaml b/conda/meta.yaml index 479c1f7..fff9987 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -10,8 +10,7 @@ source: git_url: https://github.com/ACCESS-Hive/ACCESS-MOPPeR.git #git_tag: prerelease git_rev: "{{ version }}" - git_depth: 1 # (Defaults to -1/not shallow) - #path: ../ + #git_depth: 1 # (Defaults to -1/not shallow) build: number: 1
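A note on the two version-detection mechanisms the release patches alternate between, sketched here for clarity (the tag name v1.0.0 below is hypothetical): the workflows (PATCH 123 and the release workflow in PATCH 136) derive the version from the git ref, while conda/meta.yaml (PATCH 124 onwards) relies on conda-build's own git metadata.

# Workflow route: ${GITHUB_REF#refs/*/} strips the shortest leading
# match of "refs/*/" from the ref, leaving the bare tag or branch name.
GITHUB_REF=refs/tags/v1.0.0
RELEASE_VERSION=${GITHUB_REF#refs/*/}
echo "$RELEASE_VERSION"   # prints: v1.0.0

# meta.yaml route: conda-build exports GIT_DESCRIBE_TAG when the source is
# a git checkout, so the Jinja expression
#   {{ environ.get('GIT_DESCRIBE_TAG', '1.0') }}
# resolves to the most recent tag and falls back to "1.0" when it is unset.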