Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Intermediate step to new version 1.0 #152

Merged
merged 3 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions .github/workflows/mopper-conda.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
name: xmhw-conda-install-test

#on: [push]
on:
push:
branches:
- main
- newrelease
pull_request:
branches:
- main


jobs:
build-linux:
runs-on: ubuntu-latest
strategy:
max-parallel: 5

steps:
- uses: actions/checkout@v2
- name: Set up Python 3.10
uses: actions/setup-python@v2
with:
python-version: 3.10
- name: Add conda to system path
run: |
# $CONDA is an environment variable pointing to the root of the miniconda directory
echo $CONDA/bin >> $GITHUB_PATH
- name: Install dependencies
run: |
conda env update --file conda/environment.yml --name base
- name: Lint with flake8
run: |
conda install flake8
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
# - name: Install package
# run: |
# conda run python setup.py install
- name: Test with pytest
run: |
conda install pytest coverage codecov
conda run python -m pytest
conda run coverage run --source src -m py.test
- name: Upload to codecov
if: steps.build.outcome == 'success'
run: |
curl -Os https://uploader.codecov.io/latest/linux/codecov
chmod +x codecov
./codecov

13 changes: 11 additions & 2 deletions ACDD_conf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,16 +83,25 @@ cmor:
grids: "ACDD_grids.json"
# Additional NCI information:
# NCI project to charge compute; $PROJECT = your default project
# NCI queue to use; hugemem is recommended
project: v45
# additional NCI projects to be included in the storage flags
addprojs: []
# queue and memory (GB) per CPU (depends on queue)
# queue and memory (GB) per CPU (depends on queue),
# hugemem is recommended for high resolution data and/or derived variables
# hugemem requires a minimum of 6 CPUs; this is handled by the code
queue: hugemem
mem_per_cpu: 32
# walltime in "hh:mm:ss"
walltime: '8:00:00'
mode: custom
# conda_env to use by default hh5 analysis3-unstable
# as this has the code and all dependencies installed
# you can override that by supplying the env to pass to "source"
# Ex
# conda_env: <custom-env-path>/bin/activate
# or you can set "test: true" and modify mopper_job.sh manually
conda_env: default

#
# Global attributes: these will be added to each file; comment out unwanted ones
# Using ACDD CV vocab to check validity of global attributes
Expand Down
2 changes: 2 additions & 0 deletions CMIP6_conf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ cmor:
# additional NCI projects to be included in the storage flags
addprojs: []
# queue and memory (GB) per CPU (depends on queue)
# hugemem is recommended for high resolution data and/or derived variables
# hugemem requires a minimum of 6 CPUs; this is handled by the code
queue: hugemem
mem_per_cpu: 30
# walltime in "hh:mm:ss"
Expand Down
95 changes: 47 additions & 48 deletions src/mopdb/mopdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def mopdb(ctx, debug):
ctx.obj={}
# set up a default value for flow if none selected for logging
ctx.obj['debug'] = debug
ctx.obj['log'] = config_log(debug)
mopdb_log = config_log(debug)


@mopdb.command(name='check')
Expand All @@ -89,11 +89,11 @@ def check_cmor(ctx, dbname):
dbname : str
Database relative path (default is data/access.db)
"""
db_log = ctx.obj['log']
mopdb_log = logging.getLogger('mopdb_log')
# connect to db, this will create one if not existing
if dbname == 'default':
dbname = import_files('data').joinpath('access.db')
conn = db_connect(dbname, db_log)
conn = db_connect(dbname)
# get list of variables already in db
sql = 'SELECT name, out_name FROM cmorvar'
results = query(conn, sql, first=False)
Expand All @@ -108,9 +108,9 @@ def check_cmor(ctx, dbname):
results = query(conn, sql, first=False)
map_vars = [x[0] for x in results]
missing = set(map_vars) - set(cmor_vars)
db_log.info("Variables not yet defined in cmorvar table:")
mopdb_log.info("Variables not yet defined in cmorvar table:")
for v in missing:
db_log.info(f"{v}")
mopdb_log.info(f"{v}")
conn.close()
return

Expand Down Expand Up @@ -140,27 +140,27 @@ def cmor_table(ctx, dbname, fname, alias, label):
label : str
Label indicating preferred cmor variable definitions
"""
db_log = ctx.obj['log']
mopdb_log = logging.getLogger('mopdb_log')
# connect to db, this will create one if not existing
if dbname == 'default':
dbname = import_files('data').joinpath('access.db')
conn = db_connect(dbname, db_log)
conn = db_connect(dbname)
# get list of variables already in db
sql = "SELECT out_name, frequency, modeling_realm FROM cmorvar"
results = query(conn, sql, first=False)
# cmor_vars is the actual cmip variable name
# this sometimes differs from the name used in tables, which can distinguish different dims/freq
cmor_vars = set(x[0] for x in results)
# read variable list from map_ file
vlist = read_map(fname, alias, db_log)
vlist = read_map(fname, alias)
# extract cmor_var,units,dimensions,frequency,realm,cell_methods
var_list = []
for v in vlist[1:]:
vid = (v[0], v[5], v[6])
# This was adding variables to the table only if they didn't exist in other tables
if v[0][:4] != 'fld_':
if v[0] not in cmor_vars:
db_log.warning(f"Variable {v[0]} not defined in cmorvar table")
mopdb_log.warning(f"Variable {v[0]} not defined in cmorvar table")
else:

sql = f"SELECT * FROM cmorvar WHERE out_name='{v[0]}'"
Expand All @@ -178,13 +178,13 @@ def cmor_table(ctx, dbname, fname, alias, label):
definition[2] = v[6]
# if units are different print warning!
if v[3] != record[4]:
db_log.warning(f"Variable {v[0]} units orig/table are different: {v[3]}/{record[4]}")
mopdb_log.warning(f"Variable {v[0]} units orig/table are different: {v[3]}/{record[4]}")
if v[7] != '' and v[7] != record[5]:
db_log.warning(f"Variable {v[0]} cell_methods orig/table are different: {v[7]}/{record[5]}")
mopdb_log.warning(f"Variable {v[0]} cell_methods orig/table are different: {v[7]}/{record[5]}")
if len(v[4].split()) != len(record[9].split()):
db_log.warning(f"Variable {v[0]} number of dims orig/table are different: {v[4]}/{record[9]}")
mopdb_log.warning(f"Variable {v[0]} number of dims orig/table are different: {v[4]}/{record[9]}")
var_list.append(definition)
write_cmor_table(var_list, alias, db_log)
write_cmor_table(var_list, alias, mopdb_log)
conn.close()
return

Expand Down Expand Up @@ -213,26 +213,26 @@ def update_cmor(ctx, dbname, fname, alias):
-------
"""

db_log = ctx.obj['log']
mopdb_log = logging.getLogger('mopdb_log')
if alias is None:
alias = fname.split("/")[-1]
alias = alias.replace('.json', '')
db_log.info(f"Adding {alias} to variable name to track origin")
mopdb_log.info(f"Adding {alias} to variable name to track origin")
# connect to db, this will create one if not existing
dbcentral = import_files('data').joinpath('access.db')
if dbname in [dbcentral, 'default']:
db_log.error("The package database cannot be updated")
mopdb_log.error("The package database cannot be updated")
sys.exit()
conn = db_connect(dbname, db_log)
conn = db_connect(dbname)
# create table if not existing
table_sql = cmorvar_sql()
create_table(conn, table_sql, db_log)
create_table(conn, table_sql)
# get list of variables already in db in debug mode
if ctx.obj['debug']:
sql = 'SELECT name FROM cmorvar'
results = query(conn, sql, first=False)
existing_vars = [x[0] for x in results]
db_log.debug(f"Variables already in db: {existing_vars}")
mopdb_log.debug(f"Variables already in db: {existing_vars}")

# read list of vars from file
with open(fname, 'r') as fj:
Expand All @@ -247,14 +247,14 @@ def update_cmor(ctx, dbname, fname, alias):
if 'flag_values' not in row.keys():
values = values[:-2] + ['',''] + values[-2:]
vars_list.append(tuple([name] + values))
db_log.debug(f"Variables list: {vars_list}")
mopdb_log.debug(f"Variables list: {vars_list}")
# check that all tuples have len == 19
for r in vars_list:
if len(r) != 19:
db_log.error(r)
mopdb_log.error(r)
sys.exit()
# insert new vars and update existing ones
update_db(conn, 'cmorvar', vars_list, db_log)
update_db(conn, 'cmorvar', vars_list)

return

Expand Down Expand Up @@ -287,38 +287,37 @@ def map_template(ctx, dbname, fname, alias, version):
Returns
-------
"""
db_log = ctx.obj['log']
mopdb_log = logging.getLogger('mopdb_log')
if alias is None:
alias = fname.split(".")[0]
# connect to db, check first if db exists or exit
if dbname == 'default':
dbname = import_files('data').joinpath('access.db')
conn = db_connect(dbname, db_log)
conn = db_connect(dbname)
# read list of vars from file
with open(fname, 'r') as csvfile:
reader = csv.DictReader(csvfile, delimiter=';')
rows = list(reader)
# return lists of fully/partially matching variables and stash_vars
# these are input_vars for calculation defined in already in mapping db
full, no_ver, no_frq, stdn, no_match, stash_vars = parse_vars(conn,
rows, version, db_log)
rows, version)

# remove duplicates from partially matched variables
no_ver = remove_duplicate(no_ver, db_log)
no_frq = remove_duplicate(no_frq, db_log, strict=False)
no_match = remove_duplicate(no_match, db_log, strict=False)
no_ver = remove_duplicate(no_ver)
no_frq = remove_duplicate(no_frq, strict=False)
no_match = remove_duplicate(no_match, strict=False)

# check if more derived variables can be added based on all
# input_vars being available
pot_full, pot_part, pot_varnames = potential_vars(conn, rows,
stash_vars, version, db_log)
stash_vars, version)
# potential vars have always duplicates: 1 for each input_var
pot_full = remove_duplicate(pot_full, db_log, strict=False)
pot_part = remove_duplicate(pot_part, db_log, extra=pot_full,
strict=False)
db_log.info(f"Derived variables: {pot_varnames}")
pot_full = remove_duplicate(pot_full, strict=False)
pot_part = remove_duplicate(pot_part, extra=pot_full, strict=False)
mopdb_log.info(f"Derived variables: {pot_varnames}")
write_map_template(conn, full, no_ver, no_frq, stdn,
no_match, pot_full, pot_part, alias, db_log)
no_match, pot_full, pot_part, alias)
conn.close()

return
Expand Down Expand Up @@ -347,29 +346,29 @@ def update_map(ctx, dbname, fname, alias):
Returns
-------
"""
db_log = ctx.obj['log']
mopdb_log = logging.getLogger('mopdb_log')
# connect to db, this will create one if not existing
dbcentral = import_files('data').joinpath('access.db')
if dbname in [dbcentral, 'default']:
db_log.error("The package database cannot be updated")
mopdb_log.error("The package database cannot be updated")
sys.exit()
conn = db_connect(dbname, db_log)
conn = db_connect(dbname)
# create table if not existing
table_sql = mapping_sql()
create_table(conn, table_sql, db_log)
create_table(conn, table_sql)
# get list of variables already in db in debug mode
if ctx.obj['debug']:
sql = 'SELECT cmor_var FROM mapping'
results = query(conn, sql, first=False)
existing_vars = [x[0] for x in results]
db_log.debug(f"Variables already in db: {existing_vars}")
mopdb_log.debug(f"Variables already in db: {existing_vars}")
# read list of vars from file
if alias == 'app4':
var_list = read_map_app4(fname)
else:
var_list = read_map(fname, alias, db_log)
var_list = read_map(fname, alias)
# update mapping table
update_db(conn, 'mapping', var_list, db_log)
update_db(conn, 'mapping', var_list)
return


Expand Down Expand Up @@ -405,12 +404,12 @@ def model_vars(ctx, indir, startdate, dbname, version):
Returns
-------
"""
db_log = ctx.obj['log']
mopdb_log = logging.getLogger('mopdb_log')
# connect to db, this will create one if not existing
if dbname == 'default':
dbname = import_files('data').joinpath('access.db')
conn = db_connect(dbname, db_log)
write_varlist(conn, indir, startdate, version, db_log)
conn = db_connect(dbname)
write_varlist(conn, indir, startdate, version)
conn.close()
return

Expand Down Expand Up @@ -442,19 +441,19 @@ def remove_record(ctx, dbname, table, pair):
Returns
-------
"""
db_log = ctx.obj['log']
mopdb_log = logging.getLogger('mopdb_log')
# connect to db, this will create one if not existing
dbcentral = import_files('data').joinpath('access.db')
if dbname == dbcentral:
db_log.error("The package database cannot be updated")
mopdb_log.error("The package database cannot be updated")
sys.exit()
conn = db_connect(dbname, db_log)
conn = db_connect(dbname)
# set which columns to show based on table
if table == 'cmorvar':
col = "name"
elif table == 'mapping':
col = "cmor_var,frequency,realm,cmor_table"
# select, confirm, delete record/s
delete_record(conn, table, col, pair, db_log)
delete_record(conn, table, col, pair)
return

Loading
Loading