From 338836a7c78b9aa7ab6dba3df2490f689ad0e76c Mon Sep 17 00:00:00 2001
From: sdgamboa <samuel.gamboa.tuz@gmail.com>
Date: Mon, 8 Apr 2024 17:39:28 -0400
Subject: [PATCH] fix indents and length of lines

---
 R/bacdive.R               | 549 +++++++++++++++++++-------------------
 R/bugphyzz.R              | 450 +++++++++++++++++--------------
 R/cache.R                 |  24 +-
 R/fattyAcidComposition.R  |  48 ++--
 R/physiologies.R          | 506 ++++++++++++++++++-----------------
 bugphyzz.Rproj            |   2 +-
 man/getTaxonSignatures.Rd |   4 +-
 man/makeSignatures.Rd     |   4 +-
 man/physiologies.Rd       |   4 +-
 vignettes/bugphyzz.Rmd    |  69 +++--
 10 files changed, 872 insertions(+), 788 deletions(-)

diff --git a/R/bacdive.R b/R/bacdive.R
index 7143a207..ca94273f 100644
--- a/R/bacdive.R
+++ b/R/bacdive.R
@@ -1,316 +1,325 @@
 
 ## Main function for importing BacDive
 .getBacDive <- function(verbose = FALSE ) {
-  bacdive_data <- .importBacDiveExcel(verbose = verbose)
-  colnames(bacdive_data) <- .changeBDColNames(colnames(bacdive_data))
-  .getTidyBD(bacdive_data)
+    bacdive_data <- .importBacDiveExcel(verbose = verbose)
+    colnames(bacdive_data) <- .changeBDColNames(colnames(bacdive_data))
+    .getTidyBD(bacdive_data)
 }
 
 ## Helper function for .getBacDive
 .importBacDiveExcel <- function(verbose = FALSE) {
-  if (verbose)
-    message('Importing BacDive...')
-  url <- 'https://docs.google.com/spreadsheets/d/1smQTi1IKt4wSGTrGTW25I6u47M5txZkq/export?format=csv'
-  bacdive <- utils::read.csv(url)
-  colnames(bacdive) <- tolower(colnames(bacdive))
-  return(bacdive)
+    if (verbose)
+        message('Importing BacDive...')
+    url <- 'https://docs.google.com/spreadsheets/d/1smQTi1IKt4wSGTrGTW25I6u47M5txZkq/export?format=csv'
+    bacdive <- utils::read.csv(url)
+    colnames(bacdive) <- tolower(colnames(bacdive))
+    return(bacdive)
 }
 
 ## Helper function for .getBacDive
 .changeBDColNames <- function(x) {
-  dplyr::case_when(
-    x == 'bacdive_id' ~ 'BacDive_ID',
-    x == 'taxon_name' ~ 'Taxon_name',
-    x == 'ncbi_id' ~ 'NCBI_ID',
-    x == 'rank' ~ 'Rank',
-    x == 'parent_taxon_name' ~ 'Parent_name',
-    x == 'parent_ncbi_id' ~ 'Parent_NCBI_ID',
-    x == 'parent_rank' ~ 'Parent_rank',
-    x == 'sequence_16S_ncbi_id' ~ 'Seq16S_NCBI_ID',
-    x == 'sequence_genome_ncbi_id' ~ 'Genome_ID',
-    x == 'type_strain' ~ 'Type_strain',
-    TRUE ~ x
-  )
+    dplyr::case_when(
+        x == 'bacdive_id' ~ 'BacDive_ID',
+        x == 'taxon_name' ~ 'Taxon_name',
+        x == 'ncbi_id' ~ 'NCBI_ID',
+        x == 'rank' ~ 'Rank',
+        x == 'parent_taxon_name' ~ 'Parent_name',
+        x == 'parent_ncbi_id' ~ 'Parent_NCBI_ID',
+        x == 'parent_rank' ~ 'Parent_rank',
+        x == 'sequence_16S_ncbi_id' ~ 'Seq16S_NCBI_ID',
+        x == 'sequence_genome_ncbi_id' ~ 'Genome_ID',
+        x == 'type_strain' ~ 'Type_strain',
+        TRUE ~ x
+    )
 }
 
 ## Helper function for .getBacDive
 .getTidyBD <- function(bacdive_data) {
-  bacdive_data |>
-    tidyr::pivot_longer(
-      cols = gram_stain:tidyr::last_col(), # Attributes start in the gram_stain column
-      names_to = 'Attribute', values_to = 'Attribute_value'
-    ) |>
-    dplyr::filter(Attribute_value != '') |>
-    dplyr::mutate(Attribute = gsub('_', ' ', Attribute)) |>
-    dplyr::mutate(
-      Attribute = dplyr::case_when(
-        Attribute == 'oxygen tolerance' ~ 'aerophilicity',
-        Attribute == 'cell shape' ~ 'shape',
-        Attribute == 'pathogenicity animal' ~ 'animal pathongen',
-        Attribute == 'sample type' ~ 'isolation site',
-        TRUE ~ Attribute
-      )
-    ) |>
-    dplyr::distinct()
+    bacdive_data |>
+        tidyr::pivot_longer(
+            # Attributes start in the gram_stain column
+            cols = gram_stain:tidyr::last_col(),
+            names_to = 'Attribute', values_to = 'Attribute_value'
+        ) |>
+        dplyr::filter(Attribute_value != '') |>
+        dplyr::mutate(Attribute = gsub('_', ' ', Attribute)) |>
+        dplyr::mutate(
+            Attribute = dplyr::case_when(
+                Attribute == 'oxygen tolerance' ~ 'aerophilicity',
+                Attribute == 'cell shape' ~ 'shape',
+                Attribute == 'pathogenicity animal' ~ 'animal pathongen',
+                Attribute == 'sample type' ~ 'isolation site',
+                TRUE ~ Attribute
+            )
+        ) |>
+        dplyr::distinct()
 }
 
 ## Function for getting a list of data.frames (one per attribute)
 ## in tidy format from BacDive
 .reshapeBacDive <- function(df) {
 
-  df[['Attribute_source']] <- 'BacDive'
-  split_df <- split(df, factor(df[['Attribute']]))
+    df[['Attribute_source']] <- 'BacDive'
+    split_df <- split(df, factor(df[['Attribute']]))
 
-  ## Attributes that must be changed from character to logical (simplest fix)
-  attr_names <- c(
-    'aerophilicity',
-    'shape',
-    'country',
-    'cultivation medium used',
-    'geographic location',
-    'isolation site'
-    ## colony color (delete)
-  )
+    ## Attributes that must be changed from character to logical (simplest fix)
+    attr_names <- c(
+        'aerophilicity',
+        'shape',
+        'country',
+        'cultivation medium used',
+        'geographic location',
+        'isolation site'
+        ## colony color (delete)
+    )
 
-  for (i in seq_along(attr_names)) {
-    split_df[[attr_names[i]]] <- .catToLog(split_df[[attr_names[i]]])
-    if (attr_names[i] %in% c('aerophilicity', 'shape')) {
-        split_df[[attr_names[i]]]$Attribute_type <- 'multistate-intersection'
-    } else {
-        split_df[[attr_names[i]]]$Attribute_type <- 'multistate-union'
+    for (i in seq_along(attr_names)) {
+        split_df[[attr_names[i]]] <- .catToLog(split_df[[attr_names[i]]])
+        if (attr_names[i] %in% c('aerophilicity', 'shape')) {
+            split_df[[attr_names[i]]]$Attribute_type <-
+                'multistate-intersection'
+        } else {
+            split_df[[attr_names[i]]]$Attribute_type <- 'multistate-union'
+        }
     }
-  }
-
-  ## aerophilicity ####
-  ## This is only to match the data in the bugphyzz spreadsheet
-  aer <- split_df[['aerophilicity']]
-  aer$Attribute <- dplyr::case_when(
-    aer$Attribute == 'aerobe' ~ 'aerobic',
-    aer$Attribute == 'anaerobe' ~ 'anaerobic',
-    aer$Attribute == 'facultative anaerobe' ~ 'facultatively anaerobic',
-    aer$Attribute == 'microaerophile' ~ 'microaerophilic',
-    aer$Attribute == 'obligate anaerobe' ~ 'obligately anaerobic',
-    aer$Attribute == 'obligate aerobe' ~ 'obligately aerobic',
-    TRUE ~ aer$Attribute
-  )
-  split_df[['aerophilicity']] <- aer
 
-  ## animal pathogen ####
-  pos <- names(split_df) == 'animal pathongen'
-  names(split_df)[pos] <- 'animal pathogen'
-  x_ <- split_df[['animal pathogen']][['Attribute_value']]
-  x_ <- ifelse(x_ == "yes, in single cases", "yes", x_)
-  x_ <- dplyr::case_when(x_ == 'yes' ~ TRUE, x_ == 'no' ~ FALSE)
-  split_df[['animal pathogen']][['Attribute_value']] <- x_
-  split_df[['animal pathogen']][['Attribute_group']] <- 'animal pathogen'
-  split_df[['animal pathogen']][['Attribute']] <- 'animal pathogen'
-  split_df[['animal pathogen']][['Attribute_type']] <- 'binary'
+    ## aerophilicity ####
+    ## This is only to match the data in the bugphyzz spreadsheet
+    aer <- split_df[['aerophilicity']]
+    aer$Attribute <- dplyr::case_when(
+        aer$Attribute == 'aerobe' ~ 'aerobic',
+        aer$Attribute == 'anaerobe' ~ 'anaerobic',
+        aer$Attribute == 'facultative anaerobe' ~ 'facultatively anaerobic',
+        aer$Attribute == 'microaerophile' ~ 'microaerophilic',
+        aer$Attribute == 'obligate anaerobe' ~ 'obligately anaerobic',
+        aer$Attribute == 'obligate aerobe' ~ 'obligately aerobic',
+        TRUE ~ aer$Attribute
+    )
+    split_df[['aerophilicity']] <- aer
 
-  ## biosafety level ####
-  y <- split_df[['biosafety level comment']][, c('BacDive_ID', 'Attribute_value')]
-  colnames(y)[2] <- 'Note'
-  x <- dplyr::left_join(split_df[['biosafety level']], y, by = 'BacDive_ID')
-  x[['Attribute_value']] <- paste0('biosafety level ', x[['Attribute_value']])
-  x[['Attribute']] <- x[['Attribute_value']]
-  x[['Attribute_value']] <- TRUE
-  x[['Attribute_group']] <- 'biosafety level'
-  x[['Attribute_type']] <- 'multistate-intersection'
-  split_df[['biosafety level']] <- x
-  split_df[['biosafety level comment']] <- NULL
+    ## animal pathogen ####
+    pos <- names(split_df) == 'animal pathongen'
+    names(split_df)[pos] <- 'animal pathogen'
+    x_ <- split_df[['animal pathogen']][['Attribute_value']]
+    x_ <- ifelse(x_ == "yes, in single cases", "yes", x_)
+    x_ <- dplyr::case_when(x_ == 'yes' ~ TRUE, x_ == 'no' ~ FALSE)
+    split_df[['animal pathogen']][['Attribute_value']] <- x_
+    split_df[['animal pathogen']][['Attribute_group']] <- 'animal pathogen'
+    split_df[['animal pathogen']][['Attribute']] <- 'animal pathogen'
+    split_df[['animal pathogen']][['Attribute_type']] <- 'binary'
 
-  ## colony color ####
-  ## This one must be removed
-  split_df[['colony color']] <- NULL
+    ## biosafety level ####
+    y <- split_df[['biosafety level comment']][
+        , c('BacDive_ID', 'Attribute_value')
+    ]
+    colnames(y)[2] <- 'Note'
+    x <- dplyr::left_join(split_df[['biosafety level']], y, by = 'BacDive_ID')
+    x[['Attribute_value']] <- paste0('biosafety level ', x[['Attribute_value']])
+    x[['Attribute']] <- x[['Attribute_value']]
+    x[['Attribute_value']] <- TRUE
+    x[['Attribute_group']] <- 'biosafety level'
+    x[['Attribute_type']] <- 'multistate-intersection'
+    split_df[['biosafety level']] <- x
+    split_df[['biosafety level comment']] <- NULL
 
-  ## cultivation medium used - growth medium ####
-  pos <- names(split_df) == 'cultivation medium used'
-  names(split_df)[pos] <- 'growth medium'
-  split_df[['growth medium']][['Attribute_group']] <- 'growth medium'
+    ## colony color ####
+    ## This one must be removed
+    split_df[['colony color']] <- NULL
 
-  ## growth temperature ####
-  ## culture temperature
-  ## culture temperature growth
-  ## culture temperature range (ignore)
-  ## culture temperature type (ignore)
-  split_df[['culture temperature range']] <- NULL
-  split_df[['culture temperature type']] <- NULL
-  a <- split_df[['culture temperature']]
-  b <- split_df[['culture temperature growth']]
-  b_ <- b[,c('BacDive_ID', 'Attribute_value')]
-  colnames(b_)[2] <- 'growth'
-  ab <- dplyr::left_join(a, b_, by = 'BacDive_ID')
-  ab <- ab[ab[['growth']] == 'positive',]
-  ab[['growth']] <- NULL
-  ab[['Attribute_group']] <- 'growth temperature'
-  ab[['Attribute_type']] <- 'range'
-  ab[['Attribute']] <- 'growth temperature'
-  split_df[['growth temperature']] <- ab
-  split_df[['culture temperature']] <- NULL
-  split_df[['culture temperature growth']] <- NULL
+    ## cultivation medium used - growth medium ####
+    pos <- names(split_df) == 'cultivation medium used'
+    names(split_df)[pos] <- 'growth medium'
+    split_df[['growth medium']][['Attribute_group']] <- 'growth medium'
 
-  ## gram stain ####
-  gs <- split_df[['gram stain']]
-  gs[['Attribute']] <- paste(gs[['Attribute']], gs[['Attribute_value']])
-  gs[['Attribute_value']] <- TRUE
-  gs[['Attribute_group']] <- 'gram stain'
-  gs[['Attribute_type']] <- 'multistate-intersection'
-  split_df[['gram stain']] <- gs
+    ## growth temperature ####
+    ## culture temperature
+    ## culture temperature growth
+    ## culture temperature range (ignore)
+    ## culture temperature type (ignore)
+    split_df[['culture temperature range']] <- NULL
+    split_df[['culture temperature type']] <- NULL
+    a <- split_df[['culture temperature']]
+    b <- split_df[['culture temperature growth']]
+    b_ <- b[,c('BacDive_ID', 'Attribute_value')]
+    colnames(b_)[2] <- 'growth'
+    ab <- dplyr::left_join(a, b_, by = 'BacDive_ID')
+    ab <- ab[ab[['growth']] == 'positive',]
+    ab[['growth']] <- NULL
+    ab[['Attribute_group']] <- 'growth temperature'
+    ab[['Attribute_type']] <- 'range'
+    ab[['Attribute']] <- 'growth temperature'
+    split_df[['growth temperature']] <- ab
+    split_df[['culture temperature']] <- NULL
+    split_df[['culture temperature growth']] <- NULL
 
-  ## halophily ####
-  valid_terms <- c(
-    'NaCl', 'KCl', 'MgCl2', 'MgCl2x6H2O', 'Na\\+', 'MgSO4x7H2O', 'Na2SO4',
-    'Sea salts', 'Chromium \\(Cr6\\+\\)'
-  )
-  regex <- paste0('(', paste0(valid_terms, collapse = '|'), ')')
-  split_df[['halophily']] <- split_df[['halophily']] |>
-    dplyr::mutate(Attribute_value = strsplit(Attribute_value, ';')) |>
-    tidyr::unnest(cols = 'Attribute_value') |>
-    dplyr::filter(!grepl('no growth', Attribute_value)) |>
-    dplyr::mutate(
-      Attribute_value = stringr::str_squish(Attribute_value),
-      Attribute_value = sub('NaCL', 'NaCl', Attribute_value),
-      Attribute_value = sub('Marine', 'Sea', Attribute_value),
-      Attribute_value = sub('Salts', 'salts', Attribute_value)
-    ) |>
-    dplyr::filter(grepl(regex, Attribute_value)) |>
-    dplyr::mutate(
-      Attribute = stringr::str_extract(Attribute_value, regex),
-      Unit = Attribute_value |>
-        stringr::str_extract(' [<>]??[0-9]+\\.??[0-9]*.*') |>
-        stringr::str_squish() |>
-        stringr::str_remove('^.* '),
-      Attribute_value = Attribute_value |>
-        stringr::str_extract(' [<>]??[0-9]+\\.??[0-9]*.*') |>
-        stringr::str_squish() |>
-        stringr::str_remove(' .*$'),
-      Attribute_group = 'halophily',
-      Attribute_type = 'range'
-    ) |>
-    dplyr::filter(!grepl('[0-9]', Unit)) |>
-    dplyr::distinct()
+    ## gram stain ####
+    gs <- split_df[['gram stain']]
+    gs[['Attribute']] <- paste(gs[['Attribute']], gs[['Attribute_value']])
+    gs[['Attribute_value']] <- TRUE
+    gs[['Attribute_group']] <- 'gram stain'
+    gs[['Attribute_type']] <- 'multistate-intersection'
+    split_df[['gram stain']] <- gs
 
-  ## hemolysis ####
-  split_df[['hemolysis']] <- split_df[['hemolysis']] |>
-    dplyr::mutate(
-      Attribute_value = strsplit(Attribute_value, ';|/')
-    ) |>
-    tidyr::unnest(Attribute_value) |>
-    dplyr::mutate(Attribute_value = stringr::str_squish(Attribute_value)) |>
-    dplyr::filter(Attribute_value != '') |>
-    dplyr::mutate(
-      Attribute = Attribute_value,
-      Attribute_value = TRUE,
-      Attribute_group = 'hemolysis',
-      Attribute_type = 'multistate-intersection'
+    ## halophily ####
+    valid_terms <- c(
+        'NaCl', 'KCl', 'MgCl2', 'MgCl2x6H2O', 'Na\\+', 'MgSO4x7H2O', 'Na2SO4',
+        'Sea salts', 'Chromium \\(Cr6\\+\\)'
     )
+    regex <- paste0('(', paste0(valid_terms, collapse = '|'), ')')
+    split_df[['halophily']] <- split_df[['halophily']] |>
+        dplyr::mutate(Attribute_value = strsplit(Attribute_value, ';')) |>
+        tidyr::unnest(cols = 'Attribute_value') |>
+        dplyr::filter(!grepl('no growth', Attribute_value)) |>
+        dplyr::mutate(
+            Attribute_value = stringr::str_squish(Attribute_value),
+            Attribute_value = sub('NaCL', 'NaCl', Attribute_value),
+            Attribute_value = sub('Marine', 'Sea', Attribute_value),
+            Attribute_value = sub('Salts', 'salts', Attribute_value)
+        ) |>
+        dplyr::filter(grepl(regex, Attribute_value)) |>
+        dplyr::mutate(
+            Attribute = stringr::str_extract(Attribute_value, regex),
+            Unit = Attribute_value |>
+                stringr::str_extract(' [<>]??[0-9]+\\.??[0-9]*.*') |>
+                stringr::str_squish() |>
+                stringr::str_remove('^.* '),
+            Attribute_value = Attribute_value |>
+                stringr::str_extract(' [<>]??[0-9]+\\.??[0-9]*.*') |>
+                stringr::str_squish() |>
+                stringr::str_remove(' .*$'),
+            Attribute_group = 'halophily',
+            Attribute_type = 'range'
+        ) |>
+        dplyr::filter(!grepl('[0-9]', Unit)) |>
+        dplyr::distinct()
 
-  ## incubation period
-  ## This one must be removed
-  split_df[['incubation period']] <- NULL
+    ## hemolysis ####
+    split_df[['hemolysis']] <- split_df[['hemolysis']] |>
+        dplyr::mutate(
+            Attribute_value = strsplit(Attribute_value, ';|/')
+        ) |>
+        tidyr::unnest(Attribute_value) |>
+        dplyr::mutate(Attribute_value = stringr::str_squish(Attribute_value)) |>
+        dplyr::filter(Attribute_value != '') |>
+        dplyr::mutate(
+            Attribute = Attribute_value,
+            Attribute_value = TRUE,
+            Attribute_group = 'hemolysis',
+            Attribute_type = 'multistate-intersection'
+        )
 
-  ## motility ####
-  split_df[['motility']] <- split_df[['motility']] |>
-    dplyr::mutate(
-      Attribute_value = dplyr::case_when(
-        Attribute_value == 'yes' ~ TRUE,
-        Attribute_value == 'no' ~ FALSE
-      )
-    )
-  split_df[['motility']][['Attribute_group']] <- 'motility'
-  split_df[['motility']][['Attribute_type']] <- 'binary'
+    ## incubation period
+    ## This one must be removed
+    split_df[['incubation period']] <- NULL
 
-  ## pathogenicity human ####
-  pat <- split_df[['pathogenicity human']]
-  pat[['Note']] <- stringr::str_extract(pat[['Attribute_value']], 'in single cases')
-  pat[['Note']] <- ifelse(is.na(pat[['Note']]), "", pat[['Note']])
-  pat[['Attribute_value']] <- ifelse(grepl('^yes', pat[['Attribute_value']]), TRUE, NA)
-  pat <- pat[!is.na(pat[['Attribute_value']]),]
-  pat[['Attribute_group']] <- 'pathogenicity human'
-  pat[['Attribute_type']] <- 'binary'
-  split_df[['pathogenicity human']] <- pat
+    ## motility ####
+    split_df[['motility']] <- split_df[['motility']] |>
+        dplyr::mutate(
+            Attribute_value = dplyr::case_when(
+                Attribute_value == 'yes' ~ TRUE,
+                Attribute_value == 'no' ~ FALSE
+            )
+        )
+    split_df[['motility']][['Attribute_group']] <- 'motility'
+    split_df[['motility']][['Attribute_type']] <- 'binary'
 
-  ## metabolite production ####
-  mp <- split_df[['metabolite production']]
-  mp <- mp |>
-    dplyr::mutate(Attribute_value = strsplit(Attribute_value, ';')) |>
-    tidyr::unnest(Attribute_value)
-  x <- stringr::str_extract(mp[['Attribute_value']], '(yes|no)$')
-  mp <- mp[which(!is.na(x)),]
-  y <- stringr::str_extract(mp[['Attribute_value']], '(yes|no)$')
-  mp[['Attribute']] <- mp[['Attribute_value']]
-  mp[['Attribute_value']] <- ifelse(y == 'yes', TRUE , FALSE)
-  mp[['Attribute']] <- sub(' (yes|no)$', '', mp[['Attribute']])
-  mp[['Attribute_group']] <- 'metabolite utilization'
-  mp[['Attribute_type']] <- 'multistate-intersection'
-  split_df[['metabolite production']] <- mp
+    ## pathogenicity human ####
+    pat <- split_df[['pathogenicity human']]
+    pat[['Note']] <- stringr::str_extract(
+        pat[['Attribute_value']], 'in single cases'
+    )
+    pat[['Note']] <- ifelse(is.na(pat[['Note']]), "", pat[['Note']])
+    pat[['Attribute_value']] <- ifelse(
+        grepl('^yes', pat[['Attribute_value']]), TRUE, NA
+    )
+    pat <- pat[!is.na(pat[['Attribute_value']]),]
+    pat[['Attribute_group']] <- 'pathogenicity human'
+    pat[['Attribute_type']] <- 'binary'
+    split_df[['pathogenicity human']] <- pat
 
-  ## metabolite utilization ####
-  pos <- names(split_df) == 'metabolite utiilization'
-  names(split_df)[pos] <- 'metabolite utilization'
-  mu <- split_df[['metabolite utilization']]
-  mu <- mu |>
-    dplyr::mutate(Attribute_value = strsplit(Attribute_value, ';')) |>
-    tidyr::unnest(Attribute_value) |>
-    dplyr::mutate(Attribute_value = stringr::str_squish(Attribute_value))
-  x <- sub('^.* (\\+|-|\\+/-) *.*$', '\\1', mu[['Attribute_value']])
-  y <- ifelse(!x %in% c('+', '-', '+/-'), NA, x)
-  mu <- mu[which(!is.na(y)),]
-  mu[['Attribute']] <- stringr::str_remove(mu[['Attribute_value']], ' (\\+|-|\\+/-) *.*$')
-  mu[['Note']] <- sub('^.*(\\+|-|\\+/-) ', '', mu[['Attribute_value']])
-  mu[['Note']] <- paste('kind of utilization tested:', mu[['Note']])
-  y <- y[!is.na(y)]
-  mu[['Attribute_value']] <- dplyr::case_when(
-    y == '+' ~ 'TRUE',
-    y == '-' ~ 'FALSE',
-    y == '+/-' ~ 'TRUE/FALSE'
-  )
-  mu <- mu |>
-    dplyr::mutate(Attribute_value = strsplit(Attribute_value, '/')) |>
-    tidyr::unnest(Attribute_value) |>
-    dplyr::mutate(Attribute_value = as.logical(Attribute_value))
-  mu[['Attribute_group']] <- 'metabolite utilization'
-  mu[['Attribute_type']] <- 'multistate-intersection'
-  split_df[['metabolite utilization']] <- mu
+    ## metabolite production ####
+    mp <- split_df[['metabolite production']]
+    mp <- mp |>
+        dplyr::mutate(Attribute_value = strsplit(Attribute_value, ';')) |>
+        tidyr::unnest(Attribute_value)
+    x <- stringr::str_extract(mp[['Attribute_value']], '(yes|no)$')
+    mp <- mp[which(!is.na(x)),]
+    y <- stringr::str_extract(mp[['Attribute_value']], '(yes|no)$')
+    mp[['Attribute']] <- mp[['Attribute_value']]
+    mp[['Attribute_value']] <- ifelse(y == 'yes', TRUE , FALSE)
+    mp[['Attribute']] <- sub(' (yes|no)$', '', mp[['Attribute']])
+    mp[['Attribute_group']] <- 'metabolite utilization'
+    mp[['Attribute_type']] <- 'multistate-intersection'
+    split_df[['metabolite production']] <- mp
 
-  ## spore formation ####
-  sf <- split_df[['spore formation']]
-  sf <- sf |>
-    dplyr::mutate(
-      Attribute_value = dplyr::case_when(
-        Attribute_value == 'yes' ~ TRUE,
-        Attribute_value == 'no' ~ FALSE
-      ),
-      Attribute_group = 'spore formation',
-      Attribute_type = 'binary'
-    ) |>
-    dplyr::filter(!is.na(Attribute_value))
-  split_df[['spore formation']] <- sf
+    ## metabolite utilization ####
+    pos <- names(split_df) == 'metabolite utiilization'
+    names(split_df)[pos] <- 'metabolite utilization'
+    mu <- split_df[['metabolite utilization']]
+    mu <- mu |>
+        dplyr::mutate(Attribute_value = strsplit(Attribute_value, ';')) |>
+        tidyr::unnest(Attribute_value) |>
+        dplyr::mutate(Attribute_value = stringr::str_squish(Attribute_value))
+    x <- sub('^.* (\\+|-|\\+/-) *.*$', '\\1', mu[['Attribute_value']])
+    y <- ifelse(!x %in% c('+', '-', '+/-'), NA, x)
+    mu <- mu[which(!is.na(y)),]
+    mu[['Attribute']] <- stringr::str_remove(
+        mu[['Attribute_value']], ' (\\+|-|\\+/-) *.*$'
+    )
+    mu[['Note']] <- sub('^.*(\\+|-|\\+/-) ', '', mu[['Attribute_value']])
+    mu[['Note']] <- paste('kind of utilization tested:', mu[['Note']])
+    y <- y[!is.na(y)]
+    mu[['Attribute_value']] <- dplyr::case_when(
+        y == '+' ~ 'TRUE',
+        y == '-' ~ 'FALSE',
+        y == '+/-' ~ 'TRUE/FALSE'
+    )
+    mu <- mu |>
+        dplyr::mutate(Attribute_value = strsplit(Attribute_value, '/')) |>
+        tidyr::unnest(Attribute_value) |>
+        dplyr::mutate(Attribute_value = as.logical(Attribute_value))
+    mu[['Attribute_group']] <- 'metabolite utilization'
+    mu[['Attribute_type']] <- 'multistate-intersection'
+    split_df[['metabolite utilization']] <- mu
 
-  split_df <- lapply(split_df, function(x) {
-    x <- as.data.frame(x)
-    x[['NCBI_ID']] <- as.character(x[['NCBI_ID']])
-    x[['Parent_NCBI_ID']] <- as.character(x[['Parent_NCBI_ID']])
-    x[['Frequency']] <- 'always'
-    x <- x[!is.na(x[['Attribute_value']]),]
-    if (unique(x[['Attribute_type']]) == 'numeric') {
-      x <- .numericToRange(x)
-    } else if (unique(x[['Attribute_type']] == 'range')) {
-      x <- .modifyRange(x)
-    }
-    x <- x[,purrr::map_lgl(x, ~ !all(is.na(.x)))]
-    x <- dplyr::distinct(x)
-    as.data.frame(x)
-    # x <- .addSourceInfo(x)
-  })
+    ## spore formation ####
+    sf <- split_df[['spore formation']]
+    sf <- sf |>
+        dplyr::mutate(
+            Attribute_value = dplyr::case_when(
+                Attribute_value == 'yes' ~ TRUE,
+                Attribute_value == 'no' ~ FALSE
+            ),
+            Attribute_group = 'spore formation',
+            Attribute_type = 'binary'
+        ) |>
+        dplyr::filter(!is.na(Attribute_value))
+    split_df[['spore formation']] <- sf
+
+    split_df <- lapply(split_df, function(x) {
+        x <- as.data.frame(x)
+        x[['NCBI_ID']] <- as.character(x[['NCBI_ID']])
+        x[['Parent_NCBI_ID']] <- as.character(x[['Parent_NCBI_ID']])
+        x[['Frequency']] <- 'always'
+        x <- x[!is.na(x[['Attribute_value']]),]
+        if (unique(x[['Attribute_type']]) == 'numeric') {
+            x <- .numericToRange(x)
+        } else if (unique(x[['Attribute_type']] == 'range')) {
+            x <- .modifyRange(x)
+        }
+        x <- x[,purrr::map_lgl(x, ~ !all(is.na(.x)))]
+        x <- dplyr::distinct(x)
+        as.data.frame(x)
+    })
 
-  return(split_df)
+    return(split_df)
 }
 
 ## Helper function for .reshapeBacDive
 .catToLog <- function(df) {
-  df[['Attribute_group']] <- df[['Attribute']]
-  df[['Attribute']] <- df[['Attribute_value']]
-  df[['Attribute_value']] <- TRUE
-  df[['Attribute_type']] <- 'discrete' # TODO Maybe remove this line? I don't think this is being used
-  return(df)
+    df[['Attribute_group']] <- df[['Attribute']]
+    df[['Attribute']] <- df[['Attribute_value']]
+    df[['Attribute_value']] <- TRUE
+    df[['Attribute_type']] <- 'discrete'
+    return(df)
 }
diff --git a/R/bugphyzz.R b/R/bugphyzz.R
index 1986e0a6..dd48e225 100644
--- a/R/bugphyzz.R
+++ b/R/bugphyzz.R
@@ -1,9 +1,9 @@
 utils::globalVariables(c(
-  "Rank",
-  "Attribute", "Attribute_group", "Attribute_new", "Attribute_range",
-  "Attribute_value", "Attribute_value_max", "Attribute_value_min", "Br-C10:1",
-  "Evidence", "Frequency", "NCBI_ID", "Oxo-C19:1", "Taxon_name", "Unit",
-  "attribute", "functionname", "gram_stain", "physiology", "unit", "value"
+    "Rank",
+    "Attribute", "Attribute_group", "Attribute_new", "Attribute_range",
+    "Attribute_value", "Attribute_value_max", "Attribute_value_min", "Br-C10:1",
+    "Evidence", "Frequency", "NCBI_ID", "Oxo-C19:1", "Taxon_name", "Unit",
+    "attribute", "functionname", "gram_stain", "physiology", "unit", "value"
 ))
 
 #' Import bugphyzz
@@ -54,65 +54,76 @@ utils::globalVariables(c(
 #' names(bp)
 #'
 importBugphyzz <- function(
-    version = "d3fd894", force_download = FALSE, v = 0.5, exclude_rarely = TRUE
+        version = "d3fd894", force_download = FALSE, v = 0.5,
+        exclude_rarely = TRUE
 
 ) {
 
-  ## output is a list of three data.frames
-  ## one of each: binary, multistate, numeric
-  output <- .downloadResource(version, force_download)
+    ## output is a list of three data.frames
+    ## one of each: binary, multistate, numeric
+    output <- .downloadResource(version, force_download)
 
-  ## TODO add release version
-  output <- lapply(output, function(x) split(x, x$Attribute))
-  output <- purrr::list_flatten(output)
+    ## TODO add release version
+    output <- lapply(output, function(x) split(x, x$Attribute))
+    output <- purrr::list_flatten(output)
 
-  ## TODO correct plant pathogenicity name earlier in the workflow or
-  ## better yet, directly in the curation
-  pos <- which(names(output) == "plant pathogenity")
-  names(output)[pos] <- "plant pathogenicity"
-  output <- purrr::map(output, ~ {
-    .x |>
-      dplyr::mutate(
-        Attribute = ifelse(
-          Attribute == "plant pathogenity",
-          "plant pathogenicity",
-          Attribute
-        )
-      )
-  })
+    ## TODO correct plant pathogenicity name earlier in the workflow or
+    ## better yet, directly in the curation
+    pos <- which(names(output) == "plant pathogenity")
+    names(output)[pos] <- "plant pathogenicity"
+    output <- purrr::map(output, ~ {
+        .x |>
+            dplyr::mutate(
+                Attribute = ifelse(
+                    Attribute == "plant pathogenity",
+                    "plant pathogenicity",
+                    Attribute
+                )
+            )
+    })
 
-  names(output) <- purrr::map_chr(output, ~ unique(.x$Attribute))
-  val <- .validationData() |>
-    dplyr::filter(rank == "all") |>
-    dplyr::select(physiology, attribute, value) |>
-    dplyr::mutate(physiology = tolower(physiology)) |>
-    dplyr::mutate(attribute = tolower(attribute))
+    names(output) <- purrr::map_chr(output, ~ unique(.x$Attribute))
+    val <- .validationData() |>
+        dplyr::filter(rank == "all") |>
+        dplyr::select(physiology, attribute, value) |>
+        dplyr::mutate(physiology = tolower(physiology)) |>
+        dplyr::mutate(attribute = tolower(attribute))
 
-  output <- purrr::map(output, ~ {
-    attr_type <- unique(.x$Attribute_type)
-    if (attr_type == "binary") {
-      val <- dplyr::select(val, Attribute = attribute, value)
-      o <- dplyr::left_join(.x, val, by = "Attribute" )
-    } else if (attr_type == "multistate-intersection" || attr_type == "multistate-union") {
-      val <- dplyr::select(val, Attribute = physiology, Attribute_value = attribute, value)
-      o <- dplyr::left_join(dplyr::mutate(.x, Attribute_value = tolower(Attribute_value)) , val, by = c("Attribute", "Attribute_value"))
-    } else if (attr_type == "numeric") {
-      val <- dplyr::select(val, Attribute = attribute, value)
-      o <- dplyr::left_join(.x, val, by = "Attribute") |>
-        dplyr::rename(NSTI = nsti)
-    }
-    o |>
-      dplyr::filter(
-       !(value < v & Evidence == "asr")
-      ) |>
-      dplyr::mutate(value = ifelse(Evidence != "asr", NA, value)) |>
-      dplyr::rename(Validation = value)
-  })
+    output <- purrr::map(output, ~ {
+        attr_type <- unique(.x$Attribute_type)
+        if (attr_type == "binary") {
+            val <- dplyr::select(val, Attribute = attribute, value)
+            o <- dplyr::left_join(.x, val, by = "Attribute" )
+        } else if (
+            attr_type == "multistate-intersection" ||
+            attr_type == "multistate-union"
+        ) {
+            val <- dplyr::select(
+                val, Attribute = physiology, Attribute_value = attribute, value
+            )
+            o <- dplyr::left_join(
+                dplyr::mutate(.x, Attribute_value = tolower(Attribute_value)),
+                val, by = c("Attribute", "Attribute_value")
+            )
+        } else if (attr_type == "numeric") {
+            val <- dplyr::select(val, Attribute = attribute, value)
+            o <- dplyr::left_join(.x, val, by = "Attribute") |>
+                dplyr::rename(NSTI = nsti)
+        }
+        o |>
+            dplyr::filter(
+                !(value < v & Evidence == "asr")
+            ) |>
+            dplyr::mutate(value = ifelse(Evidence != "asr", NA, value)) |>
+            dplyr::rename(Validation = value)
+    })
 
-  if (exclude_rarely) {
-    output <- purrr::map(output, ~ dplyr::filter(.x, Frequency != "rarely"))
-  }
-  return(output)
+    if (exclude_rarely) {
+        output <- purrr::map(
+            output, ~ dplyr::filter(.x, Frequency != "rarely")
+        )
+    }
+    return(output)
 }
 
 #' Make signatures
@@ -126,8 +137,8 @@ importBugphyzz <- function(
 #' @param tax_level A character vector. Taxonomic rank. Valid options:
 #' superkingdom, kingdom, phylum, class, order, family, genus, species, strain.
 #' They can be combined. "mixed" is equivalent to select all valid ranks.
-#' @param evidence A character vector. Valid options: exp, igc, nas, tas, tax, asr.
-#' They can be combined. Default is all.
+#' @param evidence A character vector. Valid options: exp, igc, nas, tas, tax,
+#' asr. They can be combined. Default is all.
 #' @param frequency A character vector. Valid options: always, usually,
 #' sometimes, rarely, unknown. They can be combined. By default, "rarely" is
 #' excluded.
@@ -147,51 +158,56 @@ importBugphyzz <- function(
 #' sigs <- purrr::list_flatten(sigs, name_spec = "{inner}")
 #'
 makeSignatures <- function(
-    dat, tax_id_type = "NCBI_ID",
-    tax_level = "mixed",
-    evidence = c("exp", "igc", "tas", "nas", "tax", "asr"),
-    frequency = c("always", "usually", "sometimes", "unknown"),
-    min_size = 10, min = NULL, max = NULL
+        dat, tax_id_type = "NCBI_ID",
+        tax_level = "mixed",
+        evidence = c("exp", "igc", "tas", "nas", "tax", "asr"),
+        frequency = c("always", "usually", "sometimes", "unknown"),
+        min_size = 10, min = NULL, max = NULL
 ) {
-  attr_type <- unique(dat$Attribute_type)
-  if ("mixed" %in% tax_level) {
-    tax_level <- c(
-      "kingdom", "phylum", "class", "order", "family", "genus", "species",
-      "strain"
-    )
-  }
-  dat <- dat |>
-    dplyr::filter(Rank %in% tax_level) |>
-    dplyr::filter(Evidence %in% evidence) |>
-    dplyr::filter(Frequency %in% frequency)
-  if (!nrow(dat)) {
-    warning(
-      "Not enough data for creating signatures. Try different filtering options",
-      call. = FALSE
-    )
-    return(NULL)
-  }
-  if (attr_type %in% c("multistate-intersection", "binary", "multistate-union")) {
-    s <- .makeSignaturesDiscrete(dat = dat, tax_id_type = tax_id_type)
-  } else if (attr_type %in% c("range", "numeric")) {
-    s <- .makeSignaturesNumeric(
-      dat = dat, tax_id_type = tax_id_type, min = min, max = max
-    )
-  }
-  output <- purrr::keep(s, ~ length(.x) >= min_size)
-  if (!length(output)) {
-    warning(
-      "Not enough data for creating signatures. Try different filtering options",
-      call. = FALSE
-    )
-  }
-  return(output)
+    attr_type <- unique(dat$Attribute_type)
+    if ("mixed" %in% tax_level) {
+        tax_level <- c(
+            "kingdom", "phylum", "class", "order", "family", "genus", "species",
+            "strain"
+        )
+    }
+    dat <- dat |>
+        dplyr::filter(Rank %in% tax_level) |>
+        dplyr::filter(Evidence %in% evidence) |>
+        dplyr::filter(Frequency %in% frequency)
+    if (!nrow(dat)) {
+        warning(
+            "Not enough data for creating signatures.",
+            " Try different filtering options",
+            call. = FALSE
+        )
+        return(NULL)
+    }
+    if (
+        attr_type %in%
+        c("multistate-intersection", "binary", "multistate-union")
+        ) {
+        s <- .makeSignaturesDiscrete(dat = dat, tax_id_type = tax_id_type)
+    } else if (attr_type %in% c("range", "numeric")) {
+        s <- .makeSignaturesNumeric(
+            dat = dat, tax_id_type = tax_id_type, min = min, max = max
+        )
+    }
+    output <- purrr::keep(s, ~ length(.x) >= min_size)
+    if (!length(output)) {
+        warning(
+            "Not enough data for creating signatures.",
+            " Try different filtering options",
+            call. = FALSE
+        )
+    }
+    return(output)
 }
 
 #' Get Taxon Signatures
 #'
-#' \code{getTaxonSignatures} returns the names of all of the signatures associated
-#' with a particular taxon. More details can be found in the main
+#' \code{getTaxonSignatures} returns the names of all of the signatures
+#' associated with a particular taxon. More details can be found in the main
 #' bugphyzz vignette; please run `browseVignettes("bugphyzz")`.
 #'
 #' @param tax A valid NCBI ID or taxon name. If taxon name is used, the
@@ -210,151 +226,171 @@ makeSignatures <- function(
 #' sig_names_2 <- getTaxonSignatures(taxonName, bp, tax_id_type = "Taxon_name")
 #'
 getTaxonSignatures <- function(tax, bp, ...) {
-  sigs <- purrr::map(bp, makeSignatures, ...)
-  sigs <- purrr::list_flatten(sigs, name_spec = "{inner}")
-  pos <- which(purrr::map_lgl(sigs, ~ tax %in% .x))
-  output <- names(sigs)[pos]
-  return(output)
+    sigs <- purrr::map(bp, makeSignatures, ...)
+    sigs <- purrr::list_flatten(sigs, name_spec = "{inner}")
+    pos <- which(purrr::map_lgl(sigs, ~ tax %in% .x))
+    output <- names(sigs)[pos]
+    return(output)
 }
 
 # Non exported functions ----------------------------------------------------
 .makeSignaturesDiscrete <- function(dat, tax_id_type = "NCBI_ID") {
-  dat |>
-    dplyr::mutate(
-      Attribute = paste0("bugphyzz:", Attribute, "|", Attribute_value)
-      ) |>
-    {\(y) split(y, y$Attribute)}() |>
-    lapply(function(x) unique(x[[tax_id_type]]))
+    dat |>
+        dplyr::mutate(
+            Attribute = paste0("bugphyzz:", Attribute, "|", Attribute_value)
+        ) |>
+        {\(y) split(y, y$Attribute)}() |>
+        lapply(function(x) unique(x[[tax_id_type]]))
 }
 
 .makeSignaturesNumeric <- function(
-    dat, tax_id_type = "NCBI_ID", min = NULL, max = NULL
+        dat, tax_id_type = "NCBI_ID", min = NULL, max = NULL
 ) {
-  if (!is.null(min) || !is.null(max)) {
-    if (is.null(min)) {
-      message("Minimum unespecified. Using ", min(dat$Attribute_value), ".")
-      min <- min(dat$Attribute_value)
-    }
-    if (is.null(max)) {
-      message("Maximum unespecified. Using ", max(dat$Attribute_value), ".")
-      max <- max(dat$Attribute_value)
-    }
-    dat <- dat |>
-      dplyr::filter(
-        Attribute_value >= min & Attribute_value <= max
-      ) |>
-      dplyr::mutate(
-        Attribute = paste0("bugphyzz:", Attribute, "| >=", min, " & <=", max)
-      )
-  } else {
-    thr <- .thresholds() |>
-      dplyr::filter(Attribute_group == unique(dat$Attribute))
-    attr_name <- thr$Attribute
-    min_values <- thr$lower
-    max_values <- thr$upper
-    dat$tmp_col <- NA
-    for (i in seq_along(attr_name)) {
-      if (is.na(min_values[i]))
-        min_values[i] <- min(dat$Attribute_value) - 0.01
-      if (is.na(max_values[i]))
-        max_values[i] <- max(dat$Attribute_value)
-      pos <- which(dat$Attribute_value > min_values[i] & dat$Attribute_value <= max_values[i])
-      dat$tmp_col[pos] <- attr_name[i]
-      dat$Attribute[pos] <- paste0("bugphyzz:", dat$Attribute[pos], "|", attr_name[i], "| > ", round(min_values[i], 2), " & <= ", max_values[i])
+    if (!is.null(min) || !is.null(max)) {
+        if (is.null(min)) {
+            message(
+                "Minimum unespecified. Using ", min(dat$Attribute_value), "."
+            )
+            min <- min(dat$Attribute_value)
+        }
+        if (is.null(max)) {
+            message(
+                "Maximum unespecified. Using ", max(dat$Attribute_value), "."
+            )
+            max <- max(dat$Attribute_value)
+        }
+        dat <- dat |>
+            dplyr::filter(
+                Attribute_value >= min & Attribute_value <= max
+            ) |>
+            dplyr::mutate(
+                Attribute = paste0(
+                    "bugphyzz:", Attribute, "| >=", min, " & <=", max
+                )
+            )
+    } else {
+        thr <- .thresholds() |>
+            dplyr::filter(Attribute_group == unique(dat$Attribute))
+        attr_name <- thr$Attribute
+        min_values <- thr$lower
+        max_values <- thr$upper
+        dat$tmp_col <- NA
+        for (i in seq_along(attr_name)) {
+            if (is.na(min_values[i]))
+                min_values[i] <- min(dat$Attribute_value) - 0.01
+            if (is.na(max_values[i]))
+                max_values[i] <- max(dat$Attribute_value)
+            pos <- which(
+                dat$Attribute_value > min_values[i] &
+                    dat$Attribute_value <= max_values[i]
+            )
+            dat$tmp_col[pos] <- attr_name[i]
+            dat$Attribute[pos] <- paste0(
+                "bugphyzz:", dat$Attribute[pos], "|", attr_name[i], "| > ",
+                round(min_values[i], 2), " & <= ", max_values[i]
+            )
+        }
     }
-  }
-  dat |>
-    {\(y) split(y, y$Attribute)}() |>
-    lapply(function(x) unique(x[[tax_id_type]]))
+    dat |>
+        {\(y) split(y, y$Attribute)}() |>
+        lapply(function(x) unique(x[[tax_id_type]]))
 }
 
 .thresholds <- function() {
-  fpath <- file.path('extdata', 'thresholds.tsv')
-  fname <- system.file(fpath, package = 'bugphyzz', mustWork = TRUE)
-  utils::read.table(fname, header = TRUE, sep = '\t') |>
-    dplyr::mutate(
-      range = dplyr::case_when(
-        is.na(lower) ~ paste0('<=', upper),
-        is.na(upper) ~ paste0('>=', lower),
-        TRUE ~ paste0(lower, '-', upper)
-      ),
-      unit = ifelse(is.na(unit), '', unit)
-    ) |>
-    dplyr::mutate(Attribute_range = paste0(range, unit)) |>
-    dplyr::relocate(
-      Attribute_group, Attribute, Attribute_range
-    )
+    fpath <- file.path('extdata', 'thresholds.tsv')
+    fname <- system.file(fpath, package = 'bugphyzz', mustWork = TRUE)
+    utils::read.table(fname, header = TRUE, sep = '\t') |>
+        dplyr::mutate(
+            range = dplyr::case_when(
+                is.na(lower) ~ paste0('<=', upper),
+                is.na(upper) ~ paste0('>=', lower),
+                TRUE ~ paste0(lower, '-', upper)
+            ),
+            unit = ifelse(is.na(unit), '', unit)
+        ) |>
+        dplyr::mutate(Attribute_range = paste0(range, unit)) |>
+        dplyr::relocate(
+            Attribute_group, Attribute, Attribute_range
+        )
 }
 
 .validationData <- function() {
-  fname <- system.file("extdata", "validation_summary.tsv", package = "bugphyzz")
-  utils::read.table(
-    file = fname, header = TRUE, sep = "\t", row.names = NULL
-  ) |>
-    dplyr::mutate(
-      value = dplyr::case_when(
-        !is.na(mcc_mean) & is.na(r2_mean) ~ mcc_mean,
-        is.na(mcc_mean) & !is.na(r2_mean) ~ r2_mean
-      )
+    fname <- system.file(
+        "extdata", "validation_summary.tsv", package = "bugphyzz"
     )
+    utils::read.table(
+        file = fname, header = TRUE, sep = "\t", row.names = NULL
+    ) |>
+        dplyr::mutate(
+            value = dplyr::case_when(
+                !is.na(mcc_mean) & is.na(r2_mean) ~ mcc_mean,
+                is.na(mcc_mean) & !is.na(r2_mean) ~ r2_mean
+            )
+        )
 }
 
 ## Import a version of bupghyzz
 .downloadResource <- function(version, force_download) {
-  if (stringr::str_detect(version, "^10.5281/zenodo.[0-9]+$")) {
-    suffix <- sub("^10.5281/zenodo\\.", "", version)
-    output <- .downloadZ(suffix, force_download)
-  } else if (version == "devel" || stringr::str_detect(version, stringr::regex("^[:alnum:]{7}$")) ){
-    output <- .downloadGH(version, force_download)
-  } else {
-    stop("Version must be a Zenodo DOI, GitHub commit hash, or 'devel'.")
-  }
-  return(output)
+    if (stringr::str_detect(version, "^10.5281/zenodo.[0-9]+$")) {
+        suffix <- sub("^10.5281/zenodo\\.", "", version)
+        output <- .downloadZ(suffix, force_download)
+    } else if (
+        version == "devel" ||
+            stringr::str_detect(version, stringr::regex("^[:alnum:]{7}$"))
+        ){
+        output <- .downloadGH(version, force_download)
+    } else {
+        stop("Version must be a Zenodo DOI, GitHub commit hash, or 'devel'.")
+    }
+    return(output)
 }
 
 ## Function for downloading data on Zenodo
 .downloadZ <- function(record, force_download) {
-  base_url <- paste0("https://zenodo.org/api/records/", record)
-  req <- httr2::request(base_url)
-  res <- httr2::req_perform(req)
-  l <- httr2::resp_body_json(res)
+    base_url <- paste0("https://zenodo.org/api/records/", record)
+    req <- httr2::request(base_url)
+    res <- httr2::req_perform(req)
+    l <- httr2::resp_body_json(res)
 
-  file_names_api <- purrr::map_chr(l$files, ~ .x$links$self)
-  file_names_url <- sub("(^.*)(api/)(.*)(/content$)", "\\1\\3", file_names_api)
+    file_names_api <- purrr::map_chr(l$files, ~ .x$links$self)
+    file_names_url <- sub(
+        "(^.*)(api/)(.*)(/content$)", "\\1\\3", file_names_api
+    )
 
-  rpath <- .getResource(
-    rname = paste0("bugphyzz.zip"),
-    url = file_names_url, verbose = TRUE, force = force_download
-  )
-  temp_dir <- tempdir()
-  utils::unzip(zipfile = rpath, exdir = temp_dir, junkpaths = TRUE)
-  files <- list.files(temp_dir, pattern = "csv", full.names = TRUE)
+    rpath <- .getResource(
+        rname = paste0("bugphyzz.zip"),
+        url = file_names_url, verbose = TRUE, force = force_download
+    )
+    temp_dir <- tempdir()
+    utils::unzip(zipfile = rpath, exdir = temp_dir, junkpaths = TRUE)
+    files <- list.files(temp_dir, pattern = "csv", full.names = TRUE)
 
-  output <- vector("list", length(files))
-  for (i in seq_along(output)) {
-    output[[i]] <- utils::read.csv(files, header = TRUE, skip = 1) |>
-      dplyr::mutate(Attribute = tolower(Attribute))
-  }
-  return(output)
+    output <- vector("list", length(files))
+    for (i in seq_along(output)) {
+        output[[i]] <- utils::read.csv(files, header = TRUE, skip = 1) |>
+            dplyr::mutate(Attribute = tolower(Attribute))
+    }
+    return(output)
 }
 
 ## Function for downloading data on GitHub
 .downloadGH <- function(version, force_download) {
     file_suffix <- c("binary", "multistate", "numeric")
-    urls <- paste0("https://github.com/waldronlab/bugphyzzExports/raw/",
-           version, "/bugphyzz_", file_suffix, ".csv"
+    urls <- paste0(
+        "https://github.com/waldronlab/bugphyzzExports/raw/", version,
+        "/bugphyzz_", file_suffix, ".csv"
     )
     names(urls) <-  c("binary", "multistate", "numeric")
     output <- vector("list", length(urls))
     for (i in seq_along(output)) {
-      message("Importing ", names(urls)[i], " data...")
-      names(output)[i] <- names(urls)[i]
-      rpath <- .getResource(
-        rname = paste0("bugphyzz_", names(urls)[i], ".csv"),
-        url = urls[i], verbose = TRUE, force = force_download
-      )
-      output[[i]] <- utils::read.csv(rpath, header = TRUE, skip = 1) |>
-        dplyr::mutate(Attribute = tolower(Attribute))
+        message("Importing ", names(urls)[i], " data...")
+        names(output)[i] <- names(urls)[i]
+        rpath <- .getResource(
+            rname = paste0("bugphyzz_", names(urls)[i], ".csv"),
+            url = urls[i], verbose = TRUE, force = force_download
+        )
+        output[[i]] <- utils::read.csv(rpath, header = TRUE, skip = 1) |>
+            dplyr::mutate(Attribute = tolower(Attribute))
     }
     return(output)
 }
diff --git a/R/cache.R b/R/cache.R
index d1aebb41..70f2f329 100644
--- a/R/cache.R
+++ b/R/cache.R
@@ -8,18 +8,18 @@
     res <- BiocFileCache::bfcquery(cache, rname, "rname", exact = TRUE)
     rid <- res$rid
     if (length(rid) > 0 && force) {
-      time <- res$create_time
-      if (verbose) {
-        message('Replacing previous version downloaded on ', time, '.')
-      }
-      BiocFileCache::bfcremove(cache, rid)
-      rid <- names(BiocFileCache::bfcadd(cache, rname, url))
+        time <- res$create_time
+        if (verbose) {
+            message('Replacing previous version downloaded on ', time, '.')
+        }
+        BiocFileCache::bfcremove(cache, rid)
+        rid <- names(BiocFileCache::bfcadd(cache, rname, url))
     }
     if (!length(rid)) {
-      if (verbose) {
-        message( "Downloading, ", rname, ".")
-      }
-      rid <- names(BiocFileCache::bfcadd(cache, rname, url))
+        if (verbose) {
+            message( "Downloading, ", rname, ".")
+        }
+        rid <- names(BiocFileCache::bfcadd(cache, rname, url))
     }
     time <- BiocFileCache::bfcinfo(cache, rid)$create_time
     message('Using data downloaded on ', time, '.')
@@ -27,6 +27,6 @@
 }
 
 .removeCache <- function() {
-  cache <- .getCache()
-  BiocFileCache::removebfc(cache)
+    cache <- .getCache()
+    BiocFileCache::removebfc(cache)
 }
diff --git a/R/fattyAcidComposition.R b/R/fattyAcidComposition.R
index 92b5c67e..7e9097e4 100644
--- a/R/fattyAcidComposition.R
+++ b/R/fattyAcidComposition.R
@@ -4,26 +4,28 @@
 ## TODO Names of the Fatty Acids should be more "user-friendly"
 ## TODO Maybe a threshold should be decided to consider a FA as present or not.
 .fattyAcidComposition <- function(){
-  link <- .customLinks() |>
-    dplyr::filter(functionname == "fattyAcidComposition") |>
-    dplyr::pull(link)
-  fac_wide <- utils::read.csv(link, check.names = FALSE)
-  fac_long <- fac_wide |>
-    tidyr::pivot_longer(
-      cols = `Br-C10:1`:`Oxo-C19:1`,
-      names_to = "Attribute_new", values_to = "Attribute_value"
-  ) |>
-    dplyr::mutate(NCBI_ID = as.character(NCBI_ID))
-  dplyr::left_join(fac_long, ranks_parents, by = "NCBI_ID") |>
-    as.data.frame() |>
-    .addSourceInfo() |>
-    purrr::modify_at(
-      .at = c('Attribute', 'Frequency', 'Evidence', 'Confidence_in_curation'),
-      .f = ~ stringr::str_to_lower(.x)
-    ) |>
-    dplyr::select(-Attribute) |>
-    dplyr::rename(Attribute = Attribute_new) |>
-    .reorderColumns(attr_type = 'numeric')
+    link <- .customLinks() |>
+        dplyr::filter(functionname == "fattyAcidComposition") |>
+        dplyr::pull(link)
+    fac_wide <- utils::read.csv(link, check.names = FALSE)
+    fac_long <- fac_wide |>
+        tidyr::pivot_longer(
+            cols = `Br-C10:1`:`Oxo-C19:1`,
+            names_to = "Attribute_new", values_to = "Attribute_value"
+        ) |>
+        dplyr::mutate(NCBI_ID = as.character(NCBI_ID))
+    dplyr::left_join(fac_long, ranks_parents, by = "NCBI_ID") |>
+        as.data.frame() |>
+        .addSourceInfo() |>
+        purrr::modify_at(
+            .at = c(
+                'Attribute', 'Frequency', 'Evidence', 'Confidence_in_curation'
+            ),
+            .f = ~ stringr::str_to_lower(.x)
+        ) |>
+        dplyr::select(-Attribute) |>
+        dplyr::rename(Attribute = Attribute_new) |>
+        .reorderColumns(attr_type = 'numeric')
 }
 
 ## Function to import custom links
@@ -31,7 +33,9 @@
     fname <-
         system.file("extdata/spreadsheet_customlinks.tsv", package = "bugphyzz")
     links <- utils::read.table(fname, sep = '\t', header = TRUE)
-    ifelse(keyword[1] == "all", links, links <-
-                      links[links$physiology %in% keyword,])
+    ifelse(
+        keyword[1] == "all", links,
+        links <- links[links$physiology %in% keyword,]
+    )
     links
 }
diff --git a/R/physiologies.R b/R/physiologies.R
index c401a9ba..651cd795 100644
--- a/R/physiologies.R
+++ b/R/physiologies.R
@@ -9,8 +9,8 @@
 #' Valid keyboards can be checked with \code{showPhys}. If 'all', all
 #' physiologies are imported.
 #' @param full_source Logical. If `TRUE`, the Attribute_source column will
-#' contain full source information. If `FALSE`, the Attribute_source column will
-#' contain shortened versions of the sources. Default is `FALSE`.
+#' contain full source information. If `FALSE`, the Attribute_source column
+#' will contain shortened versions of the sources. Default is `FALSE`.
 #'
 #' @return A list of data.frames in tidy format.
 #'
@@ -21,69 +21,69 @@
 #' df <- physiologies('aerophilicity')[[1]]
 #'
 physiologies <- function(keyword = 'all', full_source = FALSE) {
-  keyword <- .checkKeyword(keyword)
-  cond1 <- any(keyword %in% showPhys('spreadsheets'))
-  cond2 <- any(keyword %in% showPhys('bacdive'))
+    keyword <- .checkKeyword(keyword)
+    cond1 <- any(keyword %in% showPhys('spreadsheets'))
+    cond2 <- any(keyword %in% showPhys('bacdive'))
 
-  if (cond1 && cond2) {
-    spreadsheets <- .importSpreadsheets(keyword = keyword)
-    spreadsheets <- spreadsheets[names(spreadsheets) %in% keyword]
-    bacdive <- .reshapeBacDive(.getBacDive(verbose = FALSE))
-    bacdive <- bacdive[names(bacdive) %in% keyword]
-    physiologies <- vector('list', length(keyword))
-    for (i in seq_along(keyword)) {
-        df1 <- spreadsheets[[keyword[i]]]
-        df2 <- bacdive[[keyword[i]]]
-        physiologies[[i]] <- dplyr::bind_rows(df1, df2)
-        names(physiologies)[i] <- keyword[i]
-        message('Finished ', keyword[i], '.')
+    if (cond1 && cond2) {
+        spreadsheets <- .importSpreadsheets(keyword = keyword)
+        spreadsheets <- spreadsheets[names(spreadsheets) %in% keyword]
+        bacdive <- .reshapeBacDive(.getBacDive(verbose = FALSE))
+        bacdive <- bacdive[names(bacdive) %in% keyword]
+        physiologies <- vector('list', length(keyword))
+        for (i in seq_along(keyword)) {
+            df1 <- spreadsheets[[keyword[i]]]
+            df2 <- bacdive[[keyword[i]]]
+            physiologies[[i]] <- dplyr::bind_rows(df1, df2)
+            names(physiologies)[i] <- keyword[i]
+            message('Finished ', keyword[i], '.')
+        }
+    } else if (cond1 && !cond2) {
+        spreadsheets <- .importSpreadsheets(keyword = keyword)
+        physiologies <- spreadsheets[names(spreadsheets) %in% keyword]
+        for (i in seq_along(keyword)) {
+            message('Finished ', keyword[i], '.')
+        }
+    } else if (!cond1 && cond2) {
+        bacdive <- .reshapeBacDive(.getBacDive(verbose = FALSE))
+        physiologies <- bacdive[names(bacdive) %in% keyword]
+        for (i in seq_along(keyword)) {
+            message('Finished ', keyword[i], '.')
+        }
     }
-  } else if (cond1 && !cond2) {
-    spreadsheets <- .importSpreadsheets(keyword = keyword)
-    physiologies <- spreadsheets[names(spreadsheets) %in% keyword]
-    for (i in seq_along(keyword)) {
-      message('Finished ', keyword[i], '.')
-    }
-  } else if (!cond1 && cond2) {
-    bacdive <- .reshapeBacDive(.getBacDive(verbose = FALSE))
-    physiologies <- bacdive[names(bacdive) %in% keyword]
-    for (i in seq_along(keyword)) {
-      message('Finished ', keyword[i], '.')
-    }
-  }
 
-  physiologies <- lapply(physiologies, function(df) {
-    df <- df |>
-      purrr::modify_if(.p = is.character, ~ stringr::str_squish(.x)) |>
-      .addSourceInfo() |>
-      purrr::modify_at(
-        .at = c('Frequency', 'Evidence', 'Confidence_in_curation'),
-        ~ stringr::str_squish(stringr::str_to_lower(.x))
-      ) |>
-      dplyr::distinct()
+    physiologies <- lapply(physiologies, function(df) {
+        df <- df |>
+            purrr::modify_if(.p = is.character, ~ stringr::str_squish(.x)) |>
+            .addSourceInfo() |>
+            purrr::modify_at(
+                .at = c('Frequency', 'Evidence', 'Confidence_in_curation'),
+                ~ stringr::str_squish(stringr::str_to_lower(.x))
+            ) |>
+            dplyr::distinct()
 
-    if (full_source) {
-      df$Attribute_source <- df$full_source
-    }
-    df$full_source <- NULL
+        if (full_source) {
+            df$Attribute_source <- df$full_source
+        }
+        df$full_source <- NULL
 
-    df <- .reorderColumns(
-      df = df,
-      name = unique(df$Attribute_group),
-      attr_type = unique(df$Attribute_type)
-    )
+        df <- .reorderColumns(
+            df = df,
+            name = unique(df$Attribute_group),
+            attr_type = unique(df$Attribute_type)
+        )
 
-    df <- as.data.frame(df[, vapply(df, \(y) !all(is.na(y)), logical(1))])
+        df <- as.data.frame(df[, vapply(df, \(y) !all(is.na(y)), logical(1))])
 
-    ## TODO this code could be somewhere else
-    if (unique(df$Attribute_group) == 'aerophilicity') {
-      df <- .homogenizeAerophilicityAttributeNames(df)
-    }
+        ## TODO this code could be somewhere else
+        if (unique(df$Attribute_group) == 'aerophilicity') {
+            df <- .homogenizeAerophilicityAttributeNames(df)
+        }
 
-    dplyr::distinct(df)
-  })
+        dplyr::distinct(df)
+    })
 
-  return(physiologies)
+    return(physiologies)
 }
 
 #' Show list of available physiologies (for devs)
@@ -104,243 +104,267 @@ physiologies <- function(keyword = 'all', full_source = FALSE) {
 #' showPhys('spreadsheets')
 #'
 showPhys <- function(which_names = 'all') {
-  fname <- system.file('extdata', 'spreadsheet_links.tsv', package = 'bugphyzz')
-  links <- utils::read.table(fname, header = TRUE, sep = '\t')
-  spreadsheet_phys <- links[['physiology']]
-  if (which_names == 'all')
-    ## bacdive_phys_names is a character vector saved as internal data
-    phys_names <- sort(unique(c(spreadsheet_phys, bacdive_phys_names)))
-  if (which_names == 'spreadsheets')
-    phys_names <- spreadsheet_phys
-  if (which_names == 'bacdive')
-    phys_names <- bacdive_phys_names
-  return(phys_names)
+    fname <- system.file(
+        'extdata', 'spreadsheet_links.tsv', package = 'bugphyzz'
+    )
+    links <- utils::read.table(fname, header = TRUE, sep = '\t')
+    spreadsheet_phys <- links[['physiology']]
+    if (which_names == 'all')
+        ## bacdive_phys_names is a character vector saved as internal data
+        phys_names <- sort(unique(c(spreadsheet_phys, bacdive_phys_names)))
+    if (which_names == 'spreadsheets')
+        phys_names <- spreadsheet_phys
+    if (which_names == 'bacdive')
+        phys_names <- bacdive_phys_names
+    return(phys_names)
 }
 
 ## Helper function for physiologies
 .checkKeyword <- function(keyword) {
-  keyword <- unique(sort(keyword))
-  if ('all' %in% keyword) {
-    if (length(keyword) > 1) {
-      stop(
-        "Found 'all' among the keywords. Are you sure that you want to ",
-        "import all of the physiologies? If so, use 'all' alone. Quitting.",
-        call. = FALSE
-      )
-    } else if (length(keyword) == 1) {
-      message('All physiologies will be imported.')
-      keyword <- showPhys()
+    keyword <- unique(sort(keyword))
+    if ('all' %in% keyword) {
+        if (length(keyword) > 1) {
+            stop(
+                "Found 'all' among the keywords.",
+                "Are you sure that you want to import all of the physiologies?",
+                "If so, use 'all' alone. Quitting.",
+                call. = FALSE
+            )
+        } else if (length(keyword) == 1) {
+            message('All physiologies will be imported.')
+            keyword <- showPhys()
+        }
     }
-  }
-  valid_keywords <- showPhys()
-  lgl_vct <- keyword %in% valid_keywords
-  if (any(!lgl_vct) ) {
-    invalid_keywords <- keyword[!lgl_vct]
-    stop(
-      "Invalid keyword(s): ", paste0(invalid_keywords, collapse = ', '), '.',
-      " Check valid keywords with showPhys() or use 'all' to import all",
-      " physiologies.",
-      call. = FALSE
-    )
-  }
-  return(keyword)
+    valid_keywords <- showPhys()
+    lgl_vct <- keyword %in% valid_keywords
+    if (any(!lgl_vct) ) {
+        invalid_keywords <- keyword[!lgl_vct]
+        stop(
+            "Invalid keyword(s): ",
+            paste0(invalid_keywords, collapse = ', '), '.',
+            " Check valid keywords with showPhys() or use 'all' to import all",
+            " physiologies.",
+            call. = FALSE
+        )
+    }
+    return(keyword)
 }
 
 ## Helper function for physiologies
 .importSpreadsheets <- function(keyword) {
-  parent_col_names <- c('Parent_name', 'Parent_NCBI_ID', 'Parent_rank')
-  fname <- system.file('extdata', 'spreadsheet_links.tsv', package = 'bugphyzz')
-  links <- utils::read.table(fname, header = TRUE, sep = '\t')
-  links <- links[links[['physiology']] %in% keyword,]
-  spreadsheets <- vector('list', nrow(links))
-  for (i in seq_along(spreadsheets)) {
-    phys_name <- links[i, 'physiology', drop = FALSE][[1]]
-    attr_type <- links[i, 'attribute_type', drop = FALSE][[1]]
-    names(spreadsheets)[i] <- phys_name
-    url <- links[i, 'link', drop = FALSE][[1]]
-    df <- dplyr::distinct(utils::read.csv(url))
-    df[['Attribute_type']] <- attr_type
-    df[['Attribute_group']] <- phys_name
-    df[['NCBI_ID']] <- as.character(df[['NCBI_ID']])
-    df <- df[!is.na(df[['Attribute_value']]),]
+    parent_col_names <- c('Parent_name', 'Parent_NCBI_ID', 'Parent_rank')
+    fname <- system.file(
+        'extdata', 'spreadsheet_links.tsv', package = 'bugphyzz'
+    )
+    links <- utils::read.table(fname, header = TRUE, sep = '\t')
+    links <- links[links[['physiology']] %in% keyword,]
+    spreadsheets <- vector('list', nrow(links))
+    for (i in seq_along(spreadsheets)) {
+        phys_name <- links[i, 'physiology', drop = FALSE][[1]]
+        attr_type <- links[i, 'attribute_type', drop = FALSE][[1]]
+        names(spreadsheets)[i] <- phys_name
+        url <- links[i, 'link', drop = FALSE][[1]]
+        df <- dplyr::distinct(utils::read.csv(url))
+        df[['Attribute_type']] <- attr_type
+        df[['Attribute_group']] <- phys_name
+        df[['NCBI_ID']] <- as.character(df[['NCBI_ID']])
+        df <- df[!is.na(df[['Attribute_value']]),]
 
-    if (unique(df[['Attribute_type']]) == 'numeric') {
-      df <- .numericToRange(df)
-    } else if (unique(df[['Attribute_type']] == 'range')) {
-      df <- .modifyRange(df)
-    } else if (unique(df[['Attribute_type']] %in% .DISCRETE_ATTRIBUTE_TYPES())) {
-      df <- dplyr::filter(df, Attribute_value == TRUE | Attribute_value == FALSE)
-    }
+        if (unique(df[['Attribute_type']]) == 'numeric') {
+            df <- .numericToRange(df)
+        } else if (unique(df[['Attribute_type']] == 'range')) {
+            df <- .modifyRange(df)
+        } else if (
+            unique(df[['Attribute_type']] %in% .DISCRETE_ATTRIBUTE_TYPES())
+        ) {
+            df <- dplyr::filter(
+                df, Attribute_value == TRUE | Attribute_value == FALSE
+            )
+        }
 
-    if (all(parent_col_names %in% colnames(df))) {
-      df$Parent_NCBI_ID <- stringr::str_squish(as.character(df$Parent_NCBI_ID))
-    } else {
-        ## ranks_parents is an internal object (data.frame) in bugphyzz
-        rp <- purrr::modify_at(
-          .x = ranks_parents,
-          .at = c('NCBI_ID', 'Parent_NCBI_ID'),
-          .f = as.character
-        )
-        df <- dplyr::left_join(df, rp, by = "NCBI_ID")
+        if (all(parent_col_names %in% colnames(df))) {
+            df$Parent_NCBI_ID <- stringr::str_squish(
+                as.character(df$Parent_NCBI_ID)
+            )
+        } else {
+            ## ranks_parents is an internal object (data.frame) in bugphyzz
+            rp <- purrr::modify_at(
+                .x = ranks_parents,
+                .at = c('NCBI_ID', 'Parent_NCBI_ID'),
+                .f = as.character
+            )
+            df <- dplyr::left_join(df, rp, by = "NCBI_ID")
+        }
+        spreadsheets[[i]] <- df
     }
-    spreadsheets[[i]] <- df
-  }
-  return(spreadsheets)
+    return(spreadsheets)
 }
 
 ## Helper function for .importSpreadsheets
 .numericToRange <- function(df) {
-  df <- df |>
-    dplyr::group_by(NCBI_ID, Taxon_name) |>
-    dplyr::mutate(
-      Attribute_value_min = as.double(Attribute_value),
-      Attribute_value_max = as.double(Attribute_value),
-      Attribute_type = 'range'
-    ) |>
-    dplyr::ungroup() |>
-    dplyr::distinct()
-  df[['Attribute_value']] <- NULL
-  return(df)
+    df <- df |>
+        dplyr::group_by(NCBI_ID, Taxon_name) |>
+        dplyr::mutate(
+            Attribute_value_min = as.double(Attribute_value),
+            Attribute_value_max = as.double(Attribute_value),
+            Attribute_type = 'range'
+        ) |>
+        dplyr::ungroup() |>
+        dplyr::distinct()
+    df[['Attribute_value']] <- NULL
+    return(df)
 }
 
 ## Helper function for .importSpreadsheets
 .modifyRange <- function(df) {
-  num <- '[0-9]+(\\.[0-9]+)?'
-  regex1 <- paste0('^\\-?', num, '(\\-', num, ')?$')
-  regex2 <- paste0('^(<|>)(\\-)?', num, '$')
-  regex <- paste0('(', regex1, '|', regex2, ')')
-  df <- df |>
-    dplyr::filter(grepl(regex, Attribute_value)) |>
-    dplyr::mutate(
-      Attribute_value = sub('^(\\-)([0-9]+(\\.[0-9]+)?)', 'minus\\2', Attribute_value)
-    ) |>
-    dplyr::mutate(
-      Attribute_value = gsub(' ', '', Attribute_value),
-      Attribute_value = dplyr::case_when(
-        grepl('<', Attribute_value) ~ paste0('-', Attribute_value),
-        grepl('>', Attribute_value) ~ paste0(Attribute_value, '-'),
-        !grepl("\\-", Attribute_value) ~ paste0(Attribute_value, '-', Attribute_value),
-        grepl("^\\-", Attribute_value) ~ paste0("minusInf", Attribute_value),
-        grepl("\\-$", Attribute_value) ~ paste0(Attribute_value, "Inf"),
-        TRUE ~ Attribute_value
-      ),
-      Attribute_value = sub('(<|>)', '', Attribute_value),
-      Attribute_value = dplyr::case_when(
-        grepl("^\\-", Attribute_value) ~ paste0("minusInf", Attribute_value),
-        grepl("\\-$", Attribute_value) ~ paste0(Attribute_value, "Inf"),
-        TRUE ~ Attribute_value
-      )
-    ) |>
-    tidyr::separate(
-      col = 'Attribute_value',
-      into = c('Attribute_value_min', 'Attribute_value_max'), sep = '-'
-    ) |>
-    dplyr::mutate(
-      Attribute_value_min = sub('minus', '-', Attribute_value_min),
-      Attribute_value_max = sub('minus', '-', Attribute_value_min)
-    ) |>
-    dplyr::mutate(
-      Attribute_value_min = as.double(Attribute_value_min),
-      Attribute_value_max = as.double(Attribute_value_max)
-    ) |>
-    dplyr::distinct()
+    num <- '[0-9]+(\\.[0-9]+)?'
+    regex1 <- paste0('^\\-?', num, '(\\-', num, ')?$')
+    regex2 <- paste0('^(<|>)(\\-)?', num, '$')
+    regex <- paste0('(', regex1, '|', regex2, ')')
+    df <- df |>
+        dplyr::filter(grepl(regex, Attribute_value)) |>
+        dplyr::mutate(
+            Attribute_value = sub(
+                '^(\\-)([0-9]+(\\.[0-9]+)?)', 'minus\\2', Attribute_value
+            )
+        ) |>
+        dplyr::mutate(
+            Attribute_value = gsub(' ', '', Attribute_value),
+            Attribute_value = dplyr::case_when(
+                grepl('<', Attribute_value) ~ paste0('-', Attribute_value),
+                grepl('>', Attribute_value) ~ paste0(Attribute_value, '-'),
+                !grepl("\\-", Attribute_value) ~
+                    paste0(Attribute_value, '-', Attribute_value),
+                grepl("^\\-", Attribute_value) ~
+                    paste0("minusInf", Attribute_value),
+                grepl("\\-$", Attribute_value) ~ paste0(Attribute_value, "Inf"),
+                TRUE ~ Attribute_value
+            ),
+            Attribute_value = sub('(<|>)', '', Attribute_value),
+            Attribute_value = dplyr::case_when(
+                grepl("^\\-", Attribute_value) ~
+                    paste0("minusInf", Attribute_value),
+                grepl("\\-$", Attribute_value) ~ paste0(Attribute_value, "Inf"),
+                TRUE ~ Attribute_value
+            )
+        ) |>
+        tidyr::separate(
+            col = 'Attribute_value',
+            into = c('Attribute_value_min', 'Attribute_value_max'), sep = '-'
+        ) |>
+        dplyr::mutate(
+            Attribute_value_min = sub('minus', '-', Attribute_value_min),
+            Attribute_value_max = sub('minus', '-', Attribute_value_min)
+        ) |>
+        dplyr::mutate(
+            Attribute_value_min = as.double(Attribute_value_min),
+            Attribute_value_max = as.double(Attribute_value_max)
+        ) |>
+        dplyr::distinct()
 }
 
 ## helper function for .importSpreadsheets
 .DISCRETE_ATTRIBUTE_TYPES <- function() {
-  fname <- system.file('extdata', 'spreadsheet_links.tsv', package = 'bugphyzz')
-  dat <- utils::read.table(file = fname, header = TRUE, sep = '\t')
-  unique(dat[dat$trait_type == 'discrete',]$attribute_type)
+    fname <- system.file(
+        'extdata', 'spreadsheet_links.tsv', package = 'bugphyzz'
+    )
+    dat <- utils::read.table(file = fname, header = TRUE, sep = '\t')
+    unique(dat[dat$trait_type == 'discrete',]$attribute_type)
 }
 
 ## Helper function for physiologies
 .addSourceInfo <- function(dat) {
-  fpath <- system.file('extdata', 'attribute_sources.tsv', package = 'bugphyzz')
-  source_data <- utils::read.table(
-    file = fpath, header = TRUE, sep = '\t', quote = '', check.names = FALSE,
-    comment.char = ''
-  )
-  dplyr::left_join(dat, source_data, by = 'Attribute_source')
+    fpath <- system.file(
+        'extdata', 'attribute_sources.tsv', package = 'bugphyzz'
+    )
+    source_data <- utils::read.table(
+        file = fpath, header = TRUE, sep = '\t', quote = '',
+        check.names = FALSE, comment.char = ''
+    )
+    dplyr::left_join(dat, source_data, by = 'Attribute_source')
 }
 
 ## Helper function for physiologies
 .reorderColumns <- function(df, name = NULL, attr_type) {
-  col_names <- colnames(df)
-  req_cols <- .requiredColumns(attr_type)
-  cols_lgl <- req_cols %in% col_names
-  if (!all(cols_lgl)) {
-    missing_cols <- paste0(req_cols[!cols_lgl], collapse = ', ')
-    if (!is.null(name)) {
-      msg <- paste0(
-        'Missing columns in ', name, '.', ' Missing columns are: ',
-        missing_cols
-      )
-    } else {
-      msg <- paste0(
-        'Missing columns.', ' Missing columns are: ', missing_cols
-      )
+    col_names <- colnames(df)
+    req_cols <- .requiredColumns(attr_type)
+    cols_lgl <- req_cols %in% col_names
+    if (!all(cols_lgl)) {
+        missing_cols <- paste0(req_cols[!cols_lgl], collapse = ', ')
+        if (!is.null(name)) {
+            msg <- paste0(
+                'Missing columns in ', name, '.', ' Missing columns are: ',
+                missing_cols
+            )
+        } else {
+            msg <- paste0(
+                'Missing columns.', ' Missing columns are: ', missing_cols
+            )
+        }
+        warning(msg, call. = FALSE)
     }
-    warning(msg, call. = FALSE)
-  }
-  cols <- req_cols[cols_lgl]
-  df |>
-    dplyr::relocate(dplyr::all_of(cols))
+    cols <- req_cols[cols_lgl]
+    df |>
+        dplyr::relocate(dplyr::all_of(cols))
 }
 
 ## Helper function for physiologies
 .homogenizeAerophilicityAttributeNames <- function(df) {
-  df |> dplyr::mutate(
-    Attribute = dplyr::case_when(
-      Attribute == 'obligately anaerobic' ~ 'anaerobic',
-      Attribute == 'microaerophilic' ~ 'aerobic',
-      Attribute == 'obligately aerobic' ~ 'aerobic',
-      TRUE ~ Attribute
+    df |> dplyr::mutate(
+        Attribute = dplyr::case_when(
+            Attribute == 'obligately anaerobic' ~ 'anaerobic',
+            Attribute == 'microaerophilic' ~ 'aerobic',
+            Attribute == 'obligately aerobic' ~ 'aerobic',
+            TRUE ~ Attribute
+        )
     )
-  )
 }
 
 ## Required columns
 .requiredColumns <- function(attr_type) {
-  fname <- system.file("extdata/curation_template.tsv", package = "bugphyzz")
-  df <- utils::read.table(fname, sep = "\t", header = TRUE)
-  lgl_vct_1 <- df$requiredness == "required"
-  lgl_vct_2 <- grepl(attr_type, df$attribute_types)
-  df <- df[lgl_vct_1 & lgl_vct_2,]
-  df[order(df[["required_column_order"]]), , drop = FALSE]
-  output <- df[['column_name']]
-  return(output)
+    fname <- system.file("extdata/curation_template.tsv", package = "bugphyzz")
+    df <- utils::read.table(fname, sep = "\t", header = TRUE)
+    lgl_vct_1 <- df$requiredness == "required"
+    lgl_vct_2 <- grepl(attr_type, df$attribute_types)
+    df <- df[lgl_vct_1 & lgl_vct_2,]
+    df[order(df[["required_column_order"]]), , drop = FALSE]
+    output <- df[['column_name']]
+    return(output)
 }
 
 ## Generate a template for a bugphyzz dataset
 .template <- function(dataset) {
-  template_tsv <- system.file("extdata/curation_template.tsv", package = "bugphyzz")
-  template <- utils::read.table(
-    file = template_tsv, sep = "\t", check.names = FALSE, header = TRUE,
-    allowEscapes = TRUE )
-  # template <- readr::read_tsv(template_tsv, show_col_types = FALSE)
-  template[template[["column_name"]] %in% colnames(dataset), ]
+    template_tsv <- system.file(
+        "extdata/curation_template.tsv", package = "bugphyzz"
+    )
+    template <- utils::read.table(
+        file = template_tsv, sep = "\t", check.names = FALSE, header = TRUE,
+        allowEscapes = TRUE )
+    # template <- readr::read_tsv(template_tsv, show_col_types = FALSE)
+    template[template[["column_name"]] %in% colnames(dataset), ]
 }
 
 ## Print valid attributes
 .attributes <- function() {
-  fname <- system.file("extdata/attributes.tsv", package = "bugphyzz")
-  df <- utils::read.table(
-    fname, sep = "\t", header = TRUE, check.names = FALSE
-  )
-  unique(df[,"attribute"])
+    fname <- system.file("extdata/attributes.tsv", package = "bugphyzz")
+    df <- utils::read.table(
+        fname, sep = "\t", header = TRUE, check.names = FALSE
+    )
+    unique(df[,"attribute"])
 }
 
 ## Append links to error table
 .appendLinks <- function(x) {
-  fname1 <- system.file('extdata/spreadsheet_links.tsv', package = 'bugphyzz')
-  links <- utils::read.table(fname1, header = TRUE, sep = '\t')
-  select_cols <- c("physiology", "source_link")
-  phys_links <- links |>
-    dplyr::select(tidyselect::all_of(select_cols))
-  custom_links <- .customLinks() |>
-    dplyr::select(tidyselect::all_of((select_cols)))
-  links <- dplyr::bind_rows(phys_links, custom_links)
-  x |>
-    dplyr::left_join(links, by = c("dataset" = "physiology"))
+    fname1 <- system.file(
+        'extdata/spreadsheet_links.tsv', package = 'bugphyzz'
+    )
+    links <- utils::read.table(fname1, header = TRUE, sep = '\t')
+    select_cols <- c("physiology", "source_link")
+    phys_links <- links |>
+        dplyr::select(tidyselect::all_of(select_cols))
+    custom_links <- .customLinks() |>
+        dplyr::select(tidyselect::all_of((select_cols)))
+    links <- dplyr::bind_rows(phys_links, custom_links)
+    x |>
+        dplyr::left_join(links, by = c("dataset" = "physiology"))
 }
-
diff --git a/bugphyzz.Rproj b/bugphyzz.Rproj
index 69fafd4b..fd8dd284 100644
--- a/bugphyzz.Rproj
+++ b/bugphyzz.Rproj
@@ -6,7 +6,7 @@ AlwaysSaveHistory: Default
 
 EnableCodeIndexing: Yes
 UseSpacesForTab: Yes
-NumSpacesForTab: 2
+NumSpacesForTab: 4
 Encoding: UTF-8
 
 RnwWeave: Sweave
diff --git a/man/getTaxonSignatures.Rd b/man/getTaxonSignatures.Rd
index 7957c6a9..e6b5dde1 100644
--- a/man/getTaxonSignatures.Rd
+++ b/man/getTaxonSignatures.Rd
@@ -18,8 +18,8 @@ argument tax_id_type = "Taxon_name" must also be used.}
 A character vector with the names of the signatures for a taxon.
 }
 \description{
-\code{getTaxonSignatures} returns the names of all of the signatures associated
-with a particular taxon. More details can be found in the main
+\code{getTaxonSignatures} returns the names of all of the signatures
+associated with a particular taxon. More details can be found in the main
 bugphyzz vignette; please run \code{browseVignettes("bugphyzz")}.
 }
 \examples{
diff --git a/man/makeSignatures.Rd b/man/makeSignatures.Rd
index b6ae2c14..5eb2b59e 100644
--- a/man/makeSignatures.Rd
+++ b/man/makeSignatures.Rd
@@ -24,8 +24,8 @@ makeSignatures(
 superkingdom, kingdom, phylum, class, order, family, genus, species, strain.
 They can be combined. "mixed" is equivalent to select all valid ranks.}
 
-\item{evidence}{A character vector. Valid options: exp, igc, nas, tas, tax, asr.
-They can be combined. Default is all.}
+\item{evidence}{A character vector. Valid options: exp, igc, nas, tas, tax,
+asr. They can be combined. Default is all.}
 
 \item{frequency}{A character vector. Valid options: always, usually,
 sometimes, rarely, unknown. They can be combined. By default, "rarely" is
diff --git a/man/physiologies.Rd b/man/physiologies.Rd
index bbe8230b..fe7fc3b5 100644
--- a/man/physiologies.Rd
+++ b/man/physiologies.Rd
@@ -12,8 +12,8 @@ Valid keyboards can be checked with \code{showPhys}. If 'all', all
 physiologies are imported.}
 
 \item{full_source}{Logical. If \code{TRUE}, the Attribute_source column will
-contain full source information. If \code{FALSE}, the Attribute_source column will
-contain shortened versions of the sources. Default is \code{FALSE}.}
+contain full source information. If \code{FALSE}, the Attribute_source column
+will contain shortened versions of the sources. Default is \code{FALSE}.}
 }
 \value{
 A list of data.frames in tidy format.
diff --git a/vignettes/bugphyzz.Rmd b/vignettes/bugphyzz.Rmd
index 3faafd25..dbdd2073 100644
--- a/vignettes/bugphyzz.Rmd
+++ b/vignettes/bugphyzz.Rmd
@@ -1,6 +1,8 @@
 ---
 title: "bugphyzz"
-subtitle: "A harmonized data resource and software for enrichment analysis of microbial physiologies"
+subtitle:
+    "A harmonized data resource and software for enrichment analysis of
+    microbial physiologies"
 author:
   - name: "Samuel Gamboa"
     email: "Samuel.Gamboa.Tuz@gmail.com"
@@ -18,8 +20,8 @@ vignette: >
 
 ```{r, include = FALSE}
 knitr::opts_chunk$set(
-  collapse = TRUE,
-  comment = "#>"
+    collapse = TRUE,
+    comment = "#>"
 )
 ```
 
@@ -28,7 +30,8 @@ knitr::opts_chunk$set(
 [bugphyzz](https://github.com/waldronlab/bugphyzzExports)
 is an electronic database of standardized microbial annotations that allows:
 1. annotation of microbial taxa from multiple sources, and
-2. creation of microbial signatures based on shared attributes for bug set enrichment analysis.
+2. creation of microbial signatures based on shared attributes for bug set
+enrichment analysis.
 
 # Data schema
 
@@ -63,7 +66,8 @@ observed or measured.
 5. _Attribute type_: A character string indicating the data type:
     * numeric: Attributes with numeric values (e.g., growth temperature: 25°C).
     * binary: Attributes with boolean values (e.g., butyrate-producing: TRUE).
-    * multistate-intersection: A set of related binary attributes (e.g., habitat).
+    * multistate-intersection: A set of related binary attributes
+    (e.g., habitat).
     * multistate-union: Attributes with three or more values represented as
     character strings (e.g., aerophilicity: aerobic, anaerobic,
     or facultatively anaerobic).
@@ -86,7 +90,8 @@ Metadata associated with attribute values:
 9. _Support values_:
     * Frequency and Score: Confidence that a given taxon exhibits a trait
     based on the curator’s knowledge or results of ASR or IBD.
-    * Validation: Score from the [10-fold cross-validation analysis](https://github.com/waldronlab/taxPProValidation).
+    * Validation: Score from the
+    [10-fold cross-validation analysis](https://github.com/waldronlab/taxPProValidation).
     Matthews correlation coefficient (MCC) for discrete attributes and
     R-squared for numeric attributes. Default threshold value is 0.5 and above.
     * NSTI: Nearest sequence taxon index from [PICRUSt](https://doi.org/10.1038/nbt.2676)
@@ -103,13 +108,17 @@ three, two, or one of these criteria.
 
 **Additional information**
 
-+ Description of **sources** and **attributes**: https://waldronlab.io/bugphyzz/articles/attributes.html
++ Description of **sources** and **attributes**:
+https://waldronlab.io/bugphyzz/articles/attributes.html
 
-+ Description of ontology **evidence** codes: https://geneontology.org/docs/guide-go-evidence-codes/
++ Description of ontology **evidence** codes:
+https://geneontology.org/docs/guide-go-evidence-codes/
 
-+ Description of **frequency** keywords and scores were based on: https://grammarist.com/grammar/adverbs-of-frequency/
++ Description of **frequency** keywords and scores were based on:
+https://grammarist.com/grammar/adverbs-of-frequency/
 
-+ IBD and ASR were performed with taxPPro: https://github.com/waldronlab/taxPPro
++ IBD and ASR were performed with taxPPro:
+https://github.com/waldronlab/taxPPro
 
 # Analysis and Stats
 
@@ -176,7 +185,7 @@ attribute (discrete):
 
 ```{r}
 aer_sigs_g <- makeSignatures(
-  dat = bp[["aerophilicity"]], tax_id_type = "Taxon_name", tax_level = "genus"
+    dat = bp[["aerophilicity"]], tax_id_type = "Taxon_name", tax_level = "genus"
 )
 map(aer_sigs_g, head)
 ```
@@ -186,8 +195,8 @@ temperature attribute (numeric):
 
 ```{r}
 gt_sigs_sp <- makeSignatures(
-  dat = bp[["growth temperature"]], tax_id_type = "Taxon_name",
-  tax_level = 'species'
+    dat = bp[["growth temperature"]], tax_id_type = "Taxon_name",
+    tax_level = 'species'
 )
 map(gt_sigs_sp, head)
 ```
@@ -197,8 +206,8 @@ attribute (numeric):
 
 ```{r}
 gt_sigs_mix <- makeSignatures(
-  dat = bp[["growth temperature"]], tax_id_type = "Taxon_name",
-  tax_level = "mixed", min = 0, max = 25
+    dat = bp[["growth temperature"]], tax_id_type = "Taxon_name",
+    tax_level = "mixed", min = 0, max = 25
 )
 map(gt_sigs_mix, head)
 ```
@@ -207,8 +216,8 @@ map(gt_sigs_mix, head)
 
 ```{r}
 ap_sigs_mix <- makeSignatures(
-  dat = bp[["animal pathogen"]], tax_id_type = "NCBI_ID",
-  tax_level = "mixed", evidence = c("exp", "igc", "nas", "tas")
+    dat = bp[["animal pathogen"]], tax_id_type = "NCBI_ID",
+    tax_level = "mixed", evidence = c("exp", "igc", "nas", "tas")
 )
 map(ap_sigs_mix, head)
 ```
@@ -217,7 +226,7 @@ map(ap_sigs_mix, head)
 
 ```{r}
 sigs <- map(bp, makeSignatures) |> 
-  list_flatten(name_spec = "{inner}")
+    list_flatten(name_spec = "{inner}")
 length(sigs)
 ```
 
@@ -228,12 +237,14 @@ head(map(sigs, head))
 # Run a bug set enrichment analysis
 
 Bugphyzz signatures are suitable for conducting bug set enrichment analysis
-using existing tools available in R. In this example, we will perform a set enrichment analysis using a dataset
+using existing tools available in R. In this example, we will perform a set
+enrichment analysis using a dataset
 with a known biological ground truth.
 
 The dataset originates from the Human Microbiome Project (2012) and compares
 subgingival and supragingival plaque.
-This data will be imported using the [MicrobiomeBenchmarkData package](https://bioconductor.org/packages/release/data/experiment/html/MicrobiomeBenchmarkData.html).
+This data will be imported using the
+[MicrobiomeBenchmarkData package](https://bioconductor.org/packages/release/data/experiment/html/MicrobiomeBenchmarkData.html).
 For the implementation of the enrichment analysis, we will utilize the
 Gene Set Enrichment Analysis (GSEA) method available in the
 [EnrichmentBrowser package](https://bioconductor.org/packages/release/bioc/html/EnrichmentBrowser.html).
@@ -265,7 +276,7 @@ Perform differential abundance (DA) analysis to get sets of microbes:
 
 ```{r}
 tse_subset$GROUP <- ifelse(
-  tse_subset$body_subsite == 'subgingival_plaque', 0, 1
+    tse_subset$body_subsite == 'subgingival_plaque', 0, 1
 )
 se <- EnrichmentBrowser::deAna(
     expr = tse_subset, de.method = 'limma', padj.method = 'fdr', 
@@ -275,7 +286,7 @@ se <- EnrichmentBrowser::deAna(
 dat <- data.frame(colData(se))
 design <- stats::model.matrix(~ GROUP, data = dat)
 assay(se) <- limma::voom(
-  counts = assay(se), design = design, plot = FALSE
+    counts = assay(se), design = design, plot = FALSE
 )$E
 ```
 
@@ -283,15 +294,15 @@ Perform GSEA and display the results:
 
 ```{r, message=FALSE}
 gsea <- EnrichmentBrowser::sbea(
-  method = 'gsea', se = se, gs = aer_sigs_g, perm = 1000,
-  alpha = 0.1 
+    method = 'gsea', se = se, gs = aer_sigs_g, perm = 1000,
+    alpha = 0.1 
 )
 gsea_tbl <- as.data.frame(gsea$res.tbl) |> 
-  mutate(
-    GENE.SET = ifelse(PVAL < 0.05, paste0(GENE.SET, ' *'), GENE.SET),
-    PVAL = round(PVAL, 3),
-  ) |> 
-  dplyr::rename(BUG.SET = GENE.SET)
+    mutate(
+        GENE.SET = ifelse(PVAL < 0.05, paste0(GENE.SET, ' *'), GENE.SET),
+        PVAL = round(PVAL, 3),
+    ) |> 
+        dplyr::rename(BUG.SET = GENE.SET)
 knitr::kable(gsea_tbl)
 ```