Title: | Core 'hubverse' Utilities |
---|---|
Description: | Core set of low-level utilities common across the 'hubverse'. Used to interact with 'hubverse' schema, Hub configuration files and model outputs and designed to be primarily used internally by other 'hubverse' packages. See Reich et al. (2022) <doi:10.2105/AJPH.2022.306831> for an overview of Collaborative Hubs. |
Authors: | Anna Krystalli [aut, cre] , Li Shandross [ctb], Nicholas G. Reich [ctb] , Evan L. Ray [ctb], Zhian N. Kamvar [ctb] , Consortium of Infectious Disease Modeling Hubs [cph] |
Maintainer: | Anna Krystalli <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.3.0 |
Built: | 2024-11-25 08:36:58 UTC |
Source: | https://github.com/hubverse-org/hubUtils |
Coerce a config list to a config class object
as_config(x)
as_config(x)
x |
a list representation of the contents of a config file |
a config list object with subclass <config>
.
config_tasks <- read_config( hub_path = system.file("testhubs/simple", package = "hubUtils") ) # Remove all attributes except names to demonstrate functionality attributes(config_tasks) <- attributes(config_tasks)[ names(attributes(config_tasks)) == "names" ] # Convert to config object as_config(config_tasks)
config_tasks <- read_config( hub_path = system.file("testhubs/simple", package = "hubUtils") ) # Remove all attributes except names to demonstrate functionality attributes(config_tasks) <- attributes(config_tasks)[ names(attributes(config_tasks)) == "names" ] # Convert to config object as_config(config_tasks)
Convert model output to a model_out_tbl
class object.
as_model_out_tbl( tbl, model_id_col = NULL, output_type_col = NULL, output_type_id_col = NULL, value_col = NULL, sep = "-", trim_to_task_ids = FALSE, hub_con = NULL, task_id_cols = NULL, remove_empty = FALSE )
as_model_out_tbl( tbl, model_id_col = NULL, output_type_col = NULL, output_type_id_col = NULL, value_col = NULL, sep = "-", trim_to_task_ids = FALSE, hub_con = NULL, task_id_cols = NULL, remove_empty = FALSE )
tbl |
a |
model_id_col |
character string. If a |
output_type_col |
character string. If an |
output_type_id_col |
character string. If an |
value_col |
character string. If a |
sep |
character string. Character used as separator when concatenating
|
trim_to_task_ids |
logical. Whether to trim |
hub_con |
a |
task_id_cols |
a character vector of column names. Only used if
|
remove_empty |
Logical. Whether to remove columns containing only |
A model_out_tbl
class object.
as_model_out_tbl(hub_con_output)
as_model_out_tbl(hub_con_output)
Compares the current schema version in a config file to a valid version. If the config file version is deprecated compared to the valid version, the function issues a lifecycle warning to prompt the user to upgrade.
check_deprecated_schema( config_version, config, valid_version = "v2.0.0", hubutils_version = "0.0.0.9010" )
check_deprecated_schema( config_version, config, valid_version = "v2.0.0", hubutils_version = "0.0.0.9010" )
config_version |
Character string of the schema version. |
config |
List representation of config file. |
valid_version |
Character string of minimum valid schema version. |
hubutils_version |
The version of the hubUtils package in which deprecation of
the schema version below |
Invisibly, TRUE
if the schema version is deprecated, FALSE
otherwise.
Primarily used for the side effect of issuing a lifecycle warning.
Extract the schema version from a schema id
or config schema_version
property
character string
extract_schema_version(id)
extract_schema_version(id)
id |
A schema |
The schema version number as a character string.
extract_schema_version("schema_version: v3.0.0") extract_schema_version("refs/heads/main/v3.0.0")
extract_schema_version("schema_version: v3.0.0") extract_schema_version("refs/heads/main/v3.0.0")
Version can be provided either directly through the config_version
argument
or extracted from a config_tasks
object.
get_config_tid(config_version, config_tasks)
get_config_tid(config_version, config_tasks)
config_version |
Character string of the schema version. |
config_tasks |
a list version of the contents of a hub's tasks config file |
character string of the name of the output type id column
get_config_tid("v3.0.0") get_config_tid("v2.0.0") # this will produce a warning because support for schema version 1.0.0 # has been dropped. get_config_tid("v1.0.0")
get_config_tid("v3.0.0") get_config_tid("v2.0.0") # this will produce a warning because support for schema version 1.0.0 # has been dropped. get_config_tid("v1.0.0")
Get hub configuration fields
get_hub_timezone(hub_path) get_hub_model_output_dir(hub_path) get_hub_file_formats(hub_path, round_id = NULL) get_hub_derived_task_ids(hub_path, round_id = NULL)
get_hub_timezone(hub_path) get_hub_model_output_dir(hub_path) get_hub_file_formats(hub_path, round_id = NULL) get_hub_derived_task_ids(hub_path, round_id = NULL)
hub_path |
Either a character string path to a local Modeling Hub directory
or an object of class |
round_id |
Character string. Round identifier. If the round is set to
|
get_hub_timezone
: The timezone of the hub
get_hub_model_output_dir
: The model output directory name
get_hub_file_formats
: character vector of accepted hub or round level
file formats. If round_id
is NULL
or the round does not have a round level
file_format
setting, returns the hub level file_format
setting.
get_hub_derived_task_ids
: character vector of hub or round level derived
task ID names. If round_id
is NULL
or the round does not have a round level
derived_task_ids
setting, returns the hub level derived_task_ids
setting.
get_hub_timezone()
: Get the hub timezone
get_hub_model_output_dir()
: Get the model output directory name
get_hub_file_formats()
: Get the hub or round level file formats
get_hub_derived_task_ids()
: Get the hub or round level derived_task_ids
hub_path <- system.file("testhubs", "flusight", package = "hubUtils") get_hub_timezone(hub_path) get_hub_model_output_dir(hub_path) get_hub_file_formats(hub_path) get_hub_file_formats(hub_path, "2022-12-12")
hub_path <- system.file("testhubs", "flusight", package = "hubUtils") get_hub_timezone(hub_path) get_hub_model_output_dir(hub_path) get_hub_file_formats(hub_path) get_hub_file_formats(hub_path, "2022-12-12")
Utilities for accessing round ID metadata
get_round_idx(config_tasks, round_id) get_round_ids( config_tasks, flatten = c("all", "model_task", "task_id", "none") )
get_round_idx(config_tasks, round_id) get_round_ids( config_tasks, flatten = c("all", "model_task", "task_id", "none") )
config_tasks |
a list version of the contents of a hub's tasks config file |
round_id |
Character string. Round identifier. If the round is set to
|
flatten |
Character. Whether and how much to flatten output.
|
the integer index of the element in config_tasks$rounds
that a
character round identifier maps to
a list or character vector of hub round IDs
A character vector is returned only if flatten = "all"
A list is returned otherwise (see flatten
for more details)
get_round_idx()
: Get an integer index of the element in
config_tasks$rounds
that a character round identifier maps to.
get_round_ids()
: Get a list or character vector of hub round IDs.
For each round, if round_id_from_variable
is TRUE
, round IDs returned are
the values of the task ID defined in the round_id
property. Otherwise, if
round_id_from_variable
is FALSE
, the value of the round_id
property is
returned.
config_tasks <- read_config( hub_path = system.file("testhubs/simple", package = "hubUtils") ) # Get round IDs get_round_ids(config_tasks) get_round_ids(config_tasks, flatten = "model_task") get_round_ids(config_tasks, flatten = "task_id") get_round_ids(config_tasks, flatten = "none") # Get round integer index using a round_id get_round_idx(config_tasks, "2022-10-01") get_round_idx(config_tasks, "2022-10-29")
config_tasks <- read_config( hub_path = system.file("testhubs/simple", package = "hubUtils") ) # Get round IDs get_round_ids(config_tasks) get_round_ids(config_tasks, flatten = "model_task") get_round_ids(config_tasks, flatten = "task_id") get_round_ids(config_tasks, flatten = "none") # Get round integer index using a round_id get_round_idx(config_tasks, "2022-10-01") get_round_idx(config_tasks, "2022-10-29")
Get the model tasks for a given round
get_round_model_tasks(config_tasks, round_id)
get_round_model_tasks(config_tasks, round_id)
config_tasks |
a list version of the contents of a hub's tasks config file |
round_id |
Character string. Round identifier. If the round is set to
|
a list representation of model tasks for a given round.
hub_path <- system.file("testhubs/simple", package = "hubUtils") config_tasks <- read_config(hub_path, "tasks") get_round_model_tasks(config_tasks, round_id = "2022-10-08") get_round_model_tasks(config_tasks, round_id = "2022-10-15")
hub_path <- system.file("testhubs/simple", package = "hubUtils") config_tasks <- read_config(hub_path, "tasks") get_round_model_tasks(config_tasks, round_id = "2022-10-08") get_round_model_tasks(config_tasks, round_id = "2022-10-15")
Get task ID names for a given round
get_round_task_id_names(config_tasks, round_id)
get_round_task_id_names(config_tasks, round_id)
config_tasks |
a list version of the contents of a hub's tasks config file |
round_id |
Character string. Round identifier. If the round is set to
|
a character vector of task ID names
hub_path <- system.file("testhubs/simple", package = "hubUtils") config_tasks <- read_config(hub_path, "tasks") get_round_task_id_names(config_tasks, round_id = "2022-10-08") get_round_task_id_names(config_tasks, round_id = "2022-10-15")
hub_path <- system.file("testhubs/simple", package = "hubUtils") config_tasks <- read_config(hub_path, "tasks") get_round_task_id_names(config_tasks, round_id = "2022-10-08") get_round_task_id_names(config_tasks, round_id = "2022-10-15")
Download a schema
get_schema(schema_url)
get_schema(schema_url)
schema_url |
The download URL for a given config schema version. |
Contents of the JSON schema as a character string.
Other functions supporting config file validation:
get_schema_url()
,
get_schema_valid_versions()
schema_url <- get_schema_url(config = "tasks", version = "v0.0.0.9") get_schema(schema_url)
schema_url <- get_schema_url(config = "tasks", version = "v0.0.0.9") get_schema(schema_url)
Get the JSON schema download URL for a given config file version
get_schema_url(config = c("tasks", "admin", "model"), version, branch = "main")
get_schema_url(config = c("tasks", "admin", "model"), version, branch = "main")
config |
Name of config file to validate. One of |
version |
A valid version of hubverse
schema
(e.g. |
branch |
The branch of the hubverse
schemas repository
from which to fetch schema. Defaults to |
The JSON schema download URL for a given config file version.
Other functions supporting config file validation:
get_schema()
,
get_schema_valid_versions()
get_schema_url(config = "tasks", version = "v0.0.0.9")
get_schema_url(config = "tasks", version = "v0.0.0.9")
Get a vector of valid schema versions
get_schema_valid_versions(branch = "main")
get_schema_valid_versions(branch = "main")
branch |
The branch of the hubverse
schemas repository
from which to fetch schema. Defaults to |
a character vector of valid versions of hubverse schema.
Other functions supporting config file validation:
get_schema()
,
get_schema_url()
get_schema_valid_versions()
get_schema_valid_versions()
Get the latest schema version from the schema repository if "latest" requested (default) or ignore if specific version provided.
get_schema_version_latest(schema_version = "latest", branch = "main")
get_schema_version_latest(schema_version = "latest", branch = "main")
schema_version |
A character vector. Either "latest" or a valid schema version. |
branch |
The branch of the hubverse
schemas repository
from which to fetch schema. Defaults to |
a schema version string. If schema_version
is "latest", the latest schema
version from the schema repository. If specific version provided to schema_version
, the same version is returned.
# Get the latest version of the schema get_schema_version_latest() get_schema_version_latest(schema_version = "v3.0.0")
# Get the latest version of the schema get_schema_version_latest() get_schema_version_latest(schema_version = "v3.0.0")
Get hub task IDs
get_task_id_names(config_tasks)
get_task_id_names(config_tasks)
config_tasks |
a list version of the contents of a hub's tasks config file |
a character vector of all unique task ID names across all rounds.
hub_path <- system.file("testhubs/simple", package = "hubUtils") config_tasks <- read_config(hub_path, "tasks") get_task_id_names(config_tasks)
hub_path <- system.file("testhubs/simple", package = "hubUtils") config_tasks <- read_config(hub_path, "tasks") get_task_id_names(config_tasks)
Get hub config schema versions
get_version_config(config) get_version_file(config_path) get_version_hub(hub_path, config_type = c("tasks", "admin"))
get_version_config(config) get_version_file(config_path) get_version_hub(hub_path, config_type = c("tasks", "admin"))
config |
A |
config_path |
Character string. Path to JSON config file. |
hub_path |
Either a character string path to a local Modeling Hub directory
or an object of class |
config_type |
Character vector specifying the type of config file to read. One of "tasks" or "admin". Default is "tasks". |
The schema version number as a character string.
get_version_config()
: Get schema version from config list representation.
get_version_file()
: Get schema version from config file at specific path.
get_version_hub()
: Get schema version from a config file of the given type ("tasks" or "admin") in a hub.
config <- read_config_file( system.file("config", "tasks.json", package = "hubUtils") ) get_version_config(config) config_path <- system.file("config", "tasks.json", package = "hubUtils") get_version_file(config_path) hub_path <- system.file("testhubs/simple", package = "hubUtils") get_version_hub(hub_path) get_version_hub(hub_path, "admin")
config <- read_config_file( system.file("config", "tasks.json", package = "hubUtils") ) get_version_config(config) config_path <- system.file("config", "tasks.json", package = "hubUtils") get_version_file(config_path) hub_path <- system.file("testhubs/simple", package = "hubUtils") get_version_hub(hub_path) get_version_hub(hub_path, "admin")
A subset of model output data accessed using hubData
from the simple example
hub contained in the hubUtils
package. The subset consists of "quantile"
output
type data for "US"
location and the most recent forecast date.
hub_con_output
hub_con_output
A tbl
with 92 rows and 8 columns:
forecast_date
: Origin date of the forecast.
horizon
: Forecast horizon relative to the forecast_date
.
target
: Target variable.
location
: Location of the forecast.
output_type
: Output type of forecast.
output_type_id
: Forecast output type level/identifier. In this case,
quantile level.
value
: Forecast value.
model_id
: Model identifier.
Is config list representation using v3.0.0 schema?
is_v3_config(config)
is_v3_config(config)
config |
List representation of the JSON config file. |
Logical, whether the config list representation is using v3.0.0 schema.
config <- read_config_file( system.file("config", "tasks.json", package = "hubUtils") ) is_v3_config(config)
config <- read_config_file( system.file("config", "tasks.json", package = "hubUtils") ) is_v3_config(config)
Is config file using v3.0.0 schema?
is_v3_config_file(config_path)
is_v3_config_file(config_path)
config_path |
Path to the config file. |
Logical, whether the config file is using v3.0.0 schema.
config_path <- system.file("config", "tasks.json", package = "hubUtils") is_v3_config_file(config_path)
config_path <- system.file("config", "tasks.json", package = "hubUtils") is_v3_config_file(config_path)
Is hub configured using v3.0.0 schema?
is_v3_hub(hub_path, config = c("tasks", "admin"))
is_v3_hub(hub_path, config = c("tasks", "admin"))
hub_path |
Either a character string path to a local Modeling Hub directory
or an object of class |
config |
Type of config file to read. One of |
Logical, whether the hub is configured using v3.0.0 schema.
is_v3_hub(hub_path = system.file("testhubs", "flusight", package = "hubUtils"))
is_v3_hub(hub_path = system.file("testhubs", "flusight", package = "hubUtils"))
Merge/Split model output tbl model_id
column
model_id_merge(tbl, sep = "-") model_id_split(tbl, sep = "-")
model_id_merge(tbl, sep = "-") model_id_split(tbl, sep = "-")
tbl |
a |
sep |
character string. Character used as separator when concatenating
|
tbl
with either team_abbr
and model_abbr
merged into a single model_id
column or model_id
split into columns team_abbr
and model_abbr
.
a tibble with model_id
column split into separate
team_abbr
and model_abbr
columns
model_id_merge()
: merge team_abbr
and model_abbr
into a single
model_id
column.
model_id_split()
: split model_id
column into separate team_abbr
and model_abbr
columns.
tbl_split <- model_id_split(hub_con_output) tbl_split # Merge model_id tbl_merged <- model_id_merge(tbl_split) tbl_merged # Split / Merge using custom separator tbl_sep <- hub_con_output tbl_sep$model_id <- gsub("-", "_", tbl_sep$model_id) tbl_sep <- model_id_split(tbl_sep, sep = "_") tbl_sep tbl_sep <- model_id_merge(tbl_sep, sep = "_") tbl_sep
tbl_split <- model_id_split(hub_con_output) tbl_split # Merge model_id tbl_merged <- model_id_merge(tbl_split) tbl_merged # Split / Merge using custom separator tbl_sep <- hub_con_output tbl_sep$model_id <- gsub("-", "_", tbl_sep$model_id) tbl_sep <- model_id_split(tbl_sep, sep = "_") tbl_sep tbl_sep <- model_id_merge(tbl_sep, sep = "_") tbl_sep
Read a hub config file into R
read_config( hub_path, config = c("tasks", "admin", "model-metadata-schema"), silent = TRUE )
read_config( hub_path, config = c("tasks", "admin", "model-metadata-schema"), silent = TRUE )
hub_path |
Either a character string path to a local Modeling Hub directory
or an object of class |
config |
Type of config file to read. One of |
silent |
Logical. If |
The contents of the config file as an R list. If possible, the output is
further converted to a <config>
class object before returning. Note that
"model-metadata-schema"
files are never converted to a <config>
object.
# Read config files from local hub hub_path <- system.file("testhubs/simple", package = "hubUtils") read_config(hub_path, "tasks") read_config(hub_path, "admin") # Read config file from AWS S3 bucket hub hub_path <- arrow::s3_bucket("hubverse/hubutils/testhubs/simple/") read_config(hub_path, "admin")
# Read config files from local hub hub_path <- system.file("testhubs/simple", package = "hubUtils") read_config(hub_path, "tasks") read_config(hub_path, "admin") # Read config file from AWS S3 bucket hub hub_path <- arrow::s3_bucket("hubverse/hubutils/testhubs/simple/") read_config(hub_path, "admin")
Read a JSON config file from a path
read_config_file(config_path, silent = TRUE)
read_config_file(config_path, silent = TRUE)
config_path |
Character string. Path to JSON config file. |
silent |
Logical. If |
The contents of the config file as an R list. If possible, the output is
further converted to a <config>
class object before returning. Note that
"model-metadata-schema"
files are never converted to a <config>
object.
read_config_file(system.file("config", "tasks.json", package = "hubUtils"))
read_config_file(system.file("config", "tasks.json", package = "hubUtils"))
A named character vector of standard column names used in hubverse model output data files. The terms currently used for standard column names in the hubverse are English. In future, however, this could be expanded to provide the basis for hub terminology localisation.
std_colnames
std_colnames
An object of class character
of length 4.
Validate a model_out_tbl
object.
validate_model_out_tbl(tbl)
validate_model_out_tbl(tbl)
tbl |
a |
If valid, returns a model_out_tbl
class object. Otherwise, throws an
error.
md_out <- as_model_out_tbl(hub_con_output) validate_model_out_tbl(md_out)
md_out <- as_model_out_tbl(hub_con_output) validate_model_out_tbl(md_out)
Compare hub config schema_version
s to specific version numbers from
a variety of sources
version_equal( version, config = NULL, config_path = NULL, hub_path = NULL, schema_version = NULL ) version_gte( version, config = NULL, config_path = NULL, hub_path = NULL, schema_version = NULL ) version_gt( version, config = NULL, config_path = NULL, hub_path = NULL, schema_version = NULL ) version_lte( version, config = NULL, config_path = NULL, hub_path = NULL, schema_version = NULL ) version_lt( version, config = NULL, config_path = NULL, hub_path = NULL, schema_version = NULL )
version_equal( version, config = NULL, config_path = NULL, hub_path = NULL, schema_version = NULL ) version_gte( version, config = NULL, config_path = NULL, hub_path = NULL, schema_version = NULL ) version_gt( version, config = NULL, config_path = NULL, hub_path = NULL, schema_version = NULL ) version_lte( version, config = NULL, config_path = NULL, hub_path = NULL, schema_version = NULL ) version_lt( version, config = NULL, config_path = NULL, hub_path = NULL, schema_version = NULL )
version |
Character string. Version number to compare against, must be in
the format |
config |
A |
config_path |
Character string. Path to JSON config file. |
hub_path |
Either a character string path to a local Modeling Hub directory
or an object of class |
schema_version |
Character string. A config |
TRUE
or FALSE
depending on how the schema version compares to the
version number specified.
version_equal()
: Check whether a schema version property is equal
to a specific version number.
version_gte()
: Check whether a schema version property is equal to
or greater than a specific version number.
version_gt()
: Check whether a schema version property is greater
than a specific version number.
version_lte()
: Check whether a schema version property is equal to
or less than a specific version number.
version_lt()
: Check whether a schema version property is less
than a specific version number.
# Actual version "v2.0.0" hub_path <- system.file("testhubs/simple", package = "hubUtils") # Actual version "v3.0.0" config_path <- system.file("config", "tasks.json", package = "hubUtils") config <- read_config_file(config_path) schema_version <- config$schema_version # Check whether schema_version equal to v3.0.0 version_equal("v3.0.0", config = config) version_equal("v3.0.0", config_path = config_path) version_equal("v3.0.0", hub_path = hub_path) version_equal("v3.0.0", schema_version = schema_version) # Check whether schema_version equal to or greater than v3.0.0 version_gte("v3.0.0", config = config) version_gte("v3.0.0", config_path = config_path) version_gte("v3.0.0", hub_path = hub_path) version_gte("v3.0.0", schema_version = schema_version) # Check whether schema_version greater than v3.0.0 version_gt("v3.0.0", config = config) version_gt("v3.0.0", config_path = config_path) version_gt("v3.0.0", hub_path = hub_path) version_gt("v3.0.0", schema_version = schema_version) # Check whether schema_version equal to or less than v3.0.0 version_lte("v3.0.0", config = config) version_lte("v3.0.0", config_path = config_path) version_lte("v3.0.0", hub_path = hub_path) version_lte("v3.0.0", schema_version = schema_version) # Check whether schema_version less than v3.0.0 version_lt("v3.0.0", config = config) version_lt("v3.0.0", config_path = config_path) version_lt("v3.0.0", hub_path = hub_path) version_lt("v3.0.0", schema_version = schema_version)
# Actual version "v2.0.0" hub_path <- system.file("testhubs/simple", package = "hubUtils") # Actual version "v3.0.0" config_path <- system.file("config", "tasks.json", package = "hubUtils") config <- read_config_file(config_path) schema_version <- config$schema_version # Check whether schema_version equal to v3.0.0 version_equal("v3.0.0", config = config) version_equal("v3.0.0", config_path = config_path) version_equal("v3.0.0", hub_path = hub_path) version_equal("v3.0.0", schema_version = schema_version) # Check whether schema_version equal to or greater than v3.0.0 version_gte("v3.0.0", config = config) version_gte("v3.0.0", config_path = config_path) version_gte("v3.0.0", hub_path = hub_path) version_gte("v3.0.0", schema_version = schema_version) # Check whether schema_version greater than v3.0.0 version_gt("v3.0.0", config = config) version_gt("v3.0.0", config_path = config_path) version_gt("v3.0.0", hub_path = hub_path) version_gt("v3.0.0", schema_version = schema_version) # Check whether schema_version equal to or less than v3.0.0 version_lte("v3.0.0", config = config) version_lte("v3.0.0", config_path = config_path) version_lte("v3.0.0", hub_path = hub_path) version_lte("v3.0.0", schema_version = schema_version) # Check whether schema_version less than v3.0.0 version_lt("v3.0.0", config = config) version_lt("v3.0.0", config_path = config_path) version_lt("v3.0.0", hub_path = hub_path) version_lt("v3.0.0", schema_version = schema_version)