Skip to content

Commit

Permalink
[Form Builder] Added presigning of S3 URLs functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
meta-paul committed Feb 9, 2024
1 parent 0da7255 commit 2b7780f
Show file tree
Hide file tree
Showing 31 changed files with 446 additions and 176 deletions.
8 changes: 4 additions & 4 deletions examples/form_composer_demo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@ These form-based questionnaires are example of FormComposer task generator.

## How to configure

1. For simple form config you need to provide FormComposer with one JSON file - a configuration of your form fields. An example is found in `examples/form_composer_demo/data/simple/data.json` file.
1. For simple form config you need to provide FormComposer with one JSON file - a configuration of your form fields. An example is found in `examples/form_composer_demo/data/simple/task_data.json` file.
2. For dynamic form configs you need two JSON files in `examples/form_composer_demo/data/dynamic` directory:
- Form configuration `form_config.json`
- Token sets values `token_sets_values_config.json`
- To generate extrapolated `data.json` config, run this command: `mephisto form_composer_config --extrapolate-token-sets True`
- Note that `data.json` file will be overwritten with the resulting config
3. To generate `token_sets_values_config.json` file from token values permutations in `single_token_values_config.json`, run this command: `mephisto form_composer_config --permutate-single-tokens True`
- To generate extrapolated `task_data.json` config, run this command: `mephisto form_composer_config --extrapolate-token-sets True`
- Note that `task_data.json` file will be overwritten with the resulting config
3. To generate `token_sets_values_config.json` file from token values permutations in `separate_token_values_config.json`, run this command: `mephisto form_composer_config --permutate-separate-tokens`
- Note that `token_sets_values_config.json` file will be overwritten with new sets of tokens values

---
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ mephisto:
profile_name: mephisto-router-iam
subdomain: "0125_mturk.5"
blueprint:
data_json: ${task_dir}/data/dynamic/data.json
data_json: ${task_dir}/data/dynamic/task_data.json
task_source: ${task_dir}/webapp/build/bundle.js
task_source_review: ${task_dir}/webapp/build/bundle.review.js
preview_source: ${task_dir}/preview/mturk_preview.html
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ mephisto:
profile_name: mephisto-router-iam
subdomain: "0124.6"
blueprint:
data_json: ${task_dir}/data/dynamic/data.json
data_json: ${task_dir}/data/dynamic/task_data.json
task_source: ${task_dir}/webapp/build/bundle.js
task_source_review: ${task_dir}/webapp/build/bundle.review.js
link_task_source: false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ defaults:

mephisto:
blueprint:
data_json: ${task_dir}/data/dynamic/data.json
data_json: ${task_dir}/data/dynamic/task_data.json
task_source: ${task_dir}/webapp/build/bundle.js
task_source_review: ${task_dir}/webapp/build/bundle.review.js
link_task_source: false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ defaults:

mephisto:
blueprint:
data_json: ${task_dir}/data/simple/data.json
data_json: ${task_dir}/data/simple/task_data.json
task_source: ${task_dir}/webapp/build/bundle.js
task_source_review: ${task_dir}/webapp/build/bundle.review.js
link_task_source: false
Expand Down
13 changes: 9 additions & 4 deletions examples/form_composer_demo/run_task_dynamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@

from omegaconf import DictConfig

from mephisto.client.cli import FORM_COMPOSER_DATA_CONFIG_NAME
from mephisto.client.cli import FORM_COMPOSER_FORM_CONFIG_NAME
from mephisto.client.cli import FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME
from mephisto.generators.form_composer.config_validation.task_data_config import (
create_extrapolated_config
)
Expand Down Expand Up @@ -69,15 +72,17 @@ def _build_custom_bundles(cfg: DictConfig) -> None:

def generate_data_json_config():
"""
Generate extrapolated `data.json` config file,
Generate extrapolated `task_data.json` config file,
based on existing form and tokens values config files
"""
app_path = os.path.dirname(os.path.abspath(__file__))
data_path = os.path.join(app_path, "data")

form_config_path = os.path.join(data_path, "dynamic", "form_config.json")
token_sets_values_config_path = os.path.join(data_path, "dynamic", "token_sets_values_config.json")
task_data_config_path = os.path.join(data_path, "dynamic", "data.json")
form_config_path = os.path.join(data_path, "dynamic", FORM_COMPOSER_FORM_CONFIG_NAME)
token_sets_values_config_path = os.path.join(
data_path, "dynamic", FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME,
)
task_data_config_path = os.path.join(data_path, "dynamic", FORM_COMPOSER_DATA_CONFIG_NAME)

create_extrapolated_config(
form_config_path=form_config_path,
Expand Down
15 changes: 10 additions & 5 deletions examples/form_composer_demo/run_task_dynamic_ec2_mturk_sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
from mephisto.abstractions.blueprints.abstract.static_task.static_blueprint import (
SharedStaticTaskState,
)
from mephisto.client.cli import FORM_COMPOSER_DATA_CONFIG_NAME
from mephisto.client.cli import FORM_COMPOSER_FORM_CONFIG_NAME
from mephisto.client.cli import FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME
from mephisto.generators.form_composer.config_validation.task_data_config import (
create_extrapolated_config
)
Expand Down Expand Up @@ -87,15 +90,17 @@ def _build_custom_bundles(cfg: DictConfig) -> None:

def generate_data_json_config():
"""
Generate extrapolated `data.json` config file,
Generate extrapolated `task_data.json` config file,
based on existing form and tokens values config files
"""
app_path = os.path.dirname(os.path.abspath(__file__))
data_path = os.path.join(app_path, "data")

form_config_path = os.path.join(data_path, "dynamic", "form_config.json")
token_sets_values_config_path = os.path.join(data_path, "dynamic", "token_sets_values_config.json")
task_data_config_path = os.path.join(data_path, "dynamic", "data.json")
form_config_path = os.path.join(data_path, "dynamic", FORM_COMPOSER_FORM_CONFIG_NAME)
token_sets_values_config_path = os.path.join(
data_path, "dynamic", FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME,
)
task_data_config_path = os.path.join(data_path, "dynamic", FORM_COMPOSER_DATA_CONFIG_NAME)

create_extrapolated_config(
form_config_path=form_config_path,
Expand All @@ -109,7 +114,7 @@ def generate_preview_html():
preview_path = os.path.join(app_path, "preview")
data_path = os.path.join(app_path, "data", "dynamic")

data_config_path = os.path.join(data_path, "data.json")
data_config_path = os.path.join(data_path, FORM_COMPOSER_DATA_CONFIG_NAME)
preview_template_path = os.path.join(preview_path, "mturk_preview_template.html")
preview_html_path = os.path.join(preview_path, "mturk_preview.html")

Expand Down
13 changes: 9 additions & 4 deletions examples/form_composer_demo/run_task_dynamic_ec2_prolific.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
from mephisto.abstractions.blueprints.abstract.static_task.static_blueprint import (
SharedStaticTaskState,
)
from mephisto.client.cli import FORM_COMPOSER_DATA_CONFIG_NAME
from mephisto.client.cli import FORM_COMPOSER_FORM_CONFIG_NAME
from mephisto.client.cli import FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME
from mephisto.data_model.qualification import QUAL_GREATER_EQUAL
from mephisto.generators.form_composer.config_validation.task_data_config import (
create_extrapolated_config
Expand Down Expand Up @@ -91,15 +94,17 @@ def _build_custom_bundles(cfg: DictConfig) -> None:

def generate_data_json_config():
"""
Generate extrapolated `data.json` config file,
Generate extrapolated `task_data.json` config file,
based on existing form and tokens values config files
"""
app_path = os.path.dirname(os.path.abspath(__file__))
data_path = os.path.join(app_path, "data")

form_config_path = os.path.join(data_path, "dynamic", "form_config.json")
token_sets_values_config_path = os.path.join(data_path, "dynamic", "token_sets_values_config.json")
task_data_config_path = os.path.join(data_path, "dynamic", "data.json")
form_config_path = os.path.join(data_path, "dynamic", FORM_COMPOSER_FORM_CONFIG_NAME)
token_sets_values_config_path = os.path.join(
data_path, "dynamic", FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME,
)
task_data_config_path = os.path.join(data_path, "dynamic", FORM_COMPOSER_DATA_CONFIG_NAME)

create_extrapolated_config(
form_config_path=form_config_path,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class SharedRemoteProcedureTaskState(
Mapping[
str,
Callable[
[str, Dict[str, Any], "RemoteProcedureAgentState"],
[str, Any, "RemoteProcedureAgentState"],
Optional[Dict[str, Any]],
],
]
Expand Down
105 changes: 68 additions & 37 deletions mephisto/client/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@
from mephisto.generators.form_composer.config_validation.task_data_config import (
verify_form_composer_configs
)
from mephisto.generators.form_composer.config_validation.single_token_values_config import (
update_single_token_values_config_with_file_urls
from mephisto.generators.form_composer.config_validation.separate_token_values_config import (
update_separate_token_values_config_with_file_urls
)
from mephisto.generators.form_composer.config_validation.token_sets_values_config import (
update_token_sets_values_config_with_premutated_data
Expand All @@ -48,10 +48,10 @@
from mephisto.utils.rich import create_table

FORM_COMPOSER_DATA_DIR_NAME = "data"
FORM_COMPOSER_DATA_CONFIG_NAME = "data.json"
FORM_COMPOSER_DATA_CONFIG_NAME = "task_data.json"
FORM_COMPOSER_FORM_CONFIG_NAME = "form_config.json"
FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME = "token_sets_values_config.json"
FORM_COMPOSER_SINGLE_TOKEN_VALUES_CONFIG_NAME = "single_token_values_config.json"
FORM_COMPOSER_SEPARATE_TOKEN_VALUES_CONFIG_NAME = "separate_token_values_config.json"


@click.group(cls=RichGroup)
Expand Down Expand Up @@ -443,6 +443,10 @@ def _get_form_composer_app_path() -> str:
@cli.command("form_composer", cls=RichCommand)
@click.option("-o", "--task-data-config-only", type=(bool), default=True)
def form_composer(task_data_config_only: bool = True):
# Get app path to run Python script from there (instead of the current file's directory).
# This is necessary, because the whole infrastructure is built relative to the location
# of the called command-line script.
# The other parts of the logic are inside `form_composer/run***.py` script
app_path = _get_form_composer_app_path()
app_data_path = os.path.join(app_path, FORM_COMPOSER_DATA_DIR_NAME)

Expand Down Expand Up @@ -471,85 +475,102 @@ def form_composer(task_data_config_only: bool = True):


@cli.command("form_composer_config", cls=RichCommand)
@click.option("-v", "--verify", type=(bool), default=False)
@click.option("-v", "--verify", type=(bool), default=False, is_flag=True)
@click.option("-f", "--update-file-location-values", type=(str), default=None)
@click.option("-e", "--extrapolate-token-sets", type=(bool), default=False)
@click.option("-p", "--permutate-single-tokens", type=(bool), default=False)
@click.option("-e", "--extrapolate-token-sets", type=(bool), default=False, is_flag=True)
@click.option("-p", "--permutate-separate-tokens", type=(bool), default=False, is_flag=True)
@click.option("-d", "--directory", type=(str), default=None)
@click.option("-u", "--use-presigned-urls", type=(bool), default=False, is_flag=True)
def form_composer_config(
verify: bool = False,
extrapolate_token_sets: bool = False,
verify: Optional[bool] = False,
update_file_location_values: Optional[str] = None,
permutate_single_tokens: bool = False,
extrapolate_token_sets: Optional[bool] = False,
permutate_separate_tokens: Optional[bool] = False,
directory: Optional[str] = None,
use_presigned_urls: Optional[bool] = False,
):
"""
Prepare (parts of) config for the `form_composer` command.
Note that each parameter is essentially a separate command, and they cannot be mixed.
:param verify: Validate all JSON configs currently present in the form builder config directory
:param update_file_location_values: Update existing single-token values config
:param update_file_location_values: Update existing separate-token values config
with file URLs automatically taken from a location (e.g. an S3 folder)
:param extrapolate_token_sets: Generate form versions based on extrapolated values of token sets
:param permutate_single_tokens: Create tokens sets as all possible permutations of values lists
defined in single-token values config
:param permutate_separate_tokens: Create tokens sets as all possible permutations of
values lists defined in separate-token values config
:param directory: Path to the directory where form and token configs are located.
By default it's the `data` directory of `form_composer` generator
:param use_presigned_urls: A modifier for the `--update-file-location-values` parameter.
Wraps every S3 URL with a standard handler that presigns these URLs during form rendering.
Only takes effect together with `--update-file-location-values`
"""
# Get app path to run Python script from there (instead of the current file's directory).
# This is necessary, because the whole infrastructure is built relative to the location
# of the called command-line script.
# The other parts of the logic are inside `form_composer/run***.py` script
app_path = _get_form_composer_app_path()
app_data_path = os.path.join(app_path, FORM_COMPOSER_DATA_DIR_NAME)

full_path = lambda data_file: os.path.join(app_data_path, data_file)
# Substitute defaults for missing param values
if directory:
app_data_path = directory
else:
app_path = _get_form_composer_app_path()
app_data_path = os.path.join(app_path, FORM_COMPOSER_DATA_DIR_NAME)
print(f"[blue]Using config directory: {app_data_path}[/blue]")

# Validate param values
if not os.path.exists(app_data_path):
print(f"[red]Directory '{app_data_path}' does not exist[/red]")
return None

if use_presigned_urls and not update_file_location_values:
print(
f"[red]Parameter `--use-presigned-urls` can be used "
f"only with `--update-file-location-values` option[/red]"
)
return None

# Check files and create `task_data.json` config with tokens data before running a task
full_path = lambda data_file: os.path.join(app_data_path, data_file)
task_data_config_path = full_path(FORM_COMPOSER_DATA_CONFIG_NAME)
form_config_path = full_path(FORM_COMPOSER_FORM_CONFIG_NAME)
token_sets_values_config_path = full_path(FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME)
single_token_values_config_path = full_path(FORM_COMPOSER_SINGLE_TOKEN_VALUES_CONFIG_NAME)

# Change dir to app dir
os.chdir(app_path)
separate_token_values_config_path = full_path(FORM_COMPOSER_SEPARATE_TOKEN_VALUES_CONFIG_NAME)

# Run the command
if verify:
print(f"[green]Started configs verification in '{task_data_config_path}'[/green]")
verify_form_composer_configs(
task_data_config_path=task_data_config_path,
form_config_path=form_config_path,
token_sets_values_config_path=token_sets_values_config_path,
single_token_values_config_path=single_token_values_config_path,
separate_token_values_config_path=separate_token_values_config_path,
task_data_config_only=False,
)
print(f"[green]Finished successfully[/green]")
return None

if update_file_location_values:
elif update_file_location_values:
print(
f"[green]Started updating '{FORM_COMPOSER_SINGLE_TOKEN_VALUES_CONFIG_NAME}' "
f"[green]Started updating '{FORM_COMPOSER_SEPARATE_TOKEN_VALUES_CONFIG_NAME}' "
f"with file URLs from '{update_file_location_values}'[/green]"
)
if is_s3_url(update_file_location_values):
update_single_token_values_config_with_file_urls(
update_separate_token_values_config_with_file_urls(
url=update_file_location_values,
single_token_values_config_path=single_token_values_config_path,
separate_token_values_config_path=separate_token_values_config_path,
use_presigned_urls=use_presigned_urls,
)
print(f"[green]Finished successfully[/green]")
else:
print("`--update-file-location-values` must be a valid S3 URL")
return None

if permutate_single_tokens:
elif permutate_separate_tokens:
print(
f"[green]Started updating '{FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME}' "
f"with permutated single-token values[/green]"
f"with permutated separate-token values[/green]"
)
update_token_sets_values_config_with_premutated_data(
single_token_values_config_path=single_token_values_config_path,
separate_token_values_config_path=separate_token_values_config_path,
token_sets_values_config_path=token_sets_values_config_path,
)
print(f"[green]Finished successfully[/green]")
return None

if extrapolate_token_sets:
elif extrapolate_token_sets:
print(
f"[green]Started extrapolating token sets values "
f"from '{FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME}' [/green]"
Expand All @@ -560,7 +581,17 @@ def form_composer_config(
task_data_config_path=task_data_config_path,
)
print(f"[green]Finished successfully[/green]")
return None

else:
print(
f"[red]"
f"This command must have one of following parameters:"
f"\n-v/--verify"
f"\n-f/--update-file-location-value"
f"\n-e/--extrapolate-token-set"
f"\n-p/--permutate-separate-tokens"
f"[/red]"
)


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 2b7780f

Please sign in to comment.