From 2b7780f5b482951794cbdf108b04e7a23f043c62 Mon Sep 17 00:00:00 2001 From: Paul Abumov Date: Fri, 9 Feb 2024 11:21:58 -0500 Subject: [PATCH] [Form Builder] Added presigining of S3 URLs functionality --- examples/form_composer_demo/README.md | 8 +- .../dynamic/{data.json => task_data.json} | 0 .../data/simple/{data.json => task_data.json} | 0 .../dynamic_example_ec2_mturk_sandbox.yaml | 2 +- .../conf/dynamic_example_ec2_prolific.yaml | 2 +- .../conf/dynamic_example_local_mock.yaml | 2 +- .../conf/example_local_mock.yaml | 2 +- .../form_composer_demo/run_task_dynamic.py | 13 ++- .../run_task_dynamic_ec2_mturk_sandbox.py | 15 ++- .../run_task_dynamic_ec2_prolific.py | 13 ++- .../remote_procedure_blueprint.py | 2 +- mephisto/client/cli.py | 105 ++++++++++++------ mephisto/generators/form_composer/README.md | 64 +++++------ .../config_validation/__init__.py | 5 + .../config_validation/common_validation.py | 5 + .../config_validation_constants.py | 7 +- .../config_validation/form_config.py | 5 + ...fig.py => separate_token_values_config.py} | 27 ++++- .../config_validation/task_data_config.py | 39 ++----- .../token_sets_values_config.py | 28 +++-- .../form_composer/config_validation/utils.py | 104 ++++++++++++++--- .../generators/form_composer/constants.py | 24 +++- .../hydra_configs/conf/default.yaml | 3 +- .../form_composer/remote_procedures.py | 23 ++++ mephisto/generators/form_composer/run.py | 19 +++- .../form_composer/webapp/src/app.jsx | 13 ++- .../webapp/src/components/core_components.jsx | 5 +- .../form_composer/webapp/src/reviewapp.jsx | 8 +- .../webapp/webpack.config.review.js | 2 +- .../src/FormComposer/FormComposer.js | 23 ++-- .../src/FormComposer/utils.js | 54 +++++++++ 31 files changed, 446 insertions(+), 176 deletions(-) rename examples/form_composer_demo/data/dynamic/{data.json => task_data.json} (100%) rename examples/form_composer_demo/data/simple/{data.json => task_data.json} (100%) rename 
mephisto/generators/form_composer/config_validation/{single_token_values_config.py => separate_token_values_config.py} (61%) create mode 100644 mephisto/generators/form_composer/remote_procedures.py create mode 100644 packages/react-form-composer/src/FormComposer/utils.js diff --git a/examples/form_composer_demo/README.md b/examples/form_composer_demo/README.md index b3a222782..241ad0afe 100644 --- a/examples/form_composer_demo/README.md +++ b/examples/form_composer_demo/README.md @@ -16,13 +16,13 @@ These form-based questionnaires are example of FormComposer task generator. ## How to configure -1. For simple form config you need to provide FormComposer with one JSON file - a configuration of your form fields. An example is found in `examples/form_composer_demo/data/simple/data.json` file. +1. For simple form config you need to provide FormComposer with one JSON file - a configuration of your form fields. An example is found in `examples/form_composer_demo/data/simple/task_data.json` file. 2. For dynamic form configs you need two JSON files in `examples/form_composer_demo/data/dynamic` directory: - Form configuration `form_config.json` - Token sets values `token_sets_values_config.json` - - To generate extrapolated `data.json` config, run this command: `mephisto form_composer_config --extrapolate-token-sets True` - - Note that `data.json` file will be overwritten with the resulting config -3. To generate `token_sets_values_config.json` file from token values permutations in `single_token_values_config.json`, run this command: `mephisto form_composer_config --permutate-single-tokens True` + - To generate extrapolated `task_data.json` config, run this command: `mephisto form_composer_config --extrapolate-token-sets True` + - Note that `task_data.json` file will be overwritten with the resulting config +3. 
To generate `token_sets_values_config.json` file from token values permutations in `separate_token_values_config.json`, run this command: `mephisto form_composer_config --permutate-separate-tokens` - Note that `token_sets_values_config.json` file will be overwriten with new sets of tokens values --- diff --git a/examples/form_composer_demo/data/dynamic/data.json b/examples/form_composer_demo/data/dynamic/task_data.json similarity index 100% rename from examples/form_composer_demo/data/dynamic/data.json rename to examples/form_composer_demo/data/dynamic/task_data.json diff --git a/examples/form_composer_demo/data/simple/data.json b/examples/form_composer_demo/data/simple/task_data.json similarity index 100% rename from examples/form_composer_demo/data/simple/data.json rename to examples/form_composer_demo/data/simple/task_data.json diff --git a/examples/form_composer_demo/hydra_configs/conf/dynamic_example_ec2_mturk_sandbox.yaml b/examples/form_composer_demo/hydra_configs/conf/dynamic_example_ec2_mturk_sandbox.yaml index b18188fd9..a1bfa5fc0 100644 --- a/examples/form_composer_demo/hydra_configs/conf/dynamic_example_ec2_mturk_sandbox.yaml +++ b/examples/form_composer_demo/hydra_configs/conf/dynamic_example_ec2_mturk_sandbox.yaml @@ -15,7 +15,7 @@ mephisto: profile_name: mephisto-router-iam subdomain: "0125_mturk.5" blueprint: - data_json: ${task_dir}/data/dynamic/data.json + data_json: ${task_dir}/data/dynamic/task_data.json task_source: ${task_dir}/webapp/build/bundle.js task_source_review: ${task_dir}/webapp/build/bundle.review.js preview_source: ${task_dir}/preview/mturk_preview.html diff --git a/examples/form_composer_demo/hydra_configs/conf/dynamic_example_ec2_prolific.yaml b/examples/form_composer_demo/hydra_configs/conf/dynamic_example_ec2_prolific.yaml index 801e85297..d4c816059 100644 --- a/examples/form_composer_demo/hydra_configs/conf/dynamic_example_ec2_prolific.yaml +++ b/examples/form_composer_demo/hydra_configs/conf/dynamic_example_ec2_prolific.yaml 
@@ -15,7 +15,7 @@ mephisto: profile_name: mephisto-router-iam subdomain: "0124.6" blueprint: - data_json: ${task_dir}/data/dynamic/data.json + data_json: ${task_dir}/data/dynamic/task_data.json task_source: ${task_dir}/webapp/build/bundle.js task_source_review: ${task_dir}/webapp/build/bundle.review.js link_task_source: false diff --git a/examples/form_composer_demo/hydra_configs/conf/dynamic_example_local_mock.yaml b/examples/form_composer_demo/hydra_configs/conf/dynamic_example_local_mock.yaml index ea3c0363b..4552077b3 100644 --- a/examples/form_composer_demo/hydra_configs/conf/dynamic_example_local_mock.yaml +++ b/examples/form_composer_demo/hydra_configs/conf/dynamic_example_local_mock.yaml @@ -11,7 +11,7 @@ defaults: mephisto: blueprint: - data_json: ${task_dir}/data/dynamic/data.json + data_json: ${task_dir}/data/dynamic/task_data.json task_source: ${task_dir}/webapp/build/bundle.js task_source_review: ${task_dir}/webapp/build/bundle.review.js link_task_source: false diff --git a/examples/form_composer_demo/hydra_configs/conf/example_local_mock.yaml b/examples/form_composer_demo/hydra_configs/conf/example_local_mock.yaml index af5385102..de1a46a6d 100644 --- a/examples/form_composer_demo/hydra_configs/conf/example_local_mock.yaml +++ b/examples/form_composer_demo/hydra_configs/conf/example_local_mock.yaml @@ -11,7 +11,7 @@ defaults: mephisto: blueprint: - data_json: ${task_dir}/data/simple/data.json + data_json: ${task_dir}/data/simple/task_data.json task_source: ${task_dir}/webapp/build/bundle.js task_source_review: ${task_dir}/webapp/build/bundle.review.js link_task_source: false diff --git a/examples/form_composer_demo/run_task_dynamic.py b/examples/form_composer_demo/run_task_dynamic.py index 5ab0c8e02..c615e9f1d 100644 --- a/examples/form_composer_demo/run_task_dynamic.py +++ b/examples/form_composer_demo/run_task_dynamic.py @@ -8,6 +8,9 @@ from omegaconf import DictConfig +from mephisto.client.cli import FORM_COMPOSER_DATA_CONFIG_NAME +from 
mephisto.client.cli import FORM_COMPOSER_FORM_CONFIG_NAME +from mephisto.client.cli import FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME from mephisto.generators.form_composer.config_validation.task_data_config import ( create_extrapolated_config ) @@ -69,15 +72,17 @@ def _build_custom_bundles(cfg: DictConfig) -> None: def generate_data_json_config(): """ - Generate extrapolated `data.json` config file, + Generate extrapolated `task_data.json` config file, based on existing form and tokens values config files """ app_path = os.path.dirname(os.path.abspath(__file__)) data_path = os.path.join(app_path, "data") - form_config_path = os.path.join(data_path, "dynamic", "form_config.json") - token_sets_values_config_path = os.path.join(data_path, "dynamic", "token_sets_values_config.json") - task_data_config_path = os.path.join(data_path, "dynamic", "data.json") + form_config_path = os.path.join(data_path, "dynamic", FORM_COMPOSER_FORM_CONFIG_NAME) + token_sets_values_config_path = os.path.join( + data_path, "dynamic", FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME, + ) + task_data_config_path = os.path.join(data_path, "dynamic", FORM_COMPOSER_DATA_CONFIG_NAME) create_extrapolated_config( form_config_path=form_config_path, diff --git a/examples/form_composer_demo/run_task_dynamic_ec2_mturk_sandbox.py b/examples/form_composer_demo/run_task_dynamic_ec2_mturk_sandbox.py index 80b2af130..ab6bd5ba4 100644 --- a/examples/form_composer_demo/run_task_dynamic_ec2_mturk_sandbox.py +++ b/examples/form_composer_demo/run_task_dynamic_ec2_mturk_sandbox.py @@ -14,6 +14,9 @@ from mephisto.abstractions.blueprints.abstract.static_task.static_blueprint import ( SharedStaticTaskState, ) +from mephisto.client.cli import FORM_COMPOSER_DATA_CONFIG_NAME +from mephisto.client.cli import FORM_COMPOSER_FORM_CONFIG_NAME +from mephisto.client.cli import FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME from mephisto.generators.form_composer.config_validation.task_data_config import ( create_extrapolated_config 
) @@ -87,15 +90,17 @@ def _build_custom_bundles(cfg: DictConfig) -> None: def generate_data_json_config(): """ - Generate extrapolated `data.json` config file, + Generate extrapolated `task_data.json` config file, based on existing form and tokens values config files """ app_path = os.path.dirname(os.path.abspath(__file__)) data_path = os.path.join(app_path, "data") - form_config_path = os.path.join(data_path, "dynamic", "form_config.json") - token_sets_values_config_path = os.path.join(data_path, "dynamic", "token_sets_values_config.json") - task_data_config_path = os.path.join(data_path, "dynamic", "data.json") + form_config_path = os.path.join(data_path, "dynamic", FORM_COMPOSER_FORM_CONFIG_NAME) + token_sets_values_config_path = os.path.join( + data_path, "dynamic", FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME, + ) + task_data_config_path = os.path.join(data_path, "dynamic", FORM_COMPOSER_DATA_CONFIG_NAME) create_extrapolated_config( form_config_path=form_config_path, @@ -109,7 +114,7 @@ def generate_preview_html(): preview_path = os.path.join(app_path, "preview") data_path = os.path.join(app_path, "data", "dynamic") - data_config_path = os.path.join(data_path, "data.json") + data_config_path = os.path.join(data_path, FORM_COMPOSER_DATA_CONFIG_NAME) preview_template_path = os.path.join(preview_path, "mturk_preview_template.html") preview_html_path = os.path.join(preview_path, "mturk_preview.html") diff --git a/examples/form_composer_demo/run_task_dynamic_ec2_prolific.py b/examples/form_composer_demo/run_task_dynamic_ec2_prolific.py index 6f4b4d55a..5b1c0b11b 100644 --- a/examples/form_composer_demo/run_task_dynamic_ec2_prolific.py +++ b/examples/form_composer_demo/run_task_dynamic_ec2_prolific.py @@ -11,6 +11,9 @@ from mephisto.abstractions.blueprints.abstract.static_task.static_blueprint import ( SharedStaticTaskState, ) +from mephisto.client.cli import FORM_COMPOSER_DATA_CONFIG_NAME +from mephisto.client.cli import FORM_COMPOSER_FORM_CONFIG_NAME +from 
mephisto.client.cli import FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME from mephisto.data_model.qualification import QUAL_GREATER_EQUAL from mephisto.generators.form_composer.config_validation.task_data_config import ( create_extrapolated_config @@ -91,15 +94,17 @@ def _build_custom_bundles(cfg: DictConfig) -> None: def generate_data_json_config(): """ - Generate extrapolated `data.json` config file, + Generate extrapolated `task_data.json` config file, based on existing form and tokens values config files """ app_path = os.path.dirname(os.path.abspath(__file__)) data_path = os.path.join(app_path, "data") - form_config_path = os.path.join(data_path, "dynamic", "form_config.json") - token_sets_values_config_path = os.path.join(data_path, "dynamic", "token_sets_values_config.json") - task_data_config_path = os.path.join(data_path, "dynamic", "data.json") + form_config_path = os.path.join(data_path, "dynamic", FORM_COMPOSER_FORM_CONFIG_NAME) + token_sets_values_config_path = os.path.join( + data_path, "dynamic", FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME, + ) + task_data_config_path = os.path.join(data_path, "dynamic", FORM_COMPOSER_DATA_CONFIG_NAME) create_extrapolated_config( form_config_path=form_config_path, diff --git a/mephisto/abstractions/blueprints/remote_procedure/remote_procedure_blueprint.py b/mephisto/abstractions/blueprints/remote_procedure/remote_procedure_blueprint.py index 8d2ecfacf..4f825b88d 100644 --- a/mephisto/abstractions/blueprints/remote_procedure/remote_procedure_blueprint.py +++ b/mephisto/abstractions/blueprints/remote_procedure/remote_procedure_blueprint.py @@ -70,7 +70,7 @@ class SharedRemoteProcedureTaskState( Mapping[ str, Callable[ - [str, Dict[str, Any], "RemoteProcedureAgentState"], + [str, Any, "RemoteProcedureAgentState"], Optional[Dict[str, Any]], ], ] diff --git a/mephisto/client/cli.py b/mephisto/client/cli.py index 5b8bd915a..7bc4e7609 100644 --- a/mephisto/client/cli.py +++ b/mephisto/client/cli.py @@ -35,8 +35,8 @@ from 
mephisto.generators.form_composer.config_validation.task_data_config import ( verify_form_composer_configs ) -from mephisto.generators.form_composer.config_validation.single_token_values_config import ( - update_single_token_values_config_with_file_urls +from mephisto.generators.form_composer.config_validation.separate_token_values_config import ( + update_separate_token_values_config_with_file_urls ) from mephisto.generators.form_composer.config_validation.token_sets_values_config import ( update_token_sets_values_config_with_premutated_data @@ -48,10 +48,10 @@ from mephisto.utils.rich import create_table FORM_COMPOSER_DATA_DIR_NAME = "data" -FORM_COMPOSER_DATA_CONFIG_NAME = "data.json" +FORM_COMPOSER_DATA_CONFIG_NAME = "task_data.json" FORM_COMPOSER_FORM_CONFIG_NAME = "form_config.json" FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME = "token_sets_values_config.json" -FORM_COMPOSER_SINGLE_TOKEN_VALUES_CONFIG_NAME = "single_token_values_config.json" +FORM_COMPOSER_SEPARATE_TOKEN_VALUES_CONFIG_NAME = "separate_token_values_config.json" @click.group(cls=RichGroup) @@ -443,6 +443,10 @@ def _get_form_composer_app_path() -> str: @cli.command("form_composer", cls=RichCommand) @click.option("-o", "--task-data-config-only", type=(bool), default=True) def form_composer(task_data_config_only: bool = True): + # Get app path to run Python script from there (instead of the current file's directory). + # This is necessary, because the whole infrastructure is built relative to the location + # of the called command-line script. 
+ # The other parts of the logic are inside `form_composer/run***.py` script app_path = _get_form_composer_app_path() app_data_path = os.path.join(app_path, FORM_COMPOSER_DATA_DIR_NAME) @@ -471,85 +475,102 @@ def form_composer(task_data_config_only: bool = True): @cli.command("form_composer_config", cls=RichCommand) -@click.option("-v", "--verify", type=(bool), default=False) +@click.option("-v", "--verify", type=(bool), default=False, is_flag=True) @click.option("-f", "--update-file-location-values", type=(str), default=None) -@click.option("-e", "--extrapolate-token-sets", type=(bool), default=False) -@click.option("-p", "--permutate-single-tokens", type=(bool), default=False) +@click.option("-e", "--extrapolate-token-sets", type=(bool), default=False, is_flag=True) +@click.option("-p", "--permutate-separate-tokens", type=(bool), default=False, is_flag=True) +@click.option("-d", "--directory", type=(str), default=None) +@click.option("-u", "--use-presigned-urls", type=(bool), default=False, is_flag=True) def form_composer_config( - verify: bool = False, - extrapolate_token_sets: bool = False, + verify: Optional[bool] = False, update_file_location_values: Optional[str] = None, - permutate_single_tokens: bool = False, + extrapolate_token_sets: Optional[bool] = False, + permutate_separate_tokens: Optional[bool] = False, + directory: Optional[str] = None, + use_presigned_urls: Optional[bool] = False, ): """ Prepare (parts of) config for the `form_composer` command. Note that each parameter is essentially a separate command, and they cannot be mixed. :param verify: Validate all JSON configs currently present in the form builder config directory - :param update_file_location_values: Update existing single-token values config + :param update_file_location_values: Update existing separate-token values config with file URLs automatically taken from a location (e.g. 
an S3 folder) :param extrapolate_token_sets: Generate form versions based on extrapolated values of token sets - :param permutate_single_tokens: Create tokens sets as all possible permutations of values lists - defined in single-token values config + :param permutate_separate_tokens: Create tokens sets as all possible permutations of + values lists defined in separate-token values config + :param directory: Path to the directory where form and token configs are located. + By default it's the `data` directory of `form_composer` generator + :param use_presigned_urls: a modifier for `--update_file_location_values` parameter. + Wraps every S3 URL with a standard handler that presigns these URLs during form rendering when we use `--update_file_location_values` command """ - # Get app path to run Python script from there (instead of the current file's directory). - # This is necessary, because the whole infrastructure is built relative to the location - # of the called command-line script. - # The other parts of the logic are inside `form_composer/run***.py` script - app_path = _get_form_composer_app_path(). 
- app_data_path = os.path.join(app_path, FORM_COMPOSER_DATA_DIR_NAME) - full_path = lambda data_file: os.path.join(app_data_path, data_file) + # Substitute defaults for missing param values + if directory: + app_data_path = directory + else: + app_path = _get_form_composer_app_path() + app_data_path = os.path.join(app_path, FORM_COMPOSER_DATA_DIR_NAME) + print(f"[blue]Using config directory: {app_data_path}[/blue]") + + # Validate param values + if not os.path.exists(app_data_path): + print(f"[red]Directory '{app_data_path}' does not exist[/red]") + return None + + if use_presigned_urls and not update_file_location_values: + print( + f"[red]Parameter `--use-presigned-urls` can be used " + f"only with `--update-file-location-values` option[/red]" + ) + return None # Check files and create `data.json` config with tokens data before running a task + full_path = lambda data_file: os.path.join(app_data_path, data_file) task_data_config_path = full_path(FORM_COMPOSER_DATA_CONFIG_NAME) form_config_path = full_path(FORM_COMPOSER_FORM_CONFIG_NAME) token_sets_values_config_path = full_path(FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME) - single_token_values_config_path = full_path(FORM_COMPOSER_SINGLE_TOKEN_VALUES_CONFIG_NAME) - - # Change dir to app dir - os.chdir(app_path) + separate_token_values_config_path = full_path(FORM_COMPOSER_SEPARATE_TOKEN_VALUES_CONFIG_NAME) + # Run the command if verify: print(f"[green]Started configs verification in '{task_data_config_path}'[/green]") verify_form_composer_configs( task_data_config_path=task_data_config_path, form_config_path=form_config_path, token_sets_values_config_path=token_sets_values_config_path, - single_token_values_config_path=single_token_values_config_path, + separate_token_values_config_path=separate_token_values_config_path, task_data_config_only=False, ) print(f"[green]Finished successfully[/green]") - return None - if update_file_location_values: + elif update_file_location_values: print( - f"[green]Started 
updating '{FORM_COMPOSER_SINGLE_TOKEN_VALUES_CONFIG_NAME}' " + f"[green]Started updating '{FORM_COMPOSER_SEPARATE_TOKEN_VALUES_CONFIG_NAME}' " f"with file URLs from '{update_file_location_values}'[/green]" ) if is_s3_url(update_file_location_values): - update_single_token_values_config_with_file_urls( + update_separate_token_values_config_with_file_urls( url=update_file_location_values, - single_token_values_config_path=single_token_values_config_path, + separate_token_values_config_path=separate_token_values_config_path, + use_presigned_urls=use_presigned_urls, ) print(f"[green]Finished successfully[/green]") else: print("`--update-file-location-values` must be a valid S3 URL") - return None - if permutate_single_tokens: + elif permutate_separate_tokens: print( f"[green]Started updating '{FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME}' " - f"with permutated single-token values[/green]" + f"with permutated separate-token values[/green]" ) update_token_sets_values_config_with_premutated_data( - single_token_values_config_path=single_token_values_config_path, + separate_token_values_config_path=separate_token_values_config_path, token_sets_values_config_path=token_sets_values_config_path, ) print(f"[green]Finished successfully[/green]") - return None - if extrapolate_token_sets: + elif extrapolate_token_sets: print( f"[green]Started extrapolating token sets values " f"from '{FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME}' [/green]" @@ -560,7 +581,17 @@ def form_composer_config( task_data_config_path=task_data_config_path, ) print(f"[green]Finished successfully[/green]") - return None + + else: + print( + f"[red]" + f"This command must have one of following parameters:" + f"\n-v/--verify" + f"\n-f/--update-file-location-value" + f"\n-e/--extrapolate-token-set" + f"\n-p/--permutate-separate-tokens" + f"[/red]" + ) if __name__ == "__main__": diff --git a/mephisto/generators/form_composer/README.md b/mephisto/generators/form_composer/README.md index 98327b694..88c6af4a3 
100644 --- a/mephisto/generators/form_composer/README.md +++ b/mephisto/generators/form_composer/README.md @@ -33,7 +33,7 @@ You can launch FormComposer inside a Docker container: docker-compose -f docker/docker-compose.dev.yml run \ --build \ --rm mephisto_dc \ - mephisto form_composer_config --extrapolate-token-sets True + mephisto form_composer_config --extrapolate-token-sets ``` 2. Run composer itself (`form_composer` command) @@ -53,7 +53,7 @@ First ensure that mephisto package is installed locally - please refer to [Mephi Once that is done, run `form_composer_config` command(s) if needed, and then `form_composer` command: ```shell -mephisto form_composer_config --extrapolate-token-sets True +mephisto form_composer_config --extrapolate-token-sets mephisto form_composer ``` @@ -64,17 +64,17 @@ The `form_composer_config` utility command helps auto-generate FormComposer conf ```shell # Sample launching commands -mephisto form_composer_config --verify True -mephisto form_composer_config --extrapolate-token-sets True -mephisto form_composer_config --permutate-single-tokens True +mephisto form_composer_config --verify +mephisto form_composer_config --extrapolate-token-sets +mephisto form_composer_config --permutate-separate-tokens mephisto form_composer_config --update-file-location-values "https://s3.amazon.com/...." 
``` where -- `-v/--verify BOOLEAN` - if truthy, validates all JSON configs currently present in the form builder config directory +- `-v/--verify` - if truthy, validates all JSON configs currently present in the form builder config directory - `-f/--update-file-location-values S3_URL` - generates token values based on file names found within specified S3 folder (see a separate section about this mode of running FormComposer) -- `-e/--extrapolate-token-sets BOOLEAN` - if truthy, generates Task data config based on provided form config and takon sets values -- `-p/--permutate-single-tokens BOOLEAN` - if truthy, generates token sets values as all possible combinations of values of individual tokens +- `-e/--extrapolate-token-sets` - if truthy, generates Task data config based on provided form config and token sets values +- `-p/--permutate-separate-tokens` - if truthy, generates token sets values as all possible combinations of values of individual tokens To understand what "tokens" means, read on about FormComposer config structure. @@ -84,8 +84,8 @@ To understand what "tokens" means, read on about FormComposer config structure. You will need to provide FormComposer with a JSON configuration of your form fields, and place it in `generators/form-composer/data` directory. -- The form config file should be named `data.json`, and contain a list of JSON objects, each one with one key `form`. -- If you want to slightly vary your form within a Task (by inserting different values into its text), you need to add two files (that will be used to auto-generate `data.json` file): +- The task config file should be named `task_data.json`, and contain a list of JSON objects, each one with one key `form`. 
+- If you want to slightly vary your form within a Task (by inserting different values into its text), you need to add two files (that will be used to auto-generate `task_data.json` file): - `token_sets_values_config.json` containing a JSON array of objects (each with one key `tokens_values` and value representing name-value pairs for a set of text tokens to be used in one form version). - `form_config.json` containing a single JSON object with one key `form`. - For more detail, read on about dynamic form configs. @@ -93,18 +93,18 @@ and place it in `generators/form-composer/data` directory. For detailed structure of each config file, see [Config file reference](#config-file-reference). Working config examples are provided in `examples/form_composer_demo/data` directory: -- task data config: `simple/data.json` +- task data config: `simple/task_data.json` - form config: `dynamic/form_config.json` - token sets values config: `dynamic/token_sets_values_config.json` -- single tokens values: `dynamic/single_token_values_config.json` to create `token_sets_values_config.json` -- resulting extrapolated config: `dynamic/data.json` +- separate tokens values: `dynamic/separate_token_values_config.json` to create `token_sets_values_config.json` +- resulting extrapolated config: `dynamic/task_data.json` ## Embedding FormComposer into custom application A few tips if you wish to embed FormComposer in your custom application: -- to extrapolate form config (and generate the `data.json` file), call the extrapolator function `mephisto.generators.form_composer.configs_validation.extrapolated_config.create_extrapolated_config` +- to extrapolate form config (and generate the `task_data.json` file), call the extrapolator function `mephisto.generators.form_composer.configs_validation.extrapolated_config.create_extrapolated_config` - For a live example, you can explore the source code of [run_task_dynamic.py](/examples/form_composer_demo/run_task_dynamic.py) module @@ -115,8 +115,8 @@ A 
few tips if you wish to embed FormComposer in your custom application: The simplest Task scenario is showing the same exact form to all of your workers. In that case you need to: -- Compose `data.json` file containing definition of a single form (and place it into FormComposer config folder) -- Optionally, verify your config: `mephisto form_composer_config --verify True` +- Compose `task_data.json` file containing definition of a single form (and place it into FormComposer config folder) +- Optionally, verify your config: `mephisto form_composer_config --verify` - Run FormComposer: `mephisto form_composer` But suppose you wish to show a slightly different version of the form to your workers. You can do so by defining multiple form versions. FormComposer provides several ways of doing so. @@ -127,8 +127,8 @@ But suppose you wish to show a slightly different version of the form to your wo If your form versions vary considerably (e.g. showing different sets of fields), you should do the following steps: -- Populate these form versions into `data.json` file manually (it will be basically a JSON array of N individual form versions configs) -- Optionally, verify your config: `mephisto form_composer_config --verify True` +- Populate these form versions into `task_data.json` file manually (it will be basically a JSON array of N individual form versions configs) +- Optionally, verify your config: `mephisto form_composer_config --verify` - Run FormComposer: `mephisto form_composer` _As a result, for each Task assignment Mephisto will automatically produce N units, each unit having a different form version. In total you will be collecting data from `N * units_per_assignment` workers._ @@ -142,9 +142,9 @@ If your form versions vary only slightly (e.g. 
same set of fields, but showing d - Ensure you populate these files, and place them into your FormComposer config folder: - `form_config.json`: tokenized form config - same as regular form config, except it will contain tokens within certain objects' attributes (see [Tokens extrapolation](#tokens-extrapolation)) - `token_sets_values_config.json`: file containing sets of token values, where each set is used to generate one version of the form (and each form version will be completed by `units_per_assignment` different workers). -- Optionally, verify your files: `mephisto form_composer_config --verify True` -- Generate task data config: `mephisto form_composer_config --extrapolate-token-sets True` - - This will overwrite existing `data.json` file with auto-generated form versions, by extrapolating provided token sets values +- Optionally, verify your files: `mephisto form_composer_config --verify` +- Generate task data config: `mephisto form_composer_config --extrapolate-token-sets` + - This will overwrite existing `task_data.json` file with auto-generated form versions, by extrapolating provided token sets values - Run FormComposer: `mephisto form_composer` _The number of generated form versions N will be same as number of provided token sets. In total you will be collecting data from `N * units_per_assignment` workers._ @@ -176,21 +176,21 @@ If you wish to reuse the same token across different form attributes and levels, In a special case when all of your tokens sets are simply permutations of several value lists, sets of token values can be easily auto-generated. 
-- Populate your lists of values for every single token into `single_token_values_config.json` file -- Optionally, verify your config: `mephisto form_composer_config --verify True` -- Generate `token_sets_values_config.json` with command: `mephisto form_composer_config --permutate-single-tokens True` +- Populate your lists of values for every separate token into `separate_token_values_config.json` file +- Optionally, verify your config: `mephisto form_composer_config --verify` +- Generate `token_sets_values_config.json` with command: `mephisto form_composer_config --permutate-separate-tokens` _"Permutation" means all possible combinations of values. For example, permutations of amounts `2, 3`, sizes `big` and animals `cats, dogs` will produce result `2 big cats, 2 big dogs, 3 big cats, 3 big dogs`._ --- -#### Generate single token values with `--update-file-location-values` +#### Generate separate token values with `--update-file-location-values` In a special case when one of your tokens is an S3 file URL, that token values can be easily auto-generated. - Make a public S3 folder that will contain only the files that you want (all of them) - Run command: `mephisto form_composer --update-file-location-values S3_FOLDER_URL` -- As a result, a token with name `"file_location"` will be added to your `single_token_values_config.json` config file. Its values will be S3 URLs of all files found .recursively within the `S3_FOLDER_URL` +- As a result, a token with name `"file_location"` will be added to your `separate_token_values_config.json` config file. 
Its values will be S3 URLs of all files found recursively within the `S3_FOLDER_URL` --- @@ -200,7 +200,7 @@ Putting it altogether, this is a brief example of composing a dynamic form confi #### Single token values config -Let's start with separate token values in `single_token_values_config.json` file: +Let's start with separate token values in `separate_token_values_config.json` file: ```json { @@ -252,7 +252,7 @@ These tokens are placed into the `form_config.json` dynamic form config like so: #### Task data config -After extrapolating attributes from `form_config.json` with token sets from `token_sets_values_config.json`, we get the resulting `data.json` file used for the task: +After extrapolating attributes from `form_config.json` with token sets from `token_sets_values_config.json`, we get the resulting `task_data.json` file used for the task: ```json // First extrapolated form version @@ -306,9 +306,9 @@ TBD (aka "remote procedure") # Config file reference -## Config file: `data.json` +## Config file: `task_data.json` -Task data config file `data.json` specifies layout of all form versions that are completed by workers. Here's an abbreviated example of such config: +Task data config file `task_data.json` specifies layout of all form versions that are completed by workers. Here's an abbreviated example of such config: ```json [ @@ -527,7 +527,7 @@ The most important attributes are: `label`, `name`, `type`, `validators` ## Config file: `form_config.json` -Form config file `form_config.json` specifies layout of a form in the same way as `data.json`, but with a few notable differences: +Form config file `form_config.json` specifies layout of a form in the same way as `task_data.json`, but with a few notable differences: - It contains a single JSON object (not a JSON array of objects) - Some of its form attributes definitions must contain dynamic tokens (whose values will be extrapolated, i.e. substituted with variable chunks of text) - see further below. 
@@ -557,7 +557,7 @@ Example: ] ``` -## Config file: `single_token_values_config.json` +## Config file: `separate_token_values_config.json` Lists of separate tokens values are specified as JSON object with key-value pairs, where keys are token names, and values are JSON arrays of their values. diff --git a/mephisto/generators/form_composer/config_validation/__init__.py b/mephisto/generators/form_composer/config_validation/__init__.py index e69de29bb..cfaca7562 100644 --- a/mephisto/generators/form_composer/config_validation/__init__.py +++ b/mephisto/generators/form_composer/config_validation/__init__.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 + +# Copyright (c) Meta Platforms and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. diff --git a/mephisto/generators/form_composer/config_validation/common_validation.py b/mephisto/generators/form_composer/config_validation/common_validation.py index 986dcd3ff..251c84654 100644 --- a/mephisto/generators/form_composer/config_validation/common_validation.py +++ b/mephisto/generators/form_composer/config_validation/common_validation.py @@ -1,3 +1,8 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + from typing import List from .config_validation_constants import AvailableAttrsType diff --git a/mephisto/generators/form_composer/config_validation/config_validation_constants.py b/mephisto/generators/form_composer/config_validation/config_validation_constants.py index 60627c249..6d01fc28b 100644 --- a/mephisto/generators/form_composer/config_validation/config_validation_constants.py +++ b/mephisto/generators/form_composer/config_validation/config_validation_constants.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
+# Copyright (c) Meta Platforms and its affiliates. # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. @@ -8,6 +8,9 @@ AvailableAttrsType = Dict[str, Dict[str, Union[type, bool]]] +TOKENS_VALUES_KEY = "tokens_values" +FILE_URL_TOKEN_KEY = "file_location" + AVAILABLE_CONFIG_ATTRS: AvailableAttrsType = { "form": { "type": dict, @@ -182,7 +185,7 @@ } AVAILABLE_TASK_ATTRS: AvailableAttrsType = { - "tokens_values": { + TOKENS_VALUES_KEY: { "type": dict, "required": True, }, diff --git a/mephisto/generators/form_composer/config_validation/form_config.py b/mephisto/generators/form_composer/config_validation/form_config.py index b4b3bec0a..2a5eccca0 100644 --- a/mephisto/generators/form_composer/config_validation/form_config.py +++ b/mephisto/generators/form_composer/config_validation/form_config.py @@ -1,3 +1,8 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + from typing import Dict from typing import List from typing import Tuple diff --git a/mephisto/generators/form_composer/config_validation/single_token_values_config.py b/mephisto/generators/form_composer/config_validation/separate_token_values_config.py similarity index 61% rename from mephisto/generators/form_composer/config_validation/single_token_values_config.py rename to mephisto/generators/form_composer/config_validation/separate_token_values_config.py index f2b9d51ab..6a2d160d6 100644 --- a/mephisto/generators/form_composer/config_validation/single_token_values_config.py +++ b/mephisto/generators/form_composer/config_validation/separate_token_values_config.py @@ -1,3 +1,8 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ from typing import Dict from typing import List from typing import Tuple @@ -6,11 +11,13 @@ from botocore.exceptions import ClientError from botocore.exceptions import NoCredentialsError +from mephisto.generators.form_composer.remote_procedures import ProcedureName +from .config_validation_constants import FILE_URL_TOKEN_KEY from .utils import get_file_urls_from_s3_storage from .utils import write_config_to_file -def validate_single_token_values_config( +def validate_separate_token_values_config( config_json: Dict[str, List[str]], ) -> Tuple[bool, List[str]]: is_valid = True @@ -34,8 +41,10 @@ def validate_single_token_values_config( return is_valid, errors -def update_single_token_values_config_with_file_urls( - url: str, single_token_values_config_path: str, +def update_separate_token_values_config_with_file_urls( + url: str, + separate_token_values_config_path: str, + use_presigned_urls: bool, ): try: files_locations = get_file_urls_from_s3_storage(url) @@ -50,7 +59,13 @@ def update_single_token_values_config_with_file_urls( ) return None - single_token_values_config_data = { - "file_location": files_locations, + if use_presigned_urls: + files_locations = [ + "{{" + f"{ProcedureName.GET_PRESIGNED_URL}({url})" + "}}" + for url in files_locations + ] + + separate_token_values_config_data = { + FILE_URL_TOKEN_KEY: files_locations, } - write_config_to_file(single_token_values_config_data, single_token_values_config_path) + write_config_to_file(separate_token_values_config_data, separate_token_values_config_path) diff --git a/mephisto/generators/form_composer/config_validation/task_data_config.py b/mephisto/generators/form_composer/config_validation/task_data_config.py index 910ffad8e..7ae30ec93 100644 --- a/mephisto/generators/form_composer/config_validation/task_data_config.py +++ b/mephisto/generators/form_composer/config_validation/task_data_config.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
+# Copyright (c) Meta Platforms and its affiliates. # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. @@ -11,8 +11,9 @@ from typing import Tuple from .config_validation_constants import ATTRS_SUPPORTING_TOKENS +from .config_validation_constants import TOKENS_VALUES_KEY from .form_config import validate_form_config -from .single_token_values_config import validate_single_token_values_config +from .separate_token_values_config import validate_separate_token_values_config from .token_sets_values_config import validate_token_sets_values_config from .utils import make_error_message from .utils import read_config_file @@ -106,7 +107,7 @@ def _validate_tokens_in_both_configs( tokens_from_token_sets_values_config = set([ token_name for token_set_values_data in token_sets_values_config_data - for token_name in token_set_values_data.get("tokens_values", {}).keys() + for token_name in token_set_values_data.get(TOKENS_VALUES_KEY, {}).keys() ]) # Token names present in token values config, but not in form config @@ -179,7 +180,7 @@ def _combine_extrapolated_form_configs( combined_config.append(form_config_data) else: form_config_data_with_tokens = _extrapolate_tokens_in_form_config( - deepcopy(form_config_data), token_sets_values["tokens_values"], + deepcopy(form_config_data), token_sets_values[TOKENS_VALUES_KEY], ) combined_config.append(form_config_data_with_tokens) else: @@ -218,22 +219,6 @@ def create_extrapolated_config( print(f"Could not extrapolate form configs: {e}") -def generate_tokens_values_config_from_files(token_sets_values_config_path: str, files: List[str]): - token_sets_values_config_data = [] - - for i, file_location in enumerate(files): - token_sets_values_config_data.append(dict( - tokens_values={ - FILE_LOCATION_TOKEN_NAME: file_location, - }, - )) - - try: - write_config_to_file(token_sets_values_config_data, token_sets_values_config_path) - except ValueError as e: - print(f"Could 
not write tokens values to file: {e}") - - def validate_task_data_config(config_json: List[dict]) -> Tuple[bool, List[str]]: is_valid = True errors = [] @@ -261,7 +246,7 @@ def verify_form_composer_configs( task_data_config_path: str, form_config_path: Optional[str] = None, token_sets_values_config_path: Optional[str] = None, - single_token_values_config_path: Optional[str] = None, + separate_token_values_config_path: Optional[str] = None, task_data_config_only: bool = False, ): errors = [] @@ -324,16 +309,16 @@ def verify_form_composer_configs( if tokens_in_unexpected_attrs_errors: errors = errors + tokens_in_unexpected_attrs_errors - # 4. Validate single token values config - single_token_values_config_data = read_config_file(single_token_values_config_path) + # 4. Validate separate token values config + separate_token_values_config_data = read_config_file(separate_token_values_config_path) - single_token_values_config_is_valid, single_token_values_config_errors = ( - validate_single_token_values_config(single_token_values_config_data) + separate_token_values_config_is_valid, separate_token_values_config_errors = ( + validate_separate_token_values_config(separate_token_values_config_data) ) - if not single_token_values_config_is_valid: + if not separate_token_values_config_is_valid: token_sets_values_data_config_errors = [ - f" - {e}" for e in single_token_values_config_errors + f" - {e}" for e in separate_token_values_config_errors ] errors_string = "\n".join(token_sets_values_data_config_errors) errors.append(f"Single token values config is invalid. 
Errors:\n{errors_string}") diff --git a/mephisto/generators/form_composer/config_validation/token_sets_values_config.py b/mephisto/generators/form_composer/config_validation/token_sets_values_config.py index 5f7d8d014..6e946e02f 100644 --- a/mephisto/generators/form_composer/config_validation/token_sets_values_config.py +++ b/mephisto/generators/form_composer/config_validation/token_sets_values_config.py @@ -1,13 +1,17 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + import itertools -import json -from json import JSONDecodeError from typing import Dict from typing import List from typing import Tuple from .common_validation import validate_config_dict_item from .config_validation_constants import AVAILABLE_TASK_ATTRS -from .single_token_values_config import validate_single_token_values_config +from .config_validation_constants import TOKENS_VALUES_KEY +from .separate_token_values_config import validate_separate_token_values_config from .utils import make_error_message from .utils import read_config_file from .utils import write_config_to_file @@ -44,7 +48,7 @@ def validate_token_sets_values_config(config_json: List[dict]) -> Tuple[bool, Li return is_valid, errors -def _premutate_single_tokents(data: Dict[str, List[str]]) -> TokensPermutationType: +def _premutate_separate_tokens(data: Dict[str, List[str]]) -> TokensPermutationType: all_permutations = [] # Make a list to iterate many times data_keys = list(data.keys()) @@ -60,7 +64,7 @@ def _premutate_single_tokents(data: Dict[str, List[str]]) -> TokensPermutationTy all_permutations.append( { - "tokens_values": single_permudation, + TOKENS_VALUES_KEY: single_permudation, } ) @@ -68,26 +72,26 @@ def _premutate_single_tokents(data: Dict[str, List[str]]) -> TokensPermutationTy def update_token_sets_values_config_with_premutated_data( - 
single_token_values_config_path: str, + separate_token_values_config_path: str, token_sets_values_config_path: str, ): # Read JSON from files - single_token_values_config_data = read_config_file(single_token_values_config_path) + separate_token_values_config_data = read_config_file(separate_token_values_config_path) - single_token_values_config_is_valid, single_token_values_config_errors = ( - validate_single_token_values_config(single_token_values_config_data) + separate_token_values_config_is_valid, separate_token_values_config_errors = ( + validate_separate_token_values_config(separate_token_values_config_data) ) errors = [] - if not single_token_values_config_is_valid: + if not separate_token_values_config_is_valid: errors.append(make_error_message( - "Single token values config is invalid.", single_token_values_config_errors, + "Separate token values config is invalid.", separate_token_values_config_errors, )) if errors: # Stop generating a Task, the config is incorrect raise ValueError("\n" + "\n\n".join(errors)) - premutated_data = _premutate_single_tokents(single_token_values_config_data) + premutated_data = _premutate_separate_tokens(separate_token_values_config_data) write_config_to_file(premutated_data, token_sets_values_config_path) diff --git a/mephisto/generators/form_composer/config_validation/utils.py b/mephisto/generators/form_composer/config_validation/utils.py index 931d0d7ba..812e91a61 100644 --- a/mephisto/generators/form_composer/config_validation/utils.py +++ b/mephisto/generators/form_composer/config_validation/utils.py @@ -1,6 +1,13 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ import json import os from json import JSONDecodeError +from pathlib import Path +from typing import Any from typing import List from typing import Tuple from typing import Union @@ -8,8 +15,17 @@ from urllib.parse import urlparse import boto3 +from botocore.exceptions import BotoCoreError +from botocore.exceptions import NoCredentialsError +from rich import print +from mephisto.generators.form_composer.constants import CONTENTTYPE_BY_EXTENSION from mephisto.generators.form_composer.constants import JSON_IDENTATION +from mephisto.generators.form_composer.constants import S3_URL_EXPIRATION_SECONDS +from mephisto.utils.logger_core import get_logger + +logger = get_logger(name=__name__) +s3_client = boto3.client("s3") def write_config_to_file(config_data: Union[List[dict], dict], file_path: str): @@ -19,11 +35,52 @@ def write_config_to_file(config_data: Union[List[dict], dict], file_path: str): f.write(config_str) +def read_config_file(config_path: str) -> Union[List[dict], dict]: + try: + with open(config_path) as config_file: + config_data = json.load(config_file) + except (JSONDecodeError, TypeError, FileNotFoundError): + print(f"[red]Could not read JSON from '{config_path}' file[/red]") + exit() + return config_data + + +def make_error_message(main_message: str, error_list: List[str]) -> str: + errors_bullet = "\n - " + "\n - ".join(map(str, error_list)) + return f"{main_message}. 
Errors:{errors_bullet}" + + +def get_file_ext(file_name: str) -> str: + """ Cut off file extension without period """ + return Path(file_name).suffix.lower()[1:] + + +# ----- S3 ----- + + +def _run_and_handle_boto_errors( + fn: callable, + error_message: str = "Error occurred", + reraise: bool = True, +) -> Any: + """ Handles standard boto errors in a standard way """ + try: + return fn() + + except BotoCoreError as e: + if isinstance(e, NoCredentialsError): + error_message = "Missing AWS credentials caused the following error: " + error_message + logger.exception(error_message) + + if reraise: + raise + + def is_s3_url(value: str) -> bool: if isinstance(value, str): parsed_url = urlparse(value) return bool( - parsed_url.scheme == 'https' and + parsed_url.scheme == "https" and "s3" in parsed_url.hostname and parsed_url.netloc and parsed_url.path @@ -34,11 +91,12 @@ def is_s3_url(value: str) -> bool: def _get_bucket_and_key_from_S3_url(s3_url: str) -> Tuple[str, str]: parsed_url = urlparse(s3_url) - bucket_name = parsed_url.hostname.split('.')[0] + bucket_name = parsed_url.hostname.split(".")[0] relative_path = parsed_url.path if not relative_path: - raise ValueError(f'Cannot extract S3 key from invalid URL "{s3_url}"') + print(f"[red]Cannot extract S3 key from invalid URL '{s3_url}'[/red]") + exit() # Remove a slash from the beginning of the path s3_key = relative_path[1:] @@ -64,16 +122,34 @@ def get_file_urls_from_s3_storage(s3_url: str) -> List[str]: return urls -def read_config_file(config_path: str) -> Union[List[dict], dict]: - try: - with open(config_path) as config_file: - config_data = json.load(config_file) - except (JSONDecodeError, TypeError, FileNotFoundError): - print(f"Could not read JSON from '{config_path}' file") - raise - return config_data +def get_s3_presigned_url(s3_url: str, expires_in_secs: int = S3_URL_EXPIRATION_SECONDS) -> str: + """ Generate expiring URL to access protected content """ + def boto_action(): + return 
s3_client.generate_presigned_url( + ClientMethod="get_object", + Params={ + "Bucket": bucket_name, + "Key": s3_key, + **aws_params, + }, + ExpiresIn=expires_in_secs, + ) + bucket_name, s3_key = _get_bucket_and_key_from_S3_url(s3_url) -def make_error_message(main_message: str, error_list: List[str]) -> str: - errors_bullet = "\n - " + "\n - ".join(map(str, error_list)) - return f"{main_message}. Errors:{errors_bullet}" + # If we don't set a Content-Type for a presigned URL, + # browser cannot even embed PDF files correctly in iframes or separate tab. + # We need to specify the exact Content-Type + # for all file types we use at least in private buckets + aws_params = {} + extension = get_file_ext(s3_key) + + content_type = CONTENTTYPE_BY_EXTENSION.get(extension) + + if extension and content_type: + aws_params["ResponseContentType"] = content_type + + error_message = f"Could not make presigned URL for key '{s3_key}'" + presigned_url = _run_and_handle_boto_errors(boto_action, error_message) + + return presigned_url diff --git a/mephisto/generators/form_composer/constants.py b/mephisto/generators/form_composer/constants.py index 56b739643..8c947be18 100644 --- a/mephisto/generators/form_composer/constants.py +++ b/mephisto/generators/form_composer/constants.py @@ -1,6 +1,28 @@ #!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms and its affiliates. # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
+CONTENTTYPE_BY_EXTENSION = { + # Docs + 'csv': 'text/csv', + 'doc': 'application/msword', + 'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'pdf': 'application/pdf', + # Images + 'bmp': 'image/bmp', + 'gif': 'image/gif', + 'heic': 'image/heic', + 'heif': 'image/heif', + 'jpeg': 'image/jpeg', + 'jpg': 'image/jpeg', + 'png': 'image/png', + # Videos + 'mkv': 'video/x-matroska', + 'mp4': 'video/mp4', + 'webm': 'video/webm', +} + JSON_IDENTATION = 2 + +S3_URL_EXPIRATION_SECONDS = 60 diff --git a/mephisto/generators/form_composer/hydra_configs/conf/default.yaml b/mephisto/generators/form_composer/hydra_configs/conf/default.yaml index a8ae4484f..2eff8fb35 100644 --- a/mephisto/generators/form_composer/hydra_configs/conf/default.yaml +++ b/mephisto/generators/form_composer/hydra_configs/conf/default.yaml @@ -5,13 +5,12 @@ # LICENSE file in the root directory of this source tree. defaults: - - /mephisto/blueprint: static_react_task + - /mephisto/blueprint: remote_procedure - /mephisto/architect: local - /mephisto/provider: mock mephisto: blueprint: - data_json: ${task_dir}/data/data.json task_source: ${task_dir}/webapp/build/bundle.js task_source_review: ${task_dir}/webapp/build/bundle.review.js link_task_source: false diff --git a/mephisto/generators/form_composer/remote_procedures.py b/mephisto/generators/form_composer/remote_procedures.py new file mode 100644 index 000000000..38d1c63fb --- /dev/null +++ b/mephisto/generators/form_composer/remote_procedures.py @@ -0,0 +1,23 @@ +from mephisto.abstractions.blueprints.remote_procedure.remote_procedure_agent_state import ( + RemoteProcedureAgentState +) +from mephisto.generators.form_composer.config_validation.utils import get_s3_presigned_url +from mephisto.utils.logger_core import get_logger + +logger = get_logger(name=__name__) + + +class ProcedureName: + GET_PRESIGNED_URL = "getPresignedUrl" + + +def _get_presigned_url(request_id: str, url: str, agent_state: 
RemoteProcedureAgentState): + logger.debug(f"Presigning S3 URL '{url}' ({request_id=})") + presigned_url = get_s3_presigned_url(url) + logger.debug(f"Presigned S3 URL '{presigned_url}'") + return presigned_url + + +JS_NAME_FUNCTION_MAPPING = { + ProcedureName.GET_PRESIGNED_URL: _get_presigned_url, +} diff --git a/mephisto/generators/form_composer/run.py b/mephisto/generators/form_composer/run.py index 4dcd21f3d..335adfeb3 100644 --- a/mephisto/generators/form_composer/run.py +++ b/mephisto/generators/form_composer/run.py @@ -8,6 +8,11 @@ from omegaconf import DictConfig +from mephisto.abstractions.blueprints.remote_procedure.remote_procedure_blueprint import ( + SharedRemoteProcedureTaskState +) +from mephisto.generators.form_composer.config_validation.utils import read_config_file +from mephisto.generators.form_composer.remote_procedures import JS_NAME_FUNCTION_MAPPING from mephisto.operations.operator import Operator from mephisto.tools.scripts import build_custom_bundle from mephisto.tools.scripts import task_script @@ -18,8 +23,20 @@ def main(operator: Operator, cfg: DictConfig) -> None: # Build packages _build_custom_bundles(cfg) + # Configure shared state + task_data_config_path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "data", + "task_data.json", + ) + task_data = read_config_file(task_data_config_path) + shared_state = SharedRemoteProcedureTaskState( + static_task_data=task_data, + function_registry=JS_NAME_FUNCTION_MAPPING, + ) + # Launch Task Run - operator.launch_task_run(cfg.mephisto) + operator.launch_task_run(cfg.mephisto, shared_state) operator.wait_for_runs_then_shutdown(skip_input=True, log_rate=30) diff --git a/mephisto/generators/form_composer/webapp/src/app.jsx b/mephisto/generators/form_composer/webapp/src/app.jsx index cb8140d94..3fdfc22d5 100644 --- a/mephisto/generators/form_composer/webapp/src/app.jsx +++ b/mephisto/generators/form_composer/webapp/src/app.jsx @@ -7,7 +7,7 @@ import React from "react"; import ReactDOM 
from "react-dom"; import { AutoComposingFormFrontend, LoadingScreen } from "./components/core_components.jsx"; -import { useMephistoTask, ErrorBoundary } from "mephisto-task-multipart"; +import { useMephistoRemoteProcedureTask, ErrorBoundary } from "mephisto-task-multipart"; /* ================= Application Components ================= */ @@ -15,21 +15,28 @@ function MainApp() { const { isLoading, initialTaskData, + remoteProcedure, handleSubmit, handleFatalError, - } = useMephistoTask(); + } = useMephistoRemoteProcedureTask(); if (isLoading || !initialTaskData) { return ; } + let _initialTaskData = initialTaskData; + if (initialTaskData.hasOwnProperty("task_data")) { + _initialTaskData = initialTaskData.task_data; + } + return (
diff --git a/mephisto/generators/form_composer/webapp/src/components/core_components.jsx b/mephisto/generators/form_composer/webapp/src/components/core_components.jsx index a67a19540..71c1f51bc 100644 --- a/mephisto/generators/form_composer/webapp/src/components/core_components.jsx +++ b/mephisto/generators/form_composer/webapp/src/components/core_components.jsx @@ -23,8 +23,11 @@ function Directions({ children }) { ); } -function AutoComposingFormFrontend({ taskData, onSubmit, onError, finalResults = null }) { +function AutoComposingFormFrontend({ + taskData, onSubmit, onError, finalResults = null, remoteProcedure, +}) { let formData = taskData.form; + window["getPresignedUrl"] = remoteProcedure("getPresignedUrl"); if (!formData) { return ( diff --git a/mephisto/generators/form_composer/webapp/src/reviewapp.jsx b/mephisto/generators/form_composer/webapp/src/reviewapp.jsx index b26312970..3224ac79a 100644 --- a/mephisto/generators/form_composer/webapp/src/reviewapp.jsx +++ b/mephisto/generators/form_composer/webapp/src/reviewapp.jsx @@ -1,9 +1,7 @@ /* - * Copyright (c) 2017-present, Facebook, Inc. - * All rights reserved. - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. An additional grant - * of patent rights can be found in the PATENTS file in the same directory. + * Copyright (c) Meta Platforms and its affiliates. + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. */ import React from "react"; diff --git a/mephisto/generators/form_composer/webapp/webpack.config.review.js b/mephisto/generators/form_composer/webapp/webpack.config.review.js index ab1d6a116..1bbdc63d8 100644 --- a/mephisto/generators/form_composer/webapp/webpack.config.review.js +++ b/mephisto/generators/form_composer/webapp/webpack.config.review.js @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. and its affiliates. 
+ * Copyright (c) Meta Platforms and its affiliates. * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. */ diff --git a/packages/react-form-composer/src/FormComposer/FormComposer.js b/packages/react-form-composer/src/FormComposer/FormComposer.js index 443e1cab8..6d932cb0e 100644 --- a/packages/react-form-composer/src/FormComposer/FormComposer.js +++ b/packages/react-form-composer/src/FormComposer/FormComposer.js @@ -13,9 +13,10 @@ import { RadioField } from "./fields/RadioField"; import { SelectField } from "./fields/SelectField"; import { TextareaField } from "./fields/TextareaField"; import "./FormComposer.css"; +import { FormErrors } from "./FormErrors"; import { SectionErrors } from "./SectionErrors"; import { SectionErrorsCountBadge } from "./SectionErrorsCountBadge"; -import { FormErrors } from "./FormErrors"; +import { formatStringWithProcedureTokens } from "./utils"; import { checkFieldRequiredness, validateFormFields } from "./validation/helpers"; function FormComposer({ data, onSubmit, finalResults, serverSubmitErrors }) { @@ -39,8 +40,8 @@ function FormComposer({ data, onSubmit, finalResults, serverSubmitErrors }) { const inReviewState = finalResults !== null; - let formTitle = data.title; - let formInstruction = data.instruction; + let formTitle = formatStringWithProcedureTokens(data.title); + let formInstruction = formatStringWithProcedureTokens(data.instruction); let formSections = data.sections; let formSubmitButton = data.submit_button; @@ -186,8 +187,8 @@ function FormComposer({ data, onSubmit, finalResults, serverSubmitErrors }) { {/* Sections */} {formSections.map(( section, sectionIndex ) => { - const sectionTitle = section.title; - const sectionInstruction = section.instruction; + const sectionTitle = formatStringWithProcedureTokens(section.title); + const sectionInstruction = formatStringWithProcedureTokens(section.instruction); const fieldsets = section.fieldsets; const 
collapsable = ( @@ -282,8 +283,8 @@ function FormComposer({ data, onSubmit, finalResults, serverSubmitErrors }) { /> {fieldsets.map(( fieldset, fieldsetIndex ) => { - const fieldsetTitle = fieldset.title; - const fieldsetInstruction = fieldset.instruction; + const fieldsetTitle = formatStringWithProcedureTokens(fieldset.title); + const fieldsetInstruction = formatStringWithProcedureTokens(fieldset.instruction); const rows = fieldset.rows; return ( @@ -312,7 +313,7 @@ function FormComposer({ data, onSubmit, finalResults, serverSubmitErrors }) { )} {rows.map(( row, rowIndex ) => { - const rowHelp = row.help; + const rowHelp = formatStringWithProcedureTokens(row.help); const fields = row.fields; return ( @@ -321,6 +322,8 @@ function FormComposer({ data, onSubmit, finalResults, serverSubmitErrors }) { className={`row`} > {fields.map(( field, fieldIndex ) => { + const fieldLabel = formatStringWithProcedureTokens(field.label); + const fieldTooltip = formatStringWithProcedureTokens(field.tooltip); const fieldHelp = field.help; return ( @@ -333,14 +336,14 @@ function FormComposer({ data, onSubmit, finalResults, serverSubmitErrors }) { col ${checkFieldRequiredness(field) ? "required" : ""} `} - title={field.tooltip} + title={fieldTooltip} > {field.icon} {["input", "email", "password", "number"].includes(field.type) && ( diff --git a/packages/react-form-composer/src/FormComposer/utils.js b/packages/react-form-composer/src/FormComposer/utils.js new file mode 100644 index 000000000..c40142914 --- /dev/null +++ b/packages/react-form-composer/src/FormComposer/utils.js @@ -0,0 +1,54 @@ +/* + * Copyright (c) Meta Platforms and its affiliates. + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
/**
 * Cache of resolved procedure tokens: the raw token text, braces included
 * (e.g. `{{getPresignedUrl("https://…")}}`), mapped to the value the remote
 * procedure resolved with. It lives at module level so that values survive
 * across renders.
 */
const tokenProcedureResultMapping = {};

/**
 * Tokens whose remote call has been started but has not settled yet.
 * Without this, every re-render before the response arrives would fire the
 * same remote procedure again.
 */
const pendingProcedureTokens = new Set();

// Matches `{{ procedureName(argument) }}` and captures the call text.
// `matchAll` works on an internal copy of the regex, so sharing this `g`
// instance between calls does not leak `lastIndex` state.
const PROCEDURE_TOKEN_REGEX = /\{\{\s*([\w\(\)\"\'\/\.\_\-\:\{\}\\]+)\s*\}\}/gi;

/**
 * Split `name(arguments)` into a procedure name and an argument list without
 * evaluating it as code.
 *
 * @param {string} callString - Text captured between `{{` and `}}`.
 * @returns {{name: string, args: Array}|null} Parsed call, or `null` when the
 *   text is not shaped like a call.
 */
function parseProcedureCall(callString) {
  const openParenIndex = callString.indexOf("(");
  if (openParenIndex <= 0 || !callString.endsWith(")")) {
    return null;
  }

  const name = callString.slice(0, openParenIndex);
  const rawArguments = callString.slice(openParenIndex + 1, -1).trim();
  if (rawArguments === "") {
    return { name, args: [] };
  }

  try {
    // Covers JSON-style arguments: "double-quoted strings", numbers, objects.
    return { name, args: JSON.parse(`[${rawArguments}]`) };
  } catch {
    // Deliberate fallback, not a swallowed error: bare values such as an
    // unquoted URL, or 'single-quoted' strings, are passed as one string.
    const unquoted = rawArguments.replace(/^(['"])(.*)\1$/, "$2");
    return { name, args: [unquoted] };
  }
}

/**
 * Start the remote procedure for one token and cache its result once it
 * arrives. Failures are logged and leave the token unresolved, so a later
 * render can retry.
 *
 * NOTE(review): any function that is an own property of `window` can be named
 * by a token, so form configs must come from a trusted source.
 *
 * @param {string} token - Full token text, used as the cache key.
 * @param {string} callString - Call text captured from inside the token.
 */
function requestProcedureResult(token, callString) {
  const call = parseProcedureCall(callString);
  const isKnownProcedure =
    call !== null &&
    typeof window !== "undefined" &&
    Object.prototype.hasOwnProperty.call(window, call.name) &&
    typeof window[call.name] === "function";

  // Procedures must be defined as globals before the form renders; if one is
  // missing, the raw token is simply left in the text.
  if (!isKnownProcedure) {
    console.error(`Could not find remote procedure ${call === null ? callString : call.name}`);
    return;
  }

  pendingProcedureTokens.add(token);

  let request;
  try {
    // Called as a method of `window`, as the original `window.name(...)` was.
    request = window[call.name](...call.args);
  } catch (error) {
    pendingProcedureTokens.delete(token);
    console.error(`Could not get remote response for ${call.name}`, error);
    return;
  }

  // `Promise.resolve` also tolerates a procedure that returns a plain value.
  Promise.resolve(request)
    .then((response) => {
      tokenProcedureResultMapping[token] = response;
    })
    .catch((error) => {
      console.error(`Could not get remote response for ${call.name}`, error);
    })
    .finally(() => {
      pendingProcedureTokens.delete(token);
    });
}

/**
 * Replace `{{procedure(argument)}}` tokens in a string with the values
 * returned by the matching remote procedures.
 *
 * Resolution is asynchronous. The first call for a token only starts the
 * request and returns the text with the raw token still in place; calls made
 * after the response has arrived return the substituted text.
 *
 * @param {*} string - Text that may contain procedure tokens. Non-string
 *   values (e.g. an omitted optional form attribute) are returned untouched.
 * @returns {*} The text with every already-resolved token substituted.
 */
export function formatStringWithProcedureTokens(string) {
  const mayContainTokens =
    typeof string === "string" && string.includes("{{") && string.includes("}}");
  if (!mayContainTokens) {
    return string;
  }

  // Start a request for every token that is neither cached nor in flight.
  for (const [token, callString] of string.matchAll(PROCEDURE_TOKEN_REGEX)) {
    const isCached = Object.prototype.hasOwnProperty.call(tokenProcedureResultMapping, token);
    if (!isCached && !pendingProcedureTokens.has(token)) {
      requestProcedureResult(token, callString.trim());
    }
  }

  // Substitute tokens with values received from the server. A replacer
  // function keeps `$&`, `$1`, … in a returned value literal.
  let formatted = string;
  for (const [token, value] of Object.entries(tokenProcedureResultMapping)) {
    formatted = formatted.replaceAll(token, () => String(value));
  }

  return formatted;
}