diff --git a/examples/form_composer_demo/README.md b/examples/form_composer_demo/README.md index 999f274b4..39a657637 100644 --- a/examples/form_composer_demo/README.md +++ b/examples/form_composer_demo/README.md @@ -1,4 +1,4 @@ -This form-based questionnaire is a simple example of Form Composer task generator. +These form-based questionnaires are examples of the FormComposer task generator. --- @@ -16,11 +16,12 @@ This form-based questionnaire is a simple example of Form Composer task generato ## How to configure -1. For simple form config you need to provide Form Composer with one JSON file - a configuration of your form fields. +1. For simple form config you need to provide FormComposer with one JSON file - a configuration of your form fields. An example is found in `examples/form_composer_demo/data/simple/data.json` file. 2. For dynamic form configs you need two JSON files: - form configuration `examples/form_composer_demo/data/dynamic/form_config.json` - - tokens values `examples/form_composer_demo/data/dynamic/tokens_values_config.json` + - tokens values `examples/form_composer_demo/data/dynamic/token_sets_values_config.json` + - token values as a list for each token `examples/form_composer_demo/data/dynamic/single_token_values_config.json` to create `token_sets_values_config.json` from it Note that during bulding a Task with dynamic form config, the resulting data config will be placed in `data.json` file, i.e. `examples/form_composer_demo/data/dynamic/data.json` (in this example it's already been created and will be overwritten when you build a Task). @@ -28,29 +29,4 @@ Note that during bulding a Task with dynamic form config, the resulting data con ### Form config -For details on how form config is composed, and how data fields are validated please see the main Form Composer's README.
- -Here's a sample part of form config: - -```json -{ - "fields": [ - { - "id": "id_name_first", - "label": "First name", - "name": "name_first", - "placeholder": "Type first name", - "title": "First name of a person", - "type": "input", - "validators": { - "required": true, - "minLength": 2, - "maxLength": 20, - "regexp": ["^[a-zA-Z0-9._-]+@mephisto\\.ai$", "ig"] - // or just string "regexp": "^[a-zA-Z0-9._-]+@mephisto\\.ai$" - }, - "value": "" - } - ] -} -``` +For details on how form config is composed, and how its data fields are validated, please see the main FormComposer's [README.md](/mephisto/generators/form_composer/README.md). diff --git a/examples/form_composer_demo/run_task_dynamic.py b/examples/form_composer_demo/run_task_dynamic.py index 8630de004..5ab0c8e02 100644 --- a/examples/form_composer_demo/run_task_dynamic.py +++ b/examples/form_composer_demo/run_task_dynamic.py @@ -8,7 +8,7 @@ from omegaconf import DictConfig -from mephisto.generators.form_composer.configs_validation.extrapolated_config import ( +from mephisto.generators.form_composer.config_validation.task_data_config import ( create_extrapolated_config ) from mephisto.operations.operator import Operator @@ -76,13 +76,13 @@ def generate_data_json_config(): data_path = os.path.join(app_path, "data") form_config_path = os.path.join(data_path, "dynamic", "form_config.json") - tokens_values_config_path = os.path.join(data_path, "dynamic", "tokens_values_config.json") - extrapolated_form_config_path = os.path.join(data_path, "dynamic", "data.json") + token_sets_values_config_path = os.path.join(data_path, "dynamic", "token_sets_values_config.json") + task_data_config_path = os.path.join(data_path, "dynamic", "data.json") create_extrapolated_config( form_config_path=form_config_path, - tokens_values_config_path=tokens_values_config_path, - extrapolated_form_config_path=extrapolated_form_config_path, + token_sets_values_config_path=token_sets_values_config_path, + 
task_data_config_path=task_data_config_path, ) diff --git a/examples/form_composer_demo/run_task_dynamic_ec2_mturk_sandbox.py b/examples/form_composer_demo/run_task_dynamic_ec2_mturk_sandbox.py index 9d18e2d62..80b2af130 100644 --- a/examples/form_composer_demo/run_task_dynamic_ec2_mturk_sandbox.py +++ b/examples/form_composer_demo/run_task_dynamic_ec2_mturk_sandbox.py @@ -14,7 +14,7 @@ from mephisto.abstractions.blueprints.abstract.static_task.static_blueprint import ( SharedStaticTaskState, ) -from mephisto.generators.form_composer.configs_validation.extrapolated_config import ( +from mephisto.generators.form_composer.config_validation.task_data_config import ( create_extrapolated_config ) from mephisto.operations.operator import Operator @@ -94,13 +94,13 @@ def generate_data_json_config(): data_path = os.path.join(app_path, "data") form_config_path = os.path.join(data_path, "dynamic", "form_config.json") - tokens_values_config_path = os.path.join(data_path, "dynamic", "tokens_values_config.json") - extrapolated_form_config_path = os.path.join(data_path, "dynamic", "data.json") + token_sets_values_config_path = os.path.join(data_path, "dynamic", "token_sets_values_config.json") + task_data_config_path = os.path.join(data_path, "dynamic", "data.json") create_extrapolated_config( form_config_path=form_config_path, - tokens_values_config_path=tokens_values_config_path, - extrapolated_form_config_path=extrapolated_form_config_path, + token_sets_values_config_path=token_sets_values_config_path, + task_data_config_path=task_data_config_path, ) diff --git a/examples/form_composer_demo/run_task_dynamic_ec2_prolific.py b/examples/form_composer_demo/run_task_dynamic_ec2_prolific.py index 927d2f4ec..6f4b4d55a 100644 --- a/examples/form_composer_demo/run_task_dynamic_ec2_prolific.py +++ b/examples/form_composer_demo/run_task_dynamic_ec2_prolific.py @@ -12,7 +12,7 @@ SharedStaticTaskState, ) from mephisto.data_model.qualification import QUAL_GREATER_EQUAL -from 
mephisto.generators.form_composer.configs_validation.extrapolated_config import ( +from mephisto.generators.form_composer.config_validation.task_data_config import ( create_extrapolated_config ) from mephisto.operations.operator import Operator @@ -98,13 +98,13 @@ def generate_data_json_config(): data_path = os.path.join(app_path, "data") form_config_path = os.path.join(data_path, "dynamic", "form_config.json") - tokens_values_config_path = os.path.join(data_path, "dynamic", "tokens_values_config.json") - extrapolated_form_config_path = os.path.join(data_path, "dynamic", "data.json") + token_sets_values_config_path = os.path.join(data_path, "dynamic", "token_sets_values_config.json") + task_data_config_path = os.path.join(data_path, "dynamic", "data.json") create_extrapolated_config( form_config_path=form_config_path, - tokens_values_config_path=tokens_values_config_path, - extrapolated_form_config_path=extrapolated_form_config_path, + token_sets_values_config_path=token_sets_values_config_path, + task_data_config_path=task_data_config_path, ) diff --git a/mephisto/client/cli.py b/mephisto/client/cli.py index 86b41177d..5b8bd915a 100644 --- a/mephisto/client/cli.py +++ b/mephisto/client/cli.py @@ -9,9 +9,6 @@ from typing import Optional import rich_click as click # type: ignore -from botocore.exceptions import BotoCoreError -from botocore.exceptions import ClientError -from botocore.exceptions import NoCredentialsError from flask.cli import pass_script_info from rich import print from rich.markdown import Markdown @@ -32,21 +29,30 @@ import mephisto.scripts.mturk.print_outstanding_hit_status as print_outstanding_hit_status_mturk import mephisto.scripts.mturk.print_outstanding_hit_status as soft_block_workers_by_mturk_id_mturk from mephisto.client.cli_commands import get_wut_arguments -from mephisto.generators.form_composer.configs_validation.extrapolated_config import ( +from mephisto.generators.form_composer.config_validation.task_data_config import ( 
create_extrapolated_config ) -from mephisto.generators.form_composer.configs_validation.extrapolated_config import ( - generate_tokens_values_config_from_files +from mephisto.generators.form_composer.config_validation.task_data_config import ( + verify_form_composer_configs ) -from mephisto.generators.form_composer.configs_validation.extrapolated_config import ( - get_file_urls_from_s3_storage +from mephisto.generators.form_composer.config_validation.single_token_values_config import ( + update_single_token_values_config_with_file_urls ) -from mephisto.generators.form_composer.configs_validation.extrapolated_config import is_s3_url +from mephisto.generators.form_composer.config_validation.token_sets_values_config import ( + update_token_sets_values_config_with_premutated_data +) +from mephisto.generators.form_composer.config_validation.utils import is_s3_url from mephisto.operations.registry import get_valid_provider_types from mephisto.tools.scripts import build_custom_bundle from mephisto.utils.rich import console from mephisto.utils.rich import create_table +FORM_COMPOSER_DATA_DIR_NAME = "data" +FORM_COMPOSER_DATA_CONFIG_NAME = "data.json" +FORM_COMPOSER_FORM_CONFIG_NAME = "form_config.json" +FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME = "token_sets_values_config.json" +FORM_COMPOSER_SINGLE_TOKEN_VALUES_CONFIG_NAME = "single_token_values_config.json" + @click.group(cls=RichGroup) def cli(): @@ -425,64 +431,30 @@ def review_app( ) -@cli.command("form_composer", cls=RichCommand) -@click.option("-m", "--manual-versions", type=(bool), default=False) -@click.option("-f", "--files-folder", type=(str), default=None) -def form_composer(manual_versions: bool, files_folder: Optional[str] = None): - # Get app path to run Python script from there (instead of the current file's directory). - # This is necessary, because the whole infrastructure is built relative to the location - # of the called command-line script. 
- # The other parts of the logic are inside `form_composer/run.py` script +def _get_form_composer_app_path() -> str: app_path = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "generators", "form_composer", ) + return app_path - # Check files and create `data.json` config with units data before running a task - data_path = os.path.join(app_path, "data") - extrapolated_form_config_path = os.path.join(data_path, "data.json") - form_config_path = os.path.join(data_path, "form_config.json") - tokens_values_config_path = os.path.join(data_path, "tokens_values_config.json") - # Change dir to app dir - os.chdir(app_path) - - if manual_versions and files_folder: - print("`--manual-versions` and `--files-folder` parameters cannot be used concurrently") - return None +@cli.command("form_composer", cls=RichCommand) +@click.option("-o", "--task-data-config-only", type=(bool), default=True) +def form_composer(task_data_config_only: bool = True): + app_path = _get_form_composer_app_path() + app_data_path = os.path.join(app_path, FORM_COMPOSER_DATA_DIR_NAME) - if files_folder: - if is_s3_url(files_folder): - try: - files_locations = get_file_urls_from_s3_storage(files_folder) - except (BotoCoreError, ClientError, NoCredentialsError) as e: - print(f"Could not retrieve images from S3 URL '{files_folder}'. 
Reason: {e}") - return None - - if not files_locations: - print( - f"Could not retrieve files from '{files_folder}' - " - f"check if this location exists and contains files" - ) - return None + task_data_config_path = os.path.join(app_data_path, FORM_COMPOSER_DATA_CONFIG_NAME) - generate_tokens_values_config_from_files(tokens_values_config_path, files_locations) - else: - print("`--images-path` must be URL on S3 directory") - return None + # Change dir to app dir + os.chdir(app_path) - if manual_versions: - # When user wants to use manually composed `data.json` config, - # we don't need to auto-generate an extrapolated config - pass - else: - create_extrapolated_config( - form_config_path=form_config_path, - tokens_values_config_path=tokens_values_config_path, - extrapolated_form_config_path=extrapolated_form_config_path, - skip_validating_tokens_values_config=bool(files_folder), - ) + verify_form_composer_configs( + task_data_config_path=task_data_config_path, + task_data_config_only=task_data_config_only, + ) # Start the process process = subprocess.Popen("python ./run.py", shell=True, cwd=app_path) @@ -498,5 +470,98 @@ def form_composer(manual_versions: bool, files_folder: Optional[str] = None): process.wait() +@cli.command("form_composer_config", cls=RichCommand) +@click.option("-v", "--verify", type=(bool), default=False) +@click.option("-f", "--update-file-location-values", type=(str), default=None) +@click.option("-e", "--extrapolate-token-sets", type=(bool), default=False) +@click.option("-p", "--permutate-single-tokens", type=(bool), default=False) +def form_composer_config( + verify: bool = False, + extrapolate_token_sets: bool = False, + update_file_location_values: Optional[str] = None, + permutate_single_tokens: bool = False, +): + """ + Prepare (parts of) config for the `form_composer` command. + Note that each parameter is essentially a separate command, and they cannot be mixed. 
+ + :param verify: Validate all JSON configs currently present in the form builder config directory + :param update_file_location_values: Update existing single-token values config + with file URLs automatically taken from a location (e.g. an S3 folder) + :param extrapolate_token_sets: Generate form versions based on extrapolated values of token sets + :param permutate_single_tokens: Create tokens sets as all possible permutations of values lists + defined in single-token values config + """ + # Get app path to run Python script from there (instead of the current file's directory). + # This is necessary, because the whole infrastructure is built relative to the location + # of the called command-line script. + # The other parts of the logic are inside `form_composer/run***.py` script + app_path = _get_form_composer_app_path() + app_data_path = os.path.join(app_path, FORM_COMPOSER_DATA_DIR_NAME) + + full_path = lambda data_file: os.path.join(app_data_path, data_file) + + # Check files and create `data.json` config with tokens data before running a task + task_data_config_path = full_path(FORM_COMPOSER_DATA_CONFIG_NAME) + form_config_path = full_path(FORM_COMPOSER_FORM_CONFIG_NAME) + token_sets_values_config_path = full_path(FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME) + single_token_values_config_path = full_path(FORM_COMPOSER_SINGLE_TOKEN_VALUES_CONFIG_NAME) + + # Change dir to app dir + os.chdir(app_path) + + if verify: + print(f"[green]Started configs verification in '{task_data_config_path}'[/green]") + verify_form_composer_configs( + task_data_config_path=task_data_config_path, + form_config_path=form_config_path, + token_sets_values_config_path=token_sets_values_config_path, + single_token_values_config_path=single_token_values_config_path, + task_data_config_only=False, + ) + print(f"[green]Finished successfully[/green]") + return None + + if update_file_location_values: + print( + f"[green]Started updating '{FORM_COMPOSER_SINGLE_TOKEN_VALUES_CONFIG_NAME}' 
+ f"with file URLs from '{update_file_location_values}'[/green]" + ) + if is_s3_url(update_file_location_values): + update_single_token_values_config_with_file_urls( + url=update_file_location_values, + single_token_values_config_path=single_token_values_config_path, + ) + print(f"[green]Finished successfully[/green]") + else: + print("`--update-file-location-values` must be a valid S3 URL") + return None + + if permutate_single_tokens: + print( + f"[green]Started updating '{FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME}' " + f"with permutated single-token values[/green]" + ) + update_token_sets_values_config_with_premutated_data( + single_token_values_config_path=single_token_values_config_path, + token_sets_values_config_path=token_sets_values_config_path, + ) + print(f"[green]Finished successfully[/green]") + return None + + if extrapolate_token_sets: + print( + f"[green]Started extrapolating token sets values " + f"from '{FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME}' [/green]" + ) + create_extrapolated_config( + form_config_path=form_config_path, + token_sets_values_config_path=token_sets_values_config_path, + task_data_config_path=task_data_config_path, + ) + print(f"[green]Finished successfully[/green]") + return None + + if __name__ == "__main__": cli() diff --git a/mephisto/generators/form_composer/README.md b/mephisto/generators/form_composer/README.md index d81511580..98327b694 100644 --- a/mephisto/generators/form_composer/README.md +++ b/mephisto/generators/form_composer/README.md @@ -6,13 +6,17 @@ This package provides `FormComposer` widget for React-based front-end development for Mephisto tasks. 
+You can find working demo of FormComposer in `examples/form_composer_demo` + +- For details on how to run these examples, refer to the demo's [README.md](/examples/form_composer_demo/README.md) + # How to Run -To create and launch a Form Composer task, create your JSON form configuration, +To create and launch a FormComposer task, create your JSON form configuration, and then run the below commands. -Once Form Composer launches, in the console you will see links like this: +Once FormComposer launches, in the console you will see links like this: http://localhost:3000/?worker_id=x&assignment_id=1 To view your Task as a worker, take one of these links and paste it in your browser. @@ -21,7 +25,18 @@ If launched with `docker-compose`, replace 3000 with the remapped port (e.g. for #### With docker-compose -You can launch Form Composer inside a Docker container: +You can launch FormComposer inside a Docker container: + +1. Prepare configs (`form_composer_config` command) + +```shell +docker-compose -f docker/docker-compose.dev.yml run \ + --build \ + --rm mephisto_dc \ + mephisto form_composer_config --extrapolate-token-sets True +``` + +2. Run composer itself (`form_composer` command) ```shell docker-compose -f docker/docker-compose.dev.yml run \ @@ -35,35 +50,265 @@ docker-compose -f docker/docker-compose.dev.yml run \ #### Without docker-compose First ensure that mephisto package is installed locally - please refer to [Mephisto's main doc](https://mephisto.ai/docs/guides/quickstart/). -Once that is done, run a `form_composer` command: +Once that is done, run `form_composer_config` command(s) if needed, and then `form_composer` command: ```shell -# Sample launching commands +mephisto form_composer_config --extrapolate-token-sets True mephisto form_composer -mephisto form_composer --manual-versions True -mephisto form_composer --files-folder "https://s3.amazon.com/...." 
+``` + + +## Using `form_composer_config` utility + +The `form_composer_config` utility command helps auto-generate FormComposer config. It supports several options: + +```shell +# Sample launching commands +mephisto form_composer_config --verify True +mephisto form_composer_config --extrapolate-token-sets True +mephisto form_composer_config --permutate-single-tokens True +mephisto form_composer_config --update-file-location-values "https://s3.amazon.com/...." ``` where -- `-m/--manual-versions` argument skips auto-generating form versions in `data.json` by extrapolating token values, and instead uses an existing `data.json` file (see [Custom form versions](#custom-form-versions) section) -- `-f/--files-folder` argument generates token values based on file names found within specified file folder (see a separate section about this mode of running Form Composer) +- `-v/--verify BOOLEAN` - if truthy, validates all JSON configs currently present in the form builder config directory +- `-f/--update-file-location-values S3_URL` - generates token values based on file names found within specified S3 folder (see a separate section about this mode of running FormComposer) +- `-e/--extrapolate-token-sets BOOLEAN` - if truthy, generates Task data config based on provided form config and token sets values +- `-p/--permutate-single-tokens BOOLEAN` - if truthy, generates token sets values as all possible combinations of values of individual tokens + +To understand what "tokens" means, read on about FormComposer config structure. ---- -# Config file structure +## Config files -You will need to provide Form Composer with a JSON configuration of your form fields, +You will need to provide FormComposer with a JSON configuration of your form fields, and place it in `generators/form-composer/data` directory. -- The form config file should be named `form_config.json`, and contain a JSON object with one key `form`.
-- If you want to slightly vary your form within a Task (by inserting different values into its text), you need to add a file named `tokens_values_config.json` and containing a JSON array of objects, each with one key `tokens_values` and value representing name-value pairs for the text tokens. -- For more details, read about dynamic form configs further down. -Config examples: -- form config: `examples/form_composer_demo/data/dynamic/form_config.json` -- token values config: `examples/form_composer_demo/data/dynamic/tokens_values_config.json` -- resulting extrapolated config: `examples/form_composer_demo/data/dynamic/data.json` +- The task data config file should be named `data.json`, and contain a list of JSON objects, each one with one key `form`. +- If you want to slightly vary your form within a Task (by inserting different values into its text), you need to add two files (that will be used to auto-generate `data.json` file): + - `token_sets_values_config.json` containing a JSON array of objects (each with one key `tokens_values` and value representing name-value pairs for a set of text tokens to be used in one form version). + - `form_config.json` containing a single JSON object with one key `form`. +- For more detail, read on about dynamic form configs. + +For detailed structure of each config file, see [Config file reference](#config-file-reference).
+ +Working config examples are provided in `examples/form_composer_demo/data` directory: +- task data config: `simple/data.json` +- form config: `dynamic/form_config.json` +- token sets values config: `dynamic/token_sets_values_config.json` +- single tokens values: `dynamic/single_token_values_config.json` to create `token_sets_values_config.json` +- resulting extrapolated config: `dynamic/data.json` + + +## Embedding FormComposer into custom application + +A few tips if you wish to embed FormComposer in your custom application: + +- to extrapolate form config (and generate the `data.json` file), call the extrapolator function `mephisto.generators.form_composer.config_validation.task_data_config.create_extrapolated_config` + - For a live example, you can explore the source code of [run_task_dynamic.py](/examples/form_composer_demo/run_task_dynamic.py) module + + +--- + + +# Multiple form versions + +The simplest Task scenario is showing the same exact form to all of your workers. In that case you need to: + +- Compose `data.json` file containing definition of a single form (and place it into FormComposer config folder) +- Optionally, verify your config: `mephisto form_composer_config --verify True` +- Run FormComposer: `mephisto form_composer` + +But suppose you wish to show a slightly different version of the form to your workers. You can do so by defining multiple form versions. FormComposer provides several ways of doing so. + +--- + +## Custom form versions + +If your form versions vary considerably (e.g. 
showing different sets of fields), you should do the following steps: + +- Populate these form versions into `data.json` file manually (it will be basically a JSON array of N individual form versions configs) +- Optionally, verify your config: `mephisto form_composer_config --verify True` +- Run FormComposer: `mephisto form_composer` + +_As a result, for each Task assignment Mephisto will automatically produce N units, each unit having a different form version. In total you will be collecting data from `N * units_per_assignment` workers._ + +--- + +## Dynamic form config + +If your form versions vary only slightly (e.g. same set of fields, but showing different images or different text), you should use a dynamic form config as follows: + +- Ensure you populate these files, and place them into your FormComposer config folder: + - `form_config.json`: tokenized form config - same as regular form config, except it will contain tokens within certain objects' attributes (see [Tokens extrapolation](#tokens-extrapolation)) + - `token_sets_values_config.json`: file containing sets of token values, where each set is used to generate one version of the form (and each form version will be completed by `units_per_assignment` different workers). +- Optionally, verify your files: `mephisto form_composer_config --verify True` +- Generate task data config: `mephisto form_composer_config --extrapolate-token-sets True` + - This will overwrite existing `data.json` file with auto-generated form versions, by extrapolating provided token sets values +- Run FormComposer: `mephisto form_composer` + +_The number of generated form versions N will be same as number of provided token sets. In total you will be collecting data from `N * units_per_assignment` workers._ + +--- + +#### Tokens extrapolation + +How does token extrapolation work? 
+ +A token is a named text placeholder that gets replaced ("extrapolated") by values specified in `token_sets_values_config.json` (each set of token values produces one form version based on dynamic form config `form_config.json`). + +Token placeholders within an attribute are formatted like so: `{{TOKEN_NAME}}` + +Tokens can be placed within the following object attributes: + +- `help` +- `instruction` +- `label` +- `title` +- `tooltip` + +If you wish to reuse the same token across different form attributes and levels, it's enough to specify it in a set of token values just once. (This also means that token names must be unique within token values sets) + + +--- + +#### Generate token sets with `--permutate-single-tokens` + +In a special case when all of your token sets are simply permutations of several value lists, sets of token values can be easily auto-generated. + +- Populate your lists of values for every single token into `single_token_values_config.json` file +- Optionally, verify your config: `mephisto form_composer_config --verify True` +- Generate `token_sets_values_config.json` with command: `mephisto form_composer_config --permutate-single-tokens True` + +_"Permutation" means all possible combinations of values. For example, permutations of amounts `2, 3`, sizes `big` and animals `cats, dogs` will produce result `2 big cats, 2 big dogs, 3 big cats, 3 big dogs`._ + +--- + +#### Generate single token values with `--update-file-location-values` + +In a special case when one of your tokens is an S3 file URL, that token's values can be easily auto-generated. -Here's a brief example of a form config: +- Make a public S3 folder that will contain only the files that you want (all of them) +- Run command: `mephisto form_composer_config --update-file-location-values S3_FOLDER_URL` +- As a result, a token with name `"file_location"` will be added to your `single_token_values_config.json` config file. 
Its values will be S3 URLs of all files found recursively within the `S3_FOLDER_URL` + +--- + +## Dynamic form config example + +Putting it all together, this is a brief example of composing a dynamic form config. + +#### Single token values config + +Let's start with separate token values in `single_token_values_config.json` file: + +```json +{ + "actor": ["Carrie Fisher", "Mark Hamill"], + "movie_name": ["Star Wars"] +} +``` + +#### Token values config + +Permutating these token values will produce this `token_sets_values_config.json` file with token sets values: + +```json +[ + { + "tokens_values": { + "actor": "Carrie Fisher", + "movie_name": "Star Wars" + } + }, + { + "tokens_values": { + "actor": "Mark Hamill", + "movie_name": "Star Wars" + } + } +] +``` + +#### Form config + +These tokens are placed into the `form_config.json` dynamic form config like so: + +```json +{ + ... + "instruction": "Rate {{actor}}'s performance in movie '{{movie_name}}'", + ... + "help": "Please only consider the movie '{{movie_name}}'", + ... +} +... +{ + ... + "instruction": "Rate the plot in movie '{{movie_name}}' out of 10", + ... +} +``` + +#### Task data config + +After extrapolating attributes from `form_config.json` with token sets from `token_sets_values_config.json`, we get the resulting `data.json` file used for the task: + +```json +// First extrapolated form version +{ + ... + "instruction": "Rate Carrie Fisher's performance in movie 'Star Wars'", + ... + "help": "Please only consider the movie 'Star Wars'", + ... +} +... +{ + ... + "instruction": "Rate the plot in movie 'Star Wars' out of 10", + ... +}, +// Second extrapolated form version +{ + ... + "instruction": "Rate Mark Hamill's performance in movie 'Star Wars'", + ... + "help": "Please only consider the movie 'Star Wars'", + ... +} +... +{ + ... + "instruction": "Rate the plot in movie 'Star Wars' out of 10", + ... 
+} +``` + + +--- + + +# Custom field handlers + +TBD + +--- + + +# Custom callbacks + +TBD (aka "remote procedure") + + +----- + + +# Config file reference + +## Config file: `data.json` + +Task data config file `data.json` specifies layout of all form versions that are completed by workers. Here's an abbreviated example of such config: ```json [ @@ -142,13 +387,12 @@ Here's a brief example of a form config: "tooltip": "Submit form" } } - } + }, + ... ] ``` ---- - -## Form config levels +#### Form config levels Form UI layout consists of the following layers of UI object hierarchy: @@ -173,7 +417,7 @@ _Note that, due to limitations of JSON format, HTML content needs to be converte --- -#### Config level: form +###### Config level: form `form` is a top-level config object with the following attributes: @@ -187,7 +431,7 @@ _Note that, due to limitations of JSON format, HTML content needs to be converte --- -#### Config level: section +###### Config level: section Each item of `sections` list is an object with the following attributes: @@ -200,7 +444,7 @@ Each item of `sections` list is an object with the following attributes: --- -#### Config level: fieldset +###### Config level: fieldset Each item of `fieldsets` list is an object with the following attributes: @@ -210,7 +454,7 @@ Each item of `fieldsets` list is an object with the following attributes: --- -#### Config level: row +###### Config level: row Each item of `rows` list is an object with the following attributes: @@ -218,7 +462,7 @@ Each item of `rows` list is an object with the following attributes: --- -#### Config level: field +###### Config level: field Each item of `fields` list is an object that corresponds to the actual form field displayed in the resulting Task UI page. 
@@ -243,7 +487,7 @@ Here's example of a single field config: } ``` -###### Attributes - all fields +######## Attributes - all fields The most important attributes are: `label`, `name`, `type`, `validators` @@ -265,7 +509,7 @@ The most important attributes are: `label`, `name`, `type`, `validators` - `value` - Initial value of the field (String, Optional) -###### Attributes - select field +######## Attributes - select field - `multiple` - Support selection of multiple provided options, not just one (Boolean. Default: false) - `options` - list of available options to select from. Each option is an object with these attributes: @@ -273,7 +517,7 @@ The most important attributes are: `label`, `name`, `type`, `validators` - `value`: value sent to the server (String|Number|Boolean) -###### Attributes - checkbox and radio fields +######## Attributes - checkbox and radio fields - `options` - list of available options to select from. Each option is an object with these attributes: - `label`: displayed text (String) @@ -281,200 +525,49 @@ The most important attributes are: `label`, `name`, `type`, `validators` - `checked`: initial state of selection (Boolean, default: false) -# Dynamic form config - -If you wish to slightly vary form instructions within the same Task (e.g. show different images or different text), you should use a dynamic form config. - ---- - -## Dynamic form config files - -Dynamic form config consists of two parts: - -- `form_config.json`: tokenized form config - same as non-dynamic form config, except it may contain tokens within certain objects attributes (see [Tokens extrapolation](#tokens-extrapolation)) -- `tokens_values_config.json`: file containing sets of token values, where each set is plugged into a dynamic form config to generate its form version (each form version will be completed by `units_per_assignment` different workers). 
- +## Config file: `form_config.json` -#### Extrapolated config +Form config file `form_config.json` specifies layout of a form in the same way as `data.json`, but with a few notable differences: +- It contains a single JSON object (not a JSON array of objects) +- Some of its form attributes definitions must contain dynamic tokens (whose values will be extrapolated, i.e. substituted with variable chunks of text) - see further below. -During bulding a Task with dynamic form config, the resulting config containing all form vesions will be placed in `data.json` file (next to `form_config.json` file). -Note that each form version in `data.json` represents one assignment in Mephisto. +## Config file: `token_sets_values_config.json` -- In your YAML Task config, always refer to the extrapolated config file `data.json` (not the foorm config file) -- Every time you re-run Form Composer, `data.json` file will be overwritten -- Run generator with command: `mephisto form_composer` +Sets of token values are specified as a JSON array of objects, where each object has one key `"tokens_values"`. Under that key there's a key-value definition of all tokens in that set. - -#### Custom form versions - -Suppose your form variations go beyond slight text changes (e.g. you wish to add a fieldset in one version of form config). In that case: - -- Create your own `data.json` file manually (it will be basically a JSON list of copy-pasted individual form config versions) -- You don't need to create `form_config.json` and `tokens_values_config.json` files -- Run generator with command: `mephisto form_composer --manual-versions True` - ---- - -## Tokens extrapolation - -A token is a named text placeholder that gets replaced ("extrapolated") by values specified in `tokens_values_config.json` (each set of `tokens_values` specifies a form version, and contains one such value). 
- -Token placeholders within an attribute looks like so: `{{TOKEN_NAME}}` - -Tokens can be placed within the following object attributes: - -- `help` -- `instruction` -- `label` -- `title` -- `tooltip` - -When reusing a token with same name in different form attributes (across all levels of form config), you should specify it in each `tokens_values` just once, for convenience. -(This also means that token names must be unique within the entire form config.) - - ---- - -## Dynamic form config with `--files-folder` - -Consider a special case when form config has only one token, a file path. Form Composer offers a shortcut to save your time on creating `tokens_values_config.json` file in this scenario. Simply launch task with this command: - -``` -mephisto form_composer --files-folder [value] -``` - -Argument `--files-folder [value]` does the following: -- finds folder specified by `[value]`, which currently can be an S3 folder URL like `"https://s3.amazon.com/...."` -- finds (recursively) location of all files within that folder (e.g. S3 URLs) -- generates `tokens_values_config.json` file that looks like so: -```json -[ - { - "tokens_values": { - "file_location": "[location/of/file1]", - } - }, - { - "tokens_values": { - "file_location": "[location/of/file2]" - } - }, - ... 
-] -``` -- now that tokens values config is generated, Task launch proceeds like for a normal dynamic form config - -Note that: -- `form_config.json` file must contain one, and only one, token name `{{file_location}}` -- `tokens_values_config.json` file is not needed in this case (it will be auto-generated) - ---- - -## Embedding FormBuilder into custom application - -If you wish to embed FormComposer in your custom application, a few tips about extrapolator function `mephisto.generators.form_composer.configs_validation.extrapolated_config.create_extrapolated_config` (that generates extrapolated `data.json` config): - -- call extrapolator function if you want to extrapolate token values - - You can see how it's done from [Example docs](#live-examples) here and by exploring source code of [run_task_dynamic.py](/examples/form_composer_demo/run_task_dynamic.py) module. -- NOT call extrapolator function if you already have a custom `data.json` config - ---- - -## Config files example - - -#### Form config - -Here's how fields with tokens look like in `form_config.json` file: - -```json -{ - ... - "instruction": "Rate {{actor}}'s performance in movie '{{movie_name}}'", - ... - "help": "Please only consider the movie '{{movie_name}}'", - ... -} -... -{ - ... - "instruction": "Rate the plot in movie '{{movie_name}}'?", - ... -} -``` - - -#### Token values config - -Here's how token values are specified `tokens_values_config.json` file: +Example: ```json [ { "tokens_values": { "actor": "Carrie Fisher", - "movie_name": "Star Wars" + "movie_name": "Star Wars", + "genre": "Sci-Fi" } }, { "tokens_values": { "actor": "Keanu Reeves", - "movie_name": "The Matrix" + "movie_name": "The Matrix", + "genre": "Sci-Fi" } } ] ``` +## Config file: `single_token_values_config.json` -#### Extrapolated config +Lists of separate tokens values are specified as JSON object with key-value pairs, where keys are token names, and values are JSON arrays of their values. 
-This is how resulting `data.json` file will look like, after form attributes from `form_config.json` get extrapolated with values from `tokens_values_config.json`: +Example: ```json -// First extrapolated form version -{ - ... - "instruction": "Rate Carrie Fisher's performance in movie 'Star Wars'", - ... - "help": "Please only consider the movie 'Star Wars'", - ... -} -... { - ... - "instruction": "Rate the plot in movie 'Star Wars'?", - ... -}, -// Second extrapolated form version -{ - ... - "instruction": "Rate Keanu Reeves's performance in movie 'The Matrix'", - ... - "help": "Please only consider the movie 'The Matrix'", - ... + "actor": ["Carrie Fisher", "Keanu Reeves"], + "movie_name": ["Star Wars", "The Matrix"], + "genre": ["Sci-Fi"] } -... -{ - ... - "instruction": "Rate the plot in movie 'The Matrix'?", - ... -} -``` - -Once a Task is launched, each of these two form versions will be completed `units_per_assignment` times (by different workers) - - ---- -## Custom field handlers - -TBD - ---- - -## Live Examples - -You can investigate live examples of Form Composer in `examples/form_composer_demo` directory, - -For more details on how to run these examples, refer to this [README.md](/examples/form_composer_demo/README.md). 
+``` diff --git a/mephisto/generators/form_composer/configs_validation/__init__.py b/mephisto/generators/form_composer/config_validation/__init__.py similarity index 100% rename from mephisto/generators/form_composer/configs_validation/__init__.py rename to mephisto/generators/form_composer/config_validation/__init__.py diff --git a/mephisto/generators/form_composer/configs_validation/common_validation.py b/mephisto/generators/form_composer/config_validation/common_validation.py similarity index 100% rename from mephisto/generators/form_composer/configs_validation/common_validation.py rename to mephisto/generators/form_composer/config_validation/common_validation.py diff --git a/mephisto/generators/form_composer/configs_validation/config_validation_constants.py b/mephisto/generators/form_composer/config_validation/config_validation_constants.py similarity index 100% rename from mephisto/generators/form_composer/configs_validation/config_validation_constants.py rename to mephisto/generators/form_composer/config_validation/config_validation_constants.py diff --git a/mephisto/generators/form_composer/configs_validation/form_config.py b/mephisto/generators/form_composer/config_validation/form_config.py similarity index 98% rename from mephisto/generators/form_composer/configs_validation/form_config.py rename to mephisto/generators/form_composer/config_validation/form_config.py index 3beefafbe..b4b3bec0a 100644 --- a/mephisto/generators/form_composer/configs_validation/form_config.py +++ b/mephisto/generators/form_composer/config_validation/form_config.py @@ -55,7 +55,7 @@ def validate_form_config(config_json: dict) -> Tuple[bool, List[str]]: if not isinstance(config_json, dict): is_valid = False - errors.append("Form config must be a dictionary.") + errors.append("Form config must be a key/value JSON Object.") elif config_json.keys() != AVAILABLE_CONFIG_ATTRS.keys(): is_valid = False diff --git 
a/mephisto/generators/form_composer/config_validation/single_token_values_config.py b/mephisto/generators/form_composer/config_validation/single_token_values_config.py new file mode 100644 index 000000000..f2b9d51ab --- /dev/null +++ b/mephisto/generators/form_composer/config_validation/single_token_values_config.py @@ -0,0 +1,56 @@ +from typing import Dict +from typing import List +from typing import Tuple + +from botocore.exceptions import BotoCoreError +from botocore.exceptions import ClientError +from botocore.exceptions import NoCredentialsError + +from .utils import get_file_urls_from_s3_storage +from .utils import write_config_to_file + + +def validate_single_token_values_config( + config_json: Dict[str, List[str]], +) -> Tuple[bool, List[str]]: + is_valid = True + errors = [] + + if not isinstance(config_json, dict): + is_valid = False + errors.append("Config must be a key/value JSON Object.") + return is_valid, errors + + for i, token_values in enumerate(config_json.items()): + token, values = token_values + + if not values: + is_valid = False + errors.append( + f"You passed empty array of values for token '{token}'. " + f"It must contain at least one value or just remove it you left it by mistake." + ) + + return is_valid, errors + + +def update_single_token_values_config_with_file_urls( + url: str, single_token_values_config_path: str, +): + try: + files_locations = get_file_urls_from_s3_storage(url) + except (BotoCoreError, ClientError, NoCredentialsError) as e: + print(f"Could not retrieve files from S3 URL '{url}'. 
Reason: {e}") + return None + + if not files_locations: + print( + f"Could not retrieve files from '{url}' - " + f"check if this location exists and contains files" + ) + return None + + single_token_values_config_data = { + "file_location": files_locations, + } + write_config_to_file(single_token_values_config_data, single_token_values_config_path) diff --git a/mephisto/generators/form_composer/config_validation/task_data_config.py b/mephisto/generators/form_composer/config_validation/task_data_config.py new file mode 100644 index 000000000..910ffad8e --- /dev/null +++ b/mephisto/generators/form_composer/config_validation/task_data_config.py @@ -0,0 +1,345 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import os.path +import re +from copy import deepcopy +from typing import List +from typing import Optional +from typing import Tuple + +from .config_validation_constants import ATTRS_SUPPORTING_TOKENS +from .form_config import validate_form_config +from .single_token_values_config import validate_single_token_values_config +from .token_sets_values_config import validate_token_sets_values_config +from .utils import make_error_message +from .utils import read_config_file +from .utils import write_config_to_file + +FILE_LOCATION_TOKEN_NAME = "file_location" + + +def _extrapolate_tokens_values(text: str, tokens_values: dict) -> str: + for token, value in tokens_values.items(): + text = re.sub(r"\{\{(\s*)" + token + r"(\s*)\}\}", value, text) + return text + + +def _set_tokens_in_form_config_item(item: dict, tokens_values: dict): + for attr_name in ATTRS_SUPPORTING_TOKENS: + item_attr = item.get(attr_name) + if not item_attr: + continue + + item[attr_name] = _extrapolate_tokens_values(item_attr, tokens_values) + + +def _collect_form_config_items_to_extrapolate(config_data: dict) -> List[dict]: + 
items_to_extrapolate = [] + + form = config_data["form"] + items_to_extrapolate.append(form) + + sections = form["sections"] + for section in sections: + items_to_extrapolate.append(section) + + fieldsets = section["fieldsets"] + for fieldset in fieldsets: + items_to_extrapolate.append(fieldset) + + rows = fieldset["rows"] + for row in rows: + items_to_extrapolate.append(row) + + fields = row["fields"] + for field in fields: + items_to_extrapolate.append(field) + + return items_to_extrapolate + + +def _collect_tokens_from_form_config(config_data: dict) -> Tuple[set, List[str]]: + items_to_extrapolate = _collect_form_config_items_to_extrapolate(config_data) + tokens_in_form_config = set() + tokens_in_unexpected_attrs_errors = [] + + for item in items_to_extrapolate: + for attr_name in ATTRS_SUPPORTING_TOKENS: + item_attr = item.get(attr_name) + if not item_attr: + continue + tokens_in_form_config.update(set(re.findall(r"\{\{\s*(\w+?)\s*\}\}", item_attr))) + + attrs_not_suppoting_tokens = set(item.keys()) - set(ATTRS_SUPPORTING_TOKENS) + for attr_name in attrs_not_suppoting_tokens: + item_attr = item.get(attr_name) + if isinstance(item_attr, str): + found_attr_tokens = re.findall(r"\{\{\s*(\w+?)\s*\}\}", item_attr) + if found_attr_tokens: + found_attr_tokens_string = ", ".join([f"'{t}'" for t in found_attr_tokens]) + tokens_in_unexpected_attrs_errors.append( + f"You tried to set tokens {found_attr_tokens_string} " + f"in attribute '{attr_name}' with value '{item_attr}'. 
" + f"You can use tokens only in following attributes: " + f"{', '.join(ATTRS_SUPPORTING_TOKENS)}" + ) + + return tokens_in_form_config, tokens_in_unexpected_attrs_errors + + +def _extrapolate_tokens_in_form_config(config_data: dict, tokens_values: dict) -> dict: + items_to_extrapolate = _collect_form_config_items_to_extrapolate(config_data) + for item in items_to_extrapolate: + _set_tokens_in_form_config_item(item, tokens_values) + return config_data + + +def _validate_tokens_in_both_configs( + form_config_data, token_sets_values_config_data, +) -> Tuple[set, set, list]: + tokens_from_form_config, tokens_in_unexpected_attrs_errors = ( + _collect_tokens_from_form_config(form_config_data) + ) + tokens_from_token_sets_values_config = set([ + token_name + for token_set_values_data in token_sets_values_config_data + for token_name in token_set_values_data.get("tokens_values", {}).keys() + + ]) + # Token names present in token values config, but not in form config + overspecified_tokens = tokens_from_token_sets_values_config - tokens_from_form_config + # Token names present in form config, but not in token values config + underspecified_tokens = tokens_from_form_config - tokens_from_token_sets_values_config + return overspecified_tokens, underspecified_tokens, tokens_in_unexpected_attrs_errors + + +def _combine_extrapolated_form_configs( + form_config_data: dict, + token_sets_values_config_data: List[dict], +) -> List[dict]: + errors = [] + + # Validate Form config + form_config_is_valid, form_config_errors = validate_form_config(form_config_data) + + if not form_config_is_valid: + # Stop generating a Task, the config is incorrect + raise ValueError("\n" + "\n\n".join(form_config_errors)) + + token_sets_values_config_is_valid, token_sets_values_data_config_errors = ( + validate_token_sets_values_config(token_sets_values_config_data) + ) + + # Validate that same token names are present in both configs + ( + overspecified_tokens, + underspecified_tokens, + 
tokens_in_unexpected_attrs_errors, + ) = _validate_tokens_in_both_configs( + form_config_data, token_sets_values_config_data, + ) + + # Output errors, if any + if overspecified_tokens: + errors.append( + f"Values for the following tokens are provided in token sets values config, " + f"but they are not defined in the form config: " + f"{', '.join(overspecified_tokens)}." + ) + if underspecified_tokens: + errors.append( + f"The following tokens are specified in the form config, " + f"but their values are not provided in the token sets values config: " + f"{', '.join(underspecified_tokens)}." + ) + + if tokens_in_unexpected_attrs_errors: + errors = errors + tokens_in_unexpected_attrs_errors + + if not form_config_is_valid: + errors.append(make_error_message("Form config is invalid.", form_config_errors)) + + if not token_sets_values_config_is_valid: + errors.append(make_error_message( + "Toekn sets values config is invalid.", token_sets_values_data_config_errors, + )) + + if errors: + # Stop generating a Task, the config is incorrect + raise ValueError("\n" + "\n\n".join(errors)) + + # If no errors, combine extrapolated form versions to create Task data config + combined_config = [] + if token_sets_values_config_data: + for token_sets_values in token_sets_values_config_data: + if token_sets_values == {}: + combined_config.append(form_config_data) + else: + form_config_data_with_tokens = _extrapolate_tokens_in_form_config( + deepcopy(form_config_data), token_sets_values["tokens_values"], + ) + combined_config.append(form_config_data_with_tokens) + else: + # If no config with tokens values was added than + # we just create one-unit config and copy form config into it as-is + combined_config.append(form_config_data) + + return combined_config + + +def create_extrapolated_config( + form_config_path: str, + token_sets_values_config_path: str, + task_data_config_path: str, +): + # Check if files exist + if not os.path.exists(form_config_path): + raise 
FileNotFoundError(f"Create file '{form_config_path}' and add form configuration") + + # Read JSON from files + form_config_data = read_config_file(form_config_path) + + if os.path.exists(token_sets_values_config_path): + token_sets_values_data = read_config_file(token_sets_values_config_path) + else: + token_sets_values_data = [] + + # Create combined config + try: + extrapolated_form_config_data = _combine_extrapolated_form_configs( + form_config_data, + token_sets_values_data, + ) + write_config_to_file(extrapolated_form_config_data, task_data_config_path) + except ValueError as e: + print(f"Could not extrapolate form configs: {e}") + + +def generate_tokens_values_config_from_files(token_sets_values_config_path: str, files: List[str]): + token_sets_values_config_data = [] + + for i, file_location in enumerate(files): + token_sets_values_config_data.append(dict( + tokens_values={ + FILE_LOCATION_TOKEN_NAME: file_location, + }, + )) + + try: + write_config_to_file(token_sets_values_config_data, token_sets_values_config_path) + except ValueError as e: + print(f"Could not write tokens values to file: {e}") + + +def validate_task_data_config(config_json: List[dict]) -> Tuple[bool, List[str]]: + is_valid = True + errors = [] + + if not isinstance(config_json, list): + is_valid = False + errors.append("Config must be a JSON Array.") + + if config_json: + if not all(config_json): + is_valid = False + errors.append("Task data config must contain at least one non-empty item.") + + # Validate each form version contained in task data config + for item in config_json: + form_config_is_valid, form_config_errors = validate_form_config(item) + if not form_config_is_valid: + is_valid = False + errors += form_config_errors + + return is_valid, errors + + +def verify_form_composer_configs( + task_data_config_path: str, + form_config_path: Optional[str] = None, + token_sets_values_config_path: Optional[str] = None, + single_token_values_config_path: Optional[str] = None, + 
task_data_config_only: bool = False, +): + errors = [] + + try: + # 1. Validate data config + task_data_config_data = read_config_file(task_data_config_path) + + task_data_config_is_valid, task_data_config_errors = validate_task_data_config( + task_data_config_data, + ) + + if not task_data_config_is_valid: + errors.append(make_error_message( + "Task data config is invalid.", task_data_config_errors, + )) + + if task_data_config_only: + if errors: + raise ValueError("\n" + "\n\n".join(errors)) + + return None + + # 2. Validate form config config + form_config_data = read_config_file(form_config_path) + + form_config_is_valid, form_config_errors = validate_form_config(form_config_data) + + if not form_config_is_valid: + errors.append(make_error_message("Form config is invalid.", form_config_errors)) + + # 3. Validate token sets values config + if os.path.exists(token_sets_values_config_path): + token_sets_values_data = read_config_file(token_sets_values_config_path) + else: + token_sets_values_data = [] + + ( + overspecified_tokens, + underspecified_tokens, + tokens_in_unexpected_attrs_errors, + ) = _validate_tokens_in_both_configs( + form_config_data, token_sets_values_data, + ) + + # Output errors, if any + if overspecified_tokens: + errors.append( + f"Values for the following tokens are provided in token sets values config, " + f"but they are not defined in the form config: " + f"{', '.join(overspecified_tokens)}." + ) + if underspecified_tokens: + errors.append( + f"The following tokens are specified in the form config, " + f"but their values are not provided in the token sets values config: " + f"{', '.join(underspecified_tokens)}." + ) + + if tokens_in_unexpected_attrs_errors: + errors = errors + tokens_in_unexpected_attrs_errors + + # 4. 
Validate single token values config + single_token_values_config_data = read_config_file(single_token_values_config_path) + + single_token_values_config_is_valid, single_token_values_config_errors = ( + validate_single_token_values_config(single_token_values_config_data) + ) + + if not single_token_values_config_is_valid: + token_sets_values_data_config_errors = [ + f" - {e}" for e in single_token_values_config_errors + ] + errors_string = "\n".join(token_sets_values_data_config_errors) + errors.append(f"Single token values config is invalid. Errors:\n{errors_string}") + + if errors: + raise ValueError("\n" + "\n\n".join(errors)) + + except ValueError as e: + print(f"Could not extrapolate form configs: {e}") diff --git a/mephisto/generators/form_composer/config_validation/token_sets_values_config.py b/mephisto/generators/form_composer/config_validation/token_sets_values_config.py new file mode 100644 index 000000000..5f7d8d014 --- /dev/null +++ b/mephisto/generators/form_composer/config_validation/token_sets_values_config.py @@ -0,0 +1,93 @@ +import itertools +import json +from json import JSONDecodeError +from typing import Dict +from typing import List +from typing import Tuple + +from .common_validation import validate_config_dict_item +from .config_validation_constants import AVAILABLE_TASK_ATTRS +from .single_token_values_config import validate_single_token_values_config +from .utils import make_error_message +from .utils import read_config_file +from .utils import write_config_to_file + +TokensPermutationType = List[ + Dict[ + str, Dict[ + str, List[str] + ] + ] +] + + +def validate_token_sets_values_config(config_json: List[dict]) -> Tuple[bool, List[str]]: + is_valid = True + errors = [] + + if not isinstance(config_json, list): + is_valid = False + errors.append("Config must be a JSON Array.") + + if config_json: + if not all(config_json): + is_valid = False + errors.append("Config must contain at least one non-empty item.") + + for item in config_json: + 
item_is_valid = validate_config_dict_item( + item, "item_tokens_values", AVAILABLE_TASK_ATTRS, errors, + ) + if not item_is_valid: + is_valid = False + + return is_valid, errors + + +def _premutate_single_tokents(data: Dict[str, List[str]]) -> TokensPermutationType: + all_permutations = [] + # Make a list to iterate many times + data_keys = list(data.keys()) + + # Collect a list of values lists in data keys order + sorted_values_lists: List[list] = [values for token, values in data.items()] + + # Making a list of premutated dicts + for i, row in enumerate(itertools.product(*sorted_values_lists, repeat=1)): + single_permudation = {} + for y, key in enumerate(data_keys): + single_permudation[key] = row[y] + + all_permutations.append( + { + "tokens_values": single_permudation, + } + ) + + return all_permutations + + +def update_token_sets_values_config_with_premutated_data( + single_token_values_config_path: str, + token_sets_values_config_path: str, +): + # Read JSON from files + single_token_values_config_data = read_config_file(single_token_values_config_path) + + single_token_values_config_is_valid, single_token_values_config_errors = ( + validate_single_token_values_config(single_token_values_config_data) + ) + + errors = [] + if not single_token_values_config_is_valid: + errors.append(make_error_message( + "Single token values config is invalid.", single_token_values_config_errors, + )) + + if errors: + # Stop generating a Task, the config is incorrect + raise ValueError("\n" + "\n\n".join(errors)) + + premutated_data = _premutate_single_tokents(single_token_values_config_data) + + write_config_to_file(premutated_data, token_sets_values_config_path) diff --git a/mephisto/generators/form_composer/config_validation/utils.py b/mephisto/generators/form_composer/config_validation/utils.py new file mode 100644 index 000000000..931d0d7ba --- /dev/null +++ b/mephisto/generators/form_composer/config_validation/utils.py @@ -0,0 +1,79 @@ +import json +import os +from json 
import JSONDecodeError +from typing import List +from typing import Tuple +from typing import Union +from urllib.parse import urljoin +from urllib.parse import urlparse + +import boto3 + +from mephisto.generators.form_composer.constants import JSON_IDENTATION + + +def write_config_to_file(config_data: Union[List[dict], dict], file_path: str): + config_str = json.dumps(config_data, indent=JSON_IDENTATION) + + with open(file_path, "w") as f: + f.write(config_str) + + +def is_s3_url(value: str) -> bool: + if isinstance(value, str): + parsed_url = urlparse(value) + return bool( + parsed_url.scheme == 'https' and + "s3" in parsed_url.hostname and + parsed_url.netloc and + parsed_url.path + ) + + return False + + +def _get_bucket_and_key_from_S3_url(s3_url: str) -> Tuple[str, str]: + parsed_url = urlparse(s3_url) + bucket_name = parsed_url.hostname.split('.')[0] + relative_path = parsed_url.path + + if not relative_path: + raise ValueError(f'Cannot extract S3 key from invalid URL "{s3_url}"') + + # Remove a slash from the beginning of the path + s3_key = relative_path[1:] + return bucket_name, s3_key + + +def get_file_urls_from_s3_storage(s3_url: str) -> List[str]: + urls = [] + + base_url = "{0.scheme}://{0.netloc}/".format(urlparse(s3_url)) + bucket, s3_path = _get_bucket_and_key_from_S3_url(s3_url) + + s3 = boto3.resource("s3") + my_bucket = s3.Bucket(bucket) + + for object_summary in my_bucket.objects.filter(Prefix=s3_path): + file_s3_key: str = object_summary.key + filename = os.path.basename(file_s3_key) + is_file = bool(filename) + if is_file: + urls.append(urljoin(base_url, file_s3_key)) + + return urls + + +def read_config_file(config_path: str) -> Union[List[dict], dict]: + try: + with open(config_path) as config_file: + config_data = json.load(config_file) + except (JSONDecodeError, TypeError, FileNotFoundError): + print(f"Could not read JSON from '{config_path}' file") + raise + return config_data + + +def make_error_message(main_message: str, error_list: 
List[str]) -> str: + errors_bullet = "\n - " + "\n - ".join(map(str, error_list)) + return f"{main_message}. Errors:{errors_bullet}" diff --git a/mephisto/generators/form_composer/configs_validation/extrapolated_config.py b/mephisto/generators/form_composer/configs_validation/extrapolated_config.py deleted file mode 100644 index 2b6b9c0e2..000000000 --- a/mephisto/generators/form_composer/configs_validation/extrapolated_config.py +++ /dev/null @@ -1,292 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -import json -import os.path -import re -from copy import deepcopy -from json import JSONDecodeError -from typing import List -from typing import Tuple -from urllib.parse import urljoin -from urllib.parse import urlparse - -import boto3 - -from mephisto.generators.form_composer.constants import JSON_IDENTATION -from .config_validation_constants import ATTRS_SUPPORTING_TOKENS -from .form_config import validate_form_config -from .tokens_values_config import validate_tokens_values_config - -FILE_LOCATION_TOKEN_NAME = "file_location" - - -def _extrapolate_tokens_values(text: str, tokens_values: dict) -> str: - for token, value in tokens_values.items(): - text = re.sub(r"\{\{(\s*)" + token + r"(\s*)\}\}", value, text) - return text - - -def _set_tokens_in_form_config_item(item: dict, tokens_values: dict): - for attr_name in ATTRS_SUPPORTING_TOKENS: - item_attr = item.get(attr_name) - if not item_attr: - continue - - item[attr_name] = _extrapolate_tokens_values(item_attr, tokens_values) - - -def _collect_form_config_items_to_extrapolate(config_data: dict) -> List[dict]: - items_to_extrapolate = [] - - form = config_data["form"] - items_to_extrapolate.append(form) - - sections = form["sections"] - for section in sections: - items_to_extrapolate.append(section) - - fieldsets = section["fieldsets"] - for fieldset in 
fieldsets: - items_to_extrapolate.append(fieldset) - - rows = fieldset["rows"] - for row in rows: - items_to_extrapolate.append(row) - - fields = row["fields"] - for field in fields: - items_to_extrapolate.append(field) - - return items_to_extrapolate - - -def _collect_tokens_from_form_config(config_data: dict) -> Tuple[set, List[str]]: - items_to_extrapolate = _collect_form_config_items_to_extrapolate(config_data) - tokens_in_form_config = set() - tokens_in_unexpected_attrs_errors = [] - - for item in items_to_extrapolate: - for attr_name in ATTRS_SUPPORTING_TOKENS: - item_attr = item.get(attr_name) - if not item_attr: - continue - tokens_in_form_config.update(set(re.findall(r"\{\{\s*(\w+?)\s*\}\}", item_attr))) - - attrs_not_suppoting_tokens = set(item.keys()) - set(ATTRS_SUPPORTING_TOKENS) - for attr_name in attrs_not_suppoting_tokens: - item_attr = item.get(attr_name) - if isinstance(item_attr, str): - found_attr_tokens = re.findall(r"\{\{\s*(\w+?)\s*\}\}", item_attr) - if found_attr_tokens: - found_attr_tokens_string = ", ".join([f"'{t}'" for t in found_attr_tokens]) - tokens_in_unexpected_attrs_errors.append( - f"You tried to set tokens {found_attr_tokens_string} " - f"in attribute '{attr_name}' with value '{item_attr}'. 
" - f"You can use tokens only in following attributes: " - f"{', '.join(ATTRS_SUPPORTING_TOKENS)}" - ) - - return tokens_in_form_config, tokens_in_unexpected_attrs_errors - - -def _extrapolate_tokens_in_form_config(config_data: dict, tokens_values: dict) -> dict: - items_to_extrapolate = _collect_form_config_items_to_extrapolate(config_data) - for item in items_to_extrapolate: - _set_tokens_in_form_config_item(item, tokens_values) - return config_data - - -def _combine_extrapolated_form_configs( - form_config_data: dict, - tokens_values_config_data: List[dict], - skip_validating_tokens_values_config: bool, -) -> List[dict]: - errors = [] - - # Validate Form config - form_config_is_valid, form_config_errors = validate_form_config(form_config_data) - - if not form_config_is_valid: - # Stop generating a Task, the config is incorrect - raise ValueError("\n" + "\n\n".join(form_config_errors)) - - # Validate token values config - if skip_validating_tokens_values_config: - tokens_values_config_is_valid, tokens_values_data_config_errors = True, [] - else: - tokens_values_config_is_valid, tokens_values_data_config_errors = ( - validate_tokens_values_config(tokens_values_config_data) - ) - - # Validate tokens in both configs - tokens_from_form_config, tokens_in_unexpected_attrs_errors = _collect_tokens_from_form_config( - form_config_data, - ) - tokens_from_tokens_values_config = set(sum( - [list(u["tokens_values"].keys()) for u in tokens_values_config_data], - [], - )) - - # Token names present in token values config, but not in form config - overspecified_tokens = tokens_from_tokens_values_config - tokens_from_form_config - # Token names present in form config, but not in token values config - underspecified_tokens = tokens_from_form_config - tokens_from_tokens_values_config - - # Output errors, if any - if overspecified_tokens: - errors.append( - f"Values for the following tokens are provided in tokens value config, " - f"but they are not defined in the form config: " - 
f"{', '.join(overspecified_tokens)}." - ) - if underspecified_tokens: - errors.append( - f"The following tokens are specified in the form config, " - f"but their values are not provided in the tokens values config: " - f"{', '.join(underspecified_tokens)}." - ) - - if tokens_in_unexpected_attrs_errors: - errors = errors + tokens_in_unexpected_attrs_errors - - if not form_config_is_valid: - form_config_errors = [f" - {e}" for e in form_config_errors] - errors_string = "\n".join(form_config_errors) - errors.append(f"Form config is invalid. Errors:\n{errors_string}") - - if not tokens_values_config_is_valid: - tokens_values_data_config_errors = [f" - {e}" for e in tokens_values_data_config_errors] - errors_string = "\n".join(tokens_values_data_config_errors) - errors.append(f"Units data config is invalid. Errors:\n{errors_string}") - - if errors: - # Stop generating a Task, the config is incorrect - raise ValueError("\n" + "\n\n".join(errors)) - - # If no errors, combine extrapolated form versions to create Task data config - combined_config = [] - if tokens_values_config_data: - for unit_tokens_values in tokens_values_config_data: - if unit_tokens_values == {}: - combined_config.append(form_config_data) - else: - form_config_data_with_tokens = _extrapolate_tokens_in_form_config( - deepcopy(form_config_data), unit_tokens_values["tokens_values"], - ) - combined_config.append(form_config_data_with_tokens) - else: - # If no config with tokens values was added than - # we just create one-unit config and copy form config into it as-is - combined_config.append(form_config_data) - - return combined_config - - -def _write_config_to_file(config_data: List[dict], file_path: str): - config_str = json.dumps(config_data, indent=JSON_IDENTATION) - - with open(file_path, "w") as f: - f.write(config_str) - - -def create_extrapolated_config( - form_config_path: str, - tokens_values_config_path: str, - extrapolated_form_config_path: str, - skip_validating_tokens_values_config: bool = 
False, -): - # Check if files exist - if not os.path.exists(form_config_path): - raise FileNotFoundError(f"Create file '{form_config_path}' and add form configuration") - - # Read JSON from files - try: - with open(form_config_path) as form_config_file: - form_config_data = json.load(form_config_file) - except (JSONDecodeError, TypeError): - print(f"Could not read JSON from '{form_config_path}' file") - raise - - if os.path.exists(tokens_values_config_path): - try: - with open(tokens_values_config_path) as tokens_values_data_config_file: - tokens_values_data = json.load(tokens_values_data_config_file) - except (JSONDecodeError, TypeError): - print(f"Could not read JSON from '{tokens_values_config_path}' file") - else: - tokens_values_data = [] - - # Create combined config - try: - extrapolated_form_config_data = _combine_extrapolated_form_configs( - form_config_data, - tokens_values_data, - skip_validating_tokens_values_config, - ) - _write_config_to_file(extrapolated_form_config_data, extrapolated_form_config_path) - except ValueError as e: - print(f"Could not extrapolate form configs: {e}") - - -def _get_bucket_and_key_from_S3_url(s3_url: str) -> Tuple[str, str]: - parsed_url = urlparse(s3_url) - bucket_name = parsed_url.hostname.split('.')[0] - relative_path = parsed_url.path - - if not relative_path: - raise ValueError(f'Cannot extract S3 key from invalid URL "{s3_url}"') - - # Remove a slash from the beginning of the path - s3_key = relative_path[1:] - return bucket_name, s3_key - - -def is_s3_url(value: str) -> bool: - if isinstance(value, str): - parsed_url = urlparse(value) - return bool( - parsed_url.scheme == 'https' and - "s3" in parsed_url.hostname and - parsed_url.netloc and - parsed_url.path - ) - - return False - - -def get_file_urls_from_s3_storage(s3_url: str) -> List[str]: - urls = [] - - base_url = "{0.scheme}://{0.netloc}/".format(urlparse(s3_url)) - bucket, s3_path = _get_bucket_and_key_from_S3_url(s3_url) - - s3 = boto3.resource("s3") - 
my_bucket = s3.Bucket(bucket) - - for object_summary in my_bucket.objects.filter(Prefix=s3_path): - file_s3_key: str = object_summary.key - filename = os.path.basename(file_s3_key) - is_file = bool(filename) - if is_file: - urls.append(urljoin(base_url, file_s3_key)) - - return urls - - -def generate_tokens_values_config_from_files(tokens_values_config_path: str, files: List[str]): - tokens_values_config_data = [] - - for i, file_location in enumerate(files): - tokens_values_config_data.append(dict( - tokens_values={ - FILE_LOCATION_TOKEN_NAME: file_location, - }, - )) - - try: - _write_config_to_file(tokens_values_config_data, tokens_values_config_path) - except ValueError as e: - print(f"Could not generate tokens values config: {e}") diff --git a/mephisto/generators/form_composer/configs_validation/tokens_values_config.py b/mephisto/generators/form_composer/configs_validation/tokens_values_config.py deleted file mode 100644 index 3fbccf6e2..000000000 --- a/mephisto/generators/form_composer/configs_validation/tokens_values_config.py +++ /dev/null @@ -1,28 +0,0 @@ -from typing import List -from typing import Tuple - -from .common_validation import validate_config_dict_item -from .config_validation_constants import AVAILABLE_TASK_ATTRS - - -def validate_tokens_values_config(config_json: List[dict]) -> Tuple[bool, List[str]]: - is_valid = True - errors = [] - - if not isinstance(config_json, list): - is_valid = False - errors.append("Config must be 'Array.") - - if config_json: - if not all(config_json): - is_valid = False - errors.append("Config must contain at least one non-empty item.") - - for item in config_json: - unit_is_valid = validate_config_dict_item( - item, "unit_tokens_values", AVAILABLE_TASK_ATTRS, errors, - ) - if not unit_is_valid: - is_valid = False - - return is_valid, errors diff --git a/mephisto/review_app/client/src/pages/TaskPage/TaskPage.css b/mephisto/review_app/client/src/pages/TaskPage/TaskPage.css index 6b5498872..ea1b44afe 100644 --- 
a/mephisto/review_app/client/src/pages/TaskPage/TaskPage.css +++ b/mephisto/review_app/client/src/pages/TaskPage/TaskPage.css @@ -72,6 +72,22 @@ padding: 10px 30px; } +.task .content .results .results-header { + cursor: pointer; +} + +.task .content .results .results-icon { + display: inline-block; + margin-left: 10px; + font-style: normal; + font-size: 50px; + line-height: 1; +} + +.task .content .results .results-closed{ + display: none; +} + .task .content .results .results-table { max-width: 1000px; } diff --git a/mephisto/review_app/client/src/pages/TaskPage/TaskPage.tsx b/mephisto/review_app/client/src/pages/TaskPage/TaskPage.tsx index 5025f7347..2ef59ddb2 100644 --- a/mephisto/review_app/client/src/pages/TaskPage/TaskPage.tsx +++ b/mephisto/review_app/client/src/pages/TaskPage/TaskPage.tsx @@ -103,6 +103,8 @@ function TaskPage(props: PropsType) { const [unitResultsIsJSON, setUnitResultsIsJSON] = React.useState(false); + const [resultsVisibility, setResultsVisibility] = React.useState(true); + window.onmessage = function (e) { if ( e.data && @@ -535,21 +537,26 @@ function TaskPage(props: PropsType) { <> {/* Results table */}
-

- Results: +

setResultsVisibility(!resultsVisibility)}> + Results + + {resultsVisibility ? <>▾ : <>▸} +

- {unitResultsIsJSON ? ( - - ) : ( -
- {JSON.stringify(currentUnitDetails.outputs)} -
- )} +
+ {unitResultsIsJSON ? ( + + ) : ( +
+ {JSON.stringify(currentUnitDetails.outputs)} +
+ )} +
{/* Task info */} diff --git a/mephisto/review_app/client/src/pages/TasksPage/TasksPage.tsx b/mephisto/review_app/client/src/pages/TasksPage/TasksPage.tsx index d61e8536c..b5194fed2 100644 --- a/mephisto/review_app/client/src/pages/TasksPage/TasksPage.tsx +++ b/mephisto/review_app/client/src/pages/TasksPage/TasksPage.tsx @@ -24,6 +24,7 @@ function TasksPage(props: PropsType) { const [tasks, setTasks] = React.useState>(null); const [loading, setLoading] = React.useState(false); + const [taskIdExportResults, setTaskIdExportResults] = React.useState(null); const [loadingExportResults, setLoadingExportResults] = React.useState(false); const onTaskRowClick = (id: number) => { @@ -43,7 +44,11 @@ function TasksPage(props: PropsType) { }; const requestTaskResults = (taskId: number, nUnits: number) => { + setTaskIdExportResults(taskId); + const onSuccessExportResults = (data) => { + setTaskIdExportResults(null); + if (data.file_created) { // Create pseudo link and click it const linkId = "result-json"; @@ -126,7 +131,10 @@ function TasksPage(props: PropsType) { {task.unit_count} {date} - {task.is_reviewed && !loadingExportResults && ( + {( + task.is_reviewed && + !(loadingExportResults && taskIdExportResults === task.id) + ) && ( requestTaskResults(task.id, task.unit_count)} @@ -134,7 +142,7 @@ function TasksPage(props: PropsType) { Download )} - {loadingExportResults && ( + {(taskIdExportResults === task.id && loadingExportResults) && (