diff --git a/examples/form_composer_demo/README.md b/examples/form_composer_demo/README.md
index 999f274b4..39a657637 100644
--- a/examples/form_composer_demo/README.md
+++ b/examples/form_composer_demo/README.md
@@ -1,4 +1,4 @@
-This form-based questionnaire is a simple example of Form Composer task generator.
+These form-based questionnaires are examples of the FormComposer task generator.
---
@@ -16,11 +16,12 @@ This form-based questionnaire is a simple example of Form Composer task generato
## How to configure
-1. For simple form config you need to provide Form Composer with one JSON file - a configuration of your form fields.
+1. For simple form config you need to provide FormComposer with one JSON file - a configuration of your form fields.
An example is found in `examples/form_composer_demo/data/simple/data.json` file.
2. For dynamic form configs you need two JSON files:
- form configuration `examples/form_composer_demo/data/dynamic/form_config.json`
- - tokens values `examples/form_composer_demo/data/dynamic/tokens_values_config.json`
+ - tokens values `examples/form_composer_demo/data/dynamic/token_sets_values_config.json`
+ - (optional) lists of values for each single token `examples/form_composer_demo/data/dynamic/single_token_values_config.json`, used to generate `token_sets_values_config.json`
Note that during bulding a Task with dynamic form config, the resulting data config will be placed in `data.json` file, i.e. `examples/form_composer_demo/data/dynamic/data.json` (in this example it's already been created and will be overwritten when you build a Task).
@@ -28,29 +29,4 @@ Note that during bulding a Task with dynamic form config, the resulting data con
### Form config
-For details on how form config is composed, and how data fields are validated please see the main Form Composer's README.
-
-Here's a sample part of form config:
-
-```json
-{
- "fields": [
- {
- "id": "id_name_first",
- "label": "First name",
- "name": "name_first",
- "placeholder": "Type first name",
- "title": "First name of a person",
- "type": "input",
- "validators": {
- "required": true,
- "minLength": 2,
- "maxLength": 20,
- "regexp": ["^[a-zA-Z0-9._-]+@mephisto\\.ai$", "ig"]
- // or just string "regexp": "^[a-zA-Z0-9._-]+@mephisto\\.ai$"
- },
- "value": ""
- }
- ]
-}
-```
+For details on how form config is composed, and how its data fields are validated, please see the main FormComposer's [README.md](/mephisto/generators/form_composer/README.md).
diff --git a/examples/form_composer_demo/run_task_dynamic.py b/examples/form_composer_demo/run_task_dynamic.py
index 8630de004..5ab0c8e02 100644
--- a/examples/form_composer_demo/run_task_dynamic.py
+++ b/examples/form_composer_demo/run_task_dynamic.py
@@ -8,7 +8,7 @@
from omegaconf import DictConfig
-from mephisto.generators.form_composer.configs_validation.extrapolated_config import (
+from mephisto.generators.form_composer.config_validation.task_data_config import (
create_extrapolated_config
)
from mephisto.operations.operator import Operator
@@ -76,13 +76,13 @@ def generate_data_json_config():
data_path = os.path.join(app_path, "data")
form_config_path = os.path.join(data_path, "dynamic", "form_config.json")
- tokens_values_config_path = os.path.join(data_path, "dynamic", "tokens_values_config.json")
- extrapolated_form_config_path = os.path.join(data_path, "dynamic", "data.json")
+ token_sets_values_config_path = os.path.join(data_path, "dynamic", "token_sets_values_config.json")
+ task_data_config_path = os.path.join(data_path, "dynamic", "data.json")
create_extrapolated_config(
form_config_path=form_config_path,
- tokens_values_config_path=tokens_values_config_path,
- extrapolated_form_config_path=extrapolated_form_config_path,
+ token_sets_values_config_path=token_sets_values_config_path,
+ task_data_config_path=task_data_config_path,
)
diff --git a/examples/form_composer_demo/run_task_dynamic_ec2_mturk_sandbox.py b/examples/form_composer_demo/run_task_dynamic_ec2_mturk_sandbox.py
index 9d18e2d62..80b2af130 100644
--- a/examples/form_composer_demo/run_task_dynamic_ec2_mturk_sandbox.py
+++ b/examples/form_composer_demo/run_task_dynamic_ec2_mturk_sandbox.py
@@ -14,7 +14,7 @@
from mephisto.abstractions.blueprints.abstract.static_task.static_blueprint import (
SharedStaticTaskState,
)
-from mephisto.generators.form_composer.configs_validation.extrapolated_config import (
+from mephisto.generators.form_composer.config_validation.task_data_config import (
create_extrapolated_config
)
from mephisto.operations.operator import Operator
@@ -94,13 +94,13 @@ def generate_data_json_config():
data_path = os.path.join(app_path, "data")
form_config_path = os.path.join(data_path, "dynamic", "form_config.json")
- tokens_values_config_path = os.path.join(data_path, "dynamic", "tokens_values_config.json")
- extrapolated_form_config_path = os.path.join(data_path, "dynamic", "data.json")
+ token_sets_values_config_path = os.path.join(data_path, "dynamic", "token_sets_values_config.json")
+ task_data_config_path = os.path.join(data_path, "dynamic", "data.json")
create_extrapolated_config(
form_config_path=form_config_path,
- tokens_values_config_path=tokens_values_config_path,
- extrapolated_form_config_path=extrapolated_form_config_path,
+ token_sets_values_config_path=token_sets_values_config_path,
+ task_data_config_path=task_data_config_path,
)
diff --git a/examples/form_composer_demo/run_task_dynamic_ec2_prolific.py b/examples/form_composer_demo/run_task_dynamic_ec2_prolific.py
index 927d2f4ec..6f4b4d55a 100644
--- a/examples/form_composer_demo/run_task_dynamic_ec2_prolific.py
+++ b/examples/form_composer_demo/run_task_dynamic_ec2_prolific.py
@@ -12,7 +12,7 @@
SharedStaticTaskState,
)
from mephisto.data_model.qualification import QUAL_GREATER_EQUAL
-from mephisto.generators.form_composer.configs_validation.extrapolated_config import (
+from mephisto.generators.form_composer.config_validation.task_data_config import (
create_extrapolated_config
)
from mephisto.operations.operator import Operator
@@ -98,13 +98,13 @@ def generate_data_json_config():
data_path = os.path.join(app_path, "data")
form_config_path = os.path.join(data_path, "dynamic", "form_config.json")
- tokens_values_config_path = os.path.join(data_path, "dynamic", "tokens_values_config.json")
- extrapolated_form_config_path = os.path.join(data_path, "dynamic", "data.json")
+ token_sets_values_config_path = os.path.join(data_path, "dynamic", "token_sets_values_config.json")
+ task_data_config_path = os.path.join(data_path, "dynamic", "data.json")
create_extrapolated_config(
form_config_path=form_config_path,
- tokens_values_config_path=tokens_values_config_path,
- extrapolated_form_config_path=extrapolated_form_config_path,
+ token_sets_values_config_path=token_sets_values_config_path,
+ task_data_config_path=task_data_config_path,
)
diff --git a/mephisto/client/cli.py b/mephisto/client/cli.py
index 86b41177d..5b8bd915a 100644
--- a/mephisto/client/cli.py
+++ b/mephisto/client/cli.py
@@ -9,9 +9,6 @@
from typing import Optional
import rich_click as click # type: ignore
-from botocore.exceptions import BotoCoreError
-from botocore.exceptions import ClientError
-from botocore.exceptions import NoCredentialsError
from flask.cli import pass_script_info
from rich import print
from rich.markdown import Markdown
@@ -32,21 +29,30 @@
import mephisto.scripts.mturk.print_outstanding_hit_status as print_outstanding_hit_status_mturk
import mephisto.scripts.mturk.print_outstanding_hit_status as soft_block_workers_by_mturk_id_mturk
from mephisto.client.cli_commands import get_wut_arguments
-from mephisto.generators.form_composer.configs_validation.extrapolated_config import (
+from mephisto.generators.form_composer.config_validation.task_data_config import (
create_extrapolated_config
)
-from mephisto.generators.form_composer.configs_validation.extrapolated_config import (
- generate_tokens_values_config_from_files
+from mephisto.generators.form_composer.config_validation.task_data_config import (
+ verify_form_composer_configs
)
-from mephisto.generators.form_composer.configs_validation.extrapolated_config import (
- get_file_urls_from_s3_storage
+from mephisto.generators.form_composer.config_validation.single_token_values_config import (
+ update_single_token_values_config_with_file_urls
)
-from mephisto.generators.form_composer.configs_validation.extrapolated_config import is_s3_url
+from mephisto.generators.form_composer.config_validation.token_sets_values_config import (
+ update_token_sets_values_config_with_premutated_data
+)
+from mephisto.generators.form_composer.config_validation.utils import is_s3_url
from mephisto.operations.registry import get_valid_provider_types
from mephisto.tools.scripts import build_custom_bundle
from mephisto.utils.rich import console
from mephisto.utils.rich import create_table
+FORM_COMPOSER_DATA_DIR_NAME = "data"
+FORM_COMPOSER_DATA_CONFIG_NAME = "data.json"
+FORM_COMPOSER_FORM_CONFIG_NAME = "form_config.json"
+FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME = "token_sets_values_config.json"
+FORM_COMPOSER_SINGLE_TOKEN_VALUES_CONFIG_NAME = "single_token_values_config.json"
+
@click.group(cls=RichGroup)
def cli():
@@ -425,64 +431,30 @@ def review_app(
)
-@cli.command("form_composer", cls=RichCommand)
-@click.option("-m", "--manual-versions", type=(bool), default=False)
-@click.option("-f", "--files-folder", type=(str), default=None)
-def form_composer(manual_versions: bool, files_folder: Optional[str] = None):
- # Get app path to run Python script from there (instead of the current file's directory).
- # This is necessary, because the whole infrastructure is built relative to the location
- # of the called command-line script.
- # The other parts of the logic are inside `form_composer/run.py` script
+def _get_form_composer_app_path() -> str:
app_path = os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
"generators",
"form_composer",
)
+ return app_path
- # Check files and create `data.json` config with units data before running a task
- data_path = os.path.join(app_path, "data")
- extrapolated_form_config_path = os.path.join(data_path, "data.json")
- form_config_path = os.path.join(data_path, "form_config.json")
- tokens_values_config_path = os.path.join(data_path, "tokens_values_config.json")
- # Change dir to app dir
- os.chdir(app_path)
-
- if manual_versions and files_folder:
- print("`--manual-versions` and `--files-folder` parameters cannot be used concurrently")
- return None
+@cli.command("form_composer", cls=RichCommand)
+@click.option("-o", "--task-data-config-only", type=(bool), default=True)
+def form_composer(task_data_config_only: bool = True):
+ app_path = _get_form_composer_app_path()
+ app_data_path = os.path.join(app_path, FORM_COMPOSER_DATA_DIR_NAME)
- if files_folder:
- if is_s3_url(files_folder):
- try:
- files_locations = get_file_urls_from_s3_storage(files_folder)
- except (BotoCoreError, ClientError, NoCredentialsError) as e:
- print(f"Could not retrieve images from S3 URL '{files_folder}'. Reason: {e}")
- return None
-
- if not files_locations:
- print(
- f"Could not retrieve files from '{files_folder}' - "
- f"check if this location exists and contains files"
- )
- return None
+ task_data_config_path = os.path.join(app_data_path, FORM_COMPOSER_DATA_CONFIG_NAME)
- generate_tokens_values_config_from_files(tokens_values_config_path, files_locations)
- else:
- print("`--images-path` must be URL on S3 directory")
- return None
+ # Change dir to app dir
+ os.chdir(app_path)
- if manual_versions:
- # When user wants to use manually composed `data.json` config,
- # we don't need to auto-generate an extrapolated config
- pass
- else:
- create_extrapolated_config(
- form_config_path=form_config_path,
- tokens_values_config_path=tokens_values_config_path,
- extrapolated_form_config_path=extrapolated_form_config_path,
- skip_validating_tokens_values_config=bool(files_folder),
- )
+ verify_form_composer_configs(
+ task_data_config_path=task_data_config_path,
+ task_data_config_only=task_data_config_only,
+ )
# Start the process
process = subprocess.Popen("python ./run.py", shell=True, cwd=app_path)
@@ -498,5 +470,98 @@ def form_composer(manual_versions: bool, files_folder: Optional[str] = None):
process.wait()
+@cli.command("form_composer_config", cls=RichCommand)
+@click.option("-v", "--verify", type=(bool), default=False)
+@click.option("-f", "--update-file-location-values", type=(str), default=None)
+@click.option("-e", "--extrapolate-token-sets", type=(bool), default=False)
+@click.option("-p", "--permutate-single-tokens", type=(bool), default=False)
+def form_composer_config(
+    verify: bool = False,
+    extrapolate_token_sets: bool = False,
+    update_file_location_values: Optional[str] = None,
+    permutate_single_tokens: bool = False,
+):
+    """
+    Prepare (parts of) config for the `form_composer` command.
+    Each option is a separate command: if several are set, only the first (in order below) runs.
+
+    :param verify: Validate all JSON configs currently present in the form builder config directory
+    :param update_file_location_values: Update existing single-token values config
+        with file URLs automatically taken from a location (must be an S3 URL)
+    :param permutate_single_tokens: Create token sets as all possible permutations of values lists
+        defined in single-token values config
+    :param extrapolate_token_sets: Generate form versions based on extrapolated values of token sets
+    """
+    # Get app path to run Python script from there (instead of the current file's directory).
+    # This is necessary, because the whole infrastructure is built relative to the location
+    # of the called command-line script.
+    # The other parts of the logic are inside `form_composer/run***.py` script
+    app_path = _get_form_composer_app_path()
+    app_data_path = os.path.join(app_path, FORM_COMPOSER_DATA_DIR_NAME)
+
+    full_path = lambda data_file: os.path.join(app_data_path, data_file)
+
+    # Paths of every config file that the sub-commands below may read or write
+    task_data_config_path = full_path(FORM_COMPOSER_DATA_CONFIG_NAME)
+    form_config_path = full_path(FORM_COMPOSER_FORM_CONFIG_NAME)
+    token_sets_values_config_path = full_path(FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME)
+    single_token_values_config_path = full_path(FORM_COMPOSER_SINGLE_TOKEN_VALUES_CONFIG_NAME)
+
+    # Change dir to app dir
+    os.chdir(app_path)
+
+    if verify:
+        print(f"[green]Started configs verification in '{task_data_config_path}'[/green]")
+        verify_form_composer_configs(
+            task_data_config_path=task_data_config_path,
+            form_config_path=form_config_path,
+            token_sets_values_config_path=token_sets_values_config_path,
+            single_token_values_config_path=single_token_values_config_path,
+            task_data_config_only=False,
+        )
+        print("[green]Finished successfully[/green]")
+        return None
+
+    if update_file_location_values:
+        print(
+            f"[green]Started updating '{FORM_COMPOSER_SINGLE_TOKEN_VALUES_CONFIG_NAME}' "
+            f"with file URLs from '{update_file_location_values}'[/green]"
+        )
+        if is_s3_url(update_file_location_values):
+            update_single_token_values_config_with_file_urls(
+                url=update_file_location_values,
+                single_token_values_config_path=single_token_values_config_path,
+            )
+            print("[green]Finished successfully[/green]")
+        else:
+            print("`--update-file-location-values` must be a valid S3 URL")
+        return None
+
+    if permutate_single_tokens:
+        print(
+            f"[green]Started updating '{FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME}' "
+            "with permutated single-token values[/green]"
+        )
+        update_token_sets_values_config_with_premutated_data(
+            single_token_values_config_path=single_token_values_config_path,
+            token_sets_values_config_path=token_sets_values_config_path,
+        )
+        print("[green]Finished successfully[/green]")
+        return None
+
+    if extrapolate_token_sets:
+        print(
+            "[green]Started extrapolating token sets values "
+            f"from '{FORM_COMPOSER_TOKEN_SETS_VALUES_CONFIG_NAME}' [/green]"
+        )
+        create_extrapolated_config(
+            form_config_path=form_config_path,
+            token_sets_values_config_path=token_sets_values_config_path,
+            task_data_config_path=task_data_config_path,
+        )
+        print("[green]Finished successfully[/green]")
+        return None
+
+
if __name__ == "__main__":
cli()
diff --git a/mephisto/generators/form_composer/README.md b/mephisto/generators/form_composer/README.md
index d81511580..98327b694 100644
--- a/mephisto/generators/form_composer/README.md
+++ b/mephisto/generators/form_composer/README.md
@@ -6,13 +6,17 @@
This package provides `FormComposer` widget for React-based front-end development for Mephisto tasks.
+You can find a working demo of FormComposer in `examples/form_composer_demo`.
+
+- For details on how to run these examples, refer to the demo's [README.md](/examples/form_composer_demo/README.md)
+
# How to Run
-To create and launch a Form Composer task, create your JSON form configuration,
+To create and launch a FormComposer task, create your JSON form configuration,
and then run the below commands.
-Once Form Composer launches, in the console you will see links like this:
+Once FormComposer launches, in the console you will see links like this:
http://localhost:3000/?worker_id=x&assignment_id=1
To view your Task as a worker, take one of these links and paste it in your browser.
@@ -21,7 +25,18 @@ If launched with `docker-compose`, replace 3000 with the remapped port (e.g. for
#### With docker-compose
-You can launch Form Composer inside a Docker container:
+You can launch FormComposer inside a Docker container:
+
+1. Prepare configs (`form_composer_config` command)
+
+```shell
+docker-compose -f docker/docker-compose.dev.yml run \
+ --build \
+ --rm mephisto_dc \
+ mephisto form_composer_config --extrapolate-token-sets True
+```
+
+2. Run composer itself (`form_composer` command)
```shell
docker-compose -f docker/docker-compose.dev.yml run \
@@ -35,35 +50,265 @@ docker-compose -f docker/docker-compose.dev.yml run \
#### Without docker-compose
First ensure that mephisto package is installed locally - please refer to [Mephisto's main doc](https://mephisto.ai/docs/guides/quickstart/).
-Once that is done, run a `form_composer` command:
+Once that is done, run `form_composer_config` command(s) if needed, and then `form_composer` command:
```shell
-# Sample launching commands
+mephisto form_composer_config --extrapolate-token-sets True
mephisto form_composer
-mephisto form_composer --manual-versions True
-mephisto form_composer --files-folder "https://s3.amazon.com/...."
+```
+
+
+## Using `form_composer_config` utility
+
+The `form_composer_config` utility command helps auto-generate FormComposer config. It supports several options:
+
+```shell
+# Sample launching commands
+mephisto form_composer_config --verify True
+mephisto form_composer_config --extrapolate-token-sets True
+mephisto form_composer_config --permutate-single-tokens True
+mephisto form_composer_config --update-file-location-values "https://s3.amazon.com/...."
```
where
-- `-m/--manual-versions` argument skips auto-generating form versions in `data.json` by extrapolating token values, and instead uses an existing `data.json` file (see [Custom form versions](#custom-form-versions) section)
-- `-f/--files-folder` argument generates token values based on file names found within specified file folder (see a separate section about this mode of running Form Composer)
+- `-v/--verify BOOLEAN` - if truthy, validates all JSON configs currently present in the form builder config directory
+- `-f/--update-file-location-values S3_URL` - generates token values based on file names found within specified S3 folder (see a separate section about this mode of running FormComposer)
+- `-e/--extrapolate-token-sets BOOLEAN` - if truthy, generates Task data config based on provided form config and token sets values
+- `-p/--permutate-single-tokens BOOLEAN` - if truthy, generates token sets values as all possible combinations of values of individual tokens
+
+To understand what "tokens" means, read on about FormComposer config structure.
----
-# Config file structure
+## Config files
-You will need to provide Form Composer with a JSON configuration of your form fields,
+You will need to provide FormComposer with a JSON configuration of your form fields,
and place it in `generators/form-composer/data` directory.
-- The form config file should be named `form_config.json`, and contain a JSON object with one key `form`.
-- If you want to slightly vary your form within a Task (by inserting different values into its text), you need to add a file named `tokens_values_config.json` and containing a JSON array of objects, each with one key `tokens_values` and value representing name-value pairs for the text tokens.
-- For more details, read about dynamic form configs further down.
-Config examples:
-- form config: `examples/form_composer_demo/data/dynamic/form_config.json`
-- token values config: `examples/form_composer_demo/data/dynamic/tokens_values_config.json`
-- resulting extrapolated config: `examples/form_composer_demo/data/dynamic/data.json`
+- The task data config file should be named `data.json`, and contain a list of JSON objects, each one with one key `form`.
+- If you want to slightly vary your form within a Task (by inserting different values into its text), you need to add two files (that will be used to auto-generate `data.json` file):
+ - `token_sets_values_config.json` containing a JSON array of objects (each with one key `tokens_values` and value representing name-value pairs for a set of text tokens to be used in one form version).
+ - `form_config.json` containing a single JSON object with one key `form`.
+- For more detail, read on about dynamic form configs.
+
+For detailed structure of each config file, see [Config file reference](#config-file-reference).
+
+Working config examples are provided in `examples/form_composer_demo/data` directory:
+- task data config: `simple/data.json`
+- form config: `dynamic/form_config.json`
+- token sets values config: `dynamic/token_sets_values_config.json`
+- single tokens values: `dynamic/single_token_values_config.json` to create `token_sets_values_config.json`
+- resulting extrapolated config: `dynamic/data.json`
+
+
+## Embedding FormComposer into custom application
+
+A few tips if you wish to embed FormComposer in your custom application:
+
+- to extrapolate form config (and generate the `data.json` file), call the extrapolator function `mephisto.generators.form_composer.config_validation.task_data_config.create_extrapolated_config`
+ - For a live example, you can explore the source code of [run_task_dynamic.py](/examples/form_composer_demo/run_task_dynamic.py) module
+
+
+---
+
+
+# Multiple form versions
+
+The simplest Task scenario is showing the same exact form to all of your workers. In that case you need to:
+
+- Compose `data.json` file containing definition of a single form (and place it into FormComposer config folder)
+- Optionally, verify your config: `mephisto form_composer_config --verify True`
+- Run FormComposer: `mephisto form_composer`
+
+But suppose you wish to show a slightly different version of the form to your workers. You can do so by defining multiple form versions. FormComposer provides several ways of doing so.
+
+---
+
+## Custom form versions
+
+If your form versions vary considerably (e.g. showing different sets of fields), you should do the following steps:
+
+- Populate these form versions into `data.json` file manually (it will be basically a JSON array of N individual form versions configs)
+- Optionally, verify your config: `mephisto form_composer_config --verify True`
+- Run FormComposer: `mephisto form_composer`
+
+_As a result, for each Task assignment Mephisto will automatically produce N units, each unit having a different form version. In total you will be collecting data from `N * units_per_assignment` workers._
+
+---
+
+## Dynamic form config
+
+If your form versions vary only slightly (e.g. same set of fields, but showing different images or different text), you should use a dynamic form config as follows:
+
+- Ensure you populate these files, and place them into your FormComposer config folder:
+ - `form_config.json`: tokenized form config - same as regular form config, except it will contain tokens within certain objects' attributes (see [Tokens extrapolation](#tokens-extrapolation))
+ - `token_sets_values_config.json`: file containing sets of token values, where each set is used to generate one version of the form (and each form version will be completed by `units_per_assignment` different workers).
+- Optionally, verify your files: `mephisto form_composer_config --verify True`
+- Generate task data config: `mephisto form_composer_config --extrapolate-token-sets True`
+ - This will overwrite existing `data.json` file with auto-generated form versions, by extrapolating provided token sets values
+- Run FormComposer: `mephisto form_composer`
+
+_The number of generated form versions N will be same as number of provided token sets. In total you will be collecting data from `N * units_per_assignment` workers._
+
+---
+
+#### Tokens extrapolation
+
+How does token extrapolation work?
+
+A token is a named text placeholder that gets replaced ("extrapolated") by values specified in `token_sets_values_config.json` (each set of token values produces one form version based on dynamic form config `form_config.json`).
+
+Token placeholders within an attribute are formatted like so: `{{TOKEN_NAME}}`
+
+Tokens can be placed within the following object attributes:
+
+- `help`
+- `instruction`
+- `label`
+- `title`
+- `tooltip`
+
+If you wish to reuse the same token across different form attributes and levels, it's enough to specify it in a set of token values just once. (This also means that token names must be unique within token values sets)
+
+
+---
+
+#### Generate token sets with `--permutate-single-tokens`
+
+In a special case when all of your token sets are simply permutations of several value lists, sets of token values can be easily auto-generated.
+
+- Populate your lists of values for every single token into `single_token_values_config.json` file
+- Optionally, verify your config: `mephisto form_composer_config --verify True`
+- Generate `token_sets_values_config.json` with command: `mephisto form_composer_config --permutate-single-tokens True`
+
+_"Permutation" means all possible combinations of values. For example, permutations of amounts `2, 3`, sizes `big` and animals `cats, dogs` will produce result `2 big cats, 2 big dogs, 3 big cats, 3 big dogs`._
+
+---
+
+#### Generate single token values with `--update-file-location-values`
+
+In a special case when one of your tokens is an S3 file URL, that token's values can be easily auto-generated.
-Here's a brief example of a form config:
+- Make a public S3 folder that will contain only the files that you want (all of them)
+- Run command: `mephisto form_composer_config --update-file-location-values S3_FOLDER_URL`
+- As a result, a token with name `"file_location"` will be added to your `single_token_values_config.json` config file. Its values will be S3 URLs of all files found recursively within the `S3_FOLDER_URL`
+
+---
+
+## Dynamic form config example
+
+Putting it all together, this is a brief example of composing a dynamic form config.
+
+#### Single token values config
+
+Let's start with separate token values in `single_token_values_config.json` file:
+
+```json
+{
+ "actor": ["Carrie Fisher", "Mark Hamill"],
+ "movie_name": ["Star Wars"]
+}
+```
+
+#### Token values config
+
+Permutating these token values will produce this `token_sets_values_config.json` file with token sets values:
+
+```json
+[
+ {
+ "tokens_values": {
+ "actor": "Carrie Fisher",
+ "movie_name": "Star Wars"
+ }
+ },
+ {
+ "tokens_values": {
+ "actor": "Mark Hamill",
+ "movie_name": "Star Wars"
+ }
+ }
+]
+```
+
+#### Form config
+
+These tokens are placed into the `form_config.json` dynamic form config like so:
+
+```json
+{
+ ...
+ "instruction": "Rate {{actor}}'s performance in movie '{{movie_name}}'",
+ ...
+ "help": "Please only consider the movie '{{movie_name}}'",
+ ...
+}
+...
+{
+ ...
+ "instruction": "Rate the plot in movie '{{movie_name}}' out of 10",
+ ...
+}
+```
+
+#### Task data config
+
+After extrapolating attributes from `form_config.json` with token sets from `token_sets_values_config.json`, we get the resulting `data.json` file used for the task:
+
+```json
+// First extrapolated form version
+{
+ ...
+ "instruction": "Rate Carrie Fisher's performance in movie 'Star Wars'",
+ ...
+ "help": "Please only consider the movie 'Star Wars'",
+ ...
+}
+...
+{
+ ...
+ "instruction": "Rate the plot in movie 'Star Wars' out of 10",
+ ...
+},
+// Second extrapolated form version
+{
+ ...
+ "instruction": "Rate Mark Hamill's performance in movie 'Star Wars'",
+ ...
+ "help": "Please only consider the movie 'Star Wars'",
+ ...
+}
+...
+{
+ ...
+ "instruction": "Rate the plot in movie 'Star Wars' out of 10",
+ ...
+}
+```
+
+
+---
+
+
+# Custom field handlers
+
+TBD
+
+---
+
+
+# Custom callbacks
+
+TBD (aka "remote procedure")
+
+
+-----
+
+
+# Config file reference
+
+## Config file: `data.json`
+
+Task data config file `data.json` specifies layout of all form versions that are completed by workers. Here's an abbreviated example of such config:
```json
[
@@ -142,13 +387,12 @@ Here's a brief example of a form config:
"tooltip": "Submit form"
}
}
- }
+ },
+ ...
]
```
----
-
-## Form config levels
+#### Form config levels
Form UI layout consists of the following layers of UI object hierarchy:
@@ -173,7 +417,7 @@ _Note that, due to limitations of JSON format, HTML content needs to be converte
---
-#### Config level: form
+###### Config level: form
`form` is a top-level config object with the following attributes:
@@ -187,7 +431,7 @@ _Note that, due to limitations of JSON format, HTML content needs to be converte
---
-#### Config level: section
+###### Config level: section
Each item of `sections` list is an object with the following attributes:
@@ -200,7 +444,7 @@ Each item of `sections` list is an object with the following attributes:
---
-#### Config level: fieldset
+###### Config level: fieldset
Each item of `fieldsets` list is an object with the following attributes:
@@ -210,7 +454,7 @@ Each item of `fieldsets` list is an object with the following attributes:
---
-#### Config level: row
+###### Config level: row
Each item of `rows` list is an object with the following attributes:
@@ -218,7 +462,7 @@ Each item of `rows` list is an object with the following attributes:
---
-#### Config level: field
+###### Config level: field
Each item of `fields` list is an object that corresponds to the actual form field displayed in the resulting Task UI page.
@@ -243,7 +487,7 @@ Here's example of a single field config:
}
```
-###### Attributes - all fields
+**Attributes - all fields**
The most important attributes are: `label`, `name`, `type`, `validators`
@@ -265,7 +509,7 @@ The most important attributes are: `label`, `name`, `type`, `validators`
- `value` - Initial value of the field (String, Optional)
-###### Attributes - select field
+**Attributes - select field**
- `multiple` - Support selection of multiple provided options, not just one (Boolean. Default: false)
- `options` - list of available options to select from. Each option is an object with these attributes:
@@ -273,7 +517,7 @@ The most important attributes are: `label`, `name`, `type`, `validators`
- `value`: value sent to the server (String|Number|Boolean)
-###### Attributes - checkbox and radio fields
+**Attributes - checkbox and radio fields**
- `options` - list of available options to select from. Each option is an object with these attributes:
- `label`: displayed text (String)
@@ -281,200 +525,49 @@ The most important attributes are: `label`, `name`, `type`, `validators`
- `checked`: initial state of selection (Boolean, default: false)
-# Dynamic form config
-
-If you wish to slightly vary form instructions within the same Task (e.g. show different images or different text), you should use a dynamic form config.
-
----
-
-## Dynamic form config files
-
-Dynamic form config consists of two parts:
-
-- `form_config.json`: tokenized form config - same as non-dynamic form config, except it may contain tokens within certain objects attributes (see [Tokens extrapolation](#tokens-extrapolation))
-- `tokens_values_config.json`: file containing sets of token values, where each set is plugged into a dynamic form config to generate its form version (each form version will be completed by `units_per_assignment` different workers).
-
+## Config file: `form_config.json`
-#### Extrapolated config
+Form config file `form_config.json` specifies layout of a form in the same way as `data.json`, but with a few notable differences:
+- It contains a single JSON object (not a JSON array of objects)
+- Some of its form attributes definitions must contain dynamic tokens (whose values will be extrapolated, i.e. substituted with variable chunks of text) - see further below.
-During bulding a Task with dynamic form config, the resulting config containing all form vesions will be placed in `data.json` file (next to `form_config.json` file).
-Note that each form version in `data.json` represents one assignment in Mephisto.
+## Config file: `token_sets_values_config.json`
-- In your YAML Task config, always refer to the extrapolated config file `data.json` (not the foorm config file)
-- Every time you re-run Form Composer, `data.json` file will be overwritten
-- Run generator with command: `mephisto form_composer`
+Sets of token values are specified as a JSON array of objects, where each object has one key `"tokens_values"`. Under that key there's a key-value definition of all tokens in that set.
-
-#### Custom form versions
-
-Suppose your form variations go beyond slight text changes (e.g. you wish to add a fieldset in one version of form config). In that case:
-
-- Create your own `data.json` file manually (it will be basically a JSON list of copy-pasted individual form config versions)
-- You don't need to create `form_config.json` and `tokens_values_config.json` files
-- Run generator with command: `mephisto form_composer --manual-versions True`
-
----
-
-## Tokens extrapolation
-
-A token is a named text placeholder that gets replaced ("extrapolated") by values specified in `tokens_values_config.json` (each set of `tokens_values` specifies a form version, and contains one such value).
-
-Token placeholders within an attribute looks like so: `{{TOKEN_NAME}}`
-
-Tokens can be placed within the following object attributes:
-
-- `help`
-- `instruction`
-- `label`
-- `title`
-- `tooltip`
-
-When reusing a token with same name in different form attributes (across all levels of form config), you should specify it in each `tokens_values` just once, for convenience.
-(This also means that token names must be unique within the entire form config.)
-
-
----
-
-## Dynamic form config with `--files-folder`
-
-Consider a special case when form config has only one token, a file path. Form Composer offers a shortcut to save your time on creating `tokens_values_config.json` file in this scenario. Simply launch task with this command:
-
-```
-mephisto form_composer --files-folder [value]
-```
-
-Argument `--files-folder [value]` does the following:
-- finds folder specified by `[value]`, which currently can be an S3 folder URL like `"https://s3.amazon.com/...."`
-- finds (recursively) location of all files within that folder (e.g. S3 URLs)
-- generates `tokens_values_config.json` file that looks like so:
-```json
-[
- {
- "tokens_values": {
- "file_location": "[location/of/file1]",
- }
- },
- {
- "tokens_values": {
- "file_location": "[location/of/file2]"
- }
- },
- ...
-]
-```
-- now that tokens values config is generated, Task launch proceeds like for a normal dynamic form config
-
-Note that:
-- `form_config.json` file must contain one, and only one, token name `{{file_location}}`
-- `tokens_values_config.json` file is not needed in this case (it will be auto-generated)
-
----
-
-## Embedding FormBuilder into custom application
-
-If you wish to embed FormComposer in your custom application, a few tips about extrapolator function `mephisto.generators.form_composer.configs_validation.extrapolated_config.create_extrapolated_config` (that generates extrapolated `data.json` config):
-
-- call extrapolator function if you want to extrapolate token values
- - You can see how it's done from [Example docs](#live-examples) here and by exploring source code of [run_task_dynamic.py](/examples/form_composer_demo/run_task_dynamic.py) module.
-- NOT call extrapolator function if you already have a custom `data.json` config
-
----
-
-## Config files example
-
-
-#### Form config
-
-Here's how fields with tokens look like in `form_config.json` file:
-
-```json
-{
- ...
- "instruction": "Rate {{actor}}'s performance in movie '{{movie_name}}'",
- ...
- "help": "Please only consider the movie '{{movie_name}}'",
- ...
-}
-...
-{
- ...
- "instruction": "Rate the plot in movie '{{movie_name}}'?",
- ...
-}
-```
-
-
-#### Token values config
-
-Here's how token values are specified `tokens_values_config.json` file:
+Example:
```json
[
{
"tokens_values": {
"actor": "Carrie Fisher",
- "movie_name": "Star Wars"
+ "movie_name": "Star Wars",
+ "genre": "Sci-Fi"
}
},
{
"tokens_values": {
"actor": "Keanu Reeves",
- "movie_name": "The Matrix"
+ "movie_name": "The Matrix",
+ "genre": "Sci-Fi"
}
}
]
```
+## Config file: `single_token_values_config.json`
-#### Extrapolated config
+Value lists for individual tokens are specified as a JSON object of key-value pairs, where each key is a token name and each value is a JSON array of that token's possible values.
-This is how resulting `data.json` file will look like, after form attributes from `form_config.json` get extrapolated with values from `tokens_values_config.json`:
+Example:
```json
-// First extrapolated form version
-{
- ...
- "instruction": "Rate Carrie Fisher's performance in movie 'Star Wars'",
- ...
- "help": "Please only consider the movie 'Star Wars'",
- ...
-}
-...
{
- ...
- "instruction": "Rate the plot in movie 'Star Wars'?",
- ...
-},
-// Second extrapolated form version
-{
- ...
- "instruction": "Rate Keanu Reeves's performance in movie 'The Matrix'",
- ...
- "help": "Please only consider the movie 'The Matrix'",
- ...
+ "actor": ["Carrie Fisher", "Keanu Reeves"],
+ "movie_name": ["Star Wars", "The Matrix"],
+ "genre": ["Sci-Fi"]
}
-...
-{
- ...
- "instruction": "Rate the plot in movie 'The Matrix'?",
- ...
-}
-```
-
-Once a Task is launched, each of these two form versions will be completed `units_per_assignment` times (by different workers)
-
-
----
-## Custom field handlers
-
-TBD
-
----
-
-## Live Examples
-
-You can investigate live examples of Form Composer in `examples/form_composer_demo` directory,
-
-For more details on how to run these examples, refer to this [README.md](/examples/form_composer_demo/README.md).
+```
diff --git a/mephisto/generators/form_composer/configs_validation/__init__.py b/mephisto/generators/form_composer/config_validation/__init__.py
similarity index 100%
rename from mephisto/generators/form_composer/configs_validation/__init__.py
rename to mephisto/generators/form_composer/config_validation/__init__.py
diff --git a/mephisto/generators/form_composer/configs_validation/common_validation.py b/mephisto/generators/form_composer/config_validation/common_validation.py
similarity index 100%
rename from mephisto/generators/form_composer/configs_validation/common_validation.py
rename to mephisto/generators/form_composer/config_validation/common_validation.py
diff --git a/mephisto/generators/form_composer/configs_validation/config_validation_constants.py b/mephisto/generators/form_composer/config_validation/config_validation_constants.py
similarity index 100%
rename from mephisto/generators/form_composer/configs_validation/config_validation_constants.py
rename to mephisto/generators/form_composer/config_validation/config_validation_constants.py
diff --git a/mephisto/generators/form_composer/configs_validation/form_config.py b/mephisto/generators/form_composer/config_validation/form_config.py
similarity index 98%
rename from mephisto/generators/form_composer/configs_validation/form_config.py
rename to mephisto/generators/form_composer/config_validation/form_config.py
index 3beefafbe..b4b3bec0a 100644
--- a/mephisto/generators/form_composer/configs_validation/form_config.py
+++ b/mephisto/generators/form_composer/config_validation/form_config.py
@@ -55,7 +55,7 @@ def validate_form_config(config_json: dict) -> Tuple[bool, List[str]]:
if not isinstance(config_json, dict):
is_valid = False
- errors.append("Form config must be a dictionary.")
+ errors.append("Form config must be a key/value JSON Object.")
elif config_json.keys() != AVAILABLE_CONFIG_ATTRS.keys():
is_valid = False
diff --git a/mephisto/generators/form_composer/config_validation/single_token_values_config.py b/mephisto/generators/form_composer/config_validation/single_token_values_config.py
new file mode 100644
index 000000000..f2b9d51ab
--- /dev/null
+++ b/mephisto/generators/form_composer/config_validation/single_token_values_config.py
@@ -0,0 +1,56 @@
+from typing import Dict
+from typing import List
+from typing import Tuple
+
+from botocore.exceptions import BotoCoreError
+from botocore.exceptions import ClientError
+from botocore.exceptions import NoCredentialsError
+
+from .utils import get_file_urls_from_s3_storage
+from .utils import write_config_to_file
+
+
+def validate_single_token_values_config(
+ config_json: Dict[str, List[str]],
+) -> Tuple[bool, List[str]]:
+ is_valid = True
+ errors = []
+
+ if not isinstance(config_json, dict):
+ is_valid = False
+ errors.append("Config must be a key/value JSON Object.")
+ return is_valid, errors
+
+ for i, token_values in enumerate(config_json.items()):
+ token, values = token_values
+
+ if not values:
+ is_valid = False
+ errors.append(
+ f"You passed empty array of values for token '{token}'. "
+                f"It must contain at least one value, or just remove it if you left it by mistake."
+ )
+
+ return is_valid, errors
+
+
+def update_single_token_values_config_with_file_urls(
+ url: str, single_token_values_config_path: str,
+):
+ try:
+ files_locations = get_file_urls_from_s3_storage(url)
+ except (BotoCoreError, ClientError, NoCredentialsError) as e:
+ print(f"Could not retrieve files from S3 URL '{url}'. Reason: {e}")
+ return None
+
+ if not files_locations:
+ print(
+ f"Could not retrieve files from '{url}' - "
+ f"check if this location exists and contains files"
+ )
+ return None
+
+ single_token_values_config_data = {
+ "file_location": files_locations,
+ }
+ write_config_to_file(single_token_values_config_data, single_token_values_config_path)
diff --git a/mephisto/generators/form_composer/config_validation/task_data_config.py b/mephisto/generators/form_composer/config_validation/task_data_config.py
new file mode 100644
index 000000000..910ffad8e
--- /dev/null
+++ b/mephisto/generators/form_composer/config_validation/task_data_config.py
@@ -0,0 +1,345 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os.path
+import re
+from copy import deepcopy
+from typing import List
+from typing import Optional
+from typing import Tuple
+
+from .config_validation_constants import ATTRS_SUPPORTING_TOKENS
+from .form_config import validate_form_config
+from .single_token_values_config import validate_single_token_values_config
+from .token_sets_values_config import validate_token_sets_values_config
+from .utils import make_error_message
+from .utils import read_config_file
+from .utils import write_config_to_file
+
+FILE_LOCATION_TOKEN_NAME = "file_location"
+
+
+def _extrapolate_tokens_values(text: str, tokens_values: dict) -> str:
+ for token, value in tokens_values.items():
+ text = re.sub(r"\{\{(\s*)" + token + r"(\s*)\}\}", value, text)
+ return text
+
+
+def _set_tokens_in_form_config_item(item: dict, tokens_values: dict):
+ for attr_name in ATTRS_SUPPORTING_TOKENS:
+ item_attr = item.get(attr_name)
+ if not item_attr:
+ continue
+
+ item[attr_name] = _extrapolate_tokens_values(item_attr, tokens_values)
+
+
+def _collect_form_config_items_to_extrapolate(config_data: dict) -> List[dict]:
+ items_to_extrapolate = []
+
+ form = config_data["form"]
+ items_to_extrapolate.append(form)
+
+ sections = form["sections"]
+ for section in sections:
+ items_to_extrapolate.append(section)
+
+ fieldsets = section["fieldsets"]
+ for fieldset in fieldsets:
+ items_to_extrapolate.append(fieldset)
+
+ rows = fieldset["rows"]
+ for row in rows:
+ items_to_extrapolate.append(row)
+
+ fields = row["fields"]
+ for field in fields:
+ items_to_extrapolate.append(field)
+
+ return items_to_extrapolate
+
+
+def _collect_tokens_from_form_config(config_data: dict) -> Tuple[set, List[str]]:
+ items_to_extrapolate = _collect_form_config_items_to_extrapolate(config_data)
+ tokens_in_form_config = set()
+ tokens_in_unexpected_attrs_errors = []
+
+ for item in items_to_extrapolate:
+ for attr_name in ATTRS_SUPPORTING_TOKENS:
+ item_attr = item.get(attr_name)
+ if not item_attr:
+ continue
+ tokens_in_form_config.update(set(re.findall(r"\{\{\s*(\w+?)\s*\}\}", item_attr)))
+
+ attrs_not_suppoting_tokens = set(item.keys()) - set(ATTRS_SUPPORTING_TOKENS)
+ for attr_name in attrs_not_suppoting_tokens:
+ item_attr = item.get(attr_name)
+ if isinstance(item_attr, str):
+ found_attr_tokens = re.findall(r"\{\{\s*(\w+?)\s*\}\}", item_attr)
+ if found_attr_tokens:
+ found_attr_tokens_string = ", ".join([f"'{t}'" for t in found_attr_tokens])
+ tokens_in_unexpected_attrs_errors.append(
+ f"You tried to set tokens {found_attr_tokens_string} "
+ f"in attribute '{attr_name}' with value '{item_attr}'. "
+ f"You can use tokens only in following attributes: "
+ f"{', '.join(ATTRS_SUPPORTING_TOKENS)}"
+ )
+
+ return tokens_in_form_config, tokens_in_unexpected_attrs_errors
+
+
+def _extrapolate_tokens_in_form_config(config_data: dict, tokens_values: dict) -> dict:
+ items_to_extrapolate = _collect_form_config_items_to_extrapolate(config_data)
+ for item in items_to_extrapolate:
+ _set_tokens_in_form_config_item(item, tokens_values)
+ return config_data
+
+
+def _validate_tokens_in_both_configs(
+ form_config_data, token_sets_values_config_data,
+) -> Tuple[set, set, list]:
+ tokens_from_form_config, tokens_in_unexpected_attrs_errors = (
+ _collect_tokens_from_form_config(form_config_data)
+ )
+ tokens_from_token_sets_values_config = set([
+ token_name
+ for token_set_values_data in token_sets_values_config_data
+ for token_name in token_set_values_data.get("tokens_values", {}).keys()
+
+ ])
+ # Token names present in token values config, but not in form config
+ overspecified_tokens = tokens_from_token_sets_values_config - tokens_from_form_config
+ # Token names present in form config, but not in token values config
+ underspecified_tokens = tokens_from_form_config - tokens_from_token_sets_values_config
+ return overspecified_tokens, underspecified_tokens, tokens_in_unexpected_attrs_errors
+
+
+def _combine_extrapolated_form_configs(
+ form_config_data: dict,
+ token_sets_values_config_data: List[dict],
+) -> List[dict]:
+ errors = []
+
+ # Validate Form config
+ form_config_is_valid, form_config_errors = validate_form_config(form_config_data)
+
+ if not form_config_is_valid:
+ # Stop generating a Task, the config is incorrect
+ raise ValueError("\n" + "\n\n".join(form_config_errors))
+
+ token_sets_values_config_is_valid, token_sets_values_data_config_errors = (
+ validate_token_sets_values_config(token_sets_values_config_data)
+ )
+
+ # Validate that same token names are present in both configs
+ (
+ overspecified_tokens,
+ underspecified_tokens,
+ tokens_in_unexpected_attrs_errors,
+ ) = _validate_tokens_in_both_configs(
+ form_config_data, token_sets_values_config_data,
+ )
+
+ # Output errors, if any
+ if overspecified_tokens:
+ errors.append(
+ f"Values for the following tokens are provided in token sets values config, "
+ f"but they are not defined in the form config: "
+ f"{', '.join(overspecified_tokens)}."
+ )
+ if underspecified_tokens:
+ errors.append(
+ f"The following tokens are specified in the form config, "
+ f"but their values are not provided in the token sets values config: "
+ f"{', '.join(underspecified_tokens)}."
+ )
+
+ if tokens_in_unexpected_attrs_errors:
+ errors = errors + tokens_in_unexpected_attrs_errors
+
+ if not form_config_is_valid:
+ errors.append(make_error_message("Form config is invalid.", form_config_errors))
+
+ if not token_sets_values_config_is_valid:
+ errors.append(make_error_message(
+            "Token sets values config is invalid.", token_sets_values_data_config_errors,
+ ))
+
+ if errors:
+ # Stop generating a Task, the config is incorrect
+ raise ValueError("\n" + "\n\n".join(errors))
+
+ # If no errors, combine extrapolated form versions to create Task data config
+ combined_config = []
+ if token_sets_values_config_data:
+ for token_sets_values in token_sets_values_config_data:
+ if token_sets_values == {}:
+ combined_config.append(form_config_data)
+ else:
+ form_config_data_with_tokens = _extrapolate_tokens_in_form_config(
+ deepcopy(form_config_data), token_sets_values["tokens_values"],
+ )
+ combined_config.append(form_config_data_with_tokens)
+ else:
+        # If no config with tokens values was provided, then
+ # we just create one-unit config and copy form config into it as-is
+ combined_config.append(form_config_data)
+
+ return combined_config
+
+
+def create_extrapolated_config(
+ form_config_path: str,
+ token_sets_values_config_path: str,
+ task_data_config_path: str,
+):
+ # Check if files exist
+ if not os.path.exists(form_config_path):
+ raise FileNotFoundError(f"Create file '{form_config_path}' and add form configuration")
+
+ # Read JSON from files
+ form_config_data = read_config_file(form_config_path)
+
+ if os.path.exists(token_sets_values_config_path):
+ token_sets_values_data = read_config_file(token_sets_values_config_path)
+ else:
+ token_sets_values_data = []
+
+ # Create combined config
+ try:
+ extrapolated_form_config_data = _combine_extrapolated_form_configs(
+ form_config_data,
+ token_sets_values_data,
+ )
+ write_config_to_file(extrapolated_form_config_data, task_data_config_path)
+ except ValueError as e:
+ print(f"Could not extrapolate form configs: {e}")
+
+
+def generate_tokens_values_config_from_files(token_sets_values_config_path: str, files: List[str]):
+ token_sets_values_config_data = []
+
+ for i, file_location in enumerate(files):
+ token_sets_values_config_data.append(dict(
+ tokens_values={
+ FILE_LOCATION_TOKEN_NAME: file_location,
+ },
+ ))
+
+ try:
+ write_config_to_file(token_sets_values_config_data, token_sets_values_config_path)
+ except ValueError as e:
+ print(f"Could not write tokens values to file: {e}")
+
+
+def validate_task_data_config(config_json: List[dict]) -> Tuple[bool, List[str]]:
+ is_valid = True
+ errors = []
+
+ if not isinstance(config_json, list):
+ is_valid = False
+ errors.append("Config must be a JSON Array.")
+
+ if config_json:
+ if not all(config_json):
+ is_valid = False
+ errors.append("Task data config must contain at least one non-empty item.")
+
+ # Validate each form version contained in task data config
+ for item in config_json:
+ form_config_is_valid, form_config_errors = validate_form_config(item)
+ if not form_config_is_valid:
+ is_valid = False
+ errors += form_config_errors
+
+ return is_valid, errors
+
+
+def verify_form_composer_configs(
+ task_data_config_path: str,
+ form_config_path: Optional[str] = None,
+ token_sets_values_config_path: Optional[str] = None,
+ single_token_values_config_path: Optional[str] = None,
+ task_data_config_only: bool = False,
+):
+ errors = []
+
+ try:
+ # 1. Validate data config
+ task_data_config_data = read_config_file(task_data_config_path)
+
+ task_data_config_is_valid, task_data_config_errors = validate_task_data_config(
+ task_data_config_data,
+ )
+
+ if not task_data_config_is_valid:
+ errors.append(make_error_message(
+ "Task data config is invalid.", task_data_config_errors,
+ ))
+
+ if task_data_config_only:
+ if errors:
+ raise ValueError("\n" + "\n\n".join(errors))
+
+ return None
+
+        # 2. Validate form config
+ form_config_data = read_config_file(form_config_path)
+
+ form_config_is_valid, form_config_errors = validate_form_config(form_config_data)
+
+ if not form_config_is_valid:
+ errors.append(make_error_message("Form config is invalid.", form_config_errors))
+
+ # 3. Validate token sets values config
+ if os.path.exists(token_sets_values_config_path):
+ token_sets_values_data = read_config_file(token_sets_values_config_path)
+ else:
+ token_sets_values_data = []
+
+ (
+ overspecified_tokens,
+ underspecified_tokens,
+ tokens_in_unexpected_attrs_errors,
+ ) = _validate_tokens_in_both_configs(
+ form_config_data, token_sets_values_data,
+ )
+
+ # Output errors, if any
+ if overspecified_tokens:
+ errors.append(
+ f"Values for the following tokens are provided in token sets values config, "
+ f"but they are not defined in the form config: "
+ f"{', '.join(overspecified_tokens)}."
+ )
+ if underspecified_tokens:
+ errors.append(
+ f"The following tokens are specified in the form config, "
+ f"but their values are not provided in the token sets values config: "
+ f"{', '.join(underspecified_tokens)}."
+ )
+
+ if tokens_in_unexpected_attrs_errors:
+ errors = errors + tokens_in_unexpected_attrs_errors
+
+ # 4. Validate single token values config
+ single_token_values_config_data = read_config_file(single_token_values_config_path)
+
+ single_token_values_config_is_valid, single_token_values_config_errors = (
+ validate_single_token_values_config(single_token_values_config_data)
+ )
+
+ if not single_token_values_config_is_valid:
+ token_sets_values_data_config_errors = [
+ f" - {e}" for e in single_token_values_config_errors
+ ]
+ errors_string = "\n".join(token_sets_values_data_config_errors)
+ errors.append(f"Single token values config is invalid. Errors:\n{errors_string}")
+
+ if errors:
+ raise ValueError("\n" + "\n\n".join(errors))
+
+ except ValueError as e:
+ print(f"Could not extrapolate form configs: {e}")
diff --git a/mephisto/generators/form_composer/config_validation/token_sets_values_config.py b/mephisto/generators/form_composer/config_validation/token_sets_values_config.py
new file mode 100644
index 000000000..5f7d8d014
--- /dev/null
+++ b/mephisto/generators/form_composer/config_validation/token_sets_values_config.py
@@ -0,0 +1,93 @@
+import itertools
+import json
+from json import JSONDecodeError
+from typing import Dict
+from typing import List
+from typing import Tuple
+
+from .common_validation import validate_config_dict_item
+from .config_validation_constants import AVAILABLE_TASK_ATTRS
+from .single_token_values_config import validate_single_token_values_config
+from .utils import make_error_message
+from .utils import read_config_file
+from .utils import write_config_to_file
+
+TokensPermutationType = List[
+ Dict[
+ str, Dict[
+ str, List[str]
+ ]
+ ]
+]
+
+
+def validate_token_sets_values_config(config_json: List[dict]) -> Tuple[bool, List[str]]:
+ is_valid = True
+ errors = []
+
+ if not isinstance(config_json, list):
+ is_valid = False
+ errors.append("Config must be a JSON Array.")
+
+ if config_json:
+ if not all(config_json):
+ is_valid = False
+ errors.append("Config must contain at least one non-empty item.")
+
+ for item in config_json:
+ item_is_valid = validate_config_dict_item(
+ item, "item_tokens_values", AVAILABLE_TASK_ATTRS, errors,
+ )
+ if not item_is_valid:
+ is_valid = False
+
+ return is_valid, errors
+
+
+def _premutate_single_tokents(data: Dict[str, List[str]]) -> TokensPermutationType:
+ all_permutations = []
+ # Make a list to iterate many times
+ data_keys = list(data.keys())
+
+ # Collect a list of values lists in data keys order
+ sorted_values_lists: List[list] = [values for token, values in data.items()]
+
+    # Build one dict per combination (Cartesian product) of token values
+ for i, row in enumerate(itertools.product(*sorted_values_lists, repeat=1)):
+ single_permudation = {}
+ for y, key in enumerate(data_keys):
+ single_permudation[key] = row[y]
+
+ all_permutations.append(
+ {
+ "tokens_values": single_permudation,
+ }
+ )
+
+ return all_permutations
+
+
+def update_token_sets_values_config_with_premutated_data(
+ single_token_values_config_path: str,
+ token_sets_values_config_path: str,
+):
+ # Read JSON from files
+ single_token_values_config_data = read_config_file(single_token_values_config_path)
+
+ single_token_values_config_is_valid, single_token_values_config_errors = (
+ validate_single_token_values_config(single_token_values_config_data)
+ )
+
+ errors = []
+ if not single_token_values_config_is_valid:
+ errors.append(make_error_message(
+ "Single token values config is invalid.", single_token_values_config_errors,
+ ))
+
+ if errors:
+ # Stop generating a Task, the config is incorrect
+ raise ValueError("\n" + "\n\n".join(errors))
+
+ premutated_data = _premutate_single_tokents(single_token_values_config_data)
+
+ write_config_to_file(premutated_data, token_sets_values_config_path)
diff --git a/mephisto/generators/form_composer/config_validation/utils.py b/mephisto/generators/form_composer/config_validation/utils.py
new file mode 100644
index 000000000..931d0d7ba
--- /dev/null
+++ b/mephisto/generators/form_composer/config_validation/utils.py
@@ -0,0 +1,79 @@
+import json
+import os
+from json import JSONDecodeError
+from typing import List
+from typing import Tuple
+from typing import Union
+from urllib.parse import urljoin
+from urllib.parse import urlparse
+
+import boto3
+
+from mephisto.generators.form_composer.constants import JSON_IDENTATION
+
+
+def write_config_to_file(config_data: Union[List[dict], dict], file_path: str):
+ config_str = json.dumps(config_data, indent=JSON_IDENTATION)
+
+ with open(file_path, "w") as f:
+ f.write(config_str)
+
+
+def is_s3_url(value: str) -> bool:
+ if isinstance(value, str):
+ parsed_url = urlparse(value)
+ return bool(
+ parsed_url.scheme == 'https' and
+ "s3" in parsed_url.hostname and
+ parsed_url.netloc and
+ parsed_url.path
+ )
+
+ return False
+
+
+def _get_bucket_and_key_from_S3_url(s3_url: str) -> Tuple[str, str]:
+ parsed_url = urlparse(s3_url)
+ bucket_name = parsed_url.hostname.split('.')[0]
+ relative_path = parsed_url.path
+
+ if not relative_path:
+ raise ValueError(f'Cannot extract S3 key from invalid URL "{s3_url}"')
+
+ # Remove a slash from the beginning of the path
+ s3_key = relative_path[1:]
+ return bucket_name, s3_key
+
+
+def get_file_urls_from_s3_storage(s3_url: str) -> List[str]:
+ urls = []
+
+ base_url = "{0.scheme}://{0.netloc}/".format(urlparse(s3_url))
+ bucket, s3_path = _get_bucket_and_key_from_S3_url(s3_url)
+
+ s3 = boto3.resource("s3")
+ my_bucket = s3.Bucket(bucket)
+
+ for object_summary in my_bucket.objects.filter(Prefix=s3_path):
+ file_s3_key: str = object_summary.key
+ filename = os.path.basename(file_s3_key)
+ is_file = bool(filename)
+ if is_file:
+ urls.append(urljoin(base_url, file_s3_key))
+
+ return urls
+
+
+def read_config_file(config_path: str) -> Union[List[dict], dict]:
+ try:
+ with open(config_path) as config_file:
+ config_data = json.load(config_file)
+ except (JSONDecodeError, TypeError, FileNotFoundError):
+ print(f"Could not read JSON from '{config_path}' file")
+ raise
+ return config_data
+
+
+def make_error_message(main_message: str, error_list: List[str]) -> str:
+ errors_bullet = "\n - " + "\n - ".join(map(str, error_list))
+ return f"{main_message}. Errors:{errors_bullet}"
diff --git a/mephisto/generators/form_composer/configs_validation/extrapolated_config.py b/mephisto/generators/form_composer/configs_validation/extrapolated_config.py
deleted file mode 100644
index 2b6b9c0e2..000000000
--- a/mephisto/generators/form_composer/configs_validation/extrapolated_config.py
+++ /dev/null
@@ -1,292 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) Facebook, Inc. and its affiliates.
-# This source code is licensed under the MIT license found in the
-# LICENSE file in the root directory of this source tree.
-
-import json
-import os.path
-import re
-from copy import deepcopy
-from json import JSONDecodeError
-from typing import List
-from typing import Tuple
-from urllib.parse import urljoin
-from urllib.parse import urlparse
-
-import boto3
-
-from mephisto.generators.form_composer.constants import JSON_IDENTATION
-from .config_validation_constants import ATTRS_SUPPORTING_TOKENS
-from .form_config import validate_form_config
-from .tokens_values_config import validate_tokens_values_config
-
-FILE_LOCATION_TOKEN_NAME = "file_location"
-
-
-def _extrapolate_tokens_values(text: str, tokens_values: dict) -> str:
- for token, value in tokens_values.items():
- text = re.sub(r"\{\{(\s*)" + token + r"(\s*)\}\}", value, text)
- return text
-
-
-def _set_tokens_in_form_config_item(item: dict, tokens_values: dict):
- for attr_name in ATTRS_SUPPORTING_TOKENS:
- item_attr = item.get(attr_name)
- if not item_attr:
- continue
-
- item[attr_name] = _extrapolate_tokens_values(item_attr, tokens_values)
-
-
-def _collect_form_config_items_to_extrapolate(config_data: dict) -> List[dict]:
- items_to_extrapolate = []
-
- form = config_data["form"]
- items_to_extrapolate.append(form)
-
- sections = form["sections"]
- for section in sections:
- items_to_extrapolate.append(section)
-
- fieldsets = section["fieldsets"]
- for fieldset in fieldsets:
- items_to_extrapolate.append(fieldset)
-
- rows = fieldset["rows"]
- for row in rows:
- items_to_extrapolate.append(row)
-
- fields = row["fields"]
- for field in fields:
- items_to_extrapolate.append(field)
-
- return items_to_extrapolate
-
-
-def _collect_tokens_from_form_config(config_data: dict) -> Tuple[set, List[str]]:
- items_to_extrapolate = _collect_form_config_items_to_extrapolate(config_data)
- tokens_in_form_config = set()
- tokens_in_unexpected_attrs_errors = []
-
- for item in items_to_extrapolate:
- for attr_name in ATTRS_SUPPORTING_TOKENS:
- item_attr = item.get(attr_name)
- if not item_attr:
- continue
- tokens_in_form_config.update(set(re.findall(r"\{\{\s*(\w+?)\s*\}\}", item_attr)))
-
- attrs_not_suppoting_tokens = set(item.keys()) - set(ATTRS_SUPPORTING_TOKENS)
- for attr_name in attrs_not_suppoting_tokens:
- item_attr = item.get(attr_name)
- if isinstance(item_attr, str):
- found_attr_tokens = re.findall(r"\{\{\s*(\w+?)\s*\}\}", item_attr)
- if found_attr_tokens:
- found_attr_tokens_string = ", ".join([f"'{t}'" for t in found_attr_tokens])
- tokens_in_unexpected_attrs_errors.append(
- f"You tried to set tokens {found_attr_tokens_string} "
- f"in attribute '{attr_name}' with value '{item_attr}'. "
- f"You can use tokens only in following attributes: "
- f"{', '.join(ATTRS_SUPPORTING_TOKENS)}"
- )
-
- return tokens_in_form_config, tokens_in_unexpected_attrs_errors
-
-
-def _extrapolate_tokens_in_form_config(config_data: dict, tokens_values: dict) -> dict:
- items_to_extrapolate = _collect_form_config_items_to_extrapolate(config_data)
- for item in items_to_extrapolate:
- _set_tokens_in_form_config_item(item, tokens_values)
- return config_data
-
-
-def _combine_extrapolated_form_configs(
- form_config_data: dict,
- tokens_values_config_data: List[dict],
- skip_validating_tokens_values_config: bool,
-) -> List[dict]:
- errors = []
-
- # Validate Form config
- form_config_is_valid, form_config_errors = validate_form_config(form_config_data)
-
- if not form_config_is_valid:
- # Stop generating a Task, the config is incorrect
- raise ValueError("\n" + "\n\n".join(form_config_errors))
-
- # Validate token values config
- if skip_validating_tokens_values_config:
- tokens_values_config_is_valid, tokens_values_data_config_errors = True, []
- else:
- tokens_values_config_is_valid, tokens_values_data_config_errors = (
- validate_tokens_values_config(tokens_values_config_data)
- )
-
- # Validate tokens in both configs
- tokens_from_form_config, tokens_in_unexpected_attrs_errors = _collect_tokens_from_form_config(
- form_config_data,
- )
- tokens_from_tokens_values_config = set(sum(
- [list(u["tokens_values"].keys()) for u in tokens_values_config_data],
- [],
- ))
-
- # Token names present in token values config, but not in form config
- overspecified_tokens = tokens_from_tokens_values_config - tokens_from_form_config
- # Token names present in form config, but not in token values config
- underspecified_tokens = tokens_from_form_config - tokens_from_tokens_values_config
-
- # Output errors, if any
- if overspecified_tokens:
- errors.append(
- f"Values for the following tokens are provided in tokens value config, "
- f"but they are not defined in the form config: "
- f"{', '.join(overspecified_tokens)}."
- )
- if underspecified_tokens:
- errors.append(
- f"The following tokens are specified in the form config, "
- f"but their values are not provided in the tokens values config: "
- f"{', '.join(underspecified_tokens)}."
- )
-
- if tokens_in_unexpected_attrs_errors:
- errors = errors + tokens_in_unexpected_attrs_errors
-
- if not form_config_is_valid:
- form_config_errors = [f" - {e}" for e in form_config_errors]
- errors_string = "\n".join(form_config_errors)
- errors.append(f"Form config is invalid. Errors:\n{errors_string}")
-
- if not tokens_values_config_is_valid:
- tokens_values_data_config_errors = [f" - {e}" for e in tokens_values_data_config_errors]
- errors_string = "\n".join(tokens_values_data_config_errors)
- errors.append(f"Units data config is invalid. Errors:\n{errors_string}")
-
- if errors:
- # Stop generating a Task, the config is incorrect
- raise ValueError("\n" + "\n\n".join(errors))
-
- # If no errors, combine extrapolated form versions to create Task data config
- combined_config = []
- if tokens_values_config_data:
- for unit_tokens_values in tokens_values_config_data:
- if unit_tokens_values == {}:
- combined_config.append(form_config_data)
- else:
- form_config_data_with_tokens = _extrapolate_tokens_in_form_config(
- deepcopy(form_config_data), unit_tokens_values["tokens_values"],
- )
- combined_config.append(form_config_data_with_tokens)
- else:
- # If no config with tokens values was added than
- # we just create one-unit config and copy form config into it as-is
- combined_config.append(form_config_data)
-
- return combined_config
-
-
-def _write_config_to_file(config_data: List[dict], file_path: str):
- config_str = json.dumps(config_data, indent=JSON_IDENTATION)
-
- with open(file_path, "w") as f:
- f.write(config_str)
-
-
-def create_extrapolated_config(
- form_config_path: str,
- tokens_values_config_path: str,
- extrapolated_form_config_path: str,
- skip_validating_tokens_values_config: bool = False,
-):
- # Check if files exist
- if not os.path.exists(form_config_path):
- raise FileNotFoundError(f"Create file '{form_config_path}' and add form configuration")
-
- # Read JSON from files
- try:
- with open(form_config_path) as form_config_file:
- form_config_data = json.load(form_config_file)
- except (JSONDecodeError, TypeError):
- print(f"Could not read JSON from '{form_config_path}' file")
- raise
-
- if os.path.exists(tokens_values_config_path):
- try:
- with open(tokens_values_config_path) as tokens_values_data_config_file:
- tokens_values_data = json.load(tokens_values_data_config_file)
- except (JSONDecodeError, TypeError):
- print(f"Could not read JSON from '{tokens_values_config_path}' file")
- else:
- tokens_values_data = []
-
- # Create combined config
- try:
- extrapolated_form_config_data = _combine_extrapolated_form_configs(
- form_config_data,
- tokens_values_data,
- skip_validating_tokens_values_config,
- )
- _write_config_to_file(extrapolated_form_config_data, extrapolated_form_config_path)
- except ValueError as e:
- print(f"Could not extrapolate form configs: {e}")
-
-
-def _get_bucket_and_key_from_S3_url(s3_url: str) -> Tuple[str, str]:
- parsed_url = urlparse(s3_url)
- bucket_name = parsed_url.hostname.split('.')[0]
- relative_path = parsed_url.path
-
- if not relative_path:
- raise ValueError(f'Cannot extract S3 key from invalid URL "{s3_url}"')
-
- # Remove a slash from the beginning of the path
- s3_key = relative_path[1:]
- return bucket_name, s3_key
-
-
-def is_s3_url(value: str) -> bool:
- if isinstance(value, str):
- parsed_url = urlparse(value)
- return bool(
- parsed_url.scheme == 'https' and
- "s3" in parsed_url.hostname and
- parsed_url.netloc and
- parsed_url.path
- )
-
- return False
-
-
-def get_file_urls_from_s3_storage(s3_url: str) -> List[str]:
- urls = []
-
- base_url = "{0.scheme}://{0.netloc}/".format(urlparse(s3_url))
- bucket, s3_path = _get_bucket_and_key_from_S3_url(s3_url)
-
- s3 = boto3.resource("s3")
- my_bucket = s3.Bucket(bucket)
-
- for object_summary in my_bucket.objects.filter(Prefix=s3_path):
- file_s3_key: str = object_summary.key
- filename = os.path.basename(file_s3_key)
- is_file = bool(filename)
- if is_file:
- urls.append(urljoin(base_url, file_s3_key))
-
- return urls
-
-
-def generate_tokens_values_config_from_files(tokens_values_config_path: str, files: List[str]):
- tokens_values_config_data = []
-
- for i, file_location in enumerate(files):
- tokens_values_config_data.append(dict(
- tokens_values={
- FILE_LOCATION_TOKEN_NAME: file_location,
- },
- ))
-
- try:
- _write_config_to_file(tokens_values_config_data, tokens_values_config_path)
- except ValueError as e:
- print(f"Could not generate tokens values config: {e}")
diff --git a/mephisto/generators/form_composer/configs_validation/tokens_values_config.py b/mephisto/generators/form_composer/configs_validation/tokens_values_config.py
deleted file mode 100644
index 3fbccf6e2..000000000
--- a/mephisto/generators/form_composer/configs_validation/tokens_values_config.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from typing import List
-from typing import Tuple
-
-from .common_validation import validate_config_dict_item
-from .config_validation_constants import AVAILABLE_TASK_ATTRS
-
-
-def validate_tokens_values_config(config_json: List[dict]) -> Tuple[bool, List[str]]:
- is_valid = True
- errors = []
-
- if not isinstance(config_json, list):
- is_valid = False
- errors.append("Config must be 'Array.")
-
- if config_json:
- if not all(config_json):
- is_valid = False
- errors.append("Config must contain at least one non-empty item.")
-
- for item in config_json:
- unit_is_valid = validate_config_dict_item(
- item, "unit_tokens_values", AVAILABLE_TASK_ATTRS, errors,
- )
- if not unit_is_valid:
- is_valid = False
-
- return is_valid, errors
diff --git a/mephisto/review_app/client/src/pages/TaskPage/TaskPage.css b/mephisto/review_app/client/src/pages/TaskPage/TaskPage.css
index 6b5498872..ea1b44afe 100644
--- a/mephisto/review_app/client/src/pages/TaskPage/TaskPage.css
+++ b/mephisto/review_app/client/src/pages/TaskPage/TaskPage.css
@@ -72,6 +72,22 @@
padding: 10px 30px;
}
+.task .content .results .results-header {
+ cursor: pointer;
+}
+
+.task .content .results .results-icon {
+ display: inline-block;
+ margin-left: 10px;
+ font-style: normal;
+ font-size: 50px;
+ line-height: 1;
+}
+
+.task .content .results .results-closed{
+ display: none;
+}
+
.task .content .results .results-table {
max-width: 1000px;
}
diff --git a/mephisto/review_app/client/src/pages/TaskPage/TaskPage.tsx b/mephisto/review_app/client/src/pages/TaskPage/TaskPage.tsx
index 5025f7347..2ef59ddb2 100644
--- a/mephisto/review_app/client/src/pages/TaskPage/TaskPage.tsx
+++ b/mephisto/review_app/client/src/pages/TaskPage/TaskPage.tsx
@@ -103,6 +103,8 @@ function TaskPage(props: PropsType) {
const [unitResultsIsJSON, setUnitResultsIsJSON] = React.useState(false);
+ const [resultsVisibility, setResultsVisibility] = React.useState(true);
+
window.onmessage = function (e) {
if (
e.data &&
@@ -535,21 +537,26 @@ function TaskPage(props: PropsType) {
<>
{/* Results table */}