Skip to content

Commit

Permalink
[Form Builder] Adjusted Task Review app to work with presigned S3 URLs
Browse files Browse the repository at this point in the history
  • Loading branch information
meta-paul committed Feb 14, 2024
1 parent 68dbd58 commit 54a4f96
Show file tree
Hide file tree
Showing 11 changed files with 156 additions and 45 deletions.
8 changes: 3 additions & 5 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,16 +152,14 @@ Putting it altogether, let's prepare and launch a task featuring a form containi
- Remove content of folder `/tmp` (if you didn't shut the previous Task run correctly)
- Specify your AWS credentials
- Create file `docker/aws_credentials` and populate it with AWS keys info
- Populate your AWS credentials into Docker env
- create local env `docker/envs/env.local` and add AWS keys into it (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and `AWS_DEFAULT_REGION`)
- clone `docker/docker-compose.dev.yml` file as `docker/docker-compose.dev.yml`, and point it to the `env.local` environment
- Populate your AWS credentials into `docker/envs/env.local` file
- Stand up docker containers: `docker-compose -f docker/docker-compose.local.vscode.yml up`
- SSH into the running container: `docker exec -it mephisto_dc bash`
- Generate your task data config with these commands:
```shell
mephisto form_composer_config \
--directory "/mephisto/examples/form_composer_demo/data/dynamic_presigned_urls" \
--update-file-location-values "https://dev-alented-private.s3.amazonaws.com/mephisto" \
--update-file-location-values "https://your-bucket.s3.amazonaws.com/..." \
--use-presigned-urls

mephisto form_composer_config \
Expand All @@ -178,7 +176,7 @@ Putting it altogether, let's prepare and launch a task featuring a form containi
```
- Launch your task:
```shell
cd /mephisto/examples/form_composer_demo && python run_task_dynamic_presigned_urls_ec2_prolific.py
cd /mephisto/examples/form_composer_demo && python run_task_dynamic_ec2_prolific.py
```
- After the Task is completed by all workers, launch task review app (for more details see `mephisto/review_app/README.md`):
```shell
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def generate_preview_html():

# Erase all tokens from the text since HTML preview is inherently static
erase_tokens = lambda text: re.sub(
TOKEN_START_REGEX + r"(.*?)" + TOKEN_END_REGEX, ".....", text,
TOKEN_START_REGEX + r"(.*?)" + TOKEN_END_REGEX, "...", text,
)
preview_data = {
"title": erase_tokens(first_form_version["title"]),
Expand Down
2 changes: 1 addition & 1 deletion mephisto/generators/form_composer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ This is how URL pre-signing works:
Presigned S3 URLs use the following environment variables:
- Required: valid AWS credentials: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and `AWS_DEFAULT_REGION`
form_composer_config` command)
- Optional: URL expiration time `S3_URL_EXPIRATION_MINUTES`, can be up to 7 days long (if missing the default value is 60 minutes)
- Optional: URL expiration time `S3_URL_EXPIRATION_MINUTES` (if missing the default value is 60 minutes)


## Custom callbacks
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,22 @@
import os.path
import re
from copy import deepcopy
from rich import print
from typing import List
from typing import Optional
from typing import Tuple

from rich import print

from mephisto.generators.form_composer.constants import S3_URL_EXPIRATION_MINUTES_MAX
from mephisto.generators.form_composer.constants import TOKEN_END_REGEX
from mephisto.generators.form_composer.constants import TOKEN_START_REGEX
from mephisto.generators.form_composer.remote_procedures import ProcedureName
from .config_validation_constants import ATTRS_SUPPORTING_TOKENS
from .config_validation_constants import TOKENS_VALUES_KEY
from .form_config import validate_form_config
from .separate_token_values_config import validate_separate_token_values_config
from .token_sets_values_config import validate_token_sets_values_config
from .utils import get_s3_presigned_url
from .utils import make_error_message
from .utils import read_config_file
from .utils import write_config_to_file
Expand All @@ -28,7 +32,12 @@
def _extrapolate_tokens_values(text: str, tokens_values: dict) -> str:
for token, value in tokens_values.items():
text = re.sub(
TOKEN_START_REGEX + r"(\s*)" + token + r"(\s*)" + TOKEN_END_REGEX,
(
TOKEN_START_REGEX + r"(\s*)" +
# Escape and add parentheses around the token, in case it has special characters
r"(" + re.escape(token) + r")" +
"(\s*)" + TOKEN_END_REGEX
),
str(value),
text,
)
Expand Down Expand Up @@ -69,7 +78,11 @@ def _collect_form_config_items_to_extrapolate(config_data: dict) -> List[dict]:
return items_to_extrapolate


def _collect_tokens_from_form_config(config_data: dict) -> Tuple[set, List[str]]:
def _collect_tokens_from_form_config(
config_data: dict, regex: Optional[str] = None,
) -> Tuple[set, List[str]]:
regex = regex or r"\s*(\w+?)\s*"

items_to_extrapolate = _collect_form_config_items_to_extrapolate(config_data)
tokens_in_form_config = set()
tokens_in_unexpected_attrs_errors = []
Expand All @@ -80,7 +93,7 @@ def _collect_tokens_from_form_config(config_data: dict) -> Tuple[set, List[str]]
if not item_attr:
continue
tokens_in_form_config.update(set(re.findall(
TOKEN_START_REGEX + r"\s*(\w+?)\s*" + TOKEN_END_REGEX,
TOKEN_START_REGEX + regex + TOKEN_END_REGEX,
item_attr,
)))

Expand All @@ -89,7 +102,7 @@ def _collect_tokens_from_form_config(config_data: dict) -> Tuple[set, List[str]]
item_attr = item.get(attr_name)
if isinstance(item_attr, str):
found_attr_tokens = re.findall(
TOKEN_START_REGEX + r"\s*(\w+?)\s*" + TOKEN_END_REGEX,
TOKEN_START_REGEX + regex + TOKEN_END_REGEX,
item_attr,
)
if found_attr_tokens:
Expand Down Expand Up @@ -360,3 +373,28 @@ def verify_form_composer_configs(

except ValueError as e:
print(f"\n[red]Provided Form Composer config files are invalid:[/red] {e}\n")


def prepare_task_config_for_review_app(config: dict) -> dict:
    """
    Return a copy of a unit's task config with presigned-URL tokens resolved.

    Tokens in the config that mention one of the presigned-URL remote procedures
    and carry a double-quoted URL argument are replaced with a freshly presigned
    S3 URL, so the Task Review app can display the referenced files.
    All other tokens are left untouched.

    :param config: task config (unit inputs); it is deep-copied, never mutated.
    :return: the copied config with presigned-URL tokens substituted.
    """
    config = deepcopy(config)

    # Capture the whole expression between the token delimiters, not just a
    # plain word-like token name, because these tokens hold procedure calls
    procedure_code_regex = r"\s*(.+?)\s*"
    tokens_from_inputs, _ = _collect_tokens_from_form_config(config, regex=procedure_code_regex)

    # Extracts the first double-quoted argument of a procedure call: `("<url>")`
    url_from_procedure_code_regex = r"\(\"(.+?)\"\)"
    presigned_url_procedure_names = (
        ProcedureName.GET_MULTIPLE_PRESIGNED_URLS,
        ProcedureName.GET_PRESIGNED_URL,
    )

    token_values = {}
    for token in tokens_from_inputs:
        if not any(p in token for p in presigned_url_procedure_names):
            continue

        url_match = re.search(url_from_procedure_code_regex, token)
        if url_match is None:
            # The token names a presign procedure but has no `("...")` argument;
            # leave it as is instead of failing the whole review request
            continue

        # Presign URL for max possible period of time,
        # because there's no need to hide files from researchers
        # and review can last for a long time
        token_values[token] = get_s3_presigned_url(
            url_match.group(1), S3_URL_EXPIRATION_MINUTES_MAX,
        )

    return _extrapolate_tokens_in_form_config(config, token_values)
2 changes: 2 additions & 0 deletions mephisto/generators/form_composer/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@

S3_URL_EXPIRATION_MINUTES = int(os.environ.get("S3_URL_EXPIRATION_MINUTES", 60))

S3_URL_EXPIRATION_MINUTES_MAX = 7 * 24 * 60 # Week

TOKEN_START_SYMBOLS = "{{"

TOKEN_END_SYMBOLS = "}}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,24 @@ function FormComposerBaseFrontend({
const [formData, setFormData] = React.useState(null);
const [formComposerRenderingErrors, setFormComposerRenderingErrors] = React.useState(null);

let initialConfigFormData = taskData.form;
const inReviewState = finalResults !== null;
const initialConfigFormData = taskData.form;

prepareRemoteProcedures(remoteProcedure);
if (!inReviewState) {
prepareRemoteProcedures(remoteProcedure);
}

React.useEffect(() => {
prepareFormData(
taskData,
setFormData,
setLoadingFormData,
setFormComposerRenderingErrors,
);
if (inReviewState) {
setFormData(initialConfigFormData);
} else {
prepareFormData(
taskData,
setFormData,
setLoadingFormData,
setFormComposerRenderingErrors,
);
}
}, [taskData.form]);

if (!initialConfigFormData) {
Expand Down
42 changes: 39 additions & 3 deletions mephisto/review_app/client/src/pages/TaskPage/TaskPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,11 @@ function TaskPage(props: PropsType) {

const currentUnitDetails = unitDetailsMap[String(currentUnitOnReview)];

const [unitInputsIsJSON, setUnitInputsIsJSON] = React.useState<boolean>(false);
const [unitResultsIsJSON, setUnitResultsIsJSON] = React.useState<boolean>(false);

const [resultsVisibility, setResultsVisibility] = React.useState<boolean>(true);
const [inputsVisibility, setInputsVisibility] = React.useState<boolean>(false);
const [resultsVisibility, setResultsVisibility] = React.useState<boolean>(false);

window.onmessage = function (e) {
if (
Expand Down Expand Up @@ -356,7 +358,7 @@ function TaskPage(props: PropsType) {
const sendDataToTaskIframe = (data: object) => {
const reviewData = {
REVIEW_DATA: {
inputs: data["inputs"],
inputs: data["prepared_inputs"],
outputs: data["outputs"],
},
};
Expand Down Expand Up @@ -471,7 +473,13 @@ function TaskPage(props: PropsType) {

useEffect(() => {
if (currentUnitDetails) {
const unitInputs = currentUnitDetails.inputs;
const unitOutputs = currentUnitDetails.outputs;

if (typeof unitInputs === "object") {
setUnitInputsIsJSON(true);
}

if (typeof unitOutputs === "object") {
setUnitResultsIsJSON(true);
}
Expand Down Expand Up @@ -533,9 +541,37 @@ function TaskPage(props: PropsType) {
</div>
)}

{currentUnitDetails?.inputs && (
<>
{/* Initial parameters */}
<div className={"results"}>
<h1 className={"results-header"} onClick={() => setInputsVisibility(!inputsVisibility)}>
<b>Initial Parameters</b>
<i className={"results-icon"}>
{inputsVisibility ? <>&#x25BE;</> : <>&#x25B8;</>}
</i>
</h1>

<div className={`${inputsVisibility ? "" : "results-closed"}`}>
{unitInputsIsJSON ? (
<JSONPretty
className={"json-pretty"}
data={currentUnitDetails.inputs}
space={4}
/>
) : (
<div>
{JSON.stringify(currentUnitDetails.inputs)}
</div>
)}
</div>
</div>
</>
)}

{currentUnitDetails?.outputs && (
<>
{/* Results table */}
{/* Results */}
<div className={"results"}>
<h1 className={"results-header"} onClick={() => setResultsVisibility(!resultsVisibility)}>
<b>Results</b>
Expand Down
1 change: 1 addition & 0 deletions mephisto/review_app/client/src/types/units.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,5 @@ declare type UnitDetailsType = {
id: number;
inputs: object;
outputs: object;
prepared_inputs: object;
};
23 changes: 20 additions & 3 deletions mephisto/review_app/server/api/views/units_details_view.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#!/usr/bin/env python3

# Copyright (c) Meta Platforms and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
Expand All @@ -14,6 +13,9 @@

from mephisto.data_model.task_run import TaskRun
from mephisto.data_model.unit import Unit
from mephisto.generators.form_composer.config_validation.task_data_config import (
prepare_task_config_for_review_app
)


class UnitsDetailsView(MethodView):
Expand Down Expand Up @@ -53,12 +55,27 @@ def get(self) -> dict:
task_run: TaskRun = unit.get_task_run()
has_task_source_review = bool(task_run.args.get("blueprint").get("task_source_review"))

inputs = unit_data.get("data", {}).get("inputs")
outputs = unit_data.get("data", {}).get("outputs")

# In case outdated code returns `final_submission`
# under the `inputs` and `outputs` keys, use the value inside `final_submission`
if "final_submission" in inputs:
inputs = inputs["final_submission"]
if "final_submission" in outputs:
outputs = outputs["final_submission"]

# Apply any dynamic processing to the task config for the current unit
# so that it looks the same as it did for the worker
prepared_inputs = prepare_task_config_for_review_app(inputs)

units.append(
{
"has_task_source_review": has_task_source_review,
"id": int(unit.db_id),
"inputs": unit_data.get("data", {}).get("inputs"), # instructions for worker
"outputs": unit_data.get("data", {}).get("outputs"), # response from worker
"inputs": inputs, # instructions for worker
"outputs": outputs, # response from worker
"prepared_inputs": prepared_inputs, # prepared instructions from worker
}
)

Expand Down
Loading

0 comments on commit 54a4f96

Please sign in to comment.