diff --git a/regtests/run_spark_sql.sh b/regtests/run_spark_sql.sh
index d1d29e769..9e1a53e26 100755
--- a/regtests/run_spark_sql.sh
+++ b/regtests/run_spark_sql.sh
@@ -16,11 +16,32 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
+
+# -----------------------------------------------------------------------------
+# Purpose: Launch the Spark SQL shell to interact with Polaris.
+# -----------------------------------------------------------------------------
+#
+# Usage:
+#   ./run_spark_sql.sh [S3-location AWS-IAM-role]
 #
+# Description:
+#   - Without arguments: Runs against a catalog backed by the local filesystem.
+#   - With two arguments: Runs against a catalog backed by AWS S3.
+#     - [S3-location] - The S3 path to use as the default base location for the catalog.
+#     - [AWS-IAM-role] - The AWS IAM role for the catalog to assume when accessing the S3 location.
 #
-# Run this to open an interactive spark-sql shell talking to a catalog named "manual_spark"
+# Examples:
+#   - Run against the local filesystem:
+#       ./run_spark_sql.sh
 #
-# You must run 'use polaris;' as your first query in the spark-sql shell.
+#   - Run against AWS S3:
+#       ./run_spark_sql.sh s3://my-bucket/path arn:aws:iam::123456789001:role/my-role
+
+if [ $# -ne 0 ] && [ $# -ne 2 ]; then
+  echo "run_spark_sql.sh only accepts 0 or 2 arguments"
+  echo "Usage: ./run_spark_sql.sh [S3-location AWS-IAM-role]"
+  exit 1
+fi
 
 REGTEST_HOME=$(dirname $(realpath $0))
 cd ${REGTEST_HOME}
@@ -36,37 +57,60 @@ fi
 
 SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN:-principal:root;realm:default-realm}"
 
-# Use local filesystem by default
-curl -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
-  http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \
-  -d '{
-        "catalog": {
-          "name": "manual_spark",
-          "type": "INTERNAL",
-          "readOnly": false,
-          "properties": {
-            "default-base-location": "file:///tmp/polaris/"
-          },
-          "storageConfigInfo": {
-            "storageType": "FILE",
-            "allowedLocations": [
-              "file:///tmp"
-            ]
-          }
-        }
-      }'
+if [ $# -eq 0 ]; then
+  # Create a catalog backed by the local filesystem
+  curl -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
+    -H 'Accept: application/json' \
+    -H 'Content-Type: application/json' \
+    http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \
+    -d '{
+          "catalog": {
+            "name": "manual_spark",
+            "type": "INTERNAL",
+            "readOnly": false,
+            "properties": {
+              "default-base-location": "file:///tmp/polaris/"
+            },
+            "storageConfigInfo": {
+              "storageType": "FILE",
+              "allowedLocations": [
+                "file:///tmp"
+              ]
+            }
+          }
+        }'
+
+elif [ $# -eq 2 ]; then
+  # Create a catalog backed by AWS S3
+  S3_LOCATION=$1
+  AWS_IAM_ROLE=$2
 
-# Use the following instead of below to use s3 instead of local filesystem
-#curl -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
-#  http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \
-#  -d "{\"name\": \"manual_spark\", \"id\": 100, \"type\": \"INTERNAL\", \"readOnly\": false, \"properties\": {\"default-base-location\": \"s3://${S3_BUCKET}/${USER}/polaris/\"}}"
+  curl -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
+    -H 'Accept: application/json' \
+    -H 'Content-Type: application/json' \
+    http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \
+    -d "{
+          \"name\": \"manual_spark\",
+          \"id\": 100,
+          \"type\": \"INTERNAL\",
+          \"readOnly\": false,
+          \"properties\": {
+            \"default-base-location\": \"${S3_LOCATION}\"
+          },
+          \"storageConfigInfo\": {
+            \"storageType\": \"S3\",
+            \"allowedLocations\": [\"${S3_LOCATION}/\"],
+            \"roleArn\": \"${AWS_IAM_ROLE}\"
+          }
+        }"
+fi
 
 # Add TABLE_WRITE_DATA to the catalog's catalog_admin role since by default it can only manage access and metadata
 curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
   http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark/catalog-roles/catalog_admin/grants \
   -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' > /dev/stderr
 
-# For now, also explicitly assign the catalog_admin to the service_admin. Remove once GS fully rolled out for auto-assign.
+# Assign the catalog_admin role to the service_admin.
 curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
   http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principal-roles/service_admin/catalog-roles/manual_spark \
   -d '{"name": "catalog_admin"}' > /dev/stderr
@@ -74,7 +118,6 @@ curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: appl
 curl -X GET -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \
   http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark
 
-echo ${SPARK_HOME}/bin/spark-sql -S --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}"
 ${SPARK_HOME}/bin/spark-sql -S --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" \
   --conf spark.sql.catalog.polaris.warehouse=manual_spark \
   --conf spark.sql.defaultCatalog=polaris \