Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow using fragment and query characters in S3 locations #19296

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,6 @@ public static Location of(String location)
checkArgument(!location.isEmpty(), "location is empty");
checkArgument(!location.isBlank(), "location is blank");

checkArgument(location.indexOf('#') < 0, "Fragment is not allowed in a file system location: %s", location);
checkArgument(location.indexOf('?') < 0, "URI query component is not allowed in a file system location: %s", location);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there anything else preventing us from using such paths correctly?
like some urlencode somewhere in delta?

would it make sense to have end-to-end tests with Delta, Hive, Iceberg?


// legacy HDFS location that is just a path
if (location.startsWith("/")) {
return new Location(location, Optional.empty(), Optional.empty(), Optional.empty(), OptionalInt.empty(), location.substring(1));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
*/
package io.trino.filesystem;

import static com.google.common.base.Preconditions.checkArgument;

public final class Locations
{
private Locations() {}
Expand All @@ -25,20 +23,12 @@ private Locations() {}
@Deprecated
public static String appendPath(String location, String path)
{
validateLocation(location);

if (!location.endsWith("/")) {
location += "/";
}
return location + path;
}

private static void validateLocation(String location)
{
checkArgument(location.indexOf('?') < 0, "location contains a query string: %s", location);
checkArgument(location.indexOf('#') < 0, "location contains a fragment: %s", location);
}

/**
* Verifies whether the two provided directory location parameters point to the same actual location.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,15 @@ void testParse()

assertLocation("scheme://host///path//", "scheme", Optional.empty(), "host", "//path//");

assertLocationWithoutUriTesting("scheme://userInfo@host/some/path#fragment", "scheme", Optional.of("userInfo"), "host", "some/path#fragment");
assertLocationWithoutUriTesting("scheme://userInfo@ho#st/some/path", "scheme", Optional.of("userInfo"), "ho#st", "some/path");
assertLocationWithoutUriTesting("scheme://user#Info@host/some/path", "scheme", Optional.of("user#Info"), "host", "some/path");
assertLocationWithoutUriTesting("sc#heme://userInfo@host/some/path", "sc#heme", Optional.of("userInfo"), "host", "some/path");
assertLocationWithoutUriTesting("scheme://userInfo@host/some/path?fragment", "scheme", Optional.of("userInfo"), "host", "some/path?fragment");
assertLocationWithoutUriTesting("scheme://userInfo@ho?st/some/path", "scheme", Optional.of("userInfo"), "ho?st", "some/path");
assertLocationWithoutUriTesting("scheme://user?Info@host/some/path", "scheme", Optional.of("user?Info"), "host", "some/path");
assertLocationWithoutUriTesting("sc?heme://userInfo@host/some/path", "sc?heme", Optional.of("userInfo"), "host", "some/path");

// the path can be empty
assertLocation("scheme://", Optional.of("scheme"), Optional.empty(), Optional.empty(), OptionalInt.empty(), "", Set.of("Expected authority at index 9: scheme://"));
assertLocation("scheme://host/", "scheme", Optional.empty(), "host", "");
Expand Down Expand Up @@ -169,34 +178,12 @@ void testParse()
assertThatThrownBy(() -> Location.of("scheme://:"))
.isInstanceOf(IllegalArgumentException.class)
.hasMessage("Invalid port in file system location: scheme://:");
}

// fragment is not allowed
assertThatThrownBy(() -> Location.of("scheme://userInfo@host/some/path#fragement"))
.isInstanceOf(IllegalArgumentException.class)
.hasMessage("Fragment is not allowed in a file system location: scheme://userInfo@host/some/path#fragement");
assertThatThrownBy(() -> Location.of("scheme://userInfo@ho#st/some/path"))
.isInstanceOf(IllegalArgumentException.class)
.hasMessage("Fragment is not allowed in a file system location: scheme://userInfo@ho#st/some/path");
assertThatThrownBy(() -> Location.of("scheme://user#Info@host/some/path"))
.isInstanceOf(IllegalArgumentException.class)
.hasMessage("Fragment is not allowed in a file system location: scheme://user#Info@host/some/path");
assertThatThrownBy(() -> Location.of("sc#heme://userInfo@host/some/path"))
.isInstanceOf(IllegalArgumentException.class)
.hasMessage("Fragment is not allowed in a file system location: sc#heme://userInfo@host/some/path");

// query component is not allowed
assertThatThrownBy(() -> Location.of("scheme://userInfo@host/some/path?fragement"))
.isInstanceOf(IllegalArgumentException.class)
.hasMessage("URI query component is not allowed in a file system location: scheme://userInfo@host/some/path?fragement");
assertThatThrownBy(() -> Location.of("scheme://userInfo@ho?st/some/path"))
.isInstanceOf(IllegalArgumentException.class)
.hasMessage("URI query component is not allowed in a file system location: scheme://userInfo@ho?st/some/path");
assertThatThrownBy(() -> Location.of("scheme://user?Info@host/some/path"))
.isInstanceOf(IllegalArgumentException.class)
.hasMessage("URI query component is not allowed in a file system location: scheme://user?Info@host/some/path");
assertThatThrownBy(() -> Location.of("sc?heme://userInfo@host/some/path"))
.isInstanceOf(IllegalArgumentException.class)
.hasMessage("URI query component is not allowed in a file system location: sc?heme://userInfo@host/some/path");
private static void assertLocationWithoutUriTesting(String locationString, String scheme, Optional<String> userInfo, String host, String path)
{
Optional<String> expectedHost = host.isEmpty() ? Optional.empty() : Optional.of(host);
assertLocation(Location.of(locationString), locationString, Optional.of(scheme), userInfo, expectedHost, OptionalInt.empty(), path, true, Set.of("skipped"));
}

private static void assertLocation(String locationString, String scheme, Optional<String> userInfo, String host, String path)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import static io.trino.filesystem.Locations.appendPath;
import static io.trino.filesystem.Locations.areDirectoryLocationsEquivalent;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

public class TestLocations
{
Expand Down Expand Up @@ -53,23 +52,6 @@ public void testAppendPath(String location, String path, String expected)
assertThat(appendPath(location, path)).isEqualTo(expected);
}

private static Stream<Arguments> invalidLocations()
{
return Stream.of(
Arguments.of("location?", "location contains a query string.*"),
Arguments.of("location#", "location contains a fragment.*"));
}

@ParameterizedTest
@MethodSource("invalidLocations")
@SuppressWarnings("deprecation") // we're testing a deprecated method
public void testInvalidLocationInAppendPath(String location, String exceptionMessageRegexp)
{
assertThatThrownBy(() -> appendPath(location, "test"))
.isInstanceOf(IllegalArgumentException.class)
.hasMessageMatching(exceptionMessageRegexp);
}

@Test
public void testDirectoryLocationEquivalence()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -625,11 +625,10 @@ public void testSchemaOperations()
public void testCreateSchemaWithIncorrectLocation()
{
String schemaName = "test_create_schema_with_incorrect_location_" + randomNameSuffix();
String schemaLocation = "s3://testbucket/%s/a#hash/%s".formatted(schemaName, schemaName);
electrum marked this conversation as resolved.
Show resolved Hide resolved
String schemaLocation = "s3://bucket";

assertThatThrownBy(() -> assertUpdate("CREATE SCHEMA " + schemaName + " WITH (location = '" + schemaLocation + "')"))
.hasMessageContaining("Invalid location URI")
.hasStackTraceContaining("Fragment is not allowed in a file system location");
.hasMessageContaining("Invalid location URI");
}

@Test
Expand Down
Loading