[PATCH 3/3] erofs-utils: lib: s3: properly escape object key names
Yifan Zhao
zhaoyifan28 at huawei.com
Tue Jan 13 17:11:49 AEDT 2026
The current implementation does not correctly handle the escaping of
object key names. This patch ensures compliance with the AWS S3
documentation [1] for proper key name encoding and character handling.
[1] https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
Signed-off-by: Yifan Zhao <zhaoyifan28 at huawei.com>
---
lib/remotes/s3.c | 115 ++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 105 insertions(+), 10 deletions(-)
diff --git a/lib/remotes/s3.c b/lib/remotes/s3.c
index 8351674..a5b27eb 100644
--- a/lib/remotes/s3.c
+++ b/lib/remotes/s3.c
@@ -61,11 +61,17 @@ static const char *s3erofs_parse_host(const char *endpoint, const char **schema)
return host;
}
-static void *s3erofs_urlencode(const char *input)
+enum s3erofs_urlencode_mode {
+ S3EROFS_URLENCODE_QUERY_PARAM,
+ S3EROFS_URLENCODE_S3_KEY,
+};
+
+static void *s3erofs_urlencode(const char *input, enum s3erofs_urlencode_mode mode)
{
static const char hex[] = "0123456789ABCDEF";
char *p, *url;
int i, c;
+ bool safe;
url = malloc(strlen(input) * 3 + 1);
if (!url)
@@ -73,13 +79,31 @@ static void *s3erofs_urlencode(const char *input)
p = url;
for (i = 0; i < strlen(input); ++i) {
- c = input[i];
-
- // Unreserved characters: A-Z a-z 0-9 - . _ ~
- if (isalpha(c) || isdigit(c) || c == '-' || c == '.' ||
- c == '_' || c == '~') {
+ c = (unsigned char)input[i];
+
+ if (mode == S3EROFS_URLENCODE_S3_KEY)
+ /*
+ * AWS S3 safe characters for object key names:
+ * - Alphanumeric: 0-9 a-z A-Z
+ * - Special: ! - _ . * ' ( )
+ * - Forward slash (/) for hierarchy
+ * See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
+ */
+ safe = isalpha(c) || isdigit(c) || c == '!' || c == '-' ||
+ c == '_' || c == '.' || c == '*' || c == '(' || c == ')' ||
+ c == '\'' || c == '/';
+ else
+ /*
+ * URL encode query parameters
+ * See: https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html#create-signature-presign-entire-payload
+ */
+ safe = isalpha(c) || isdigit(c) || c == '-' || c == '.' ||
+ c == '_' || c == '~';
+
+ if (safe) {
*p++ = c;
} else {
+ /* URL encode this character */
*p++ = '%';
*p++ = hex[c >> 4];
*p++ = hex[c & 0x0F];
@@ -111,13 +135,13 @@ static int s3erofs_prepare_canonical_query(struct s3erofs_curl_request *req,
pairs = calloc(1, sizeof(struct s3erofs_qsort_kv) * params->num);
for (i = 0; i < params->num; i++) {
- pairs[i].key = s3erofs_urlencode(params->key[i]);
+ pairs[i].key = s3erofs_urlencode(params->key[i], S3EROFS_URLENCODE_QUERY_PARAM);
if (IS_ERR(pairs[i].key)) {
ret = PTR_ERR(pairs[i].key);
pairs[i].key = NULL;
goto out;
}
- pairs[i].value = s3erofs_urlencode(params->value[i]);
+ pairs[i].value = s3erofs_urlencode(params->value[i], S3EROFS_URLENCODE_QUERY_PARAM);
if (IS_ERR(pairs[i].value)) {
ret = PTR_ERR(pairs[i].value);
pairs[i].value = NULL;
@@ -154,6 +178,7 @@ static int s3erofs_prepare_url(struct s3erofs_curl_request *req,
bool slash = false;
bool bucket_domain = false;
char *url = req->url;
+ char *encoded_key = NULL;
int pos, canonical_uri_pos, i, ret = 0;
if (!endpoint)
@@ -198,11 +223,18 @@ static int s3erofs_prepare_url(struct s3erofs_curl_request *req,
}
}
if (key) {
+ encoded_key = s3erofs_urlencode(key, S3EROFS_URLENCODE_S3_KEY);
+ if (IS_ERR(encoded_key)) {
+ ret = PTR_ERR(encoded_key);
+ encoded_key = NULL;
+ goto err;
+ }
+
if (url[pos - 1] == '/')
--pos;
else
slash = true;
- pos += snprintf(url + pos, S3EROFS_URL_LEN - pos, "/%s", key);
+ pos += snprintf(url + pos, S3EROFS_URL_LEN - pos, "/%s", encoded_key);
}
if (sig == S3EROFS_SIGNATURE_VERSION_2) {
@@ -220,7 +252,8 @@ static int s3erofs_prepare_url(struct s3erofs_curl_request *req,
i = 1;
}
i += snprintf(req->canonical_uri + i, S3EROFS_CANONICAL_URI_LEN - i,
- "%s%s%s", path, slash ? "/" : "", key ? key : "");
+ "%s%s%s", path, slash ? "/" : "",
+ encoded_key ? encoded_key : "");
} else {
i = snprintf(req->canonical_uri, S3EROFS_CANONICAL_URI_LEN,
"%s", url + canonical_uri_pos);
@@ -241,6 +274,8 @@ static int s3erofs_prepare_url(struct s3erofs_curl_request *req,
erofs_dbg("Request canonical_uri %s", req->canonical_uri);
err:
+ if (encoded_key)
+ free(encoded_key);
if (schema != https)
free((void *)schema);
return ret;
@@ -1410,6 +1445,66 @@ static bool test_s3erofs_prepare_url(void)
.expected_canonical_v2 = "/bucket/object.txt",
.expected_canonical_v4 = "/object.txt",
.expected_ret = 0,
+ },
+ {
+ .name = "Key with spaces",
+ .endpoint = "s3.amazonaws.com",
+ .path = "bucket",
+ .key = "my folder/my file.txt",
+ .url_style = S3EROFS_URL_STYLE_VIRTUAL_HOST,
+ .expected_url =
+ "https://bucket.s3.amazonaws.com/my%20folder/my%20file.txt",
+ .expected_canonical_v2 = "/bucket/my%20folder/my%20file.txt",
+ .expected_canonical_v4 = "/my%20folder/my%20file.txt",
+ .expected_ret = 0,
+ },
+ {
+ .name = "Key with special characters (&, $, @, =)",
+ .endpoint = "s3.amazonaws.com",
+ .path = "bucket",
+ .key = "file&name$test at sign=value.txt",
+ .url_style = S3EROFS_URL_STYLE_PATH,
+ .expected_url =
+ "https://s3.amazonaws.com/bucket/file%26name%24test%40sign%3Dvalue.txt",
+ .expected_canonical_v2 = "/bucket/file%26name%24test%40sign%3Dvalue.txt",
+ .expected_canonical_v4 = "/bucket/file%26name%24test%40sign%3Dvalue.txt",
+ .expected_ret = 0,
+ },
+ {
+ .name = "Key with semicolon, colon, and plus",
+ .endpoint = "s3.amazonaws.com",
+ .path = "bucket",
+ .key = "file;name:test+data.txt",
+ .url_style = S3EROFS_URL_STYLE_VIRTUAL_HOST,
+ .expected_url =
+ "https://bucket.s3.amazonaws.com/file%3Bname%3Atest%2Bdata.txt",
+ .expected_canonical_v2 = "/bucket/file%3Bname%3Atest%2Bdata.txt",
+ .expected_canonical_v4 = "/file%3Bname%3Atest%2Bdata.txt",
+ .expected_ret = 0,
+ },
+ {
+ .name = "Key with comma and question mark",
+ .endpoint = "s3.amazonaws.com",
+ .path = "bucket",
+ .key = "file,name?query.txt",
+ .url_style = S3EROFS_URL_STYLE_PATH,
+ .expected_url =
+ "https://s3.amazonaws.com/bucket/file%2Cname%3Fquery.txt",
+ .expected_canonical_v2 = "/bucket/file%2Cname%3Fquery.txt",
+ .expected_canonical_v4 = "/bucket/file%2Cname%3Fquery.txt",
+ .expected_ret = 0,
+ },
+ {
+ .name = "Key with multiple special characters",
+ .endpoint = "s3.amazonaws.com",
+ .path = "bucket",
+ .key = "path/to/file name & data at 2024.txt",
+ .url_style = S3EROFS_URL_STYLE_VIRTUAL_HOST,
+ .expected_url =
+ "https://bucket.s3.amazonaws.com/path/to/file%20name%20%26%20data%402024.txt",
+ .expected_canonical_v2 = "/bucket/path/to/file%20name%20%26%20data%402024.txt",
+ .expected_canonical_v4 = "/path/to/file%20name%20%26%20data%402024.txt",
+ .expected_ret = 0,
}
};
--
2.47.3
More information about the Linux-erofs
mailing list