[PATCH 3/3] erofs-utils: lib: s3: properly escape object key names

Yifan Zhao zhaoyifan28 at huawei.com
Tue Jan 13 17:11:49 AEDT 2026


The current implementation does not correctly handle the escaping of
object key names. This patch ensures compliance with the AWS S3
documentation [1] for proper key name encoding and character handling.

[1] https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html

Signed-off-by: Yifan Zhao <zhaoyifan28 at huawei.com>
---
 lib/remotes/s3.c | 115 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 105 insertions(+), 10 deletions(-)

diff --git a/lib/remotes/s3.c b/lib/remotes/s3.c
index 8351674..a5b27eb 100644
--- a/lib/remotes/s3.c
+++ b/lib/remotes/s3.c
@@ -61,11 +61,17 @@ static const char *s3erofs_parse_host(const char *endpoint, const char **schema)
 	return host;
 }
 
-static void *s3erofs_urlencode(const char *input)
+enum s3erofs_urlencode_mode {
+	S3EROFS_URLENCODE_QUERY_PARAM,
+	S3EROFS_URLENCODE_S3_KEY,
+};
+
+static void *s3erofs_urlencode(const char *input, enum s3erofs_urlencode_mode mode)
 {
 	static const char hex[] = "0123456789ABCDEF";
 	char *p, *url;
 	int i, c;
+	bool safe;
 
 	url = malloc(strlen(input) * 3 + 1);
 	if (!url)
@@ -73,13 +79,31 @@ static void *s3erofs_urlencode(const char *input)
 
 	p = url;
 	for (i = 0; i < strlen(input); ++i) {
-		c = input[i];
-
-		// Unreserved characters: A-Z a-z 0-9 - . _ ~
-		if (isalpha(c) || isdigit(c) || c == '-' || c == '.' ||
-		    c == '_' || c == '~') {
+		c = (unsigned char)input[i];
+
+		if (mode == S3EROFS_URLENCODE_S3_KEY)
+			/*
+			 * AWS S3 safe characters for object key names:
+			 * - Alphanumeric: 0-9 a-z A-Z
+			 * - Special: ! - _ . * ' ( )
+			 * - Forward slash (/) for hierarchy
+			 * See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
+			 */
+			safe = isalpha(c) || isdigit(c) || c == '!' || c == '-' ||
+			       c == '_' || c == '.' || c == '*' || c == '(' || c == ')' ||
+			       c == '\'' || c == '/';
+		else
+			/*
+			 * URL encode query parameters
+			 * See: https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html#create-signature-presign-entire-payload
+			 */
+			safe = isalpha(c) || isdigit(c) || c == '-' || c == '.' ||
+			       c == '_' || c == '~';
+
+		if (safe) {
 			*p++ = c;
 		} else {
+			/* URL encode this character */
 			*p++ = '%';
 			*p++ = hex[c >> 4];
 			*p++ = hex[c & 0x0F];
@@ -111,13 +135,13 @@ static int s3erofs_prepare_canonical_query(struct s3erofs_curl_request *req,
 
 	pairs = calloc(1, sizeof(struct s3erofs_qsort_kv) * params->num);
 	for (i = 0; i < params->num; i++) {
-		pairs[i].key = s3erofs_urlencode(params->key[i]);
+		pairs[i].key = s3erofs_urlencode(params->key[i], S3EROFS_URLENCODE_QUERY_PARAM);
 		if (IS_ERR(pairs[i].key)) {
 			ret = PTR_ERR(pairs[i].key);
 			pairs[i].key = NULL;
 			goto out;
 		}
-		pairs[i].value = s3erofs_urlencode(params->value[i]);
+		pairs[i].value = s3erofs_urlencode(params->value[i], S3EROFS_URLENCODE_QUERY_PARAM);
 		if (IS_ERR(pairs[i].value)) {
 			ret = PTR_ERR(pairs[i].value);
 			pairs[i].value = NULL;
@@ -154,6 +178,7 @@ static int s3erofs_prepare_url(struct s3erofs_curl_request *req,
 	bool slash = false;
 	bool bucket_domain = false;
 	char *url = req->url;
+	char *encoded_key = NULL;
 	int pos, canonical_uri_pos, i, ret = 0;
 
 	if (!endpoint)
@@ -198,11 +223,18 @@ static int s3erofs_prepare_url(struct s3erofs_curl_request *req,
 		}
 	}
 	if (key) {
+		encoded_key = s3erofs_urlencode(key, S3EROFS_URLENCODE_S3_KEY);
+		if (IS_ERR(encoded_key)) {
+			ret = PTR_ERR(encoded_key);
+			encoded_key = NULL;
+			goto err;
+		}
+
 		if (url[pos - 1] == '/')
 			--pos;
 		else
 			slash = true;
-		pos += snprintf(url + pos, S3EROFS_URL_LEN - pos, "/%s", key);
+		pos += snprintf(url + pos, S3EROFS_URL_LEN - pos, "/%s", encoded_key);
 	}
 
 	if (sig == S3EROFS_SIGNATURE_VERSION_2) {
@@ -220,7 +252,8 @@ static int s3erofs_prepare_url(struct s3erofs_curl_request *req,
 			i = 1;
 		}
 		i += snprintf(req->canonical_uri + i, S3EROFS_CANONICAL_URI_LEN - i,
-			      "%s%s%s", path, slash ? "/" : "", key ? key : "");
+			      "%s%s%s", path, slash ? "/" : "",
+			      encoded_key ? encoded_key : "");
 	} else {
 		i = snprintf(req->canonical_uri, S3EROFS_CANONICAL_URI_LEN,
 			     "%s", url + canonical_uri_pos);
@@ -241,6 +274,8 @@ static int s3erofs_prepare_url(struct s3erofs_curl_request *req,
 	erofs_dbg("Request canonical_uri %s", req->canonical_uri);
 
 err:
+	if (encoded_key)
+		free(encoded_key);
 	if (schema != https)
 		free((void *)schema);
 	return ret;
@@ -1410,6 +1445,66 @@ static bool test_s3erofs_prepare_url(void)
 			.expected_canonical_v2 = "/bucket/object.txt",
 			.expected_canonical_v4 = "/object.txt",
 			.expected_ret = 0,
+		},
+		{
+			.name = "Key with spaces",
+			.endpoint = "s3.amazonaws.com",
+			.path = "bucket",
+			.key = "my folder/my file.txt",
+			.url_style = S3EROFS_URL_STYLE_VIRTUAL_HOST,
+			.expected_url =
+				"https://bucket.s3.amazonaws.com/my%20folder/my%20file.txt",
+			.expected_canonical_v2 = "/bucket/my%20folder/my%20file.txt",
+			.expected_canonical_v4 = "/my%20folder/my%20file.txt",
+			.expected_ret = 0,
+		},
+		{
+			.name = "Key with special characters (&, $, @, =)",
+			.endpoint = "s3.amazonaws.com",
+			.path = "bucket",
+			.key = "file&name$test@sign=value.txt",
+			.url_style = S3EROFS_URL_STYLE_PATH,
+			.expected_url =
+				"https://s3.amazonaws.com/bucket/file%26name%24test%40sign%3Dvalue.txt",
+			.expected_canonical_v2 = "/bucket/file%26name%24test%40sign%3Dvalue.txt",
+			.expected_canonical_v4 = "/bucket/file%26name%24test%40sign%3Dvalue.txt",
+			.expected_ret = 0,
+		},
+		{
+			.name = "Key with semicolon, colon, and plus",
+			.endpoint = "s3.amazonaws.com",
+			.path = "bucket",
+			.key = "file;name:test+data.txt",
+			.url_style = S3EROFS_URL_STYLE_VIRTUAL_HOST,
+			.expected_url =
+				"https://bucket.s3.amazonaws.com/file%3Bname%3Atest%2Bdata.txt",
+			.expected_canonical_v2 = "/bucket/file%3Bname%3Atest%2Bdata.txt",
+			.expected_canonical_v4 = "/file%3Bname%3Atest%2Bdata.txt",
+			.expected_ret = 0,
+		},
+		{
+			.name = "Key with comma and question mark",
+			.endpoint = "s3.amazonaws.com",
+			.path = "bucket",
+			.key = "file,name?query.txt",
+			.url_style = S3EROFS_URL_STYLE_PATH,
+			.expected_url =
+				"https://s3.amazonaws.com/bucket/file%2Cname%3Fquery.txt",
+			.expected_canonical_v2 = "/bucket/file%2Cname%3Fquery.txt",
+			.expected_canonical_v4 = "/bucket/file%2Cname%3Fquery.txt",
+			.expected_ret = 0,
+		},
+		{
+			.name = "Key with multiple special characters",
+			.endpoint = "s3.amazonaws.com",
+			.path = "bucket",
+			.key = "path/to/file name & data@2024.txt",
+			.url_style = S3EROFS_URL_STYLE_VIRTUAL_HOST,
+			.expected_url =
+				"https://bucket.s3.amazonaws.com/path/to/file%20name%20%26%20data%402024.txt",
+			.expected_canonical_v2 = "/bucket/path/to/file%20name%20%26%20data%402024.txt",
+			.expected_canonical_v4 = "/path/to/file%20name%20%26%20data%402024.txt",
+			.expected_ret = 0,
 		}
 
 	};
-- 
2.47.3



More information about the Linux-erofs mailing list