[PATCH] erofs-utils: lib: s3: support bucket domain names

Gao Xiang hsiangkao at linux.alibaba.com
Wed Dec 17 18:16:32 AEDT 2025


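Add support for S3 bucket domain names: if the S3 source path starts
with '/', the endpoint itself is treated as the bucket domain name.
Such a path is currently invalid, so the leading '/' is free to be
used for this purpose, see [1].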

e.g.
 $ mkfs.erofs \
	--s3=noaa-goes19.s3.amazonaws.com,sig=4,region=us-east-1 \
	output.img /ABI-Flood-Day-Shapefiles/2025/08/25/
and
 $ mkfs.erofs \
	--s3=noaa-goes19.s3.amazonaws.com,sig=2 \
	output.img /ABI-Flood-Day-Shapefiles/2025/08/25/

[1] https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
 lib/remotes/s3.c | 80 ++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 68 insertions(+), 12 deletions(-)

diff --git a/lib/remotes/s3.c b/lib/remotes/s3.c
index b78c127807ca..296df61c06e5 100644
--- a/lib/remotes/s3.c
+++ b/lib/remotes/s3.c
@@ -152,10 +152,11 @@ static int s3erofs_prepare_url(struct s3erofs_curl_request *req,
 	const char *schema, *host;
 	/* an additional slash is added, which wasn't specified by user inputs */
 	bool slash = false;
+	bool bucket_domain = false;
 	char *url = req->url;
 	int pos, canonical_uri_pos, i, ret = 0;
 
-	if (!endpoint || !path)
+	if (!endpoint)
 		return -EINVAL;
 
 	host = s3erofs_parse_host(endpoint, &schema);
@@ -164,14 +165,27 @@ static int s3erofs_prepare_url(struct s3erofs_curl_request *req,
 	if (!schema)
 		schema = https;
 
+	if (__erofs_unlikely(!path))
+		path = "/";
+	if (__erofs_unlikely(path[0] == '/')) {
+		path++;
+		bucket_domain = true;
+		if (url_style != S3EROFS_URL_STYLE_VIRTUAL_HOST)
+			return -EINVAL;
+	}
+
 	if (url_style == S3EROFS_URL_STYLE_PATH) {
 		pos = snprintf(url, S3EROFS_URL_LEN, "%s%s/%s", schema,
 			       host, path);
 		canonical_uri_pos = pos - strlen(path) - 1;
 	} else {
-		const char * split = strchr(path, '/');
+		const char *split = strchr(path, '/');
 
-		if (!split) {
+		if (bucket_domain) {
+			pos = snprintf(url, S3EROFS_URL_LEN, "%s%s/%s",
+				       schema, host, path);
+			canonical_uri_pos = pos - 1;
+		} else if (!split) {
 			pos = snprintf(url, S3EROFS_URL_LEN, "%s%s.%s/",
 				       schema, path, host);
 			canonical_uri_pos = pos - 1;
@@ -191,12 +205,26 @@ static int s3erofs_prepare_url(struct s3erofs_curl_request *req,
 		pos += snprintf(url + pos, S3EROFS_URL_LEN - pos, "/%s", key);
 	}
 
-	if (sig == S3EROFS_SIGNATURE_VERSION_2)
-		i = snprintf(req->canonical_uri, S3EROFS_CANONICAL_URI_LEN,
-			     "/%s%s%s", path, slash ? "/" : "", key ? key : "");
-	else
+	if (sig == S3EROFS_SIGNATURE_VERSION_2) {
+		if (bucket_domain) {
+			const char *bucket = strchr(host, '.');
+
+			if (!bucket) {
+				ret = -EINVAL;
+				goto err;
+			}
+			i = snprintf(req->canonical_uri, S3EROFS_CANONICAL_URI_LEN,
+				     "/%.*s/", (int)(bucket - host), host);
+		} else {
+			req->canonical_uri[0] = '/';
+			i = 1;
+		}
+		i += snprintf(req->canonical_uri + i, S3EROFS_CANONICAL_URI_LEN - i,
+			      "%s%s%s", path, slash ? "/" : "", key ? key : "");
+	} else {
 		i = snprintf(req->canonical_uri, S3EROFS_CANONICAL_URI_LEN,
 			     "%s", url + canonical_uri_pos);
+	}
 	req->canonical_uri[i] = '\0';
 
 	if (params) {
@@ -841,14 +869,17 @@ s3erofs_create_object_iterator(struct erofs_s3 *s3, const char *path,
 		return ERR_PTR(-ENOMEM);
 	iter->s3 = s3;
 	prefix = strchr(path, '/');
-	if (prefix) {
+	if (!prefix) {
+		iter->bucket = strdup(path);
+		iter->prefix = NULL;
+	} else if (prefix == path) {
+		iter->bucket = NULL;
+		iter->prefix = strdup(path + 1);
+	} else {
 		if (++prefix - path > S3EROFS_PATH_MAX)
 			return ERR_PTR(-EINVAL);
 		iter->bucket = strndup(path, prefix - path);
 		iter->prefix = strdup(prefix);
-	} else {
-		iter->bucket = strdup(path);
-		iter->prefix = NULL;
 	}
 	iter->delimiter = delimiter;
 	iter->is_truncated = true;
@@ -1041,8 +1072,8 @@ int s3erofs_build_trees(struct erofs_importer *im, struct erofs_s3 *s3,
 		if (!obj) {
 			break;
 		} else if (IS_ERR(obj)) {
-			erofs_err("failed to get next object");
 			ret = PTR_ERR(obj);
+			erofs_err("failed to get next object: %s", erofs_strerror(ret));
 			goto err_iter;
 		}
 
@@ -1356,7 +1387,32 @@ static bool test_s3erofs_prepare_url(void)
 			.expected_canonical_v2 = "/bucket/path/to/file-name_v2.0.txt",
 			.expected_canonical_v4 = "/path/to/file-name_v2.0.txt",
 			.expected_ret = 0,
+		},
+		{
+			.name = "S3 Bucket domain name (1)",
+			.endpoint = "bucket.s3.amazonaws.com",
+			.path = "/",
+			.key = "object.txt",
+			.url_style = S3EROFS_URL_STYLE_VIRTUAL_HOST,
+			.expected_url =
+				"https://bucket.s3.amazonaws.com/object.txt",
+			.expected_canonical_v2 = "/bucket/object.txt",
+			.expected_canonical_v4 = "/object.txt",
+			.expected_ret = 0,
+		},
+		{
+			.name = "S3 Bucket domain name (2)",
+			.endpoint = "bucket.s3.amazonaws.com",
+			.path = NULL,
+			.key = "object.txt",
+			.url_style = S3EROFS_URL_STYLE_VIRTUAL_HOST,
+			.expected_url =
+				"https://bucket.s3.amazonaws.com/object.txt",
+			.expected_canonical_v2 = "/bucket/object.txt",
+			.expected_canonical_v4 = "/object.txt",
+			.expected_ret = 0,
 		}
+
 	};
 	int i;
 	int pass = 0;
-- 
2.43.5



More information about the Linux-erofs mailing list