[PATCH] erofs-utils: lib: s3: support bucket domain names
Gao Xiang
hsiangkao at linux.alibaba.com
Wed Dec 17 18:16:32 AEDT 2025
Add support for using an S3 bucket domain name as the endpoint. This
mode is selected when the S3 source path starts with '/', a format that
is currently invalid, see [1].
For example, with signature version 4 (sig=4):
$ mkfs.erofs \
--s3=noaa-goes19.s3.amazonaws.com,sig=4,region=us-east-1 \
output.img /ABI-Flood-Day-Shapefiles/2025/08/25/
and with signature version 2 (sig=2):
$ mkfs.erofs \
--s3=noaa-goes19.s3.amazonaws.com,sig=2 \
output.img /ABI-Flood-Day-Shapefiles/2025/08/25/
[1] https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
lib/remotes/s3.c | 80 ++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 68 insertions(+), 12 deletions(-)
diff --git a/lib/remotes/s3.c b/lib/remotes/s3.c
index b78c127807ca..296df61c06e5 100644
--- a/lib/remotes/s3.c
+++ b/lib/remotes/s3.c
@@ -152,10 +152,11 @@ static int s3erofs_prepare_url(struct s3erofs_curl_request *req,
const char *schema, *host;
/* an additional slash is added, which wasn't specified by user inputs */
bool slash = false;
+ bool bucket_domain = false;
char *url = req->url;
int pos, canonical_uri_pos, i, ret = 0;
- if (!endpoint || !path)
+ if (!endpoint)
return -EINVAL;
host = s3erofs_parse_host(endpoint, &schema);
@@ -164,14 +165,27 @@ static int s3erofs_prepare_url(struct s3erofs_curl_request *req,
if (!schema)
schema = https;
+ if (__erofs_unlikely(!path))
+ path = "/";
+ if (__erofs_unlikely(path[0] == '/')) {
+ path++;
+ bucket_domain = true;
+ if (url_style != S3EROFS_URL_STYLE_VIRTUAL_HOST)
+ return -EINVAL;
+ }
+
if (url_style == S3EROFS_URL_STYLE_PATH) {
pos = snprintf(url, S3EROFS_URL_LEN, "%s%s/%s", schema,
host, path);
canonical_uri_pos = pos - strlen(path) - 1;
} else {
- const char * split = strchr(path, '/');
+ const char *split = strchr(path, '/');
- if (!split) {
+ if (bucket_domain) {
+ pos = snprintf(url, S3EROFS_URL_LEN, "%s%s/%s",
+ schema, host, path);
+ canonical_uri_pos = pos - 1;
+ } else if (!split) {
pos = snprintf(url, S3EROFS_URL_LEN, "%s%s.%s/",
schema, path, host);
canonical_uri_pos = pos - 1;
@@ -191,12 +205,26 @@ static int s3erofs_prepare_url(struct s3erofs_curl_request *req,
pos += snprintf(url + pos, S3EROFS_URL_LEN - pos, "/%s", key);
}
- if (sig == S3EROFS_SIGNATURE_VERSION_2)
- i = snprintf(req->canonical_uri, S3EROFS_CANONICAL_URI_LEN,
- "/%s%s%s", path, slash ? "/" : "", key ? key : "");
- else
+ if (sig == S3EROFS_SIGNATURE_VERSION_2) {
+ if (bucket_domain) {
+ const char *bucket = strchr(host, '.');
+
+ if (!bucket) {
+ ret = -EINVAL;
+ goto err;
+ }
+ i = snprintf(req->canonical_uri, S3EROFS_CANONICAL_URI_LEN,
+ "/%.*s/", (int)(bucket - host), host);
+ } else {
+ req->canonical_uri[0] = '/';
+ i = 1;
+ }
+ i += snprintf(req->canonical_uri + i, S3EROFS_CANONICAL_URI_LEN - i,
+ "%s%s%s", path, slash ? "/" : "", key ? key : "");
+ } else {
i = snprintf(req->canonical_uri, S3EROFS_CANONICAL_URI_LEN,
"%s", url + canonical_uri_pos);
+ }
req->canonical_uri[i] = '\0';
if (params) {
@@ -841,14 +869,17 @@ s3erofs_create_object_iterator(struct erofs_s3 *s3, const char *path,
return ERR_PTR(-ENOMEM);
iter->s3 = s3;
prefix = strchr(path, '/');
- if (prefix) {
+ if (!prefix) {
+ iter->bucket = strdup(path);
+ iter->prefix = NULL;
+ } else if (prefix == path) {
+ iter->bucket = NULL;
+ iter->prefix = strdup(path + 1);
+ } else {
if (++prefix - path > S3EROFS_PATH_MAX)
return ERR_PTR(-EINVAL);
iter->bucket = strndup(path, prefix - path);
iter->prefix = strdup(prefix);
- } else {
- iter->bucket = strdup(path);
- iter->prefix = NULL;
}
iter->delimiter = delimiter;
iter->is_truncated = true;
@@ -1041,8 +1072,8 @@ int s3erofs_build_trees(struct erofs_importer *im, struct erofs_s3 *s3,
if (!obj) {
break;
} else if (IS_ERR(obj)) {
- erofs_err("failed to get next object");
ret = PTR_ERR(obj);
+ erofs_err("failed to get next object: %s", erofs_strerror(ret));
goto err_iter;
}
@@ -1356,7 +1387,32 @@ static bool test_s3erofs_prepare_url(void)
.expected_canonical_v2 = "/bucket/path/to/file-name_v2.0.txt",
.expected_canonical_v4 = "/path/to/file-name_v2.0.txt",
.expected_ret = 0,
+ },
+ {
+ .name = "S3 Bucket domain name (1)",
+ .endpoint = "bucket.s3.amazonaws.com",
+ .path = "/",
+ .key = "object.txt",
+ .url_style = S3EROFS_URL_STYLE_VIRTUAL_HOST,
+ .expected_url =
+ "https://bucket.s3.amazonaws.com/object.txt",
+ .expected_canonical_v2 = "/bucket/object.txt",
+ .expected_canonical_v4 = "/object.txt",
+ .expected_ret = 0,
+ },
+ {
+ .name = "S3 Bucket domain name (2)",
+ .endpoint = "bucket.s3.amazonaws.com",
+ .path = NULL,
+ .key = "object.txt",
+ .url_style = S3EROFS_URL_STYLE_VIRTUAL_HOST,
+ .expected_url =
+ "https://bucket.s3.amazonaws.com/object.txt",
+ .expected_canonical_v2 = "/bucket/object.txt",
+ .expected_canonical_v4 = "/object.txt",
+ .expected_ret = 0,
}
+
};
int i;
int pass = 0;
--
2.43.5
More information about the Linux-erofs
mailing list