[PATCH 2/2] erofs-utils: lib: support AWS SigV4 for S3 backend
zhaoyifan (H)
zhaoyifan28 at huawei.com
Mon Nov 24 23:26:51 AEDT 2025
On 2025/11/21 17:00, Gao Xiang wrote:
> Hi Yifan,
>
> On 2025/11/20 17:22, Yifan Zhao wrote:
>> This patch introduces support for AWS Signature Version 4 for s3erofs
>> remote backend.
>>
>> Now users can specify the following options:
>> - passwd_file=Y, S3 credentials file in the format $ak:$sk (optional);
>> - urlstyle=<vhost, path>, S3 API calling style (optional);
>> - sig=<2,4>, S3 API signature version (optional);
>> - region=W, region code for S3 endpoint (required for sig=4).
>>
>> e.g.:
>> mkfs.erofs \
>> --s3=s3.us-east-1.amazonaws.com,sig=4,region=us-east-1 \
>> output.img some_bucket/path/to/object
>
> Thanks for the effort!
>
> Could we find a public s3 bucket and post here as an example?
Hi Xiang,
I have found *noaa-goes19.s3.amazonaws.com* and updated the commit message.
>
>>
>> Signed-off-by: Yifan Zhao <zhaoyifan28 at huawei.com>
>> ---
>> lib/liberofs_s3.h | 1 +
>> lib/remotes/s3.c | 567 +++++++++++++++++++++++++++++++++++++---------
>> mkfs/main.c | 14 +-
>> 3 files changed, 471 insertions(+), 111 deletions(-)
>>
>> diff --git a/lib/liberofs_s3.h b/lib/liberofs_s3.h
>> index f2ec822..f4886cd 100644
>> --- a/lib/liberofs_s3.h
>> +++ b/lib/liberofs_s3.h
>> @@ -27,6 +27,7 @@ enum s3erofs_signature_version {
>> struct erofs_s3 {
>> void *easy_curl;
>> const char *endpoint;
>> + const char *region;
>> char access_key[S3_ACCESS_KEY_LEN + 1];
>> char secret_key[S3_SECRET_KEY_LEN + 1];
>> diff --git a/lib/remotes/s3.c b/lib/remotes/s3.c
>> index 0f7e1a9..3263dd7 100644
>> --- a/lib/remotes/s3.c
>> +++ b/lib/remotes/s3.c
>> @@ -23,7 +23,8 @@
>> #define S3EROFS_PATH_MAX 1024
>> #define S3EROFS_MAX_QUERY_PARAMS 16
>> #define S3EROFS_URL_LEN 8192
>> -#define S3EROFS_CANONICAL_QUERY_LEN 2048
>> +#define S3EROFS_CANONICAL_URI_LEN 1024
>
> Is there a spec to document that?
Sorry, I made a mistake. The AWS documentation [1] explicitly specifies
that the maximum key length is 1024 bytes; therefore, setting the length
here to 1024 is unreasonable. I've reverted it back to 2048.
[1] https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
>
>> +#define S3EROFS_CANONICAL_QUERY_LEN S3EROFS_URL_LEN
>> #define BASE64_ENCODE_LEN(len) (((len + 2) / 3) * 4)
>> @@ -34,52 +35,142 @@ struct s3erofs_query_params {
>> };
>> struct s3erofs_curl_request {
>> - const char *method;
>
> It seems it's removed... S3 only allows `GET` method?
In our foreseeable usage scenarios, we will not modify the OBS bucket;
therefore, I believe we can use only the GET method to reduce the
number of unnecessary parameters.
>
>> char url[S3EROFS_URL_LEN];
>> + char canonical_uri[S3EROFS_CANONICAL_URI_LEN];
>> char canonical_query[S3EROFS_CANONICAL_QUERY_LEN];
>> };
>> +static const char *s3erofs_parse_host(const char *endpoint, const
>> char **schema) {
>
> K&R style is:
>
> static const char *s3erofs_parse_host()
> {
> if (!tmp) {
> ...
> } else {
> ...
> }
>
> }
>
Fixed.
>> + const char *tmp = strstr(endpoint, "://");
>> + const char *host;
>> +
>> + if (!tmp) {
>> + host = endpoint;
>> + if (schema)
>> + *schema = NULL;
>> + } else {
>> + host = tmp + sizeof("://") - 1;
>> + if (schema) {
>> + *schema = strndup(endpoint, host - endpoint);
>> + if (!*schema)
>> + return ERR_PTR(-ENOMEM);
>> + }
>> + }
>> +
>> + return host;
>> +}
>> +
>> +static int s3erofs_urlencode(const char *input, char **output)
>> +{
>
> static void *s3erofs_urlencode(const char *input)
> {
> char *output;
>
> output = malloc(strlen(input) * 3 + 1);
> if (!output)
> return ERR_PTR(-ENOMEM);
>
> ...
>
> return output;
>
> }
>
Fixed.
>> + static const char hex[] = "0123456789ABCDEF";
>> + int i;
>> + char c, *p;
>> +
>> + *output = malloc(strlen(input) * 3 + 1);
>> + if (!*output)
>> + return -ENOMEM;
>> +
>> + p = *output;
>> + for (i = 0; i < strlen(input); ++i) {
>> + c = (unsigned char)input[i];
>> +
>> + // Unreserved characters: A-Z a-z 0-9 - . _ ~
>> + if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
>> + (c >= '0' && c <= '9') || c == '-' || c == '.' || c ==
>> '_' ||
>> + c == '~') {
>> + *p++ = c;
>> + } else {
>> + *p++ = '%';
>> + *p++ = hex[c >> 4];
>> + *p++ = hex[c & 0x0F];
>> + }
>> + }
>> + *p = '\0';
>> +
>> + return 0;
>> +}
>> +
>> +struct kv_pair {
>> + char *key;
>> + char *value;
>> +};
>> +
>> +static int compare_kv_pair(const void *a, const void *b)
>> +{
>> + return strcmp(((const struct kv_pair *)a)->key, ((const struct
>> kv_pair *)b)->key);
>> +}
>> +
>> +static int s3erofs_prepare_canonical_query(struct
>> s3erofs_curl_request *req,
>> + struct s3erofs_query_params *params)
>> +{
>> + struct kv_pair *pairs;
>> + int i, pos = 0, ret = 0;
>> +
>> + if (params->num == 0)
>
> if (!params->num) {
> }
>
Fixed.
>> + return 0;
>> +
>> + pairs = malloc(sizeof(struct kv_pair) * params->num);
>> + for (i = 0; i < params->num; i++) {
>> + ret = s3erofs_urlencode(params->key[i], &pairs[i].key);
>> + if (ret < 0)
>> + goto out;
>> + ret = s3erofs_urlencode(params->value[i], &pairs[i].value);
>
> Why we use urlencoding now?
>
According to the AWS docs [2], URL encoding is needed when calculating the
*CanonicalQueryString*, and a custom UriEncode function is recommended to
cover the (potential) semantic difference.
[2]
https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html
Thanks,
Yifan Zhao
> Thanks,
> Gao Xiang
More information about the Linux-erofs
mailing list