[PATCH 2/2] erofs-utils: lib: support AWS SigV4 for S3 backend

zhaoyifan (H) zhaoyifan28 at huawei.com
Mon Nov 24 23:26:51 AEDT 2025


On 2025/11/21 17:00, Gao Xiang wrote:

> Hi Yifan,
>
> On 2025/11/20 17:22, Yifan Zhao wrote:
>> This patch introduces support for AWS Signature Version 4 for s3erofs
>> remote backend.
>>
>> Now users can specify the following options:
>>   - passwd_file=Y, S3 credentials file in the format $ak:$sk (optional);
>>   - urlstyle=<vhost, path>, S3 API calling style (optional);
>>   - sig=<2,4>, S3 API signature version (optional);
>>   - region=W, region code for S3 endpoint (required for sig=4).
>>
>> e.g.:
>> mkfs.erofs \
>>      --s3=s3.us-east-1.amazonaws.com,sig=4,region=us-east-1 \
>>      output.img some_bucket/path/to/object
>
> Thanks for the effort!
>
> Could we find a public s3 bucket and post here as an example?

Hi Xiang,

I have found *noaa-goes19.s3.amazonaws.com* and updated the commit message.

>
>>
>> Signed-off-by: Yifan Zhao <zhaoyifan28 at huawei.com>
>> ---
>>   lib/liberofs_s3.h |   1 +
>>   lib/remotes/s3.c  | 567 +++++++++++++++++++++++++++++++++++++---------
>>   mkfs/main.c       |  14 +-
>>   3 files changed, 471 insertions(+), 111 deletions(-)
>>
>> diff --git a/lib/liberofs_s3.h b/lib/liberofs_s3.h
>> index f2ec822..f4886cd 100644
>> --- a/lib/liberofs_s3.h
>> +++ b/lib/liberofs_s3.h
>> @@ -27,6 +27,7 @@ enum s3erofs_signature_version {
>>   struct erofs_s3 {
>>       void *easy_curl;
>>       const char *endpoint;
>> +    const char *region;
>>       char access_key[S3_ACCESS_KEY_LEN + 1];
>>       char secret_key[S3_SECRET_KEY_LEN + 1];
>>   diff --git a/lib/remotes/s3.c b/lib/remotes/s3.c
>> index 0f7e1a9..3263dd7 100644
>> --- a/lib/remotes/s3.c
>> +++ b/lib/remotes/s3.c
>> @@ -23,7 +23,8 @@
>>   #define S3EROFS_PATH_MAX        1024
>>   #define S3EROFS_MAX_QUERY_PARAMS    16
>>   #define S3EROFS_URL_LEN            8192
>> -#define S3EROFS_CANONICAL_QUERY_LEN    2048
>> +#define S3EROFS_CANONICAL_URI_LEN    1024
>
> Is there a spec to document that?

Sorry, I made a mistake. The AWS documentation [1] explicitly specifies

that the maximum object key length is 1024 bytes; since URI encoding can

expand each byte into a three-character %XX sequence, limiting the buffer

to 1024 bytes is unreasonable. I've reverted it back to 2048.

[1] https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html

>
>> +#define S3EROFS_CANONICAL_QUERY_LEN S3EROFS_URL_LEN
>>     #define BASE64_ENCODE_LEN(len)    (((len + 2) / 3) * 4)
>>   @@ -34,52 +35,142 @@ struct s3erofs_query_params {
>>   };
>>     struct s3erofs_curl_request {
>> -    const char *method;
>
> It seems it's removed... S3 only allows `GET` method?

In our foreseeable usage scenarios, we will not modify the OBS bucket;

therefore, I believe we can use only the GET method to reduce the

number of unnecessary parameters.

>
>>       char url[S3EROFS_URL_LEN];
>> +    char canonical_uri[S3EROFS_CANONICAL_URI_LEN];
>>       char canonical_query[S3EROFS_CANONICAL_QUERY_LEN];
>>   };
>>   +static const char *s3erofs_parse_host(const char *endpoint, const 
>> char **schema) {
>
> K&R style is:
>
> static const char *s3erofs_parse_host()
> {
>     if (!tmp) {
>         ...
>     } else {
>         ...
>     }
>
> }
>
Fixed.
>> +    const char *tmp = strstr(endpoint, "://");
>> +    const char *host;
>> +
>> +    if (!tmp) {
>> +        host = endpoint;
>> +        if (schema)
>> +            *schema = NULL;
>> +    } else {
>> +        host = tmp + sizeof("://") - 1;
>> +        if (schema) {
>> +            *schema = strndup(endpoint, host - endpoint);
>> +            if (!*schema)
>> +                return ERR_PTR(-ENOMEM);
>> +        }
>> +    }
>> +
>> +    return host;
>> +}
>> +
>> +static int s3erofs_urlencode(const char *input, char **output)
>> +{
>
> static void *s3erofs_urlencode(const char *input)
> {
>     char *output;
>
>     output = malloc(strlen(input) * 3 + 1);
>     if (!output)
>         return ERR_PTR(-ENOMEM);
>
>     ...
>
>     return output;
>
> }
>
Fixed.
>> +    static const char hex[] = "0123456789ABCDEF";
>> +    int i;
>> +    char c, *p;
>> +
>> +    *output = malloc(strlen(input) * 3 + 1);
>> +    if (!*output)
>> +        return -ENOMEM;
>> +
>> +    p = *output;
>> +    for (i = 0; i < strlen(input); ++i) {
>> +        c = (unsigned char)input[i];
>> +
>> +        // Unreserved characters: A-Z a-z 0-9 - . _ ~
>> +        if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
>> +            (c >= '0' && c <= '9') || c == '-' || c == '.' || c == 
>> '_' ||
>> +            c == '~') {
>> +            *p++ = c;
>> +        } else {
>> +            *p++ = '%';
>> +            *p++ = hex[c >> 4];
>> +            *p++ = hex[c & 0x0F];
>> +        }
>> +    }
>> +    *p = '\0';
>> +
>> +    return 0;
>> +}
>> +
>> +struct kv_pair {
>> +    char *key;
>> +    char *value;
>> +};
>> +
>> +static int compare_kv_pair(const void *a, const void *b)
>> +{
>> +    return strcmp(((const struct kv_pair *)a)->key, ((const struct 
>> kv_pair *)b)->key);
>> +}
>> +
>> +static int s3erofs_prepare_canonical_query(struct 
>> s3erofs_curl_request *req,
>> +                       struct s3erofs_query_params *params)
>> +{
>> +    struct kv_pair *pairs;
>> +    int i, pos = 0, ret = 0;
>> +
>> +    if (params->num == 0)
>
>     if (!params->num) {
>     }
>
Fixed.
>> +        return 0;
>> +
>> +    pairs = malloc(sizeof(struct kv_pair) * params->num);
>> +    for (i = 0; i < params->num; i++) {
>> +        ret = s3erofs_urlencode(params->key[i], &pairs[i].key);
>> +        if (ret < 0)
>> +            goto out;
>> +        ret = s3erofs_urlencode(params->value[i], &pairs[i].value);
>
> Why we use urlencoding now?
>
According to the AWS docs [2], URL encoding is needed when calculating the
*CanonicalQueryString*,

and a custom UriEncode function is recommended to cover the (potential)
semantic difference.

[2] 
https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html


Thanks,

Yifan Zhao

> Thanks,
> Gao Xiang


More information about the Linux-erofs mailing list