[PATCH 1/2] erofs-utils: lib: oci: add on-demand blob cache
ChengyuZhu6
hudson at cyzhu.com
Mon Dec 1 02:16:25 AEDT 2025
From: Chengyu Zhu <hudsonzhu at tencent.com>
Add an on-disk cache for OCI blobs and hook it into the
ocierofs_iostream backend. The cache file is created under
/var/run/erofs/cache/oci/ and sized to the blob length.
Reads probe the local sparse file with SEEK_HOLE and only download
missing regions. This avoids redundant downloads and improves
random access performance for remote images.
Signed-off-by: Chengyu Zhu <hudsonzhu at tencent.com>
---
lib/liberofs_oci.h | 1 +
lib/remotes/oci.c | 212 +++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 205 insertions(+), 8 deletions(-)
diff --git a/lib/liberofs_oci.h b/lib/liberofs_oci.h
index 5298f18..f7b26b6 100644
--- a/lib/liberofs_oci.h
+++ b/lib/liberofs_oci.h
@@ -62,6 +62,7 @@ struct ocierofs_ctx {
struct ocierofs_iostream {
struct ocierofs_ctx *ctx;
u64 offset;
+ int cache_fd; /* fd of the local blob cache file; -1 until ocierofs_cache() opens it */
};
/*
diff --git a/lib/remotes/oci.c b/lib/remotes/oci.c
index ac8d495..8b253a3 100644
--- a/lib/remotes/oci.c
+++ b/lib/remotes/oci.c
@@ -5,6 +5,7 @@
*/
#define _GNU_SOURCE
#include "erofs/internal.h"
+#include "erofs/defs.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -12,6 +13,9 @@
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
+#ifdef HAVE_SYS_SENDFILE_H
+#include <sys/sendfile.h>
+#endif
#include <errno.h>
#ifdef HAVE_CURL_CURL_H
#include <curl/curl.h>
@@ -29,6 +33,10 @@
#include "liberofs_private.h"
#include "liberofs_gzran.h"
+#ifndef SEEK_HOLE
+#define SEEK_HOLE 4 /* fallback when the libc headers lack SEEK_HOLE; NOTE(review): 4 is the Linux value, confirm for other targets */
+#endif
+
#ifdef OCIEROFS_ENABLED
#define DOCKER_REGISTRY "docker.io"
@@ -1425,25 +1433,208 @@ out:
return ret;
}
-static ssize_t ocierofs_io_pread(struct erofs_vfile *vf, void *buf, size_t len, u64 offset)
+/*
+ * Ensure that [offset, offset + needed) of the current blob is backed by data
+ * in the on-disk cache file, downloading the missing part if necessary.
+ *
+ * The cache file is opened (and created if needed) on first use and, when the
+ * blob's layer is known, sized to that layer so that regions which have not
+ * been fetched yet show up as holes.
+ *
+ * Returns a negative errno on failure and a non-negative value otherwise.
+ */
+static int ocierofs_cache(struct ocierofs_iostream *oci_iostream, off_t offset, size_t needed)
 {
-	struct ocierofs_iostream *oci_iostream = *(struct ocierofs_iostream **)vf->payload;
+	struct ocierofs_ctx *ctx = oci_iostream->ctx;
 	void *download_buf = NULL;
 	size_t download_size = 0;
-	ssize_t ret;
+	int ret = 0;
+	off_t hole, align_offset;
+	size_t download_len;
+	int layer_idx;
+
+	if (oci_iostream->cache_fd < 0) {
+		char *path;
+
+		/*
+		 * Best effort: an already existing directory is fine and a
+		 * real failure surfaces at open() below.  Do not request
+		 * world-writable modes, otherwise any local user could
+		 * replace or poison cached blob data.
+		 */
+		mkdir("/var/run/erofs", 0755);
+		mkdir("/var/run/erofs/cache", 0755);
+		mkdir("/var/run/erofs/cache/oci", 0755);
+
+		if (asprintf(&path, "/var/run/erofs/cache/oci/%s",
+			     ctx->blob_digest ?: "erofs_oci_unknown") < 0)
+			return -ENOMEM;
+
+		oci_iostream->cache_fd = open(path, O_RDWR | O_CREAT, 0644);
+		free(path);
+
+		if (oci_iostream->cache_fd < 0)
+			return -errno;
+
+		/*
+		 * NOTE(review): when the layer is not found the file keeps
+		 * its current size; a fresh (empty) file then makes every
+		 * probe below hit ENXIO, so nothing is downloaded -- confirm
+		 * that this is intended.
+		 */
+		layer_idx = ocierofs_find_layer_by_digest(ctx, ctx->blob_digest);
+		if (layer_idx >= 0 &&
+		    ftruncate(oci_iostream->cache_fd, ctx->layers[layer_idx]->size) < 0) {
+			/*
+			 * Do not keep a half-initialised cache around: later
+			 * calls would skip this setup and use a mis-sized file.
+			 */
+			ret = -errno;
+			close(oci_iostream->cache_fd);
+			oci_iostream->cache_fd = -1;
+			return ret;
+		}
+	}
+
+	/* Find the first region at or after @offset that is not cached yet. */
+	hole = lseek(oci_iostream->cache_fd, offset, SEEK_HOLE);
+	if (hole < 0) {
+		/* ENXIO: @offset is at or beyond the end of the cache file. */
+		if (errno == ENXIO)
+			return 0;
+		return -errno;
+	}
+	if (hole >= offset + needed)
+		return 0;
-	ret = ocierofs_download_blob_range(oci_iostream->ctx, offset, len,
+	/* Fetch whole chunks, starting at the chunk that contains the hole. */
+	align_offset = round_down(hole, OCIEROFS_IO_CHUNK_SIZE);
+	download_len = max_t(size_t, offset + needed - align_offset, OCIEROFS_IO_CHUNK_SIZE);
+
+	ret = ocierofs_download_blob_range(ctx, align_offset, download_len,
 					   &download_buf, &download_size);
 	if (ret < 0)
 		return ret;
 	if (download_buf && download_size > 0) {
-		memcpy(buf, download_buf, download_size);
-		free(download_buf);
-		return download_size;
+		char *p = download_buf;
+		size_t to_write = download_size;
+		ssize_t written = 0;
+
+		while (to_write > 0) {
+			ssize_t w = pwrite(oci_iostream->cache_fd, p, to_write,
+					   align_offset + written);
+
+			if (w < 0) {
+				if (errno == EINTR)
+					continue;
+				ret = -errno;
+				goto out_free;
+			}
+			if (w == 0) {
+				/* No progress: bail out instead of spinning forever. */
+				ret = -EIO;
+				goto out_free;
+			}
+			written += w;
+			p += w;
+			to_write -= w;
+		}
 	}
-	return 0;
+out_free:
+	free(download_buf);
+	return ret;
+}
+
+/*
+ * Copy up to @count bytes of blob data from the local cache file to @vout,
+ * starting at *@pos, or at the stream offset when @pos is NULL.
+ *
+ * The requested range is made available in the cache first.  The position
+ * (*@pos or the stream offset) is advanced by the number of bytes copied.
+ *
+ * Returns the number of bytes copied, which is short when the end of the
+ * cache file is reached or when an error occurs after some data has already
+ * been copied, or a negative errno if nothing could be copied.
+ */
+static ssize_t ocierofs_io_sendfile(struct erofs_vfile *vout, struct erofs_vfile *vin,
+				    off_t *pos, size_t count)
+{
+	struct ocierofs_iostream *oci_iostream = *(struct ocierofs_iostream **)vin->payload;
+	off_t offset = pos ? *pos : (off_t)oci_iostream->offset;
+	size_t remaining = count;
+	ssize_t total_written = 0;
+	struct stat st;
+	int ret, err = 0;
+
+	ret = ocierofs_cache(oci_iostream, offset, count);
+	if (ret < 0)
+		return ret;
+
+	if (fstat(oci_iostream->cache_fd, &st) < 0)
+		return -errno;
+
+	while (remaining > 0 && offset < st.st_size) {
+		/* Never read past the end of the cache file. */
+		size_t chunk = min_t(size_t, remaining, st.st_size - offset);
+#if defined(HAVE_SYS_SENDFILE_H) && defined(HAVE_SENDFILE)
+		off_t in_offset = offset;
+		ssize_t sent;
+
+		sent = sendfile(vout->fd, oci_iostream->cache_fd, &in_offset, chunk);
+		if (sent < 0) {
+			if (errno == EINTR || errno == EAGAIN)
+				continue;
+			err = -errno;
+			break;
+		}
+		if (sent == 0)
+			break;
+
+		total_written += sent;
+		remaining -= sent;
+		offset += sent;
+#else
+		char buf[32768];
+		size_t done = 0;
+		ssize_t nread;
+
+		nread = pread(oci_iostream->cache_fd, buf,
+			      min_t(size_t, chunk, sizeof(buf)), offset);
+		if (nread < 0) {
+			if (errno == EINTR)
+				continue;
+			err = -errno;
+			break;
+		}
+		if (nread == 0)
+			break;
+
+		while (done < (size_t)nread) {
+			ssize_t w = write(vout->fd, buf + done, nread - done);
+
+			if (w < 0) {
+				if (errno == EINTR)
+					continue;
+				err = -errno;
+				break;
+			}
+			done += w;
+		}
+
+		/* Account for what actually reached @vout, even on failure. */
+		total_written += done;
+		offset += done;
+		remaining -= done;
+		if (err)
+			break;
+#endif
+	}
+
+	/* Only report the error if no data was transferred at all. */
+	if (err && !total_written)
+		return err;
+
+	if (pos)
+		*pos = offset;
+	else
+		oci_iostream->offset = offset;
+
+	return total_written;
+}
+
+/*
+ * Read up to @len bytes at @offset of the blob into @buf, serving the data
+ * from the local cache file after making sure the range has been fetched.
+ *
+ * Returns the number of bytes read, which is short when the end of the cache
+ * file is reached or when an error occurs after some data has already been
+ * read, or a negative errno if nothing could be read.
+ */
+static ssize_t ocierofs_io_pread(struct erofs_vfile *vf, void *buf, size_t len, u64 offset)
+{
+	struct ocierofs_iostream *oci_iostream = *(struct ocierofs_iostream **)vf->payload;
+	char *p = buf;
+	size_t remaining = len;
+	ssize_t total_read = 0;
+	int ret;
+
+	ret = ocierofs_cache(oci_iostream, offset, len);
+	if (ret < 0)
+		return ret;
+
+	while (remaining > 0) {
+		size_t chunk = min_t(size_t, remaining, OCIEROFS_IO_CHUNK_SIZE);
+		ssize_t n = pread(oci_iostream->cache_fd, p, chunk, offset);
+
+		if (n < 0) {
+			if (errno == EINTR)
+				continue;
+			/* Do not throw away bytes already copied into @buf. */
+			return total_read ? total_read : -errno;
+		}
+		if (n == 0)
+			break;
+
+		p += n;
+		offset += n;
+		remaining -= n;
+		total_read += n;
+
+		/* A short read means the end of the cache file was reached. */
+		if ((size_t)n < chunk)
+			break;
+	}
+
+	return total_read;
 }
static ssize_t ocierofs_io_read(struct erofs_vfile *vf, void *buf, size_t len)
@@ -1462,6 +1653,9 @@ static void ocierofs_io_close(struct erofs_vfile *vfile)
{
struct ocierofs_iostream *oci_iostream = *(struct ocierofs_iostream **)vfile->payload;
+ if (oci_iostream->cache_fd >= 0)
+ close(oci_iostream->cache_fd);
+
ocierofs_ctx_cleanup(oci_iostream->ctx);
free(oci_iostream->ctx);
free(oci_iostream);
@@ -1472,6 +1666,7 @@ static struct erofs_vfops ocierofs_io_vfops = {
.pread = ocierofs_io_pread,
.read = ocierofs_io_read,
.close = ocierofs_io_close,
+ .sendfile = ocierofs_io_sendfile,
};
int ocierofs_io_open(struct erofs_vfile *vfile, const struct ocierofs_config *cfg)
@@ -1499,6 +1694,7 @@ int ocierofs_io_open(struct erofs_vfile *vfile, const struct ocierofs_config *cf
oci_iostream->ctx = ctx;
oci_iostream->offset = 0;
+ oci_iostream->cache_fd = -1;
*vfile = (struct erofs_vfile){.ops = &ocierofs_io_vfops};
*(struct ocierofs_iostream **)vfile->payload = oci_iostream;
return 0;
--
2.47.1
More information about the Linux-erofs
mailing list