[PATCH v1] erofs-utils: add NBD-backed OCI image mounting

ChengyuZhu6 hudson at cyzhu.com
Thu Sep 4 18:06:24 AEST 2025


From: Chengyu Zhu <hudsonzhu at tencent.com>

- Add HTTP range downloads for OCI blobs
- Introduce ocierofs_iostream for virtual file I/O
- Add oci option for OCI image mounting with NBD backend

New usage: mount.erofs -t erofs.nbd -o <options> source-image mountpoint

Supported oci options:
- oci.platform=os/arch (default: linux/amd64)
- oci=N (extract specific layer, default: all layers)
- oci.username/oci.password (basic authentication)

e.g.:
sudo mount.erofs -t erofs.nbd -o 'oci=0,oci.platform=linux/amd64' \
quay.io/chengyuzhu6/golang:1.22.8-erofs /tmp/test/

Signed-off-by: Chengyu Zhu <hudsonzhu at tencent.com>
---
 lib/liberofs_oci.h |  14 +++
 lib/remotes/oci.c  | 249 ++++++++++++++++++++++++++++++++++++++++++++-
 mount/Makefile.am  |   2 +-
 mount/main.c       | 233 ++++++++++++++++++++++++++++++++++--------
 4 files changed, 452 insertions(+), 46 deletions(-)

diff --git a/lib/liberofs_oci.h b/lib/liberofs_oci.h
index 2896308..077b7f9 100644
--- a/lib/liberofs_oci.h
+++ b/lib/liberofs_oci.h
@@ -55,6 +55,12 @@ struct ocierofs_ctx {
 	int layer_count;
 };
 
+struct ocierofs_iostream {		/* state behind the OCI-backed erofs_vfile ops */
+	struct ocierofs_ctx *ctx;	/* OCI context given to ocierofs_iostream_open() */
+	struct erofs_vfile vf;		/* vfile; its payload stores a pointer back to this struct */
+	u64 offset;			/* current position used by the read and lseek ops */
+};
+
 int ocierofs_init(struct ocierofs_ctx *ctx, const struct ocierofs_config *config);
 
 /*
@@ -67,6 +73,14 @@ int ocierofs_init(struct ocierofs_ctx *ctx, const struct ocierofs_config *config
 int ocierofs_build_trees(struct erofs_importer *importer,
 			 const struct ocierofs_config *cfg);
 
+int ocierofs_is_erofs_native_image(struct ocierofs_ctx *ctx);
+
+void ocierofs_ctx_cleanup(struct ocierofs_ctx *ctx);
+
+int ocierofs_iostream_open(struct ocierofs_iostream *oci_iostream, struct ocierofs_ctx *oci_ctx);
+
+void ocierofs_iostream_close(struct ocierofs_iostream *oci_iostream);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/remotes/oci.c b/lib/remotes/oci.c
index f2b08b2..5c393e1 100644
--- a/lib/remotes/oci.c
+++ b/lib/remotes/oci.c
@@ -33,6 +33,9 @@
 #define OCI_MEDIATYPE_MANIFEST "application/vnd.oci.image.manifest.v1+json"
 #define OCI_MEDIATYPE_INDEX "application/vnd.oci.image.index.v1+json"
 
+/* Erofs Native Layer Media Type */
+#define EROFS_MEDIATYPE "application/vnd.erofs"
+
 struct ocierofs_request {
 	char *url;
 	struct curl_slist *headers;
@@ -1161,7 +1164,7 @@ out:
  * Clean up CURL handle, free all allocated string parameters, and
  * reset the OCI context structure to a clean state.
  */
-static void ocierofs_ctx_cleanup(struct ocierofs_ctx *ctx)
+void ocierofs_ctx_cleanup(struct ocierofs_ctx *ctx)
 {
 	if (!ctx)
 		return;
@@ -1226,3 +1229,247 @@ int ocierofs_build_trees(struct erofs_importer *importer,
 	ocierofs_ctx_cleanup(&ctx);
 	return ret;
 }
+
+/*
+ * Download bytes [offset, offset + length) of the selected layer blob with an
+ * HTTP Range request; length == 0 asks for everything from offset to the end.
+ * On success returns 0 and passes a malloc'ed buffer to the caller through
+ * *out_buf / *out_size (NULL / 0 when there is nothing to read); a non-zero
+ * length is never exceeded.  Returns a negative errno on failure.
+ */
+static int ocierofs_download_blob_range(struct ocierofs_ctx *ctx, off_t offset, size_t length,
+					void **out_buf, size_t *out_size)
+{
+	struct ocierofs_request req = {};
+	struct ocierofs_response resp = {};
+	struct curl_slist *hdr;
+	char rangehdr[64];
+	long http_code = 0;
+	int index = ctx->layer_index;
+	int ret = -ENOMEM;
+	size_t skip, avail;
+	u64 blob_size;
+
+	*out_buf = NULL;
+	*out_size = 0;
+	if (offset < 0 || index < 0 || index >= ctx->layer_count)
+		return -EINVAL;
+
+	blob_size = ctx->layers[index]->size;
+	if ((u64)offset >= blob_size)
+		return 0;
+	/* clamp without forming offset + length, which could overflow */
+	if (length > blob_size - (u64)offset)
+		length = (size_t)(blob_size - (u64)offset);
+
+	if (asprintf(&req.url, "https://%s/v2/%s/blobs/%s",
+		     ocierofs_get_api_registry(ctx->registry), ctx->repository,
+		     ctx->layers[index]->digest) == -1)
+		return -ENOMEM;
+
+	if (length)
+		snprintf(rangehdr, sizeof(rangehdr), "Range: bytes=%lld-%lld",
+			 (long long)offset, (long long)offset + (long long)length - 1);
+	else
+		snprintf(rangehdr, sizeof(rangehdr), "Range: bytes=%lld-",
+			 (long long)offset);
+
+	if (ctx->auth_header && strstr(ctx->auth_header, "Bearer")) {
+		req.headers = curl_slist_append(NULL, ctx->auth_header);
+		if (!req.headers)
+			goto out;
+	}
+	/* curl_slist_append() returns NULL on failure and leaves the list as is */
+	hdr = curl_slist_append(req.headers, rangehdr);
+	if (!hdr)
+		goto out;
+	req.headers = hdr;
+
+	curl_easy_reset(ctx->curl);
+
+	ret = ocierofs_curl_setup_common_options(ctx->curl);
+	if (ret)
+		goto out;
+
+	ret = ocierofs_curl_setup_rq(ctx->curl, req.url, OCIEROFS_HTTP_GET,
+				     req.headers,
+				     ocierofs_write_callback,
+				     &resp, NULL, NULL);
+	if (ret)
+		goto out;
+
+	ret = ocierofs_curl_perform(ctx->curl, &http_code);
+	if (ret)
+		goto out;
+
+	if (http_code != 206 && http_code != 200) {
+		erofs_err("HTTP range request failed with code %ld", http_code);
+		ret = -EIO;
+		goto out;
+	}
+
+	/* 206 carries only the range; 200 means the whole blob came back instead */
+	skip = http_code == 206 ? 0 : (size_t)offset;
+	if (skip >= resp.size)
+		goto out;
+	avail = resp.size - skip;
+	if (length && avail > length)
+		avail = length;	/* the caller's buffer holds only length bytes */
+	if (skip)
+		memmove(resp.data, (char *)resp.data + skip, avail);
+	*out_buf = resp.data;
+	*out_size = avail;
+	resp.data = NULL;	/* ownership moves to the caller */
+out:
+	curl_slist_free_all(req.headers);
+	free(req.url);
+	free(resp.data);
+	return ret;
+}
+
+/* pread op: copy up to @len bytes at @offset of the layer blob into @buf; returns the count. */
+static ssize_t ocierofs_io_pread(struct erofs_vfile *vf, void *buf, size_t len, u64 offset)
+{
+	struct ocierofs_iostream *oci_iostream = *(struct ocierofs_iostream **)vf->payload;
+	void *download_buf = NULL;
+	size_t download_size = 0;
+
+	if (!len)	/* the downloader treats length 0 as "until the end of the blob" */
+		return 0;
+
+	if (ocierofs_download_blob_range(oci_iostream->ctx, offset, len,
+					 &download_buf, &download_size) < 0) {
+		memset(buf, 0, len);	/* existing policy: a failed fetch reads back as zeroes */
+		return len;
+	}
+	if (!download_buf)
+		return 0;
+	download_size = min_t(size_t, download_size, len);	/* never overrun @buf */
+	memcpy(buf, download_buf, download_size);
+	free(download_buf);
+	return download_size;
+}
+
+/* read op: pread at the stream's current offset, then advance it by the bytes returned. */
+static ssize_t ocierofs_io_read(struct erofs_vfile *vf, void *buf, size_t len)
+{
+	struct ocierofs_iostream *stream = *(struct ocierofs_iostream **)vf->payload;
+	ssize_t nread = ocierofs_io_pread(vf, buf, len, stream->offset);
+
+	if (nread <= 0)
+		return nread;
+	stream->offset += nread;
+	return nread;
+}
+
+/*
+ * lseek op: reposition the stream according to @whence and return the new
+ * offset.  Returns -1 when @whence is not SEEK_SET/SEEK_CUR/SEEK_END or when
+ * the target is negative or beyond the size of the selected layer blob.
+ */
+static off_t ocierofs_io_lseek(struct erofs_vfile *vf, u64 offset, int whence)
+{
+	struct ocierofs_iostream *stream = *(struct ocierofs_iostream **)vf->payload;
+	int idx = stream->ctx->layer_index;
+	off_t target;
+
+	if (whence == SEEK_SET)
+		target = offset;
+	else if (whence == SEEK_CUR)
+		target = stream->offset + offset;
+	else if (whence == SEEK_END)
+		target = stream->ctx->layers[idx]->size + offset;
+	else
+		return -1;
+
+	if (target < 0 || target > stream->ctx->layers[idx]->size)
+		return -1;
+
+	stream->offset = target;
+	return target;
+}
+
+/*
+ * sendfile op: copy @count blob bytes to vout->fd in 32 KiB chunks, starting at
+ * *@pos (or at the stream offset if @pos is NULL); unreadable chunks go out as
+ * zeroes.  Returns the bytes left unwritten, or a negative errno on failure.
+ */
+static ssize_t ocierofs_io_sendfile(struct erofs_vfile *vout, struct erofs_vfile *vin,
+			       off_t *pos, size_t count)
+{
+	struct ocierofs_iostream *oci_iostream = *(struct ocierofs_iostream **)vin->payload;
+	ssize_t ret = 0;
+	char *buf = malloc(min_t(size_t, count, 32768));
+
+	if (!buf)
+		return -ENOMEM;
+
+	while (count > 0) {
+		size_t to_read = min_t(size_t, count, 32768);
+		u64 read_offset = pos ? *pos : oci_iostream->offset;
+		ssize_t written;
+
+		ret = ocierofs_io_pread(vin, buf, to_read, read_offset);
+		if (ret <= 0) {
+			if (ret < 0)
+				erofs_err("OCI I/O sendfile: failed to read from OCI: %s",
+					  erofs_strerror(ret));
+			memset(buf, 0, to_read);
+			ret = to_read;
+		}
+
+		written = write(vout->fd, buf, ret);
+		if (written < 0) {
+			ret = -errno;	/* save errno before logging can clobber it */
+			erofs_err("OCI I/O sendfile: failed to write to output: %s",
+				  strerror(-ret));
+			break;
+		}
+		if (written != ret) {
+			erofs_err("OCI I/O sendfile: partial write: %zd != %zd", written, ret);
+			ret = written;
+		}
+
+		count -= ret;
+		if (pos)
+			*pos += ret;
+		else
+			oci_iostream->offset += ret;
+	}
+	free(buf);
+	return ret < 0 ? ret : count;	/* don't report a write error as leftover bytes */
+}
+
+static struct erofs_vfops ocierofs_io_vfops = {	/* ops installed by ocierofs_iostream_open() */
+	.pread = ocierofs_io_pread,
+	.read = ocierofs_io_read,
+	.lseek = ocierofs_io_lseek,
+	.sendfile = ocierofs_io_sendfile,
+};
+
+int ocierofs_iostream_open(struct ocierofs_iostream *oci_iostream, struct ocierofs_ctx *oci_ctx)
+{
+	memset(oci_iostream, 0, sizeof(*oci_iostream));
+	oci_iostream->ctx = oci_ctx;
+	oci_iostream->vf.ops = &ocierofs_io_vfops;
+	oci_iostream->vf.fd = -1;	/* no file descriptor backs this stream */
+	*(struct ocierofs_iostream **)oci_iostream->vf.payload = oci_iostream;
+	/* the payload now points back at the stream so the vfile ops can find it */
+	return 0;
+}
+
+void ocierofs_iostream_close(struct ocierofs_iostream *oci_iostream)
+{
+	close(oci_iostream->vf.fd);	/* NOTE(review): open() sets fd to -1; is this close(-1)? */
+}
+
+/* Return 0 if the first layer's media type is exactly EROFS_MEDIATYPE, -ENOENT otherwise. */
+int ocierofs_is_erofs_native_image(struct ocierofs_ctx *ctx)
+{
+	const char *media_type;
+
+	if (ctx->layer_count <= 0 || !ctx->layers[0])
+		return -ENOENT;
+	media_type = ctx->layers[0]->media_type;
+	return media_type && !strcmp(media_type, EROFS_MEDIATYPE) ? 0 : -ENOENT;
+}
diff --git a/mount/Makefile.am b/mount/Makefile.am
index d93f3f4..0b4447f 100644
--- a/mount/Makefile.am
+++ b/mount/Makefile.am
@@ -9,5 +9,5 @@ mount_erofs_SOURCES = main.c
 mount_erofs_CFLAGS = -Wall -I$(top_srcdir)/include
 mount_erofs_LDADD = $(top_builddir)/lib/liberofs.la ${libselinux_LIBS} \
 	${liblz4_LIBS} ${liblzma_LIBS} ${zlib_LIBS} ${libdeflate_LIBS} \
-	${libzstd_LIBS} ${libqpl_LIBS} ${libxxhash_LIBS} ${libnl3_LIBS}
+	${libzstd_LIBS} ${libqpl_LIBS} ${libxxhash_LIBS} ${libnl3_LIBS} ${openssl_LIBS}
 endif
diff --git a/mount/main.c b/mount/main.c
index a270f0a..3af4d63 100644
--- a/mount/main.c
+++ b/mount/main.c
@@ -15,6 +15,7 @@
 #include "erofs/err.h"
 #include "erofs/io.h"
 #include "../lib/liberofs_nbd.h"
+#include "../lib/liberofs_oci.h"
 #ifdef HAVE_LINUX_LOOP_H
 #include <linux/loop.h>
 #else
@@ -34,6 +35,10 @@ struct loop_info {
 #include <sys/sysmacros.h>
 #endif
 
+#ifdef OCIEROFS_ENABLED
+static struct ocierofs_config ocicfg;
+#endif
+
 enum erofs_backend_drv {
 	EROFSAUTO,
 	EROFSLOCAL,
@@ -56,12 +61,76 @@ static struct erofsmount_cfg {
 	long flags;
 	enum erofs_backend_drv backend;
 	enum erofsmount_mode mountmode;
+#ifdef OCIEROFS_ENABLED
+	bool use_oci;
+#endif
 } mountcfg = {
 	.full_options = "ro",
 	.flags = MS_RDONLY,		/* default mountflags */
 	.fstype = "erofs",
 };
 
+enum erofs_nbd_source_type {
+	EROFSNBD_SOURCE_LOCAL,	/* source is a path that gets opened with open() */
+	EROFSNBD_SOURCE_OCI,	/* source is an ocierofs_ctx served through ocierofs_iostream */
+};
+
+union erofs_nbd_source {
+	const char *device_path;	/* used with EROFSNBD_SOURCE_LOCAL */
+	struct ocierofs_ctx *oci_ctx;	/* used with EROFSNBD_SOURCE_OCI */
+};
+
+union erofs_nbd_source src;	/* filled in by main() before calling erofsmount_nbd() */
+
+/*
+ * Parse a single "oci..." mount option into @oci_cfg.  Recognised forms are
+ * "oci=N" (layer index), "oci.platform=", "oci.username=" and "oci.password=";
+ * string values replace any previous setting.  Returns 0 on success, -EINVAL
+ * for an unknown key or malformed index, -ENOMEM if a value cannot be copied.
+ */
+static int parse_oci_option(struct ocierofs_config *oci_cfg, const char *option)
+{
+	static const char k_layer[] = "oci=";
+	static const char k_platform[] = "oci.platform=";
+	static const char k_username[] = "oci.username=";
+	static const char k_password[] = "oci.password=";
+	char **dst, *dup, *endptr;
+	unsigned long v;
+
+	/*
+	 * Match keys as prefixes only: searching with strstr() would let a value
+	 * such as "oci.password=oci=1" be mistaken for a layer index.
+	 */
+	if (!strncmp(option, k_layer, sizeof(k_layer) - 1)) {
+		option += sizeof(k_layer) - 1;
+		v = strtoul(option, &endptr, 10);
+		if (endptr == option || *endptr != '\0' || v > (~0U >> 1))	/* > INT_MAX */
+			return -EINVAL;
+		oci_cfg->layer_index = (int)v;
+		return 0;
+	}
+
+	if (!strncmp(option, k_platform, sizeof(k_platform) - 1)) {
+		dst = &oci_cfg->platform;
+		option += sizeof(k_platform) - 1;
+	} else if (!strncmp(option, k_username, sizeof(k_username) - 1)) {
+		dst = &oci_cfg->username;
+		option += sizeof(k_username) - 1;
+	} else if (!strncmp(option, k_password, sizeof(k_password) - 1)) {
+		dst = &oci_cfg->password;
+		option += sizeof(k_password) - 1;
+	} else {
+		return -EINVAL;
+	}
+
+	dup = strdup(option);	/* copy first so a failure keeps the old value */
+	if (!dup)
+		return -ENOMEM;
+	free(*dst);
+	*dst = dup;
+	return 0;
+}
+
 static long erofsmount_parse_flagopts(char *s, long flags, char **more)
 {
 	static const struct {
@@ -90,29 +159,41 @@ static long erofsmount_parse_flagopts(char *s, long flags, char **more)
 		comma = strchr(s, ',');
 		if (comma)
 			*comma = '\0';
-		for (i = 0; i < ARRAY_SIZE(opts); ++i) {
-			if (!strcasecmp(s, opts[i].name)) {
-				if (opts[i].flags < 0)
-					flags &= opts[i].flags;
-				else
-					flags |= opts[i].flags;
-				break;
-			}
-		}
 
-		if (more && i >= ARRAY_SIZE(opts)) {
-			int sl = strlen(s);
-			char *new = *more;
+		if (strncmp(s, "oci", 3) == 0) {
+#ifdef OCIEROFS_ENABLED
+			int err = parse_oci_option(&ocicfg, s);
 
-			i = new ? strlen(new) : 0;
-			new = realloc(new, i + strlen(s) + 2);
-			if (!new)
-				return -ENOMEM;
-			if (i)
-				new[i++] = ',';
-			memcpy(new + i, s, sl);
-			new[i + sl] = '\0';
-			*more = new;
+			if (err < 0)
+				return err;
+#else
+			return -EINVAL;
+#endif
+		} else {
+			for (i = 0; i < ARRAY_SIZE(opts); ++i) {
+				if (!strcasecmp(s, opts[i].name)) {
+					if (opts[i].flags < 0)
+						flags &= opts[i].flags;
+					else
+						flags |= opts[i].flags;
+					break;
+				}
+			}
+
+			if (more && i >= ARRAY_SIZE(opts)) {
+				int sl = strlen(s);
+				char *new = *more;
+
+				i = new ? strlen(new) : 0;
+				new = realloc(new, i + strlen(s) + 2);
+				if (!new)
+					return -ENOMEM;
+				if (i)
+					new[i++] = ',';
+				memcpy(new + i, s, sl);
+				new[i + sl] = '\0';
+				*more = new;
+			}
 		}
 
 		if (!comma)
@@ -120,6 +201,11 @@ static long erofsmount_parse_flagopts(char *s, long flags, char **more)
 		*comma = ',';
 		s = comma + 1;
 	}
+
+#ifdef OCIEROFS_ENABLED
+	if (ocicfg.platform || ocicfg.username || ocicfg.password || ocicfg.layer_index != 0)
+		mountcfg.use_oci = true;
+#endif
 	return flags;
 }
 
@@ -272,30 +358,47 @@ static void *erofsmount_nbd_loopfn(void *arg)
 	return (void *)(uintptr_t)err;
 }
 
-static int erofsmount_startnbd(int nbdfd, const char *source)
+static int erofsmount_startnbd(int nbdfd, union erofs_nbd_source source,
+			       enum erofs_nbd_source_type source_type)
 {
 	struct erofsmount_nbd_ctx ctx = {};
 	uintptr_t retcode;
 	pthread_t th;
 	int err, err2;
+	int blkbits = 9;
+	u64 blocks = INT64_MAX >> blkbits;
+	struct ocierofs_iostream oci_iostream = {};
 
-	err = open(source, O_RDONLY);
-	if (err < 0) {
-		err = -errno;
-		goto out_closefd;
+	if (source_type == EROFSNBD_SOURCE_OCI) {
+		err = ocierofs_iostream_open(&oci_iostream, source.oci_ctx);
+		if (err)
+			goto out_closefd;
+		ctx.vd = oci_iostream.vf;
+	} else {
+		err = open(source.device_path, O_RDONLY);
+		if (err < 0) {
+			err = -errno;
+			goto out_closefd;
+		}
+		ctx.vd.fd = err;
 	}
-	ctx.vd.fd = err;
 
-	err = erofs_nbd_connect(nbdfd, 9, INT64_MAX >> 9);
+	err = erofs_nbd_connect(nbdfd, blkbits, blocks);
 	if (err < 0) {
-		close(ctx.vd.fd);
+		if (source_type == EROFSNBD_SOURCE_OCI)
+			ocierofs_iostream_close(&oci_iostream);
+		else
+			close(ctx.vd.fd);
 		goto out_closefd;
 	}
 	ctx.sk.fd = err;
 
 	err = -pthread_create(&th, NULL, erofsmount_nbd_loopfn, &ctx);
 	if (err) {
-		close(ctx.vd.fd);
+		if (source_type == EROFSNBD_SOURCE_OCI)
+			ocierofs_iostream_close(&oci_iostream);
+		else
+			close(ctx.vd.fd);
 		close(ctx.sk.fd);
 		goto out_closefd;
 	}
@@ -307,6 +410,10 @@ static int erofsmount_startnbd(int nbdfd, const char *source)
 		          erofs_strerror(retcode));
 		err2 = retcode;
 	}
+
+	if (source_type == EROFSNBD_SOURCE_OCI)
+		ocierofs_iostream_close(&oci_iostream);
+
 	return err ?: err2;
 out_closefd:
 	close(nbdfd);
@@ -540,9 +647,9 @@ err_identifier:
 	return err;
 }
 
-static int erofsmount_nbd(const char *source, const char *mountpoint,
-			  const char *fstype, int flags,
-			  const char *options)
+static int erofsmount_nbd(union erofs_nbd_source source, enum erofs_nbd_source_type source_type,
+			  const char *mountpoint, const char *fstype,
+			  int flags, const char *options)
 {
 	bool is_netlink = false;
 	char nbdpath[32], *id;
@@ -557,9 +664,16 @@ static int erofsmount_nbd(const char *source, const char *mountpoint,
 	}
 	flags |= MS_RDONLY;
 
-	err = erofsmount_startnbd_nl(&pid, source);
-	if (err < 0) {
-		erofs_info("Fall back to ioctl-based NBD; failover is unsupported");
+	if (source_type == EROFSNBD_SOURCE_LOCAL) {
+		err = erofsmount_startnbd_nl(&pid, source.device_path);
+		if (err >= 0) {
+			num = err;
+			(void)snprintf(nbdpath, sizeof(nbdpath), "/dev/nbd%d", num);
+			is_netlink = true;
+		}
+	}
+
+	if (!is_netlink) {
 		num = erofs_nbd_devscan();
 		if (num < 0)
 			return num;
@@ -569,14 +683,11 @@ static int erofsmount_nbd(const char *source, const char *mountpoint,
 		if (nbdfd < 0)
 			return -errno;
 
-		if ((pid = fork()) == 0)
-			return erofsmount_startnbd(nbdfd, source) ?
+		if ((pid = fork()) == 0) {
+			return erofsmount_startnbd(nbdfd, source, source_type) ?
 				EXIT_FAILURE : EXIT_SUCCESS;
+		}
 		close(nbdfd);
-	} else {
-		num = err;
-		(void)snprintf(nbdpath, sizeof(nbdpath), "/dev/nbd%d", num);
-		is_netlink = true;
 	}
 
 	while (1) {
@@ -594,7 +705,7 @@ static int erofsmount_nbd(const char *source, const char *mountpoint,
 		if (err < 0)
 			err = -errno;
 
-		if (!err && is_netlink) {
+		if (!err && is_netlink && source_type == EROFSNBD_SOURCE_LOCAL) {
 			id = erofs_nbd_get_identifier(num);
 			if (id == ERR_PTR(-ENOENT))
 				id = NULL;
@@ -799,9 +910,43 @@ int main(int argc, char *argv[])
 	}
 
 	if (mountcfg.backend == EROFSNBD) {
-		err = erofsmount_nbd(mountcfg.device, mountcfg.target,
+#ifdef OCIEROFS_ENABLED
+		if (mountcfg.use_oci) {
+			struct ocierofs_ctx ctx = {};
+
+			ocicfg.image_ref = mountcfg.device;
+			err = ocierofs_init(&ctx, &ocicfg);
+			if (err) {
+				ocierofs_ctx_cleanup(&ctx);
+				goto exit;
+			}
+
+			err = ocierofs_is_erofs_native_image(&ctx);
+			if (err) {
+				ocierofs_ctx_cleanup(&ctx);
+				goto exit;
+			}
+
+			src.oci_ctx = &ctx;
+
+			err = erofsmount_nbd(src, EROFSNBD_SOURCE_OCI, mountcfg.target,
+					     mountcfg.fstype, mountcfg.flags, mountcfg.options);
+			if (err) {
+				ocierofs_ctx_cleanup(&ctx);
+				goto exit;
+			}
+		} else {
+			src.device_path = mountcfg.device;
+			err = erofsmount_nbd(src, EROFSNBD_SOURCE_LOCAL, mountcfg.target,
+					     mountcfg.fstype, mountcfg.flags,
+					     mountcfg.options);
+		}
+#else
+		src.device_path = mountcfg.device;
+		err = erofsmount_nbd(src, EROFSNBD_SOURCE_LOCAL, mountcfg.target,
 				     mountcfg.fstype, mountcfg.flags,
 				     mountcfg.options);
+#endif
 		goto exit;
 	}
 
-- 
2.51.0



More information about the Linux-erofs mailing list