[PATCH 2/5] erofs-utils: mount: Refactor NBD connection logic in erofsmount_nbd()
Yifan Zhao
zhaoyifan28 at huawei.com
Tue Dec 23 21:04:49 AEDT 2025
From: Yifan Zhao <yifan.yfzhao at foxmail.com>
The current NBD connection logic has the following issues:
1.It first tries netlink (forking a child), then falls back to ioctl
(forking another), causing redundant process overhead and double-opening
of erofs_nbd_source on fallback.
2.Child processes fail silently, hiding the error cause from the parent
and confusing users.
3.erofsmount_startnbd() doesn’t ignore SIGPIPE, causing nbd_loopfn to be
killed abruptly without clean up during disconnect.
4.During disconnect, -EPIPE from NBD socket I/O is expected, but
erofsmount_nbd_loopfn() does not suppress it, leading to uncessary
"NBD worker failed with EPIPE" message printed in erofsmount_startnbd().
This patch consolidates the netlink and ioctl fallback logic into a
single child process, eliminating redundant erofs_nbd_source opens. It
also ensure SIGPIPE and -EPIPE are properly suppressed during disconnect
in erofsmount_nbd_loopfn(), enabling cleanup and graceful exit.
Additionally, the child process now reports error code via exit() for
better user visibility.
Signed-off-by: Yifan Zhao <zhaoyifan28 at huawei.com>
---
lib/backends/nbd.c | 1 +
mount/main.c | 346 +++++++++++++++++++++++++--------------------
2 files changed, 194 insertions(+), 153 deletions(-)
diff --git a/lib/backends/nbd.c b/lib/backends/nbd.c
index da27334..46e75cd 100644
--- a/lib/backends/nbd.c
+++ b/lib/backends/nbd.c
@@ -108,6 +108,7 @@ int erofs_nbd_devscan(void)
errno = 0;
dp = readdir(_dir);
if (!dp) {
+ erofs_err("no available nbd device found in /sys/block");
if (errno)
err = -errno;
else
diff --git a/mount/main.c b/mount/main.c
index 02a7962..2a21979 100644
--- a/mount/main.c
+++ b/mount/main.c
@@ -610,6 +610,32 @@ struct erofsmount_nbd_ctx {
struct erofs_vfile sk; /* socket file */
};
+static int erofsmount_open_nbd_source(struct erofs_vfile *vd,
+ struct erofs_nbd_source *source)
+{
+ int err;
+
+ if (source->type == EROFSNBD_SOURCE_OCI) {
+ if (source->ocicfg.tarindex_path || source->ocicfg.zinfo_path) {
+ err = erofsmount_tarindex_open(vd, &source->ocicfg,
+ source->ocicfg.tarindex_path,
+ source->ocicfg.zinfo_path);
+ if (err)
+ return err;
+ } else {
+ err = ocierofs_io_open(vd, &source->ocicfg);
+ if (err)
+ return err;
+ }
+ } else {
+ err = open(source->device_path, O_RDONLY);
+ if (err < 0)
+ return -errno;
+ vd->fd = err;
+ }
+ return 0;
+}
+
static void *erofsmount_nbd_loopfn(void *arg)
{
struct erofsmount_nbd_ctx *ctx = arg;
@@ -621,11 +647,8 @@ static void *erofsmount_nbd_loopfn(void *arg)
off_t pos;
err = erofs_nbd_get_request(ctx->sk.fd, &rq);
- if (err < 0) {
- if (err == -EPIPE)
- err = 0;
+ if (err < 0)
break;
- }
if (rq.type != EROFS_NBD_CMD_READ) {
err = erofs_nbd_send_reply_header(ctx->sk.fd,
@@ -653,64 +676,11 @@ static void *erofsmount_nbd_loopfn(void *arg)
out:
erofs_io_close(&ctx->vd);
erofs_io_close(&ctx->sk);
+ if (err == -EPIPE)
+ err = 0;
return (void *)(uintptr_t)err;
}
-static int erofsmount_startnbd(int nbdfd, struct erofs_nbd_source *source)
-{
- struct erofsmount_nbd_ctx ctx = {};
- uintptr_t retcode;
- pthread_t th;
- int err, err2;
-
- if (source->type == EROFSNBD_SOURCE_OCI) {
- if (source->ocicfg.tarindex_path || source->ocicfg.zinfo_path) {
- err = erofsmount_tarindex_open(&ctx.vd, &source->ocicfg,
- source->ocicfg.tarindex_path,
- source->ocicfg.zinfo_path);
- if (err)
- goto out_closefd;
- } else {
- err = ocierofs_io_open(&ctx.vd, &source->ocicfg);
- if (err)
- goto out_closefd;
- }
- } else {
- err = open(source->device_path, O_RDONLY);
- if (err < 0) {
- err = -errno;
- goto out_closefd;
- }
- ctx.vd.fd = err;
- }
-
- err = erofs_nbd_connect(nbdfd, 9, EROFSMOUNT_NBD_DISK_SIZE);
- if (err < 0) {
- erofs_io_close(&ctx.vd);
- goto out_closefd;
- }
- ctx.sk.fd = err;
-
- err = -pthread_create(&th, NULL, erofsmount_nbd_loopfn, &ctx);
- if (err) {
- erofs_io_close(&ctx.vd);
- erofs_io_close(&ctx.sk);
- goto out_closefd;
- }
-
- err = erofs_nbd_do_it(nbdfd);
- err2 = -pthread_join(th, (void **)&retcode);
- if (!err2 && retcode) {
- erofs_err("NBD worker failed with %s",
- erofs_strerror(retcode));
- err2 = retcode;
- }
- return err ?: err2;
-out_closefd:
- close(nbdfd);
- return err;
-}
-
#ifdef OCIEROFS_ENABLED
static int erofsmount_write_recovery_oci(FILE *f, struct erofs_nbd_source *source)
{
@@ -1013,77 +983,136 @@ static int erofsmount_nbd_fix_backend_linkage(int num, char **recp)
return 0;
}
-static int erofsmount_startnbd_nl(pid_t *pid, struct erofs_nbd_source *source)
+struct erofsmount_nbd_msg {
+ int nbdnum;
+ bool is_netlink;
+};
+
+static void erofsmount_startnbd_ioctl(struct erofsmount_nbd_ctx *ctx, struct erofsmount_nbd_msg *msg, int pipefd)
{
- int pipefd[2], err, num;
+ uintptr_t retcode;
+ pthread_t th;
+ char nbddev[32];
+ int err = 0, err2 = 0, nbdfd;
- err = pipe(pipefd);
- if (err < 0)
- return -errno;
+ msg->nbdnum = erofs_nbd_devscan();
+ if (msg->nbdnum < 0) {
+ close(pipefd);
+ err = msg->nbdnum;
+ goto err_exit;
+ }
- if ((*pid = fork()) == 0) {
- struct erofsmount_nbd_ctx ctx = {};
- char *recp;
-
- /* Otherwise, NBD disconnect sends SIGPIPE, skipping cleanup */
- if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
- exit(EXIT_FAILURE);
-
- if (source->type == EROFSNBD_SOURCE_OCI) {
- if (source->ocicfg.tarindex_path || source->ocicfg.zinfo_path) {
- err = erofsmount_tarindex_open(&ctx.vd, &source->ocicfg,
- source->ocicfg.tarindex_path,
- source->ocicfg.zinfo_path);
- if (err)
- exit(EXIT_FAILURE);
- } else {
- err = ocierofs_io_open(&ctx.vd, &source->ocicfg);
- if (err)
- exit(EXIT_FAILURE);
- }
- } else {
- err = open(source->device_path, O_RDONLY);
- if (err < 0)
- exit(EXIT_FAILURE);
- ctx.vd.fd = err;
- }
+ (void)snprintf(nbddev, sizeof(nbddev), "/dev/nbd%d", msg->nbdnum);
+ nbdfd = open(nbddev, O_RDWR);
+ if (nbdfd < 0) {
+ close(pipefd);
+ err = -errno;
+ goto err_exit;
+ }
+
+ err = erofs_nbd_connect(nbdfd, 9, EROFSMOUNT_NBD_DISK_SIZE);
+ if (err < 0) {
+ close(pipefd);
+ goto err_nbdfd;
+ }
+ ctx->sk.fd = err;
+
+ /* Send device number to parent */
+ err = write(pipefd, msg, sizeof(*msg));
+ close(pipefd);
+ if (err != sizeof(*msg)) {
+ err = err < 0 ? -errno : -EIO;
+ goto err_nbdfd;
+ }
+
+ err = -pthread_create(&th, NULL, erofsmount_nbd_loopfn, ctx);
+ if (err) {
+ erofs_io_close(&ctx->sk);
+ goto err_nbdfd;
+ }
+
+ err = erofs_nbd_do_it(nbdfd);
+ err2 = -pthread_join(th, (void **)&retcode);
+ if (!err2 && retcode) {
+ erofs_err("NBD worker failed with %s",
+ erofs_strerror(retcode));
+ err2 = retcode;
+ }
+ close(nbdfd);
+ exit(-(err ?: err2));
+
+err_nbdfd:
+ close(nbdfd);
+err_exit:
+ erofs_io_close(&ctx->vd);
+ exit(-err);
+}
+
+/* Try to start NBD server with netlink first; fall back to ioctl if failed */
+static void erofsmount_startnbd(struct erofs_nbd_source *source, int pipefd)
+{
+ struct erofsmount_nbd_ctx ctx = {};
+ int err;
+ char *recp;
+ struct erofsmount_nbd_msg msg = { .is_netlink = false };
+
+ /* Otherwise, NBD disconnect sends SIGPIPE, skipping cleanup */
+ if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) {
+ close(pipefd);
+ exit(errno);
+ }
+
+ err = erofsmount_open_nbd_source(&ctx.vd, source);
+ if (err) {
+ close(pipefd);
+ exit(-err);
+ }
+
+ {
+ /* Try netlink-based NBD first */
recp = erofsmount_write_recovery_info(source);
- if (IS_ERR(recp)) {
- erofs_io_close(&ctx.vd);
- exit(EXIT_FAILURE);
+ if (IS_ERR(recp))
+ goto fallback_ioctl;
+
+ msg.nbdnum = -1;
+ err = erofs_nbd_nl_connect(&msg.nbdnum, 9, EROFSMOUNT_NBD_DISK_SIZE, recp);
+ if (err < 0)
+ goto err_recp;
+
+ ctx.sk.fd = err;
+ err = erofsmount_nbd_fix_backend_linkage(msg.nbdnum, &recp);
+ if (err) {
+ erofs_io_close(&ctx.sk);
+ goto err_recp;
}
- num = -1;
- err = erofs_nbd_nl_connect(&num, 9, EROFSMOUNT_NBD_DISK_SIZE, recp);
- if (err >= 0) {
- ctx.sk.fd = err;
- err = erofsmount_nbd_fix_backend_linkage(num, &recp);
- if (err) {
- erofs_io_close(&ctx.sk);
- } else {
- err = write(pipefd[1], &num, sizeof(int));
- if (err < 0)
- err = -errno;
- close(pipefd[1]);
- close(pipefd[0]);
- if (err >= sizeof(int)) {
- err = (int)(uintptr_t)erofsmount_nbd_loopfn(&ctx);
- goto out_fork;
- }
- }
+ /* Succeed in starting netlink-based NBD */
+ msg.is_netlink = true;
+
+ /* Send device number to parent */
+ err = write(pipefd, &msg, sizeof(msg));
+ close(pipefd);
+ if (err != sizeof(msg)) {
+ /* will not fall back if encounter pipe write error */
+ err = err < 0 ? -errno : -EIO;
+ erofs_io_close(&ctx.sk);
+ goto err_recp;
}
- erofs_io_close(&ctx.vd);
-out_fork:
- (void)unlink(recp);
- free(recp);
- exit(err ? EXIT_FAILURE : EXIT_SUCCESS);
+
+ err = (int)(uintptr_t)erofsmount_nbd_loopfn(&ctx);
}
- close(pipefd[1]);
- err = read(pipefd[0], &num, sizeof(int));
- close(pipefd[0]);
- if (err < sizeof(int))
- return -EPIPE;
- return num;
+
+err_recp:
+ (void)unlink(recp);
+ free(recp);
+
+fallback_ioctl:
+ if (!msg.is_netlink) {
+ erofs_info("Fall back to ioctl-based NBD; failover is unsupported");
+ erofsmount_startnbd_ioctl(&ctx, &msg, pipefd);
+ /* never returns */
+ }
+ exit(-err);
}
static int erofsmount_reattach(const char *target)
@@ -1192,10 +1221,11 @@ static int erofsmount_nbd(struct erofs_nbd_source *source,
const char *mountpoint, const char *fstype,
int flags, const char *options)
{
- bool is_netlink = false;
char nbdpath[32], *id;
- int num, nbdfd;
- pid_t pid = 0;
+ struct erofsmount_nbd_msg msg;
+ int pipefd[2];
+ pid_t child;
+ int child_status;
long err;
if (strcmp(fstype, "erofs")) {
@@ -1205,59 +1235,69 @@ static int erofsmount_nbd(struct erofs_nbd_source *source,
}
flags |= MS_RDONLY;
- err = erofsmount_startnbd_nl(&pid, source);
- if (err < 0) {
- erofs_info("Fall back to ioctl-based NBD; failover is unsupported");
- num = erofs_nbd_devscan();
- if (num < 0)
- return num;
+ err = pipe(pipefd);
+ if (err < 0)
+ return -errno;
- (void)snprintf(nbdpath, sizeof(nbdpath), "/dev/nbd%d", num);
- nbdfd = open(nbdpath, O_RDWR);
- if (nbdfd < 0)
- return -errno;
+ if ((child = fork()) == 0) {
+ close(pipefd[0]);
+ erofsmount_startnbd(source, pipefd[1]);
+ /* Never returns */
+ }
- if ((pid = fork()) == 0)
- return erofsmount_startnbd(nbdfd, source) ?
- EXIT_FAILURE : EXIT_SUCCESS;
- close(nbdfd);
- } else {
- num = err;
- (void)snprintf(nbdpath, sizeof(nbdpath), "/dev/nbd%d", num);
- is_netlink = true;
+ close(pipefd[1]);
+ err = read(pipefd[0], &msg, sizeof(msg));
+ close(pipefd[0]);
+
+ if (err != sizeof(msg)) {
+ int retries = 5;
+
+ while (retries-- > 0) {
+ if (waitpid(child, &child_status, WNOHANG) > 0) {
+ /* child exited, return its exit status */
+ return WIFEXITED(child_status) ? -WEXITSTATUS(child_status) : -EIO;
+ }
+ usleep(10000); /* Wait 10ms */
+ }
+
+ /* child still running, kill it before returning */
+ kill(child, SIGTERM);
+ waitpid(child, &child_status, 0);
+ return err < 0 ? -errno : -EIO;
}
+ (void)snprintf(nbdpath, sizeof(nbdpath), "/dev/nbd%d", msg.nbdnum);
+
while (1) {
- err = erofs_nbd_in_service(num);
+ err = erofs_nbd_in_service(msg.nbdnum);
if (err == -ENOENT || err == -ENOTCONN) {
- int status;
-
- err = waitpid(pid, &status, WNOHANG);
+ err = waitpid(child, &child_status, WNOHANG);
if (err < 0)
return -errno;
else if (err > 0)
- return status ? -EIO : 0;
+ return WIFEXITED(child_status) ? -WEXITSTATUS(child_status) : -EIO;
usleep(50000);
continue;
}
if (err >= 0)
- err = (err != pid ? -EBUSY : 0);
+ err = (err != child ? -EBUSY : 0);
break;
}
+
if (!err) {
err = mount(nbdpath, mountpoint, fstype, flags, options);
if (err < 0)
err = -errno;
- if (!err && is_netlink) {
- id = erofs_nbd_get_identifier(num);
+ if (!err && msg.is_netlink) {
+ id = erofs_nbd_get_identifier(msg.nbdnum);
err = IS_ERR(id) ? PTR_ERR(id) :
- erofs_nbd_nl_reconfigure(num, id, true);
+ erofs_nbd_nl_reconfigure(msg.nbdnum, id, true);
if (err)
erofs_warn("failed to turn on autoclear for nbd%d: %s",
- num, erofs_strerror(err));
+ msg.nbdnum, erofs_strerror(err));
if (!IS_ERR(id))
free(id);
}
--
2.43.0
More information about the Linux-erofs
mailing list