[PATCH v2 2/5] erofs-utils: mount: Refactor NBD connection logic in erofsmount_nbd()

Yifan Zhao zhaoyifan28 at huawei.com
Tue Dec 23 21:11:23 AEDT 2025


The current NBD connection logic has the following issues:

1.It first tries netlink (forking a child), then falls back to ioctl
(forking another), causing redundant process overhead and double-opening
of erofs_nbd_source on fallback.
2.Child processes fail silently, hiding the error cause from the parent
and confusing users.
3.erofsmount_startnbd() doesn't ignore SIGPIPE, causing nbd_loopfn to be
killed abruptly without clean up during disconnect.
4.During disconnect, -EPIPE from NBD socket I/O is expected, but
erofsmount_nbd_loopfn() does not suppress it, leading to uncessary
"NBD worker failed with EPIPE" message printed in erofsmount_startnbd().

This patch consolidates the netlink and ioctl fallback logic into a
single child process, eliminating redundant erofs_nbd_source opens. It
also ensure SIGPIPE and -EPIPE are properly suppressed during disconnect
in erofsmount_nbd_loopfn(), enabling cleanup and graceful exit.
Additionally, the child process now reports error code via exit() for
better user visibility.

Signed-off-by: Yifan Zhao <zhaoyifan28 at huawei.com>
---
diff against v1:
- fix incorrectly configured commit author name and email

Also, I believe corresponding tests are needed to safeguard this code change
and I am working on them.

 lib/backends/nbd.c |   1 +
 mount/main.c       | 346 +++++++++++++++++++++++++--------------------
 2 files changed, 194 insertions(+), 153 deletions(-)

diff --git a/lib/backends/nbd.c b/lib/backends/nbd.c
index da27334..46e75cd 100644
--- a/lib/backends/nbd.c
+++ b/lib/backends/nbd.c
@@ -108,6 +108,7 @@ int erofs_nbd_devscan(void)
 		errno = 0;
 		dp = readdir(_dir);
 		if (!dp) {
+			erofs_err("no available nbd device found in /sys/block");
 			if (errno)
 				err = -errno;
 			else
diff --git a/mount/main.c b/mount/main.c
index 02a7962..2a21979 100644
--- a/mount/main.c
+++ b/mount/main.c
@@ -610,6 +610,32 @@ struct erofsmount_nbd_ctx {
 	struct erofs_vfile sk;		/* socket file */
 };
 
+static int erofsmount_open_nbd_source(struct erofs_vfile *vd,
+				      struct erofs_nbd_source *source)
+{
+	int err;
+
+	if (source->type == EROFSNBD_SOURCE_OCI) {
+		if (source->ocicfg.tarindex_path || source->ocicfg.zinfo_path) {
+			err = erofsmount_tarindex_open(vd, &source->ocicfg,
+						       source->ocicfg.tarindex_path,
+						       source->ocicfg.zinfo_path);
+			if (err)
+				return err;
+		} else {
+			err = ocierofs_io_open(vd, &source->ocicfg);
+			if (err)
+				return err;
+		}
+	} else {
+		err = open(source->device_path, O_RDONLY);
+		if (err < 0)
+			return -errno;
+		vd->fd = err;
+	}
+	return 0;
+}
+
 static void *erofsmount_nbd_loopfn(void *arg)
 {
 	struct erofsmount_nbd_ctx *ctx = arg;
@@ -621,11 +647,8 @@ static void *erofsmount_nbd_loopfn(void *arg)
 		off_t pos;
 
 		err = erofs_nbd_get_request(ctx->sk.fd, &rq);
-		if (err < 0) {
-			if (err == -EPIPE)
-				err = 0;
+		if (err < 0)
 			break;
-		}
 
 		if (rq.type != EROFS_NBD_CMD_READ) {
 			err = erofs_nbd_send_reply_header(ctx->sk.fd,
@@ -653,64 +676,11 @@ static void *erofsmount_nbd_loopfn(void *arg)
 out:
 	erofs_io_close(&ctx->vd);
 	erofs_io_close(&ctx->sk);
+	if (err == -EPIPE)
+		err = 0;
 	return (void *)(uintptr_t)err;
 }
 
-static int erofsmount_startnbd(int nbdfd, struct erofs_nbd_source *source)
-{
-	struct erofsmount_nbd_ctx ctx = {};
-	uintptr_t retcode;
-	pthread_t th;
-	int err, err2;
-
-	if (source->type == EROFSNBD_SOURCE_OCI) {
-		if (source->ocicfg.tarindex_path || source->ocicfg.zinfo_path) {
-			err = erofsmount_tarindex_open(&ctx.vd, &source->ocicfg,
-						       source->ocicfg.tarindex_path,
-						       source->ocicfg.zinfo_path);
-			if (err)
-				goto out_closefd;
-		} else {
-			err = ocierofs_io_open(&ctx.vd, &source->ocicfg);
-			if (err)
-				goto out_closefd;
-		}
-	} else {
-		err = open(source->device_path, O_RDONLY);
-		if (err < 0) {
-			err = -errno;
-			goto out_closefd;
-		}
-		ctx.vd.fd = err;
-	}
-
-	err = erofs_nbd_connect(nbdfd, 9, EROFSMOUNT_NBD_DISK_SIZE);
-	if (err < 0) {
-		erofs_io_close(&ctx.vd);
-		goto out_closefd;
-	}
-	ctx.sk.fd = err;
-
-	err = -pthread_create(&th, NULL, erofsmount_nbd_loopfn, &ctx);
-	if (err) {
-		erofs_io_close(&ctx.vd);
-		erofs_io_close(&ctx.sk);
-		goto out_closefd;
-	}
-
-	err = erofs_nbd_do_it(nbdfd);
-	err2 = -pthread_join(th, (void **)&retcode);
-	if (!err2 && retcode) {
-		erofs_err("NBD worker failed with %s",
-		          erofs_strerror(retcode));
-		err2 = retcode;
-	}
-	return err ?: err2;
-out_closefd:
-	close(nbdfd);
-	return err;
-}
-
 #ifdef OCIEROFS_ENABLED
 static int erofsmount_write_recovery_oci(FILE *f, struct erofs_nbd_source *source)
 {
@@ -1013,77 +983,136 @@ static int erofsmount_nbd_fix_backend_linkage(int num, char **recp)
 	return 0;
 }
 
-static int erofsmount_startnbd_nl(pid_t *pid, struct erofs_nbd_source *source)
+struct erofsmount_nbd_msg {
+	int nbdnum;
+	bool is_netlink;
+};
+
+static void erofsmount_startnbd_ioctl(struct erofsmount_nbd_ctx *ctx, struct erofsmount_nbd_msg *msg, int pipefd)
 {
-	int pipefd[2], err, num;
+	uintptr_t retcode;
+	pthread_t th;
+	char nbddev[32];
+	int err = 0, err2 = 0, nbdfd;
 
-	err = pipe(pipefd);
-	if (err < 0)
-		return -errno;
+	msg->nbdnum = erofs_nbd_devscan();
+	if (msg->nbdnum < 0) {
+		close(pipefd);
+		err = msg->nbdnum;
+		goto err_exit;
+	}
 
-	if ((*pid = fork()) == 0) {
-		struct erofsmount_nbd_ctx ctx = {};
-		char *recp;
-
-		/* Otherwise, NBD disconnect sends SIGPIPE, skipping cleanup */
-		if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
-			exit(EXIT_FAILURE);
-
-		if (source->type == EROFSNBD_SOURCE_OCI) {
-			if (source->ocicfg.tarindex_path || source->ocicfg.zinfo_path) {
-				err = erofsmount_tarindex_open(&ctx.vd, &source->ocicfg,
-							       source->ocicfg.tarindex_path,
-							       source->ocicfg.zinfo_path);
-				if (err)
-					exit(EXIT_FAILURE);
-			} else {
-				err = ocierofs_io_open(&ctx.vd, &source->ocicfg);
-				if (err)
-					exit(EXIT_FAILURE);
-			}
-		} else {
-			err = open(source->device_path, O_RDONLY);
-			if (err < 0)
-				exit(EXIT_FAILURE);
-			ctx.vd.fd = err;
-		}
+	(void)snprintf(nbddev, sizeof(nbddev), "/dev/nbd%d", msg->nbdnum);
+	nbdfd = open(nbddev, O_RDWR);
+	if (nbdfd < 0) {
+		close(pipefd);
+		err = -errno;
+		goto err_exit;
+	}
+
+	err = erofs_nbd_connect(nbdfd, 9, EROFSMOUNT_NBD_DISK_SIZE);
+	if (err < 0) {
+		close(pipefd);
+		goto err_nbdfd;
+	}
+	ctx->sk.fd = err;
+
+	/* Send device number to parent */
+	err = write(pipefd, msg, sizeof(*msg));
+	close(pipefd);
+	if (err != sizeof(*msg)) {
+		err = err < 0 ? -errno : -EIO;
+		goto err_nbdfd;
+	}
+
+	err = -pthread_create(&th, NULL, erofsmount_nbd_loopfn, ctx);
+	if (err) {
+		erofs_io_close(&ctx->sk);
+		goto err_nbdfd;
+	}
+
+	err = erofs_nbd_do_it(nbdfd);
+	err2 = -pthread_join(th, (void **)&retcode);
+	if (!err2 && retcode) {
+		erofs_err("NBD worker failed with %s",
+			  erofs_strerror(retcode));
+		err2 = retcode;
+	}
+	close(nbdfd);
+	exit(-(err ?: err2));
+
+err_nbdfd:
+	close(nbdfd);
+err_exit:
+	erofs_io_close(&ctx->vd);
+	exit(-err);
+}
+
+/* Try to start NBD server with netlink first; fall back to ioctl if failed */
+static void erofsmount_startnbd(struct erofs_nbd_source *source, int pipefd)
+{
+	struct erofsmount_nbd_ctx ctx = {};
+	int err;
+	char *recp;
+	struct erofsmount_nbd_msg msg = { .is_netlink = false };
+
+	/* Otherwise, NBD disconnect sends SIGPIPE, skipping cleanup */
+	if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) {
+		close(pipefd);
+		exit(errno);
+	}
+
+	err = erofsmount_open_nbd_source(&ctx.vd, source);
+	if (err) {
+		close(pipefd);
+		exit(-err);
+	}
+
+	{
+		/* Try netlink-based NBD first */
 		recp = erofsmount_write_recovery_info(source);
-		if (IS_ERR(recp)) {
-			erofs_io_close(&ctx.vd);
-			exit(EXIT_FAILURE);
+		if (IS_ERR(recp))
+			goto fallback_ioctl;
+
+		msg.nbdnum = -1;
+		err = erofs_nbd_nl_connect(&msg.nbdnum, 9, EROFSMOUNT_NBD_DISK_SIZE, recp);
+		if (err < 0)
+			goto err_recp;
+
+		ctx.sk.fd = err;
+		err = erofsmount_nbd_fix_backend_linkage(msg.nbdnum, &recp);
+		if (err) {
+			erofs_io_close(&ctx.sk);
+			goto err_recp;
 		}
 
-		num = -1;
-		err = erofs_nbd_nl_connect(&num, 9, EROFSMOUNT_NBD_DISK_SIZE, recp);
-		if (err >= 0) {
-			ctx.sk.fd = err;
-			err = erofsmount_nbd_fix_backend_linkage(num, &recp);
-			if (err) {
-				erofs_io_close(&ctx.sk);
-			} else {
-				err = write(pipefd[1], &num, sizeof(int));
-				if (err < 0)
-					err = -errno;
-				close(pipefd[1]);
-				close(pipefd[0]);
-				if (err >= sizeof(int)) {
-					err = (int)(uintptr_t)erofsmount_nbd_loopfn(&ctx);
-					goto out_fork;
-				}
-			}
+		/* Succeed in starting netlink-based NBD */
+		msg.is_netlink = true;
+
+		/* Send device number to parent */
+		err = write(pipefd, &msg, sizeof(msg));
+		close(pipefd);
+		if (err != sizeof(msg)) {
+			/* will not fall back if encounter pipe write error */
+			err = err < 0 ? -errno : -EIO;
+			erofs_io_close(&ctx.sk);
+			goto err_recp;
 		}
-		erofs_io_close(&ctx.vd);
-out_fork:
-		(void)unlink(recp);
-		free(recp);
-		exit(err ? EXIT_FAILURE : EXIT_SUCCESS);
+
+		err = (int)(uintptr_t)erofsmount_nbd_loopfn(&ctx);
 	}
-	close(pipefd[1]);
-	err = read(pipefd[0], &num, sizeof(int));
-	close(pipefd[0]);
-	if (err < sizeof(int))
-		return -EPIPE;
-	return num;
+
+err_recp:
+	(void)unlink(recp);
+	free(recp);
+
+fallback_ioctl:
+	if (!msg.is_netlink) {
+		erofs_info("Fall back to ioctl-based NBD; failover is unsupported");
+		erofsmount_startnbd_ioctl(&ctx, &msg, pipefd);
+		/* never returns */
+	}
+	exit(-err);
 }
 
 static int erofsmount_reattach(const char *target)
@@ -1192,10 +1221,11 @@ static int erofsmount_nbd(struct erofs_nbd_source *source,
 			  const char *mountpoint, const char *fstype,
 			  int flags, const char *options)
 {
-	bool is_netlink = false;
 	char nbdpath[32], *id;
-	int num, nbdfd;
-	pid_t pid = 0;
+	struct erofsmount_nbd_msg msg;
+	int pipefd[2];
+	pid_t child;
+	int child_status;
 	long err;
 
 	if (strcmp(fstype, "erofs")) {
@@ -1205,59 +1235,69 @@ static int erofsmount_nbd(struct erofs_nbd_source *source,
 	}
 	flags |= MS_RDONLY;
 
-	err = erofsmount_startnbd_nl(&pid, source);
-	if (err < 0) {
-		erofs_info("Fall back to ioctl-based NBD; failover is unsupported");
-		num = erofs_nbd_devscan();
-		if (num < 0)
-			return num;
+	err = pipe(pipefd);
+	if (err < 0)
+		return -errno;
 
-		(void)snprintf(nbdpath, sizeof(nbdpath), "/dev/nbd%d", num);
-		nbdfd = open(nbdpath, O_RDWR);
-		if (nbdfd < 0)
-			return -errno;
+	if ((child = fork()) == 0) {
+		close(pipefd[0]);
+		erofsmount_startnbd(source, pipefd[1]);
+		/* Never returns */
+	}
 
-		if ((pid = fork()) == 0)
-			return erofsmount_startnbd(nbdfd, source) ?
-				EXIT_FAILURE : EXIT_SUCCESS;
-		close(nbdfd);
-	} else {
-		num = err;
-		(void)snprintf(nbdpath, sizeof(nbdpath), "/dev/nbd%d", num);
-		is_netlink = true;
+	close(pipefd[1]);
+	err = read(pipefd[0], &msg, sizeof(msg));
+	close(pipefd[0]);
+
+	if (err != sizeof(msg)) {
+		int retries = 5;
+
+		while (retries-- > 0) {
+			if (waitpid(child, &child_status, WNOHANG) > 0) {
+				/* child exited, return its exit status */
+				return WIFEXITED(child_status) ? -WEXITSTATUS(child_status) : -EIO;
+			}
+			usleep(10000);  /* Wait 10ms */
+		}
+
+		/* child still running, kill it before returning */
+		kill(child, SIGTERM);
+		waitpid(child, &child_status, 0);
+		return err < 0 ? -errno : -EIO;
 	}
 
+	(void)snprintf(nbdpath, sizeof(nbdpath), "/dev/nbd%d", msg.nbdnum);
+
 	while (1) {
-		err = erofs_nbd_in_service(num);
+		err = erofs_nbd_in_service(msg.nbdnum);
 		if (err == -ENOENT || err == -ENOTCONN) {
-			int status;
-
-			err = waitpid(pid, &status, WNOHANG);
+			err = waitpid(child, &child_status, WNOHANG);
 			if (err < 0)
 				return -errno;
 			else if (err > 0)
-				return status ? -EIO : 0;
+				return WIFEXITED(child_status) ? -WEXITSTATUS(child_status) : -EIO;
 
 			usleep(50000);
 			continue;
 		}
 		if (err >= 0)
-			err = (err != pid ? -EBUSY : 0);
+			err = (err != child ? -EBUSY : 0);
 		break;
 	}
+
 	if (!err) {
 		err = mount(nbdpath, mountpoint, fstype, flags, options);
 		if (err < 0)
 			err = -errno;
 
-		if (!err && is_netlink) {
-			id = erofs_nbd_get_identifier(num);
+		if (!err && msg.is_netlink) {
+			id = erofs_nbd_get_identifier(msg.nbdnum);
 
 			err = IS_ERR(id) ? PTR_ERR(id) :
-				erofs_nbd_nl_reconfigure(num, id, true);
+				erofs_nbd_nl_reconfigure(msg.nbdnum, id, true);
 			if (err)
 				erofs_warn("failed to turn on autoclear for nbd%d: %s",
-					   num, erofs_strerror(err));
+					   msg.nbdnum, erofs_strerror(err));
 			if (!IS_ERR(id))
 				free(id);
 		}
-- 
2.43.0



More information about the Linux-erofs mailing list