[RFC PATCH] jffs2: attempt to fix "Error garbage collecting node"
Jian Zhang
zhangjian.3032 at bytedance.com
Wed Nov 12 01:56:08 AEDT 2025
This patch shares analysis and a potential fix for the "Error garbage collecting node"
issue observed in long-run tests with SPI NOR flash under repeated AC cycles.
TL;DR:
An unstable block may pass CRC checks multiple times but fail during GC.
The practical mitigation is to locate such blocks and perform a read-erase-write
cycle to make them stable; the actual data content is irrelevant.
Background:
In long-run tests, JFFS2 sometimes fails to perform garbage collection, producing logs like:
[ 3.470788] jffs2: notice: check_node_data: wrong data CRC in data node at 0x008cc2e8: read 0xb554ca36, calculated 0xc7ac50f7
...
[ 46.815192] jffs2: Error garbage collecting node at 008cb694, mark node obsolete!
[ 46.840646] jffs2: read_cache_page() returned error: -5
Root cause:
1. During GC, a power loss occurring while an `erase` operation is in progress
can leave a block in an unstable state. Reads from this block may yield
inconsistent results.
2. A node may pass multiple CRC checks, but during GC — particularly when moving
a PRISTINE node — a CRC failure may occur, triggering the garbage collection error.
Detailed analysis:
- In `jffs2_garbage_collect_pristine`, moving a PRISTINE node does not increment
the version number. At some point, two nodes with identical version numbers may
exist simultaneously.
- With JFFS2 SUMMARY enabled, `jffs2_mark_node_obsolete` only marks the node
obsolete in RAM, skipping flash updates. On the next boot, the node may be
re-scanned and remain unstable.
- During file/node construction in `jffs2_add_tn_to_tree`, two nodes with the
same version number are handled as follows:
* Current node valid → discard the new node
* Current node CRC error → replace with the new node
As a result, an unstable node can be selected whenever it happens to pass its CRC check on this particular pass, even though later reads of it may fail.
Relevant call flow:
jffs2_find_gc_block
jffs2_do_crccheck_inode
jffs2_do_read_inode_internal
jffs2_get_inode_nodes
read_dnode
jffs2_add_tn_to_tree
Proposed mitigation:
Locate blocks that may be unstable and perform a read-erase-write cycle to
stabilize them. The actual data content is not important, only block stability.
Signed-off-by: Jian Zhang <zhangjian.3032 at bytedance.com>
---
fs/jffs2/scan.c | 172 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 172 insertions(+)
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 62879c218d4b..1475d2c0be4c 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -442,6 +442,174 @@ static int jffs2_scan_xref_node(struct jffs2_sb_info *c, struct jffs2_eraseblock
}
#endif
+/*
+ * Compute the header CRC for @node as if its JFFS2_NODE_ACCURATE bit were
+ * set, regardless of whether the bit is set in @node itself.
+ *
+ * The CRC covers a local copy of the magic, node type and total length; the
+ * last 4 bytes of the header structure are left out of the calculation.
+ */
+static inline uint32_t jffs2_calc_node_hdr_crc(const struct jffs2_unknown_node *node)
+{
+	struct jffs2_unknown_node hdr;
+	uint16_t type = je16_to_cpu(node->nodetype) | JFFS2_NODE_ACCURATE;
+
+	hdr.totlen = node->totlen;
+	hdr.nodetype = cpu_to_je16(type);
+	hdr.magic = node->magic;
+
+	return crc32(0, &hdr, sizeof(hdr) - 4);
+}
+
+/*
+ * Pre-scan one eraseblock and rewrite it in place if a node fails its checks.
+ *
+ * The whole block is read into a temporary buffer and walked node by node.
+ * Words equal to 0xffffffff and words without the JFFS2 magic are skipped
+ * 4 bytes at a time.  For every node with a valid magic the header CRC is
+ * checked; for accurate (non-obsolete) inode and dirent nodes the node CRC
+ * and the data/name CRC are checked as well.  Other node types are stepped
+ * over using their total length.
+ *
+ * The walk stops at the first failed check.  In that case the block is
+ * erased and the buffer that was just read is written back unchanged.
+ *
+ * @c:   filesystem instance; supplies the sector size and the MTD device
+ * @jeb: eraseblock to examine; only its flash offset is used
+ *
+ * Returns 0 when the block was clean or was rewritten successfully, -ENOMEM
+ * if the buffer cannot be allocated, -EIO on a short write-back, or the
+ * error returned by the read, erase or write operation.
+ */
+static int jffs2_pre_scan_eraseblock(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
+{
+	int ret;
+	bool error_found = false;
+	unsigned char *buf;
+	uint32_t crc;
+	uint32_t totlen;
+	struct jffs2_unknown_node *node;
+	struct jffs2_raw_inode *ri;
+	struct jffs2_raw_dirent *rd;
+
+	uint32_t ofs = 0, buf_len = c->sector_size;
+	/* size_t so that its address can be passed as the write's out-parameter */
+	size_t retlen = 0;
+
+	buf = kmalloc(buf_len, GFP_KERNEL);
+	if (!buf) {
+		JFFS2_WARNING("Unable to allocate scan buffer of size %u\n", buf_len);
+		return -ENOMEM;
+	}
+
+	ret = jffs2_fill_scan_buf(c, buf, jeb->offset, buf_len);
+	if (ret) {
+		JFFS2_WARNING("Unable to read eraseblock at 0x%08x\n", jeb->offset);
+		goto exit;
+	}
+
+	/* Stop once there is no longer room for a complete node header. */
+	while (ofs < c->sector_size &&
+	       c->sector_size - ofs >= sizeof(struct jffs2_unknown_node)) {
+		if (*(uint32_t *)(&buf[ofs]) == 0xffffffff) {
+			/* Reached empty space */
+			ofs += 4;
+			continue;
+		}
+
+		node = (struct jffs2_unknown_node *)&buf[ofs];
+		if (je16_to_cpu(node->magic) != JFFS2_MAGIC_BITMASK) {
+			ofs += 4;
+			continue;
+		}
+
+		if (jffs2_calc_node_hdr_crc(node) != je32_to_cpu(node->hdr_crc)) {
+			JFFS2_WARNING("node header CRC failed at %#08x\n",
+				      jeb->offset + ofs);
+			error_found = true;
+			goto check;
+		}
+
+		/*
+		 * Every step below advances by PAD(totlen), so a length that
+		 * is shorter than a header (including zero) would stall the
+		 * loop and one that runs past the block would push later
+		 * accesses outside buf.  Resynchronise 4 bytes further on.
+		 */
+		totlen = je32_to_cpu(node->totlen);
+		if (totlen < sizeof(*node) || totlen > c->sector_size - ofs) {
+			ofs += 4;
+			continue;
+		}
+
+		if (!(je16_to_cpu(node->nodetype) & JFFS2_NODE_ACCURATE)) {
+			/* This is an obsoleted node */
+			ofs += PAD(totlen);
+			continue;
+		}
+
+		switch (je16_to_cpu(node->nodetype)) {
+		case JFFS2_NODETYPE_INODE:
+			if (c->sector_size - ofs < sizeof(struct jffs2_raw_inode)) {
+				/* Not enough space for a full inode node */
+				goto check;
+			}
+
+			ri = (struct jffs2_raw_inode *)node;
+			crc = crc32(0, ri, sizeof(*ri) - 8);
+			if (crc != je32_to_cpu(ri->node_crc)) {
+				JFFS2_WARNING("inode node CRC failed at %#08x, read=%#08x, calc=%#08x\n",
+					      jeb->offset + ofs,
+					      je32_to_cpu(ri->node_crc), crc);
+				error_found = true;
+				goto check;
+			}
+
+			if (je32_to_cpu(ri->dsize)) {
+				uint32_t csize = je32_to_cpu(ri->csize);
+
+				/* The payload must lie inside the node, and so inside buf. */
+				if (totlen < sizeof(*ri) || csize > totlen - sizeof(*ri)) {
+					JFFS2_WARNING("Data of inode node at 0x%08x exceeds node length\n",
+						      jeb->offset + ofs);
+					error_found = true;
+					goto check;
+				}
+
+				crc = crc32(0, ri->data, csize);
+				if (je32_to_cpu(ri->data_crc) != crc) {
+					JFFS2_WARNING("Data CRC failed data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
+						      jeb->offset + ofs,
+						      je32_to_cpu(ri->data_crc), crc);
+					error_found = true;
+					goto check;
+				}
+			}
+
+			ofs += PAD(totlen);
+			break;
+		case JFFS2_NODETYPE_DIRENT:
+			if (c->sector_size - ofs < sizeof(struct jffs2_raw_dirent)) {
+				/* Not enough space for a full dirent node */
+				goto check;
+			}
+
+			rd = (struct jffs2_raw_dirent *)node;
+			crc = crc32(0, rd, sizeof(*rd) - 8);
+			if (je32_to_cpu(rd->node_crc) != crc) {
+				JFFS2_WARNING("Node CRC failed dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
+					      jeb->offset + ofs,
+					      je32_to_cpu(rd->node_crc), crc);
+				error_found = true;
+				goto check;
+			}
+
+			/* The name must lie inside the node, and so inside buf. */
+			if (totlen < sizeof(*rd) || rd->nsize > totlen - sizeof(*rd)) {
+				JFFS2_WARNING("Name of dirent node at 0x%08x exceeds node length\n",
+					      jeb->offset + ofs);
+				error_found = true;
+				goto check;
+			}
+
+			if (strnlen(rd->name, rd->nsize) != rd->nsize) {
+				JFFS2_WARNING("Name in dirent node at 0x%08x contains zeroes\n",
+					      jeb->offset + ofs);
+				error_found = true;
+				/*
+				 * Leave the loop: a plain 'break' would only
+				 * exit the switch without advancing ofs and
+				 * the same node would be examined forever.
+				 */
+				goto check;
+			}
+
+			if (rd->nsize) {
+				crc = crc32(0, rd->name, rd->nsize);
+				if (je32_to_cpu(rd->name_crc) != crc) {
+					JFFS2_WARNING("Name CRC failed dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
+						      jeb->offset + ofs,
+						      je32_to_cpu(rd->name_crc), crc);
+					error_found = true;
+					goto check;
+				}
+			}
+
+			ofs += PAD(totlen);
+			break;
+		default:
+			/* Other node types are not pre-checked */
+			ofs += PAD(totlen);
+			break;
+		}
+	}
+
+check:
+	/* If the pre-scan found an error, erase the block and write the data back. */
+	if (error_found) {
+		/* Zero-initialise so no field is passed to the driver as garbage. */
+		struct erase_info instr = {
+			.addr = jeb->offset,
+			.len = c->sector_size,
+		};
+
+		JFFS2_WARNING("Erasing block at 0x%08x due to pre-scan errors\n",
+			      jeb->offset);
+
+		ret = mtd_erase(c->mtd, &instr);
+		if (ret) {
+			JFFS2_ERROR("Erase at 0x%08x failed during pre-scan: errno %d\n",
+				    jeb->offset, ret);
+			goto exit;
+		}
+
+		ret = jffs2_flash_direct_write(c, jeb->offset, buf_len, &retlen, buf);
+		if (!ret && retlen != buf_len)
+			ret = -EIO;	/* short write: block only partly restored */
+		if (ret) {
+			JFFS2_ERROR("Write back at 0x%08x failed during pre-scan: errno %d\n",
+				    jeb->offset, ret);
+			goto exit;
+		}
+	}
+exit:
+
+	kfree(buf);
+	return ret;
+}
+
/* Called with 'buf_size == 0' if buf is in fact a pointer _directly_ into
the flash, XIP-style */
static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
@@ -453,6 +621,10 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
int err;
int noise = 0;
+ err = jffs2_pre_scan_eraseblock(c, jeb);
+ if (err) // only log warning, continue scanning
+ JFFS2_WARNING("Pre-scan of eraseblock at 0x%08x failed: err=%d\n",
+ jeb->offset, err);
#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
int cleanmarkerfound = 0;
--
2.47.0
More information about the openbmc
mailing list