[PATCH v3 13/13] selftests/cgroup: extend test_hugetlb_memcg.c to support all huge page sizes
Sayali Patil
sayalip at linux.ibm.com
Sat Apr 4 04:16:29 AEDT 2026
On 27/03/26 12:46, Sayali Patil wrote:
> The hugetlb memcg selftest was previously skipped when the configured
> huge page size was not 2MB, preventing the test from running on systems
> using other default huge page sizes.
>
> Detect the system's configured huge page size at runtime and use it for
> the allocation instead of assuming a fixed 2MB size. This allows the
> test to run on configurations using non-2MB huge pages and avoids
> unnecessary skips.
>
> Fixes: c0dddb7aa5f8 ("selftests: add a selftest to verify hugetlb usage in memcg")
> Tested-by: Venkat Rao Bagalkote <venkat88 at linux.ibm.com>
> Signed-off-by: Sayali Patil <sayalip at linux.ibm.com>
> ---
> .../selftests/cgroup/test_hugetlb_memcg.c | 66 ++++++++++++++-----
> 1 file changed, 48 insertions(+), 18 deletions(-)
>
> diff --git a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
> index f451aa449be6..a449dbec16a8 100644
> --- a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
> +++ b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
> @@ -12,10 +12,15 @@
>
> #define ADDR ((void *)(0x0UL))
> #define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
> -/* mapping 8 MBs == 4 hugepages */
> -#define LENGTH (8UL*1024*1024)
> #define PROTECTION (PROT_READ | PROT_WRITE)
>
> +/*
> + * This value matches the kernel's MEMCG_CHARGE_BATCH definition:
> + * see include/linux/memcontrol.h. If the kernel value changes, this
> + * test constant must be updated accordingly to stay consistent.
> + */
> +#define MEMCG_CHARGE_BATCH 64U
> +
> /* borrowed from mm/hmm-tests.c */
> static long get_hugepage_size(void)
> {
> @@ -84,11 +89,11 @@ static unsigned int check_first(char *addr)
> return *(unsigned int *)addr;
> }
>
> -static void write_data(char *addr)
> +static void write_data(char *addr, size_t length)
> {
> unsigned long i;
>
> - for (i = 0; i < LENGTH; i++)
> + for (i = 0; i < length; i++)
> *(addr + i) = (char)i;
> }
>
> @@ -96,26 +101,31 @@ static int hugetlb_test_program(const char *cgroup, void *arg)
> {
> char *test_group = (char *)arg;
> void *addr;
> + long hpage_size = get_hugepage_size() * 1024;
> long old_current, expected_current, current;
> int ret = EXIT_FAILURE;
> + size_t length = 4 * hpage_size;
> + int pagesize, nr_pages;
> +
> + pagesize = getpagesize();
>
> old_current = cg_read_long(test_group, "memory.current");
> set_nr_hugepages(20);
> current = cg_read_long(test_group, "memory.current");
> - if (current - old_current >= MB(2)) {
> + if (current - old_current >= hpage_size) {
> ksft_print_msg(
> "setting nr_hugepages should not increase hugepage usage.\n");
> ksft_print_msg("before: %ld, after: %ld\n", old_current, current);
> return EXIT_FAILURE;
> }
>
> - addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, 0, 0);
> + addr = mmap(ADDR, length, PROTECTION, FLAGS, 0, 0);
> if (addr == MAP_FAILED) {
> ksft_print_msg("fail to mmap.\n");
> return EXIT_FAILURE;
> }
> current = cg_read_long(test_group, "memory.current");
> - if (current - old_current >= MB(2)) {
> + if (current - old_current >= hpage_size) {
> ksft_print_msg("mmap should not increase hugepage usage.\n");
> ksft_print_msg("before: %ld, after: %ld\n", old_current, current);
> goto out_failed_munmap;
> @@ -124,10 +134,24 @@ static int hugetlb_test_program(const char *cgroup, void *arg)
>
> /* read the first page */
> check_first(addr);
> - expected_current = old_current + MB(2);
> + nr_pages = hpage_size / pagesize;
> + expected_current = old_current + hpage_size;
> current = cg_read_long(test_group, "memory.current");
> - if (!values_close(expected_current, current, 5)) {
> - ksft_print_msg("memory usage should increase by around 2MB.\n");
> + if (nr_pages < MEMCG_CHARGE_BATCH && current == old_current) {
> + /*
> + * Memory cgroup charging uses per-CPU stocks and batched updates to the
> + * memcg usage counters. For hugetlb allocations, the number of pages
> + * that memcg charges is expressed in base pages (nr_pages), not
> + * in hugepage units. When the charge for an allocation is smaller than
> + * the internal batching threshold (nr_pages < MEMCG_CHARGE_BATCH),
> + * it may be fully satisfied from the CPU’s local stock. In such
> + * cases memory.current does not necessarily
> + * increase.
> + * Therefore, treat a zero delta as valid behaviour here.
> + */
> + ksft_print_msg("no visible memcg charge, allocation consumed from local stock.\n");
> + } else if (!values_close(expected_current, current, 5)) {
> + ksft_print_msg("memory usage should increase by ~1 huge page.\n");
> ksft_print_msg(
> "expected memory: %ld, actual memory: %ld\n",
> expected_current, current);
> @@ -135,11 +159,11 @@ static int hugetlb_test_program(const char *cgroup, void *arg)
> }
>
> /* write to the whole range */
> - write_data(addr);
> + write_data(addr, length);
> current = cg_read_long(test_group, "memory.current");
> - expected_current = old_current + MB(8);
> + expected_current = old_current + length;
> if (!values_close(expected_current, current, 5)) {
> - ksft_print_msg("memory usage should increase by around 8MB.\n");
> + ksft_print_msg("memory usage should increase by around 4 huge pages.\n");
> ksft_print_msg(
> "expected memory: %ld, actual memory: %ld\n",
> expected_current, current);
> @@ -147,7 +171,7 @@ static int hugetlb_test_program(const char *cgroup, void *arg)
> }
>
> /* unmap the whole range */
> - munmap(addr, LENGTH);
> + munmap(addr, length);
> current = cg_read_long(test_group, "memory.current");
> expected_current = old_current;
> if (!values_close(expected_current, current, 5)) {
> @@ -162,13 +186,15 @@ static int hugetlb_test_program(const char *cgroup, void *arg)
> return ret;
>
> out_failed_munmap:
> - munmap(addr, LENGTH);
> + munmap(addr, length);
> return ret;
> }
>
> static int test_hugetlb_memcg(char *root)
> {
> int ret = KSFT_FAIL;
> + int num_pages = 20;
> + long hpage_size = get_hugepage_size();
> char *test_group;
>
> test_group = cg_name(root, "hugetlb_memcg_test");
> @@ -177,7 +203,7 @@ static int test_hugetlb_memcg(char *root)
> goto out;
> }
>
> - if (cg_write(test_group, "memory.max", "100M")) {
> + if (cg_write_numeric(test_group, "memory.max", num_pages * hpage_size * 1024)) {
> ksft_print_msg("fail to set cgroup memory limit.\n");
> goto out;
> }
> @@ -200,6 +226,7 @@ int main(int argc, char **argv)
> {
> char root[PATH_MAX];
> int ret = EXIT_SUCCESS, has_memory_hugetlb_acc;
> + long val;
>
> has_memory_hugetlb_acc = proc_mount_contains("memory_hugetlb_accounting");
> if (has_memory_hugetlb_acc < 0)
> @@ -208,12 +235,15 @@ int main(int argc, char **argv)
> ksft_exit_skip("memory hugetlb accounting is disabled\n");
>
> /* Unit is kB! */
> - if (get_hugepage_size() != 2048) {
> - ksft_print_msg("test_hugetlb_memcg requires 2MB hugepages\n");
> + val = get_hugepage_size();
> + if (val < 0) {
> + ksft_print_msg("Failed to read hugepage size\n");
> ksft_test_result_skip("test_hugetlb_memcg\n");
> return ret;
> }
>
> + ksft_print_msg("Hugepage size: %ld kB\n", val);
> +
> if (cg_find_unified_root(root, sizeof(root), NULL))
> ksft_exit_skip("cgroup v2 isn't mounted\n");
>
AI review question:
> > - addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, 0, 0);
> > + addr = mmap(ADDR, length, PROTECTION, FLAGS, 0, 0);
> > if (addr == MAP_FAILED) {
> > ksft_print_msg("fail to mmap.\n");
> > return EXIT_FAILURE;
> Should the test gracefully handle memory constraints by skipping
> instead of returning EXIT_FAILURE here?
> With the removal of the 2MB skip condition, the test will now run on systems
> with large default huge pages (e.g., 1GB on x86_64). Reserving 20
> huge pages
> (20GB) and mapping 4 huge pages (4GB) might easily fail due to memory
> exhaustion in typical automated testing environments, causing false
> test regressions.
I’ll update it in v4 to skip on memory constraints instead of returning
EXIT_FAILURE.
> > @@ -124,10 +134,24 @@ static int hugetlb_test_program(const char
> *cgroup, void *arg)
> >
> > /* read the first page */
> > check_first(addr);
> > - expected_current = old_current + MB(2);
> > + nr_pages = hpage_size / pagesize;
> > + expected_current = old_current + hpage_size;
> > current = cg_read_long(test_group, "memory.current");
> > - if (!values_close(expected_current, current, 5)) {
> > - ksft_print_msg("memory usage should increase by around 2MB.\n");
> > + if (nr_pages < MEMCG_CHARGE_BATCH && current == old_current) {
> > + /*
> > + * Memory cgroup charging uses per-CPU stocks and batched
> updates to the
> > + * memcg usage counters. For hugetlb allocations, the number of
> pages
> > + * that memcg charges is expressed in base pages (nr_pages), not
> > + * in hugepage units. When the charge for an allocation is
> smaller than
> > + * the internal batching threshold (nr_pages <
> > MEMCG_CHARGE_BATCH),
> > + * it may be fully satisfied from the CPU’s local stock. In such
> > + * cases memory.current does not necessarily
> > + * increase.
> > + * Therefore, treat a zero delta as valid behaviour here.
> > + */
> > + ksft_print_msg("no visible memcg charge, allocation consumed
> from local stock.\n");
> > + } else if (!values_close(expected_current, current, 5)) {
> Does this exception fully account for memcg stock batching when the
> per-CPU
> stock is empty?
> If the stock lacks sufficient pages, charging the huge page triggers
> a stock
> refill that charges exactly MEMCG_CHARGE_BATCH (64) base pages from
> the global
> memcg counter.
> On an architecture with 64KB base pages and 2MB contiguous huge pages,
> nr_pages is 32. Refilling the stock would charge 64 base pages (4MB),
> which
> increases memory.current by 4MB instead of the expected 2MB. Since
> current != old_current, the test falls through to values_close(),
> which expects
> a 2MB increase and may fail the test.
Valid point.
The current exception does not fully account for this
scenario when the per-CPU stock is empty and a refill charges
MEMCG_CHARGE_BATCH. This can indeed lead to a larger-than-expected jump
in memory.current and cause the test to fail.
I’ll update the logic in v4 to handle this case more robustly.
> > @@ -177,7 +203,7 @@ static int test_hugetlb_memcg(char *root)
> > goto out;
> > }
> >
> > - if (cg_write(test_group, "memory.max", "100M")) {
> > + if (cg_write_numeric(test_group, "memory.max", num_pages *
> > hpage_size * 1024)) {
> Can this calculation overflow on 32-bit systems?
> Since long is 32 bits on 32-bit systems, num_pages * hpage_size * 1024 can
> exceed the 32-bit signed integer maximum if the architecture supports large
> huge pages (e.g., 256MB on MIPS). This would evaluate to 5,368,709,120,
> resulting in a negative or truncated value, which sets memory.max to an
> invalid or overly restrictive limit.
Yes, this can overflow on 32-bit systems. I’ll fix it in v4.
More information about the Linuxppc-dev
mailing list