diff options
Diffstat (limited to 'tools/testing/selftests/vm/userfaultfd.c')
-rw-r--r-- | tools/testing/selftests/vm/userfaultfd.c | 247 |
1 files changed, 199 insertions, 48 deletions
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 7c3f1b0ab468..74babdbc02e5 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -61,10 +61,11 @@ #include <sys/random.h> #include "../kselftest.h" +#include "vm_util.h" #ifdef __NR_userfaultfd -static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; +static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size, hpage_size; #define BOUNCE_RANDOM (1<<0) #define BOUNCE_RACINGFAULTS (1<<1) @@ -77,6 +78,13 @@ static int bounces; #define TEST_SHMEM 3 static int test_type; +#define UFFD_FLAGS (O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY) + +#define BASE_PMD_ADDR ((void *)(1UL << 30)) + +/* test using /dev/userfaultfd, instead of userfaultfd(2) */ +static bool test_dev_userfaultfd; + /* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */ #define ALARM_INTERVAL_SECS 10 static volatile bool test_uffdio_copy_eexist = true; @@ -92,9 +100,10 @@ static int huge_fd; static unsigned long long *count_verify; static int uffd = -1; static int uffd_flags, finished, *pipefd; -static char *area_src, *area_src_alias, *area_dst, *area_dst_alias; +static char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; static char *zeropage; pthread_attr_t attr; +static bool test_collapse; /* Userfaultfd test statistics */ struct uffd_stats { @@ -122,9 +131,13 @@ struct uffd_stats { #define swap(a, b) \ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) +#define factor_of_2(x) ((x) ^ ((x) & ((x) - 1))) + const char *examples = "# Run anonymous memory test on 100MiB region with 99999 bounces:\n" "./userfaultfd anon 100 99999\n\n" + "# Run the same anonymous memory test, but using /dev/userfaultfd:\n" + "./userfaultfd anon:dev 100 99999\n\n" "# Run share memory test on 1GiB region with 99 bounces:\n" "./userfaultfd shmem 1000 99\n\n" "# Run hugetlb memory test on 256MiB region with 50 bounces:\n" @@ -141,6 +154,16 @@ static void usage(void) "[hugetlbfs_file]\n\n"); fprintf(stderr, "Supported <test type>: anon, hugetlb, " "hugetlb_shared, shmem\n\n"); + fprintf(stderr, "'Test mods' can be joined to the test type string with a ':'. " + "Supported mods:\n"); + fprintf(stderr, "\tsyscall - Use userfaultfd(2) (default)\n"); + fprintf(stderr, "\tdev - Use /dev/userfaultfd instead of userfaultfd(2)\n"); + fprintf(stderr, "\tcollapse - Test MADV_COLLAPSE of UFFDIO_REGISTER_MODE_MINOR\n" + "memory\n"); + fprintf(stderr, "\nExample test mod usage:\n"); + fprintf(stderr, "# Run anonymous memory test with /dev/userfaultfd:\n"); + fprintf(stderr, "./userfaultfd anon:dev 100 99999\n\n"); + fprintf(stderr, "Examples:\n\n"); fprintf(stderr, "%s", examples); exit(1); @@ -154,12 +177,14 @@ static void usage(void) ret, __LINE__); \ } while (0) -#define err(fmt, ...) \ +#define errexit(exitcode, fmt, ...) \ do { \ _err(fmt, ##__VA_ARGS__); \ - exit(1); \ + exit(exitcode); \ } while (0) +#define err(fmt, ...) errexit(1, fmt, ##__VA_ARGS__) + static void uffd_stats_reset(struct uffd_stats *uffd_stats, unsigned long n_cpus) { @@ -212,12 +237,10 @@ static void anon_release_pages(char *rel_area) err("madvise(MADV_DONTNEED) failed"); } -static void anon_allocate_area(void **alloc_area) +static void anon_allocate_area(void **alloc_area, bool is_src) { *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (*alloc_area == MAP_FAILED) - err("mmap of anonymous memory failed"); } static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) @@ -235,7 +258,7 @@ static void hugetlb_release_pages(char *rel_area) } } -static void hugetlb_allocate_area(void **alloc_area) +static void hugetlb_allocate_area(void **alloc_area, bool is_src) { void *area_alias = NULL; char **alloc_area_alias; @@ -245,7 +268,7 @@ static void hugetlb_allocate_area(void **alloc_area) nr_pages * page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | - (*alloc_area == area_src ? 0 : MAP_NORESERVE), + (is_src ? 0 : MAP_NORESERVE), -1, 0); else @@ -253,9 +276,9 @@ static void hugetlb_allocate_area(void **alloc_area) nr_pages * page_size, PROT_READ | PROT_WRITE, MAP_SHARED | - (*alloc_area == area_src ? 0 : MAP_NORESERVE), + (is_src ? 0 : MAP_NORESERVE), huge_fd, - *alloc_area == area_src ? 0 : nr_pages * page_size); + is_src ? 0 : nr_pages * page_size); if (*alloc_area == MAP_FAILED) err("mmap of hugetlbfs file failed"); @@ -265,12 +288,12 @@ static void hugetlb_allocate_area(void **alloc_area) PROT_READ | PROT_WRITE, MAP_SHARED, huge_fd, - *alloc_area == area_src ? 0 : nr_pages * page_size); + is_src ? 0 : nr_pages * page_size); if (area_alias == MAP_FAILED) err("mmap of hugetlb file alias failed"); } - if (*alloc_area == area_src) { + if (is_src) { alloc_area_alias = &area_src_alias; } else { alloc_area_alias = &area_dst_alias; @@ -293,21 +316,36 @@ static void shmem_release_pages(char *rel_area) err("madvise(MADV_REMOVE) failed"); } -static void shmem_allocate_area(void **alloc_area) +static void shmem_allocate_area(void **alloc_area, bool is_src) { void *area_alias = NULL; - bool is_src = alloc_area == (void **)&area_src; - unsigned long offset = is_src ? 0 : nr_pages * page_size; + size_t bytes = nr_pages * page_size; + unsigned long offset = is_src ? 0 : bytes; + char *p = NULL, *p_alias = NULL; + + if (test_collapse) { + p = BASE_PMD_ADDR; + if (!is_src) + /* src map + alias + interleaved hpages */ + p += 2 * (bytes + hpage_size); + p_alias = p; + p_alias += bytes; + p_alias += hpage_size; /* Prevent src/dst VMA merge */ + } - *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, - MAP_SHARED, shm_fd, offset); + *alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, + shm_fd, offset); if (*alloc_area == MAP_FAILED) err("mmap of memfd failed"); + if (test_collapse && *alloc_area != p) + err("mmap of memfd failed at %p", p); - area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, - MAP_SHARED, shm_fd, offset); + area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_SHARED, + shm_fd, offset); if (area_alias == MAP_FAILED) err("mmap of memfd alias failed"); + if (test_collapse && area_alias != p_alias) + err("mmap of anonymous memory failed at %p", p_alias); if (is_src) area_src_alias = area_alias; @@ -320,28 +358,39 @@ static void shmem_alias_mapping(__u64 *start, size_t len, unsigned long offset) *start = (unsigned long)area_dst_alias + offset; } +static void shmem_check_pmd_mapping(void *p, int expect_nr_hpages) +{ + if (!check_huge_shmem(area_dst_alias, expect_nr_hpages, hpage_size)) + err("Did not find expected %d number of hugepages", + expect_nr_hpages); +} + struct uffd_test_ops { - void (*allocate_area)(void **alloc_area); + void (*allocate_area)(void **alloc_area, bool is_src); void (*release_pages)(char *rel_area); void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset); + void (*check_pmd_mapping)(void *p, int expect_nr_hpages); }; static struct uffd_test_ops anon_uffd_test_ops = { .allocate_area = anon_allocate_area, .release_pages = anon_release_pages, .alias_mapping = noop_alias_mapping, + .check_pmd_mapping = NULL, }; static struct uffd_test_ops shmem_uffd_test_ops = { .allocate_area = shmem_allocate_area, .release_pages = shmem_release_pages, .alias_mapping = shmem_alias_mapping, + .check_pmd_mapping = shmem_check_pmd_mapping, }; static struct uffd_test_ops hugetlb_uffd_test_ops = { .allocate_area = hugetlb_allocate_area, .release_pages = hugetlb_release_pages, .alias_mapping = hugetlb_alias_mapping, + .check_pmd_mapping = NULL, }; static struct uffd_test_ops *uffd_test_ops; @@ -383,13 +432,34 @@ static void assert_expected_ioctls_present(uint64_t mode, uint64_t ioctls) } } +static int __userfaultfd_open_dev(void) +{ + int fd, _uffd; + + fd = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC); + if (fd < 0) + errexit(KSFT_SKIP, "opening /dev/userfaultfd failed"); + + _uffd = ioctl(fd, USERFAULTFD_IOC_NEW, UFFD_FLAGS); + if (_uffd < 0) + errexit(errno == ENOTTY ? KSFT_SKIP : 1, + "creating userfaultfd failed"); + close(fd); + return _uffd; +} + static void userfaultfd_open(uint64_t *features) { struct uffdio_api uffdio_api; - uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY); - if (uffd < 0) - err("userfaultfd syscall not available in this kernel"); + if (test_dev_userfaultfd) + uffd = __userfaultfd_open_dev(); + else { + uffd = syscall(__NR_userfaultfd, UFFD_FLAGS); + if (uffd < 0) + errexit(errno == ENOSYS ? KSFT_SKIP : 1, + "creating userfaultfd failed"); + } uffd_flags = fcntl(uffd, F_GETFD, NULL); uffdio_api.api = UFFD_API; @@ -440,6 +510,7 @@ static void uffd_test_ctx_clear(void) munmap_area((void **)&area_src_alias); munmap_area((void **)&area_dst); munmap_area((void **)&area_dst_alias); + munmap_area((void **)&area_remap); } static void uffd_test_ctx_init(uint64_t features) @@ -448,8 +519,8 @@ static void uffd_test_ctx_init(uint64_t features) uffd_test_ctx_clear(); - uffd_test_ops->allocate_area((void **)&area_src); - uffd_test_ops->allocate_area((void **)&area_dst); + uffd_test_ops->allocate_area((void **)&area_src, true); + uffd_test_ops->allocate_area((void **)&area_dst, false); userfaultfd_open(&features); @@ -766,6 +837,7 @@ static void *uffd_poll_thread(void *arg) err("remove failure"); break; case UFFD_EVENT_REMAP: + area_remap = area_dst; /* save for later unmap */ area_dst = (char *)(unsigned long)msg.arg.remap.to; break; } @@ -1218,13 +1290,30 @@ static int userfaultfd_sig_test(void) return userfaults != 0; } +void check_memory_contents(char *p) +{ + unsigned long i; + uint8_t expected_byte; + void *expected_page; + + if (posix_memalign(&expected_page, page_size, page_size)) + err("out of memory"); + + for (i = 0; i < nr_pages; ++i) { + expected_byte = ~((uint8_t)(i % ((uint8_t)-1))); + memset(expected_page, expected_byte, page_size); + if (my_bcmp(expected_page, p + (i * page_size), page_size)) + err("unexpected page contents after minor fault"); + } + + free(expected_page); +} + static int userfaultfd_minor_test(void) { - struct uffdio_register uffdio_register; unsigned long p; + struct uffdio_register uffdio_register; pthread_t uffd_mon; - uint8_t expected_byte; - void *expected_page; char c; struct uffd_stats stats = { 0 }; @@ -1263,17 +1352,7 @@ static int userfaultfd_minor_test(void) * fault. uffd_poll_thread will resolve the fault by bit-flipping the * page's contents, and then issuing a CONTINUE ioctl. */ - - if (posix_memalign(&expected_page, page_size, page_size)) - err("out of memory"); - - for (p = 0; p < nr_pages; ++p) { - expected_byte = ~((uint8_t)(p % ((uint8_t)-1))); - memset(expected_page, expected_byte, page_size); - if (my_bcmp(expected_page, area_dst_alias + (p * page_size), - page_size)) - err("unexpected page contents after minor fault"); - } + check_memory_contents(area_dst_alias); if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) err("pipe write"); @@ -1282,6 +1361,23 @@ static int userfaultfd_minor_test(void) uffd_stats_report(&stats, 1); + if (test_collapse) { + printf("testing collapse of uffd memory into PMD-mapped THPs:"); + if (madvise(area_dst_alias, nr_pages * page_size, + MADV_COLLAPSE)) + err("madvise(MADV_COLLAPSE)"); + + uffd_test_ops->check_pmd_mapping(area_dst, + nr_pages * page_size / + hpage_size); + /* + * This won't cause uffd-fault - it purely just makes sure there + * was no corruption. + */ + check_memory_contents(area_dst_alias); + printf(" done.\n"); + } + return stats.missing_faults != 0 || stats.minor_faults != nr_pages; } @@ -1584,8 +1680,6 @@ unsigned long default_huge_page_size(void) static void set_test_type(const char *type) { - uint64_t features = UFFD_API_FEATURES; - if (!strcmp(type, "anon")) { test_type = TEST_ANON; uffd_test_ops = &anon_uffd_test_ops; @@ -1603,12 +1697,37 @@ static void set_test_type(const char *type) test_type = TEST_SHMEM; uffd_test_ops = &shmem_uffd_test_ops; test_uffdio_minor = true; - } else { - err("Unknown test type: %s", type); } +} + +static void parse_test_type_arg(const char *raw_type) +{ + char *buf = strdup(raw_type); + uint64_t features = UFFD_API_FEATURES; + + while (buf) { + const char *token = strsep(&buf, ":"); + + if (!test_type) + set_test_type(token); + else if (!strcmp(token, "dev")) + test_dev_userfaultfd = true; + else if (!strcmp(token, "syscall")) + test_dev_userfaultfd = false; + else if (!strcmp(token, "collapse")) + test_collapse = true; + else + err("unrecognized test mod '%s'", token); + } + + if (!test_type) + err("failed to parse test type argument: '%s'", raw_type); + + if (test_collapse && test_type != TEST_SHMEM) + err("Unsupported test: %s", raw_type); if (test_type == TEST_HUGETLB) - page_size = default_huge_page_size(); + page_size = hpage_size; else page_size = sysconf(_SC_PAGE_SIZE); @@ -1646,6 +1765,8 @@ static void sigalrm(int sig) int main(int argc, char **argv) { + size_t bytes; + if (argc < 4) usage(); @@ -1653,11 +1774,41 @@ int main(int argc, char **argv) err("failed to arm SIGALRM"); alarm(ALARM_INTERVAL_SECS); - set_test_type(argv[1]); + hpage_size = default_huge_page_size(); + parse_test_type_arg(argv[1]); + bytes = atol(argv[2]) * 1024 * 1024; + + if (test_collapse && bytes & (hpage_size - 1)) + err("MiB must be multiple of %lu if :collapse mod set", + hpage_size >> 20); nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size / - nr_cpus; + + if (test_collapse) { + /* nr_cpus must divide (bytes / page_size), otherwise, + * area allocations of (nr_pages * paze_size) won't be a + * multiple of hpage_size, even if bytes is a multiple of + * hpage_size. + * + * This means that nr_cpus must divide (N * (2 << (H-P)) + * where: + * bytes = hpage_size * N + * hpage_size = 2 << H + * page_size = 2 << P + * + * And we want to chose nr_cpus to be the largest value + * satisfying this constraint, not larger than the number + * of online CPUs. Unfortunately, prime factorization of + * N and nr_cpus may be arbitrary, so have to search for it. + * Instead, just use the highest power of 2 dividing both + * nr_cpus and (bytes / page_size). + */ + int x = factor_of_2(nr_cpus); + int y = factor_of_2(bytes / page_size); + + nr_cpus = x < y ? x : y; + } + nr_pages_per_cpu = bytes / page_size / nr_cpus; if (!nr_pages_per_cpu) { _err("invalid MiB"); usage(); |