From 14f16d47561ba9249efc6c2db9d47ed56841f070 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 16 Nov 2022 16:37:36 -0700 Subject: ACPI: HMAT: remove unnecessary variable initialization In hmat_register_target_initiators(), the variable 'best' gets initialized in the outer per-locality-type for loop. The initialization just before setting up 'Access 1' targets was unnecessary. Remove it. Cc: Rafael J. Wysocki Cc: Liu Shixin Cc: Dan Williams Acked-by: Kirill A. Shutemov Acked-by: Rafael J. Wysocki Signed-off-by: Vishal Verma Link: https://lore.kernel.org/r/20221116-acpi_hmat_fix-v2-1-3712569be691@intel.com Signed-off-by: Dan Williams --- drivers/acpi/numa/hmat.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 23f49a2f4d14..144a84f429ed 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -644,7 +644,6 @@ static void hmat_register_target_initiators(struct memory_target *target) /* Access 1 ignores Generic Initiators */ bitmap_zero(p_nodes, MAX_NUMNODES); list_sort(p_nodes, &initiators, initiator_cmp); - best = 0; for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) { loc = localities_types[i]; if (!loc) -- cgit From 48d4180939e12c4bd2846f984436d895bb9699ed Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 16 Nov 2022 16:37:37 -0700 Subject: ACPI: HMAT: Fix initiator registration for single-initiator systems In a system with a single initiator node, and one or more memory-only 'target' nodes, the memory-only node(s) would fail to register their initiator node correctly. i.e. in sysfs: # ls /sys/devices/system/node/node0/access0/targets/ node0 Where as the correct behavior should be: # ls /sys/devices/system/node/node0/access0/targets/ node0 node1 This happened because hmat_register_target_initiators() uses list_sort() to sort the initiator list, but the sort comparision function (initiator_cmp()) is overloaded to also set the node mask's bits. In a system with a single initiator, the list is singular, and list_sort elides the comparision helper call. Thus the node mask never gets set, and the subsequent search for the best initiator comes up empty. Add a new helper to consume the sorted initiator list, and generate the nodemask, decoupling it from the overloaded initiator_cmp() comparision callback. This prevents the singular list corner case naturally, and makes the code easier to follow as well. Cc: Cc: Rafael J. Wysocki Cc: Liu Shixin Cc: Dan Williams Cc: Kirill A. Shutemov Reported-by: Chris Piper Signed-off-by: Vishal Verma Acked-by: Rafael J. Wysocki Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/r/20221116-acpi_hmat_fix-v2-2-3712569be691@intel.com Signed-off-by: Dan Williams --- drivers/acpi/numa/hmat.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 144a84f429ed..6cceca64a6bc 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -562,17 +562,26 @@ static int initiator_cmp(void *priv, const struct list_head *a, { struct memory_initiator *ia; struct memory_initiator *ib; - unsigned long *p_nodes = priv; ia = list_entry(a, struct memory_initiator, node); ib = list_entry(b, struct memory_initiator, node); - set_bit(ia->processor_pxm, p_nodes); - set_bit(ib->processor_pxm, p_nodes); - return ia->processor_pxm - ib->processor_pxm; } +static int initiators_to_nodemask(unsigned long *p_nodes) +{ + struct memory_initiator *initiator; + + if (list_empty(&initiators)) + return -ENXIO; + + list_for_each_entry(initiator, &initiators, node) + set_bit(initiator->processor_pxm, p_nodes); + + return 0; +} + static void hmat_register_target_initiators(struct memory_target *target) { static DECLARE_BITMAP(p_nodes, MAX_NUMNODES); @@ -609,7 +618,10 @@ static void hmat_register_target_initiators(struct memory_target *target) * initiators. */ bitmap_zero(p_nodes, MAX_NUMNODES); - list_sort(p_nodes, &initiators, initiator_cmp); + list_sort(NULL, &initiators, initiator_cmp); + if (initiators_to_nodemask(p_nodes) < 0) + return; + if (!access0done) { for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) { loc = localities_types[i]; @@ -643,7 +655,9 @@ static void hmat_register_target_initiators(struct memory_target *target) /* Access 1 ignores Generic Initiators */ bitmap_zero(p_nodes, MAX_NUMNODES); - list_sort(p_nodes, &initiators, initiator_cmp); + if (initiators_to_nodemask(p_nodes) < 0) + return; + for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) { loc = localities_types[i]; if (!loc) -- cgit From 472faf72b33d80aa8e7a99c9410c1a23d3bf0cd8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 19 Nov 2022 17:37:49 -0800 Subject: device-dax: Fix duplicate 'hmem' device registration So called "soft-reserved" memory is an EFI conventional memory range with the EFI_MEMORY_SP attribute set. That attribute indicates that the memory is not part of the platform general purpose memory pool and may want some consideration from the system administrator about whether to keep that memory set aside for dedicated access through device-dax (map a device file), or assigned to the page allocator as another general purpose memory node target. Absent an ACPI HMAT table the default device-dax registration creates coarse grained devices that are delineated by EFI Memory Map entries. With the HMAT the devices are delineated by the finer grained ranges associated with the proximity domain of the memory target. I.e. the HMAT describes the properties of performance differentiated memory and each unique performance description results in a unique target proximity domain where each memory proximity domain has an associated SRAT entry that delineates the address range. The intent was that SRAT-defined device-dax instances are registered first. Then any left-over address range with the EFI_MEMORY_SP attribute, but not covered by the SRAT, would have a coarse grained device-dax instance established. However, the scheme to detect what ranges are left to be assigned to a device was buggy and resulted in multiple overlapping device-dax instances. Fix this by using explicit tracking for which ranges have been handled. Now, this new approach may leave memory stranded in the presence of broken platform firmware that fails to fully describe all EFI_MEMORY_SP ranges in the HMAT. That requires a deeper fix if it becomes a problem in practice. Reported-by: "Tallam Mahendra Kumar" Reported-by: Mustafa Hajeer Debugged-by: Vishal Verma Tested-by: Vishal Verma Link: https://lore.kernel.org/r/166890823379.4183293.15333502171004313377.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/dax/hmem/device.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c index 97086fab698e..903325aac991 100644 --- a/drivers/dax/hmem/device.c +++ b/drivers/dax/hmem/device.c @@ -8,6 +8,13 @@ static bool nohmem; module_param_named(disable, nohmem, bool, 0444); +static struct resource hmem_active = { + .name = "HMEM devices", + .start = 0, + .end = -1, + .flags = IORESOURCE_MEM, +}; + void hmem_register_device(int target_nid, struct resource *r) { /* define a clean / non-busy resource for the platform device */ @@ -41,6 +48,12 @@ void hmem_register_device(int target_nid, struct resource *r) goto out_pdev; } + if (!__request_region(&hmem_active, res.start, resource_size(&res), + dev_name(&pdev->dev), 0)) { + dev_dbg(&pdev->dev, "hmem range %pr already active\n", &res); + goto out_active; + } + pdev->dev.numa_node = numa_map_to_online_node(target_nid); info = (struct memregion_info) { .target_node = target_nid, @@ -66,6 +79,8 @@ void hmem_register_device(int target_nid, struct resource *r) return; out_resource: + __release_region(&hmem_active, res.start, resource_size(&res)); +out_active: platform_device_put(pdev); out_pdev: memregion_free(id); @@ -73,15 +88,6 @@ out_pdev: static __init int hmem_register_one(struct resource *res, void *data) { - /* - * If the resource is not a top-level resource it was already - * assigned to a device by the HMAT parsing. - */ - if (res->parent != &iomem_resource) { - pr_info("HMEM: skip %pr, already claimed\n", res); - return 0; - } - hmem_register_device(phys_to_target_node(res->start), res); return 0; -- cgit