From: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
To: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>,
	linux-mm@kvack.org, akpm@linux-foundation.org,
	mpe@ellerman.id.au, linuxppc-dev@lists.ozlabs.org,
	npiggin@gmail.com, christophe.leroy@csgroup.eu
Cc: Oscar Salvador <osalvador@suse.de>,
	Mike Kravetz <mike.kravetz@oracle.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Joao Martins <joao.m.martins@oracle.com>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Muchun Song <muchun.song@linux.dev>,
	Will Deacon <will@kernel.org>,
	"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Subject: Re: [PATCH v2 14/16] powerpc/book3s64/vmemmap: Switch radix to use a different vmemmap handling function
Date: Wed, 28 Jun 2023 07:03:03 +0530
Message-ID: <87r0pwnzg0.fsf@doe.com>
In-Reply-To: <20230616110826.344417-15-aneesh.kumar@linux.ibm.com>

"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:

> This is in preparation to update radix to implement vmemmap optimization
> for devdax. Below are the rules w.r.t radix vmemmap mapping
>
> 1. First try to map things using PMD (2M)
> 2. With altmap if altmap cross-boundary check returns true, fall back to
>    PAGE_SIZE
> 3. If we can't allocate PMD_SIZE backing memory for vmemmap, fallback to
>    PAGE_SIZE
>
> On removing vmemmap mapping, check if every subsection that is using the
> vmemmap area is invalid. If found to be invalid, that implies we can safely
> free the vmemmap area. We don't use the PAGE_UNUSED pattern used by x86
> because with 64K page size, we need to do the above check even at the
> PAGE_SIZE granularity.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/radix.h |   2 +
>  arch/powerpc/include/asm/pgtable.h         |   3 +
>  arch/powerpc/mm/book3s64/radix_pgtable.c   | 319 +++++++++++++++++++--
>  arch/powerpc/mm/init_64.c                  |  26 +-
>  4 files changed, 319 insertions(+), 31 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
> index 8cdff5a05011..87d4c1e62491 100644
> --- a/arch/powerpc/include/asm/book3s/64/radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/radix.h
> @@ -332,6 +332,8 @@ extern int __meminit radix__vmemmap_create_mapping(unsigned long start,
>  					     unsigned long phys);
>  int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end,
>  				      int node, struct vmem_altmap *altmap);
> +void __ref radix__vmemmap_free(unsigned long start, unsigned long end,
> +			       struct vmem_altmap *altmap);
>  extern void radix__vmemmap_remove_mapping(unsigned long start,
>  				    unsigned long page_size);
>  
> diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
> index 9972626ddaf6..6d4cd2ebae6e 100644
> --- a/arch/powerpc/include/asm/pgtable.h
> +++ b/arch/powerpc/include/asm/pgtable.h
> @@ -168,6 +168,9 @@ static inline bool is_ioremap_addr(const void *x)
>  
>  struct seq_file;
>  void arch_report_meminfo(struct seq_file *m);
> +int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size);
> +bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
> +			   unsigned long page_size);
>  #endif /* CONFIG_PPC64 */
>  
>  #endif /* __ASSEMBLY__ */
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index d7e2dd3d4add..ef886fab643d 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -742,8 +742,57 @@ static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
>  	p4d_clear(p4d);
>  }
>  
> +static bool __meminit vmemmap_pmd_is_unused(unsigned long addr, unsigned long end)
> +{
> +	unsigned long start = ALIGN_DOWN(addr, PMD_SIZE);
> +
> +	return !vmemmap_populated(start, PMD_SIZE);
> +}
> +
> +static bool __meminit vmemmap_page_is_unused(unsigned long addr, unsigned long end)
> +{
> +	unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE);
> +
> +	return !vmemmap_populated(start, PAGE_SIZE);
> +
> +}
> +
> +static void __meminit free_vmemmap_pages(struct page *page,
> +					 struct vmem_altmap *altmap,
> +					 int order)
> +{
> +	unsigned int nr_pages = 1 << order;
> +
> +	if (altmap) {
> +		unsigned long alt_start, alt_end;
> +		unsigned long base_pfn = page_to_pfn(page);
> +
> +		/*
> +		 * with 1G vmemmap mmaping we can have things setup
> +		 * such that even though atlmap is specified we never
> +		 * used altmap.
> +		 */
> +		alt_start = altmap->base_pfn;
> +		alt_end = altmap->base_pfn + altmap->reserve +
> +			altmap->free + altmap->alloc + altmap->align;
> +
> +		if (base_pfn >= alt_start && base_pfn < alt_end) {
> +			vmem_altmap_free(altmap, nr_pages);
> +			return;
> +		}
> +	}
> +
> +	if (PageReserved(page)) {
> +		/* allocated from memblock */
> +		while (nr_pages--)
> +			free_reserved_page(page++);
> +	} else
> +		free_pages((unsigned long)page_address(page), order);
> +}
> +
>  static void remove_pte_table(pte_t *pte_start, unsigned long addr,
> -			     unsigned long end, bool direct)
> +			     unsigned long end, bool direct,
> +			     struct vmem_altmap *altmap)
>  {
>  	unsigned long next, pages = 0;
>  	pte_t *pte;
> @@ -757,24 +806,23 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
>  		if (!pte_present(*pte))
>  			continue;
>  
> -		if (!PAGE_ALIGNED(addr) || !PAGE_ALIGNED(next)) {
> -			/*
> -			 * The vmemmap_free() and remove_section_mapping()
> -			 * codepaths call us with aligned addresses.
> -			 */
> -			WARN_ONCE(1, "%s: unaligned range\n", __func__);
> -			continue;
> +		if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
> +			if (!direct)
> +				free_vmemmap_pages(pte_page(*pte), altmap, 0);
> +			pte_clear(&init_mm, addr, pte);
> +			pages++;
> +		} else if (!direct && vmemmap_page_is_unused(addr, next)) {
> +			free_vmemmap_pages(pte_page(*pte), altmap, 0);
> +			pte_clear(&init_mm, addr, pte);
>  		}
> -
> -		pte_clear(&init_mm, addr, pte);
> -		pages++;
>  	}
>  	if (direct)
>  		update_page_count(mmu_virtual_psize, -pages);
>  }
>  
>  static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
> -				       unsigned long end, bool direct)
> +				       unsigned long end, bool direct,
> +				       struct vmem_altmap *altmap)
>  {
>  	unsigned long next, pages = 0;
>  	pte_t *pte_base;
> @@ -788,18 +836,21 @@ static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
>  			continue;
>  
>  		if (pmd_is_leaf(*pmd)) {
> -			if (!IS_ALIGNED(addr, PMD_SIZE) ||
> -			    !IS_ALIGNED(next, PMD_SIZE)) {
> -				WARN_ONCE(1, "%s: unaligned range\n", __func__);
> -				continue;
> +			if (IS_ALIGNED(addr, PMD_SIZE) &&
> +			    IS_ALIGNED(next, PMD_SIZE)) {
> +				if (!direct)
> +					free_vmemmap_pages(pmd_page(*pmd), altmap, get_order(PMD_SIZE));
> +				pte_clear(&init_mm, addr, (pte_t *)pmd);
> +				pages++;
> +			} else if (vmemmap_pmd_is_unused(addr, next)) {
> +				free_vmemmap_pages(pmd_page(*pmd), altmap, get_order(PMD_SIZE));
> +				pte_clear(&init_mm, addr, (pte_t *)pmd);
>  			}
> -			pte_clear(&init_mm, addr, (pte_t *)pmd);
> -			pages++;
>  			continue;
>  		}
>  
>  		pte_base = (pte_t *)pmd_page_vaddr(*pmd);
> -		remove_pte_table(pte_base, addr, next, direct);
> +		remove_pte_table(pte_base, addr, next, direct, altmap);
>  		free_pte_table(pte_base, pmd);
>  	}
>  	if (direct)
> @@ -807,7 +858,8 @@ static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
>  }
>  
>  static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
> -				       unsigned long end, bool direct)
> +				       unsigned long end, bool direct,
> +				       struct vmem_altmap *altmap)
>  {
>  	unsigned long next, pages = 0;
>  	pmd_t *pmd_base;
> @@ -832,15 +884,16 @@ static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
>  		}
>  
>  		pmd_base = pud_pgtable(*pud);
> -		remove_pmd_table(pmd_base, addr, next, direct);
> +		remove_pmd_table(pmd_base, addr, next, direct, altmap);
>  		free_pmd_table(pmd_base, pud);
>  	}
>  	if (direct)
>  		update_page_count(MMU_PAGE_1G, -pages);
>  }
>  
> -static void __meminit remove_pagetable(unsigned long start, unsigned long end,
> -				       bool direct)
> +static void __meminit
> +remove_pagetable(unsigned long start, unsigned long end, bool direct,
> +		 struct vmem_altmap *altmap)
>  {
>  	unsigned long addr, next;
>  	pud_t *pud_base;
> @@ -869,7 +922,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end,
>  		}
>  
>  		pud_base = p4d_pgtable(*p4d);
> -		remove_pud_table(pud_base, addr, next, direct);
> +		remove_pud_table(pud_base, addr, next, direct, altmap);
>  		free_pud_table(pud_base, p4d);
>  	}
>  
> @@ -892,7 +945,7 @@ int __meminit radix__create_section_mapping(unsigned long start,
>  
>  int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
>  {
> -	remove_pagetable(start, end, true);
> +	remove_pagetable(start, end, true, NULL);
>  	return 0;
>  }
>  #endif /* CONFIG_MEMORY_HOTPLUG */
> @@ -924,10 +977,224 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start,
>  	return 0;
>  }
>  
> +int __meminit vmemmap_check_pmd(pmd_t *pmd, int node,
> +				unsigned long addr, unsigned long next)
> +{
> +	int large = pmd_large(*pmd);
> +
> +	if (pmd_large(*pmd))

We already have the value of pmd_large() in the "large" variable,
so can't we just use if (large) here? (See the combined sketch below.)

> +		vmemmap_verify((pte_t *)pmd, node, addr, next);

Maybe we can use the pmdp_ptep() helper here, which we introduced in the 1st patch?
Also, shouldn't the function argument be named pmdp instead of pmd?
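
Putting both suggestions together, maybe something like this (untested
sketch, argument renamed to pmdp just for illustration):

	int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,
					unsigned long addr, unsigned long next)
	{
		/* remember whether this is a leaf (PMD-mapped) entry */
		int large = pmd_large(*pmdp);

		if (large)
			vmemmap_verify(pmdp_ptep(pmdp), node, addr, next);

		return large;
	}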

> +
> +	return large;
> +}
> +
> +void __meminit vmemmap_set_pmd(pmd_t *pmdp, void *p, int node,
> +			       unsigned long addr, unsigned long next)
> +{
> +	pte_t entry;
> +	pte_t *ptep = pmdp_ptep(pmdp);
> +
> +	VM_BUG_ON(!IS_ALIGNED(addr, PMD_SIZE));
> +	entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
> +	set_pte_at(&init_mm, addr, ptep, entry);
> +	asm volatile("ptesync": : :"memory");
> +
> +	vmemmap_verify(ptep, node, addr, next);
> +}
> +
> +static pte_t * __meminit radix__vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
> +						     struct vmem_altmap *altmap,
> +						     struct page *reuse)
> +{
> +	pte_t *pte = pte_offset_kernel(pmd, addr);
> +
> +	if (pte_none(*pte)) {
> +		pte_t entry;
> +		void *p;
> +
> +		if (!reuse) {
> +			/*
> +			 * make sure we don't create altmap mappings
> +			 * covering things outside the device.
> +			 */
> +			if (altmap && altmap_cross_boundary(altmap, addr, PAGE_SIZE))
> +				altmap = NULL;
> +
> +			p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
> +			if (!p) {
> +				if (altmap)
> +					p = vmemmap_alloc_block_buf(PAGE_SIZE, node, NULL);
> +				if (!p)
> +					return NULL;
> +			}

The above if conditions are quite confusing on a first read.
Can we simplify them like this? Did I get it right?

                if (!p && altmap)
                        p = vmemmap_alloc_block_buf(PAGE_SIZE, node, NULL);

                if (!p)
                        return NULL;
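
For context, the whole !reuse branch would then read something like
this (untested):

                /*
                 * make sure we don't create altmap mappings
                 * covering things outside the device.
                 */
                if (altmap && altmap_cross_boundary(altmap, addr, PAGE_SIZE))
                        altmap = NULL;

                p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
                /* the altmap may be exhausted; retry with a regular allocation */
                if (!p && altmap)
                        p = vmemmap_alloc_block_buf(PAGE_SIZE, node, NULL);
                if (!p)
                        return NULL;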

-ritesh

