trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
+static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
+{
+ unsigned long addr;
+ unsigned long ap = mmu_get_ap(psize);
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+}
+
static inline void _tlbiel_va(unsigned long va, unsigned long pid,
unsigned long psize, unsigned long ric)
{
unsigned long pid, unsigned long page_size,
unsigned long psize)
{
- unsigned long addr;
- unsigned long ap = mmu_get_ap(psize);
-
asm volatile("ptesync": : :"memory");
- for (addr = start; addr < end; addr += page_size)
- __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+ __tlbiel_va_range(start, end, pid, page_size, psize);
asm volatile("ptesync": : :"memory");
}
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
+static inline void __tlbie_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
+{
+ unsigned long addr;
+ unsigned long ap = mmu_get_ap(psize);
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+}
+
static inline void _tlbie_va(unsigned long va, unsigned long pid,
unsigned long psize, unsigned long ric)
{
unsigned long pid, unsigned long page_size,
unsigned long psize)
{
- unsigned long addr;
- unsigned long ap = mmu_get_ap(psize);
-
asm volatile("ptesync": : :"memory");
- for (addr = start; addr < end; addr += page_size)
- __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+ __tlbie_va_range(start, end, pid, page_size, psize);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
+#define TLB_FLUSH_ALL -1UL
+
/*
- * Currently, for range flushing, we just do a full mm flush. Because
- * we use this in code path where we don' track the page size.
+ * Number of pages above which we invalidate the entire PID rather than
+ * flush individual pages, for local and global flushes respectively.
+ *
+ * tlbie goes out to the interconnect and individual ops are more costly.
+ * It also does not iterate over sets like the local tlbiel variant when
+ * invalidating a full PID, so it has a far lower threshold to change from
+ * individual page flushes to full-pid flushes.
*/
+static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+
void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;
+ unsigned long pid;
+ unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
+ unsigned long page_size = 1UL << page_shift;
+ unsigned long nr_pages = (end - start) >> page_shift;
+ bool local, full;
+
+#ifdef CONFIG_HUGETLB_PAGE
+ if (is_vm_hugetlb_page(vma))
+ return radix__flush_hugetlb_tlb_range(vma, start, end);
+#endif
- radix__flush_tlb_mm(mm);
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ return;
+
+ preempt_disable();
+ local = mm_is_thread_local(mm);
+ full = (end == TLB_FLUSH_ALL || nr_pages > tlb_single_page_flush_ceiling);
+
+ if (full) {
+ if (local)
+ _tlbiel_pid(pid, RIC_FLUSH_TLB);
+ else
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ } else {
+ bool hflush = false;
+ unsigned long hstart, hend;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT;
+ hend = end >> HPAGE_PMD_SHIFT;
+ if (hstart < hend) {
+ hstart <<= HPAGE_PMD_SHIFT;
+ hend <<= HPAGE_PMD_SHIFT;
+ hflush = true;
+ }
+#endif
+
+ asm volatile("ptesync": : :"memory");
+ if (local) {
+ __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
+ if (hflush)
+ __tlbiel_va_range(hstart, hend, pid,
+ HPAGE_PMD_SIZE, MMU_PAGE_2M);
+ asm volatile("ptesync": : :"memory");
+ } else {
+ __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
+ if (hflush)
+ __tlbie_va_range(hstart, hend, pid,
+ HPAGE_PMD_SIZE, MMU_PAGE_2M);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ }
+ }
+ preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_range);
radix__flush_tlb_mm(mm);
}
-#define TLB_FLUSH_ALL -1UL
-/*
- * Number of pages above which we will do a bcast tlbie. Just a
- * number at this point copied from x86
- */
-static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
-
void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
unsigned long end, int psize)
{
unsigned long pid;
- bool local;
- unsigned long page_size = 1UL << mmu_psize_defs[psize].shift;
+ unsigned int page_shift = mmu_psize_defs[psize].shift;
+ unsigned long page_size = 1UL << page_shift;
+ unsigned long nr_pages = (end - start) >> page_shift;
+ bool local, full;
pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
preempt_disable();
local = mm_is_thread_local(mm);
- if (end == TLB_FLUSH_ALL ||
- (end - start) > tlb_single_page_flush_ceiling * page_size) {
+ full = (end == TLB_FLUSH_ALL || nr_pages > tlb_single_page_flush_ceiling);
+
+ if (full) {
if (local)
_tlbiel_pid(pid, RIC_FLUSH_TLB);
else
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
unsigned long pid, end;
- bool local;
pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
return;
}
+ end = addr + HPAGE_PMD_SIZE;
+
+ /* Otherwise first do the PWC, then iterate the pages. */
preempt_disable();
- local = mm_is_thread_local(mm);
- /* Otherwise first do the PWC */
- if (local)
- _tlbiel_pid(pid, RIC_FLUSH_PWC);
- else
- _tlbie_pid(pid, RIC_FLUSH_PWC);
- /* Then iterate the pages */
- end = addr + HPAGE_PMD_SIZE;
- if (local)
+ if (mm_is_thread_local(mm)) {
+ _tlbiel_pid(pid, RIC_FLUSH_PWC);
_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize);
- else
+ } else {
+ _tlbie_pid(pid, RIC_FLUSH_PWC);
_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize);
+ }
preempt_enable();
}