Commit 0baba94
arm64: errata: Work around early CME DVMSync acknowledgement
C1-Pro acknowledges DVMSync messages before completing the SME/CME memory
accesses. Work around this by issuing an IPI to the affected CPUs if they are
running in EL0 with SME enabled.

Note that we avoid the local DSB in the IPI handler as the kernel runs with
SCTLR_EL1.IESB=1. This is sufficient to complete SME memory accesses at EL0 on
taking an exception to EL1. On the return to user path, no barrier is
necessary either. See the comment in sme_set_active() and the more detailed
explanation in the link below.

To avoid a potential IPI flood from malicious applications (e.g.
madvise(MADV_PAGEOUT) in a tight loop), track where a process is active via
mm_cpumask() and only interrupt those CPUs.

Link: https://lore.kernel.org/r/ablEXwhfKyJW1i7l@J2N7QTR9R3
Cc: Will Deacon <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: James Morse <[email protected]>
Cc: Mark Brown <[email protected]>
Reviewed-by: Will Deacon <[email protected]>
Signed-off-by: Catalin Marinas <[email protected]>
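
As an aside, the IPI-flood scenario mentioned above could be driven by a
userspace loop along these lines (an illustrative sketch, not part of the
patch; MADV_PAGEOUT has been available since Linux 5.4):

	#include <string.h>
	#include <sys/mman.h>

	int main(void)
	{
		size_t len = 2 * 1024 * 1024;
		char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		if (buf == MAP_FAILED)
			return 1;

		for (;;) {
			/* Fault the pages back in... */
			memset(buf, 0xff, len);
			/* ...then force reclaim: unmap plus batched TLBI */
			madvise(buf, len, MADV_PAGEOUT);
		}
	}

Each madvise() call ends in a TLB flush; without the mm_cpumask() filtering,
every such flush would IPI all CPUs covered by the erratum rather than only
those where this mm is actually running with SME.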
1 parent: 2c99561
11 files changed: 264 additions & 4 deletions

Documentation/arch/arm64/silicon-errata.rst

Lines changed: 2 additions & 0 deletions
@@ -202,6 +202,8 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Neoverse-V3AE   | #3312417        | ARM64_ERRATUM_3194386       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | C1-Pro          | #4193714        | ARM64_ERRATUM_4193714       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | MMU-500         | #841119,826419  | ARM_SMMU_MMU_500_CPRE_ERRATA|
 |                |                 | #562869,1047329 |                             |
 +----------------+-----------------+-----------------+-----------------------------+

arch/arm64/Kconfig

Lines changed: 12 additions & 0 deletions
@@ -1175,6 +1175,18 @@ config ARM64_ERRATUM_4311569
 
 	  If unsure, say Y.
 
+config ARM64_ERRATUM_4193714
+	bool "C1-Pro: 4193714: SME DVMSync early acknowledgement"
+	depends on ARM64_SME
+	default y
+	help
+	  Enable workaround for C1-Pro acknowledging the DVMSync before
+	  the SME memory accesses are complete. This will cause TLB
+	  maintenance for processes using SME to also issue an IPI to
+	  the affected CPUs.
+
+	  If unsure, say Y.
+
 config CAVIUM_ERRATUM_22375
 	bool "Cavium erratum 22375, 24313"
 	default y

arch/arm64/include/asm/cpucaps.h

Lines changed: 2 additions & 0 deletions
@@ -64,6 +64,8 @@ cpucap_is_possible(const unsigned int cap)
 		return IS_ENABLED(CONFIG_ARM64_WORKAROUND_REPEAT_TLBI);
 	case ARM64_WORKAROUND_SPECULATIVE_SSBS:
 		return IS_ENABLED(CONFIG_ARM64_ERRATUM_3194386);
+	case ARM64_WORKAROUND_4193714:
+		return IS_ENABLED(CONFIG_ARM64_ERRATUM_4193714);
 	case ARM64_MPAM:
 		/*
 		 * KVM MPAM support doesn't rely on the host kernel supporting MPAM.
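
The new case in cpucap_is_possible() is what lets the header checks compile
away entirely when the workaround is configured out: every
alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714) call site then
constant-folds to false. A simplified sketch of the helper's shape (the real
version in asm/alternative-macros.h uses an asm-goto branch patched at boot;
__runtime_patched_branch below is a hypothetical stand-in for that):

	static __always_inline bool
	alternative_has_cap_unlikely(const unsigned int cpucap)
	{
		/* Compile-time false when CONFIG_ARM64_ERRATUM_4193714=n */
		if (!cpucap_is_possible(cpucap))
			return false;

		/* Otherwise a branch rewritten by the alternatives framework */
		return __runtime_patched_branch(cpucap);
	}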

arch/arm64/include/asm/fpsimd.h

Lines changed: 21 additions & 0 deletions
@@ -428,6 +428,24 @@ static inline size_t sme_state_size(struct task_struct const *task)
 	return __sme_state_size(task_get_sme_vl(task));
 }
 
+void sme_enable_dvmsync(void);
+void sme_set_active(void);
+void sme_clear_active(void);
+
+static inline void sme_enter_from_user_mode(void)
+{
+	if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714) &&
+	    test_thread_flag(TIF_SME))
+		sme_clear_active();
+}
+
+static inline void sme_exit_to_user_mode(void)
+{
+	if (alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714) &&
+	    test_thread_flag(TIF_SME))
+		sme_set_active();
+}
+
 #else
 
 static inline void sme_user_disable(void) { BUILD_BUG(); }
@@ -456,6 +474,9 @@ static inline size_t sme_state_size(struct task_struct const *task)
 	return 0;
 }
 
+static inline void sme_enter_from_user_mode(void) { }
+static inline void sme_exit_to_user_mode(void) { }
+
 #endif /* ! CONFIG_ARM64_SME */
 
 /* For use by EFI runtime services calls only */

arch/arm64/include/asm/tlbbatch.h

Lines changed: 8 additions & 2 deletions
@@ -2,11 +2,17 @@
 #ifndef _ARCH_ARM64_TLBBATCH_H
 #define _ARCH_ARM64_TLBBATCH_H
 
+#include <linux/cpumask.h>
+
 struct arch_tlbflush_unmap_batch {
+#ifdef CONFIG_ARM64_ERRATUM_4193714
 	/*
-	 * For arm64, HW can do tlb shootdown, so we don't
-	 * need to record cpumask for sending IPI
+	 * Track CPUs that need SME DVMSync on completion of this batch.
+	 * Otherwise, the arm64 HW can do tlb shootdown, so we don't need to
+	 * record cpumask for sending IPI
 	 */
+	cpumask_var_t cpumask;
+#endif
 };
 
 #endif /* _ARCH_ARM64_TLBBATCH_H */
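
The cpumask_var_t type changes representation with CONFIG_CPUMASK_OFFSTACK:
when it is off, the variable is an on-stack struct cpumask and allocation
always succeeds; when it is on, it is a pointer that must be allocated, which
is why the tlbflush.h code below guards uses with cpumask_available(). A
minimal sketch of the usual pattern (generic kernel API, not from this patch):

	#include <linux/cpumask.h>
	#include <linux/errno.h>
	#include <linux/gfp.h>
	#include <linux/smp.h>

	static int cpumask_var_example(void)
	{
		cpumask_var_t mask;

		/* Can only fail when CONFIG_CPUMASK_OFFSTACK=y */
		if (!zalloc_cpumask_var(&mask, GFP_ATOMIC))
			return -ENOMEM;

		cpumask_set_cpu(raw_smp_processor_id(), mask);
		/* ... consume mask ... */
		free_cpumask_var(mask);
		return 0;
	}

The patch itself allocates the batch cpumask lazily on first use and, if the
allocation fails, falls back to sending the IPIs immediately for that mm.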

arch/arm64/include/asm/tlbflush.h

Lines changed: 70 additions & 2 deletions
@@ -80,6 +80,71 @@ static inline unsigned long get_trans_granule(void)
 	}
 }
 
+#ifdef CONFIG_ARM64_ERRATUM_4193714
+
+void sme_do_dvmsync(const struct cpumask *mask);
+
+static inline void sme_dvmsync(struct mm_struct *mm)
+{
+	if (!alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714))
+		return;
+
+	sme_do_dvmsync(mm_cpumask(mm));
+}
+
+static inline void sme_dvmsync_add_pending(struct arch_tlbflush_unmap_batch *batch,
+					   struct mm_struct *mm)
+{
+	if (!alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714))
+		return;
+
+	/*
+	 * Order the mm_cpumask() read after the hardware DVMSync.
+	 */
+	dsb(ish);
+	if (cpumask_empty(mm_cpumask(mm)))
+		return;
+
+	/*
+	 * Allocate the batch cpumask on first use. Fall back to an immediate
+	 * IPI for this mm in case of failure.
+	 */
+	if (!cpumask_available(batch->cpumask) &&
+	    !zalloc_cpumask_var(&batch->cpumask, GFP_ATOMIC)) {
+		sme_do_dvmsync(mm_cpumask(mm));
+		return;
+	}
+
+	cpumask_or(batch->cpumask, batch->cpumask, mm_cpumask(mm));
+}
+
+static inline void sme_dvmsync_batch(struct arch_tlbflush_unmap_batch *batch)
+{
+	if (!alternative_has_cap_unlikely(ARM64_WORKAROUND_4193714))
+		return;
+
+	if (!cpumask_available(batch->cpumask))
+		return;
+
+	sme_do_dvmsync(batch->cpumask);
+	cpumask_clear(batch->cpumask);
+}
+
+#else
+
+static inline void sme_dvmsync(struct mm_struct *mm)
+{
+}
+static inline void sme_dvmsync_add_pending(struct arch_tlbflush_unmap_batch *batch,
+					   struct mm_struct *mm)
+{
+}
+static inline void sme_dvmsync_batch(struct arch_tlbflush_unmap_batch *batch)
+{
+}
+
+#endif /* CONFIG_ARM64_ERRATUM_4193714 */
+
 /*
  * Level-based TLBI operations.
  *
@@ -189,12 +254,14 @@ static inline void __tlbi_sync_s1ish(struct mm_struct *mm)
 {
 	dsb(ish);
 	__repeat_tlbi_sync(vale1is, 0);
+	sme_dvmsync(mm);
 }
 
-static inline void __tlbi_sync_s1ish_batch(void)
+static inline void __tlbi_sync_s1ish_batch(struct arch_tlbflush_unmap_batch *batch)
 {
 	dsb(ish);
 	__repeat_tlbi_sync(vale1is, 0);
+	sme_dvmsync_batch(batch);
 }
 
 static inline void __tlbi_sync_s1ish_kernel(void)
@@ -397,7 +464,7 @@ static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
  */
 static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 {
-	__tlbi_sync_s1ish_batch();
+	__tlbi_sync_s1ish_batch(batch);
 }
 
 /*
@@ -602,6 +669,7 @@ static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *b
 					     struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	__flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, true, 3);
+	sme_dvmsync_add_pending(batch, mm);
 }
 
 static inline bool __pte_flags_need_flush(ptdesc_t oldval, ptdesc_t newval)
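
For orientation, these arch hooks are driven by the generic batched-unmap code
during reclaim. Roughly, as a paraphrase of mm/rmap.c (simplified, with some
arguments omitted; not part of this patch):

	/* During reclaim, each unmapped page queues its invalidation: */
	static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
					      unsigned long start,
					      unsigned long end)
	{
		struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;

		/* Queues the TLBI; with this erratum, also ORs mm_cpumask()
		 * into batch->cpumask via sme_dvmsync_add_pending(). */
		arch_tlbbatch_add_pending(&tlb_ubc->arch, mm, start, end);
		tlb_ubc->flush_required = true;
	}

	/* Once the batch is complete, a single flush covers all of it: */
	void try_to_unmap_flush(void)
	{
		struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;

		if (!tlb_ubc->flush_required)
			return;

		/* DSB + TLBI sync, then sme_dvmsync_batch() IPIs the CPUs
		 * recorded in batch->cpumask. */
		arch_tlbbatch_flush(&tlb_ubc->arch);
		tlb_ubc->flush_required = false;
	}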

arch/arm64/kernel/cpu_errata.c

Lines changed: 30 additions & 0 deletions
@@ -11,6 +11,7 @@
 #include <asm/cpu.h>
 #include <asm/cputype.h>
 #include <asm/cpufeature.h>
+#include <asm/fpsimd.h>
 #include <asm/kvm_asm.h>
 #include <asm/smp_plat.h>
 
@@ -575,6 +576,23 @@ static const struct midr_range erratum_spec_ssbs_list[] = {
 };
 #endif
 
+#ifdef CONFIG_ARM64_ERRATUM_4193714
+static bool has_sme_dvmsync_erratum(const struct arm64_cpu_capabilities *entry,
+				    int scope)
+{
+	if (!id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1)))
+		return false;
+
+	return is_affected_midr_range(entry, scope);
+}
+
+static void cpu_enable_sme_dvmsync(const struct arm64_cpu_capabilities *__unused)
+{
+	if (this_cpu_has_cap(ARM64_WORKAROUND_4193714))
+		sme_enable_dvmsync();
+}
+#endif
+
 #ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38
 static const struct midr_range erratum_ac03_cpu_38_list[] = {
 	MIDR_ALL_VERSIONS(MIDR_AMPERE1),
@@ -901,6 +919,18 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		.matches = need_arm_si_l1_workaround_4311569,
 	},
 #endif
+#ifdef CONFIG_ARM64_ERRATUM_4193714
+	{
+		.desc = "C1-Pro SME DVMSync early acknowledgement",
+		.capability = ARM64_WORKAROUND_4193714,
+		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+		.matches = has_sme_dvmsync_erratum,
+		.cpu_enable = cpu_enable_sme_dvmsync,
+		/* C1-Pro r0p0 - r1p2 (the latter only when REVIDR_EL1[0]==0) */
+		.midr_range = MIDR_RANGE(MIDR_C1_PRO, 0, 0, 1, 2),
+		MIDR_FIXED(MIDR_CPU_VAR_REV(1, 2), BIT(0)),
+	},
+#endif
 #ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
 	{
 		.desc = "ARM errata 2966298, 3117295",

arch/arm64/kernel/entry-common.c

Lines changed: 3 additions & 0 deletions
@@ -21,6 +21,7 @@
 #include <asm/daifflags.h>
 #include <asm/esr.h>
 #include <asm/exception.h>
+#include <asm/fpsimd.h>
 #include <asm/irq_regs.h>
 #include <asm/kprobes.h>
 #include <asm/mmu.h>
@@ -67,6 +68,7 @@ static __always_inline void arm64_enter_from_user_mode(struct pt_regs *regs)
 {
 	enter_from_user_mode(regs);
 	mte_disable_tco_entry(current);
+	sme_enter_from_user_mode();
 }
 
 /*
@@ -80,6 +82,7 @@ static __always_inline void arm64_exit_to_user_mode(struct pt_regs *regs)
 	local_irq_disable();
 	exit_to_user_mode_prepare_legacy(regs);
 	local_daif_mask();
+	sme_exit_to_user_mode();
 	mte_check_tfsr_exit();
 	exit_to_user_mode();
 }

arch/arm64/kernel/fpsimd.c

Lines changed: 79 additions & 0 deletions
@@ -15,6 +15,7 @@
 #include <linux/compiler.h>
 #include <linux/cpu.h>
 #include <linux/cpu_pm.h>
+#include <linux/cpumask.h>
 #include <linux/ctype.h>
 #include <linux/kernel.h>
 #include <linux/linkage.h>
@@ -28,6 +29,7 @@
 #include <linux/sched/task_stack.h>
 #include <linux/signal.h>
 #include <linux/slab.h>
+#include <linux/smp.h>
 #include <linux/stddef.h>
 #include <linux/sysctl.h>
 #include <linux/swab.h>
@@ -1358,6 +1360,83 @@ void do_sve_acc(unsigned long esr, struct pt_regs *regs)
 	put_cpu_fpsimd_context();
 }
 
+#ifdef CONFIG_ARM64_ERRATUM_4193714
+
+/*
+ * SME/CME erratum handling.
+ */
+static cpumask_t sme_dvmsync_cpus;
+
+/*
+ * These helpers are only called from non-preemptible contexts, so
+ * smp_processor_id() is safe here.
+ */
+void sme_set_active(void)
+{
+	unsigned int cpu = smp_processor_id();
+
+	if (!cpumask_test_cpu(cpu, &sme_dvmsync_cpus))
+		return;
+
+	cpumask_set_cpu(cpu, mm_cpumask(current->mm));
+
+	/*
+	 * A subsequent (post ERET) SME access may use a stale address
+	 * translation. On C1-Pro, a TLBI+DSB on a different CPU will wait for
+	 * the completion of cpumask_set_cpu() above as it appears in program
+	 * order before the SME access. The post-TLBI+DSB read of mm_cpumask()
+	 * will lead to the IPI being issued.
+	 *
+	 * https://lore.kernel.org/r/ablEXwhfKyJW1i7l@J2N7QTR9R3
+	 */
+}
+
+void sme_clear_active(void)
+{
+	unsigned int cpu = smp_processor_id();
+
+	if (!cpumask_test_cpu(cpu, &sme_dvmsync_cpus))
+		return;
+
+	/*
+	 * With SCTLR_EL1.IESB enabled, the SME memory transactions are
+	 * completed on entering EL1.
+	 */
+	cpumask_clear_cpu(cpu, mm_cpumask(current->mm));
+}
+
+static void sme_dvmsync_ipi(void *unused)
+{
+	/*
+	 * With SCTLR_EL1.IESB on, taking an exception is sufficient to ensure
+	 * the completion of the SME memory accesses, so no need for an
+	 * explicit DSB.
+	 */
+}
+
+void sme_do_dvmsync(const struct cpumask *mask)
+{
+	/*
+	 * This is called from the TLB maintenance functions after the DSB ISH
+	 * to send the hardware DVMSync message. If this CPU sees the mask as
+	 * empty, the remote CPU executing sme_set_active() would have seen
+	 * the DVMSync and no IPI required.
+	 */
+	if (cpumask_empty(mask))
+		return;
+
+	preempt_disable();
+	smp_call_function_many(mask, sme_dvmsync_ipi, NULL, true);
+	preempt_enable();
+}
+
+void sme_enable_dvmsync(void)
+{
+	cpumask_set_cpu(smp_processor_id(), &sme_dvmsync_cpus);
+}
+
+#endif /* CONFIG_ARM64_ERRATUM_4193714 */
+
 /*
  * Trapped SME access
  *
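
To make the ordering argument in sme_set_active() concrete, one reading of the
two-CPU interaction as a timeline (my summary of the commit message and the
linked thread, not code from the patch):

	/*
	 *   CPU0 (SME task)                  CPU1 (unmapping pages)
	 *   -------------------------------  -------------------------------
	 *   sme_set_active():
	 *     cpumask_set_cpu(0, mm_cpumask)
	 *   ERET; SME accesses begin         TLBI; DSB ISH
	 *                                    (C1-Pro acks the DVMSync before
	 *                                     CPU0's SME accesses complete)
	 *                                    sme_do_dvmsync(mm_cpumask(mm)):
	 *                                      sees CPU0 set -> sends IPI
	 *   IPI: exception entry with
	 *   SCTLR_EL1.IESB=1 completes the
	 *   in-flight SME accesses
	 *
	 * Conversely, if CPU1 reads mm_cpumask() as empty, CPU0's
	 * cpumask_set_cpu() had not yet completed, so any SME accesses CPU0
	 * issues after ERET are ordered after the DVMSync and need no IPI.
	 */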
