Skip to content

Commit 6c6ba54

Browse files
committed
Merge tag 'kvm-s390-master-7.0-2' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
KVM: s390: More memory management fixes. Lots of small and not-so-small fixes for the newly rewritten gmap, mostly affecting the handling of nested guests.
2 parents 0138af2 + 0a28e06 commit 6c6ba54

7 files changed

Lines changed: 231 additions & 178 deletions

File tree

arch/s390/kvm/dat.c

Lines changed: 15 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -134,32 +134,6 @@ int dat_set_asce_limit(struct kvm_s390_mmu_cache *mc, union asce *asce, int newt
134134
return 0;
135135
}
136136

137-
/**
138-
* dat_crstep_xchg() - Exchange a gmap CRSTE with another.
139-
* @crstep: Pointer to the CRST entry
140-
* @new: Replacement entry.
141-
* @gfn: The affected guest address.
142-
* @asce: The ASCE of the address space.
143-
*
144-
* Context: This function is assumed to be called with kvm->mmu_lock held.
145-
*/
146-
void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce asce)
147-
{
148-
if (crstep->h.i) {
149-
WRITE_ONCE(*crstep, new);
150-
return;
151-
} else if (cpu_has_edat2()) {
152-
crdte_crste(crstep, *crstep, new, gfn, asce);
153-
return;
154-
}
155-
156-
if (machine_has_tlb_guest())
157-
idte_crste(crstep, gfn, IDTE_GUEST_ASCE, asce, IDTE_GLOBAL);
158-
else
159-
idte_crste(crstep, gfn, 0, NULL_ASCE, IDTE_GLOBAL);
160-
WRITE_ONCE(*crstep, new);
161-
}
162-
163137
/**
164138
* dat_crstep_xchg_atomic() - Atomically exchange a gmap CRSTE with another.
165139
* @crstep: Pointer to the CRST entry.
@@ -175,8 +149,8 @@ void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce
175149
*
176150
* Return: %true if the exchange was successful.
177151
*/
178-
bool dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new, gfn_t gfn,
179-
union asce asce)
152+
bool __must_check dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new,
153+
gfn_t gfn, union asce asce)
180154
{
181155
if (old.h.i)
182156
return arch_try_cmpxchg((long *)crstep, &old.val, new.val);
@@ -292,6 +266,7 @@ static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t g
292266
pt->ptes[i].val = init.val | i * PAGE_SIZE;
293267
/* No need to take locks as the page table is not installed yet. */
294268
pgste_init.prefix_notif = old.s.fc1.prefix_notif;
269+
pgste_init.vsie_notif = old.s.fc1.vsie_notif;
295270
pgste_init.pcl = uses_skeys && init.h.i;
296271
dat_init_pgstes(pt, pgste_init.val);
297272
} else {
@@ -893,7 +868,8 @@ static long _dat_slot_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct d
893868

894869
/* This table entry needs to be updated. */
895870
if (walk->start <= gfn && walk->end >= next) {
896-
dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce);
871+
if (!dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce))
872+
return -EINVAL;
897873
/* A lower level table was present, needs to be freed. */
898874
if (!crste.h.fc && !crste.h.i) {
899875
if (is_pmd(crste))
@@ -1021,67 +997,21 @@ bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end)
1021997
return _dat_walk_gfn_range(start, end, asce, &test_age_ops, 0, NULL) > 0;
1022998
}
1023999

1024-
int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
1025-
bool uses_skeys, struct guest_fault *f)
1026-
{
1027-
union crste oldval, newval;
1028-
union pte newpte, oldpte;
1029-
union pgste pgste;
1030-
int rc = 0;
1031-
1032-
rc = dat_entry_walk(mc, f->gfn, asce, DAT_WALK_ALLOC_CONTINUE, level, &f->crstep, &f->ptep);
1033-
if (rc == -EINVAL || rc == -ENOMEM)
1034-
return rc;
1035-
if (rc)
1036-
return -EAGAIN;
1037-
1038-
if (WARN_ON_ONCE(unlikely(get_level(f->crstep, f->ptep) > level)))
1039-
return -EINVAL;
1040-
1041-
if (f->ptep) {
1042-
pgste = pgste_get_lock(f->ptep);
1043-
oldpte = *f->ptep;
1044-
newpte = _pte(f->pfn, f->writable, f->write_attempt | oldpte.s.d, !f->page);
1045-
newpte.s.sd = oldpte.s.sd;
1046-
oldpte.s.sd = 0;
1047-
if (oldpte.val == _PTE_EMPTY.val || oldpte.h.pfra == f->pfn) {
1048-
pgste = __dat_ptep_xchg(f->ptep, pgste, newpte, f->gfn, asce, uses_skeys);
1049-
if (f->callback)
1050-
f->callback(f);
1051-
} else {
1052-
rc = -EAGAIN;
1053-
}
1054-
pgste_set_unlock(f->ptep, pgste);
1055-
} else {
1056-
oldval = READ_ONCE(*f->crstep);
1057-
newval = _crste_fc1(f->pfn, oldval.h.tt, f->writable,
1058-
f->write_attempt | oldval.s.fc1.d);
1059-
newval.s.fc1.sd = oldval.s.fc1.sd;
1060-
if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val &&
1061-
crste_origin_large(oldval) != crste_origin_large(newval))
1062-
return -EAGAIN;
1063-
if (!dat_crstep_xchg_atomic(f->crstep, oldval, newval, f->gfn, asce))
1064-
return -EAGAIN;
1065-
if (f->callback)
1066-
f->callback(f);
1067-
}
1068-
1069-
return rc;
1070-
}
1071-
10721000
static long dat_set_pn_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
10731001
{
1074-
union crste crste = READ_ONCE(*crstep);
1002+
union crste newcrste, oldcrste;
10751003
int *n = walk->priv;
10761004

1077-
if (!crste.h.fc || crste.h.i || crste.h.p)
1078-
return 0;
1079-
1005+
do {
1006+
oldcrste = READ_ONCE(*crstep);
1007+
if (!oldcrste.h.fc || oldcrste.h.i || oldcrste.h.p)
1008+
return 0;
1009+
if (oldcrste.s.fc1.prefix_notif)
1010+
break;
1011+
newcrste = oldcrste;
1012+
newcrste.s.fc1.prefix_notif = 1;
1013+
} while (!dat_crstep_xchg_atomic(crstep, oldcrste, newcrste, gfn, walk->asce));
10801014
*n = 2;
1081-
if (crste.s.fc1.prefix_notif)
1082-
return 0;
1083-
crste.s.fc1.prefix_notif = 1;
1084-
dat_crstep_xchg(crstep, crste, gfn, walk->asce);
10851015
return 0;
10861016
}
10871017

arch/s390/kvm/dat.h

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -160,14 +160,14 @@ union pmd {
160160
unsigned long :44; /* HW */
161161
unsigned long : 3; /* Unused */
162162
unsigned long : 1; /* HW */
163+
unsigned long s : 1; /* Special */
163164
unsigned long w : 1; /* Writable soft-bit */
164165
unsigned long r : 1; /* Readable soft-bit */
165166
unsigned long d : 1; /* Dirty */
166167
unsigned long y : 1; /* Young */
167-
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
168168
unsigned long : 3; /* HW */
169+
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
169170
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
170-
unsigned long : 1; /* Unused */
171171
unsigned long : 4; /* HW */
172172
unsigned long sd : 1; /* Soft-Dirty */
173173
unsigned long pr : 1; /* Present */
@@ -183,14 +183,14 @@ union pud {
183183
unsigned long :33; /* HW */
184184
unsigned long :14; /* Unused */
185185
unsigned long : 1; /* HW */
186+
unsigned long s : 1; /* Special */
186187
unsigned long w : 1; /* Writable soft-bit */
187188
unsigned long r : 1; /* Readable soft-bit */
188189
unsigned long d : 1; /* Dirty */
189190
unsigned long y : 1; /* Young */
190-
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
191191
unsigned long : 3; /* HW */
192+
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
192193
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
193-
unsigned long : 1; /* Unused */
194194
unsigned long : 4; /* HW */
195195
unsigned long sd : 1; /* Soft-Dirty */
196196
unsigned long pr : 1; /* Present */
@@ -254,14 +254,14 @@ union crste {
254254
struct {
255255
unsigned long :47;
256256
unsigned long : 1; /* HW (should be 0) */
257+
unsigned long s : 1; /* Special */
257258
unsigned long w : 1; /* Writable */
258259
unsigned long r : 1; /* Readable */
259260
unsigned long d : 1; /* Dirty */
260261
unsigned long y : 1; /* Young */
261-
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
262262
unsigned long : 3; /* HW */
263+
unsigned long prefix_notif : 1; /* Guest prefix invalidation notification */
263264
unsigned long vsie_notif : 1; /* Referenced in a shadow table */
264-
unsigned long : 1;
265265
unsigned long : 4; /* HW */
266266
unsigned long sd : 1; /* Soft-Dirty */
267267
unsigned long pr : 1; /* Present */
@@ -540,8 +540,6 @@ int dat_set_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start, gf
540540
u16 type, u16 param);
541541
int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn);
542542
bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end);
543-
int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
544-
bool uses_skeys, struct guest_fault *f);
545543

546544
int dat_perform_essa(union asce asce, gfn_t gfn, int orc, union essa_state *state, bool *dirty);
547545
long dat_reset_cmma(union asce asce, gfn_t start_gfn);
@@ -938,11 +936,14 @@ static inline bool dat_pudp_xchg_atomic(union pud *pudp, union pud old, union pu
938936
return dat_crstep_xchg_atomic(_CRSTEP(pudp), _CRSTE(old), _CRSTE(new), gfn, asce);
939937
}
940938

941-
static inline void dat_crstep_clear(union crste *crstep, gfn_t gfn, union asce asce)
939+
static inline union crste dat_crstep_clear_atomic(union crste *crstep, gfn_t gfn, union asce asce)
942940
{
943-
union crste newcrste = _CRSTE_EMPTY(crstep->h.tt);
941+
union crste oldcrste, empty = _CRSTE_EMPTY(crstep->h.tt);
944942

945-
dat_crstep_xchg(crstep, newcrste, gfn, asce);
943+
do {
944+
oldcrste = READ_ONCE(*crstep);
945+
} while (!dat_crstep_xchg_atomic(crstep, oldcrste, empty, gfn, asce));
946+
return oldcrste;
946947
}
947948

948949
static inline int get_level(union crste *crstep, union pte *ptep)

arch/s390/kvm/gaccess.c

Lines changed: 51 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1436,13 +1436,21 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union
14361436

14371437
if (!pgste_get_trylock(ptep_h, &pgste))
14381438
return -EAGAIN;
1439-
newpte = _pte(f->pfn, f->writable, !p, 0);
1440-
newpte.s.d |= ptep->s.d;
1441-
newpte.s.sd |= ptep->s.sd;
1442-
newpte.h.p &= ptep->h.p;
1443-
pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false);
1444-
pgste.vsie_notif = 1;
1439+
newpte = _pte(f->pfn, f->writable, !p, ptep_h->s.s);
1440+
newpte.s.d |= ptep_h->s.d;
1441+
newpte.s.sd |= ptep_h->s.sd;
1442+
newpte.h.p &= ptep_h->h.p;
1443+
if (!newpte.h.p && !f->writable) {
1444+
rc = -EOPNOTSUPP;
1445+
} else {
1446+
pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false);
1447+
pgste.vsie_notif = 1;
1448+
}
14451449
pgste_set_unlock(ptep_h, pgste);
1450+
if (rc)
1451+
return rc;
1452+
if (!sg->parent)
1453+
return -EAGAIN;
14461454

14471455
newpte = _pte(f->pfn, 0, !p, 0);
14481456
if (!pgste_get_trylock(ptep, &pgste))
@@ -1456,7 +1464,7 @@ static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union
14561464
static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, union crste *table,
14571465
struct guest_fault *f, bool p)
14581466
{
1459-
union crste newcrste;
1467+
union crste newcrste, oldcrste;
14601468
gfn_t gfn;
14611469
int rc;
14621470

@@ -1469,16 +1477,28 @@ static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, uni
14691477
if (rc)
14701478
return rc;
14711479

1472-
newcrste = _crste_fc1(f->pfn, host->h.tt, f->writable, !p);
1473-
newcrste.s.fc1.d |= host->s.fc1.d;
1474-
newcrste.s.fc1.sd |= host->s.fc1.sd;
1475-
newcrste.h.p &= host->h.p;
1476-
newcrste.s.fc1.vsie_notif = 1;
1477-
newcrste.s.fc1.prefix_notif = host->s.fc1.prefix_notif;
1478-
_gmap_crstep_xchg(sg->parent, host, newcrste, f->gfn, false);
1480+
do {
1481+
/* _gmap_crstep_xchg_atomic() could have unshadowed this shadow gmap */
1482+
if (!sg->parent)
1483+
return -EAGAIN;
1484+
oldcrste = READ_ONCE(*host);
1485+
newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, f->writable, !p);
1486+
newcrste.s.fc1.d |= oldcrste.s.fc1.d;
1487+
newcrste.s.fc1.sd |= oldcrste.s.fc1.sd;
1488+
newcrste.h.p &= oldcrste.h.p;
1489+
newcrste.s.fc1.vsie_notif = 1;
1490+
newcrste.s.fc1.prefix_notif = oldcrste.s.fc1.prefix_notif;
1491+
newcrste.s.fc1.s = oldcrste.s.fc1.s;
1492+
if (!newcrste.h.p && !f->writable)
1493+
return -EOPNOTSUPP;
1494+
} while (!_gmap_crstep_xchg_atomic(sg->parent, host, oldcrste, newcrste, f->gfn, false));
1495+
if (!sg->parent)
1496+
return -EAGAIN;
14791497

1480-
newcrste = _crste_fc1(f->pfn, host->h.tt, 0, !p);
1481-
dat_crstep_xchg(table, newcrste, gpa_to_gfn(raddr), sg->asce);
1498+
newcrste = _crste_fc1(f->pfn, oldcrste.h.tt, 0, !p);
1499+
gfn = gpa_to_gfn(raddr);
1500+
while (!dat_crstep_xchg_atomic(table, READ_ONCE(*table), newcrste, gfn, sg->asce))
1501+
;
14821502
return 0;
14831503
}
14841504

@@ -1502,21 +1522,31 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
15021522
if (rc)
15031523
return rc;
15041524

1505-
/* A race occourred. The shadow mapping is already valid, nothing to do */
1506-
if ((ptep && !ptep->h.i) || (!ptep && crste_leaf(*table)))
1525+
/* A race occurred. The shadow mapping is already valid, nothing to do */
1526+
if ((ptep && !ptep->h.i && ptep->h.p == w->p) ||
1527+
(!ptep && crste_leaf(*table) && !table->h.i && table->h.p == w->p))
15071528
return 0;
15081529

15091530
gl = get_level(table, ptep);
15101531

1532+
/* In case of a real address space */
1533+
if (w->level <= LEVEL_MEM) {
1534+
l = TABLE_TYPE_PAGE_TABLE;
1535+
hl = TABLE_TYPE_REGION1;
1536+
goto real_address_space;
1537+
}
1538+
15111539
/*
15121540
* Skip levels that are already protected. For each level, protect
15131541
* only the page containing the entry, not the whole table.
15141542
*/
15151543
for (i = gl ; i >= w->level; i--) {
1516-
rc = gmap_protect_rmap(mc, sg, entries[i - 1].gfn, gpa_to_gfn(saddr),
1517-
entries[i - 1].pfn, i, entries[i - 1].writable);
1544+
rc = gmap_protect_rmap(mc, sg, entries[i].gfn, gpa_to_gfn(saddr),
1545+
entries[i].pfn, i + 1, entries[i].writable);
15181546
if (rc)
15191547
return rc;
1548+
if (!sg->parent)
1549+
return -EAGAIN;
15201550
}
15211551

15221552
rc = dat_entry_walk(NULL, entries[LEVEL_MEM].gfn, sg->parent->asce, DAT_WALK_LEAF,
@@ -1528,6 +1558,7 @@ static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
15281558
/* Get the smallest granularity */
15291559
l = min3(gl, hl, w->level);
15301560

1561+
real_address_space:
15311562
flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? DAT_WALK_USES_SKEYS : 0);
15321563
/* If necessary, create the shadow mapping */
15331564
if (l < gl) {

0 commit comments

Comments
 (0)