diff --git a/.gitignore b/.gitignore index 70580bdd352cc..2e5dd819a2d5a 100644 --- a/.gitignore +++ b/.gitignore @@ -70,7 +70,7 @@ modules.order # # Debian directory (make deb-pkg) # -/debian/ +#/debian/ # # Snap directory (make snap-pkg) diff --git a/Documentation/ABI/stable/sysfs-driver-mlxreg-io b/Documentation/ABI/stable/sysfs-driver-mlxreg-io index 8ca498447aeb9..05601a90a9b6f 100644 --- a/Documentation/ABI/stable/sysfs-driver-mlxreg-io +++ b/Documentation/ABI/stable/sysfs-driver-mlxreg-io @@ -29,13 +29,13 @@ Description: This file shows the system fans direction: The files are read only. -What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/jtag_enable +What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld3_version Date: November 2018 KernelVersion: 5.0 Contact: Vadim Pasternak Description: These files show with which CPLD versions have been burned - on LED board. + on LED or Gearbox board. The files are read only. @@ -121,6 +121,15 @@ Description: These files show the system reset cause, as following: ComEx The files are read only. +What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld4_version +Date: November 2018 +KernelVersion: 5.0 +Contact: Vadim Pasternak +Description: These files show with which CPLD versions have been burned + on LED board. + + The files are read only. + Date: June 2019 KernelVersion: 5.3 Contact: Vadim Pasternak diff --git a/Documentation/ABI/testing/debugfs-aufs b/Documentation/ABI/testing/debugfs-aufs new file mode 100644 index 0000000000000..4a6694194ba69 --- /dev/null +++ b/Documentation/ABI/testing/debugfs-aufs @@ -0,0 +1,55 @@ +What: /debug/aufs/si_/ +Date: March 2009 +Contact: J. R. Okajima +Description: + Under /debug/aufs, a directory named si_ is created + per aufs mount, where is a unique id generated + internally. + +What: /debug/aufs/si_/plink +Date: Apr 2013 +Contact: J. R. Okajima +Description: + It has three lines and shows the information about the + pseudo-link. The first line is a single number + representing a number of buckets. The second line is a + number of pseudo-links per buckets (separated by a + blank). The last line is a single number representing a + total number of psedo-links. + When the aufs mount option 'noplink' is specified, it + will show "1\n0\n0\n". + +What: /debug/aufs/si_/xib +Date: March 2009 +Contact: J. R. Okajima +Description: + It shows the consumed blocks by xib (External Inode Number + Bitmap), its block size and file size. + When the aufs mount option 'noxino' is specified, it + will be empty. About XINO files, see the aufs manual. + +What: /debug/aufs/si_/xi0, xi1 ... xiN and xiN-N +Date: March 2009 +Contact: J. R. Okajima +Description: + It shows the consumed blocks by xino (External Inode Number + Translation Table), its link count, block size and file + size. + Due to the file size limit, there may exist multiple + xino files per branch. In this case, "-N" is added to + the filename and it corresponds to the index of the + internal xino array. "-0" is omitted. + When the aufs mount option 'noxino' is specified, Those + entries won't exist. About XINO files, see the aufs + manual. + +What: /debug/aufs/si_/xigen +Date: March 2009 +Contact: J. R. Okajima +Description: + It shows the consumed blocks by xigen (External Inode + Generation Table), its block size and file size. + If CONFIG_AUFS_EXPORT is disabled, this entry will not + be created. + When the aufs mount option 'noxino' is specified, it + will be empty. About XINO files, see the aufs manual. diff --git a/Documentation/ABI/testing/debugfs-hisi-hpre b/Documentation/ABI/testing/debugfs-hisi-hpre new file mode 100644 index 0000000000000..ec4a79e3a8073 --- /dev/null +++ b/Documentation/ABI/testing/debugfs-hisi-hpre @@ -0,0 +1,57 @@ +What: /sys/kernel/debug/hisi_hpre//cluster[0-3]/regs +Date: Sep 2019 +Contact: linux-crypto@vger.kernel.org +Description: Dump debug registers from the HPRE cluster. + Only available for PF. + +What: /sys/kernel/debug/hisi_hpre//cluster[0-3]/cluster_ctrl +Date: Sep 2019 +Contact: linux-crypto@vger.kernel.org +Description: Write the HPRE core selection in the cluster into this file, + and then we can read the debug information of the core. + Only available for PF. + +What: /sys/kernel/debug/hisi_hpre//rdclr_en +Date: Sep 2019 +Contact: linux-crypto@vger.kernel.org +Description: HPRE cores debug registers read clear control. 1 means enable + register read clear, otherwise 0. Writing to this file has no + functional effect, only enable or disable counters clear after + reading of these registers. + Only available for PF. + +What: /sys/kernel/debug/hisi_hpre//current_qm +Date: Sep 2019 +Contact: linux-crypto@vger.kernel.org +Description: One HPRE controller has one PF and multiple VFs, each function + has a QM. Select the QM which below qm refers to. + Only available for PF. + +What: /sys/kernel/debug/hisi_hpre//regs +Date: Sep 2019 +Contact: linux-crypto@vger.kernel.org +Description: Dump debug registers from the HPRE. + Only available for PF. + +What: /sys/kernel/debug/hisi_hpre//qm/qm_regs +Date: Sep 2019 +Contact: linux-crypto@vger.kernel.org +Description: Dump debug registers from the QM. + Available for PF and VF in host. VF in guest currently only + has one debug register. + +What: /sys/kernel/debug/hisi_hpre//qm/current_q +Date: Sep 2019 +Contact: linux-crypto@vger.kernel.org +Description: One QM may contain multiple queues. Select specific queue to + show its debug registers in above qm_regs. + Only available for PF. + +What: /sys/kernel/debug/hisi_hpre//qm/clear_enable +Date: Sep 2019 +Contact: linux-crypto@vger.kernel.org +Description: QM debug registers(qm_regs) read clear control. 1 means enable + register read clear, otherwise 0. + Writing to this file has no functional effect, only enable or + disable counters clear after reading of these registers. + Only available for PF. diff --git a/Documentation/ABI/testing/debugfs-hisi-sec b/Documentation/ABI/testing/debugfs-hisi-sec new file mode 100644 index 0000000000000..06adb899495e0 --- /dev/null +++ b/Documentation/ABI/testing/debugfs-hisi-sec @@ -0,0 +1,43 @@ +What: /sys/kernel/debug/hisi_sec//sec_dfx +Date: Oct 2019 +Contact: linux-crypto@vger.kernel.org +Description: Dump the debug registers of SEC cores. + Only available for PF. + +What: /sys/kernel/debug/hisi_sec//clear_enable +Date: Oct 2019 +Contact: linux-crypto@vger.kernel.org +Description: Enabling/disabling of clear action after reading + the SEC debug registers. + 0: disable, 1: enable. + Only available for PF, and take no other effect on SEC. + +What: /sys/kernel/debug/hisi_sec//current_qm +Date: Oct 2019 +Contact: linux-crypto@vger.kernel.org +Description: One SEC controller has one PF and multiple VFs, each function + has a QM. This file can be used to select the QM which below + qm refers to. + Only available for PF. + +What: /sys/kernel/debug/hisi_sec//qm/qm_regs +Date: Oct 2019 +Contact: linux-crypto@vger.kernel.org +Description: Dump of QM related debug registers. + Available for PF and VF in host. VF in guest currently only + has one debug register. + +What: /sys/kernel/debug/hisi_sec//qm/current_q +Date: Oct 2019 +Contact: linux-crypto@vger.kernel.org +Description: One QM of SEC may contain multiple queues. Select specific + queue to show its debug registers in above 'qm_regs'. + Only available for PF. + +What: /sys/kernel/debug/hisi_sec//qm/clear_enable +Date: Oct 2019 +Contact: linux-crypto@vger.kernel.org +Description: Enabling/disabling of clear action after reading + the SEC's QM debug registers. + 0: disable, 1: enable. + Only available for PF, and take no other effect on SEC. diff --git a/Documentation/ABI/testing/evm b/Documentation/ABI/testing/evm index 201d10319fa18..1df1177df68ad 100644 --- a/Documentation/ABI/testing/evm +++ b/Documentation/ABI/testing/evm @@ -42,8 +42,30 @@ Description: modification of EVM-protected metadata and disable all further modification of policy - Note that once a key has been loaded, it will no longer be - possible to enable metadata modification. + Echoing a value is additive, the new value is added to the + existing initialization flags. + + For example, after:: + + echo 2 >/evm + + another echo can be performed:: + + echo 1 >/evm + + and the resulting value will be 3. + + Note that once an HMAC key has been loaded, it will no longer + be possible to enable metadata modification. Signaling that an + HMAC key has been loaded will clear the corresponding flag. + For example, if the current value is 6 (2 and 4 set):: + + echo 1 >/evm + + will set the new value to 3 (4 cleared). + + Loading an HMAC key is the only way to disable metadata + modification. Until key loading has been signaled EVM can not create or validate the 'security.evm' xattr, but returns diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy index 29ebe9afdac42..29aaedf332461 100644 --- a/Documentation/ABI/testing/ima_policy +++ b/Documentation/ABI/testing/ima_policy @@ -25,6 +25,7 @@ Description: lsm: [[subj_user=] [subj_role=] [subj_type=] [obj_user=] [obj_role=] [obj_type=]] option: [[appraise_type=]] [template=] [permit_directio] + [appraise_flag=] base: func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK][MODULE_CHECK] [FIRMWARE_CHECK] [KEXEC_KERNEL_CHECK] [KEXEC_INITRAMFS_CHECK] @@ -38,6 +39,9 @@ Description: fowner:= decimal value lsm: are LSM specific option: appraise_type:= [imasig] [imasig|modsig] + appraise_flag:= [check_blacklist] + Currently, blacklist check is only for files signed with appended + signature. template:= name of a defined IMA template type (eg, ima-ng). Only valid when action is "measure". pcr:= decimal value diff --git a/Documentation/ABI/testing/sysfs-ata b/Documentation/ABI/testing/sysfs-ata index 9ab0ef1dd1c72..299e0d1dc1619 100644 --- a/Documentation/ABI/testing/sysfs-ata +++ b/Documentation/ABI/testing/sysfs-ata @@ -107,13 +107,14 @@ Description: described in ATA8 7.16 and 7.17. Only valid if the device is not a PM. - pio_mode: (RO) Transfer modes supported by the device when - in PIO mode. Mostly used by PATA device. + pio_mode: (RO) PIO transfer mode used by the device. + Mostly used by PATA devices. - xfer_mode: (RO) Current transfer mode + xfer_mode: (RO) Current transfer mode. Mostly used by + PATA devices. - dma_mode: (RO) Transfer modes supported by the device when - in DMA mode. Mostly used by PATA device. + dma_mode: (RO) DMA transfer mode used by the device. + Mostly used by PATA devices. class: (RO) Device class. Can be "ata" for disk, "atapi" for packet device, "pmp" for PM, or diff --git a/Documentation/ABI/testing/sysfs-aufs b/Documentation/ABI/testing/sysfs-aufs new file mode 100644 index 0000000000000..82f9518495eae --- /dev/null +++ b/Documentation/ABI/testing/sysfs-aufs @@ -0,0 +1,31 @@ +What: /sys/fs/aufs/si_/ +Date: March 2009 +Contact: J. R. Okajima +Description: + Under /sys/fs/aufs, a directory named si_ is created + per aufs mount, where is a unique id generated + internally. + +What: /sys/fs/aufs/si_/br0, br1 ... brN +Date: March 2009 +Contact: J. R. Okajima +Description: + It shows the abolute path of a member directory (which + is called branch) in aufs, and its permission. + +What: /sys/fs/aufs/si_/brid0, brid1 ... bridN +Date: July 2013 +Contact: J. R. Okajima +Description: + It shows the id of a member directory (which is called + branch) in aufs. + +What: /sys/fs/aufs/si_/xi_path +Date: March 2009 +Contact: J. R. Okajima +Description: + It shows the abolute path of XINO (External Inode Number + Bitmap, Translation Table and Generation Table) file + even if it is the default path. + When the aufs mount option 'noxino' is specified, it + will be empty. About XINO files, see the aufs manual. diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index 680451695422a..c3767d4d01a6f 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -1566,7 +1566,8 @@ What: /sys/bus/iio/devices/iio:deviceX/in_concentrationX_voc_raw KernelVersion: 4.3 Contact: linux-iio@vger.kernel.org Description: - Raw (unscaled no offset etc.) percentage reading of a substance. + Raw (unscaled no offset etc.) reading of a substance. Units + after application of scale and offset are percents. What: /sys/bus/iio/devices/iio:deviceX/in_resistance_raw What: /sys/bus/iio/devices/iio:deviceX/in_resistanceX_raw diff --git a/Documentation/ABI/testing/sysfs-bus-iio-vf610 b/Documentation/ABI/testing/sysfs-bus-iio-vf610 index 308a6756d3bf3..491ead8044888 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio-vf610 +++ b/Documentation/ABI/testing/sysfs-bus-iio-vf610 @@ -1,4 +1,4 @@ -What: /sys/bus/iio/devices/iio:deviceX/conversion_mode +What: /sys/bus/iio/devices/iio:deviceX/in_conversion_mode KernelVersion: 4.2 Contact: linux-iio@vger.kernel.org Description: diff --git a/Documentation/ABI/testing/sysfs-bus-mei b/Documentation/ABI/testing/sysfs-bus-mei index 6bd45346ac7e4..3f8701e8fa241 100644 --- a/Documentation/ABI/testing/sysfs-bus-mei +++ b/Documentation/ABI/testing/sysfs-bus-mei @@ -4,7 +4,7 @@ KernelVersion: 3.10 Contact: Samuel Ortiz linux-mei@linux.intel.com Description: Stores the same MODALIAS value emitted by uevent - Format: mei::: + Format: mei::: What: /sys/bus/mei/devices/.../name Date: May 2015 diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci index 8bfee557e50ea..450296cc7948e 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci +++ b/Documentation/ABI/testing/sysfs-bus-pci @@ -347,3 +347,16 @@ Description: If the device has any Peer-to-Peer memory registered, this file contains a '1' if the memory has been published for use outside the driver that owns the device. + +What: /sys/bus/pci/devices/.../link/clkpm + /sys/bus/pci/devices/.../link/l0s_aspm + /sys/bus/pci/devices/.../link/l1_aspm + /sys/bus/pci/devices/.../link/l1_1_aspm + /sys/bus/pci/devices/.../link/l1_2_aspm + /sys/bus/pci/devices/.../link/l1_1_pcipm + /sys/bus/pci/devices/.../link/l1_2_pcipm +Date: October 2019 +Contact: Heiner Kallweit +Description: If ASPM is supported for an endpoint, these files can be + used to disable or enable the individual power management + states. Write y/1/on to enable, n/0/off to disable. diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq index 01196e19afca3..75897e2fde43e 100644 --- a/Documentation/ABI/testing/sysfs-class-devfreq +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -7,6 +7,13 @@ Description: The name of devfreq object denoted as ... is same as the name of device using devfreq. +What: /sys/class/devfreq/.../name +Date: November 2019 +Contact: Chanwoo Choi +Description: + The /sys/class/devfreq/.../name shows the name of device + of the corresponding devfreq object. + What: /sys/class/devfreq/.../governor Date: September 2011 Contact: MyungJoo Ham diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index fc20cde63d1ea..726ac2e01b777 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -486,8 +486,10 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/spec_store_bypass /sys/devices/system/cpu/vulnerabilities/l1tf /sys/devices/system/cpu/vulnerabilities/mds + /sys/devices/system/cpu/vulnerabilities/srbds /sys/devices/system/cpu/vulnerabilities/tsx_async_abort /sys/devices/system/cpu/vulnerabilities/itlb_multihit + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities diff --git a/Documentation/ABI/testing/sysfs-secvar b/Documentation/ABI/testing/sysfs-secvar new file mode 100644 index 0000000000000..feebb8c57294c --- /dev/null +++ b/Documentation/ABI/testing/sysfs-secvar @@ -0,0 +1,46 @@ +What: /sys/firmware/secvar +Date: August 2019 +Contact: Nayna Jain +Description: This directory is created if the POWER firmware supports OS + secureboot, thereby secure variables. It exposes interface + for reading/writing the secure variables + +What: /sys/firmware/secvar/vars +Date: August 2019 +Contact: Nayna Jain +Description: This directory lists all the secure variables that are supported + by the firmware. + +What: /sys/firmware/secvar/format +Date: August 2019 +Contact: Nayna Jain +Description: A string indicating which backend is in use by the firmware. + This determines the format of the variable and the accepted + format of variable updates. + +What: /sys/firmware/secvar/vars/ +Date: August 2019 +Contact: Nayna Jain +Description: Each secure variable is represented as a directory named as + . The variable name is unique and is in ASCII + representation. The data and size can be determined by reading + their respective attribute files. + +What: /sys/firmware/secvar/vars//size +Date: August 2019 +Contact: Nayna Jain +Description: An integer representation of the size of the content of the + variable. In other words, it represents the size of the data. + +What: /sys/firmware/secvar/vars//data +Date: August 2019 +Contact: Nayna Jain h +Description: A read-only file containing the value of the variable. The size + of the file represents the maximum size of the variable data. + +What: /sys/firmware/secvar/vars//update +Date: August 2019 +Contact: Nayna Jain +Description: A write-only file that is used to submit the new value for the + variable. The size of the file represents the maximum size of + the variable data that can be written. diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt index 8f8d97f65d737..29dcbe8826e85 100644 --- a/Documentation/DMA-attributes.txt +++ b/Documentation/DMA-attributes.txt @@ -5,24 +5,6 @@ DMA attributes This document describes the semantics of the DMA attributes that are defined in linux/dma-mapping.h. -DMA_ATTR_WRITE_BARRIER ----------------------- - -DMA_ATTR_WRITE_BARRIER is a (write) barrier attribute for DMA. DMA -to a memory region with the DMA_ATTR_WRITE_BARRIER attribute forces -all pending DMA writes to complete, and thus provides a mechanism to -strictly order DMA from a device across all intervening busses and -bridges. This barrier is not specific to a particular type of -interconnect, it applies to the system as a whole, and so its -implementation must account for the idiosyncrasies of the system all -the way from the DMA device to memory. - -As an example of a situation where DMA_ATTR_WRITE_BARRIER would be -useful, suppose that a device does a DMA write to indicate that data is -ready and available in memory. The DMA of the "completion indication" -could race with data DMA. Mapping the memory used for completion -indications with DMA_ATTR_WRITE_BARRIER would prevent the race. - DMA_ATTR_WEAK_ORDERING ---------------------- diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst index 621111ce57401..28c0461ba2e1b 100644 --- a/Documentation/accounting/psi.rst +++ b/Documentation/accounting/psi.rst @@ -90,7 +90,8 @@ Triggers can be set on more than one psi metric and more than one trigger for the same psi metric can be specified. However for each trigger a separate file descriptor is required to be able to poll it separately from others, therefore for each trigger a separate open() syscall should be made even -when opening the same psi interface file. +when opening the same psi interface file. Write operations to a file descriptor +with an already existing psi trigger will fail with EBUSY. Monitors activate only when system enters stall state for the monitored psi metric and deactivates upon exit from the stall state. While system is diff --git a/Documentation/admin-guide/device-mapper/dm-integrity.rst b/Documentation/admin-guide/device-mapper/dm-integrity.rst index a30aa91b5fbe9..3463883844c0b 100644 --- a/Documentation/admin-guide/device-mapper/dm-integrity.rst +++ b/Documentation/admin-guide/device-mapper/dm-integrity.rst @@ -177,6 +177,12 @@ bitmap_flush_interval:number The bitmap flush interval in milliseconds. The metadata buffers are synchronized when this interval expires. +legacy_recalculate + Allow recalculating of volumes with HMAC keys. This is disabled by + default for security reasons - an attacker could modify the volume, + set recalc_sector to zero, and the kernel would not detect the + modification. + The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can be changed when reloading the target (load an inactive table and swap the diff --git a/Documentation/admin-guide/device-mapper/index.rst b/Documentation/admin-guide/device-mapper/index.rst index c77c58b8f67b7..d8dec8911eb38 100644 --- a/Documentation/admin-guide/device-mapper/index.rst +++ b/Documentation/admin-guide/device-mapper/index.rst @@ -8,6 +8,7 @@ Device Mapper cache-policies cache delay + dm-clone dm-crypt dm-flakey dm-init diff --git a/Documentation/admin-guide/devices.txt b/Documentation/admin-guide/devices.txt index 1c5d2281efc97..771d9e7ae082b 100644 --- a/Documentation/admin-guide/devices.txt +++ b/Documentation/admin-guide/devices.txt @@ -3002,10 +3002,10 @@ 65 = /dev/infiniband/issm1 Second InfiniBand IsSM device ... 127 = /dev/infiniband/issm63 63rd InfiniBand IsSM device - 128 = /dev/infiniband/uverbs0 First InfiniBand verbs device - 129 = /dev/infiniband/uverbs1 Second InfiniBand verbs device + 192 = /dev/infiniband/uverbs0 First InfiniBand verbs device + 193 = /dev/infiniband/uverbs1 Second InfiniBand verbs device ... - 159 = /dev/infiniband/uverbs31 31st InfiniBand verbs device + 223 = /dev/infiniband/uverbs31 31st InfiniBand verbs device 232 char Biometric Devices 0 = /dev/biometric/sensor0/fingerprint first fingerprint sensor on first device diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 0795e3c2643f2..2adec1e6520a6 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -14,3 +14,5 @@ are configurable at compile, boot or run time. mds tsx_async_abort multihit.rst + special-register-buffer-data-sampling.rst + processor_mmio_stale_data.rst diff --git a/Documentation/admin-guide/hw-vuln/mds.rst b/Documentation/admin-guide/hw-vuln/mds.rst index e3a796c0d3a24..2d19c9f4c1fec 100644 --- a/Documentation/admin-guide/hw-vuln/mds.rst +++ b/Documentation/admin-guide/hw-vuln/mds.rst @@ -265,8 +265,11 @@ time with the option "mds=". The valid arguments for this option are: ============ ============================================================= -Not specifying this option is equivalent to "mds=full". - +Not specifying this option is equivalent to "mds=full". For processors +that are affected by both TAA (TSX Asynchronous Abort) and MDS, +specifying just "mds=off" without an accompanying "tsx_async_abort=off" +will have no effect as the same mitigation is used for both +vulnerabilities. Mitigation selection guide -------------------------- diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst new file mode 100644 index 0000000000000..c98fd11907cc8 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst @@ -0,0 +1,260 @@ +========================================= +Processor MMIO Stale Data Vulnerabilities +========================================= + +Processor MMIO Stale Data Vulnerabilities are a class of memory-mapped I/O +(MMIO) vulnerabilities that can expose data. The sequences of operations for +exposing data range from simple to very complex. Because most of the +vulnerabilities require the attacker to have access to MMIO, many environments +are not affected. System environments using virtualization where MMIO access is +provided to untrusted guests may need mitigation. These vulnerabilities are +not transient execution attacks. However, these vulnerabilities may propagate +stale data into core fill buffers where the data can subsequently be inferred +by an unmitigated transient execution attack. Mitigation for these +vulnerabilities includes a combination of microcode update and software +changes, depending on the platform and usage model. Some of these mitigations +are similar to those used to mitigate Microarchitectural Data Sampling (MDS) or +those used to mitigate Special Register Buffer Data Sampling (SRBDS). + +Data Propagators +================ +Propagators are operations that result in stale data being copied or moved from +one microarchitectural buffer or register to another. Processor MMIO Stale Data +Vulnerabilities are operations that may result in stale data being directly +read into an architectural, software-visible state or sampled from a buffer or +register. + +Fill Buffer Stale Data Propagator (FBSDP) +----------------------------------------- +Stale data may propagate from fill buffers (FB) into the non-coherent portion +of the uncore on some non-coherent writes. Fill buffer propagation by itself +does not make stale data architecturally visible. Stale data must be propagated +to a location where it is subject to reading or sampling. + +Sideband Stale Data Propagator (SSDP) +------------------------------------- +The sideband stale data propagator (SSDP) is limited to the client (including +Intel Xeon server E3) uncore implementation. The sideband response buffer is +shared by all client cores. For non-coherent reads that go to sideband +destinations, the uncore logic returns 64 bytes of data to the core, including +both requested data and unrequested stale data, from a transaction buffer and +the sideband response buffer. As a result, stale data from the sideband +response and transaction buffers may now reside in a core fill buffer. + +Primary Stale Data Propagator (PSDP) +------------------------------------ +The primary stale data propagator (PSDP) is limited to the client (including +Intel Xeon server E3) uncore implementation. Similar to the sideband response +buffer, the primary response buffer is shared by all client cores. For some +processors, MMIO primary reads will return 64 bytes of data to the core fill +buffer including both requested data and unrequested stale data. This is +similar to the sideband stale data propagator. + +Vulnerabilities +=============== +Device Register Partial Write (DRPW) (CVE-2022-21166) +----------------------------------------------------- +Some endpoint MMIO registers incorrectly handle writes that are smaller than +the register size. Instead of aborting the write or only copying the correct +subset of bytes (for example, 2 bytes for a 2-byte write), more bytes than +specified by the write transaction may be written to the register. On +processors affected by FBSDP, this may expose stale data from the fill buffers +of the core that created the write transaction. + +Shared Buffers Data Sampling (SBDS) (CVE-2022-21125) +---------------------------------------------------- +After propagators may have moved data around the uncore and copied stale data +into client core fill buffers, processors affected by MFBDS can leak data from +the fill buffer. It is limited to the client (including Intel Xeon server E3) +uncore implementation. + +Shared Buffers Data Read (SBDR) (CVE-2022-21123) +------------------------------------------------ +It is similar to Shared Buffer Data Sampling (SBDS) except that the data is +directly read into the architectural software-visible state. It is limited to +the client (including Intel Xeon server E3) uncore implementation. + +Affected Processors +=================== +Not all the CPUs are affected by all the variants. For instance, most +processors for the server market (excluding Intel Xeon E3 processors) are +impacted by only Device Register Partial Write (DRPW). + +Below is the list of affected Intel processors [#f1]_: + + =================== ============ ========= + Common name Family_Model Steppings + =================== ============ ========= + HASWELL_X 06_3FH 2,4 + SKYLAKE_L 06_4EH 3 + BROADWELL_X 06_4FH All + SKYLAKE_X 06_55H 3,4,6,7,11 + BROADWELL_D 06_56H 3,4,5 + SKYLAKE 06_5EH 3 + ICELAKE_X 06_6AH 4,5,6 + ICELAKE_D 06_6CH 1 + ICELAKE_L 06_7EH 5 + ATOM_TREMONT_D 06_86H All + LAKEFIELD 06_8AH 1 + KABYLAKE_L 06_8EH 9 to 12 + ATOM_TREMONT 06_96H 1 + ATOM_TREMONT_L 06_9CH 0 + KABYLAKE 06_9EH 9 to 13 + COMETLAKE 06_A5H 2,3,5 + COMETLAKE_L 06_A6H 0,1 + ROCKETLAKE 06_A7H 1 + =================== ============ ========= + +If a CPU is in the affected processor list, but not affected by a variant, it +is indicated by new bits in MSR IA32_ARCH_CAPABILITIES. As described in a later +section, mitigation largely remains the same for all the variants, i.e. to +clear the CPU fill buffers via VERW instruction. + +New bits in MSRs +================ +Newer processors and microcode update on existing affected processors added new +bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate +specific variants of Processor MMIO Stale Data vulnerabilities and mitigation +capability. + +MSR IA32_ARCH_CAPABILITIES +-------------------------- +Bit 13 - SBDR_SSDP_NO - When set, processor is not affected by either the + Shared Buffers Data Read (SBDR) vulnerability or the sideband stale + data propagator (SSDP). +Bit 14 - FBSDP_NO - When set, processor is not affected by the Fill Buffer + Stale Data Propagator (FBSDP). +Bit 15 - PSDP_NO - When set, processor is not affected by Primary Stale Data + Propagator (PSDP). +Bit 17 - FB_CLEAR - When set, VERW instruction will overwrite CPU fill buffer + values as part of MD_CLEAR operations. Processors that do not + enumerate MDS_NO (meaning they are affected by MDS) but that do + enumerate support for both L1D_FLUSH and MD_CLEAR implicitly enumerate + FB_CLEAR as part of their MD_CLEAR support. +Bit 18 - FB_CLEAR_CTRL - Processor supports read and write to MSR + IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]. On such processors, the FB_CLEAR_DIS + bit can be set to cause the VERW instruction to not perform the + FB_CLEAR action. Not all processors that support FB_CLEAR will support + FB_CLEAR_CTRL. + +MSR IA32_MCU_OPT_CTRL +--------------------- +Bit 3 - FB_CLEAR_DIS - When set, VERW instruction does not perform the FB_CLEAR +action. This may be useful to reduce the performance impact of FB_CLEAR in +cases where system software deems it warranted (for example, when performance +is more critical, or the untrusted software has no MMIO access). Note that +FB_CLEAR_DIS has no impact on enumeration (for example, it does not change +FB_CLEAR or MD_CLEAR enumeration) and it may not be supported on all processors +that enumerate FB_CLEAR. + +Mitigation +========== +Like MDS, all variants of Processor MMIO Stale Data vulnerabilities have the +same mitigation strategy to force the CPU to clear the affected buffers before +an attacker can extract the secrets. + +This is achieved by using the otherwise unused and obsolete VERW instruction in +combination with a microcode update. The microcode clears the affected CPU +buffers when the VERW instruction is executed. + +Kernel reuses the MDS function to invoke the buffer clearing: + + mds_clear_cpu_buffers() + +On MDS affected CPUs, the kernel already invokes CPU buffer clear on +kernel/userspace, hypervisor/guest and C-state (idle) transitions. No +additional mitigation is needed on such CPUs. + +For CPUs not affected by MDS or TAA, mitigation is needed only for the attacker +with MMIO capability. Therefore, VERW is not required for kernel/userspace. For +virtualization case, VERW is only needed at VMENTER for a guest with MMIO +capability. + +Mitigation points +----------------- +Return to user space +^^^^^^^^^^^^^^^^^^^^ +Same mitigation as MDS when affected by MDS/TAA, otherwise no mitigation +needed. + +C-State transition +^^^^^^^^^^^^^^^^^^ +Control register writes by CPU during C-state transition can propagate data +from fill buffer to uncore buffers. Execute VERW before C-state transition to +clear CPU fill buffers. + +Guest entry point +^^^^^^^^^^^^^^^^^ +Same mitigation as MDS when processor is also affected by MDS/TAA, otherwise +execute VERW at VMENTER only for MMIO capable guests. On CPUs not affected by +MDS/TAA, guest without MMIO access cannot extract secrets using Processor MMIO +Stale Data vulnerabilities, so there is no need to execute VERW for such guests. + +Mitigation control on the kernel command line +--------------------------------------------- +The kernel command line allows to control the Processor MMIO Stale Data +mitigations at boot time with the option "mmio_stale_data=". The valid +arguments for this option are: + + ========== ================================================================= + full If the CPU is vulnerable, enable mitigation; CPU buffer clearing + on exit to userspace and when entering a VM. Idle transitions are + protected as well. It does not automatically disable SMT. + full,nosmt Same as full, with SMT disabled on vulnerable CPUs. This is the + complete mitigation. + off Disables mitigation completely. + ========== ================================================================= + +If the CPU is affected and mmio_stale_data=off is not supplied on the kernel +command line, then the kernel selects the appropriate mitigation. + +Mitigation status information +----------------------------- +The Linux kernel provides a sysfs interface to enumerate the current +vulnerability status of the system: whether the system is vulnerable, and +which mitigations are active. The relevant sysfs file is: + + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + +The possible values in this file are: + + .. list-table:: + + * - 'Not affected' + - The processor is not vulnerable + * - 'Vulnerable' + - The processor is vulnerable, but no mitigation enabled + * - 'Vulnerable: Clear CPU buffers attempted, no microcode' + - The processor is vulnerable, but microcode is not updated. The + mitigation is enabled on a best effort basis. + * - 'Mitigation: Clear CPU buffers' + - The processor is vulnerable and the CPU buffer clearing mitigation is + enabled. + * - 'Unknown: No mitigations' + - The processor vulnerability status is unknown because it is + out of Servicing period. Mitigation is not attempted. + +Definitions: +------------ + +Servicing period: The process of providing functional and security updates to +Intel processors or platforms, utilizing the Intel Platform Update (IPU) +process or other similar mechanisms. + +End of Servicing Updates (ESU): ESU is the date at which Intel will no +longer provide Servicing, such as through IPU or other similar update +processes. ESU dates will typically be aligned to end of quarter. + +If the processor is vulnerable then the following information is appended to +the above information: + + ======================== =========================================== + 'SMT vulnerable' SMT is enabled + 'SMT disabled' SMT is disabled + 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown + ======================== =========================================== + +References +---------- +.. [#f1] Affected Processors + https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html diff --git a/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst b/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst new file mode 100644 index 0000000000000..47b1b3afac994 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst @@ -0,0 +1,149 @@ +.. SPDX-License-Identifier: GPL-2.0 + +SRBDS - Special Register Buffer Data Sampling +============================================= + +SRBDS is a hardware vulnerability that allows MDS :doc:`mds` techniques to +infer values returned from special register accesses. Special register +accesses are accesses to off core registers. According to Intel's evaluation, +the special register reads that have a security expectation of privacy are +RDRAND, RDSEED and SGX EGETKEY. + +When RDRAND, RDSEED and EGETKEY instructions are used, the data is moved +to the core through the special register mechanism that is susceptible +to MDS attacks. + +Affected processors +-------------------- +Core models (desktop, mobile, Xeon-E3) that implement RDRAND and/or RDSEED may +be affected. + +A processor is affected by SRBDS if its Family_Model and stepping is +in the following list, with the exception of the listed processors +exporting MDS_NO while Intel TSX is available yet not enabled. The +latter class of processors are only affected when Intel TSX is enabled +by software using TSX_CTRL_MSR otherwise they are not affected. + + ============= ============ ======== + common name Family_Model Stepping + ============= ============ ======== + IvyBridge 06_3AH All + + Haswell 06_3CH All + Haswell_L 06_45H All + Haswell_G 06_46H All + + Broadwell_G 06_47H All + Broadwell 06_3DH All + + Skylake_L 06_4EH All + Skylake 06_5EH All + + Kabylake_L 06_8EH <= 0xC + Kabylake 06_9EH <= 0xD + ============= ============ ======== + +Related CVEs +------------ + +The following CVE entry is related to this SRBDS issue: + + ============== ===== ===================================== + CVE-2020-0543 SRBDS Special Register Buffer Data Sampling + ============== ===== ===================================== + +Attack scenarios +---------------- +An unprivileged user can extract values returned from RDRAND and RDSEED +executed on another core or sibling thread using MDS techniques. + + +Mitigation mechanism +------------------- +Intel will release microcode updates that modify the RDRAND, RDSEED, and +EGETKEY instructions to overwrite secret special register data in the shared +staging buffer before the secret data can be accessed by another logical +processor. + +During execution of the RDRAND, RDSEED, or EGETKEY instructions, off-core +accesses from other logical processors will be delayed until the special +register read is complete and the secret data in the shared staging buffer is +overwritten. + +This has three effects on performance: + +#. RDRAND, RDSEED, or EGETKEY instructions have higher latency. + +#. Executing RDRAND at the same time on multiple logical processors will be + serialized, resulting in an overall reduction in the maximum RDRAND + bandwidth. + +#. Executing RDRAND, RDSEED or EGETKEY will delay memory accesses from other + logical processors that miss their core caches, with an impact similar to + legacy locked cache-line-split accesses. + +The microcode updates provide an opt-out mechanism (RNGDS_MITG_DIS) to disable +the mitigation for RDRAND and RDSEED instructions executed outside of Intel +Software Guard Extensions (Intel SGX) enclaves. On logical processors that +disable the mitigation using this opt-out mechanism, RDRAND and RDSEED do not +take longer to execute and do not impact performance of sibling logical +processors memory accesses. The opt-out mechanism does not affect Intel SGX +enclaves (including execution of RDRAND or RDSEED inside an enclave, as well +as EGETKEY execution). + +IA32_MCU_OPT_CTRL MSR Definition +-------------------------------- +Along with the mitigation for this issue, Intel added a new thread-scope +IA32_MCU_OPT_CTRL MSR, (address 0x123). The presence of this MSR and +RNGDS_MITG_DIS (bit 0) is enumerated by CPUID.(EAX=07H,ECX=0).EDX[SRBDS_CTRL = +9]==1. This MSR is introduced through the microcode update. + +Setting IA32_MCU_OPT_CTRL[0] (RNGDS_MITG_DIS) to 1 for a logical processor +disables the mitigation for RDRAND and RDSEED executed outside of an Intel SGX +enclave on that logical processor. Opting out of the mitigation for a +particular logical processor does not affect the RDRAND and RDSEED mitigations +for other logical processors. + +Note that inside of an Intel SGX enclave, the mitigation is applied regardless +of the value of RNGDS_MITG_DS. + +Mitigation control on the kernel command line +--------------------------------------------- +The kernel command line allows control over the SRBDS mitigation at boot time +with the option "srbds=". The option for this is: + + ============= ============================================================= + off This option disables SRBDS mitigation for RDRAND and RDSEED on + affected platforms. + ============= ============================================================= + +SRBDS System Information +----------------------- +The Linux kernel provides vulnerability status information through sysfs. For +SRBDS this can be accessed by the following sysfs file: +/sys/devices/system/cpu/vulnerabilities/srbds + +The possible values contained in this file are: + + ============================== ============================================= + Not affected Processor not vulnerable + Vulnerable Processor vulnerable and mitigation disabled + Vulnerable: No microcode Processor vulnerable and microcode is missing + mitigation + Mitigation: Microcode Processor is vulnerable and mitigation is in + effect. + Mitigation: TSX disabled Processor is only vulnerable when TSX is + enabled while this system was booted with TSX + disabled. + Unknown: Dependent on + hypervisor status Running on virtual guest processor that is + affected but with no way to know if host + processor is mitigated or vulnerable. + ============================== ============================================= + +SRBDS Default mitigation +------------------------ +This new microcode serializes processor access during execution of RDRAND, +RDSEED ensures that the shared buffer is overwritten before it is released for +reuse. Use the "srbds=off" kernel command line to disable the mitigation for +RDRAND and RDSEED. diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index e05e581af5cfe..7e061ed449aaa 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -60,8 +60,8 @@ privileged data touched during the speculative execution. Spectre variant 1 attacks take advantage of speculative execution of conditional branches, while Spectre variant 2 attacks use speculative execution of indirect branches to leak privileged memory. -See :ref:`[1] ` :ref:`[5] ` :ref:`[7] ` -:ref:`[10] ` :ref:`[11] `. +See :ref:`[1] ` :ref:`[5] ` :ref:`[6] ` +:ref:`[7] ` :ref:`[10] ` :ref:`[11] `. Spectre variant 1 (Bounds Check Bypass) --------------------------------------- @@ -131,6 +131,19 @@ steer its indirect branch speculations to gadget code, and measure the speculative execution's side effects left in level 1 cache to infer the victim's data. +Yet another variant 2 attack vector is for the attacker to poison the +Branch History Buffer (BHB) to speculatively steer an indirect branch +to a specific Branch Target Buffer (BTB) entry, even if the entry isn't +associated with the source address of the indirect branch. Specifically, +the BHB might be shared across privilege levels even in the presence of +Enhanced IBRS. + +Currently the only known real-world BHB attack vector is via +unprivileged eBPF. Therefore, it's highly recommended to not enable +unprivileged eBPF, especially when eIBRS is used (without retpolines). +For a full mitigation against BHB attacks, it's recommended to use +retpolines (or eIBRS combined with retpolines). + Attack scenarios ---------------- @@ -364,13 +377,15 @@ The possible values in this file are: - Kernel status: - ==================================== ================================= - 'Not affected' The processor is not vulnerable - 'Vulnerable' Vulnerable, no mitigation - 'Mitigation: Full generic retpoline' Software-focused mitigation - 'Mitigation: Full AMD retpoline' AMD-specific software mitigation - 'Mitigation: Enhanced IBRS' Hardware-focused mitigation - ==================================== ================================= + ======================================== ================================= + 'Not affected' The processor is not vulnerable + 'Mitigation: None' Vulnerable, no mitigation + 'Mitigation: Retpolines' Use Retpoline thunks + 'Mitigation: LFENCE' Use LFENCE instructions + 'Mitigation: Enhanced IBRS' Hardware-focused mitigation + 'Mitigation: Enhanced IBRS + Retpolines' Hardware-focused + Retpolines + 'Mitigation: Enhanced IBRS + LFENCE' Hardware-focused + LFENCE + ======================================== ================================= - Firmware status: Show if Indirect Branch Restricted Speculation (IBRS) is used to protect against Spectre variant 2 attacks when calling firmware (x86 only). @@ -407,6 +422,14 @@ The possible values in this file are: 'RSB filling' Protection of RSB on context switch enabled ============= =========================================== + - EIBRS Post-barrier Return Stack Buffer (PBRSB) protection status: + + =========================== ======================================================= + 'PBRSB-eIBRS: SW sequence' CPU is affected and protection of RSB on VMEXIT enabled + 'PBRSB-eIBRS: Vulnerable' CPU is vulnerable + 'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB + =========================== ======================================================= + Full mitigation might require a microcode update from the CPU vendor. When the necessary microcode is not available, the kernel will report vulnerability. @@ -468,7 +491,7 @@ Spectre variant 2 before invoking any firmware code to prevent Spectre variant 2 exploits using the firmware. - Using kernel address space randomization (CONFIG_RANDOMIZE_SLAB=y + Using kernel address space randomization (CONFIG_RANDOMIZE_BASE=y and CONFIG_SLAB_FREELIST_RANDOM=y in the kernel configuration) makes attacks on the kernel generally more difficult. @@ -584,12 +607,13 @@ kernel command line. Specific mitigations can also be selected manually: - retpoline - replace indirect branches - retpoline,generic - google's original retpoline - retpoline,amd - AMD-specific minimal thunk + retpoline auto pick between generic,lfence + retpoline,generic Retpolines + retpoline,lfence LFENCE; indirect branch + retpoline,amd alias for retpoline,lfence + eibrs enhanced IBRS + eibrs,retpoline enhanced IBRS + Retpolines + eibrs,lfence enhanced IBRS + LFENCE Not specifying this option is equivalent to spectre_v2=auto. @@ -730,7 +754,7 @@ AMD white papers: .. _spec_ref6: -[6] `Software techniques for managing speculation on AMD processors `_. +[6] `Software techniques for managing speculation on AMD processors `_. ARM white papers: diff --git a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst index fddbd7579c535..af6865b822d21 100644 --- a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst +++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst @@ -174,7 +174,10 @@ the option "tsx_async_abort=". The valid arguments for this option are: CPU is not vulnerable to cross-thread TAA attacks. ============ ============================================================= -Not specifying this option is equivalent to "tsx_async_abort=full". +Not specifying this option is equivalent to "tsx_async_abort=full". For +processors that are affected by both TAA and MDS, specifying just +"tsx_async_abort=off" without an accompanying "mds=off" will have no +effect as the same mitigation is used for both vulnerabilities. The kernel command line also allows to control the TSX feature using the parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is used diff --git a/Documentation/admin-guide/iostats.rst b/Documentation/admin-guide/iostats.rst index 5d63b18bd6d1f..60c45c916f7d0 100644 --- a/Documentation/admin-guide/iostats.rst +++ b/Documentation/admin-guide/iostats.rst @@ -99,7 +99,7 @@ Field 10 -- # of milliseconds spent doing I/Os Since 5.0 this field counts jiffies when at least one request was started or completed. If request runs more than 2 jiffies then some - I/O time will not be accounted unless there are other requests. + I/O time might be not accounted in case of concurrent requests. Field 11 -- weighted # of milliseconds spent doing I/Os This field is incremented at each I/O start, I/O completion, I/O @@ -133,6 +133,9 @@ are summed (possibly overflowing the unsigned long variable they are summed to) and the result given to the user. There is no convenient user interface for accessing the per-CPU counters themselves. +Since 4.19 request times are measured with nanoseconds precision and +truncated to milliseconds before showing in this interface. + Disks vs Partitions ------------------- diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst index 007a6b86e0eed..ba1aed57e55dc 100644 --- a/Documentation/admin-guide/kdump/vmcoreinfo.rst +++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst @@ -393,6 +393,17 @@ KERNELOFFSET The kernel randomization offset. Used to compute the page offset. If KASLR is disabled, this value is zero. +TCR_EL1.T1SZ +------------ + +Indicates the size offset of the memory region addressed by TTBR1_EL1. +The region size is 2^(64-T1SZ) bytes. + +TTBR1_EL1 is the table base address register specified by ARMv8-A +architecture which is used to lookup the page-tables for the Virtual +addresses in the higher VA range (refer to ARMv8 ARM document for +more details). + arm === diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 8dee8f68fe15c..e0527952a338b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -113,7 +113,7 @@ the GPE dispatcher. This facility can be used to prevent such uncontrolled GPE floodings. - Format: + Format: acpi_no_auto_serialize [HW,ACPI] Disable auto-serialization of AML methods @@ -136,6 +136,10 @@ dynamic table installation which will install SSDT tables to /sys/firmware/acpi/tables/dynamic. + acpi_no_watchdog [HW,ACPI,WDT] + Ignore the ACPI-based watchdog interface (WDAT) and let + a native driver control the watchdog device instead. + acpi_rsdp= [ACPI,EFI,KEXEC] Pass the RSDP address to the kernel, mostly used on machines running EFI runtime service to boot the @@ -221,14 +225,23 @@ For broken nForce2 BIOS resulting in XT-PIC timer. acpi_sleep= [HW,ACPI] Sleep options - Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, - old_ordering, nonvs, sci_force_enable, nobl } + Format: { s3_bios, s3_mode, s3_beep, s4_hwsig, + s4_nohwsig, old_ordering, nonvs, + sci_force_enable, nobl } See Documentation/power/video.rst for information on s3_bios and s3_mode. s3_beep is for debugging; it makes the PC's speaker beep as soon as the kernel's real-mode entry point is called. + s4_hwsig causes the kernel to check the ACPI hardware + signature during resume from hibernation, and gracefully + refuse to resume if it has changed. This complies with + the ACPI specification but not with reality, since + Windows does not do this and many laptops do change it + on docking. So the default behaviour is to allow resume + and simply warn when the signature changes, unless the + s4_hwsig option is enabled. s4_nohwsig prevents ACPI hardware signature from being - used during resume from hibernation. + used (or even warned about) during resume. old_ordering causes the ACPI 1.0 ordering of the _PTS control method, with respect to putting devices into low power states, to be enforced (the ACPI 2.0 ordering @@ -563,7 +576,13 @@ loops can be debugged more effectively on production systems. - clearcpuid=BITNUM [X86] + clocksource.max_cswd_read_retries= [KNL] + Number of clocksource_watchdog() retries due to + external delays before the clock will be marked + unstable. Defaults to three retries, that is, + four attempts to read the clock under test. + + clearcpuid=BITNUM[,BITNUM...] [X86] Disable CPUID feature X for the kernel. See arch/x86/include/asm/cpufeatures.h for the valid bit numbers. Note the Linux specific bits are not necessarily @@ -680,6 +699,10 @@ 0: default value, disable debugging 1: enable debugging at boot time + cpufreq_driver= [X86] Allow only the named cpu frequency scaling driver + to register. Example: cpufreq_driver=powernow-k8 + Format: { none | STRING } + cpuidle.off=1 [CPU_IDLE] disable the cpuidle sub-system @@ -836,6 +859,18 @@ dump out devices still on the deferred probe list after retrying. + dfltcc= [HW,S390] + Format: { on | off | def_only | inf_only | always } + on: s390 zlib hardware support for compression on + level 1 and decompression (default) + off: No s390 zlib hardware support + def_only: s390 zlib hardware support for deflate + only (compression on level 1) + inf_only: s390 zlib hardware support for inflate + only (decompression) + always: Same as 'on' but ignores the selected compression + level always using hardware support (used for debugging) + dhash_entries= [KNL] Set number of hash buckets for dentry cache. @@ -1477,6 +1512,8 @@ architectures force reset to be always executed i8042.unlock [HW] Unlock (ignore) the keylock i8042.kbdreset [HW] Reset device connected to KBD port + i8042.probe_defer + [HW] Allow deferred probing upon i8042 probe errors i810= [HW,DRM] @@ -1815,13 +1852,13 @@ iommu.strict= [ARM64] Configure TLB invalidation behaviour Format: { "0" | "1" } - 0 - Lazy mode. + 0 - Lazy mode (default). Request that DMA unmap operations use deferred invalidation of hardware TLBs, for increased throughput at the cost of reduced device isolation. Will fall back to strict mode if not supported by the relevant IOMMU driver. - 1 - Strict mode (default). + 1 - Strict mode. DMA unmap operations invalidate IOMMU hardware TLBs synchronously. @@ -2102,8 +2139,12 @@ Default is 1 (enabled) kvm-intel.emulate_invalid_guest_state= - [KVM,Intel] Enable emulation of invalid guest states - Default is 0 (disabled) + [KVM,Intel] Disable emulation of invalid guest state. + Ignored if kvm-intel.enable_unrestricted_guest=1, as + guest state is never invalid for unrestricted guests. + This param doesn't apply to nested guests (L2), as KVM + never emulates invalid L2 guest state. + Default is 1 (enabled) kvm-intel.flexpriority= [KVM,Intel] Disable FlexPriority feature (TPR shadow). @@ -2473,6 +2514,12 @@ SMT on vulnerable CPUs off - Unconditionally disable MDS mitigation + On TAA-affected machines, mds=off can be prevented by + an active TAA mitigation as both vulnerabilities are + mitigated with the same mechanism so in order to disable + this mitigation, you need to specify tsx_async_abort=off + too. + Not specifying this option is equivalent to mds=full. @@ -2657,6 +2704,9 @@ mds=off [X86] tsx_async_abort=off [X86] kvm.nx_huge_pages=off [X86] + no_entry_flush [PPC] + no_uaccess_flush [PPC] + mmio_stale_data=off [X86] Exceptions: This does not have any effect on @@ -2678,6 +2728,7 @@ Equivalent to: l1tf=flush,nosmt [X86] mds=full,nosmt [X86] tsx_async_abort=full,nosmt [X86] + mmio_stale_data=full,nosmt [X86] mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this @@ -2687,6 +2738,40 @@ log everything. Information is printed at KERN_DEBUG so loglevel=8 may also need to be specified. + mmio_stale_data= + [X86,INTEL] Control mitigation for the Processor + MMIO Stale Data vulnerabilities. + + Processor MMIO Stale Data is a class of + vulnerabilities that may expose data after an MMIO + operation. Exposed data could originate or end in + the same CPU buffers as affected by MDS and TAA. + Therefore, similar to MDS and TAA, the mitigation + is to clear the affected CPU buffers. + + This parameter controls the mitigation. The + options are: + + full - Enable mitigation on vulnerable CPUs + + full,nosmt - Enable mitigation and disable SMT on + vulnerable CPUs. + + off - Unconditionally disable mitigation + + On MDS or TAA affected machines, + mmio_stale_data=off can be prevented by an active + MDS or TAA mitigation as these vulnerabilities are + mitigated with the same mechanism so in order to + disable this mitigation, you need to specify + mds=off and tsx_async_abort=off too. + + Not specifying this option is equivalent to + mmio_stale_data=full. + + For details see: + Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst + module.sig_enforce [KNL] When CONFIG_MODULE_SIG is set, this means that modules without (valid) signatures will fail to load. @@ -2731,7 +2816,7 @@ ,[,,,,] mtdparts= [MTD] - See drivers/mtd/cmdlinepart.c. + See drivers/mtd/parsers/cmdlinepart.c multitce=off [PPC] This parameter disables the use of the pSeries firmware feature for updating multiple TCE entries @@ -2979,6 +3064,8 @@ noefi Disable EFI runtime services support. + no_entry_flush [PPC] Don't flush the L1-D cache when entering the kernel. + noexec [IA-64] noexec [X86] @@ -3028,6 +3115,9 @@ nospec_store_bypass_disable [HW] Disable all mitigations for the Speculative Store Bypass vulnerability + no_uaccess_flush + [PPC] Don't flush the L1-D cache after accessing user data. + noxsave [BUGS=X86] Disables x86 extended register state save and restore using xsave. The kernel will fallback to enabling legacy floating-point and sse state. @@ -3084,7 +3174,8 @@ disable unhandled interrupt sources. no_timer_check [X86,APIC] Disables the code which tests for - broken timer IRQ sources. + broken timer IRQ sources. For AWS, this is defaulted + to disabled, can be re-enabled using no_timer_check=0. noisapnp [ISAPNP] Disables ISA PnP code. @@ -3110,9 +3201,9 @@ [X86,PV_OPS] Disable paravirtualized VMware scheduler clock and use the default one. - no-steal-acc [X86,KVM] Disable paravirtualized steal time accounting. - steal time is computed, but won't influence scheduler - behaviour + no-steal-acc [X86,KVM,ARM64] Disable paravirtualized steal time + accounting. steal time is computed, but won't + influence scheduler behaviour nolapic [X86-32,APIC] Do not enable or use the local APIC. @@ -3402,6 +3493,12 @@ nomsi [MSI] If the PCI_MSI kernel config parameter is enabled, this kernel boot option can be used to disable the use of MSI interrupts system-wide. + clearmsi [X86] Clears MSI/MSI-X enable bits early in boot + time in order to avoid issues like adapters + screaming irqs and preventing boot progress. + Also, it enforces the PCI Local Bus spec + rule that those bits should be 0 in system reset + events (useful for kexec/kdump cases). noioapicquirk [APIC] Disable all boot interrupt quirks. Safety option to keep boot IRQs enabled. This should never be necessary. @@ -3555,6 +3652,8 @@ may put more devices in an IOMMU group. force_floating [S390] Force usage of floating interrupts. nomio [S390] Do not use MIO instructions. + norid [S390] ignore the RID field and force use of + one PCI domain per PCI function pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power Management. @@ -3567,6 +3666,8 @@ even if the platform doesn't give the OS permission to use them. This may cause conflicts if the platform also tries to use these services. + dpc-native Use native PCIe service for DPC only. May + cause conflicts if firmware uses AER or DPC. compat Disable native PCIe services (PME, AER, DPC, PCIe hotplug). @@ -3720,6 +3821,11 @@ before loading. See Documentation/admin-guide/blockdev/ramdisk.rst. + prot_virt= [S390] enable hosting protected virtual machines + isolated from the hypervisor (if hardware supports + that). + Format: + psi= [KNL] Enable or disable pressure stall information tracking. Format: @@ -3777,6 +3883,12 @@ fully seed the kernel's CRNG. Default is controlled by CONFIG_RANDOM_TRUST_CPU. + random.trust_bootloader={on,off} + [KNL] Enable or disable trusting the use of a + seed passed by the bootloader (if available) to + fully seed the kernel's CRNG. Default is controlled + by CONFIG_RANDOM_TRUST_BOOTLOADER. + ras=option[,option,...] [KNL] RAS-specific options cec_disable [X86] @@ -4227,6 +4339,18 @@ retain_initrd [RAM] Keep initrd memory after extraction + retbleed= [X86] Control mitigation of RETBleed (Arbitrary + Speculative Code Execution with Return Instructions) + vulnerability. + + off - unconditionally disable + auto - automatically select a migitation + + Selecting 'auto' will choose a mitigation method at run + time according to the CPU. + + Not specifying this option is equivalent to retbleed=auto. + rfkill.default_state= 0 "airplane mode". All wifi, bluetooth, wimax, gps, fm, etc. communication is blocked by default. @@ -4464,8 +4588,13 @@ Specific mitigations can also be selected manually: retpoline - replace indirect branches - retpoline,generic - google's original retpoline - retpoline,amd - AMD-specific minimal thunk + retpoline,generic - Retpolines + retpoline,lfence - LFENCE; indirect branch + retpoline,amd - alias for retpoline,lfence + eibrs - enhanced IBRS + eibrs,retpoline - enhanced IBRS + Retpolines + eibrs,lfence - enhanced IBRS + LFENCE + ibrs - use IBRS to protect kernel Not specifying this option is equivalent to spectre_v2=auto. @@ -4569,6 +4698,26 @@ spia_pedr= spia_peddr= + srbds= [X86,INTEL] + Control the Special Register Buffer Data Sampling + (SRBDS) mitigation. + + Certain CPUs are vulnerable to an MDS-like + exploit which can leak bits from the random + number generator. + + By default, this issue is mitigated by + microcode. However, the microcode fix can cause + the RDRAND and RDSEED instructions to become + much slower. Among other effects, this will + result in reduced throughput from /dev/urandom. + + The microcode mitigation can be disabled with + the following option: + + off: Disable mitigation and remove + performance impact to RDRAND and RDSEED + srcutree.counter_wrap_check [KNL] Specifies how frequently to check for grace-period sequence counter wrap for the @@ -4863,6 +5012,7 @@ as the stability checks done at bootup. Used to enable high-resolution timer mode on older hardware, and in virtualized environment. + [x86] unreliable: mark tsc clocksource as unreliable. [x86] noirqtime: Do not use TSC to do irq accounting. Used to run time disable IRQ_TIME_ACCOUNTING on any platforms where RDTSC is slow and this accounting @@ -4931,6 +5081,11 @@ vulnerable to cross-thread TAA attacks. off - Unconditionally disable TAA mitigation + On MDS-affected machines, tsx_async_abort=off can be + prevented by an active MDS mitigation as both vulnerabilities + are mitigated with the same mechanism so in order to disable + this mitigation, you need to specify mds=off too. + Not specifying this option is equivalent to tsx_async_abort=full. On CPUs which are MDS affected and deploy MDS mitigation, TAA mitigation is not @@ -4990,8 +5145,7 @@ usbcore.old_scheme_first= [USB] Start with the old device initialization - scheme, applies only to low and full-speed devices - (default 0 = off). + scheme (default 0 = off). usbcore.usbfs_memory_mb= [USB] Memory limit (in MB) for buffers allocated by @@ -5090,13 +5244,13 @@ Flags is a set of characters, each corresponding to a common usb-storage quirk flag as follows: a = SANE_SENSE (collect more than 18 bytes - of sense data); + of sense data, not on uas); b = BAD_SENSE (don't collect more than 18 - bytes of sense data); + bytes of sense data, not on uas); c = FIX_CAPACITY (decrease the reported device capacity by one sector); d = NO_READ_DISC_INFO (don't use - READ_DISC_INFO command); + READ_DISC_INFO command, not on uas); e = NO_READ_CAPACITY_16 (don't use READ_CAPACITY_16 command); f = NO_REPORT_OPCODES (don't use report opcodes @@ -5110,18 +5264,20 @@ device); j = NO_REPORT_LUNS (don't use report luns command, uas only); + k = NO_SAME (do not use WRITE_SAME, uas only) l = NOT_LOCKABLE (don't try to lock and - unlock ejectable media); + unlock ejectable media, not on uas); m = MAX_SECTORS_64 (don't transfer more - than 64 sectors = 32 KB at a time); + than 64 sectors = 32 KB at a time, + not on uas); n = INITIAL_READ10 (force a retry of the - initial READ(10) command); + initial READ(10) command, not on uas); o = CAPACITY_OK (accept the capacity - reported by the device); + reported by the device, not on uas); p = WRITE_CACHE (the device cache is ON - by default); + by default, not on uas); r = IGNORE_RESIDUE (the device reports - bogus residue values); + bogus residue values, not on uas); s = SINGLE_LUN (the device has only one Logical Unit); t = NO_ATA_1X (don't allow ATA(12) and ATA(16) @@ -5130,7 +5286,8 @@ w = NO_WP_DETECT (don't test whether the medium is write-protected). y = ALWAYS_SYNC (issue a SYNCHRONIZE_CACHE - even if the device claims no cache) + even if the device claims no cache, + not on uas) Example: quirks=0419:aaf5:rl,0421:0433:rc user_debug= [KNL,ARM] @@ -5408,6 +5565,10 @@ This option is obsoleted by the "nopv" option, which has equivalent effect for XEN platform. + xen_no_vector_callback + [KNL,X86,XEN] Disable the vector callback for Xen + event channel interrupts. + xen_scrub_pages= [XEN] Boolean option to control scrubbing pages before giving them back to Xen, for use by other domains. Can be also changed at runtime @@ -5426,6 +5587,21 @@ as generic guest with no PV drivers. Currently support XEN HVM, KVM, HYPER_V and VMWARE guest. + xen.balloon_boot_timeout= [XEN] + The time (in seconds) to wait before giving up to boot + in case initial ballooning fails to free enough memory. + Applies only when running as HVM or PVH guest and + started with less memory configured than allowed at + max. Default is 180. + + xen.event_eoi_delay= [XEN] + How long to delay EOI handling in case of event + storms (jiffies). Default is 10. + + xen.event_loop_timeout= [XEN] + After which time (jiffies) the event handling loop + should start to delay EOI handling. Default is 2. + xirc2ps_cs= [NET,PCMCIA] Format: ,,,,,[,[,[,]]] diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst index e70b365dbc603..80cf2ef2a5062 100644 --- a/Documentation/admin-guide/pm/cpuidle.rst +++ b/Documentation/admin-guide/pm/cpuidle.rst @@ -676,8 +676,8 @@ the ``menu`` governor to be used on the systems that use the ``ladder`` governor by default this way, for example. The other kernel command line parameters controlling CPU idle time management -described below are only relevant for the *x86* architecture and some of -them affect Intel processors only. +described below are only relevant for the *x86* architecture and references +to ``intel_idle`` affect Intel processors only. The *x86* architecture support code recognizes three kernel command line options related to CPU idle time management: ``idle=poll``, ``idle=halt``, @@ -699,10 +699,13 @@ idle, so it very well may hurt single-thread computations performance as well as energy-efficiency. Thus using it for performance reasons may not be a good idea at all.] -The ``idle=nomwait`` option disables the ``intel_idle`` driver and causes -``acpi_idle`` to be used (as long as all of the information needed by it is -there in the system's ACPI tables), but it is not allowed to use the -``MWAIT`` instruction of the CPUs to ask the hardware to enter idle states. +The ``idle=nomwait`` option prevents the use of ``MWAIT`` instruction of +the CPU to enter idle states. When this option is used, the ``acpi_idle`` +driver will use the ``HLT`` instruction instead of ``MWAIT``. On systems +running Intel processors, this option disables the ``intel_idle`` driver +and forces the use of the ``acpi_idle`` driver instead. Note that in either +case, ``acpi_idle`` driver will function only if all the information needed +by it is in the system's ACPI tables. In addition to the architecture-level kernel command line options affecting CPU idle time management, there are parameters affecting individual ``CPUIdle`` diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 032c7cd3cede0..9715685be6e3b 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -862,9 +862,40 @@ The kernel command line parameter printk.devkmsg= overrides this and is a one-time setting until next reboot: once set, it cannot be changed by this sysctl interface anymore. +pty +=== -randomize_va_space: -=================== +See Documentation/filesystems/devpts.rst. + + +random +====== + +This is a directory, with the following entries: + +* ``boot_id``: a UUID generated the first time this is retrieved, and + unvarying after that; + +* ``uuid``: a UUID generated every time this is retrieved (this can + thus be used to generate UUIDs at will); + +* ``entropy_avail``: the pool's entropy count, in bits; + +* ``poolsize``: the entropy pool size, in bits; + +* ``urandom_min_reseed_secs``: obsolete (used to determine the minimum + number of seconds between urandom pool reseeding). This file is + writable for compatibility purposes, but writing to it has no effect + on any RNG behavior; + +* ``write_wakeup_threshold``: when the entropy count drops below this + (as a number of bits), processes waiting to write to ``/dev/random`` + are woken up. This file is writable for compatibility purposes, but + writing to it has no effect on any RNG behavior. + + +randomize_va_space +================== This option can be used to select the type of process address space randomization that is used in the system, for architectures @@ -1125,6 +1156,27 @@ NMI switch that most IA32 servers have fires unknown NMI up, for example. If a system hangs up, try pressing the NMI switch. +unprivileged_bpf_disabled: +========================== + +Writing 1 to this entry will disable unprivileged calls to ``bpf()``; +once disabled, calling ``bpf()`` without ``CAP_SYS_ADMIN`` will return +``-EPERM``. Once set to 1, this can't be cleared from the running kernel +anymore. + +Writing 2 to this entry will also disable unprivileged calls to ``bpf()``, +however, an admin can still change this setting later on, if needed, by +writing 0 or 1 to this entry. + +If ``BPF_UNPRIV_DEFAULT_OFF`` is enabled in the kernel config, then this +entry will default to 2 instead of 0. + += ============================================================= +0 Unprivileged calls to ``bpf()`` are enabled +1 Unprivileged calls to ``bpf()`` are disabled without recovery +2 Unprivileged calls to ``bpf()`` are disabled += ============================================================= + watchdog: ========= diff --git a/Documentation/arm/memory.rst b/Documentation/arm/memory.rst index 0521b4ce5c961..34bb23c44a710 100644 --- a/Documentation/arm/memory.rst +++ b/Documentation/arm/memory.rst @@ -45,9 +45,14 @@ fffe8000 fffeffff DTCM mapping area for platforms with fffe0000 fffe7fff ITCM mapping area for platforms with ITCM mounted inside the CPU. -ffc00000 ffefffff Fixmap mapping region. Addresses provided +ffc80000 ffefffff Fixmap mapping region. Addresses provided by fix_to_virt() will be located here. +ffc00000 ffc7ffff Guard region + +ff800000 ffbfffff Permanent, fixed read-only mapping of the + firmware provided DT blob + fee00000 feffffff Mapping of PCI I/O space. This is a static mapping within the vmalloc space. diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 5a09661330fcc..59daa4c21816b 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -88,6 +88,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1349291 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-500 | #841119,826419 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ diff --git a/Documentation/arm64/tagged-address-abi.rst b/Documentation/arm64/tagged-address-abi.rst index d4a85d535bf99..7d255249094d0 100644 --- a/Documentation/arm64/tagged-address-abi.rst +++ b/Documentation/arm64/tagged-address-abi.rst @@ -44,8 +44,25 @@ The AArch64 Tagged Address ABI has two stages of relaxation depending how the user addresses are used by the kernel: 1. User addresses not accessed by the kernel but used for address space - management (e.g. ``mmap()``, ``mprotect()``, ``madvise()``). The use - of valid tagged pointers in this context is always allowed. + management (e.g. ``mprotect()``, ``madvise()``). The use of valid + tagged pointers in this context is allowed with these exceptions: + + - ``brk()``, ``mmap()`` and the ``new_address`` argument to + ``mremap()`` as these have the potential to alias with existing + user addresses. + + NOTE: This behaviour changed in v5.6 and so some earlier kernels may + incorrectly accept valid tagged pointers for the ``brk()``, + ``mmap()`` and ``mremap()`` system calls. + + - The ``range.start``, ``start`` and ``dst`` arguments to the + ``UFFDIO_*`` ``ioctl()``s used on a file descriptor obtained from + ``userfaultfd()``, as fault addresses subsequently obtained by reading + the file descriptor will be untagged, which may otherwise confuse + tag-unaware programs. + + NOTE: This behaviour changed in v5.14 and so some earlier kernels may + incorrectly accept valid tagged pointers for this system call. 2. User addresses accessed by the kernel (e.g. ``write()``). This ABI relaxation is disabled by default and the application thread needs to diff --git a/Documentation/asm-annotations.rst b/Documentation/asm-annotations.rst new file mode 100644 index 0000000000000..29ccd6e61fe5c --- /dev/null +++ b/Documentation/asm-annotations.rst @@ -0,0 +1,216 @@ +Assembler Annotations +===================== + +Copyright (c) 2017-2019 Jiri Slaby + +This document describes the new macros for annotation of data and code in +assembly. In particular, it contains information about ``SYM_FUNC_START``, +``SYM_FUNC_END``, ``SYM_CODE_START``, and similar. + +Rationale +--------- +Some code like entries, trampolines, or boot code needs to be written in +assembly. The same as in C, such code is grouped into functions and +accompanied with data. Standard assemblers do not force users into precisely +marking these pieces as code, data, or even specifying their length. +Nevertheless, assemblers provide developers with such annotations to aid +debuggers throughout assembly. On top of that, developers also want to mark +some functions as *global* in order to be visible outside of their translation +units. + +Over time, the Linux kernel has adopted macros from various projects (like +``binutils``) to facilitate such annotations. So for historic reasons, +developers have been using ``ENTRY``, ``END``, ``ENDPROC``, and other +annotations in assembly. Due to the lack of their documentation, the macros +are used in rather wrong contexts at some locations. Clearly, ``ENTRY`` was +intended to denote the beginning of global symbols (be it data or code). +``END`` used to mark the end of data or end of special functions with +*non-standard* calling convention. In contrast, ``ENDPROC`` should annotate +only ends of *standard* functions. + +When these macros are used correctly, they help assemblers generate a nice +object with both sizes and types set correctly. For example, the result of +``arch/x86/lib/putuser.S``:: + + Num: Value Size Type Bind Vis Ndx Name + 25: 0000000000000000 33 FUNC GLOBAL DEFAULT 1 __put_user_1 + 29: 0000000000000030 37 FUNC GLOBAL DEFAULT 1 __put_user_2 + 32: 0000000000000060 36 FUNC GLOBAL DEFAULT 1 __put_user_4 + 35: 0000000000000090 37 FUNC GLOBAL DEFAULT 1 __put_user_8 + +This is not only important for debugging purposes. When there are properly +annotated objects like this, tools can be run on them to generate more useful +information. In particular, on properly annotated objects, ``objtool`` can be +run to check and fix the object if needed. Currently, ``objtool`` can report +missing frame pointer setup/destruction in functions. It can also +automatically generate annotations for :doc:`ORC unwinder ` +for most code. Both of these are especially important to support reliable +stack traces which are in turn necessary for :doc:`Kernel live patching +`. + +Caveat and Discussion +--------------------- +As one might realize, there were only three macros previously. That is indeed +insufficient to cover all the combinations of cases: + +* standard/non-standard function +* code/data +* global/local symbol + +There was a discussion_ and instead of extending the current ``ENTRY/END*`` +macros, it was decided that brand new macros should be introduced instead:: + + So how about using macro names that actually show the purpose, instead + of importing all the crappy, historic, essentially randomly chosen + debug symbol macro names from the binutils and older kernels? + +.. _discussion: https://lkml.kernel.org/r/20170217104757.28588-1-jslaby@suse.cz + +Macros Description +------------------ + +The new macros are prefixed with the ``SYM_`` prefix and can be divided into +three main groups: + +1. ``SYM_FUNC_*`` -- to annotate C-like functions. This means functions with + standard C calling conventions, i.e. the stack contains a return address at + the predefined place and a return from the function can happen in a + standard way. When frame pointers are enabled, save/restore of frame + pointer shall happen at the start/end of a function, respectively, too. + + Checking tools like ``objtool`` should ensure such marked functions conform + to these rules. The tools can also easily annotate these functions with + debugging information (like *ORC data*) automatically. + +2. ``SYM_CODE_*`` -- special functions called with special stack. Be it + interrupt handlers with special stack content, trampolines, or startup + functions. + + Checking tools mostly ignore checking of these functions. But some debug + information still can be generated automatically. For correct debug data, + this code needs hints like ``UNWIND_HINT_REGS`` provided by developers. + +3. ``SYM_DATA*`` -- obviously data belonging to ``.data`` sections and not to + ``.text``. Data do not contain instructions, so they have to be treated + specially by the tools: they should not treat the bytes as instructions, + nor assign any debug information to them. + +Instruction Macros +~~~~~~~~~~~~~~~~~~ +This section covers ``SYM_FUNC_*`` and ``SYM_CODE_*`` enumerated above. + +* ``SYM_FUNC_START`` and ``SYM_FUNC_START_LOCAL`` are supposed to be **the + most frequent markings**. They are used for functions with standard calling + conventions -- global and local. Like in C, they both align the functions to + architecture specific ``__ALIGN`` bytes. There are also ``_NOALIGN`` variants + for special cases where developers do not want this implicit alignment. + + ``SYM_FUNC_START_WEAK`` and ``SYM_FUNC_START_WEAK_NOALIGN`` markings are + also offered as an assembler counterpart to the *weak* attribute known from + C. + + All of these **shall** be coupled with ``SYM_FUNC_END``. First, it marks + the sequence of instructions as a function and computes its size to the + generated object file. Second, it also eases checking and processing such + object files as the tools can trivially find exact function boundaries. + + So in most cases, developers should write something like in the following + example, having some asm instructions in between the macros, of course:: + + SYM_FUNC_START(function_hook) + ... asm insns ... + SYM_FUNC_END(function_hook) + + In fact, this kind of annotation corresponds to the now deprecated ``ENTRY`` + and ``ENDPROC`` macros. + +* ``SYM_FUNC_START_ALIAS`` and ``SYM_FUNC_START_LOCAL_ALIAS`` serve for those + who decided to have two or more names for one function. The typical use is:: + + SYM_FUNC_START_ALIAS(__memset) + SYM_FUNC_START(memset) + ... asm insns ... + SYM_FUNC_END(memset) + SYM_FUNC_END_ALIAS(__memset) + + In this example, one can call ``__memset`` or ``memset`` with the same + result, except the debug information for the instructions is generated to + the object file only once -- for the non-``ALIAS`` case. + +* ``SYM_CODE_START`` and ``SYM_CODE_START_LOCAL`` should be used only in + special cases -- if you know what you are doing. This is used exclusively + for interrupt handlers and similar where the calling convention is not the C + one. ``_NOALIGN`` variants exist too. The use is the same as for the ``FUNC`` + category above:: + + SYM_CODE_START_LOCAL(bad_put_user) + ... asm insns ... + SYM_CODE_END(bad_put_user) + + Again, every ``SYM_CODE_START*`` **shall** be coupled by ``SYM_CODE_END``. + + To some extent, this category corresponds to deprecated ``ENTRY`` and + ``END``. Except ``END`` had several other meanings too. + +* ``SYM_INNER_LABEL*`` is used to denote a label inside some + ``SYM_{CODE,FUNC}_START`` and ``SYM_{CODE,FUNC}_END``. They are very similar + to C labels, except they can be made global. An example of use:: + + SYM_CODE_START(ftrace_caller) + /* save_mcount_regs fills in first two parameters */ + ... + + SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL) + /* Load the ftrace_ops into the 3rd parameter */ + ... + + SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) + call ftrace_stub + ... + retq + SYM_CODE_END(ftrace_caller) + +Data Macros +~~~~~~~~~~~ +Similar to instructions, there is a couple of macros to describe data in the +assembly. + +* ``SYM_DATA_START`` and ``SYM_DATA_START_LOCAL`` mark the start of some data + and shall be used in conjunction with either ``SYM_DATA_END``, or + ``SYM_DATA_END_LABEL``. The latter adds also a label to the end, so that + people can use ``lstack`` and (local) ``lstack_end`` in the following + example:: + + SYM_DATA_START_LOCAL(lstack) + .skip 4096 + SYM_DATA_END_LABEL(lstack, SYM_L_LOCAL, lstack_end) + +* ``SYM_DATA`` and ``SYM_DATA_LOCAL`` are variants for simple, mostly one-line + data:: + + SYM_DATA(HEAP, .long rm_heap) + SYM_DATA(heap_end, .long rm_stack) + + In the end, they expand to ``SYM_DATA_START`` with ``SYM_DATA_END`` + internally. + +Support Macros +~~~~~~~~~~~~~~ +All the above reduce themselves to some invocation of ``SYM_START``, +``SYM_END``, or ``SYM_ENTRY`` at last. Normally, developers should avoid using +these. + +Further, in the above examples, one could see ``SYM_L_LOCAL``. There are also +``SYM_L_GLOBAL`` and ``SYM_L_WEAK``. All are intended to denote linkage of a +symbol marked by them. They are used either in ``_LABEL`` variants of the +earlier macros, or in ``SYM_START``. + + +Overriding Macros +~~~~~~~~~~~~~~~~~ +Architecture can also override any of the macros in their own +``asm/linkage.h``, including macros specifying the type of a symbol +(``SYM_T_FUNC``, ``SYM_T_OBJECT``, and ``SYM_T_NONE``). As every macro +described in this file is surrounded by ``#ifdef`` + ``#endif``, it is enough +to define the macros differently in the aforementioned architecture-dependent +header. diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt index 093cdaefdb373..d8b101c97031b 100644 --- a/Documentation/atomic_bitops.txt +++ b/Documentation/atomic_bitops.txt @@ -59,7 +59,7 @@ Like with atomic_t, the rule of thumb is: - RMW operations that have a return value are fully ordered. - RMW operations that are conditional are unordered on FAILURE, - otherwise the above rules apply. In the case of test_and_{}_bit() operations, + otherwise the above rules apply. In the case of test_and_set_bit_lock(), if the bit in memory is unchanged by the operation then it is deemed to have failed. diff --git a/Documentation/cgroups/namespace.txt b/Documentation/cgroups/namespace.txt new file mode 100644 index 0000000000000..a5b80e8f295f9 --- /dev/null +++ b/Documentation/cgroups/namespace.txt @@ -0,0 +1,142 @@ + CGroup Namespaces + +CGroup Namespace provides a mechanism to virtualize the view of the +/proc//cgroup file. The CLONE_NEWCGROUP clone-flag can be used with +clone() and unshare() syscalls to create a new cgroup namespace. +The process running inside the cgroup namespace will have its /proc//cgroup +output restricted to cgroupns-root. cgroupns-root is the cgroup of the process +at the time of creation of the cgroup namespace. + +Prior to CGroup Namespace, the /proc//cgroup file used to show complete +path of the cgroup of a process. In a container setup (where a set of cgroups +and namespaces are intended to isolate processes), the /proc//cgroup file +may leak potential system level information to the isolated processes. + +For Example: + $ cat /proc/self/cgroup + 0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/batchjobs/container_id1 + +The path '/batchjobs/container_id1' can generally be considered as system-data +and its desirable to not expose it to the isolated process. + +CGroup Namespaces can be used to restrict visibility of this path. +For Example: + # Before creating cgroup namespace + $ ls -l /proc/self/ns/cgroup + lrwxrwxrwx 1 root root 0 2014-07-15 10:37 /proc/self/ns/cgroup -> cgroup:[4026531835] + $ cat /proc/self/cgroup + 0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/batchjobs/container_id1 + + # unshare(CLONE_NEWCGROUP) and exec /bin/bash + $ ~/unshare -c + [ns]$ ls -l /proc/self/ns/cgroup + lrwxrwxrwx 1 root root 0 2014-07-15 10:35 /proc/self/ns/cgroup -> cgroup:[4026532183] + # From within new cgroupns, process sees that its in the root cgroup + [ns]$ cat /proc/self/cgroup + 0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/ + + # From global cgroupns: + $ cat /proc//cgroup + 0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/batchjobs/container_id1 + + # Unshare cgroupns along with userns and mountns + # Following calls unshare(CLONE_NEWCGROUP|CLONE_NEWUSER|CLONE_NEWNS), then + # sets up uid/gid map and execs /bin/bash + $ ~/unshare -c -u -m + # Originally, we were in /batchjobs/container_id1 cgroup. Mount our own cgroup + # hierarchy. + [ns]$ mount -t cgroup cgroup /tmp/cgroup + [ns]$ ls -l /tmp/cgroup + total 0 + -r--r--r-- 1 root root 0 2014-10-13 09:32 cgroup.controllers + -r--r--r-- 1 root root 0 2014-10-13 09:32 cgroup.populated + -rw-r--r-- 1 root root 0 2014-10-13 09:25 cgroup.procs + -rw-r--r-- 1 root root 0 2014-10-13 09:32 cgroup.subtree_control + +The cgroupns-root (/batchjobs/container_id1 in above example) becomes the +filesystem root for the namespace specific cgroupfs mount. + +The virtualization of /proc/self/cgroup file combined with restricting +the view of cgroup hierarchy by namespace-private cgroupfs mount +should provide a completely isolated cgroup view inside the container. + +In its current form, the cgroup namespaces patcheset provides following +behavior: + +(1) The 'cgroupns-root' for a cgroup namespace is the cgroup in which + the process calling unshare is running. + For ex. if a process in /batchjobs/container_id1 cgroup calls unshare, + cgroup /batchjobs/container_id1 becomes the cgroupns-root. + For the init_cgroup_ns, this is the real root ('/') cgroup + (identified in code as cgrp_dfl_root.cgrp). + +(2) The cgroupns-root cgroup does not change even if the namespace + creator process later moves to a different cgroup. + $ ~/unshare -c # unshare cgroupns in some cgroup + [ns]$ cat /proc/self/cgroup + 0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/ + [ns]$ mkdir sub_cgrp_1 + [ns]$ echo 0 > sub_cgrp_1/cgroup.procs + [ns]$ cat /proc/self/cgroup + 0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/sub_cgrp_1 + +(3) Each process gets its CGROUPNS specific view of /proc//cgroup +(a) Processes running inside the cgroup namespace will be able to see + cgroup paths (in /proc/self/cgroup) only inside their root cgroup + [ns]$ sleep 100000 & # From within unshared cgroupns + [1] 7353 + [ns]$ echo 7353 > sub_cgrp_1/cgroup.procs + [ns]$ cat /proc/7353/cgroup + 0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/sub_cgrp_1 + +(b) From global cgroupns, the real cgroup path will be visible: + $ cat /proc/7353/cgroup + 0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/batchjobs/container_id1/sub_cgrp_1 + +(c) From a sibling cgroupns (cgroupns root-ed at a different cgroup), cgroup + path relative to its own cgroupns-root will be shown: + # ns2's cgroupns-root is at '/batchjobs/container_id2' + [ns2]$ cat /proc/7353/cgroup + 0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/../container_id2/sub_cgrp_1 + + Note that the relative path always starts with '/' to indicate that its + relative to the cgroupns-root of the caller. + +(4) Processes inside a cgroupns can move in-and-out of the cgroupns-root + (if they have proper access to external cgroups). + # From inside cgroupns (with cgroupns-root at /batchjobs/container_id1), and + # assuming that the global hierarchy is still accessible inside cgroupns: + $ cat /proc/7353/cgroup + 0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/sub_cgrp_1 + $ echo 7353 > batchjobs/container_id2/cgroup.procs + $ cat /proc/7353/cgroup + 0:cpuset,cpu,cpuacct,memory,devices,freezer,hugetlb:/../container_id2 + + Note that this kind of setup is not encouraged. A task inside cgroupns + should only be exposed to its own cgroupns hierarchy. Otherwise it makes + the virtualization of /proc//cgroup less useful. + +(5) Setns to another cgroup namespace is allowed when: + (a) the process has CAP_SYS_ADMIN in its current userns + (b) the process has CAP_SYS_ADMIN in the target cgroupns' userns + No implicit cgroup changes happen with attaching to another cgroupns. It + is expected that the somone moves the attaching process under the target + cgroupns-root. + +(6) When some thread from a multi-threaded process unshares its + cgroup-namespace, the new cgroupns gets applied to the entire process (all + the threads). For the unified-hierarchy this is expected as it only allows + process-level containerization. For the legacy hierarchies this may be + unexpected. So all the threads in the process will have the same cgroup. + +(7) The cgroup namespace is alive as long as there is atleast 1 + process inside it. When the last process exits, the cgroup + namespace is destroyed. The cgroupns-root and the actual cgroups + remain though. + +(8) Namespace specific cgroup hierarchy can be mounted by a process running + inside cgroupns: + $ mount -t cgroup -o __DEVEL__sane_behavior cgroup $MOUNT_POINT + + This will mount the unified cgroup hierarchy with cgroupns-root as the + filesystem root. The process needs CAP_SYS_ADMIN in its userns and mntns. diff --git a/Documentation/conf.py b/Documentation/conf.py index a8fe845832bce..38c1f7618b5e8 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -98,7 +98,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = 'en' # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst index ecbebf4ca8e7e..74a7cf13d2391 100644 --- a/Documentation/core-api/printk-formats.rst +++ b/Documentation/core-api/printk-formats.rst @@ -508,6 +508,12 @@ Passed by reference. Thanks ====== +Kernel messages: + + %pj 123456 + + For generating the jhash of a string truncated to six digits + If you add other %p extensions, please extend with one or more test cases, if at all feasible. diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst index fcedc5349ace4..2ad3c1fce5795 100644 --- a/Documentation/core-api/xarray.rst +++ b/Documentation/core-api/xarray.rst @@ -461,13 +461,15 @@ or iterations will move the index to the first index in the range. Each entry will only be returned once, no matter how many indices it occupies. -Using xas_next() or xas_prev() with a multi-index xa_state -is not supported. Using either of these functions on a multi-index entry -will reveal sibling entries; these should be skipped over by the caller. - -Storing ``NULL`` into any index of a multi-index entry will set the entry -at every index to ``NULL`` and dissolve the tie. Splitting a multi-index -entry into entries occupying smaller ranges is not yet supported. +Using xas_next() or xas_prev() with a multi-index xa_state is not +supported. Using either of these functions on a multi-index entry will +reveal sibling entries; these should be skipped over by the caller. + +Storing ``NULL`` into any index of a multi-index entry will set the +entry at every index to ``NULL`` and dissolve the tie. A multi-index +entry can be split into entries occupying smaller ranges by calling +xas_split_alloc() without the xa_lock held, followed by taking the lock +and calling xas_split(). Functions and structures ======================== diff --git a/Documentation/devicetree/bindings/Makefile b/Documentation/devicetree/bindings/Makefile index 5138a2f6232aa..646cb35253733 100644 --- a/Documentation/devicetree/bindings/Makefile +++ b/Documentation/devicetree/bindings/Makefile @@ -12,7 +12,6 @@ $(obj)/%.example.dts: $(src)/%.yaml FORCE $(call if_changed,chk_binding) DT_TMP_SCHEMA := processed-schema.yaml -extra-y += $(DT_TMP_SCHEMA) quiet_cmd_mk_schema = SCHEMA $@ cmd_mk_schema = $(DT_MK_SCHEMA) $(DT_MK_SCHEMA_FLAGS) -o $@ $(real-prereqs) @@ -26,8 +25,12 @@ DT_DOCS = $(shell \ DT_SCHEMA_FILES ?= $(addprefix $(src)/,$(DT_DOCS)) +ifeq ($(CHECK_DTBS),) extra-y += $(patsubst $(src)/%.yaml,%.example.dts, $(DT_SCHEMA_FILES)) extra-y += $(patsubst $(src)/%.yaml,%.example.dt.yaml, $(DT_SCHEMA_FILES)) +endif $(obj)/$(DT_TMP_SCHEMA): $(DT_SCHEMA_FILES) FORCE $(call if_changed,mk_schema) + +extra-y += $(DT_TMP_SCHEMA) diff --git a/Documentation/devicetree/bindings/arm/qcom.yaml b/Documentation/devicetree/bindings/arm/qcom.yaml index e39d8f02e33c1..3de6182cde973 100644 --- a/Documentation/devicetree/bindings/arm/qcom.yaml +++ b/Documentation/devicetree/bindings/arm/qcom.yaml @@ -112,8 +112,8 @@ properties: - const: qcom,msm8974 - items: - - const: qcom,msm8916-mtp/1 - const: qcom,msm8916-mtp + - const: qcom,msm8916-mtp/1 - const: qcom,msm8916 - items: diff --git a/Documentation/devicetree/bindings/arm/tegra.yaml b/Documentation/devicetree/bindings/arm/tegra.yaml index 60b38eb5c61ab..56e1945911f1e 100644 --- a/Documentation/devicetree/bindings/arm/tegra.yaml +++ b/Documentation/devicetree/bindings/arm/tegra.yaml @@ -49,7 +49,7 @@ properties: - const: toradex,apalis_t30 - const: nvidia,tegra30 - items: - - const: toradex,apalis_t30-eval-v1.1 + - const: toradex,apalis_t30-v1.1-eval - const: toradex,apalis_t30-eval - const: toradex,apalis_t30-v1.1 - const: toradex,apalis_t30 diff --git a/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt b/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt index e96e085271c13..83f6c6a7c41c7 100644 --- a/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt +++ b/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt @@ -46,7 +46,7 @@ Required properties: Example (R-Car H3): usb2_clksel: clock-controller@e6590630 { - compatible = "renesas,r8a77950-rcar-usb2-clock-sel", + compatible = "renesas,r8a7795-rcar-usb2-clock-sel", "renesas,rcar-gen3-usb2-clock-sel"; reg = <0 0xe6590630 0 0x02>; clocks = <&cpg CPG_MOD 703>, <&usb_extal>, <&usb_xtal>; diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml b/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml index fb747682006d9..92dc08b78a176 100644 --- a/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml +++ b/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml @@ -10,6 +10,9 @@ title: Amlogic specific extensions to the Synopsys Designware HDMI Controller maintainers: - Neil Armstrong +allOf: + - $ref: /schemas/sound/name-prefix.yaml# + description: | The Amlogic Meson Synopsys Designware Integration is composed of - A Synopsys DesignWare HDMI Controller IP @@ -101,6 +104,8 @@ properties: "#sound-dai-cells": const: 0 + sound-name-prefix: true + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml b/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml index d1205a6697a09..766b04501cb96 100644 --- a/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml +++ b/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml @@ -78,6 +78,10 @@ properties: interrupts: maxItems: 1 + amlogic,canvas: + description: should point to a canvas provider node + $ref: /schemas/types.yaml#/definitions/phandle + power-domains: maxItems: 1 description: phandle to the associated power domain @@ -106,6 +110,7 @@ required: - port@1 - "#address-cells" - "#size-cells" + - amlogic,canvas examples: - | @@ -116,6 +121,7 @@ examples: interrupts = <3>; #address-cells = <1>; #size-cells = <0>; + amlogic,canvas = <&canvas>; /* CVBS VDAC output port */ port@0 { diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt b/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt index b6a7e7397b8b4..b944fe0671885 100644 --- a/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt +++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dpi.txt @@ -16,6 +16,9 @@ Required properties: Documentation/devicetree/bindings/graph.txt. This port should be connected to the input port of an attached HDMI or LVDS encoder chip. +Optional properties: +- pinctrl-names: Contain "default" and "sleep". + Example: dpi0: dpi@1401d000 { @@ -26,6 +29,9 @@ dpi0: dpi@1401d000 { <&mmsys CLK_MM_DPI_ENGINE>, <&apmixedsys CLK_APMIXED_TVDPLL>; clock-names = "pixel", "engine", "pll"; + pinctrl-names = "default", "sleep"; + pinctrl-0 = <&dpi_pin_func>; + pinctrl-1 = <&dpi_pin_idle>; port { dpi0_out: endpoint { diff --git a/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml b/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml index 4cb9d6b931389..c61c4a6b57f10 100644 --- a/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml +++ b/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml @@ -58,7 +58,7 @@ if: then: properties: clocks: - maxItems: 2 + minItems: 2 required: - clock-names diff --git a/Documentation/devicetree/bindings/gpio/gpio-altera.txt b/Documentation/devicetree/bindings/gpio/gpio-altera.txt index 146e554b3c676..2a80e272cd666 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-altera.txt +++ b/Documentation/devicetree/bindings/gpio/gpio-altera.txt @@ -9,8 +9,9 @@ Required properties: - The second cell is reserved and is currently unused. - gpio-controller : Marks the device node as a GPIO controller. - interrupt-controller: Mark the device node as an interrupt controller -- #interrupt-cells : Should be 1. The interrupt type is fixed in the hardware. +- #interrupt-cells : Should be 2. The interrupt type is fixed in the hardware. - The first cell is the GPIO offset number within the GPIO controller. + - The second cell is the interrupt trigger type and level flags. - interrupts: Specify the interrupt. - altr,interrupt-type: Specifies the interrupt trigger type the GPIO hardware is synthesized. This field is required if the Altera GPIO controller @@ -38,6 +39,6 @@ gpio_altr: gpio@ff200000 { altr,interrupt-type = ; #gpio-cells = <2>; gpio-controller; - #interrupt-cells = <1>; + #interrupt-cells = <2>; interrupt-controller; }; diff --git a/Documentation/devicetree/bindings/gpio/sgpio-aspeed.txt b/Documentation/devicetree/bindings/gpio/sgpio-aspeed.txt index d4d83916c09dd..be329ea4794f8 100644 --- a/Documentation/devicetree/bindings/gpio/sgpio-aspeed.txt +++ b/Documentation/devicetree/bindings/gpio/sgpio-aspeed.txt @@ -20,8 +20,9 @@ Required properties: - gpio-controller : Marks the device node as a GPIO controller - interrupts : Interrupt specifier, see interrupt-controller/interrupts.txt - interrupt-controller : Mark the GPIO controller as an interrupt-controller -- ngpios : number of GPIO lines, see gpio.txt - (should be multiple of 8, up to 80 pins) +- ngpios : number of *hardware* GPIO lines, see gpio.txt. This will expose + 2 software GPIOs per hardware GPIO: one for hardware input, one for hardware + output. Up to 80 pins, must be a multiple of 8. - clocks : A phandle to the APB clock for SGPM clock division - bus-frequency : SGPM CLK frequency diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml index cc544fdc38bea..bc8aed17800d3 100644 --- a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml +++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml @@ -85,7 +85,7 @@ properties: Must be the device tree identifier of the over-sampling mode pins. As the line is active high, it should be marked GPIO_ACTIVE_HIGH. - maxItems: 1 + maxItems: 3 adi,sw-mode: description: @@ -128,9 +128,9 @@ examples: adi,conversion-start-gpios = <&gpio 17 GPIO_ACTIVE_HIGH>; reset-gpios = <&gpio 27 GPIO_ACTIVE_HIGH>; adi,first-data-gpios = <&gpio 22 GPIO_ACTIVE_HIGH>; - adi,oversampling-ratio-gpios = <&gpio 18 GPIO_ACTIVE_HIGH - &gpio 23 GPIO_ACTIVE_HIGH - &gpio 26 GPIO_ACTIVE_HIGH>; + adi,oversampling-ratio-gpios = <&gpio 18 GPIO_ACTIVE_HIGH>, + <&gpio 23 GPIO_ACTIVE_HIGH>, + <&gpio 26 GPIO_ACTIVE_HIGH>; standby-gpios = <&gpio 24 GPIO_ACTIVE_LOW>; adi,sw-mode; }; diff --git a/Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.txt b/Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.txt index c82794002595f..89647d7143879 100644 --- a/Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.txt +++ b/Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.txt @@ -21,7 +21,7 @@ controller state. The mux controller state is described in Example: mux: mux-controller { - compatible = "mux-gpio"; + compatible = "gpio-mux"; #mux-control-cells = <0>; mux-gpios = <&pioA 0 GPIO_ACTIVE_HIGH>, diff --git a/Documentation/devicetree/bindings/mailbox/xlnx,zynqmp-ipi-mailbox.txt b/Documentation/devicetree/bindings/mailbox/xlnx,zynqmp-ipi-mailbox.txt index 4438432bfe9b3..ad76edccf8816 100644 --- a/Documentation/devicetree/bindings/mailbox/xlnx,zynqmp-ipi-mailbox.txt +++ b/Documentation/devicetree/bindings/mailbox/xlnx,zynqmp-ipi-mailbox.txt @@ -87,7 +87,7 @@ Example: ranges; /* APU<->RPU0 IPI mailbox controller */ - ipi_mailbox_rpu0: mailbox@ff90400 { + ipi_mailbox_rpu0: mailbox@ff990400 { reg = <0xff990400 0x20>, <0xff990420 0x20>, <0xff990080 0x20>, diff --git a/Documentation/devicetree/bindings/mfd/ahc1ec0.yaml b/Documentation/devicetree/bindings/mfd/ahc1ec0.yaml new file mode 100644 index 0000000000000..40af14bb9c0a2 --- /dev/null +++ b/Documentation/devicetree/bindings/mfd/ahc1ec0.yaml @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/mfd/ahc1ec0.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Advantech Embedded Controller (AHC1EC0) + +maintainers: + - Campion Kang + +description: | + AHC1EC0 is one of the embedded controllers used by Advantech to provide several + functions such as watchdog, hwmon, brightness, etc. Advantech related applications + can control the whole system via these functions. + +properties: + compatible: + const: advantech,ahc1ec0 + + advantech,sub-dev-nb: + description: + The number of sub-devices specified in the platform. + $ref: /schemas/types.yaml#/definitions/uint32 + maxItems: 1 + + advantech,sub-dev: + description: + A list of the sub-devices supported in the platform. Defines for the + appropriate values can found in dt-bindings/mfd/ahc1ec0-dt.h. + $ref: "/schemas/types.yaml#/definitions/uint32-array" + minItems: 1 + maxItems: 6 + + advantech,hwmon-profile: + description: + The number of sub-devices specified in the platform. Defines for the + hwmon profiles can found in dt-bindings/mfd/ahc1ec0-dt. + $ref: /schemas/types.yaml#/definitions/uint32 + maxItems: 1 + +required: + - compatible + - advantech,sub-dev-nb + - advantech,sub-dev + +if: + properties: + advantech,sub-dev: + contains: + const: 0x3 +then: + required: + - advantech,hwmon-profile + +additionalProperties: false + +examples: + - | + #include + ahc1ec0 { + compatible = "advantech,ahc1ec0"; + + advantech,sub-dev-nb = <2>; + advantech,sub-dev = ; + + advantech,hwmon-profile = ; + }; diff --git a/Documentation/devicetree/bindings/mmc/mtk-sd.txt b/Documentation/devicetree/bindings/mmc/mtk-sd.txt index 8a532f4453f26..09aecec47003a 100644 --- a/Documentation/devicetree/bindings/mmc/mtk-sd.txt +++ b/Documentation/devicetree/bindings/mmc/mtk-sd.txt @@ -49,6 +49,8 @@ Optional properties: error caused by stop clock(fifo full) Valid range = [0:0x7]. if not present, default value is 0. applied to compatible "mediatek,mt2701-mmc". +- resets: Phandle and reset specifier pair to softreset line of MSDC IP. +- reset-names: Should be "hrst". Examples: mmc0: mmc@11230000 { diff --git a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt index 2cf3affa1be70..96c0b1440c9c5 100644 --- a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt +++ b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt @@ -15,8 +15,15 @@ Required properties: - "nvidia,tegra210-sdhci": for Tegra210 - "nvidia,tegra186-sdhci": for Tegra186 - "nvidia,tegra194-sdhci": for Tegra194 -- clocks : Must contain one entry, for the module clock. - See ../clocks/clock-bindings.txt for details. +- clocks: For Tegra210, Tegra186 and Tegra194 must contain two entries. + One for the module clock and one for the timeout clock. + For all other Tegra devices, must contain a single entry for + the module clock. See ../clocks/clock-bindings.txt for details. +- clock-names: For Tegra210, Tegra186 and Tegra194 must contain the + strings 'sdhci' and 'tmclk' to represent the module and + the timeout clocks, respectively. + For all other Tegra devices must contain the string 'sdhci' + to represent the module clock. - resets : Must contain an entry for each entry in reset-names. See ../reset/reset.txt for details. - reset-names : Must include the following entries: @@ -99,7 +106,7 @@ Optional properties for Tegra210, Tegra186 and Tegra194: Example: sdhci@700b0000 { - compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci"; + compatible = "nvidia,tegra124-sdhci"; reg = <0x0 0x700b0000 0x0 0x200>; interrupts = ; clocks = <&tegra_car TEGRA210_CLK_SDMMC1>; @@ -115,3 +122,22 @@ sdhci@700b0000 { nvidia,pad-autocal-pull-down-offset-1v8 = <0x7b>; status = "disabled"; }; + +sdhci@700b0000 { + compatible = "nvidia,tegra210-sdhci"; + reg = <0x0 0x700b0000 0x0 0x200>; + interrupts = ; + clocks = <&tegra_car TEGRA210_CLK_SDMMC1>, + <&tegra_car TEGRA210_CLK_SDMMC_LEGACY>; + clock-names = "sdhci", "tmclk"; + resets = <&tegra_car 14>; + reset-names = "sdhci"; + pinctrl-names = "sdmmc-3v3", "sdmmc-1v8"; + pinctrl-0 = <&sdmmc1_3v3>; + pinctrl-1 = <&sdmmc1_1v8>; + nvidia,pad-autocal-pull-up-offset-3v3 = <0x00>; + nvidia,pad-autocal-pull-down-offset-3v3 = <0x7d>; + nvidia,pad-autocal-pull-up-offset-1v8 = <0x7b>; + nvidia,pad-autocal-pull-down-offset-1v8 = <0x7b>; + status = "disabled"; +}; diff --git a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt index 44919d48d2415..c459f169a9044 100644 --- a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt +++ b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt @@ -122,7 +122,7 @@ on various other factors also like; so the device should have enough free bytes available its OOB/Spare area to accommodate ECC for entire page. In general following expression helps in determining if given device can accommodate ECC syndrome: - "2 + (PAGESIZE / 512) * ECC_BYTES" >= OOBSIZE" + "2 + (PAGESIZE / 512) * ECC_BYTES" <= OOBSIZE" where OOBSIZE number of bytes in OOB/spare area PAGESIZE number of bytes in main-area of device page diff --git a/Documentation/devicetree/bindings/mtd/nand-controller.yaml b/Documentation/devicetree/bindings/mtd/nand-controller.yaml index d261b7096c696..2767f182fd3c5 100644 --- a/Documentation/devicetree/bindings/mtd/nand-controller.yaml +++ b/Documentation/devicetree/bindings/mtd/nand-controller.yaml @@ -44,7 +44,7 @@ patternProperties: properties: reg: description: - Contains the native Ready/Busy IDs. + Contains the chip-select IDs. nand-ecc-mode: allOf: @@ -139,6 +139,6 @@ examples: nand-ecc-mode = "soft"; nand-ecc-algo = "bch"; - /* controller specific properties */ + /* NAND chip specific properties */ }; }; diff --git a/Documentation/devicetree/bindings/net/btusb.txt b/Documentation/devicetree/bindings/net/btusb.txt index b1ad6ee68e909..c51dd99dc0d3c 100644 --- a/Documentation/devicetree/bindings/net/btusb.txt +++ b/Documentation/devicetree/bindings/net/btusb.txt @@ -38,7 +38,7 @@ Following example uses irq pin number 3 of gpio0 for out of band wake-on-bt: compatible = "usb1286,204e"; reg = <1>; interrupt-parent = <&gpio0>; - interrupt-name = "wakeup"; + interrupt-names = "wakeup"; interrupts = <3 IRQ_TYPE_LEVEL_LOW>; }; }; diff --git a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt index 27e1b4cebfbd4..53c26ffd020a3 100644 --- a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt +++ b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt @@ -31,9 +31,9 @@ tcan4x5x: tcan4x5x@0 { #address-cells = <1>; #size-cells = <1>; spi-max-frequency = <10000000>; - bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>; + bosch,mram-cfg = <0x0 0 0 16 0 0 1 1>; interrupt-parent = <&gpio1>; - interrupts = <14 GPIO_ACTIVE_LOW>; + interrupts = <14 IRQ_TYPE_LEVEL_LOW>; device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; device-wake-gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>; reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>; diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index 0e7c31794ae6c..fcafce635ff01 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -51,7 +51,7 @@ properties: description: Reference to an nvmem node for the MAC address - nvmem-cells-names: + nvmem-cell-names: const: mac-address phy-connection-type: @@ -190,6 +190,11 @@ properties: Indicates that full-duplex is used. When absent, half duplex is assumed. + pause: + $ref: /schemas/types.yaml#definitions/flag + description: + Indicates that pause should be enabled. + asym-pause: $ref: /schemas/types.yaml#definitions/flag description: diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml index f70f18ff821f5..8f3e9c774b74a 100644 --- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml @@ -87,6 +87,14 @@ properties: compensate for the board being designed with the lanes swapped. + enet-phy-lane-no-swap: + $ref: /schemas/types.yaml#/definitions/flag + description: + If set, indicates that PHY will disable swap of the + TX/RX lanes. This property allows the PHY to work correcly after + e.g. wrong bootstrap configuration caused by issues in PCB + layout design. + eee-broken-100tx: $ref: /schemas/types.yaml#definitions/flag description: diff --git a/Documentation/devicetree/bindings/net/fsl-fman.txt b/Documentation/devicetree/bindings/net/fsl-fman.txt index 299c0dcd67db4..1316f0aec0cf3 100644 --- a/Documentation/devicetree/bindings/net/fsl-fman.txt +++ b/Documentation/devicetree/bindings/net/fsl-fman.txt @@ -110,6 +110,13 @@ PROPERTIES Usage: required Definition: See soc/fsl/qman.txt and soc/fsl/bman.txt +- fsl,erratum-a050385 + Usage: optional + Value type: boolean + Definition: A boolean property. Indicates the presence of the + erratum A050385 which indicates that DMA transactions that are + split can result in a FMan lock. + ============================================================================= FMan MURAM Node diff --git a/Documentation/devicetree/bindings/net/nfc/nxp-nci.txt b/Documentation/devicetree/bindings/net/nfc/nxp-nci.txt index cfaf889989187..9e4dc510a40aa 100644 --- a/Documentation/devicetree/bindings/net/nfc/nxp-nci.txt +++ b/Documentation/devicetree/bindings/net/nfc/nxp-nci.txt @@ -25,7 +25,7 @@ Example (for ARM-based BeagleBone with NPC100 NFC controller on I2C2): clock-frequency = <100000>; interrupt-parent = <&gpio1>; - interrupts = <29 GPIO_ACTIVE_HIGH>; + interrupts = <29 IRQ_TYPE_LEVEL_HIGH>; enable-gpios = <&gpio0 30 GPIO_ACTIVE_HIGH>; firmware-gpios = <&gpio0 31 GPIO_ACTIVE_HIGH>; diff --git a/Documentation/devicetree/bindings/net/nfc/pn544.txt b/Documentation/devicetree/bindings/net/nfc/pn544.txt index 92f399ec22b87..2bd82562ce8e9 100644 --- a/Documentation/devicetree/bindings/net/nfc/pn544.txt +++ b/Documentation/devicetree/bindings/net/nfc/pn544.txt @@ -25,7 +25,7 @@ Example (for ARM-based BeagleBone with PN544 on I2C2): clock-frequency = <400000>; interrupt-parent = <&gpio1>; - interrupts = <17 GPIO_ACTIVE_HIGH>; + interrupts = <17 IRQ_TYPE_LEVEL_HIGH>; enable-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; firmware-gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>; diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 4845e29411e46..e08cd4c4d5682 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -347,6 +347,7 @@ allOf: - st,spear600-gmac then: + properties: snps,tso: $ref: /schemas/types.yaml#definitions/flag description: diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt index ae661e65354e7..f9499b20d8405 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt @@ -81,6 +81,12 @@ Optional properties: Definition: Name of external front end module used. Some valid FEM names for example: "microsemi-lx5586", "sky85703-11" and "sky85803" etc. +- qcom,snoc-host-cap-8bit-quirk: + Usage: Optional + Value type: + Definition: Quirk specifying that the firmware expects the 8bit version + of the host capability QMI request + Example (to supply PCI based wifi block details): diff --git a/Documentation/devicetree/bindings/pci/nvidia,tegra194-pcie.txt b/Documentation/devicetree/bindings/pci/nvidia,tegra194-pcie.txt index b739f92da58e5..1f90eb39870be 100644 --- a/Documentation/devicetree/bindings/pci/nvidia,tegra194-pcie.txt +++ b/Documentation/devicetree/bindings/pci/nvidia,tegra194-pcie.txt @@ -118,7 +118,7 @@ Tegra194: -------- pcie@14180000 { - compatible = "nvidia,tegra194-pcie", "snps,dw-pcie"; + compatible = "nvidia,tegra194-pcie"; power-domains = <&bpmp TEGRA194_POWER_DOMAIN_PCIEX8B>; reg = <0x00 0x14180000 0x0 0x00020000 /* appl registers (128K) */ 0x00 0x38000000 0x0 0x00040000 /* configuration space (256K) */ diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,armada-37xx-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,armada-37xx-pinctrl.txt index 38dc56a577604..ecec514b31550 100644 --- a/Documentation/devicetree/bindings/pinctrl/marvell,armada-37xx-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/marvell,armada-37xx-pinctrl.txt @@ -43,19 +43,19 @@ group emmc_nb group pwm0 - pin 11 (GPIO1-11) - - functions pwm, gpio + - functions pwm, led, gpio group pwm1 - pin 12 - - functions pwm, gpio + - functions pwm, led, gpio group pwm2 - pin 13 - - functions pwm, gpio + - functions pwm, led, gpio group pwm3 - pin 14 - - functions pwm, gpio + - functions pwm, led, gpio group pmic1 - pin 7 diff --git a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt index 093edda0c8dfc..6cd83d920155f 100644 --- a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt +++ b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt @@ -13,6 +13,14 @@ common regulator binding documented in: Required properties of the main device node (the parent!): + - s5m8767,pmic-buck-ds-gpios: GPIO specifiers for three host gpio's used + for selecting GPIO DVS lines. It is one-to-one mapped to dvs gpio lines. + + [1] If either of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional + property is specified, then all the eight voltage values for the + 's5m8767,pmic-buck[2/3/4]-dvs-voltage' should be specified. + +Optional properties of the main device node (the parent!): - s5m8767,pmic-buck2-dvs-voltage: A set of 8 voltage values in micro-volt (uV) units for buck2 when changing voltage using gpio dvs. Refer to [1] below for additional information. @@ -25,26 +33,13 @@ Required properties of the main device node (the parent!): units for buck4 when changing voltage using gpio dvs. Refer to [1] below for additional information. - - s5m8767,pmic-buck-ds-gpios: GPIO specifiers for three host gpio's used - for selecting GPIO DVS lines. It is one-to-one mapped to dvs gpio lines. - - [1] If none of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional - property is specified, the 's5m8767,pmic-buck[2/3/4]-dvs-voltage' - property should specify atleast one voltage level (which would be a - safe operating voltage). - - If either of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional - property is specified, then all the eight voltage values for the - 's5m8767,pmic-buck[2/3/4]-dvs-voltage' should be specified. - -Optional properties of the main device node (the parent!): - s5m8767,pmic-buck2-uses-gpio-dvs: 'buck2' can be controlled by gpio dvs. - s5m8767,pmic-buck3-uses-gpio-dvs: 'buck3' can be controlled by gpio dvs. - s5m8767,pmic-buck4-uses-gpio-dvs: 'buck4' can be controlled by gpio dvs. Additional properties required if either of the optional properties are used: - - s5m8767,pmic-buck234-default-dvs-idx: Default voltage setting selected from + - s5m8767,pmic-buck-default-dvs-idx: Default voltage setting selected from the possible 8 options selectable by the dvs gpios. The value of this property should be between 0 and 7. If not specified or if out of range, the default value of this property is set to 0. diff --git a/Documentation/devicetree/bindings/reset/brcm,brcmstb-reset.txt b/Documentation/devicetree/bindings/reset/brcm,brcmstb-reset.txt index 6e5341b4f8919..ee59409640f24 100644 --- a/Documentation/devicetree/bindings/reset/brcm,brcmstb-reset.txt +++ b/Documentation/devicetree/bindings/reset/brcm,brcmstb-reset.txt @@ -22,6 +22,6 @@ Example: }; ðernet_switch { - resets = <&reset>; + resets = <&reset 26>; reset-names = "switch"; }; diff --git a/Documentation/devicetree/bindings/rng/omap3_rom_rng.txt b/Documentation/devicetree/bindings/rng/omap3_rom_rng.txt new file mode 100644 index 0000000000000..f315c9723bd2a --- /dev/null +++ b/Documentation/devicetree/bindings/rng/omap3_rom_rng.txt @@ -0,0 +1,27 @@ +OMAP ROM RNG driver binding + +Secure SoCs may provide RNG via secure ROM calls like Nokia N900 does. The +implementation can depend on the SoC secure ROM used. + +- compatible: + Usage: required + Value type: + Definition: must be "nokia,n900-rom-rng" + +- clocks: + Usage: required + Value type: + Definition: reference to the the RNG interface clock + +- clock-names: + Usage: required + Value type: + Definition: must be "ick" + +Example: + + rom_rng: rng { + compatible = "nokia,n900-rom-rng"; + clocks = <&rng_ick>; + clock-names = "ick"; + }; diff --git a/Documentation/devicetree/bindings/sound/mt8183-mt6358-ts3a227-max98357.txt b/Documentation/devicetree/bindings/sound/mt8183-mt6358-ts3a227-max98357.txt index d6d5207fa996a..17ff3892f4391 100644 --- a/Documentation/devicetree/bindings/sound/mt8183-mt6358-ts3a227-max98357.txt +++ b/Documentation/devicetree/bindings/sound/mt8183-mt6358-ts3a227-max98357.txt @@ -2,9 +2,11 @@ MT8183 with MT6358, TS3A227 and MAX98357 CODECS Required properties: - compatible : "mediatek,mt8183_mt6358_ts3a227_max98357" -- mediatek,headset-codec: the phandles of ts3a227 codecs - mediatek,platform: the phandle of MT8183 ASoC platform +Optional properties: +- mediatek,headset-codec: the phandles of ts3a227 codecs + Example: sound { diff --git a/Documentation/devicetree/bindings/sound/wm8994.txt b/Documentation/devicetree/bindings/sound/wm8994.txt index 68cccc4653ba3..367b58ce1bb92 100644 --- a/Documentation/devicetree/bindings/sound/wm8994.txt +++ b/Documentation/devicetree/bindings/sound/wm8994.txt @@ -14,9 +14,15 @@ Required properties: - #gpio-cells : Must be 2. The first cell is the pin number and the second cell is used to specify optional parameters (currently unused). - - AVDD2-supply, DBVDD1-supply, DBVDD2-supply, DBVDD3-supply, CPVDD-supply, - SPKVDD1-supply, SPKVDD2-supply : power supplies for the device, as covered - in Documentation/devicetree/bindings/regulator/regulator.txt + - power supplies for the device, as covered in + Documentation/devicetree/bindings/regulator/regulator.txt, depending + on compatible: + - for wlf,wm1811 and wlf,wm8958: + AVDD1-supply, AVDD2-supply, DBVDD1-supply, DBVDD2-supply, DBVDD3-supply, + DCVDD-supply, CPVDD-supply, SPKVDD1-supply, SPKVDD2-supply + - for wlf,wm8994: + AVDD1-supply, AVDD2-supply, DBVDD-supply, DCVDD-supply, CPVDD-supply, + SPKVDD1-supply, SPKVDD2-supply Optional properties: @@ -73,11 +79,11 @@ wm8994: codec@1a { lineout1-se; + AVDD1-supply = <®ulator>; AVDD2-supply = <®ulator>; CPVDD-supply = <®ulator>; - DBVDD1-supply = <®ulator>; - DBVDD2-supply = <®ulator>; - DBVDD3-supply = <®ulator>; + DBVDD-supply = <®ulator>; + DCVDD-supply = <®ulator>; SPKVDD1-supply = <®ulator>; SPKVDD2-supply = <®ulator>; }; diff --git a/Documentation/devicetree/bindings/spi/spi-mxic.txt b/Documentation/devicetree/bindings/spi/spi-mxic.txt index 529f2dab2648a..7bcbb229b78bb 100644 --- a/Documentation/devicetree/bindings/spi/spi-mxic.txt +++ b/Documentation/devicetree/bindings/spi/spi-mxic.txt @@ -8,11 +8,13 @@ Required properties: - reg: should contain 2 entries, one for the registers and one for the direct mapping area - reg-names: should contain "regs" and "dirmap" -- interrupts: interrupt line connected to the SPI controller - clock-names: should contain "ps_clk", "send_clk" and "send_dly_clk" - clocks: should contain 3 entries for the "ps_clk", "send_clk" and "send_dly_clk" clocks +Optional properties: +- interrupts: interrupt line connected to the SPI controller + Example: spi@43c30000 { diff --git a/Documentation/devicetree/bindings/usb/dwc3.txt b/Documentation/devicetree/bindings/usb/dwc3.txt index 66780a47ad859..c977a3ba2f35c 100644 --- a/Documentation/devicetree/bindings/usb/dwc3.txt +++ b/Documentation/devicetree/bindings/usb/dwc3.txt @@ -75,6 +75,8 @@ Optional properties: from P0 to P1/P2/P3 without delay. - snps,dis-tx-ipgap-linecheck-quirk: when set, disable u2mac linestate check during HS transmit. + - snps,parkmode-disable-ss-quirk: when set, all SuperSpeed bus instances in + park mode are disabled. - snps,dis_metastability_quirk: when set, disable metastability workaround. CAUTION: use only if you are absolutely sure of it. - snps,is-utmi-l1-suspend: true when DWC3 asserts output signal diff --git a/Documentation/devicetree/writing-schema.rst b/Documentation/devicetree/writing-schema.rst index f4a638072262f..83e04e5c342da 100644 --- a/Documentation/devicetree/writing-schema.rst +++ b/Documentation/devicetree/writing-schema.rst @@ -130,11 +130,13 @@ binding schema. All of the DT binding documents can be validated using the make dt_binding_check -In order to perform validation of DT source files, use the `dtbs_check` target:: +In order to perform validation of DT source files, use the ``dtbs_check`` target:: make dtbs_check -This will first run the `dt_binding_check` which generates the processed schema. +Note that ``dtbs_check`` will skip any binding schema files with errors. It is +necessary to use ``dt_binding_check`` to get all the validation errors in the +binding schema files. It is also possible to run checks with a single schema file by setting the ``DT_SCHEMA_FILES`` variable to a specific schema file. diff --git a/Documentation/driver-api/dmaengine/dmatest.rst b/Documentation/driver-api/dmaengine/dmatest.rst index ee268d445d38b..d2e1d8b58e7dc 100644 --- a/Documentation/driver-api/dmaengine/dmatest.rst +++ b/Documentation/driver-api/dmaengine/dmatest.rst @@ -143,13 +143,14 @@ Part 5 - Handling channel allocation Allocating Channels ------------------- -Channels are required to be configured prior to starting the test run. -Attempting to run the test without configuring the channels will fail. +Channels do not need to be configured prior to starting a test run. Attempting +to run the test without configuring the channels will result in testing any +channels that are available. Example:: % echo 1 > /sys/module/dmatest/parameters/run - dmatest: Could not start test, no channels configured + dmatest: No channels configured, continue with any Channels are registered using the "channel" parameter. Channels can be requested by their name, once requested, the channel is registered and a pending thread is added to the test list. diff --git a/Documentation/driver-api/firewire.rst b/Documentation/driver-api/firewire.rst index 94a2d7f01d999..d3cfa73cbb2b4 100644 --- a/Documentation/driver-api/firewire.rst +++ b/Documentation/driver-api/firewire.rst @@ -19,7 +19,7 @@ of kernel interfaces is available via exported symbols in `firewire-core` module Firewire char device data structures ==================================== -.. include:: /ABI/stable/firewire-cdev +.. include:: ../ABI/stable/firewire-cdev :literal: .. kernel-doc:: include/uapi/linux/firewire-cdev.h @@ -28,7 +28,7 @@ Firewire char device data structures Firewire device probing and sysfs interfaces ============================================ -.. include:: /ABI/stable/sysfs-bus-firewire +.. include:: ../ABI/stable/sysfs-bus-firewire :literal: .. kernel-doc:: drivers/firewire/core-device.c diff --git a/Documentation/driver-api/libata.rst b/Documentation/driver-api/libata.rst index 70e180e6b93dc..9f3e5dc311840 100644 --- a/Documentation/driver-api/libata.rst +++ b/Documentation/driver-api/libata.rst @@ -250,7 +250,7 @@ High-level taskfile hooks :: - void (*qc_prep) (struct ata_queued_cmd *qc); + enum ata_completion_errors (*qc_prep) (struct ata_queued_cmd *qc); int (*qc_issue) (struct ata_queued_cmd *qc); diff --git a/Documentation/fb/fbcon.rst b/Documentation/fb/fbcon.rst index ebca41785abea..65ba402551374 100644 --- a/Documentation/fb/fbcon.rst +++ b/Documentation/fb/fbcon.rst @@ -127,7 +127,7 @@ C. Boot options is typically located on the same video card. Thus, the consoles that are controlled by the VGA console will be garbled. -4. fbcon=rotate: +5. fbcon=rotate: This option changes the orientation angle of the console display. The value 'n' accepts the following: @@ -152,21 +152,21 @@ C. Boot options Actually, the underlying fb driver is totally ignorant of console rotation. -5. fbcon=margin: +6. fbcon=margin: This option specifies the color of the margins. The margins are the leftover area at the right and the bottom of the screen that are not used by text. By default, this area will be black. The 'color' value is an integer number that depends on the framebuffer driver being used. -6. fbcon=nodefer +7. fbcon=nodefer If the kernel is compiled with deferred fbcon takeover support, normally the framebuffer contents, left in place by the firmware/bootloader, will be preserved until there actually is some text is output to the console. This option causes fbcon to bind immediately to the fbdev device. -7. fbcon=logo-pos: +8. fbcon=logo-pos: The only possible 'location' is 'center' (without quotes), and when given, the bootup logo is moved from the default top-left corner diff --git a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt index 4fae0464ddff2..32b297295fff7 100644 --- a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt +++ b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt @@ -24,7 +24,7 @@ | parisc: | ok | | powerpc: | ok | | riscv: | TODO | - | s390: | TODO | + | s390: | ok | | sh: | TODO | | sparc: | TODO | | um: | TODO | diff --git a/Documentation/filesystems/affs.txt b/Documentation/filesystems/affs.txt index 71b63c2b98410..a8f1a58e36922 100644 --- a/Documentation/filesystems/affs.txt +++ b/Documentation/filesystems/affs.txt @@ -93,13 +93,15 @@ The Amiga protection flags RWEDRWEDHSPARWED are handled as follows: - R maps to r for user, group and others. On directories, R implies x. - - If both W and D are allowed, w will be set. + - W maps to w. - E maps to x. - - H and P are always retained and ignored under Linux. + - D is ignored. - - A is always reset when a file is written to. + - H, S and P are always retained and ignored under Linux. + + - A is cleared when a file is written to. User id and group id will be used unless set[gu]id are given as mount options. Since most of the Amiga file systems are single user systems @@ -111,11 +113,13 @@ Linux -> Amiga: The Linux rwxrwxrwx file mode is handled as follows: - - r permission will set R for user, group and others. + - r permission will allow R for user, group and others. + + - w permission will allow W for user, group and others. - - w permission will set W and D for user, group and others. + - x permission of the user will allow E for plain files. - - x permission of the user will set E for plain files. + - D will be allowed for user, group and others. - All other flags (suid, sgid, ...) are ignored and will not be retained. diff --git a/Documentation/filesystems/aufs/README b/Documentation/filesystems/aufs/README new file mode 100644 index 0000000000000..3e655d357134c --- /dev/null +++ b/Documentation/filesystems/aufs/README @@ -0,0 +1,401 @@ + +Aufs5 -- advanced multi layered unification filesystem version 5.x +http://aufs.sf.net +Junjiro R. Okajima + + +0. Introduction +---------------------------------------- +In the early days, aufs was entirely re-designed and re-implemented +Unionfs Version 1.x series. Adding many original ideas, approaches, +improvements and implementations, it became totally different from +Unionfs while keeping the basic features. +Later, Unionfs Version 2.x series began taking some of the same +approaches to aufs1's. +Unionfs was being developed by Professor Erez Zadok at Stony Brook +University and his team. + +Aufs5 supports linux-v5.0 and later, If you want older kernel version +support, +- for linux-v4.x series, try aufs4-linux.git or aufs4-standalone.git +- for linux-v3.x series, try aufs3-linux.git or aufs3-standalone.git +- for linux-v2.6.16 and later, try aufs2-2.6.git, aufs2-standalone.git + or aufs1 from CVS on SourceForge. + +Note: it becomes clear that "Aufs was rejected. Let's give it up." + According to Christoph Hellwig, linux rejects all union-type + filesystems but UnionMount. + + +PS. Al Viro seems have a plan to merge aufs as well as overlayfs and + UnionMount, and he pointed out an issue around a directory mutex + lock and aufs addressed it. But it is still unsure whether aufs will + be merged (or any other union solution). + + + +1. Features +---------------------------------------- +- unite several directories into a single virtual filesystem. The member + directory is called as a branch. +- you can specify the permission flags to the branch, which are 'readonly', + 'readwrite' and 'whiteout-able.' +- by upper writable branch, internal copyup and whiteout, files/dirs on + readonly branch are modifiable logically. +- dynamic branch manipulation, add, del. +- etc... + +Also there are many enhancements in aufs, such as: +- test only the highest one for the directory permission (dirperm1) +- copyup on open (coo=) +- 'move' policy for copy-up between two writable branches, after + checking free space. +- xattr, acl +- readdir(3) in userspace. +- keep inode number by external inode number table +- keep the timestamps of file/dir in internal copyup operation +- seekable directory, supporting NFS readdir. +- whiteout is hardlinked in order to reduce the consumption of inodes + on branch +- do not copyup, nor create a whiteout when it is unnecessary +- revert a single systemcall when an error occurs in aufs +- remount interface instead of ioctl +- maintain /etc/mtab by an external command, /sbin/mount.aufs. +- loopback mounted filesystem as a branch +- kernel thread for removing the dir who has a plenty of whiteouts +- support copyup sparse file (a file which has a 'hole' in it) +- default permission flags for branches +- selectable permission flags for ro branch, whether whiteout can + exist or not +- export via NFS. +- support /fs/aufs and /aufs. +- support multiple writable branches, some policies to select one + among multiple writable branches. +- a new semantics for link(2) and rename(2) to support multiple + writable branches. +- no glibc changes are required. +- pseudo hardlink (hardlink over branches) +- allow a direct access manually to a file on branch, e.g. bypassing aufs. + including NFS or remote filesystem branch. +- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX. +- and more... + +Currently these features are dropped temporary from aufs5. +See design/08plan.txt in detail. +- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs + (robr) +- statistics of aufs thread (/sys/fs/aufs/stat) + +Features or just an idea in the future (see also design/*.txt), +- reorder the branch index without del/re-add. +- permanent xino files for NFSD +- an option for refreshing the opened files after add/del branches +- light version, without branch manipulation. (unnecessary?) +- copyup in userspace +- inotify in userspace +- readv/writev + + +2. Download +---------------------------------------- +There are three GIT trees for aufs5, aufs5-linux.git, +aufs5-standalone.git, and aufs-util.git. Note that there is no "5" in +"aufs-util.git." +While the aufs-util is always necessary, you need either of aufs5-linux +or aufs5-standalone. + +The aufs5-linux tree includes the whole linux mainline GIT tree, +git://git.kernel.org/.../torvalds/linux.git. +And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot +build aufs5 as an external kernel module. +Several extra patches are not included in this tree. Only +aufs5-standalone tree contains them. They are described in the later +section "Configuration and Compilation." + +On the other hand, the aufs5-standalone tree has only aufs source files +and necessary patches, and you can select CONFIG_AUFS_FS=m. +But you need to apply all aufs patches manually. + +You will find GIT branches whose name is in form of "aufs5.x" where "x" +represents the linux kernel version, "linux-5.x". For instance, +"aufs5.0" is for linux-5.0. For latest "linux-5.x-rcN", use +"aufs5.x-rcN" branch. + +o aufs5-linux tree +$ git clone --reference /your/linux/git/tree \ + git://github.com/sfjro/aufs5-linux.git aufs5-linux.git +- if you don't have linux GIT tree, then remove "--reference ..." +$ cd aufs5-linux.git +$ git checkout origin/aufs5.0 + +Or You may want to directly git-pull aufs into your linux GIT tree, and +leave the patch-work to GIT. +$ cd /your/linux/git/tree +$ git remote add aufs5 git://github.com/sfjro/aufs5-linux.git +$ git fetch aufs5 +$ git checkout -b my5.0 v5.0 +$ (add your local change...) +$ git pull aufs5 aufs5.0 +- now you have v5.0 + your_changes + aufs5.0 in you my5.0 branch. +- you may need to solve some conflicts between your_changes and + aufs5.0. in this case, git-rerere is recommended so that you can + solve the similar conflicts automatically when you upgrade to 5.1 or + later in the future. + +o aufs5-standalone tree +$ git clone git://github.com/sfjro/aufs5-standalone.git aufs5-standalone.git +$ cd aufs5-standalone.git +$ git checkout origin/aufs5.0 + +o aufs-util tree +$ git clone git://git.code.sf.net/p/aufs/aufs-util aufs-util.git +- note that the public aufs-util.git is on SourceForge instead of + GitHUB. +$ cd aufs-util.git +$ git checkout origin/aufs5.0 + +Note: The 5.x-rcN branch is to be used with `rc' kernel versions ONLY. +The minor version number, 'x' in '5.x', of aufs may not always +follow the minor version number of the kernel. +Because changes in the kernel that cause the use of a new +minor version number do not always require changes to aufs-util. + +Since aufs-util has its own minor version number, you may not be +able to find a GIT branch in aufs-util for your kernel's +exact minor version number. +In this case, you should git-checkout the branch for the +nearest lower number. + +For (an unreleased) example: +If you are using "linux-5.10" and the "aufs5.10" branch +does not exist in aufs-util repository, then "aufs5.9", "aufs5.8" +or something numerically smaller is the branch for your kernel. + +Also you can view all branches by + $ git branch -a + + +3. Configuration and Compilation +---------------------------------------- +Make sure you have git-checkout'ed the correct branch. + +For aufs5-linux tree, +- enable CONFIG_AUFS_FS. +- set other aufs configurations if necessary. + +For aufs5-standalone tree, +There are several ways to build. + +1. +- apply ./aufs5-kbuild.patch to your kernel source files. +- apply ./aufs5-base.patch too. +- apply ./aufs5-mmap.patch too. +- apply ./aufs5-standalone.patch too, if you have a plan to set + CONFIG_AUFS_FS=m. otherwise you don't need ./aufs5-standalone.patch. +- copy ./{Documentation,fs,include/uapi/linux/aufs_type.h} files to your + kernel source tree. Never copy $PWD/include/uapi/linux/Kbuild. +- enable CONFIG_AUFS_FS, you can select either + =m or =y. +- and build your kernel as usual. +- install the built kernel. +- install the header files too by "make headers_install" to the + directory where you specify. By default, it is $PWD/usr. + "make help" shows a brief note for headers_install. +- and reboot your system. + +2. +- module only (CONFIG_AUFS_FS=m). +- apply ./aufs5-base.patch to your kernel source files. +- apply ./aufs5-mmap.patch too. +- apply ./aufs5-standalone.patch too. +- build your kernel, don't forget "make headers_install", and reboot. +- edit ./config.mk and set other aufs configurations if necessary. + Note: You should read $PWD/fs/aufs/Kconfig carefully which describes + every aufs configurations. +- build the module by simple "make". +- you can specify ${KDIR} make variable which points to your kernel + source tree. +- install the files + + run "make install" to install the aufs module, or copy the built + $PWD/aufs.ko to /lib/modules/... and run depmod -a (or reboot simply). + + run "make install_headers" (instead of headers_install) to install + the modified aufs header file (you can specify DESTDIR which is + available in aufs standalone version's Makefile only), or copy + $PWD/usr/include/linux/aufs_type.h to /usr/include/linux or wherever + you like manually. By default, the target directory is $PWD/usr. +- no need to apply aufs5-kbuild.patch, nor copying source files to your + kernel source tree. + +Note: The header file aufs_type.h is necessary to build aufs-util + as well as "make headers_install" in the kernel source tree. + headers_install is subject to be forgotten, but it is essentially + necessary, not only for building aufs-util. + You may not meet problems without headers_install in some older + version though. + +And then, +- read README in aufs-util, build and install it +- note that your distribution may contain an obsoleted version of + aufs_type.h in /usr/include/linux or something. When you build aufs + utilities, make sure that your compiler refers the correct aufs header + file which is built by "make headers_install." +- if you want to use readdir(3) in userspace or pathconf(3) wrapper, + then run "make install_ulib" too. And refer to the aufs manual in + detail. + +There several other patches in aufs5-standalone.git. They are all +optional. When you meet some problems, they will help you. +- aufs5-loopback.patch + Supports a nested loopback mount in a branch-fs. This patch is + unnecessary until aufs produces a message like "you may want to try + another patch for loopback file". +- proc_mounts.patch + When there are many mountpoints and many mount(2)/umount(2) are + running, then /proc/mounts may not show the all mountpoints. This + patch makes /proc/mounts always show the full mountpoints list. + If you don't want to apply this patch and meet such problem, then you + need to increase the value of 'ProcMounts_Times' make-variable in + aufs-util.git as a second best solution. +- vfs-ino.patch + Modifies a system global kernel internal function get_next_ino() in + order to stop assigning 0 for an inode-number. Not directly related to + aufs, but recommended generally. +- tmpfs-idr.patch + Keeps the tmpfs inode number as the lowest value. Effective to reduce + the size of aufs XINO files for tmpfs branch. Also it prevents the + duplication of inode number, which is important for backup tools and + other utilities. When you find aufs XINO files for tmpfs branch + growing too much, try this patch. +- lockdep-debug.patch + Because aufs is not only an ordinary filesystem (callee of VFS), but + also a caller of VFS functions for branch filesystems, subclassing of + the internal locks for LOCKDEP is necessary. LOCKDEP is a debugging + feature of linux kernel. If you enable CONFIG_LOCKDEP, then you will + need to apply this debug patch to expand several constant values. + If you don't know what LOCKDEP is, then you don't have apply this + patch. + + +4. Usage +---------------------------------------- +At first, make sure aufs-util are installed, and please read the aufs +manual, aufs.5 in aufs-util.git tree. +$ man -l aufs.5 + +And then, +$ mkdir /tmp/rw /tmp/aufs +# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs + +Here is another example. The result is equivalent. +# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs + Or +# mount -t aufs -o br:/tmp/rw none /tmp/aufs +# mount -o remount,append:${HOME} /tmp/aufs + +Then, you can see whole tree of your home dir through /tmp/aufs. If +you modify a file under /tmp/aufs, the one on your home directory is +not affected, instead the same named file will be newly created under +/tmp/rw. And all of your modification to a file will be applied to +the one under /tmp/rw. This is called the file based Copy on Write +(COW) method. +Aufs mount options are described in aufs.5. +If you run chroot or something and make your aufs as a root directory, +then you need to customize the shutdown script. See the aufs manual in +detail. + +Additionally, there are some sample usages of aufs which are a +diskless system with network booting, and LiveCD over NFS. +See sample dir in CVS tree on SourceForge. + + +5. Contact +---------------------------------------- +When you have any problems or strange behaviour in aufs, please let me +know with: +- /proc/mounts (instead of the output of mount(8)) +- /sys/module/aufs/* +- /sys/fs/aufs/* (if you have them) +- /debug/aufs/* (if you have them) +- linux kernel version + if your kernel is not plain, for example modified by distributor, + the url where i can download its source is necessary too. +- aufs version which was printed at loading the module or booting the + system, instead of the date you downloaded. +- configuration (define/undefine CONFIG_AUFS_xxx) +- kernel configuration or /proc/config.gz (if you have it) +- LSM (linux security module, if you are using) +- behaviour which you think to be incorrect +- actual operation, reproducible one is better +- mailto: aufs-users at lists.sourceforge.net + +Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches, +and Feature Requests) on SourceForge. Please join and write to +aufs-users ML. + + +6. Acknowledgements +---------------------------------------- +Thanks to everyone who have tried and are using aufs, whoever +have reported a bug or any feedback. + +Especially donators: +Tomas Matejicek(slax.org) made a donation (much more than once). + Since Apr 2010, Tomas M (the author of Slax and Linux Live + scripts) is making "doubling" donations. + Unfortunately I cannot list all of the donators, but I really + appreciate. + It ends Aug 2010, but the ordinary donation URL is still available. + +Dai Itasaka made a donation (2007/8). +Chuck Smith made a donation (2008/4, 10 and 12). +Henk Schoneveld made a donation (2008/9). +Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10). +Francois Dupoux made a donation (2008/11). +Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public + aufs2 GIT tree (2009/2). +William Grant made a donation (2009/3). +Patrick Lane made a donation (2009/4). +The Mail Archive (mail-archive.com) made donations (2009/5). +Nippy Networks (Ed Wildgoose) made a donation (2009/7). +New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11). +Pavel Pronskiy made a donation (2011/2). +Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy + Networks (Ed Wildgoose) made a donation for hardware (2011/3). +Max Lekomcev (DOM-TV project) made a donation (2011/7, 12, 2012/3, 6 and +11). +Sam Liddicott made a donation (2011/9). +Era Scarecrow made a donation (2013/4). +Bor Ratajc made a donation (2013/4). +Alessandro Gorreta made a donation (2013/4). +POIRETTE Marc made a donation (2013/4). +Alessandro Gorreta made a donation (2013/4). +lauri kasvandik made a donation (2013/5). +"pemasu from Finland" made a donation (2013/7). +The Parted Magic Project made a donation (2013/9 and 11). +Pavel Barta made a donation (2013/10). +Nikolay Pertsev made a donation (2014/5). +James B made a donation (2014/7 and 2015/7). +Stefano Di Biase made a donation (2014/8). +Daniel Epellei made a donation (2015/1). +OmegaPhil made a donation (2016/1, 2018/4). +Tomasz Szewczyk made a donation (2016/4). +James Burry made a donation (2016/12). +Carsten Rose made a donation (2018/9). +Porteus Kiosk made a donation (2018/10). + +Thank you very much. +Donations are always, including future donations, very important and +helpful for me to keep on developing aufs. + + +7. +---------------------------------------- +If you are an experienced user, no explanation is needed. Aufs is +just a linux filesystem. + + +Enjoy! + +# Local variables: ; +# mode: text; +# End: ; diff --git a/Documentation/filesystems/aufs/design/01intro.txt b/Documentation/filesystems/aufs/design/01intro.txt new file mode 100644 index 0000000000000..47e0a01a8af2c --- /dev/null +++ b/Documentation/filesystems/aufs/design/01intro.txt @@ -0,0 +1,171 @@ + +# Copyright (C) 2005-2020 Junjiro R. Okajima +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +Introduction +---------------------------------------- + +aufs [ei ju: ef es] | /ey-yoo-ef-es/ | [a u f s] +1. abbrev. for "advanced multi-layered unification filesystem". +2. abbrev. for "another unionfs". +3. abbrev. for "auf das" in German which means "on the" in English. + Ex. "Butter aufs Brot"(G) means "butter onto bread"(E). + But "Filesystem aufs Filesystem" is hard to understand. +4. abbrev. for "African Urban Fashion Show". + +AUFS is a filesystem with features: +- multi layered stackable unification filesystem, the member directory + is called as a branch. +- branch permission and attribute, 'readonly', 'real-readonly', + 'readwrite', 'whiteout-able', 'link-able whiteout', etc. and their + combination. +- internal "file copy-on-write". +- logical deletion, whiteout. +- dynamic branch manipulation, adding, deleting and changing permission. +- allow bypassing aufs, user's direct branch access. +- external inode number translation table and bitmap which maintains the + persistent aufs inode number. +- seekable directory, including NFS readdir. +- file mapping, mmap and sharing pages. +- pseudo-link, hardlink over branches. +- loopback mounted filesystem as a branch. +- several policies to select one among multiple writable branches. +- revert a single systemcall when an error occurs in aufs. +- and more... + + +Multi Layered Stackable Unification Filesystem +---------------------------------------------------------------------- +Most people already knows what it is. +It is a filesystem which unifies several directories and provides a +merged single directory. When users access a file, the access will be +passed/re-directed/converted (sorry, I am not sure which English word is +correct) to the real file on the member filesystem. The member +filesystem is called 'lower filesystem' or 'branch' and has a mode +'readonly' and 'readwrite.' And the deletion for a file on the lower +readonly branch is handled by creating 'whiteout' on the upper writable +branch. + +On LKML, there have been discussions about UnionMount (Jan Blunck, +Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took +different approaches to implement the merged-view. +The former tries putting it into VFS, and the latter implements as a +separate filesystem. +(If I misunderstand about these implementations, please let me know and +I shall correct it. Because it is a long time ago when I read their +source files last time). + +UnionMount's approach will be able to small, but may be hard to share +branches between several UnionMount since the whiteout in it is +implemented in the inode on branch filesystem and always +shared. According to Bharata's post, readdir does not seems to be +finished yet. +There are several missing features known in this implementations such as +- for users, the inode number may change silently. eg. copy-up. +- link(2) may break by copy-up. +- read(2) may get an obsoleted filedata (fstat(2) too). +- fcntl(F_SETLK) may be broken by copy-up. +- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after + open(O_RDWR). + +In linux-3.18, "overlay" filesystem (formerly known as "overlayfs") was +merged into mainline. This is another implementation of UnionMount as a +separated filesystem. All the limitations and known problems which +UnionMount are equally inherited to "overlay" filesystem. + +Unionfs has a longer history. When I started implementing a stackable +filesystem (Aug 2005), it already existed. It has virtual super_block, +inode, dentry and file objects and they have an array pointing lower +same kind objects. After contributing many patches for Unionfs, I +re-started my project AUFS (Jun 2006). + +In AUFS, the structure of filesystem resembles to Unionfs, but I +implemented my own ideas, approaches and enhancements and it became +totally different one. + +Comparing DM snapshot and fs based implementation +- the number of bytes to be copied between devices is much smaller. +- the type of filesystem must be one and only. +- the fs must be writable, no readonly fs, even for the lower original + device. so the compression fs will not be usable. but if we use + loopback mount, we may address this issue. + for instance, + mount /cdrom/squashfs.img /sq + losetup /sq/ext2.img + losetup /somewhere/cow + dmsetup "snapshot /dev/loop0 /dev/loop1 ..." +- it will be difficult (or needs more operations) to extract the + difference between the original device and COW. +- DM snapshot-merge may help a lot when users try merging. in the + fs-layer union, users will use rsync(1). + +You may want to read my old paper "Filesystems in LiveCD" +(http://aufs.sourceforge.net/aufs2/report/sq/sq.pdf). + + +Several characters/aspects/persona of aufs +---------------------------------------------------------------------- + +Aufs has several characters, aspects or persona. +1. a filesystem, callee of VFS helper +2. sub-VFS, caller of VFS helper for branches +3. a virtual filesystem which maintains persistent inode number +4. reader/writer of files on branches such like an application + +1. Callee of VFS Helper +As an ordinary linux filesystem, aufs is a callee of VFS. For instance, +unlink(2) from an application reaches sys_unlink() kernel function and +then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it +calls filesystem specific unlink operation. Actually aufs implements the +unlink operation but it behaves like a redirector. + +2. Caller of VFS Helper for Branches +aufs_unlink() passes the unlink request to the branch filesystem as if +it were called from VFS. So the called unlink operation of the branch +filesystem acts as usual. As a caller of VFS helper, aufs should handle +every necessary pre/post operation for the branch filesystem. +- acquire the lock for the parent dir on a branch +- lookup in a branch +- revalidate dentry on a branch +- mnt_want_write() for a branch +- vfs_unlink() for a branch +- mnt_drop_write() for a branch +- release the lock on a branch + +3. Persistent Inode Number +One of the most important issue for a filesystem is to maintain inode +numbers. This is particularly important to support exporting a +filesystem via NFS. Aufs is a virtual filesystem which doesn't have a +backend block device for its own. But some storage is necessary to +keep and maintain the inode numbers. It may be a large space and may not +suit to keep in memory. Aufs rents some space from its first writable +branch filesystem (by default) and creates file(s) on it. These files +are created by aufs internally and removed soon (currently) keeping +opened. +Note: Because these files are removed, they are totally gone after + unmounting aufs. It means the inode numbers are not persistent + across unmount or reboot. I have a plan to make them really + persistent which will be important for aufs on NFS server. + +4. Read/Write Files Internally (copy-on-write) +Because a branch can be readonly, when you write a file on it, aufs will +"copy-up" it to the upper writable branch internally. And then write the +originally requested thing to the file. Generally kernel doesn't +open/read/write file actively. In aufs, even a single write may cause a +internal "file copy". This behaviour is very similar to cp(1) command. + +Some people may think it is better to pass such work to user space +helper, instead of doing in kernel space. Actually I am still thinking +about it. But currently I have implemented it in kernel space. diff --git a/Documentation/filesystems/aufs/design/02struct.txt b/Documentation/filesystems/aufs/design/02struct.txt new file mode 100644 index 0000000000000..0092b3dcfbe0f --- /dev/null +++ b/Documentation/filesystems/aufs/design/02struct.txt @@ -0,0 +1,258 @@ + +# Copyright (C) 2005-2020 Junjiro R. Okajima +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +Basic Aufs Internal Structure + +Superblock/Inode/Dentry/File Objects +---------------------------------------------------------------------- +As like an ordinary filesystem, aufs has its own +superblock/inode/dentry/file objects. All these objects have a +dynamically allocated array and store the same kind of pointers to the +lower filesystem, branch. +For example, when you build a union with one readwrite branch and one +readonly, mounted /au, /rw and /ro respectively. +- /au = /rw + /ro +- /ro/fileA exists but /rw/fileA + +Aufs lookup operation finds /ro/fileA and gets dentry for that. These +pointers are stored in a aufs dentry. The array in aufs dentry will be, +- [0] = NULL (because /rw/fileA doesn't exist) +- [1] = /ro/fileA + +This style of an array is essentially same to the aufs +superblock/inode/dentry/file objects. + +Because aufs supports manipulating branches, ie. add/delete/change +branches dynamically, these objects has its own generation. When +branches are changed, the generation in aufs superblock is +incremented. And a generation in other object are compared when it is +accessed. When a generation in other objects are obsoleted, aufs +refreshes the internal array. + + +Superblock +---------------------------------------------------------------------- +Additionally aufs superblock has some data for policies to select one +among multiple writable branches, XIB files, pseudo-links and kobject. +See below in detail. +About the policies which supports copy-down a directory, see +wbr_policy.txt too. + + +Branch and XINO(External Inode Number Translation Table) +---------------------------------------------------------------------- +Every branch has its own xino (external inode number translation table) +file. The xino file is created and unlinked by aufs internally. When two +members of a union exist on the same filesystem, they share the single +xino file. +The struct of a xino file is simple, just a sequence of aufs inode +numbers which is indexed by the lower inode number. +In the above sample, assume the inode number of /ro/fileA is i111 and +aufs assigns the inode number i999 for fileA. Then aufs writes 999 as +4(8) bytes at 111 * 4(8) bytes offset in the xino file. + +When the inode numbers are not contiguous, the xino file will be sparse +which has a hole in it and doesn't consume as much disk space as it +might appear. If your branch filesystem consumes disk space for such +holes, then you should specify 'xino=' option at mounting aufs. + +Aufs has a mount option to free the disk blocks for such holes in XINO +files on tmpfs or ramdisk. But it is not so effective actually. If you +meet a problem of disk shortage due to XINO files, then you should try +"tmpfs-ino.patch" (and "vfs-ino.patch" too) in aufs4-standalone.git. +The patch localizes the assignment inumbers per tmpfs-mount and avoid +the holes in XINO files. + +Also a writable branch has three kinds of "whiteout bases". All these +are existed when the branch is joined to aufs, and their names are +whiteout-ed doubly, so that users will never see their names in aufs +hierarchy. +1. a regular file which will be hardlinked to all whiteouts. +2. a directory to store a pseudo-link. +3. a directory to store an "orphan"-ed file temporary. + +1. Whiteout Base + When you remove a file on a readonly branch, aufs handles it as a + logical deletion and creates a whiteout on the upper writable branch + as a hardlink of this file in order not to consume inode on the + writable branch. +2. Pseudo-link Dir + See below, Pseudo-link. +3. Step-Parent Dir + When "fileC" exists on the lower readonly branch only and it is + opened and removed with its parent dir, and then user writes + something into it, then aufs copies-up fileC to this + directory. Because there is no other dir to store fileC. After + creating a file under this dir, the file is unlinked. + +Because aufs supports manipulating branches, ie. add/delete/change +dynamically, a branch has its own id. When the branch order changes, +aufs finds the new index by searching the branch id. + + +Pseudo-link +---------------------------------------------------------------------- +Assume "fileA" exists on the lower readonly branch only and it is +hardlinked to "fileB" on the branch. When you write something to fileA, +aufs copies-up it to the upper writable branch. Additionally aufs +creates a hardlink under the Pseudo-link Directory of the writable +branch. The inode of a pseudo-link is kept in aufs super_block as a +simple list. If fileB is read after unlinking fileA, aufs returns +filedata from the pseudo-link instead of the lower readonly +branch. Because the pseudo-link is based upon the inode, to keep the +inode number by xino (see above) is essentially necessary. + +All the hardlinks under the Pseudo-link Directory of the writable branch +should be restored in a proper location later. Aufs provides a utility +to do this. The userspace helpers executed at remounting and unmounting +aufs by default. +During this utility is running, it puts aufs into the pseudo-link +maintenance mode. In this mode, only the process which began the +maintenance mode (and its child processes) is allowed to operate in +aufs. Some other processes which are not related to the pseudo-link will +be allowed to run too, but the rest have to return an error or wait +until the maintenance mode ends. If a process already acquires an inode +mutex (in VFS), it has to return an error. + + +XIB(external inode number bitmap) +---------------------------------------------------------------------- +Addition to the xino file per a branch, aufs has an external inode number +bitmap in a superblock object. It is also an internal file such like a +xino file. +It is a simple bitmap to mark whether the aufs inode number is in-use or +not. +To reduce the file I/O, aufs prepares a single memory page to cache xib. + +As well as XINO files, aufs has a feature to truncate/refresh XIB to +reduce the number of consumed disk blocks for these files. + + +Virtual or Vertical Dir, and Readdir in Userspace +---------------------------------------------------------------------- +In order to support multiple layers (branches), aufs readdir operation +constructs a virtual dir block on memory. For readdir, aufs calls +vfs_readdir() internally for each dir on branches, merges their entries +with eliminating the whiteout-ed ones, and sets it to file (dir) +object. So the file object has its entry list until it is closed. The +entry list will be updated when the file position is zero and becomes +obsoleted. This decision is made in aufs automatically. + +The dynamically allocated memory block for the name of entries has a +unit of 512 bytes (by default) and stores the names contiguously (no +padding). Another block for each entry is handled by kmem_cache too. +During building dir blocks, aufs creates hash list and judging whether +the entry is whiteouted by its upper branch or already listed. +The merged result is cached in the corresponding inode object and +maintained by a customizable life-time option. + +Some people may call it can be a security hole or invite DoS attack +since the opened and once readdir-ed dir (file object) holds its entry +list and becomes a pressure for system memory. But I'd say it is similar +to files under /proc or /sys. The virtual files in them also holds a +memory page (generally) while they are opened. When an idea to reduce +memory for them is introduced, it will be applied to aufs too. +For those who really hate this situation, I've developed readdir(3) +library which operates this merging in userspace. You just need to set +LD_PRELOAD environment variable, and aufs will not consume no memory in +kernel space for readdir(3). + + +Workqueue +---------------------------------------------------------------------- +Aufs sometimes requires privilege access to a branch. For instance, +in copy-up/down operation. When a user process is going to make changes +to a file which exists in the lower readonly branch only, and the mode +of one of ancestor directories may not be writable by a user +process. Here aufs copy-up the file with its ancestors and they may +require privilege to set its owner/group/mode/etc. +This is a typical case of a application character of aufs (see +Introduction). + +Aufs uses workqueue synchronously for this case. It creates its own +workqueue. The workqueue is a kernel thread and has privilege. Aufs +passes the request to call mkdir or write (for example), and wait for +its completion. This approach solves a problem of a signal handler +simply. +If aufs didn't adopt the workqueue and changed the privilege of the +process, then the process may receive the unexpected SIGXFSZ or other +signals. + +Also aufs uses the system global workqueue ("events" kernel thread) too +for asynchronous tasks, such like handling inotify/fsnotify, re-creating a +whiteout base and etc. This is unrelated to a privilege. +Most of aufs operation tries acquiring a rw_semaphore for aufs +superblock at the beginning, at the same time waits for the completion +of all queued asynchronous tasks. + + +Whiteout +---------------------------------------------------------------------- +The whiteout in aufs is very similar to Unionfs's. That is represented +by its filename. UnionMount takes an approach of a file mode, but I am +afraid several utilities (find(1) or something) will have to support it. + +Basically the whiteout represents "logical deletion" which stops aufs to +lookup further, but also it represents "dir is opaque" which also stop +further lookup. + +In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively. +In order to make several functions in a single systemcall to be +revertible, aufs adopts an approach to rename a directory to a temporary +unique whiteouted name. +For example, in rename(2) dir where the target dir already existed, aufs +renames the target dir to a temporary unique whiteouted name before the +actual rename on a branch, and then handles other actions (make it opaque, +update the attributes, etc). If an error happens in these actions, aufs +simply renames the whiteouted name back and returns an error. If all are +succeeded, aufs registers a function to remove the whiteouted unique +temporary name completely and asynchronously to the system global +workqueue. + + +Copy-up +---------------------------------------------------------------------- +It is a well-known feature or concept. +When user modifies a file on a readonly branch, aufs operate "copy-up" +internally and makes change to the new file on the upper writable branch. +When the trigger systemcall does not update the timestamps of the parent +dir, aufs reverts it after copy-up. + + +Move-down (aufs3.9 and later) +---------------------------------------------------------------------- +"Copy-up" is one of the essential feature in aufs. It copies a file from +the lower readonly branch to the upper writable branch when a user +changes something about the file. +"Move-down" is an opposite action of copy-up. Basically this action is +ran manually instead of automatically and internally. +For desgin and implementation, aufs has to consider these issues. +- whiteout for the file may exist on the lower branch. +- ancestor directories may not exist on the lower branch. +- diropq for the ancestor directories may exist on the upper branch. +- free space on the lower branch will reduce. +- another access to the file may happen during moving-down, including + UDBA (see "Revalidate Dentry and UDBA"). +- the file should not be hard-linked nor pseudo-linked. they should be + handled by auplink utility later. + +Sometimes users want to move-down a file from the upper writable branch +to the lower readonly or writable branch. For instance, +- the free space of the upper writable branch is going to run out. +- create a new intermediate branch between the upper and lower branch. +- etc. + +For this purpose, use "aumvdown" command in aufs-util.git. diff --git a/Documentation/filesystems/aufs/design/03atomic_open.txt b/Documentation/filesystems/aufs/design/03atomic_open.txt new file mode 100644 index 0000000000000..fb8cf0bc8c720 --- /dev/null +++ b/Documentation/filesystems/aufs/design/03atomic_open.txt @@ -0,0 +1,85 @@ + +# Copyright (C) 2015-2020 Junjiro R. Okajima +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +Support for a branch who has its ->atomic_open() +---------------------------------------------------------------------- +The filesystems who implement its ->atomic_open() are not majority. For +example NFSv4 does, and aufs should call NFSv4 ->atomic_open, +particularly for open(O_CREAT|O_EXCL, 0400) case. Other than +->atomic_open(), NFSv4 returns an error for this open(2). While I am not +sure whether all filesystems who have ->atomic_open() behave like this, +but NFSv4 surely returns the error. + +In order to support ->atomic_open() for aufs, there are a few +approaches. + +A. Introduce aufs_atomic_open() + - calls one of VFS:do_last(), lookup_open() or atomic_open() for + branch fs. +B. Introduce aufs_atomic_open() calling create, open and chmod. this is + an aufs user Pip Cet's approach + - calls aufs_create(), VFS finish_open() and notify_change(). + - pass fake-mode to finish_open(), and then correct the mode by + notify_change(). +C. Extend aufs_open() to call branch fs's ->atomic_open() + - no aufs_atomic_open(). + - aufs_lookup() registers the TID to an aufs internal object. + - aufs_create() does nothing when the matching TID is registered, but + registers the mode. + - aufs_open() calls branch fs's ->atomic_open() when the matching + TID is registered. +D. Extend aufs_open() to re-try branch fs's ->open() with superuser's + credential + - no aufs_atomic_open(). + - aufs_create() registers the TID to an internal object. this info + represents "this process created this file just now." + - when aufs gets EACCES from branch fs's ->open(), then confirm the + registered TID and re-try open() with superuser's credential. + +Pros and cons for each approach. + +A. + - straightforward but highly depends upon VFS internal. + - the atomic behavaiour is kept. + - some of parameters such as nameidata are hard to reproduce for + branch fs. + - large overhead. +B. + - easy to implement. + - the atomic behavaiour is lost. +C. + - the atomic behavaiour is kept. + - dirty and tricky. + - VFS checks whether the file is created correctly after calling + ->create(), which means this approach doesn't work. +D. + - easy to implement. + - the atomic behavaiour is lost. + - to open a file with superuser's credential and give it to a user + process is a bad idea, since the file object keeps the credential + in it. It may affect LSM or something. This approach doesn't work + either. + +The approach A is ideal, but it hard to implement. So here is a +variation of A, which is to be implemented. + +A-1. Introduce aufs_atomic_open() + - calls branch fs ->atomic_open() if exists. otherwise calls + vfs_create() and finish_open(). + - the demerit is that the several checks after branch fs + ->atomic_open() are lost. in the ordinary case, the checks are + done by VFS:do_last(), lookup_open() and atomic_open(). some can + be implemented in aufs, but not all I am afraid. diff --git a/Documentation/filesystems/aufs/design/03lookup.txt b/Documentation/filesystems/aufs/design/03lookup.txt new file mode 100644 index 0000000000000..5c3c97f11c571 --- /dev/null +++ b/Documentation/filesystems/aufs/design/03lookup.txt @@ -0,0 +1,113 @@ + +# Copyright (C) 2005-2020 Junjiro R. Okajima +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +Lookup in a Branch +---------------------------------------------------------------------- +Since aufs has a character of sub-VFS (see Introduction), it operates +lookup for branches as VFS does. It may be a heavy work. But almost all +lookup operation in aufs is the simplest case, ie. lookup only an entry +directly connected to its parent. Digging down the directory hierarchy +is unnecessary. VFS has a function lookup_one_len() for that use, and +aufs calls it. + +When a branch is a remote filesystem, aufs basically relies upon its +->d_revalidate(), also aufs forces the hardest revalidate tests for +them. +For d_revalidate, aufs implements three levels of revalidate tests. See +"Revalidate Dentry and UDBA" in detail. + + +Test Only the Highest One for the Directory Permission (dirperm1 option) +---------------------------------------------------------------------- +Let's try case study. +- aufs has two branches, upper readwrite and lower readonly. + /au = /rw + /ro +- "dirA" exists under /ro, but /rw. and its mode is 0700. +- user invoked "chmod a+rx /au/dirA" +- the internal copy-up is activated and "/rw/dirA" is created and its + permission bits are set to world readable. +- then "/au/dirA" becomes world readable? + +In this case, /ro/dirA is still 0700 since it exists in readonly branch, +or it may be a natively readonly filesystem. If aufs respects the lower +branch, it should not respond readdir request from other users. But user +allowed it by chmod. Should really aufs rejects showing the entries +under /ro/dirA? + +To be honest, I don't have a good solution for this case. So aufs +implements 'dirperm1' and 'nodirperm1' mount options, and leave it to +users. +When dirperm1 is specified, aufs checks only the highest one for the +directory permission, and shows the entries. Otherwise, as usual, checks +every dir existing on all branches and rejects the request. + +As a side effect, dirperm1 option improves the performance of aufs +because the number of permission check is reduced when the number of +branch is many. + + +Revalidate Dentry and UDBA (User's Direct Branch Access) +---------------------------------------------------------------------- +Generally VFS helpers re-validate a dentry as a part of lookup. +0. digging down the directory hierarchy. +1. lock the parent dir by its i_mutex. +2. lookup the final (child) entry. +3. revalidate it. +4. call the actual operation (create, unlink, etc.) +5. unlock the parent dir + +If the filesystem implements its ->d_revalidate() (step 3), then it is +called. Actually aufs implements it and checks the dentry on a branch is +still valid. +But it is not enough. Because aufs has to release the lock for the +parent dir on a branch at the end of ->lookup() (step 2) and +->d_revalidate() (step 3) while the i_mutex of the aufs dir is still +held by VFS. +If the file on a branch is changed directly, eg. bypassing aufs, after +aufs released the lock, then the subsequent operation may cause +something unpleasant result. + +This situation is a result of VFS architecture, ->lookup() and +->d_revalidate() is separated. But I never say it is wrong. It is a good +design from VFS's point of view. It is just not suitable for sub-VFS +character in aufs. + +Aufs supports such case by three level of revalidation which is +selectable by user. +1. Simple Revalidate + Addition to the native flow in VFS's, confirm the child-parent + relationship on the branch just after locking the parent dir on the + branch in the "actual operation" (step 4). When this validation + fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still + checks the validation of the dentry on branches. +2. Monitor Changes Internally by Inotify/Fsnotify + Addition to above, in the "actual operation" (step 4) aufs re-lookup + the dentry on the branch, and returns EBUSY if it finds different + dentry. + Additionally, aufs sets the inotify/fsnotify watch for every dir on branches + during it is in cache. When the event is notified, aufs registers a + function to kernel 'events' thread by schedule_work(). And the + function sets some special status to the cached aufs dentry and inode + private data. If they are not cached, then aufs has nothing to + do. When the same file is accessed through aufs (step 0-3) later, + aufs will detect the status and refresh all necessary data. + In this mode, aufs has to ignore the event which is fired by aufs + itself. +3. No Extra Validation + This is the simplest test and doesn't add any additional revalidation + test, and skip the revalidation in step 4. It is useful and improves + aufs performance when system surely hide the aufs branches from user, + by over-mounting something (or another method). diff --git a/Documentation/filesystems/aufs/design/04branch.txt b/Documentation/filesystems/aufs/design/04branch.txt new file mode 100644 index 0000000000000..da5200be41a9f --- /dev/null +++ b/Documentation/filesystems/aufs/design/04branch.txt @@ -0,0 +1,74 @@ + +# Copyright (C) 2005-2020 Junjiro R. Okajima +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +Branch Manipulation + +Since aufs supports dynamic branch manipulation, ie. add/remove a branch +and changing its permission/attribute, there are a lot of works to do. + + +Add a Branch +---------------------------------------------------------------------- +o Confirm the adding dir exists outside of aufs, including loopback + mount, and its various attributes. +o Initialize the xino file and whiteout bases if necessary. + See struct.txt. + +o Check the owner/group/mode of the directory + When the owner/group/mode of the adding directory differs from the + existing branch, aufs issues a warning because it may impose a + security risk. + For example, when a upper writable branch has a world writable empty + top directory, a malicious user can create any files on the writable + branch directly, like copy-up and modify manually. If something like + /etc/{passwd,shadow} exists on the lower readonly branch but the upper + writable branch, and the writable branch is world-writable, then a + malicious guy may create /etc/passwd on the writable branch directly + and the infected file will be valid in aufs. + I am afraid it can be a security issue, but aufs can do nothing except + producing a warning. + + +Delete a Branch +---------------------------------------------------------------------- +o Confirm the deleting branch is not busy + To be general, there is one merit to adopt "remount" interface to + manipulate branches. It is to discard caches. At deleting a branch, + aufs checks the still cached (and connected) dentries and inodes. If + there are any, then they are all in-use. An inode without its + corresponding dentry can be alive alone (for example, inotify/fsnotify case). + + For the cached one, aufs checks whether the same named entry exists on + other branches. + If the cached one is a directory, because aufs provides a merged view + to users, as long as one dir is left on any branch aufs can show the + dir to users. In this case, the branch can be removed from aufs. + Otherwise aufs rejects deleting the branch. + + If any file on the deleting branch is opened by aufs, then aufs + rejects deleting. + + +Modify the Permission of a Branch +---------------------------------------------------------------------- +o Re-initialize or remove the xino file and whiteout bases if necessary. + See struct.txt. + +o rw --> ro: Confirm the modifying branch is not busy + Aufs rejects the request if any of these conditions are true. + - a file on the branch is mmap-ed. + - a regular file on the branch is opened for write and there is no + same named entry on the upper branch. diff --git a/Documentation/filesystems/aufs/design/05wbr_policy.txt b/Documentation/filesystems/aufs/design/05wbr_policy.txt new file mode 100644 index 0000000000000..0262084bf6347 --- /dev/null +++ b/Documentation/filesystems/aufs/design/05wbr_policy.txt @@ -0,0 +1,64 @@ + +# Copyright (C) 2005-2020 Junjiro R. Okajima +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +Policies to Select One among Multiple Writable Branches +---------------------------------------------------------------------- +When the number of writable branch is more than one, aufs has to decide +the target branch for file creation or copy-up. By default, the highest +writable branch which has the parent (or ancestor) dir of the target +file is chosen (top-down-parent policy). +By user's request, aufs implements some other policies to select the +writable branch, for file creation several policies, round-robin, +most-free-space, and other policies. For copy-up, top-down-parent, +bottom-up-parent, bottom-up and others. + +As expected, the round-robin policy selects the branch in circular. When +you have two writable branches and creates 10 new files, 5 files will be +created for each branch. mkdir(2) systemcall is an exception. When you +create 10 new directories, all will be created on the same branch. +And the most-free-space policy selects the one which has most free +space among the writable branches. The amount of free space will be +checked by aufs internally, and users can specify its time interval. + +The policies for copy-up is more simple, +top-down-parent is equivalent to the same named on in create policy, +bottom-up-parent selects the writable branch where the parent dir +exists and the nearest upper one from the copyup-source, +bottom-up selects the nearest upper writable branch from the +copyup-source, regardless the existence of the parent dir. + +There are some rules or exceptions to apply these policies. +- If there is a readonly branch above the policy-selected branch and + the parent dir is marked as opaque (a variation of whiteout), or the + target (creating) file is whiteout-ed on the upper readonly branch, + then the result of the policy is ignored and the target file will be + created on the nearest upper writable branch than the readonly branch. +- If there is a writable branch above the policy-selected branch and + the parent dir is marked as opaque or the target file is whiteouted + on the branch, then the result of the policy is ignored and the target + file will be created on the highest one among the upper writable + branches who has diropq or whiteout. In case of whiteout, aufs removes + it as usual. +- link(2) and rename(2) systemcalls are exceptions in every policy. + They try selecting the branch where the source exists as possible + since copyup a large file will take long time. If it can't be, + ie. the branch where the source exists is readonly, then they will + follow the copyup policy. +- There is an exception for rename(2) when the target exists. + If the rename target exists, aufs compares the index of the branches + where the source and the target exists and selects the higher + one. If the selected branch is readonly, then aufs follows the + copyup policy. diff --git a/Documentation/filesystems/aufs/design/06dirren.dot b/Documentation/filesystems/aufs/design/06dirren.dot new file mode 100644 index 0000000000000..2d62bb6dd55fd --- /dev/null +++ b/Documentation/filesystems/aufs/design/06dirren.dot @@ -0,0 +1,31 @@ + +// to view this graph, run dot(1) command in GRAPHVIZ. + +digraph G { +node [shape=box]; +whinfo [label="detailed info file\n(lower_brid_root-hinum, h_inum, namelen, old name)"]; + +node [shape=oval]; + +aufs_rename -> whinfo [label="store/remove"]; + +node [shape=oval]; +inode_list [label="h_inum list in branch\ncache"]; + +node [shape=box]; +whinode [label="h_inum list file"]; + +node [shape=oval]; +brmgmt [label="br_add/del/mod/umount"]; + +brmgmt -> inode_list [label="create/remove"]; +brmgmt -> whinode [label="load/store"]; + +inode_list -> whinode [style=dashed,dir=both]; + +aufs_rename -> inode_list [label="add/del"]; + +aufs_lookup -> inode_list [label="search"]; + +aufs_lookup -> whinfo [label="load/remove"]; +} diff --git a/Documentation/filesystems/aufs/design/06dirren.txt b/Documentation/filesystems/aufs/design/06dirren.txt new file mode 100644 index 0000000000000..38ae77b2c842d --- /dev/null +++ b/Documentation/filesystems/aufs/design/06dirren.txt @@ -0,0 +1,102 @@ + +# Copyright (C) 2017-2020 Junjiro R. Okajima +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +Special handling for renaming a directory (DIRREN) +---------------------------------------------------------------------- +First, let's assume we have a simple usecase. + +- /u = /rw + /ro +- /rw/dirA exists +- /ro/dirA and /ro/dirA/file exist too +- there is no dirB on both branches +- a user issues rename("dirA", "dirB") + +Now, what should aufs behave against this rename(2)? +There are a few possible cases. + +A. returns EROFS. + since dirA exists on a readonly branch which cannot be renamed. +B. returns EXDEV. + it is possible to copy-up dirA (only the dir itself), but the child + entries ("file" in this case) should not be. it must be a bad + approach to copy-up recursively. +C. returns a success. + even the branch /ro is readonly, aufs tries renaming it. Obviously it + is a violation of aufs' policy. +D. construct an extra information which indicates that /ro/dirA should + be handled as the name of dirB. + overlayfs has a similar feature called REDIRECT. + +Until now, aufs implements the case B only which returns EXDEV, and +expects the userspace application behaves like mv(1) which tries +issueing rename(2) recursively. + +A new aufs feature called DIRREN is introduced which implements the case +D. There are several "extra information" added. + +1. detailed info per renamed directory + path: /rw/dirB/$AUFS_WH_DR_INFO_PFX. +2. the inode-number list of directories on a branch + path: /rw/dirB/$AUFS_WH_DR_BRHINO + +The filename of "detailed info per directory" represents the lower +branch, and its format is +- a type of the branch id + one of these. + + uuid (not implemented yet) + + fsid + + dev +- the inode-number of the branch root dir + +And it contains these info in a single regular file. +- magic number +- branch's inode-number of the logically renamed dir +- the name of the before-renamed dir + +The "detailed info per directory" file is created in aufs rename(2), and +loaded in any lookup. +The info is considered in lookup for the matching case only. Here +"matching" means that the root of branch (in the info filename) is same +to the current looking-up branch. After looking-up the before-renamed +name, the inode-number is compared. And the matched dentry is used. + +The "inode-number list of directories" is a regular file which contains +simply the inode-numbers on the branch. The file is created or updated +in removing the branch, and loaded in adding the branch. Its lifetime is +equal to the branch. +The list is refered in lookup, and when the current target inode is +found in the list, the aufs tries loading the "detailed info per +directory" and get the changed and valid name of the dir. + +Theoretically these "extra informaiton" may be able to be put into XATTR +in the dir inode. But aufs doesn't choose this way because +1. XATTR may not be supported by the branch (or its configuration) +2. XATTR may have its size limit. +3. XATTR may be less easy to convert than a regular file, when the + format of the info is changed in the future. +At the same time, I agree that the regular file approach is much slower +than XATTR approach. So, in the future, aufs may take the XATTR or other +better approach. + +This DIRREN feature is enabled by aufs configuration, and is activated +by a new mount option. + +For the more complicated case, there is a work with UDBA option, which +is to dected the direct access to the branches (by-passing aufs) and to +maintain the cashes in aufs. Since a single cached aufs dentry may +contains two names, before- and after-rename, the name comparision in +UDBA handler may not work correctly. In this case, the behaviour will be +equivalen to udba=reval case. diff --git a/Documentation/filesystems/aufs/design/06fhsm.txt b/Documentation/filesystems/aufs/design/06fhsm.txt new file mode 100644 index 0000000000000..df985662befb0 --- /dev/null +++ b/Documentation/filesystems/aufs/design/06fhsm.txt @@ -0,0 +1,120 @@ + +# Copyright (C) 2011-2020 Junjiro R. Okajima +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + +File-based Hierarchical Storage Management (FHSM) +---------------------------------------------------------------------- +Hierarchical Storage Management (or HSM) is a well-known feature in the +storage world. Aufs provides this feature as file-based with multiple +writable branches, based upon the principle of "Colder, the Lower". +Here the word "colder" means that the less used files, and "lower" means +that the position in the order of the stacked branches vertically. +These multiple writable branches are prioritized, ie. the topmost one +should be the fastest drive and be used heavily. + +o Characters in aufs FHSM story +- aufs itself and a new branch attribute. +- a new ioctl interface to move-down and to establish a connection with + the daemon ("move-down" is a converse of "copy-up"). +- userspace tool and daemon. + +The userspace daemon establishes a connection with aufs and waits for +the notification. The notified information is very similar to struct +statfs containing the number of consumed blocks and inodes. +When the consumed blocks/inodes of a branch exceeds the user-specified +upper watermark, the daemon activates its move-down process until the +consumed blocks/inodes reaches the user-specified lower watermark. + +The actual move-down is done by aufs based upon the request from +user-space since we need to maintain the inode number and the internal +pointer arrays in aufs. + +Currently aufs FHSM handles the regular files only. Additionally they +must not be hard-linked nor pseudo-linked. + + +o Cowork of aufs and the user-space daemon + During the userspace daemon established the connection, aufs sends a + small notification to it whenever aufs writes something into the + writable branch. But it may cost high since aufs issues statfs(2) + internally. So user can specify a new option to cache the + info. Actually the notification is controlled by these factors. + + the specified cache time. + + classified as "force" by aufs internally. + Until the specified time expires, aufs doesn't send the info + except the forced cases. When aufs decide forcing, the info is always + notified to userspace. + For example, the number of free inodes is generally large enough and + the shortage of it happens rarely. So aufs doesn't force the + notification when creating a new file, directory and others. This is + the typical case which aufs doesn't force. + When aufs writes the actual filedata and the files consumes any of new + blocks, the aufs forces notifying. + + +o Interfaces in aufs +- New branch attribute. + + fhsm + Specifies that the branch is managed by FHSM feature. In other word, + participant in the FHSM. + When nofhsm is set to the branch, it will not be the source/target + branch of the move-down operation. This attribute is set + independently from coo and moo attributes, and if you want full + FHSM, you should specify them as well. +- New mount option. + + fhsm_sec + Specifies a second to suppress many less important info to be + notified. +- New ioctl. + + AUFS_CTL_FHSM_FD + create a new file descriptor which userspace can read the notification + (a subset of struct statfs) from aufs. +- Module parameter 'brs' + It has to be set to 1. Otherwise the new mount option 'fhsm' will not + be set. +- mount helpers /sbin/mount.aufs and /sbin/umount.aufs + When there are two or more branches with fhsm attributes, + /sbin/mount.aufs invokes the user-space daemon and /sbin/umount.aufs + terminates it. As a result of remounting and branch-manipulation, the + number of branches with fhsm attribute can be one. In this case, + /sbin/mount.aufs will terminate the user-space daemon. + + +Finally the operation is done as these steps in kernel-space. +- make sure that, + + no one else is using the file. + + the file is not hard-linked. + + the file is not pseudo-linked. + + the file is a regular file. + + the parent dir is not opaqued. +- find the target writable branch. +- make sure the file is not whiteout-ed by the upper (than the target) + branch. +- make the parent dir on the target branch. +- mutex lock the inode on the branch. +- unlink the whiteout on the target branch (if exists). +- lookup and create the whiteout-ed temporary name on the target branch. +- copy the file as the whiteout-ed temporary name on the target branch. +- rename the whiteout-ed temporary name to the original name. +- unlink the file on the source branch. +- maintain the internal pointer array and the external inode number + table (XINO). +- maintain the timestamps and other attributes of the parent dir and the + file. + +And of course, in every step, an error may happen. So the operation +should restore the original file state after an error happens. diff --git a/Documentation/filesystems/aufs/design/06mmap.txt b/Documentation/filesystems/aufs/design/06mmap.txt new file mode 100644 index 0000000000000..9184f67104375 --- /dev/null +++ b/Documentation/filesystems/aufs/design/06mmap.txt @@ -0,0 +1,72 @@ + +# Copyright (C) 2005-2020 Junjiro R. Okajima +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +mmap(2) -- File Memory Mapping +---------------------------------------------------------------------- +In aufs, the file-mapped pages are handled by a branch fs directly, no +interaction with aufs. It means aufs_mmap() calls the branch fs's +->mmap(). +This approach is simple and good, but there is one problem. +Under /proc, several entries show the mmapped files by its path (with +device and inode number), and the printed path will be the path on the +branch fs's instead of virtual aufs's. +This is not a problem in most cases, but some utilities lsof(1) (and its +user) may expect the path on aufs. + +To address this issue, aufs adds a new member called vm_prfile in struct +vm_area_struct (and struct vm_region). The original vm_file points to +the file on the branch fs in order to handle everything correctly as +usual. The new vm_prfile points to a virtual file in aufs, and the +show-functions in procfs refers to vm_prfile if it is set. +Also we need to maintain several other places where touching vm_file +such like +- fork()/clone() copies vma and the reference count of vm_file is + incremented. +- merging vma maintains the ref count too. + +This is not a good approach. It just fakes the printed path. But it +leaves all behaviour around f_mapping unchanged. This is surely an +advantage. +Actually aufs had adopted another complicated approach which calls +generic_file_mmap() and handles struct vm_operations_struct. In this +approach, aufs met a hard problem and I could not solve it without +switching the approach. + +There may be one more another approach which is +- bind-mount the branch-root onto the aufs-root internally +- grab the new vfsmount (ie. struct mount) +- lazy-umount the branch-root internally +- in open(2) the aufs-file, open the branch-file with the hidden + vfsmount (instead of the original branch's vfsmount) +- ideally this "bind-mount and lazy-umount" should be done atomically, + but it may be possible from userspace by the mount helper. + +Adding the internal hidden vfsmount and using it in opening a file, the +file path under /proc will be printed correctly. This approach looks +smarter, but is not possible I am afraid. +- aufs-root may be bind-mount later. when it happens, another hidden + vfsmount will be required. +- it is hard to get the chance to bind-mount and lazy-umount + + in kernel-space, FS can have vfsmount in open(2) via + file->f_path, and aufs can know its vfsmount. But several locks are + already acquired, and if aufs tries to bind-mount and lazy-umount + here, then it may cause a deadlock. + + in user-space, bind-mount doesn't invoke the mount helper. +- since /proc shows dev and ino, aufs has to give vma these info. it + means a new member vm_prinode will be necessary. this is essentially + equivalent to vm_prfile described above. + +I have to give up this "looks-smater" approach. diff --git a/Documentation/filesystems/aufs/design/06xattr.txt b/Documentation/filesystems/aufs/design/06xattr.txt new file mode 100644 index 0000000000000..d0f6aedfe2d00 --- /dev/null +++ b/Documentation/filesystems/aufs/design/06xattr.txt @@ -0,0 +1,96 @@ + +# Copyright (C) 2014-2020 Junjiro R. Okajima +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + +Listing XATTR/EA and getting the value +---------------------------------------------------------------------- +For the inode standard attributes (owner, group, timestamps, etc.), aufs +shows the values from the topmost existing file. This behaviour is good +for the non-dir entries since the bahaviour exactly matches the shown +information. But for the directories, aufs considers all the same named +entries on the lower branches. Which means, if one of the lower entry +rejects readdir call, then aufs returns an error even if the topmost +entry allows it. This behaviour is necessary to respect the branch fs's +security, but can make users confused since the user-visible standard +attributes don't match the behaviour. +To address this issue, aufs has a mount option called dirperm1 which +checks the permission for the topmost entry only, and ignores the lower +entry's permission. + +A similar issue can happen around XATTR. +getxattr(2) and listxattr(2) families behave as if dirperm1 option is +always set. Otherwise these very unpleasant situation would happen. +- listxattr(2) may return the duplicated entries. +- users may not be able to remove or reset the XATTR forever, + + +XATTR/EA support in the internal (copy,move)-(up,down) +---------------------------------------------------------------------- +Generally the extended attributes of inode are categorized as these. +- "security" for LSM and capability. +- "system" for posix ACL, 'acl' mount option is required for the branch + fs generally. +- "trusted" for userspace, CAP_SYS_ADMIN is required. +- "user" for userspace, 'user_xattr' mount option is required for the + branch fs generally. + +Moreover there are some other categories. Aufs handles these rather +unpopular categories as the ordinary ones, ie. there is no special +condition nor exception. + +In copy-up, the support for XATTR on the dst branch may differ from the +src branch. In this case, the copy-up operation will get an error and +the original user operation which triggered the copy-up will fail. It +can happen that even all copy-up will fail. +When both of src and dst branches support XATTR and if an error occurs +during copying XATTR, then the copy-up should fail obviously. That is a +good reason and aufs should return an error to userspace. But when only +the src branch support that XATTR, aufs should not return an error. +For example, the src branch supports ACL but the dst branch doesn't +because the dst branch may natively un-support it or temporary +un-support it due to "noacl" mount option. Of course, the dst branch fs +may NOT return an error even if the XATTR is not supported. It is +totally up to the branch fs. + +Anyway when the aufs internal copy-up gets an error from the dst branch +fs, then aufs tries removing the just copied entry and returns the error +to the userspace. The worst case of this situation will be all copy-up +will fail. + +For the copy-up operation, there two basic approaches. +- copy the specified XATTR only (by category above), and return the + error unconditionally if it happens. +- copy all XATTR, and ignore the error on the specified category only. + +In order to support XATTR and to implement the correct behaviour, aufs +chooses the latter approach and introduces some new branch attributes, +"icexsec", "icexsys", "icextr", "icexusr", and "icexoth". +They correspond to the XATTR namespaces (see above). Additionally, to be +convenient, "icex" is also provided which means all "icex*" attributes +are set (here the word "icex" stands for "ignore copy-error on XATTR"). + +The meaning of these attributes is to ignore the error from setting +XATTR on that branch. +Note that aufs tries copying all XATTR unconditionally, and ignores the +error from the dst branch according to the specified attributes. + +Some XATTR may have its default value. The default value may come from +the parent dir or the environment. If the default value is set at the +file creating-time, it will be overwritten by copy-up. +Some contradiction may happen I am afraid. +Do we need another attribute to stop copying XATTR? I am unsure. For +now, aufs implements the branch attributes to ignore the error. diff --git a/Documentation/filesystems/aufs/design/07export.txt b/Documentation/filesystems/aufs/design/07export.txt new file mode 100644 index 0000000000000..6fcb00d7dbdb6 --- /dev/null +++ b/Documentation/filesystems/aufs/design/07export.txt @@ -0,0 +1,58 @@ + +# Copyright (C) 2005-2020 Junjiro R. Okajima +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +Export Aufs via NFS +---------------------------------------------------------------------- +Here is an approach. +- like xino/xib, add a new file 'xigen' which stores aufs inode + generation. +- iget_locked(): initialize aufs inode generation for a new inode, and + store it in xigen file. +- destroy_inode(): increment aufs inode generation and store it in xigen + file. it is necessary even if it is not unlinked, because any data of + inode may be changed by UDBA. +- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise + build file handle by + + branch id (4 bytes) + + superblock generation (4 bytes) + + inode number (4 or 8 bytes) + + parent dir inode number (4 or 8 bytes) + + inode generation (4 bytes)) + + return value of exportfs_encode_fh() for the parent on a branch (4 + bytes) + + file handle for a branch (by exportfs_encode_fh()) +- fh_to_dentry(): + + find the index of a branch from its id in handle, and check it is + still exist in aufs. + + 1st level: get the inode number from handle and search it in cache. + + 2nd level: if not found in cache, get the parent inode number from + the handle and search it in cache. and then open the found parent + dir, find the matching inode number by vfs_readdir() and get its + name, and call lookup_one_len() for the target dentry. + + 3rd level: if the parent dir is not cached, call + exportfs_decode_fh() for a branch and get the parent on a branch, + build a pathname of it, convert it a pathname in aufs, call + path_lookup(). now aufs gets a parent dir dentry, then handle it as + the 2nd level. + + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount + for every branch, but not itself. to get this, (currently) aufs + searches in current->nsproxy->mnt_ns list. it may not be a good + idea, but I didn't get other approach. + + test the generation of the gotten inode. +- every inode operation: they may get EBUSY due to UDBA. in this case, + convert it into ESTALE for NFSD. +- readdir(): call lockdep_on/off() because filldir in NFSD calls + lookup_one_len(), vfs_getattr(), encode_fh() and others. diff --git a/Documentation/filesystems/aufs/design/08shwh.txt b/Documentation/filesystems/aufs/design/08shwh.txt new file mode 100644 index 0000000000000..d7e58319086b1 --- /dev/null +++ b/Documentation/filesystems/aufs/design/08shwh.txt @@ -0,0 +1,52 @@ + +# Copyright (C) 2005-2020 Junjiro R. Okajima +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +Show Whiteout Mode (shwh) +---------------------------------------------------------------------- +Generally aufs hides the name of whiteouts. But in some cases, to show +them is very useful for users. For instance, creating a new middle layer +(branch) by merging existing layers. + +(borrowing aufs1 HOW-TO from a user, Michael Towers) +When you have three branches, +- Bottom: 'system', squashfs (underlying base system), read-only +- Middle: 'mods', squashfs, read-only +- Top: 'overlay', ram (tmpfs), read-write + +The top layer is loaded at boot time and saved at shutdown, to preserve +the changes made to the system during the session. +When larger changes have been made, or smaller changes have accumulated, +the size of the saved top layer data grows. At this point, it would be +nice to be able to merge the two overlay branches ('mods' and 'overlay') +and rewrite the 'mods' squashfs, clearing the top layer and thus +restoring save and load speed. + +This merging is simplified by the use of another aufs mount, of just the +two overlay branches using the 'shwh' option. +# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \ + aufs /livesys/merge_union + +A merged view of these two branches is then available at +/livesys/merge_union, and the new feature is that the whiteouts are +visible! +Note that in 'shwh' mode the aufs mount must be 'ro', which will disable +writing to all branches. Also the default mode for all branches is 'ro'. +It is now possible to save the combined contents of the two overlay +branches to a new squashfs, e.g.: +# mksquashfs /livesys/merge_union /path/to/newmods.squash + +This new squashfs archive can be stored on the boot device and the +initramfs will use it to replace the old one at the next boot. diff --git a/Documentation/filesystems/aufs/design/10dynop.txt b/Documentation/filesystems/aufs/design/10dynop.txt new file mode 100644 index 0000000000000..d55cae285dff3 --- /dev/null +++ b/Documentation/filesystems/aufs/design/10dynop.txt @@ -0,0 +1,47 @@ + +# Copyright (C) 2010-2020 Junjiro R. Okajima +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +Dynamically customizable FS operations +---------------------------------------------------------------------- +Generally FS operations (struct inode_operations, struct +address_space_operations, struct file_operations, etc.) are defined as +"static const", but it never means that FS have only one set of +operation. Some FS have multiple sets of them. For instance, ext2 has +three sets, one for XIP, for NOBH, and for normal. +Since aufs overrides and redirects these operations, sometimes aufs has +to change its behaviour according to the branch FS type. More importantly +VFS acts differently if a function (member in the struct) is set or +not. It means aufs should have several sets of operations and select one +among them according to the branch FS definition. + +In order to solve this problem and not to affect the behaviour of VFS, +aufs defines these operations dynamically. For instance, aufs defines +dummy direct_IO function for struct address_space_operations, but it may +not be set to the address_space_operations actually. When the branch FS +doesn't have it, aufs doesn't set it to its address_space_operations +while the function definition itself is still alive. So the behaviour +itself will not change, and it will return an error when direct_IO is +not set. + +The lifetime of these dynamically generated operation object is +maintained by aufs branch object. When the branch is removed from aufs, +the reference counter of the object is decremented. When it reaches +zero, the dynamically generated operation object will be freed. + +This approach is designed to support AIO (io_submit), Direct I/O and +XIP (DAX) mainly. +Currently this approach is applied to address_space_operations for +regular files only. diff --git a/Documentation/filesystems/ext4/attributes.rst b/Documentation/filesystems/ext4/attributes.rst index 54386a010a8d7..871d2da7a0a91 100644 --- a/Documentation/filesystems/ext4/attributes.rst +++ b/Documentation/filesystems/ext4/attributes.rst @@ -76,7 +76,7 @@ The beginning of an extended attribute block is in - Checksum of the extended attribute block. * - 0x14 - \_\_u32 - - h\_reserved[2] + - h\_reserved[3] - Zero. The checksum is calculated against the FS UUID, the 64-bit block number diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index f18506083ced8..26c0939695736 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -850,3 +850,11 @@ business doing so. d_alloc_pseudo() is internal-only; uses outside of alloc_file_pseudo() are very suspect (and won't work in modules). Such uses are very likely to be misspelled d_alloc_anon(). + +--- + +**mandatory** + +[should've been added in 2016] stale comment in finish_open() nonwithstanding, +failure exits in ->atomic_open() instances should *NOT* fput() the file, +no matter what. Everything is handled by the caller. diff --git a/Documentation/filesystems/seq_file.txt b/Documentation/filesystems/seq_file.txt index d412b236a9d6f..7cf7143921a1f 100644 --- a/Documentation/filesystems/seq_file.txt +++ b/Documentation/filesystems/seq_file.txt @@ -192,6 +192,12 @@ between the calls to start() and stop(), so holding a lock during that time is a reasonable thing to do. The seq_file code will also avoid taking any other locks while the iterator is active. +The iterater value returned by start() or next() is guaranteed to be +passed to a subsequent next() or stop() call. This allows resources +such as locks that were taken to be reliably released. There is *no* +guarantee that the iterator will be passed to show(), though in practice +it often will be. + Formatted output diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt index ddf15b1b0d5a4..33ec0a01450dd 100644 --- a/Documentation/filesystems/sysfs.txt +++ b/Documentation/filesystems/sysfs.txt @@ -232,12 +232,10 @@ Other notes: is 4096. - show() methods should return the number of bytes printed into the - buffer. This is the return value of scnprintf(). + buffer. -- show() must not use snprintf() when formatting the value to be - returned to user space. If you can guarantee that an overflow - will never happen you can use sprintf() otherwise you must use - scnprintf(). +- show() should only use sysfs_emit() or sysfs_emit_at() when formatting + the value to be returned to user space. - store() should return the number of bytes used from the buffer. If the entire buffer has been used, just return the count argument. diff --git a/Documentation/firmware-guide/acpi/apei/einj.rst b/Documentation/firmware-guide/acpi/apei/einj.rst index e588bccf51583..344284236a810 100644 --- a/Documentation/firmware-guide/acpi/apei/einj.rst +++ b/Documentation/firmware-guide/acpi/apei/einj.rst @@ -168,7 +168,7 @@ An error injection example:: 0x00000008 Memory Correctable 0x00000010 Memory Uncorrectable non-fatal # echo 0x12345000 > param1 # Set memory address for injection - # echo $((-1 << 12)) > param2 # Mask 0xfffffffffffff000 - anywhere in this page + # echo 0xfffffffffffff000 > param2 # Mask - anywhere in this page # echo 0x8 > error_type # Choose correctable memory error # echo 1 > error_inject # Inject now diff --git a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst index febccbc5689d0..1b05e8d0ddff7 100644 --- a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst +++ b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst @@ -5,7 +5,7 @@ Referencing hierarchical data nodes =================================== -:Copyright: |copy| 2018 Intel Corporation +:Copyright: |copy| 2018, 2021 Intel Corporation :Author: Sakari Ailus ACPI in general allows referring to device objects in the tree only. @@ -52,12 +52,14 @@ the ANOD object which is also the final target node of the reference. Name (NOD0, Package() { ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), Package () { + Package () { "reg", 0 }, Package () { "random-property", 3 }, } }) Name (NOD1, Package() { ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"), Package () { + Package () { "reg", 1 }, Package () { "anothernode", "ANOD" }, } }) @@ -74,7 +76,11 @@ the ANOD object which is also the final target node of the reference. Name (_DSD, Package () { ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), Package () { - Package () { "reference", ^DEV0, "node@1", "anothernode" }, + Package () { + "reference", Package () { + ^DEV0, "node@1", "anothernode" + } + }, } }) } diff --git a/Documentation/hwmon/lm90.rst b/Documentation/hwmon/lm90.rst index 953315987c06e..03851cbe637ce 100644 --- a/Documentation/hwmon/lm90.rst +++ b/Documentation/hwmon/lm90.rst @@ -123,6 +123,18 @@ Supported chips: http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3497 + * Maxim MAX6654 + + Prefix: 'max6654' + + Addresses scanned: I2C 0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b, + + 0x4c, 0x4d and 0x4e + + Datasheet: Publicly available at the Maxim website + + https://www.maximintegrated.com/en/products/sensors/MAX6654.html + * Maxim MAX6657 Prefix: 'max6657' @@ -253,6 +265,16 @@ Supported chips: http://www.ti.com/litv/pdf/sbos686 + * Texas Instruments TMP461 + + Prefix: 'tmp461' + + Addresses scanned: I2C 0x48 through 0x4F + + Datasheet: Publicly available at TI website + + https://www.ti.com/lit/gpn/tmp461 + Author: Jean Delvare @@ -301,6 +323,13 @@ ADT7461, ADT7461A, NCT1008: * Extended temperature range (breaks compatibility) * Lower resolution for remote temperature +MAX6654: + * Better local resolution + * Selectable address + * Remote sensor type selection + * Extended temperature range + * Extended resolution only available when conversion rate <= 1 Hz + MAX6657 and MAX6658: * Better local resolution * Remote sensor type selection @@ -336,8 +365,8 @@ SA56004X: All temperature values are given in degrees Celsius. Resolution is 1.0 degree for the local temperature, 0.125 degree for the remote -temperature, except for the MAX6657, MAX6658 and MAX6659 which have a -resolution of 0.125 degree for both temperatures. +temperature, except for the MAX6654, MAX6657, MAX6658 and MAX6659 which have +a resolution of 0.125 degree for both temperatures. Each sensor has its own high and low limits, plus a critical limit. Additionally, there is a relative hysteresis value common to both critical diff --git a/Documentation/hwmon/max31790.rst b/Documentation/hwmon/max31790.rst index 84c62a12ef3a8..f4749c44cfb8b 100644 --- a/Documentation/hwmon/max31790.rst +++ b/Documentation/hwmon/max31790.rst @@ -38,6 +38,7 @@ Sysfs entries fan[1-12]_input RO fan tachometer speed in RPM fan[1-12]_fault RO fan experienced fault fan[1-6]_target RW desired fan speed in RPM -pwm[1-6]_enable RW regulator mode, 0=disabled, 1=manual mode, 2=rpm mode -pwm[1-6] RW fan target duty cycle (0-255) +pwm[1-6]_enable RW regulator mode, 0=disabled (duty cycle=0%), 1=manual mode, 2=rpm mode +pwm[1-6] RW read: current pwm duty cycle, + write: target pwm duty cycle (0-255) ================== === ======================================================= diff --git a/Documentation/index.rst b/Documentation/index.rst index b843e313d2f2e..2ceab197246f7 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -135,6 +135,14 @@ needed). mic/index scheduler/index +Architecture-agnostic documentation +----------------------------------- + +.. toctree:: + :maxdepth: 2 + + asm-annotations + Architecture-specific documentation ----------------------------------- diff --git a/Documentation/ioctl/ioctl-number.rst b/Documentation/ioctl/ioctl-number.rst index bef79cd4c6b4d..356dd9727f702 100644 --- a/Documentation/ioctl/ioctl-number.rst +++ b/Documentation/ioctl/ioctl-number.rst @@ -326,8 +326,11 @@ Code Seq# Include File Comments 0xAC 00-1F linux/raw.h 0xAD 00 Netfilter device in development: -0xAE all linux/kvm.h Kernel-based Virtual Machine +0xAE 00-1F linux/kvm.h Kernel-based Virtual Machine +0xAE 40-FF linux/kvm.h Kernel-based Virtual Machine + +0xAE 20-3F linux/nitro_enclaves.h Nitro Enclaves 0xAF 00-1F linux/fsl_hypervisor.h Freescale hypervisor 0xB0 all RATIO devices in development: diff --git a/Documentation/kbuild/index.rst b/Documentation/kbuild/index.rst index 0f144fad99a6a..3882bd5f7728c 100644 --- a/Documentation/kbuild/index.rst +++ b/Documentation/kbuild/index.rst @@ -19,6 +19,7 @@ Kernel Build System issues reproducible-builds + llvm .. only:: subproject and html diff --git a/Documentation/kbuild/kbuild.rst b/Documentation/kbuild/kbuild.rst index f1e5dce86af7c..852ccc551bb3a 100644 --- a/Documentation/kbuild/kbuild.rst +++ b/Documentation/kbuild/kbuild.rst @@ -262,3 +262,8 @@ KBUILD_BUILD_USER, KBUILD_BUILD_HOST These two variables allow to override the user@host string displayed during boot and in /proc/version. The default value is the output of the commands whoami and host, respectively. + +LLVM +---- +If this variable is set to 1, Kbuild will use Clang and LLVM utilities instead +of GCC and GNU binutils to build the kernel. diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst new file mode 100644 index 0000000000000..c776b6eee969f --- /dev/null +++ b/Documentation/kbuild/llvm.rst @@ -0,0 +1,87 @@ +============================== +Building Linux with Clang/LLVM +============================== + +This document covers how to build the Linux kernel with Clang and LLVM +utilities. + +About +----- + +The Linux kernel has always traditionally been compiled with GNU toolchains +such as GCC and binutils. Ongoing work has allowed for `Clang +`_ and `LLVM `_ utilities to be +used as viable substitutes. Distributions such as `Android +`_, `ChromeOS +`_, and `OpenMandriva +`_ use Clang built kernels. `LLVM is a +collection of toolchain components implemented in terms of C++ objects +`_. Clang is a front-end to LLVM that +supports C and the GNU C extensions required by the kernel, and is pronounced +"klang," not "see-lang." + +Clang +----- + +The compiler used can be swapped out via `CC=` command line argument to `make`. +`CC=` should be set when selecting a config and during a build. + + make CC=clang defconfig + + make CC=clang + +Cross Compiling +--------------- + +A single Clang compiler binary will typically contain all supported backends, +which can help simplify cross compiling. + + ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make CC=clang + +`CROSS_COMPILE` is not used to prefix the Clang compiler binary, instead +`CROSS_COMPILE` is used to set a command line flag: `--target `. For +example: + + clang --target aarch64-linux-gnu foo.c + +LLVM Utilities +-------------- + +LLVM has substitutes for GNU binutils utilities. Kbuild supports `LLVM=1` +to enable them. + + make LLVM=1 + +They can be enabled individually. The full list of the parameters: + + make CC=clang LD=ld.lld AR=llvm-ar NM=llvm-nm STRIP=llvm-strip \\ + OBJCOPY=llvm-objcopy OBJDUMP=llvm-objdump OBJSIZE=llvm-size \\ + READELF=llvm-readelf HOSTCC=clang HOSTCXX=clang++ HOSTAR=llvm-ar \\ + HOSTLD=ld.lld + +Currently, the integrated assembler is disabled by default. You can pass +`LLVM_IAS=1` to enable it. + +Getting Help +------------ + +- `Website `_ +- `Mailing List `_: +- `Issue Tracker `_ +- IRC: #clangbuiltlinux on chat.freenode.net +- `Telegram `_: @ClangBuiltLinux +- `Wiki `_ +- `Beginner Bugs `_ + +Getting LLVM +------------- + +- http://releases.llvm.org/download.html +- https://github.com/llvm/llvm-project +- https://llvm.org/docs/GettingStarted.html +- https://llvm.org/docs/CMake.html +- https://apt.llvm.org/ +- https://www.archlinux.org/packages/extra/x86_64/llvm/ +- https://github.com/ClangBuiltLinux/tc-build +- https://github.com/ClangBuiltLinux/linux/wiki/Building-Clang-from-source +- https://android.googlesource.com/platform/prebuilts/clang/host/linux-x86/ diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index b89c88168d6a3..b9b50553bfc56 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -1115,23 +1115,6 @@ When kbuild executes, the following steps are followed (roughly): In this example, extra-y is used to list object files that shall be built, but shall not be linked as part of built-in.a. - header-test-y - - header-test-y specifies headers (`*.h`) in the current directory that - should be compile tested to ensure they are self-contained, - i.e. compilable as standalone units. If CONFIG_HEADER_TEST is enabled, - this builds them as part of extra-y. - - header-test-pattern-y - - This works as a weaker version of header-test-y, and accepts wildcard - patterns. The typical usage is:: - - header-test-pattern-y += *.h - - This specifies all the files that matches to `*.h` in the current - directory, but the files in 'header-test-' are excluded. - 6.7 Commands useful for building a boot image --------------------------------------------- diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst index 774a998dcf370..199ce72bf9224 100644 --- a/Documentation/kbuild/modules.rst +++ b/Documentation/kbuild/modules.rst @@ -470,9 +470,9 @@ build. The syntax of the Module.symvers file is:: - + - 0xe1cc2a05 usb_stor_suspend USB_STORAGE drivers/usb/storage/usb-storage EXPORT_SYMBOL_GPL + 0xe1cc2a05 usb_stor_suspend drivers/usb/storage/usb-storage EXPORT_SYMBOL_GPL USB_STORAGE The fields are separated by tabs and values may be empty (e.g. if no namespace is defined for an exported symbol). diff --git a/Documentation/kmsg/IPVS b/Documentation/kmsg/IPVS new file mode 100644 index 0000000000000..b5ee24309a790 --- /dev/null +++ b/Documentation/kmsg/IPVS @@ -0,0 +1,81 @@ +/*? Text: "%s(): NULL arg\n" */ +/*? Text: "%s(): NULL scheduler_name\n" */ +/*? Text: "%s(): [%s] pe already existed in the system\n" */ +/*? Text: "%s(): [%s] pe already linked\n" */ +/*? Text: "%s(): [%s] pe is not in the list. failed\n" */ +/*? Text: "%s(): [%s] scheduler already existed in the system\n" */ +/*? Text: "%s(): [%s] scheduler already linked\n" */ +/*? Text: "%s(): [%s] scheduler is not in the list. failed\n" */ +/*? Text: "%s(): done error\n" */ +/*? Text: "%s(): init error\n" */ +/*? Text: "%s(): lower threshold is higher than upper threshold\n" */ +/*? Text: "%s(): no memory\n" */ +/*? Text: "%s(): request for already hashed, called from %pF\n" */ +/*? Text: "%s(): request for unhash flagged, called from %pF\n" */ +/*? Text: "%s(): server weight less than zero\n" */ +/*? Text: "%s: %s %pI4:%d - %s\n" */ +/*? Text: "%s: %s [%pI6]:%d - %s\n" */ +/*? Text: "%s: %s [%pI6c]:%d - %s\n" */ +/*? Text: "%s: FWM %u 0x%08X - %s\n" */ +/*? Text: "%s: enter\n" */ +/*? Text: "%s: loaded support on port[%d] = %d\n" */ +/*? Text: "BACKUP v0, Dropping buffer bogus conn options\n" */ +/*? Text: "BACKUP v0, bogus conn\n" */ +/*? Text: "BACKUP, Dropping buffer, Err: %d in decoding\n" */ +/*? Text: "BACKUP, Dropping buffer, Unknown version %d\n" */ +/*? Text: "BACKUP, Dropping buffer, msg > buffer\n" */ +/*? Text: "BACKUP, Dropping buffer, to small\n" */ +/*? Text: "BACKUP, Invalid PE parameters\n" */ +/*? Text: "BUG control DEL with n=0 : %s:%d to %s:%d\n" */ +/*? Text: "Connection hash table configured (size=%d, memory=%ldKbytes)\n" */ +/*? Text: "Error binding address of the mcast interface\n" */ +/*? Text: "Error binding to the multicast addr\n" */ +/*? Text: "Error connecting to the multicast addr\n" */ +/*? Text: "Error during creation of socket; terminating\n" */ +/*? Text: "Error joining to the multicast group\n" */ +/*? Text: "Error setting outbound mcast interface\n" */ +/*? Text: "Failed to stop Backup Daemon\n" */ +/*? Text: "Failed to stop Master Daemon\n" */ +/*? Text: "Registered protocols (%s)\n" */ +/*? Text: "SYNC, connection pe_data invalid\n" */ +/*? Text: "Schedule: port zero only supported in persistent services, check your ipvs configuration\n" */ +/*? Text: "Scheduler module ip_vs_%s not found\n" */ +/*? Text: "There is no net ptr to find in the skb in %s() line:%d\n" */ +/*? Text: "UDP no ns data\n" */ +/*? Text: "You probably need to specify IP address on multicast interface.\n" */ +/*? Text: "[%s] pe registered.\n" */ +/*? Text: "[%s] pe unregistered.\n" */ +/*? Text: "[%s] scheduler registered.\n" */ +/*? Text: "[%s] scheduler unregistered.\n" */ +/*? Text: "can't register hooks.\n" */ +/*? Text: "can't register netlink/ioctl.\n" */ +/*? Text: "can't setup connection table.\n" */ +/*? Text: "can't setup control.\n" */ +/*? Text: "cannot register Generic Netlink interface.\n" */ +/*? Text: "cannot register sockopt.\n" */ +/*? Text: "get_ctl: len %u < %u\n" */ +/*? Text: "ip_vs_send_async error %d\n" */ +/*? Text: "ip_vs_sync_buff_create failed.\n" */ +/*? Text: "ipvs loaded.\n" */ +/*? Text: "ipvs unloaded.\n" */ +/*? Text: "length: %u != %u\n" */ +/*? Text: "netif_stop_queue() cannot be called before register_netdev()\n" */ +/*? Text: "not enough space in Netlink message\n" */ +/*? Text: "persistence engine module ip_vs_pe_%s not found\n" */ +/*? Text: "receiving message error\n" */ +/*? Text: "request control ADD for already controlled: %s:%d to %s:%d\n" */ +/*? Text: "request control DEL for uncontrolled: %s:%d to %s:%d\n" */ +/*? Text: "set_ctl: invalid protocol: %d %pI4:%d %s\n" */ +/*? Text: "set_ctl: len %u != %u\n" */ +/*? Text: "shouldn't reach here, because the box is on the half connection in the tun/dr module.\n" */ +/*? Text: "stopping backup sync thread %d ...\n" */ +/*? Text: "stopping master sync thread %d ...\n" */ +/*? Text: "sync thread started: state = BACKUP, mcast_ifn = %s, syncid = %d\n" */ +/*? Text: "sync thread started: state = MASTER, mcast_ifn = %s, syncid = %d\n" */ +/*? Text: "unknown Generic Netlink command\n" */ +/*? Text: "sync thread started: state = MASTER, mcast_ifn = %s, syncid = %d, id = %d\n" */ +/*? Text: "sync thread started: state = BACKUP, mcast_ifn = %s, syncid = %d, id = %d\n" */ +/*? Text: "flen=%u proglen=%u pass=%u image=%pK from=%s pid=%d\n" */ +/*? Text: "%s selects TX queue %d, but real number of TX queues is %d\n" */ +/*? Text: "Unknown mcast interface: %s\n" */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/aes_s390 b/Documentation/kmsg/s390/aes_s390 new file mode 100644 index 0000000000000..e1a7451b1c77b --- /dev/null +++ b/Documentation/kmsg/s390/aes_s390 @@ -0,0 +1,45 @@ +/*? + * Text: "Allocating XTS fallback algorithm %s failed\n" + * Severity: Error + * Parameter: + * @1: algorithm name + * Description: + * The aes_s390 module failed to allocate a software fallback for the AES + * modes that are not supported by the hardware. A possible reason for this + * problem is that the aes_generic module that provides the fallback + * algorithms is not available. + * User action: + * Ensure that the aes_generic module is available and loaded and reload + * the aes_s390 module. + */ + +/*? + * Text: "Allocating AES fallback algorithm %s failed\n" + * Severity: Error + * Parameter: + * @1: algorithm name + * Description: + * The advanced encryption standard (AES) algorithm includes three modes with + * 128-bit, 192-bit, and 256-bit keys. Your hardware system only provides + * hardware acceleration for the 128-bit mode. The aes_s390 module failed to + * allocate a software fallback for the AES modes that are not supported by the + * hardware. A possible reason for this problem is that the aes_generic module + * that provides the fallback algorithms is not available. + * User action: + * Use the 128-bit mode only or ensure that the aes_generic module is available + * and loaded and reload the aes_s390 module. + */ + +/*? + * Text: "AES hardware acceleration is only available for 128-bit keys\n" + * Severity: Informational + * Description: + * The advanced encryption standard (AES) algorithm includes three modes with + * 128-bit, 192-bit, and 256-bit keys. Your hardware system only provides + * hardware acceleration for the 128-bit key mode. The aes_s390 module + * will use the less performant software fallback algorithm for the 192-bit + * and 256-bit key modes. + * User action: + * None. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/af_iucv b/Documentation/kmsg/s390/af_iucv new file mode 100644 index 0000000000000..6d5b39ade745f --- /dev/null +++ b/Documentation/kmsg/s390/af_iucv @@ -0,0 +1,23 @@ +/*? + * Text: "Application %s on z/VM guest %s exceeds message limit\n" + * Severity: Error + * Parameter: + * @1: application name + * @2: z/VM user ID + * Description: + * Messages or packets destined for the application have accumulated and + * reached the maximum value. The default for the message limit is 65535. + * You can specify a different limit as the value for MSGLIMIT within + * the IUCV statement of the z/VM virtual machine on which the application + * runs. + * User action: + * Ensure that you do not send data faster than the application retrieves + * them. Ensure that the message limit on the z/VM guest virtual machine + * on which the application runs is high enough. + */ + +/*? Text: "Attempt to release alive iucv socket %p\n" */ +/*? Text: "netif_stop_queue() cannot be called before register_netdev()\n" */ +/*? Text: "flen=%u proglen=%u pass=%u image=%pK from=%s pid=%d\n" */ +/*? Text: "%s selects TX queue %d, but real number of TX queues is %d\n" */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/ap b/Documentation/kmsg/s390/ap new file mode 100644 index 0000000000000..ffa1a43001c4c --- /dev/null +++ b/Documentation/kmsg/s390/ap @@ -0,0 +1,49 @@ +/*? + * Text: "%d is not a valid cryptographic domain\n" + * Severity: Warning + * Parameter: + * @1: AP domain index + * Description: + * The cryptographic domain specified for the 'domain=' module or kernel + * parameter must be an integer in the range 0 to 15. + * User action: + * Reload the cryptographic device driver with a correct module parameter. + * If the device driver has been compiled into the kernel, correct the value + * in the kernel parameter line and reboot Linux. + */ + +/*? + * Text: "The hardware system does not support AP instructions\n" + * Severity: Warning + * Description: + * The ap module addresses AP adapters through AP instructions. The hardware + * system on which the Linux instance runs does not support AP instructions. + * The ap module cannot detect any AP adapters. + * User action: + * Load the ap module only if your Linux instance runs on hardware that + * supports AP instructions. If the ap module has been compiled into the kernel, + * ignore this message. + */ + +/*? + * Text: "Registering adapter interrupts for AP device %02x.%04x failed\n" + * Severity: Error + * Parameter: + * @1: AP device ID + * @2: AP queue + * Description: + * The hardware system supports AP adapter interrupts but failed to enable + * an adapter for interrupts. Possible causes for this error are: + * i) The AP adapter firmware does not support AP interrupts. + * ii) An AP adapter firmware update to a firmware level that supports AP + * adapter interrupts failed. + * iii) The AP adapter firmware has been successfully updated to a level that + * supports AP interrupts but the new firmware has not been activated. + * User action: + * Ensure that the firmware on your AP adapters support AP interrupts and that + * any firmware updates have completed successfully. If necessary, deconfigure + * your cryptographic adapters and reconfigure them to ensure that any firmware + * updates become active, then reload the ap module. If the ap module has been + * compiled into the kernel, reboot Linux. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/appldata b/Documentation/kmsg/s390/appldata new file mode 100644 index 0000000000000..1129f96c313cb --- /dev/null +++ b/Documentation/kmsg/s390/appldata @@ -0,0 +1,91 @@ +/*? + * Text: "Starting the data collection for %s failed with rc=%d\n" + * Severity: Error + * Parameter: + * @1: appldata module + * @2: return code + * Description: + * The specified data collection module used the z/VM diagnose call + * DIAG 0xDC to start writing data. z/VM returned an error and the data + * collection could not start. If the return code is 5, your z/VM guest + * virtual machine is not authorized to write data records. + * User action: + * If the return code is 5, ensure that your z/VM guest virtual machine's + * entry in the z/VM directory includes the OPTION APPLMON statement. + * For other return codes see the section about DIAGNOSE Code X'DC' + * in "z/VM CP Programming Services". + */ + +/*? + * Text: "Stopping the data collection for %s failed with rc=%d\n" + * Severity: Error + * Parameter: + * @1: appldata module + * @2: return code + * Description: + * The specified data collection module used the z/VM diagnose call DIAG 0xDC + * to stop writing data. z/VM returned an error and the data collection + * continues. + * User action: + * See the section about DIAGNOSE Code X'DC' in "z/VM CP Programming Services". + */ + +/*? + * Text: "Starting a new OS data collection failed with rc=%d\n" + * Severity: Error + * Parameter: + * @1: return code + * Description: + * After a CPU hotplug event, the record size for the running operating + * system data collection is no longer correct. The appldata_os module tried + * to start a new data collection with the correct record size but received + * an error from the z/VM diagnose call DIAG 0xDC. Any data collected with + * the current record size might be faulty. + * User action: + * Start a new data collection with the cappldata_os module. For information + * about starting data collections see "Device Drivers, Features, and + * Commands". For information about the return codes see the section about + * DIAGNOSE Code X'DC' in "z/VM CP Programming Services". + */ + +/*? + * Text: "Stopping a faulty OS data collection failed with rc=%d\n" + * Severity: Error + * Parameter: + * @1: return code + * Description: + * After a CPU hotplug event, the record size for the running operating + * system data collection is no longer correct. The appldata_os module tried + * to stop the faulty data collection but received an error from the z/VM + * diagnose call DIAG 0xDC. Any data collected with the current record size + * might be faulty. + * User action: + * Try to restart appldata_os monitoring. For information about stopping + * and starting data collections see "Device Drivers, Features, and + * Commands". For information about the return codes see the section about + * DIAGNOSE Code X'DC' in "z/VM CP Programming Services". + */ + +/*? + * Text: "Maximum OS record size %i exceeds the maximum record size %i\n" + * Severity: Error + * Parameter: + * @1: no of bytes + * @2: no of bytes + * Description: + * The OS record size grows with the number of CPUs and is adjusted by the + * appldata_os module in response to CPU hotplug events. For more than 110 + * CPUs the record size would exceed the maximum record size of 4024 bytes + * that is supported by the z/VM hypervisor. To prevent the maximum supported + * record size from being exceeded while data collection is in progress, + * you cannot load the appldata_os module on Linux instances that are + * configured for a maximum of more than 110 CPUs. + * User action: + * If you do not want to collect operating system data, you can ignore this + * message. If you want to collect operating system data, reconfigure your + * Linux instance to support less than 110 CPUs. + */ + +/*? Text: "netif_stop_queue() cannot be called before register_netdev()\n" */ +/*? Text: "%s selects TX queue %d, but real number of TX queues is %d\n" */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/bpf_jit b/Documentation/kmsg/s390/bpf_jit new file mode 100644 index 0000000000000..da2fb17c6783a --- /dev/null +++ b/Documentation/kmsg/s390/bpf_jit @@ -0,0 +1,16 @@ +/*? Text: "flen=%u proglen=%u pass=%u image=%pK from=%s pid=%d\n" */ +/*? Text: "%s selects TX queue %d, but real number of TX queues is %d\n" */ +/*? Text: "netif_stop_queue() cannot be called before register_netdev()\n" */ + +/*? + * Text: "Unknown opcode %02x\n" + * Severity: Error + * Parameter: + * @1: Instruction opcode + * Description: + * The BPF JIT compiler has found an unknown instruction in the BPF program + * and therefore stops the compilation. As a fallback, the interpreter is used. + * User action: + * Report this problem and the error message to your support organization. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/cio b/Documentation/kmsg/s390/cio new file mode 100644 index 0000000000000..e90aae08d87ba --- /dev/null +++ b/Documentation/kmsg/s390/cio @@ -0,0 +1,247 @@ +/*? + * Text: "%s is not a valid device for the cio_ignore kernel parameter\n" + * Severity: Warning + * Parameter: + * @1: device bus-ID + * Description: + * The device specification for the cio_ignore kernel parameter is + * syntactically incorrect or specifies an unknown device. This device is not + * excluded from being sensed and analyzed. + * User action: + * Correct your device specification in the kernel parameter line to have the + * device excluded when you next reboot Linux. You can write the correct + * device specification to /proc/cio_ignore to add the device to the list of + * devices to be excluded. This does not immediately make the device + * inaccessible but the device is ignored if it disappears and later reappears. + */ + +/*? + * Text: "0.%x.%04x to 0.%x.%04x is not a valid range for cio_ignore\n" + * Severity: Warning + * Parameter: + * @1: from subchannel set ID + * @2: from device number + * @3: to subchannel set ID + * @4: to device number + * Description: + * The device range specified for the cio_ignore kernel parameter is + * syntactically incorrect. No devices specified with this range are + * excluded from being sensed and analyzed. + * User action: + * Correct your range specification in the kernel parameter line to have the + * range of devices excluded when you next reboot Linux. You can write the + * correct range specification to /proc/cio_ignore to add the range of devices + * to the list of devices to be excluded. This does not immediately make the + * devices in the range inaccessible but any of these devices are ignored if + * they disappear and later reappear. + */ + +/*? + * Text: "Processing %s for channel path %x.%02x\n" + * Severity: Notice + * Parameter: + * @1: configuration change + * @2: channel subsystem ID + * @3: CHPID + * Description: + * A configuration change is in progress for the given channel path. + * User action: + * None. + */ + +/*? + * Text: "No CCW console was found\n" + * Severity: Warning + * Description: + * Linux did not find the expected CCW console and tries to use an alternative + * console. A possible reason why the console was not found is that the console + * has been specified in the cio_ignore list. + * User action: + * None, if an appropriate alternative console has been found, and you want + * to use this alternative console. If you want to use the CCW console, ensure + * that is not specified in the cio_ignore list, explicitly specify the console + * with the 'condev=' kernel parameter, and reboot Linux. + */ + +/*? + * Text: "Channel measurement facility initialized using format %s (mode %s)\n" + * Severity: Informational + * Parameter: + * @1: format + * @2: mode + * Description: + * The channel measurement facility has been initialized successfully. + * Format 'extended' should be used for z990 and later mainframe systems. + * Format 'basic' is intended for earlier mainframes. Mode 'autodetected' means + * that the format has been set automatically. Mode 'parameter' means that the + * format has been set according to the 'format=' kernel parameter. + * User action: + * None. + */ + +/*? + * Text: "The CSS device driver initialization failed with errno=%d\n" + * Severity: Alert + * Parameter: + * @1: Return code + * Description: + * The channel subsystem bus could not be established. + * User action: + * See the errno man page to find out what caused the problem. + */ + /*? Text: "%s: Got subchannel machine check but no sch_event handler provided.\n" */ + +/*? + * Text: "%s: Setting the device online failed because it is boxed\n" + * Severity: Warning + * Parameter: + * @1: Device bus-ID + * Description: + * Initialization of a device did not complete because it did not respond in + * time or it was reserved by another operating system. + * User action: + * Make sure that the device is working correctly, then try again to set it + * online. For devices that support the reserve/release mechanism (for example + * DASDs), you can try to override the reservation of the other system by + * writing 'force' to the 'online' sysfs attribute of the affected device. + */ + +/*? + * Text: "%s: Setting the device online failed because it is not operational\n" + * Severity: Warning + * Parameter: + * @1: Device bus-ID + * Description: + * Initialization of a device did not complete because it is not present or + * not operational. + * User action: + * Make sure that the device is present and working correctly, then try again + * to set it online. + */ + +/*? + * Text: "%s: The device stopped operating while being set offline\n" + * Severity: Warning + * Parameter: + * @1: Device bus-ID + * Description: + * While the device was set offline, it was not present or not operational. + * The device is now inactive, but setting it online again might fail. + * User action: + * None. + */ + +/*? + * Text: "%s: The device entered boxed state while being set offline\n" + * Severity: Warning + * Parameter: + * @1: Device bus-ID + * Description: + * While the device was set offline, it did not respond in time or it was + * reserved by another operating system. The device is now inactive, but + * setting it online again might fail. + * User action: + * None. + */ + +/*? + * Text: "Logging for subchannel 0.%x.%04x failed with errno=%d\n" + * Severity: Warning + * Parameter: + * @1: subchannel set ID + * @2: subchannel number + * @3: errno + * Description: + * Capturing model-dependent logs and traces could not be triggered for the + * specified subchannel. + * User action: + * See the errno man page to find out what caused the problem. + */ + +/*? + * Text: "Logging for subchannel 0.%x.%04x was triggered\n" + * Severity: Notice + * Parameter: + * @1: subchannel set ID + * @2: subchannel number + * Description: + * Model-dependent logs and traces may be captured for the specified + * subchannel. + * User action: + * None. + */ + +/*? + * Text: "%s: No interrupt was received within %lus (CS=%02x, DS=%02x, CHPID=%x.%02x)\n" + * Severity: Warning + * Parameter: + * @1: device number + * @2: timeout value + * @3: channel status + * @4: device status + * @5: channel subsystem ID + * @6: CHPID + * Description: + * Internal I/Os are used by the common I/O layer to ensure that devices are + * operational and accessible. + * The common I/O layer did not receive an interrupt for an internal I/O + * during the specified timeout period. + * As a result, the device might assume a state that makes the device + * unusable to Linux until the problem is resolved. + * User action: + * Make sure that the device is working correctly and try the action again. + */ + +/*? + * Text: "Link stopped: RS=%02x RSID=%04x IC=%02x IUPARAMS=%s IUNODEID=%s AUPARAMS=%s AUNODEID=%s\n" + * Severity: Error + * Parameter: + * @1: reporting source + * @2: reporting source ID + * @3: incident code + * @4: incident unit parameters + * @5: incident unit node ID + * @6: attached unit parameters + * @7: attached unit node ID + * + * Description: + * A hardware error has occurred. A unit at one end of an interface + * link has detected a failure in the link or in one of the units attached to + * the link. As a result, data transfer across the link has stopped. In the + * message text, the node IDs of involved units are represented in the + * following format: TTTTTT/MDL,MMM.PPSSSSSSSSSSSS,XXXX where TTTTTT refers to + * the machine type, MDL the model number, MMM the manufacturer, PP the + * manufacturing plant, SSSSSSSSSSSS the unit sequence number and XXXX the + * machine type-dependent physical interface number. If no data is available + * for the unit parameters or node ID field, "n/a" is used instead. + * + * User action: + * Report the problem to your support organization. + */ + +/*? + * Text: "Link degraded: RS=%02x RSID=%04x IC=%02x IUPARAMS=%s IUNODEID=%s AUPARAMS=%s AUNODEID=%s\n" + * Severity: Warning + * Parameter: + * @1: reporting source + * @2: reporting source ID + * @3: incident code + * @4: incident unit parameters + * @5: incident unit node ID + * @6: attached unit parameters + * @7: attached unit node ID + * Description: + * A hardware error has occurred. A unit at one end of an interface + * link has detected a failure in the link or in one of the units attached to + * the link. As a result, data transfer across the link is degraded. In the + * message text, the node IDs of involved units are represented in the + * following format: TTTTTT/MDL,MMM.PPSSSSSSSSSSSS,XXXX where TTTTTT refers to + * the machine type, MDL the model number, MMM the manufacturer, PP the + * manufacturing plant, SSSSSSSSSSSS the unit sequence number and XXXX the + * machine type-dependent physical interface number. If no data is available + * for the unit parameters or node ID field, "n/a" is used instead. + * + * User action: + * Report the problem to your support organization. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/cpcmd b/Documentation/kmsg/s390/cpcmd new file mode 100644 index 0000000000000..e1b7eac834c45 --- /dev/null +++ b/Documentation/kmsg/s390/cpcmd @@ -0,0 +1,16 @@ +/*? + * Text: "The cpcmd kernel function failed to allocate a response buffer\n" + * Severity: Warning + * Description: + * IPL code, console detection, and device drivers like vmcp or vmlogrdr use + * the cpcmd kernel function to send commands to the z/VM control program (CP). + * If a program that uses the cpcmd function does not allocate a contiguous + * response buffer below 2 GB guest real storage, cpcmd creates a bounce buffer + * to be used as the response buffer. Because of low memory or memory + * fragmentation, cpcmd could not create the bounce buffer. + * User action: + * Look for related page allocation failure messages and at the stack trace to + * find out which program or operation failed. Free some memory and retry the + * failed operation. Consider allocating more memory to your z/VM guest virtual + * machine. + */ diff --git a/Documentation/kmsg/s390/cpu b/Documentation/kmsg/s390/cpu new file mode 100644 index 0000000000000..c0d3613a7db67 --- /dev/null +++ b/Documentation/kmsg/s390/cpu @@ -0,0 +1,46 @@ +/*? + * Text: "%d configured CPUs, %d standby CPUs\n" + * Severity: Informational + * Parameter: + * @1: number of configured CPUs + * @2: number of standby CPUs + * Description: + * The kernel detected the given number of configured and standby CPUs. + * User action: + * None. + */ + +/*? + * Text: "The CPU configuration topology of the machine is:" + * Severity: Informational + * Description: + * The first six values of the topology information represent fields Mag6 to + * Mag1 of system-information block (SYSIB) 15.1.2. These fields specify the + * maximum numbers of topology-list entries (TLE) at successive topology nesting + * levels. The last value represents the MNest value of SYSIB 15.1.2 which + * specifies the maximum possible nesting that can be configured through + * dynamic changes. For details see the SYSIB 15.1.2 information in the + * "Principles of Operation." + * User action: + * None. + */ + +/*? + * Text: "CPU %i exceeds the maximum %i and is excluded from the dump\n" + * Severity: Warning + * Parameter: + * @1: CPU number + * @2: maximum CPU number + * Description: + * The Linux kernel is used as a system dumper but it runs on more CPUs than + * it has been compiled for with the CONFIG_NR_CPUS kernel configuration + * option. The system dump will be created but information on one or more + * CPUs will be missing. + * User action: + * Update the system dump kernel to a newer version that supports more + * CPUs or reduce the number of installed CPUs and reproduce the problem + * that should be analyzed. If you send the system dump that prompted this + * message to a support organization, be sure to communicate that the dump + * does not include all CPU information. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/cpum_cf b/Documentation/kmsg/s390/cpum_cf new file mode 100644 index 0000000000000..fe5d83a5ba7fc --- /dev/null +++ b/Documentation/kmsg/s390/cpum_cf @@ -0,0 +1,68 @@ +/*? + * Text: "Enabling the performance measuring unit failed with rc=%x\n" + * Severity: Error + * Parameter: + * @1: error condition + * Description: + * The device driver failed to enable CPU counter sets with the + * load counter controls (lcctl) instruction. + * See the section about lcctl in "The Load-Program-Parameter and the CPU-Measurement + * Facilities", SA23-2260, for an explanation of the error conditions. + * User action: + * Stop the performance measurement programs and try again. + */ + +/*? + * Text: "Disabling the performance measuring unit failed with rc=%x\n" + * Severity: Error + * Parameter: + * @1: error condition + * Description: + * The device driver failed to disable CPU counter sets with the + * load counter controls (lcctl) instruction. + * See the section about lcctl in "The Load-Program-Parameter and the CPU-Measurement + * Facilities", SA23-2260, for an explanation of the error conditions. + * User action: + * Stop the performance measurement programs and try again. + */ + +/*? + * Text: "Registering the cpum_cf PMU failed with rc=%i\n" + * Severity: Error + * Parameter: + * @1: error code + * Description: + * The device driver could not register the Performance Measurement Unit (PMU) + * for the CPU-measurement counter facility. + * A possible cause of this problem is memory constraints. + * User action: + * If the error code is -12 (ENOMEM), consider assigning more memory + * to your Linux instance. + */ + +/*? + * Text: "CPU[%i] Counter data was lost\n" + * Severity: Error + * Parameter: + * @1: cpu number + * Description: + * CPU counter data was lost because of machine internal + * high-priority activities. + * User action: + * None. + */ + +/*? + * Text: "Registering for CPU-measurement alerts failed with rc=%i\n" + * Severity: Error + * Parameter: + * @1: error code + * Description: + * The device driver could not register to receive CPU-measurement alerts. + * Alerts make you aware of measurement errors. + * A possible cause of this problem is memory constraints. + * User action: + * If the error code is -12 (ENOMEM), consider assigning more memory + * to your Linux instance. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/cpum_sf b/Documentation/kmsg/s390/cpum_sf new file mode 100644 index 0000000000000..3187c303556ab --- /dev/null +++ b/Documentation/kmsg/s390/cpum_sf @@ -0,0 +1,104 @@ +/*? + * Text: "The sampling buffer limits have changed to: min=%lu max=%lu (diag=x%lu)\n" + * Severity: Informational + * Parameter: + * @1: minimum size in sample-data-blocks + * @2: maximum size in sample-data-blocks + * @3: size factor for buffering diagnostic-sampling data entries + * Description: + * The minimum or maximum size limit for the sampling facility buffer was + * changed. The change is effective immediately. + * User action: + * None. + */ + +/*? + * Text: "Switching off the sampling facility failed with rc=%i\n" + * Severity: Error + * Parameter: + * @1: error condition + * Description: + * The CPU-measurement sampling facility could not be switched off and continues + * to run. For details, see LOAD SAMPLING CONTROLS in + * "The Load-Program-Parameter and the CPU-Measurement Facilities", SA23-2260. + * User action: + * If this problem persists, reboot your Linux instance. + */ + +/*? + * Text: "Sample data was lost\n" + * Severity: Error + * Description: + * Sample data was lost because of machine-internal high-priority activities. + * The sampling facility is stopped. + * User action: + * End all performance measurement sessions. Discard the measurement data, + * which are likely to be flawed. Repeat your measurements. + * If the problem persists, contact your hardware administrator. + */ + +/*? + * Text: "Sampling facility support for perf is not available: reason=%04x\n" + * Severity: Error + * Parameter: + * @1: reason code + * Description: + * The device driver could not initialize the sampling facility support. + * Possible reason codes are: + * 0001: The device driver failed to query CPU-measurement sampling facility + * information. + * + * 0002: The device driver does not support the basic-sampling function that + * is available on the LPAR within which the Linux instance runs. + * + * 0003: The device driver could not register to receive CPU-measurement alerts. + * A possible cause of this problem is memory constraints. + * + * 0004: The device driver could not register the Performance Measurement Unit + * (PMU) for the CPU-measurement sampling facility. + * A possible cause of this problem is memory constraints. + * User action: + * Consider assigning more memory to your Linux instance. + */ + +/*? + * Text: "Loading sampling controls failed: op=%i err=%i\n" + * Severity: Error + * Parameter: + * @1: Type of operation + * @2: Error condition + * Description: + * The sampling facility support could not load sampling controls to enable + * (operation type 1) or disable (operation type 2) the CPU-measurement sampling + * facility. For details of the error condition, see LOAD SAMPLING CONTROLS in + * "The Load-Program-Parameter and the CPU-Measurement Facilities", SA23-2260. + * User action: + * If the problem persists, reboot your Linux instance. + */ + +/*? + * Text: "A sampling buffer entry is incorrect (alert=0x%x)\n" + * Severity: Error + * Parameter: + * @1: Alert code + * Description: + * An incorrect sampling facility buffer entry was detected. The alert code + * indicates the root cause, for example, an incorrect entry address or an + * incorrect sample-data-block-table entry. + * User action: + * End active performance measurement sessions, for example, perf processes. If + * the problem persists, reboot your Linux instance. + */ + +/*? + * Text: "Registering for s390dbf failed\n" + * Severity: Error + * Description: + * The device driver failed to register for the s390 debug feature. You will + * not receive any debug information. A possible cause of this problem is + * memory constraints. + * User action: + * Consider assigning more memory + * to your Linux instance. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/crc32-vx b/Documentation/kmsg/s390/crc32-vx new file mode 100644 index 0000000000000..4553d8b236cc8 --- /dev/null +++ b/Documentation/kmsg/s390/crc32-vx @@ -0,0 +1 @@ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/ctcm b/Documentation/kmsg/s390/ctcm new file mode 100644 index 0000000000000..9437da9c9ffe8 --- /dev/null +++ b/Documentation/kmsg/s390/ctcm @@ -0,0 +1,202 @@ +/*? + * Text: "%s: An I/O-error occurred on the CTCM device\n" + * Severity: Error + * Parameter: + * @1: bus ID of the CTCM device + * Description: + * An I/O error was detected on one of the subchannels of the CTCM device. + * Depending on the error, the CTCM device driver might attempt an automatic + * recovery. + * User action: + * Check the status of the CTCM device, for example, with ifconfig. If the + * device is not operational, perform a manual recovery. See "Device Drivers, + * Features, and Commands" for details about how to recover a CTCM device. + */ + +/*? + * Text: "%s: An adapter hardware operation timed out\n" + * Severity: Error + * Parameter: + * @1: bus ID of the CTCM device + * Description: + * The CTCM device uses an adapter to physically connect to its communication + * peer. An operation on this adapter timed out. + * User action: + * Check the status of the CTCM device, for example, with ifconfig. If the + * device is not operational, perform a manual recovery. See "Device Drivers, + * Features, and Commands" for details about how to recover a CTCM device. + */ + +/*? + * Text: "%s: An error occurred on the adapter hardware\n" + * Severity: Error + * Parameter: + * @1: bus ID of the CTCM device + * Description: + * The CTCM device uses an adapter to physically connect to its communication + * peer. An operation on this adapter returned an error. + * User action: + * Check the status of the CTCM device, for example, with ifconfig. If the + * device is not operational, perform a manual recovery. See "Device Drivers, + * Features, and Commands" for details about how to recover a CTCM device. + */ + +/*? + * Text: "%s: The communication peer has disconnected\n" + * Severity: Notice + * Parameter: + * @1: channel ID + * Description: + * The remote device has disconnected. Possible reasons are that the remote + * interface has been closed or that the operating system instance with the + * communication peer has been rebooted or shut down. + * User action: + * Check the status of the peer device. Ensure that the peer operating system + * instance is running and that the peer interface is operational. + */ + +/*? + * Text: "%s: The remote operating system is not available\n" + * Severity: Notice + * Parameter: + * @1: channel ID + * Description: + * The operating system instance with the communication peer has disconnected. + * Possible reasons are that the operating system instance has been rebooted + * or shut down. + * User action: + * Ensure that the peer operating system instance is running and that the peer + * interface is operational. + */ + +/*? + * Text: "%s: The adapter received a non-specific IRQ\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the CTCM device + * Description: + * The adapter hardware used by the CTCM device received an IRQ that cannot + * be mapped to a particular device. This is a hardware problem. + * User action: + * Check the status of the CTCM device, for example, with ifconfig. Check if + * the connection to the remote device still works. If the CTCM device is not + * operational, set it offline and back online. If this does not resolve the + * problem, perform a manual recovery. See "Device Drivers, Features, and + * Commands" for details about how to recover a CTCM device. If this problem + * persists, gather Linux debug data, collect the hardware logs, and report the + * problem to your support organization. + */ + +/*? + * Text: "%s: A check occurred on the subchannel\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the CTCM device + * Description: + * A check condition has been detected on the subchannel. + * User action: + * Check if the connection to the remote device still works. If the CTCM device + * is not operational, set it offline and back online. If this does not resolve + * the problem, perform a manual recovery. See "Device Drivers, Features, and + * Commands" for details about how to recover a CTCM device. If this problem + * persists, gather Linux debug data and report the problem to your support + * organization. + */ + +/*? + * Text: "%s: The communication peer is busy\n" + * Severity: Informational + * Parameter: + * @1: channel ID + * Description: + * A busy target device was reported. This might be a temporary problem. + * User action: + * If this problem persists or is reported frequently ensure that the target + * device is working properly. + */ + +/*? + * Text: "%s: The specified target device is not valid\n" + * Severity: Error + * Parameter: + * @1: channel ID + * Description: + * A target device was called with a faulty device specification. This is an + * adapter hardware problem. + * User action: + * Gather Linux debug data, collect the hardware logs, and contact IBM support. + */ + +/*? + * Text: "An I/O operation resulted in error %04x\n" + * Severity: Error + * Parameter: + * @1: channel ID + * @2: error information + * Description: + * A hardware operation ended with an error. + * User action: + * Check the status of the CTCM device, for example, with ifconfig. If the + * device is not operational, perform a manual recovery. See "Device Drivers, + * Features, and Commands" for details about how to recover a CTCM device. + * If this problem persists, gather Linux debug data, collect the hardware logs, + * and report the problem to your support organization. + */ + +/*? + * Text: "%s: Initialization failed with RX/TX init handshake error %s\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the CTCM device + * @2: error information + * Description: + * A problem occurred during the initialization of the connection. If the + * connection can be established after an automatic recovery, a success message + * is issued. + * User action: + * If the problem is not resolved by the automatic recovery process, check the + * local and remote device. If this problem persists, gather Linux debug data + * and report the problem to your support organization. + */ + +/*? + * Text: "%s: The network backlog for %s is exceeded, package dropped\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the CTCM device + * @2: calling function + * Description: + * There is more network traffic than can be handled by the device. The device + * is closed and some data has not been transmitted. The device might be + * recovered automatically. + * User action: + * Investigate and resolve the congestion. If necessary, set the device + * online to make it operational. + */ + +/*? + * Text: "%s: The XID used in the MPC protocol is not valid, rc = %d\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the CTCM device + * @2: return code + * Description: + * The exchange identification (XID) used by the CTCM device driver when + * in MPC mode is not valid. + * User action: + * Note the error information provided with this message and contact your + * support organization. + */ + +/*? Text: "CTCM driver unloaded\n" */ +/*? Text: "%s: %s Internal error: net_device is NULL, ch = 0x%p\n" */ +/*? Text: "%s / Initializing the ctcm device driver failed, ret = %d\n" */ +/*? Text: "%s: %s: Internal error: Can't determine channel for interrupt device %s\n" */ +/*? Text: "CTCM driver initialized\n" */ +/*? Text: "%s: setup OK : r/w = %s/%s, protocol : %d\n" */ +/*? Text: "%s: Connected with remote side\n" */ +/*? Text: "%s: Restarting device\n" */ +/*? Text: "netif_stop_queue() cannot be called before register_netdev()\n" */ +/*? Text: "flen=%u proglen=%u pass=%u image=%pK from=%s pid=%d\n" */ +/*? Text: "%s selects TX queue %d, but real number of TX queues is %d\n" */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/dasd b/Documentation/kmsg/s390/dasd new file mode 100644 index 0000000000000..975a8ccec89a0 --- /dev/null +++ b/Documentation/kmsg/s390/dasd @@ -0,0 +1,704 @@ +/* dasd_ioctl */ + +/*? + * Text: "%s: The DASD has been put in the quiesce state\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the DASD + * Description: + * No I/O operation is possible on this device. + * User action: + * Resume the DASD to enable I/O operations. + */ + +/*? + * Text: "%s: I/O operations have been resumed on the DASD\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the DASD + * Description: + * The DASD is no longer in state quiesce and I/O operations can be performed + * on the device. + * User action: + * None. + */ + +/*? + * Text: "%s: The DASD cannot be formatted while it is enabled\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * The DASD you try to format is enabled. Enabled devices cannot be formatted. + * User action: + * Contact the owner of the formatting tool. + */ + +/*? + * Text: "%s: The specified DASD is a partition and cannot be formatted\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * The DASD you try to format is a partition. Partitions cannot be formatted + * separately. You can only format a complete DASD including all its partitions. + * User action: + * Format the complete DASD. + * ATTENTION: Formatting irreversibly destroys all data on all partitions + * of the DASD. + */ + +/*? + * Text: "%s: The specified DASD is a partition and cannot be checked\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * The DASD you try to check is a partition. Partitions cannot be checked + * separately. You can only check a complete DASD including all its partitions. + * User action: + * Check the complete DASD. + */ + +/*? + * Text: "%s: Formatting unit %d failed with rc=%d\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * @2: start track + * @3: return code + * Description: + * The formatting process might have been interrupted by a signal, for example, + * CTRL+C. If the process was not interrupted intentionally, an I/O error + * might have occurred. + * User action: + * Retry to format the device. If the error persists, check the log file for + * related error messages. If you cannot resolve the error, note the return + * code and contact your support organization. + */ + + +/* dasd */ + +/*? + * Text: "%s: Cancelling request %p failed with rc=%d\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * @2: pointer to request + * @3: return code of previous function + * Description: + * In response to a user action, the DASD device driver tried but failed to + * cancel a previously started I/O operation. + * User action: + * Try the action again. + */ + +/*? + * Text: "%s: Flushing the DASD request queue failed for request %p\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * @2: pointer to request + * Description: + * As part of the unloading process, the DASD device driver flushes the + * request queue. This failed because a previously started I/O operation + * could not be canceled. + * User action: + * Try again to unload the DASD device driver or to shut down Linux. + */ + +/*? + * Text: "The DASD device driver could not be initialized\n" + * Severity: Informational + * Description: + * The initialization of the DASD device driver failed because of previous + * errors. + * User action: + * Check for related previous error messages. + */ + +/*? + * Text: "%s: Accessing the DASD failed because it is in probeonly mode\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the DASD + * Description: + * The dasd= module or kernel parameter specified the probeonly attribute for + * the DASD you are trying to access. The DASD device driver cannot access + * DASDs that are in probeonly mode. + * User action: + * Change the dasd= parameter as to omit probeonly for the DASD and reload + * the DASD device driver. If the DASD device driver has been compiled into + * the kernel, reboot Linux. + */ + +/*? + * Text: "%s: cqr %p timed out (%lus), %i retries remaining\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * @2: request + * @3: timeout value + * @4: number of retries left + * Description: + * A try of the error recovery procedure (ERP) for the channel queued request + * (cqr) timed out and failed to recover the error. ERP continues for the DASD. + * User action: + * Ignore this message if it occurs infrequently and if the recovery succeeds + * during one of the retries. If this error persists, check for related + * previous error messages and report the problem to your support organization. + * + * The timeout can be changed by writing a new value to the sysfs 'expires' attribute of the DASD. The value specifies the timeout in seconds. + */ + +/*? + * Text: "%s: cqr %p timed out (%lus) but cannot be ended, retrying in 5 s\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * @2: request + * @3: timeout value + * Description: + * A try of the error recovery procedure (ERP) for the channel queued request + * (cqr) timed out and failed to recover the error. The I/O request submitted + * during the try could not be canceled. The ERP waits for 5 seconds before + * trying again. + * User action: + * Ignore this message if it occurs infrequently and if the recovery succeeds + * during one of the retries. If this error persists, check for related + * previous error messages and report the problem to your support organization. + * + * The timeout can be changed by writing a new value to the sysfs 'expires' attribute of the DASD. The value specifies the timeout in seconds. + */ + +/*? + * Text: "%s: The DASD cannot be set offline while it is in use\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * The DASD cannot be set offline because it is in use by an internal process. + * An action to free the DASD might not have completed yet. + * User action: + * Wait some time and set the DASD offline later. + */ + +/*? + * Text: "%s: The DASD cannot be set offline with open count %i\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: count + * Description: + * The DASD is being used by one or more processes and cannot be set offline. + * User action: + * Ensure that the DASD is not in use anymore, for example, unmount all + * partitions. Then try again to set the DASD offline. + */ + +/*? + * Text: "%s: Setting the DASD online failed with rc=%d\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: return code + * Description: + * The DASD could not be set online because of previous errors. + * User action: + * Look for previous error messages. If you cannot resolve the error, note + * the return code and contact your support organization. + */ + +/*? + * Text: "%s Setting the DASD online with discipline %s failed with rc=%i\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: discipline + * @3: return code + * Description: + * The DASD could not be set online because of previous errors. + * User action: + * Look for previous error messages. If you cannot resolve the error, note the + * return code and contact your support organization. + */ + +/*? + * Text: "%s Setting the DASD online failed because of missing DIAG discipline\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * The DASD was to be set online with discipline DIAG but this discipline of + * the DASD device driver is not available. + * User action: + * Ensure that the dasd_diag_mod module is loaded. If your Linux system does + * not include this module, you cannot set DASDs online with the DIAG + * discipline. + */ + +/*? + * Text: "%s Setting the DASD online failed because of a missing discipline\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * The DASD was to be set online with a DASD device driver discipline that + * is not available. + * User action: + * Ensure that all DASD modules are loaded correctly. + */ + +--------------------------- + +/*? + * Text: "The statistics feature has been switched off\n" + * Severity: Informational + * Description: + * The statistics feature of the DASD device driver has been switched off. + * User action: + * None. + */ + +/*? + * Text: "The statistics feature has been switched on\n" + * Severity: Informational + * Description: + * The statistics feature of the DASD device driver has been switched on. + * User action: + * None. + */ + +/*? + * Text: "The statistics have been reset\n" + * Severity: Informational + * Description: + * The DASD statistics data have been reset. + * User action: + * None. + */ + +/*? + * Text: "%s is not a supported value for /proc/dasd/statistics\n" + * Severity: Warning + * Parameter: + * @1: value + * Description: + * An incorrect value has been written to /proc/dasd/statistics. + * The supported values are: 'set on', 'set off', and 'reset'. + * User action: + * Write a supported value to /proc/dasd/statistics. + */ + +/*? + * Text: "%s is not a valid device range\n" + * Severity: Error + * Parameter: + * @1: range + * Description: + * A device range specified with the dasd= parameter is not valid. + * User action: + * Examine the dasd= parameter and correct the device range. + */ + +/*? + * Text: "The probeonly mode has been activated\n" + * Severity: Informational + * Description: + * The probeonly mode of the DASD device driver has been activated. In this + * mode the device driver rejects any 'open' syscalls with EPERM. + * User action: + * None. + */ + +/*? + * Text: "The IPL device is not a CCW device\n" + * Severity: Error + * Description: + * The value for the dasd= parameter contains the 'ipldev' keyword. During + * the boot process this keyword is replaced with the device from which the + * IPL was performed. The 'ipldev' keyword is not valid if the IPL device is + * not a CCW device. + * User action: + * Do not specify the 'ipldev' keyword when performing an IPL from a device + * other than a CCW device. + */ + +/*? + * Text: "A closing parenthesis ')' is missing in the dasd= parameter\n" + * Severity: Warning + * Description: + * The specification for the dasd= kernel or module parameter has an opening + * parenthesis '(' * without a matching closing parenthesis ')'. + * User action: + * Correct the parameter value. + */ + +/*? + * Text: "The autodetection mode has been activated\n" + * Severity: Informational + * Description: + * The autodetection mode of the DASD device driver has been activated. In + * this mode the DASD device driver sets all detected DASDs online. + * User action: + * None. + */ + +/*? + * Text: "%*s is not a supported device option\n" + * Severity: Warning + * Parameter: + * @1: length of option code + * @2: option code + * Description: + * The dasd= parameter includes an unknown option for a DASD or a device range. + * Options are specified in parenthesis and immediately follow a device or + * device range. + * User action: + * Check the dasd= syntax and remove any unsupported options from the dasd= + * parameter specification. + */ + +/*? + * Text: "PAV support has be deactivated\n" + * Severity: Informational + * Description: + * The 'nopav' keyword has been specified with the dasd= kernel or module + * parameter. The Parallel Access Volume (PAV) support of the DASD device + * driver has been deactivated. + * User action: + * None. + */ + +/*? + * Text: "'nopav' is not supported on z/VM\n" + * Severity: Informational + * Description: + * For Linux instances that run as guest operating systems of the z/VM + * hypervisor Parallel Access Volume (PAV) support is controlled by z/VM not + * by Linux. + * User action: + * Remove 'nopav' from the dasd= module or kernel parameter specification. + */ + +/*? + * Text: "High Performance FICON support has been deactivated\n" + * Severity: Informational + * Description: + * The 'nofcx' keyword has been specified with the dasd= kernel or module + * parameter. The High Performance FICON (transport mode) support of the DASD + * device driver has been deactivated. + * User action: + * None. + */ + +/*? + * Text: "The dasd= parameter value %s has an invalid ending\n" + * Severity: Warning + * Parameter: + * @1: parameter value + * Description: + * The specified value for the dasd= kernel or module parameter is not correct. + * User action: + * Check the module or the kernel parameter. + */ + +/*? + * Text: "Registering the device driver with major number %d failed\n" + * Severity: Warning + * Parameter: + * @1: DASD major + * Description: + * Major number 94 is reserved for the DASD device driver. The DASD device + * driver failed to register with this major number. Another device driver + * might have used major number 94. + * User action: + * Determine which device driver uses major number 94 instead of the DASD + * device driver and unload this device driver. Then try again to load the + * DASD device driver. + */ + +/*? + * Text: "%s: default ERP has run out of retries and failed\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * Description: + * The error recovery procedure (ERP) tried to recover an error but the number + * of retries for the I/O was exceeded before the error could be resolved. + * User action: + * Check for related previous error messages. + */ + +/*? + * Text: "%s: Unable to terminate request %p on suspend\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * @2: pointer to request + * Description: + * As part of the suspend process, the DASD device driver terminates requests + * on the request queue. This failed because a previously started I/O operation + * could not be canceled. The suspend process will be stopped. + * User action: + * Try again to suspend the system. + */ + +/*? + * Text: "%s: ERP failed for the DASD\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * Description: + * An error recovery procedure (ERP) was performed for the DASD but failed. + * User action: + * Check the message log for previous related error messages. + */ + +/*? + * Text: "%s: An error occurred in the DASD device driver, reason=%s\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * @2: reason code + * Description: + * This problem indicates a program error in the DASD device driver. + * User action: + * Note the reason code and contact your support organization. +*/ + +/*? + * Text: "%s: No operational channel path is left for the device\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * All channel paths to the device have become non-operational. The DASD + * device driver suspends I/O operations and queues I/O requests for this + * device until at least one channel path becomes operational again. + * User action: + * Ensure that each channel path to the device has been set up correctly + * and that the related physical cable connections are in place. + */ + +/*? + * Text: "%s: No verified channel paths remain for the device\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * All verified channel paths to the device have become non-operational. + * Any other paths to the device have previously been identified as not usable. + * The DASD device driver suspends I/O operations and queues I/O requests + * for this device until at least one channel path becomes operational + * again. + * User action: + * Ensure that each channel path to the device has been set up correctly + * and that the related physical cable connections are in place. + * Set all paths to the device offline and online again to repeat the path + * verification. Alternatively, set the device offline and online again to + * verify all available paths for this device. + * If this problem persists, gather Linux debug data and report the problem + * to your support organization. + */ + +/*? + * Text: "%s: A channel path to the device has become operational\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the DASD + * Description: + * At least one channel path of this device has become operational again. + * The DASD device driver resumes I/O operations to the device and processes + * the I/O requests that were queued while there was no operational channel path. + * User action: + * None. + */ + +------------------------------------------------------------------------------------ +/* dasd_diag */ + +/*? + * Text: "%s: A 64-bit DIAG call failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * 64-bit DIAG calls require a 64-bit z/VM version. + * User action: + * Use z/VM 5.2 or later or set the sysfs 'use_diag' attribute of the DASD to 0 + * to switch off DIAG. + */ + +/*? + * Text: "%s: Accessing the DASD failed because of an incorrect format (rc=%d)\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: return code + * Description: + * The format of the DASD is not correct. + * User action: + * Check the device format. For details about the return code see the + * section about the INITIALIZE function for DIAGNOSE Code X'250' + * in "z/VM CP Programming Services". If you cannot resolve the error, note + * the return code and contact your support organization. + */ + +/*? + * Text: "%s: New DASD with %ld byte/block, total size %ld KB%s\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the DASD + * @2: bytes per block + * @3: size + * @4: access mode + * Description: + * A DASD with the indicated block size and total size has been set online. + * If the DASD is configured as read-only to the real or virtual hardware, + * the message includes an indication of this hardware access mode. The + * hardware access mode is independent from the 'readonly' attribute of + * the device in sysfs. + * User action: + * None. + */ + +/*? + * Text: "%s: DIAG ERP failed with rc=%d\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: return code + * Description: + * An error in the DIAG processing could not be recovered by the error + * recovery procedure (ERP) of the DIAG discipline. + * User action: + * Note the return code, check for related I/O errors, and report this problem + * to your support organization. + */ + +/*? + * Text: "%s: DIAG initialization failed with rc=%d\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: return code + * Description: + * Initializing the DASD with the DIAG discipline failed. Possible reasons for + * this problem are that the device has a device type other than FBA or ECKD, + * or has a block size other than one of the supported sizes: + * 512 byte, 1024 byte, 2048 byte, or 4096 byte. + * User action: + * Ensure that the device can be written to and has a supported device type + * and block size. For details about the return code see the section about + * the INITIALIZE function for DIAGNOSE Code X'250' in "z/VM CP Programming + * Services". If you cannot resolve the error, note the error code and contact + * your support organization. + */ + +/*? + * Text: "%s: Device type %d is not supported in DIAG mode\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: device type + * Description: + * Only DASD of type FBA and ECKD are supported in DIAG mode. + * User action: + * Set the sysfs 'use_diag' attribute of the DASD to 0 and try again to access + * the DASD. + */ + +/*? + * Text: "Discipline %s cannot be used without z/VM\n" + * Severity: Informational + * Parameter: + * @1: discipline name + * Description: + * The discipline that is specified with the dasd= kernel or module parameter + * is only available for Linux instances that run as guest operating + * systems of the z/VM hypervisor. + * User action: + * Remove the unsupported discipline from the parameter string. + */ + +/*? + * Text: "%s: The access mode of a DIAG device changed to read-only\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * A device changed its access mode from writeable to + * read-only while in use. + * User action: + * Set the device offline, ensure that the device is configured correctly in + * z/VM, then set the device online again. + */ + +------------------------------------------------------------------------------------ +/* dasd_erp */ + +/*? + * Text: "%s: A timeout error occurred for cqr %p\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * @2: pointer to request + * Description: + * A channel queued request (cqr) failed because it timed out. + * One possible reason for this error is that a request did not + * complete within the timeout interval specified for the DASD. + * The timeout interval is set as the value of the 'timeout' sysfs + * attribute of a DASD. A value of 0 disables the timeout function. + * The timeout function can be used; for example, by mirroring setups; + * to quickly process a request queue for a DASD that has become unavailable. + * User action: + * Check the message log for previous related error messages. Verify + * that the storage server and the connection from host to storage + * server are operational. If the 'timeout' sysfs attribute of the + * DASD has been set to a value other than 0, verify that this + * setting is intentional and change it if required. + */ + +/*? + * Text: "%s: A transport error occurred for cqr %p\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * @2: pointer to request + * Description: + * A channel queued request (cqr) failed because the connection to the + * device was lost and the 'failfast' flag is set for the request. + * This flag can result from, for example: + * + * - A software layer above the DASD device driver; + * for example, in a host based mirroring setup. + * + * - Value 1 for the 'failfast' sysfs attribute of the DASD. + * This setting applies to all requests on the DASD. + * + * User action: + * Ensure that each channel path to the device has been set up + * correctly and that the related physical cable connections are in + * place. If the 'failfast' attribute of the DASD is set to 1, + * verify that this setting is intentional and change it to 0 if required. + */ + +/*? + * Text: "%s Setting the DASD online failed because the required module %s could not be loaded (rc=%d)\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: kernel module name + * @3: return code + * Description: + * The DASD was to be set online with discipline DIAG but this discipline of + * the DASD device driver is not available and an attempt to load the + * corresponding kernel module failed with the specified return code. + * + * User action: + * Ensure that the kernel module with the specified name is correctly installed + * or set the sysfs 'use_diag' attribute of the DASD to 0 to switch off DIAG. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/dasd-eckd b/Documentation/kmsg/s390/dasd-eckd new file mode 100644 index 0000000000000..fd58cba6f2553 --- /dev/null +++ b/Documentation/kmsg/s390/dasd-eckd @@ -0,0 +1,2154 @@ +/* dasd_eckd */ + +/*? + * Text: "%s: ERP failed for the DASD\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * Description: + * An error recovery procedure (ERP) was performed for the DASD but failed. + * User action: + * Check the message log for previous related error messages. + */ + +/*? + * Text: "%s: An error occurred in the DASD device driver, reason=%s\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * @2: reason code + * Description: + * This problem indicates a program error in the DASD device driver. + * User action: + * Note the reason code and contact your support organization. +*/ + +/*? + * Text: "%s: Allocating memory for private DASD data failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * The DASD device driver maintains data structures for each DASD it manages. + * There is not enough memory to allocate these data structures for one or + * more DASD. + * User action: + * Free some memory and try the operation again. + */ + +/*? + * Text: "%s: DASD with %d KB/block, %d KB total size, %d KB/track, %s\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the DASD + * @2: block size + * @3: DASD size + * @4: track size + * @5: disc layout + * Description: + * A DASD with the shown characteristics has been set online. + * User action: + * None. + */ + +/*? + * Text: "%s: Start track number %u used in formatting is too big\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: track number + * Description: + * The DASD format I/O control was used incorrectly by a formatting tool. + * User action: + * Contact the owner of the formatting tool. + */ + +/*? + * Text: "%s: Stop track number %u used in formatting is too big\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: track number + * Description: + * The DASD format I/O control was used incorrectly by a formatting tool. + * User action: + * Contact the owner of the formatting tool. + */ + +/*? + * Text: "%s: The DASD is not formatted\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * A DASD has been set online but it has not been formatted yet. You must + * format the DASD before you can use it. + * User action: + * Format the DASD, for example, with dasdfmt. + */ + +/*? + * Text: "%s: 0x%x is not a known command\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * @2: command + * Description: + * This problem is likely to be caused by a programming error. + * User action: + * Contact your support organization. + */ + +/*? + * Text: "%s: Track 0 has no records following the VTOC\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * Linux has identified a volume table of contents (VTOC) on the DASD but + * cannot read any data records following the VTOC. A possible cause of this + * problem is that the DASD has been used with another System z operating + * system. + * User action: + * Format the DASD for usage with Linux, for example, with dasdfmt. + * ATTENTION: Formatting irreversibly destroys all data on the DASD. + */ + +/*? + * Text: "%s: An I/O control call used incorrect flags 0x%x\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: flags + * Description: + * The DASD format I/O control was used incorrectly. + * User action: + * Contact the owner of the formatting tool. + */ + +/*? + * Text: "%s: New DASD %04X/%02X (CU %04X/%02X) with %d cylinders, %d heads, %d sectors%s\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the DASD + * @2: device type + * @3: device model + * @4: control unit type + * @5: control unit model + * @6: number of cylinders + * @7: tracks per cylinder + * @8: sectors per track + * @9: access mode + * Description: + * A DASD with the shown characteristics has been set online. + * If the DASD is configured as read-only to the real or virtual hardware, + * the message includes an indication of this hardware access mode. The + * hardware access mode is independent from the 'readonly' attribute of + * the device in sysfs. + * User action: + * None. + */ + +/*? + * Text: "%s: The disk layout of the DASD is not supported\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * The DASD device driver only supports the following disk layouts: CDL, LDL, + * FBA, CMS, and CMS RESERVED. + * User action: + * None. + */ + +/*? + * Text: "%s: Start track %u used in formatting exceeds end track\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: track number + * Description: + * The DASD format I/O control was used incorrectly by a formatting tool. + * User action: + * Contact the owner of the formatting tool. + */ + +/*? + * Text: "%s: The DASD cache mode was set to %x (%i cylinder prestage)\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the DASD + * @2: operation mode + * @3: number of cylinders + * Description: + * The DASD cache mode has been changed. See the storage system documentation + * for information about the different cache operation modes. + * User action: + * None. + */ + +/*? + * Text: "%s: The DASD cannot be formatted with block size %u\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: block size + * Description: + * The block size specified for a format instruction is not valid. The block + * size must be between 512 and 4096 byte and must be a power of 2. + * User action: + * Call the format command with a supported block size. + */ + +/*? + * Text: "%s: The UID of the DASD has changed\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * Description: + * The Unique Identifier (UID) of a DASD that is currently in use has changed. + * This indicates that the physical disk has been replaced. + * User action: + * None if the replacement was intentional. + * If the disk change is not expected, stop using the disk to prevent possible + * data loss. +*/ + + +/* dasd_3990_erp */ + +/*? + * Text: "%s: is offline or not installed - INTERVENTION REQUIRED!!\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * Description: + * The DASD to be accessed is not in an accessible state. The I/O operation + * will wait until the device is operational again. This is an operating system + * independent message that is issued by the storage system. + * User action: + * Make the DASD accessible again. For details see the storage system + * documentation. + */ + +/*? + * Text: "%s: The DASD cannot be reached on any path (lpum=%x/opm=%x)\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * @2: last path used mask + * @3: online path mask + * Description: + * After a path to the DASD failed, the error recovery procedure of the DASD + * device driver tried but failed to reconnect the DASD through an alternative + * path. + * User action: + * Ensure that the cabling between the storage server and the mainframe + * system is securely in place. Check the file systems on the DASD when it is + * accessible again. + */ + +/*? + * Text: "%s: Unable to allocate DCTL-CQR\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an internal error. + * User action: + * Contact your support organization. + */ + +/*? + * Text: "%s: FORMAT 0 - Invalid Parameter\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * A data argument of a command is not valid. This is an operating system + * independent message that is issued by the storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 0 - DPS Installation Check\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This operating system independent message is issued by the storage system + * for one of the following reasons: + * - A 3380 Model D or E DASD does not have the Dynamic Path Selection (DPS) + * feature in the DASD A-unit. + * - The device type of an attached DASD is not supported by the firmware. + * - A type 3390 DASD is attached to a 3 MB channel. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 2 - Reserved\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 1 - Drive motor switch is off\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 0 - CCW Count less than required\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * The CCW count of a command is less than required. This is an operating + * system independent message that is issued by the storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 0 - Channel requested ... %02x\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: reason code + * Description: + * This is an operating system independent message that is issued by the + * storage system. The possible reason codes indicate the following problems: + * 00 No Message. + * 01 The channel has requested unit check sense data. + * 02 The channel has requested retry and retry is exhausted. + * 03 A SA Check-2 error has occurred. This sense is presented with + * Equipment Check. + * 04 The channel has requested retry and retry is not possible. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 0 - Status Not As Required: reason %02x\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: reason code + * Description: + * This is an operating system independent message that is issued by the + * storage system. There are several potential reasons for this message; + * byte 8 contains the reason code. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 4 - Reserved\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 1 - Device status 1 not valid\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 0 - Storage Path Restart\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * An operation for an active channel program was queued in a Storage Control + * when a warm start was received by the path. This is an operating system + * independent message that is issued by the storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 0 - Reset Notification\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * A system reset or its equivalent was received on an interface. The Unit + * Check that generates this sense is posted to the next channel initiated + * selection following the resetting event. This is an operating system + * independent message that is issued by the storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 0 - Invalid Command Sequence\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * An incorrect sequence of commands has occurred. This is an operating system + * independent message that is issued by the storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 1 - Missing device address bit\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT F - Subsystem Processing Error\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * A firmware logic error has been detected. This is an operating system + * independent message that is issued by the storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 1 - Seek incomplete\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 0 - Invalid Command\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * A command was issued that is not in the 2107/1750 command set. + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 0 - Reserved\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 0 - Command Invalid on Secondary Address\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * A command or order not allowed on a PPRC secondary device has been received + * by the secondary device. This is an operating system independent message + * that is issued by the storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 0 - Invalid Defective/Alternate Track Pointer\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * A defective track has been accessed. The subsystem generates an invalid + * Defective/Alternate Track Pointer as a part of RAID Recovery. + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 0 - Channel Returned with Incorrect retry CCW\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * A command portion of the CCW returned after a command retry sequence does + * not match the command for which retry was signaled. This is an operating + * system independent message that is issued by the storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 0 - Diagnostic of Special Command Violates File Mask\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * A command is not allowed under the Access Authorization specified by the + * File Mask. This is an operating system independent message that is issued + * by the storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 1 - Head address does not compare\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 1 - Reserved\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 1 - Device did not respond to selection\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 1 - Device check-2 error or Set Sector is not complete\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 0 - Device Error Source\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * The device has completed soft error logging. This is an operating system + * independent message that is issued by the storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 0 - Data Pinned for Device\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * Modified data in cache or in persistent storage exists for the DASD. The + * data cannot be destaged to the device. This track is the first track pinned + * for this device. This is an operating system independent message that is + * issued by the storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 6 - Overrun on channel C\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 1 - Device Status 1 not as expected\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 0 - Device Fenced - device = %02x\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: sense data byte 4 + * Description: + * The device shown in sense byte 4 has been fenced. This is an operating + * system independent message that is issued by the storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 1 - Interruption cannot be reset\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 1 - Index missing\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT F - DASD Fast Write inhibited\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * DASD Fast Write is not allowed because of a nonvolatile storage battery + * check condition. This is an operating system independent message that is + * issued by the storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 7 - Invalid tag-in for an extended command sequence\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 4 - Key area error; offset active\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 4 - Count area error; offset active\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 1 - Track physical address did not compare\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 2 - 3990 check-2 error\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 1 - Offset active cannot be reset\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 7 - RCC 1 and RCC 2 sequences not successful\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 4 - No sync byte in count address area; offset active\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 4 - Data area error\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 6 - Overrun on channel A\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 4 - No sync byte in count address area\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 5 - Data Check in the key area\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT F - Caching status reset to default\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * The storage director has assigned two new subsystem status devices and + * resets the status to its default value. This is an operating system + * independent message that is issued by the storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 5 - Data Check in the data area; offset active\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 5 - Reserved\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 1 - Device not ready\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 4 - No sync byte in key area\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 8 - DASD controller failed to set or reset the long busy latch\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 1 - Cylinder address did not compare\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 3 - Reserved\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 4 - No sync byte in data area; offset active\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 2 - Support facility errors\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 4 - Key area error\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 8 - End operation with transfer count not zero\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 2 - Microcode detected error %02x\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: error code + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 5 - Data Check in the count area; offset active\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 3 - Allegiance terminated\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * Allegiance terminated because of a Reset Allegiance or an Unconditional + * Reserve command on another channel. This is an operating system independent + * message that is issued by the storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 4 - Home address area error\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 4 - Count area error\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 7 - Invalid tag-in during selection sequence\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 4 - No sync byte in data area\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 4 - No sync byte in home address area; offset active\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 4 - Home address area error; offset active\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 4 - Data area error; offset active\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 4 - No sync byte in home address area\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 5 - Data Check in the home address area; offset active\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 5 - Data Check in the home address area\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 5 - Data Check in the count area\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 4 - No sync byte in key area; offset active\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 7 - Invalid DCC selection response or timeout\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 5 - Data Check in the data area\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT F - Operation Terminated\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * The storage system ends an operation related to an active channel program + * when termination and redrive are required and logging is not desired. + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 6 - Overrun on channel B\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 5 - Data Check in the key area; offset active\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT F - Volume is suspended duplex\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * The duplex pair volume has entered the suspended duplex state because of a + * failure. This is an operating system independent message that is issued by + * the storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 6 - Overrun on channel D\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 7 - RCC 1 sequence not successful\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 6 - Overrun on channel E\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 7 - 3990 microcode time out when stopping selection\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 6 - Overrun on channel F\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 6 - Reserved\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 7 - RCC initiated by a connection check alert\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 6 - Overrun on channel G\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 7 - extra RCC required\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 6 - Overrun on channel H\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 8 - Unexpected end operation response code\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 7 - Permanent path error (DASD controller not available)\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 7 - Missing end operation; device transfer incomplete\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT F - Reserved\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT F - Cache or nonvolatile storage equipment failure\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * An equipment failure has occurred in the cache storage or nonvolatile + * storage of the storage system. This is an operating system independent + * message that is issued by the storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 8 - DPS cannot be filled\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 8 - Error correction code hardware fault\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 7 - Missing end operation; device transfer complete\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 7 - DASD controller not available on disconnected command chain\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 8 - No interruption from device during a command chain\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 7 - No response to selection after a poll interruption\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 9 - Track physical address did not compare while oriented\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 9 - Head address did not compare\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 7 - Invalid tag-in for an immediate command sequence\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 9 - Cylinder address did not compare\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 8 - DPS checks after a system reset or selective reset\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT F - Caching reinitiated\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * Caching has been automatically reinitiated following an error. + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 8 - End operation with transfer count zero\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 7 - Reserved\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 9 - Reserved\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 8 - Short busy time-out during device selection\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT F - Caching terminated\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * The storage system was unable to initiate caching or had to suspend caching + * for a 3990 control unit. If this problem is caused by a failure condition, + * an additional message will provide more information about the failure. + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * Check for additional messages that point out possible failures. For more + * information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT F - Subsystem status cannot be determined\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * The status of a DASD Fast Write or PPRC volume cannot be determined. + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT F - Nonvolatile storage terminated\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * The storage director has stopped using nonvolatile storage or cannot + * initiate nonvolatile storage. If this problem is caused by a failure, an + * additional message will provide more information about the failure. This is + * an operating system independent message that is issued by the storage system. + * User action: + * Check for additional messages that point out possible failures. For more + * information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT 8 - Reserved\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: Write inhibited path encountered\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an informational message. + * User action: + * None. + */ + +/*? + * Text: "%s: FORMAT 9 - Device check-2 error\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * This is an operating system independent message that is issued by the + * storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT F - Track format incorrect\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * A track format error occurred while data was being written to the DASD or + * while a duplex pair was being established. This is an operating system + * independent message that is issued by the storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: FORMAT F - Cache fast write access not authorized\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * A request for Cache Fast Write Data access cannot be satisfied because + * of missing access authorization for the storage system. This is an operating + * system independent message that is issued by the storage system. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: Data recovered during retry with PCI fetch mode active\n" + * Severity: Emerg + * Parameter: + * @1: bus ID of the DASD + * Description: + * A data error has been recovered on the storages system but the Linux file + * system cannot be informed about the data mismatch. To prevent Linux from + * running with incorrect data, the DASD device driver will trigger a kernel + * panic. + * User action: + * Reset your real or virtual hardware and reboot Linux. + */ + +/*? + * Text: "%s: The specified record was not found\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * Description: + * The record to be accessed does not exist. The DASD might be unformatted + * or defect. + * User action: + * Try to format the DASD or replace it. + * ATTENTION: Formatting irreversibly destroys all data on the DASD. + */ + +/*? + * Text: "%s: ERP %p (%02x) refers to %p\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * @2: pointer to ERP + * @3: ERP status + * @4: cqr + * Description: + * This message provides debug information for the enhanced error recovery + * procedure (ERP). + * User action: + * If you do not need this information, you can suppress this message by + * switching off ERP logging, for example, by writing '1' to the 'erplog' + * sysfs attribute of the DASD. + */ + +/*? + * Text: "%s: ERP chain at END of ERP-ACTION\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * Description: + * This message provides debug information for the enhanced error recovery + * procedure (ERP). + * User action: + * If you do not need this information, you can suppress this message by + * switching off ERP logging, for example, by writing '1' to the 'erplog' + * sysfs attribute of the DASD. + */ + +/*? + * Text: "%s: The cylinder data for accessing the DASD is inconsistent\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * Description: + * An error occurred in the storage system hardware. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: Accessing the DASD failed because of a hardware error\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * Description: + * An error occurred in the storage system hardware. + * User action: + * For more information see the documentation of your storage system. + */ + +/*? + * Text: "%s: ERP chain at BEGINNING of ERP-ACTION\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * Description: + * This message provides debug information for the enhanced error recovery + * procedure (ERP). + * User action: + * If you do not need this information, you can suppress this message by + * switching off ERP logging, for example, by writing '1' to the 'erplog' + * sysfs attribute of the DASD. + */ + +/*? + * Text: "%s: ERP %p has run out of retries and failed\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * @2: ERP pointer + * Description: + * The error recovery procedure (ERP) tried to recover an error but the number + * of retries for the I/O was exceeded before the error could be resolved. + * User action: + * Check for related previous error messages. + */ + +/*? + * Text: "%s: SIM - SRC: %02x%02x%02x%02x\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * @2: sense byte + * @3: sense byte + * @4: sense byte + * @5: sense byte + * Description: + * This error message is a System Information Message (SIM) generated by the + * storage system. The System Reference Code (SRC) defines the error in detail. + * User action: + * Look up the SRC in the storage server documentation. + */ + +/*? + * Text: "%s: log SIM - SRC: %02x%02x%02x%02x\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: sense byte + * @3: sense byte + * @4: sense byte + * @5: sense byte + * Description: + * This System Information Message (SIM) is generated by the storage system. + * The System Reference Code (SRC) defines the error in detail. + * User action: + * Look up the SRC in the storage server documentation. + */ + +/*? + * Text: "%s: Reading device feature codes failed with rc=%d\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: return code + * Description: + * The device feature codes state which advanced features are supported by a + * device. + * Examples for advanced features are PAV or high performance FICON. + * Some early devices do not provide feature codes and no advanced features are + * available on these devices. + * User action: + * None, if the DASD does not provide feature codes. If the DASD provides + * feature codes, make sure that it is working correctly, then set it offline + * and back online. + */ + +/*? + * Text: "%s: A channel path group could not be established\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * Initialization of a DASD did not complete because a channel path group + * could not be established. + * User action: + * Make sure that the DASD is working correctly, then try again to set it + * online. If initialization still fails, reboot. + */ + +/*? + * Text: "%s: The DASD is not operating in multipath mode\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the DASD + * Description: + * The DASD channel path group could not be configured to use multipath mode. + * This might negatively affect I/O performance on this DASD. + * User action: + * Make sure that the DASD is working correctly, then try again to set it + * online. If initialization still fails, reboot. + */ + +/*? + * Text: "%s: Detecting the DASD disk layout failed because of an I/O error\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * Description: + * The disk layout of the DASD could not be detected because of an unexpected + * I/O error. The DASD device driver treats the device like an unformatted DASD, + * and partitions on the device are not accessible. + * User action: + * If the DASD is formatted, make sure that the DASD is working correctly, + * then set it offline and back online. If the DASD is unformatted, format the + * DASD, for example, with dasdfmt. + * ATTENTION: Formatting irreversibly destroys all data on the DASD. + */ + +/*? + * Text: "%s: An I/O request was rejected because writing is inhibited\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * Description: + * An I/O request was returned with an error indication of 'command reject' + * and 'write inhibited'. The most likely reason for this error is a + * failed write request to a device that was attached as read-only in z/VM. + * User action: + * Set the device offline, ensure that the device is configured correctly in + * z/VM, then set the device online again. + */ + +/*? + * Text: "%s: An Alias device was reassigned to a new base device with UID: %s\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the alias + * @2: UID of new base device + * Description: + * The alias device with the indicated bus ID has been reassigned. The UID of the new base device is shown in the message. + * User action: + * None. + */ + +/*? + * Text: "%s: Detecting the maximum supported data size for zHPF requests failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * High Performance FICON (zHPF) requests are limited to a hardware-dependent + * maximum data size. The DASD device driver failed to detect this size and zHPF + * is not available for this device. + * User action: + * Set the device offline and online again. If this problem persists, gather + * Linux debug data and report the problem to your support organization. + */ + +/*? + * Text: "%s: Reading device feature codes failed (rc=%d) for new path %x\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: return code + * @3: path mask + * Description: + * A new path has been made available to the a device. + * A command to read the device feature codes on this device returned an error. + * The new path will not be used for I/O. + * User action: + * Set the new path offline and online again to repeat the path verification. + * Alternatively, set the device offline and online again to + * verify all available paths for this device. + * If this problem persists, gather Linux debug data and report the problem + * to your support organization. + */ + +/*? + * Text: "%s: Detecting the maximum data size for zHPF requests failed (rc=%d) for a new path %x\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: return code + * @3: path mask + * Description: + * High Performance FICON (zHPF) requests are limited to a hardware-dependent + * maximum data size. A command to detect this size for + * a new path returned an error. The new path will not be used for I/O. + * User action: + * Set the new path offline and online again to repeat the path verification. + * Alternatively, set the device offline and online again to + * verify all available paths for this device. + * If this problem persists, gather Linux debug data and report the problem + * to your support organization. + */ + +/*? + * Text: "%s: The maximum data size for zHPF requests %u on a new path %x is below the active maximum %u\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * @2: size in bytes + * @3: path mask + * @4: size in bytes + * Description: + * High Performance FICON (zHPF) requests are limited to a hardware-dependent + * maximum data size. The maximum of the new path is below + * the previously established common maximum for the + * existing paths for this device. This could cause requests on the new + * path to fail. The new path will not be used for I/O. + * User action: + * Set the device offline and online again to establish a new common maximum + * data size for the device. + */ + +/*? + * Text: "%s: The device reservation was lost\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * Description: + * This Linux instance has lost its reservation of the device to another + * operating system instance. Depending on the reservation policy for the + * device, I/O might be blocked until the other operating system instance + * surrenders the reservation or all I/O requests might fail until the + * device is reset. + * User action: + * None, if this situation is handled by system automation software. + * If this situation is not handled by automation, check the + * last_known_reservation_state attribute of the device in sysfs. + * If the value is 'lost', verify that the device is no longer reserved + * by another operating system instance, then set the device offline and + * online again. For any other value of the last_known_reservation_state + * no action is required. I/O will resume when the device reservation is + * surrendered by the other operating system instance. + */ + +/*? + * Text: "%s: The storage server does not support raw-track access\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * Description: + * The DASD cannot be accessed in raw-track access mode because the storage + * server does not have all required features for this access mode. + * In raw-track access mode, the DASD device driver accesses complete ECKD + * tracks. + * By default, the DASD device driver accesses only the data fields of ECKD + * devices and omits the count and key data fields. + * User action: + * Ensure that the raw_track_access sysfs attribute of the DASD has the value + * 0 to access the device in default ECKD mode. + */ + +/*? + * Text: "%s: The newly added channel path %02X will not be used because it leads to a different device %s\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * @2: logical path mask + * @3: UID + * Description: + * The newly added channel path has a different UID than the DASD device. This indicates + * an incorrect cabling. This path is not going to be used. + * User action: + * Check the cabling of the DASD device. Disconnect and reconnect the cable. + */ + +/*? + * Text: "%s: Not all channel paths lead to the same device, path %02X leads to device %s instead of %s\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * @2: logical path mask + * @3: UID + * @4: UID + * Description: + * Some channel paths have a different UID than others. This indicates + * an incorrect cabling. The DASD device is not enabled. + * User action: + * Check cabling of the DASD device and retry to enable the device. + */ + +/*? + * Text: "Service on the storage server caused path %x.%02x to go offline" + * Severity: Warning + * Parameter: + * @1: channel subsystem ID + * @2: CHPID + * Description: + * A channel path to the DASD has been set offline because of + * a service action on the storage server. The path will be set back + * online automatically when the service action is completed. + * User action: + * None. + */ + +/*? + * Text: "Path %x.%02x is back online after service on the storage server" + * Severity: Informational + * Parameter: + * @1: channel subsystem ID + * @2: CHPID + * Description: + * A path had been set offline temporarily because of a service + * action on the storage server. + * The service action has completed, and the channel path is available + * again. + * User action: + * None. + */ + +/*? + * Text: "%s: High Performance FICON disabled\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * Description: + * High Performance FICON (HPF) has been disabled. Either the device + * lost HPF functionality, or none of the remaining channel paths are + * HPF capable. + * User action: + * Report the problem to your support organization. + * Ensure that the cabling between the storage server and the mainframe + * system is securely in place. + * Reset the device and channel paths by writing "all" or a logical path mask + * to the path_reset sysfs attribute of the device. + */ + +/*? + * Text: "%s: Channel path %02X lost HPF functionality and is disabled\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * @2: logical path mask + * Description: + * A channel path has lost High Performance FICON (HPF) functionality + * and was removed from regular operations. + * User action: + * Report the problem to your support organization. + * Ensure that the cabling between the storage server and the mainframe + * system is securely in place. + * Reset the device and channel paths by writing "all" or a logical path mask + * to the path_reset sysfs attribute of the device. + */ + +/*? + * Text: "%s: Path %x.%02x (pathmask %02x) is disabled - IFCC threshold exceeded\n" + * Severity: Error + * Parameter: + * @1: bus ID of the DASD + * @2: cssid + * @3: chpid + * @4: logical path mask + * Description: + * Due to numerous interface or channel control checks (IFCCs), a channel path + * was removed from regular operations to retain good I/O performance. + * User action: + * Ensure that the cabling between the storage server and the mainframe + * system is securely in place. + * Reset the device and channel paths by writing "all" or a logical path mask + * to the path_reset sysfs attribute of the device. + * If the problem persists, report it to your support organization. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/dasd-fba b/Documentation/kmsg/s390/dasd-fba new file mode 100644 index 0000000000000..155dbcf6129ed --- /dev/null +++ b/Documentation/kmsg/s390/dasd-fba @@ -0,0 +1,36 @@ + +/*? + * Text: "%s: New FBA DASD %04X/%02X (CU %04X/%02X) with %d MB and %d B/blk%s\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the DASD + * @2: device type + * @3: device model + * @4: control unit type + * @5: control unit model + * @6: size + * @7: bytes per block + * @8: access mode + * Description: + * A DASD with the shown characteristics has been set online. + * If the DASD is configured as read-only to the real or virtual hardware, + * the message includes an indication of this hardware access mode. The + * hardware access mode is independent from the 'readonly' attribute of + * the device in sysfs. + * User action: + * None. + */ + +/*? + * Text: "%s: Allocating memory for private DASD data failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the DASD + * Description: + * The DASD device driver maintains data structures for each DASD it manages. + * There is not enough memory to allocate these data structures for one or + * more DASD. + * User action: + * Free some memory and try the operation again. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/dcssblk b/Documentation/kmsg/s390/dcssblk new file mode 100644 index 0000000000000..f7042e4d06275 --- /dev/null +++ b/Documentation/kmsg/s390/dcssblk @@ -0,0 +1,206 @@ +/*? + * Text: "Adjacent DCSSs %s and %s are not contiguous\n" + * Severity: Error + * Parameter: + * @1: name 1 + * @2: name 2 + * Description: + * You can only map a set of two or more DCSSs to a single DCSS device if the + * DCSSs in the set form a contiguous memory space. The DCSS device cannot be + * created because there is a memory gap between two adjacent DCSSs. + * User action: + * Ensure that you have specified all DCSSs that belong to the set. Check the + * definitions of the DCSSs on the z/VM hypervisor to verify that they form + * a contiguous memory space. + */ + +/*? + * Text: "DCSS %s and DCSS %s have incompatible types\n" + * Severity: Error + * Parameter: + * @1: name 1 + * @2: name 2 + * Description: + * You can only map a set of two or more DCSSs to a single DCSS device if + * either all DCSSs in the set have the same type or if the set contains DCSSs + * of the two types EW and EN but no other type. The DCSS device cannot be + * created because at least two of the specified DCSSs are not compatible. + * User action: + * Check the definitions of the DCSSs on the z/VM hypervisor to verify that + * their types are compatible. + */ + +/*? + * Text: "DCSS %s is of type SC and cannot be loaded as exclusive-writable\n" + * Severity: Error + * Parameter: + * @1: device name + * Description: + * You cannot load a DCSS device in exclusive-writable access mode if the DCSS + * devise maps to one or more DCSSs of type SC. + * User action: + * Load the DCSS in shared access mode. + */ + +/*? + * Text: "DCSS device %s is removed after a failed access mode change\n" + * Severity: Error + * Parameter: + * @1: device name + * Description: + * To change the access mode of a DCSS device, all DCSSs that map to the device + * were unloaded. Reloading the DCSSs for the new access mode failed and the + * device is removed. + * User action: + * Look for related messages to find out why the DCSSs could not be reloaded. + * If necessary, add the device again. + */ + +/*? + * Text: "All DCSSs that map to device %s are saved\n" + * Severity: Informational + * Parameter: + * @1: device name + * Description: + * A save request has been submitted for the DCSS device. Changes to all DCSSs + * that map to the device are saved permanently. + * User action: + * None. + */ + +/*? + * Text: "Device %s is in use, its DCSSs will be saved when it becomes idle\n" + * Severity: Informational + * Parameter: + * @1: device name + * Description: + * A save request for the device has been deferred until the device becomes + * idle. Then changes to all DCSSs that the device maps to will be saved + * permanently. + * User action: + * None. + */ + +/*? + * Text: "A pending save request for device %s has been canceled\n" + * Severity: Informational + * Parameter: + * @1: device name + * Description: + * A save request for the DCSSs that map to a DCSS device has been pending + * while the device was in use. This save request has been canceled. Changes to + * the DCSSs will not be saved permanently. + * User action: + * None. + */ + +/*? + * Text: "Loaded %s with total size %lu bytes and capacity %lu sectors\n" + * Severity: Informational + * Parameter: + * @1: DCSS names + * @2: total size in bytes + * @3: total size in 512 byte sectors + * Description: + * The listed DCSSs have been verified as contiguous and successfully loaded. + * The displayed sizes are the sums of all DCSSs. + * User action: + * None. + */ + +/*? + * Text: "Device %s cannot be removed because it is not a known device\n" + * Severity: Warning + * Parameter: + * @1: device name + * Description: + * The DCSS device you are trying to remove is not known to the DCSS device + * driver. + * User action: + * List the entries under /sys/devices/dcssblk/ to see the names of the + * existing DCSS devices. + */ + +/*? + * Text: "Device %s cannot be removed while it is in use\n" + * Severity: Warning + * Parameter: + * @1: device name + * Description: + * You are trying to remove a device that is in use. + * User action: + * Make sure that all users of the device close the device before you try to + * remove it. + */ + +/*? + * Text: "Device %s has become idle and is being saved now\n" + * Severity: Informational + * Parameter: + * @1: device name + * Description: + * A save request for the DCSSs that map to a DCSS device has been pending + * while the device was in use. The device has become idle and all changes + * to the DCSSs are now saved permanently. + * User action: + * None. + */ + +/*? + * Text: "Writing to %s failed because it is a read-only device\n" + * Severity: Warning + * Parameter: + * @1: device name + * Description: + * The DCSS device is in shared access mode and cannot be written to. Depending + * on the type of the DCSSs that the device maps to, you might be able to + * change the access mode to exclusive-writable. + * User action: + * If the DCSSs of the device are of type SC, do not attempt to write to the + * device. If the DCSSs of the device are of type ER or SR, change the access + * mode to exclusive-writable before writing to the device. + */ + +/*? + * Text: "The address range of DCSS %s changed while the system was suspended\n" + * Severity: Error + * Parameter: + * @1: device name + * Description: + * After resuming the system, the start address or end address of a DCSS does + * not match the address when the system was suspended. DCSSs must not be + * changed after the system was suspended. + * This error cannot be recovered. The system is stopped with a kernel panic. + * User action: + * Reboot Linux. + */ + +/*? + * Text: "Suspending the system failed because DCSS device %s is writable\n" + * Severity: Error + * Parameter: + * @1: device name + * Description: + * A system cannot be suspended if one or more DCSSs are accessed in exclusive- + * writable mode. DCSS segment types EW, SW, and EN are always writable and + * must be removed before a system is suspended. + * User action: + * Remove all DCSSs of segment types EW, SW, and EN by writing the DCSS name to + * the sysfs 'remove' attribute. Set the access mode for all DCSSs of segment + * types SR and ER to read-only by writing 1 to the sysfs 'shared' attribute of + * the DCSS. Then try again to suspend the system. + */ + +/*? + * Text: "DCSS %s is of type SN or EN and cannot be saved\n" + * Severity: Warning + * Parameter: + * @1: DCSS name + * Description: + * DCSSs of type SN or EN cannot be saved. + * User action: + * If the DCSS was set up with the intention to prevent the content from being saved, + * no action is necessary. + * To be able to save the content, you must define the DCSS with a type other than SN or EN. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/diag288_wdt b/Documentation/kmsg/s390/diag288_wdt new file mode 100644 index 0000000000000..7334dac2ed15f --- /dev/null +++ b/Documentation/kmsg/s390/diag288_wdt @@ -0,0 +1,66 @@ +/*? + * Text: "The watchdog cannot be activated\n" + * Severity: Error + * Description: + * Diagnose instruction 0x288 was called to activate the diag288 watchdog. + * The diagnose call returned an error that cannot be handled by the device driver. + * The watchdog stays inactive. + * User action: + * Contact your support organization. + */ + +/*? + * Text: "The watchdog cannot be initialized\n" + * Severity: Error + * Description: + * Diagnose instruction 0x288 was called to initialize the diag288 watchdog. + * The diagnose call returned an error that cannot be handled by the device driver. + * The watchdog stays inactive. + * A possible reason for this error is that your real or virtual hardware does not support + * the diag288 watchdog. + * User action: + * Confirm that the diag288 watchdog is supported in your environment. + * Use a watchdog that is supported in your environment. + */ + +/*? + * Text: "The watchdog cannot be deactivated\n" + * Severity: Error + * Description: + * Diagnose instruction 0x288 was called to deactivate the diag288 watchdog. + * The diagnose call returned an error that cannot be handled by the device driver. + * The watchdog stays active and a watchdog timeout will trigger the configured timeout action. + * The diag288 watchdog device driver might intentionally be configured to prevent deactivation. + * User action: + * You can configure the diag288 watchdog device driver such that it can be deactivated. + * If the diag288 device driver has been compiled as a separate module, diag288_wdt, reload the module + * without specifying the 'nowayout' module parameter. + * If the diag288 device driver has been compiled into your kernel, + * reboot Linux without specifying the 'diag288.nowayout' kernel parameter'. + */ + +/*? + * Text: "The watchdog timer cannot be started or reset\n" + * Severity: Error + * Description: + * Diagnose instruction 0x288 was called to start the diag288 watchdog or to set timer back to zero. + * The diagnose call returned an error that cannot be handled by the device driver. + * The watchdog stays inactive or becomes inactive. + * User action: + * Contact your support organization. + */ + +/*? + * Text: "Linux cannot be suspended while the watchdog is in use\n" + * Severity: Error + * Description: + * The watchdog must not time out while Linux is suspended. + * Therefore, the diag288 watchdog device driver prevents Linux from being suspended + * while the watchdog is in use. + * User action: + * i) Stop the watchdog application. ii) If the problem persists, close the watchdog + * device node by issuing 'echo V > /dev/watchdog'. + * iii) If the device driver still prevents Linux from being suspended, + * contact your support organization. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/extmem b/Documentation/kmsg/s390/extmem new file mode 100644 index 0000000000000..b8570674de3f3 --- /dev/null +++ b/Documentation/kmsg/s390/extmem @@ -0,0 +1,293 @@ +/*? + * Text: "Querying a DCSS type failed with rc=%ld\n" + * Severity: Warning + * Parameter: + * @1: return code + * Description: + * The DCSS kernel interface used z/VM diagnose call X'64' to query the + * type of a DCSS. z/VM failed to determine the type and returned an error. + * User action: + * Look for related messages to find out which DCSS is affected. + * For details about the return codes see the section about DIAGNOSE Code + * X'64' in "z/VM CP Programming Services". + */ + +/*? + * Text: "Loading DCSS %s failed with rc=%ld\n" + * Severity: Warning + * Parameter: + * @1: DCSS name + * @2: return code + * Description: + * The DCSS kernel interface used diagnose call X'64' to load a DCSS. z/VM + * failed to load the DCSS and returned an error. + * User action: + * For details about the return codes see the section about DIAGNOSE Code + * X'64' in "z/VM CP Programming Services". + */ + +/*? + * Text: "DCSS %s of range %p to %p and type %s loaded as exclusive-writable\n" + * Severity: Informational + * Parameter: + * @1: DCSS name + * @2: starting page address + * @3: ending page address + * @4: DCSS type + * Description: + * The DCSS was loaded successfully in exclusive-writable access mode. + * User action: + * None. + */ + +/*? + * Text: "DCSS %s of range %p to %p and type %s loaded in shared access mode\n" + * Severity: Informational + * Parameter: + * @1: DCSS name + * @2: starting page address + * @3: ending page address + * @4: DCSS type + * Description: + * The DCSS was loaded successfully in shared access mode. + * User action: + * None. + */ + +/*? + * Text: "DCSS %s is already in the requested access mode\n" + * Severity: Informational + * Parameter: + * @1: DCSS name + * Description: + * A request to reload a DCSS with a new access mode has been rejected + * because the new access mode is the same as the current access mode. + * User action: + * None. + */ + +/*? + * Text: "DCSS %s is in use and cannot be reloaded\n" + * Severity: Warning + * Parameter: + * @1: DCSS name + * Description: + * Reloading a DCSS in a different access mode has failed because the DCSS is + * being used by one or more device drivers. The DCSS remains loaded with the + * current access mode. + * User action: + * Ensure that the DCSS is not used by any device driver then try again to + * load the DCSS with the new access mode. + */ + +/*? + * Text: "DCSS %s overlaps with used memory resources and cannot be reloaded\n" + * Severity: Warning + * Parameter: + * @1: DCSS name + * Description: + * The DCSS has been unloaded and cannot be reloaded because it overlaps with + * another loaded DCSS or with the memory of the z/VM guest virtual machine + * (guest storage). + * User action: + * Ensure that no DCSS is loaded that has overlapping memory resources + * with the DCSS you want to reload. If the DCSS overlaps with guest storage, + * use the DEF STORE CONFIG z/VM CP command to create a sufficient storage gap + * for the DCSS. For details, see the section about the DCSS device driver in + * "Device Drivers, Features, and Commands". + */ + +/*? + * Text: "Reloading DCSS %s failed with rc=%ld\n" + * Severity: Warning + * Parameter: + * @1: DCSS name + * @2: return code + * Description: + * The DCSS kernel interface used z/VM diagnose call X'64' to reload a DCSS + * in a different access mode. The DCSS was unloaded but z/VM failed to reload + * the DCSS. + * User action: + * For details about the return codes see the section about DIAGNOSE Code + * X'64' in "z/VM CP Programming Services". + */ + +/*? + * Text: "Unloading unknown DCSS %s failed\n" + * Severity: Error + * Parameter: + * @1: DCSS name + * Description: + * The specified DCSS cannot be unloaded. The DCSS is known to the DCSS device + * driver but not to the DCSS kernel interface. This problem indicates a + * program error in extmem.c. + * User action: + * Report this problem to your support organization. + */ + +/*? + * Text: "Saving unknown DCSS %s failed\n" + * Severity: Error + * Parameter: + * @1: DCSS name + * Description: + * The specified DCSS cannot be saved. The DCSS is known to the DCSS device + * driver but not to the DCSS kernel interface. This problem indicates a + * program error in extmem.c. + * User action: + * Report this problem to your support organization. + */ + +/*? + * Text: "Saving a DCSS failed with DEFSEG response code %i\n" + * Severity: Error + * Parameter: + * @1: response-code + * Description: + * The DEFSEG z/VM CP command failed to permanently save changes to a DCSS. + * User action: + * Ensure that the z/VM guest virtual machine is authorized to issue + * the CP DEFSEG command (typically privilege class E). + * Look for related messages to find the cause of this error. See also message + * HCPE in the DEFSEG section of the "z/VM CP Command and + * Utility Reference". + */ + +/*? + * Text: "Saving a DCSS failed with SAVESEG response code %i\n" + * Severity: Error + * Parameter: + * @1: response-code + * Description: + * The SAVESEG z/VM CP command failed to permanently save changes to a DCSS. + * User action: + * Ensure that the z/VM guest virtual machine is authorized to issue + * the CP SAVESEG command (typically privilege class E). + * Look for related messages to find the cause of this error. See also message + * HCPE in the SAVESEG section of the "z/VM CP Command and + * Utility Reference". + */ + +/*? + * Text: "DCSS %s cannot be loaded or queried\n" + * Severity: Error + * Parameter: + * @1: DCSS name + * Description: + * You cannot load or query the specified DCSS because it either is not defined + * in the z/VM hypervisor, or it is a class S DCSS, or it is above 2047 MB + * and the Linux system is a 31-bit system. + * User action: + * Use the CP command "QUERY NSS" to find out if the DCSS is a valid + * DCSS that can be loaded. + */ + +/*? + * Text: "DCSS %s cannot be loaded or queried without z/VM\n" + * Severity: Error + * Parameter: + * @1: DCSS name + * Description: + * A DCSS is a z/VM resource. Your Linux instance is not running as a z/VM + * guest operating system and, therefore, cannot load DCSSs. + * User action: + * Load DCSSs only on Linux instances that run as z/VM guest operating systems. + */ + +/*? + * Text: "Loading or querying DCSS %s resulted in a hardware error\n" + * Severity: Error + * Parameter: + * @1: DCSS name + * Description: + * Either the z/VM DIAGNOSE X'64' query or load call issued for the DCSS + * returned with an error. + * User action: + * Look for previous extmem message to find the return code from the + * DIAGNOSE X'64' query or load call. For details about the return codes see + * the section about DIAGNOSE Code X'64' in "z/VM CP Programming Services". + */ + +/*? + * Text: "DCSS %s has multiple page ranges and cannot be loaded or queried\n" + * Severity: Error + * Parameter: + * @1: DCSS name + * Description: + * You can only load or query a DCSS with multiple page ranges if: + * - The DCSS has 6 or fewer page ranges + * - The page ranges form a contiguous address space + * - The page ranges are of type EW or EN + * User action: + * Check the definition of the DCSS to make sure that the conditions for + * DCSSs with multiple page ranges are met. + */ + +/*? + * Text: "%s needs used memory resources and cannot be loaded or queried\n" + * Severity: Error + * Parameter: + * @1: DCSS name + * Description: + * You cannot load or query the DCSS because it overlaps with an already + * loaded DCSS or with the memory of the z/VM guest virtual machine + * (guest storage). + * User action: + * Ensure that no DCSS is loaded that has overlapping memory resources + * with the DCSS you want to load or query. If the DCSS overlaps with guest + * storage, use the DEF STORE CONFIG z/VM CP command to create a sufficient + * storage gap for the DCSS. For details, see the section about the DCSS + * device driver in "Device Drivers, Features, and Commands". + */ + +/*? + * Text: "DCSS %s is already loaded in a different access mode\n" + * Severity: Error + * Parameter: + * @1: DCSS name + * Description: + * The DCSS you are trying to load has already been loaded in a different + * access mode. You cannot simultaneously load the DCSS in different modes. + * User action: + * Reload the DCSS in a different mode or load it with the same mode in which + * it has already been loaded. + */ + +/*? + * Text: "There is not enough memory to load or query DCSS %s\n" + * Severity: Error + * Parameter: + * @1: DCSS name + * Description: + * The available memory is not enough to load or query the DCSS. + * User action: + * Free some memory and repeat the failed operation. + */ + +/*? + * Text: "DCSS %s overlaps with used storage and cannot be loaded\n" + * Severity: Error + * Parameter: + * @1: DCSS name + * Description: + * You cannot load the DCSS because it overlaps with an already loaded DCSS + * or with the memory of the z/VM guest virtual machine (guest storage). + * User action: + * Ensure that no DCSS is loaded that has overlapping memory resources + * with the DCSS you want to load. If the DCSS overlaps with guest storage, + * use the DEF STORE CONFIG z/VM CP command to create a sufficient storage gap + * for the DCSS. For details, see the section about the DCSS device driver in + * "Device Drivers, Features, and Commands". + */ + +/*? + * Text: "DCSS %s exceeds the kernel mapping range (%lu) and cannot be loaded\n" + * Severity: Error + * Parameter: + * @1: DCSS name + * @2: kernel mapping range in bytes + * Description: + * You cannot load the DCSS because it exceeds the kernel mapping range limit. + * User action: + * Ensure that the DCSS range is defined below the kernel mapping range. + */ diff --git a/Documentation/kmsg/s390/hmcdrv b/Documentation/kmsg/s390/hmcdrv new file mode 100644 index 0000000000000..402244284cd9b --- /dev/null +++ b/Documentation/kmsg/s390/hmcdrv @@ -0,0 +1,22 @@ +/*? + * Text: "Allocating the requested cache size of %zu bytes failed\n" + * Severity: Error + * Parameter: + * @1: size + * Description: + * You cannot use the 'hmcdrv' module. + * Either the cache size that was specified for the 'hmcdrv' module exceeded + * the maximum of 1048576 (1 megabyte), or not enough free memory was + * available. + * If the 'hmcdrv' module was compiled into the kernel, the cache size was + * specified with the 'hmcdrv.cachesize' kernel parameter. + * For a separate 'hmcdrv' module, the cache size was specified with the + * 'cachesize=' module parameter. + * User action: + * Specify a smaller cache size and try again to load the module. + * Do not exceed the maximum specification of 1048576 (1 megabyte). + * If necessary, free some memory and try again. + * If the module is compiled into the kernel, you must reboot Linux to change + * the cache size specification. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/hugetlb b/Documentation/kmsg/s390/hugetlb new file mode 100644 index 0000000000000..64856a89fa1b2 --- /dev/null +++ b/Documentation/kmsg/s390/hugetlb @@ -0,0 +1,13 @@ +/*? + * Text: "hugepagesz= specifies an unsupported page size %s\n" + * Severity: Error + * Parameter: + * @1: size + * Description: + * The hugepagesz= kernel parameter specifies a huge page size + * that is not supported. + * User action: + * Specify "1M" for 1 MB huge pages. These are supported as of z10. + * Specify "2G" for 2 GB huge pages. These are supported as of zEC12 + * and zBC12 machines. + */ diff --git a/Documentation/kmsg/s390/hvc_iucv b/Documentation/kmsg/s390/hvc_iucv new file mode 100644 index 0000000000000..cf9bafe3f5f98 --- /dev/null +++ b/Documentation/kmsg/s390/hvc_iucv @@ -0,0 +1,123 @@ +/*? + * Text: "The z/VM IUCV HVC device driver cannot be used without z/VM\n" + * Severity: Notice + * Description: + * The z/VM IUCV hypervisor console (HVC) device driver requires the + * z/VM inter-user communication vehicle (IUCV). + * User action: + * Set "hvc_iucv=" to zero in the kernel parameter line and reboot Linux. + */ + +/*? + * Text: "%lu is not a valid value for the hvc_iucv= kernel parameter\n" + * Severity: Error + * Parameter: + * @1: hvc_iucv_devices + * Description: + * The "hvc_iucv=" kernel parameter specifies the number of z/VM IUCV + * hypervisor console (HVC) terminal devices. + * The parameter value ranges from 0 to 8. + * If zero is specified, the z/VM IUCV HVC device driver is disabled + * and no IUCV-based terminal access is available. + * User action: + * Correct the "hvc_iucv=" setting in the kernel parameter line and + * reboot Linux. + */ + +/*? + * Text: "Creating a new HVC terminal device failed with error code=%d\n" + * Severity: Error + * Parameter: + * @1: errno + * Description: + * The device driver initialization failed to allocate a new + * HVC terminal device. + * A possible cause of this problem is memory constraints. + * User action: + * If the error code is -12 (ENOMEM), consider assigning more memory + * to your z/VM guest virtual machine. + */ + +/*? + * Text: "Registering HVC terminal device as Linux console failed\n" + * Severity: Error + * Description: + * The device driver initialization failed to set up the first HVC terminal + * device for use as Linux console. + * User action: + * If the error code is -12 (ENOMEM), consider assigning more memory + * to your z/VM guest virtual machine. + */ + +/*? + * Text: "Registering IUCV handlers failed with error code=%d\n" + * Severity: Error + * Parameter: + * @1: errno + * Description: + * The device driver initialization failed to register with z/VM IUCV to + * handle IUCV connections, as well as sending and receiving of IUCV messages. + * User action: + * Check for related IUCV error messages and see the errno manual page + * to find out what caused the problem. + */ + +/*? + * Text: "Allocating memory failed with reason code=%d\n" + * Severity: Error + * Parameter: + * @1: reason + * Description: + * The z/VM IUCV hypervisor console (HVC) device driver initialization failed, + * because of a general memory allocation failure. The reason code indicates + * the memory operation that has failed: + * kmem_cache (reason code=1), + * mempool (reason code=2), or + * hvc_iucv_allow= (reason code=3) + * User action: + * Consider assigning more memory to your z/VM guest virtual machine. + */ + +/*? + * Text: "hvc_iucv_allow= does not specify a valid z/VM user ID list\n" + * Severity: Error + * Description: + * The "hvc_iucv_allow=" kernel parameter specifies a comma-separated list + * of z/VM user IDs that are permitted to connect to the z/VM IUCV hypervisor + * device driver. + * The z/VM user IDs in the list must not exceed eight characters and must + * not contain spaces. + * User action: + * Correct the "hvc_iucv_allow=" setting in the kernel parameter line and reboot + * Linux. + */ + +/*? + * Text: "hvc_iucv_allow= specifies too many z/VM user IDs\n" + * Severity: Error + * Description: + * The "hvc_iucv_allow=" kernel parameter specifies a comma-separated list + * of z/VM user IDs that are permitted to connect to the z/VM IUCV hypervisor + * device driver. + * The number of z/VM user IDs that are specified with the "hvc_iucv_allow=" + * kernel parameter exceeds the maximum of 500. + * User action: + * Correct the "hvc_iucv_allow=" setting by reducing the z/VM user IDs in + * the list and reboot Linux. + */ + +/*? + * Text: "A connection request from z/VM user ID %s was refused\n" + * Severity: Informational + * Parameter: + * @1: ID + * Description: + * An IUCV connection request from another z/VM guest virtual machine has been + * refused. The request was from a z/VM guest virtual machine that is not + * listed by the "hvc_iucv_allow=" kernel parameter. + * User action: + * Check the "hvc_iucv_allow=" kernel parameter setting. + * Consider adding the z/VM user ID to the "hvc_iucv_allow=" list in the kernel + * parameter line and reboot Linux. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/hypfs b/Documentation/kmsg/s390/hypfs new file mode 100644 index 0000000000000..c84582b9634ae --- /dev/null +++ b/Documentation/kmsg/s390/hypfs @@ -0,0 +1,56 @@ +/*? + * Text: "The hardware system does not support hypfs\n" + * Severity: Error + * Description: + * hypfs requires DIAGNOSE Code X'204' but this diagnose code is not available + * on your hardware. You need more recent hardware to use hypfs. + * User action: + * None. + */ + +/*? + * Text: "The hardware system does not provide all functions required by hypfs\n" + * Severity: Error + * Description: + * hypfs requires DIAGNOSE Code X'224' but this diagnode code is not available + * on your hardware. You need more recent hardware to use hypfs. + * User action: + * None. + */ + +/*? + * Text: "Updating the hypfs tree failed\n" + * Severity: Error + * Description: + * There was not enough memory available to update the hypfs tree. + * User action: + * Free some memory and try again to update the hypfs tree. Consider assigning + * more memory to your LPAR or z/VM guest virtual machine. + */ + +/*? + * Text: "%s is not a valid mount option\n" + * Severity: Error + * Parameter: + * @1: mount option + * Description: + * hypfs has detected mount options that are not valid. + * User action: + * See "Device Drivers Features and Commands" for information about valid + * mount options for hypfs. + */ + +/*? + * Text: "Initialization of hypfs failed with rc=%i\n" + * Severity: Error + * Parameter: + * @1: error code + * Description: + * Initialization of hypfs failed because of resource or hardware constraints. + * Possible reasons for this problem are insufficient free memory or missing + * hardware interfaces. + * User action: + * See errno.h for information about the error codes. + */ + +/*? Text: "Hypervisor filesystem mounted\n" */ diff --git a/Documentation/kmsg/s390/iucv b/Documentation/kmsg/s390/iucv new file mode 100644 index 0000000000000..08972ccdefc42 --- /dev/null +++ b/Documentation/kmsg/s390/iucv @@ -0,0 +1,33 @@ +/*? + * Text: "Defining an interrupt buffer on CPU %i failed with 0x%02x (%s)\n" + * Severity: Warning + * Parameter: + * @1: CPU number + * @2: hexadecimal error value + * @3: short error code explanation + * Description: + * Defining an interrupt buffer for external interrupts failed. Error + * value 0x03 indicates a problem with the z/VM directory entry of the + * z/VM guest virtual machine. This problem can also be caused by a + * program error. + * User action: + * If the error value is 0x03, examine the z/VM directory entry of your + * z/VM guest virtual machine. If the directory entry is correct or if the + * error value is not 0x03, report this problem to your support organization. + */ + +/*? + * Text: "Suspending Linux did not completely close all IUCV connections\n" + * Severity: Warning + * Description: + * When resuming a suspended Linux instance, the IUCV base code found + * data structures from one or more IUCV connections that existed before the + * Linux instance was suspended. Modules that use IUCV connections must close + * these connections when a Linux instance is suspended. This problem + * indicates an error in a program that used an IUCV connection. + * User action: + * Report this problem to your support organization. + */ + +/*? Text: "iucv_external_interrupt: out of memory\n" */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/lcs b/Documentation/kmsg/s390/lcs new file mode 100644 index 0000000000000..6b9c68b56bf33 --- /dev/null +++ b/Documentation/kmsg/s390/lcs @@ -0,0 +1,169 @@ +/*? + * Text: "%s: Allocating a socket buffer to interface %s failed\n" + * Severity: Error + * Parameter: + * @1: bus ID of the LCS device + * @2: network interface + * Description: + * LAN channel station (LCS) devices require a socket buffer (SKB) structure + * for storing incoming data. The LCS device driver failed to allocate an SKB + * structure to the LCS device. A likely cause of this problem is memory + * constraints. + * User action: + * Free some memory and repeat the failed operation. + */ + +/*? + * Text: "%s: Shutting down the LCS device failed\n" + * Severity: Error + * Parameter: + * @1: bus ID of the LCS device + * Description: + * A request to shut down a LAN channel station (LCS) device resulted in an + * error. The error is logged in the LCS trace at trace level 4. + * User action: + * Try again to shut down the device. If the error persists, see the LCS trace + * to find out what causes the error. + */ + +/*? + * Text: "%s: Detecting a network adapter for LCS devices failed with rc=%d (0x%x)\n" + * Severity: Error + * Parameter: + * @1: bus ID of the LCS device + * @2: lcs_detect return code in decimal notation + * @3: lcs_detect return code in hexadecimal notation + * Description: + * The LCS device driver could not initialize a network adapter. + * User action: + * Ensure that the physical connection from the port to the network is + * in place. If the error persists, note the return code from the error + * message and contact IBM support. + */ + +/*? + * Text: "%s: A recovery process has been started for the LCS device\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the LCS device + * Description: + * The LAN channel station (LCS) device is shut down and restarted. The recovery + * process might have been initiated by a user or started automatically as a + * response to a device problem. + * User action: + * Wait until a message indicates the completion of the recovery process. + */ + +/*? + * Text: "%s: An I/O-error occurred on the LCS device\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the LCS device + * Description: + * The LAN channel station (LCS) device reported a problem that can be recovered + * by the LCS device driver. Repeated occurrences of this problem indicate a + * malfunctioning device. + * User action: + * If this problem occurs frequently, initiate a recovery process for the + * device, for example, by writing '1' to the 'recover' sysfs attribute of the + * device. + */ + +/*? + * Text: "%s: A command timed out on the LCS device\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the LCS device + * Description: + * The LAN channel station (LCS) device reported a problem that can be recovered + * by the LCS device driver. Repeated occurrences of this problem indicate a + * malfunctioning device. + * User action: + * If this problem occurs frequently, initiate a recovery process for the + * device, for example, by writing '1' to the 'recover' sysfs attribute of the + * device. + */ + +/*? + * Text: "%s: An error occurred on the LCS device, rc=%ld\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the LCS device + * @2: return code + * Description: + * The LAN channel station (LCS) device reported a problem that can be recovered + * by the LCS device driver. Repeated occurrences of this problem indicate a + * malfunctioning device. + * User action: + * If this problem occurs frequently, initiate a recovery process for the + * device, for example, by writing '1' to the 'recover' sysfs attribute of the + * device. + */ + +/*? + * Text: "%s: The LCS device stopped because of an error, dstat=0x%X, cstat=0x%X \n" + * Severity: Warning + * Parameter: + * @1: bus ID of the LCS device + * @2: device status + * @3: subchannel status + * Description: + * The LAN channel station (LCS) device reported an error. The LCS device driver + * might start a device recovery process. + * User action: + * If the device driver does not start a recovery process, initiate a recovery + * process, for example, by writing '1' to the 'recover' sysfs attribute of the + * device. If the problem persists, note the status information provided with + * the message and contact IBM support. + */ + +/*? + * Text: "%s: Starting an LCS device resulted in an error, rc=%d!\n" + * Severity: Error + * Parameter: + * @1: bus ID of the LCS device + * @2: ccw_device_start return code in decimal notation + * Description: + * The LAN channel station (LCS) device driver failed to initialize an LCS + * device. The device is not operational. + * User action: + * Initiate a recovery process, for example, by writing '1' to the 'recover' + * sysfs attribute of the device. If the problem persists, contact IBM support. + */ + +/*? + * Text: "%s: Sending data from the LCS device to the LAN failed with rc=%d\n" + * Severity: Error + * Parameter: + * @1: bus ID of the LCS device + * @2: ccw_device_resume return code in decimal notation + * Description: + * The LAN channel station (LCS) device driver could not send data to the LAN + * using the LCS device. This might be a temporary problem. Operations continue + * on the LCS device. + * User action: + * If this problem occurs frequently, initiate a recovery process, for example, + * by writing '1' to the 'recover' sysfs attribute of the device. If the + * problem persists, contact IBM support. + */ + +/*? Text: "Query IPAssist failed. Assuming unsupported!\n" */ +/*? Text: "Stoplan for %s initiated by LGW\n" */ +/*? Text: "Not enough memory to add new multicast entry!\n" */ +/*? Text: "Not enough memory for debug facility.\n" */ +/*? Text: "Adding multicast address failed. Table possibly full!\n" */ +/*? Text: "Error in opening device!\n" */ +/*? Text: "LCS device %s %s IPv6 support\n" */ +/*? Text: "Device %s successfully recovered!\n" */ +/*? Text: "LCS device %s %s Multicast support\n" */ +/*? Text: " Initialization failed\n" */ +/*? Text: "Loading %s\n" */ +/*? Text: "Initialization failed\n" */ +/*? Text: "Terminating lcs module.\n" */ +/*? Text: "Device %s could not be recovered!\n" */ +/*? Text: "Initializing the lcs device driver failed\n" */ +/*? Text: "%s: The lcs device driver failed to recover the device\n" */ +/*? Text: "netif_stop_queue() cannot be called before register_netdev()\n" */ +/*? Text: "flen=%u proglen=%u pass=%u image=%pK from=%s pid=%d\n" */ +/*? Text: "%s selects TX queue %d, but real number of TX queues is %d\n" */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/monreader b/Documentation/kmsg/s390/monreader new file mode 100644 index 0000000000000..059e05a28da21 --- /dev/null +++ b/Documentation/kmsg/s390/monreader @@ -0,0 +1,128 @@ +/*? + * Text: "Reading monitor data failed with rc=%i\n" + * Severity: Error + * Parameter: + * @1: return code + * Description: + * The z/VM *MONITOR record device driver failed to read monitor data + * because the IUCV REPLY function failed. The read function against + * the monitor record device returns EIO. All monitor data that has been read + * since the last read with 0 size is incorrect. + * User action: + * Disregard all monitor data that has been read since the last read with + * 0 size. If the device driver has been compiled as a separate module, unload + * and reload the monreader module. If the device driver has been compiled + * into the kernel, reboot Linux. For more information about possible causes + * of the error see the IUCV section in "z/VM CP Programming Services" and + * the *MONITOR section in "z/VM Performance". + */ + +/*? + * Text: "z/VM *MONITOR system service disconnected with rc=%i\n" + * Severity: Error + * Parameter: + * @1: IPUSER SEVER return code + * Description: + * The z/VM *MONITOR record device driver receives monitor records through + * an IUCV connection to the z/VM *MONITOR system service. This connection + * has been severed and the read function of the z/VM *MONITOR device driver + * returns EIO. All data received since the last read with 0 size is incorrect. + * User action: + * Disregard all monitor data read since the last read with 0 size. Close and + * reopen the monitor record device. For information about the IPUSER SEVER + * return codes see "z/VM Performance". + */ + +/*? + * Text: "The read queue for monitor data is full\n" + * Severity: Warning + * Description: + * The read function of the z/VM *MONITOR device driver returns EOVERFLOW + * because not enough monitor data has been read since the monitor device + * has been opened. Monitor data already read are valid and subsequent reads + * return valid data but some intermediate data might be missing. + * User action: + * Be aware that monitor data might be missing. Assure that you regularly + * read monitor data after opening the monitor record device. + */ + +/*? + * Text: "Connecting to the z/VM *MONITOR system service failed with rc=%i\n" + * Severity: Error + * Parameter: + * @1: IUCV CONNECT return code + * Description: + * The z/VM *MONITOR record device driver receives monitor records through + * an IUCV connection to the z/VM *MONITOR system service. This connection + * could not be established when the monitor record device was opened. If + * the return code is 15, your z/VM guest virtual machine is not authorized + * to connect to the *MONITOR system service. + * User action: + * If the return code is 15, ensure that the IUCV *MONITOR statement is + * included in the z/VM directory entry for your z/VM guest virtual machine. + * For other IUCV CONNECT return codes see the IUCV section in "CP Programming + * Services" and the *MONITOR section in "z/VM Performance". + */ + +/*? + * Text: "Disconnecting the z/VM *MONITOR system service failed with rc=%i\n" + * Severity: Warning + * Parameter: + * @1: IUCV SEVER return code + * Description: + * The z/VM *MONITOR record device driver receives monitor data through an + * IUCV connection to the z/VM *MONITOR system service. This connection + * could not be closed when the monitor record device was closed. You might + * not be able to resume monitoring. + * User action: + * No immediate action is necessary. If you cannot open the monitor record + * device in the future, reboot Linux. For information about the IUCV SEVER + * return codes see the IUCV section in "CP Programming Services" and the + * *MONITOR section in "z/VM Performance". + */ + +/*? + * Text: "The z/VM *MONITOR record device driver cannot be loaded without z/VM\n" + * Severity: Error + * Description: + * The z/VM *MONITOR record device driver uses z/VM system services to provide + * monitor data about z/VM guest operating systems to applications on Linux. + * On Linux instances that run in environments other than the z/VM hypervisor, + * the z/VM *MONITOR record device driver does not provide any useful + * function and the corresponding monreader module cannot be loaded. + * User action: + * Load the z/VM *MONITOR record device driver only on Linux instances that run + * as guest operating systems of the z/VM hypervisor. If the z/VM *MONITOR + * record device driver has been compiled into the kernel, ignore this message. + */ + +/*? + * Text: "The z/VM *MONITOR record device driver failed to register with IUCV\n" + * Severity: Error + * Description: + * The z/VM *MONITOR record device driver receives monitor data through an IUCV + * connection and needs to register with the IUCV device driver. This + * registration failed and the z/VM *MONITOR record device driver was not + * loaded. A possible cause of this problem is insufficient memory. + * User action: + * Free some memory and try again to load the module. If the z/VM *MONITOR + * record device driver has been compiled into the kernel, you might have to + * configure more memory and reboot Linux. If you do not want to read monitor + * data, ignore this message. + */ + +/*? + * Text: "The specified *MONITOR DCSS %s does not have the required type SC\n" + * Severity: Error + * Parameter: + * @1: DCSS name + * Description: + * The DCSS that was specified with the monreader.mondcss kernel parameter or + * with the mondcss module parameter cannot be a *MONITOR DCSS because it is + * not of type SC. + * User action: + * Confirm that you are using the name of the DCSS that has been configured as + * the *MONITOR DCSS on the z/VM hypervisor. If the default name, MONDCSS, is + * used, omit the monreader.mondcss or mondcss parameter. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/monwriter b/Documentation/kmsg/s390/monwriter new file mode 100644 index 0000000000000..aaddda3e80e03 --- /dev/null +++ b/Documentation/kmsg/s390/monwriter @@ -0,0 +1,17 @@ +/*? + * Text: "Writing monitor data failed with rc=%i\n" + * Severity: Error + * Parameter: + * @1: return code + * Description: + * The monitor stream application device driver used the z/VM diagnose call + * DIAG X'DC' to start writing monitor data. z/VM returned an error and the + * monitor data cannot be written. If the return code is 5, your z/VM guest + * virtual machine is not authorized to write monitor data. + * User action: + * If the return code is 5, ensure that your z/VM guest virtual machine's + * entry in the z/VM directory includes the OPTION APPLMON statement. + * For other return codes see the section about DIAGNOSE Code X'DC' + * in "z/VM CP Programming Services". + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/netiucv b/Documentation/kmsg/s390/netiucv new file mode 100644 index 0000000000000..e805b1c121393 --- /dev/null +++ b/Documentation/kmsg/s390/netiucv @@ -0,0 +1,156 @@ +/*? + * Text: "%s: The peer interface of the IUCV device has closed the connection\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the IUCV device + * Description: + * The peer interface on the remote z/VM guest virtual machine has closed the + * connection. Do not expect further packets on this interface. Any packets + * you send to this interface will be dropped. + * User action: + * None. + */ + +/*? + * Text: "%s: The IUCV device failed to connect to z/VM guest %s\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the IUCV device + * @2: z/VM user ID + * Description: + * The connection cannot be established because the z/VM guest virtual + * machine with the peer interface is not running. + * User action: + * Ensure that the z/VM guest virtual machine with the peer interface is + * running; then try again to establish the connection. + */ + +/*? + * Text: "%s: The IUCV device failed to connect to the peer on z/VM guest %s\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the IUCV device + * @2: z/VM user ID + * Description: + * The connection cannot be established because the z/VM guest virtual machine + * with the peer interface is not configured for IUCV connections. + * User action: + * Configure the z/VM guest virtual machine with the peer interface for IUCV + * connections; then try again to establish the connection. + */ + +/*? + * Text: "%s: Connecting the IUCV device would exceed the maximum number of IUCV connections\n" + * Severity: Error + * Parameter: + * @1: bus ID of the IUCV device + * Description: + * The connection cannot be established because the maximum number of IUCV + * connections has been reached on the local z/VM guest virtual machine. + * User action: + * Close some of the established IUCV connections on the local z/VM guest + * virtual machine; then try again to establish the connection. + */ + +/*? + * Text: "%s: z/VM guest %s has too many IUCV connections to connect with the IUCV device\n" + * Severity: Error + * Parameter: + * @1: bus ID of the IUCV device + * @2: remote z/VM user ID + * Description: + * Connecting to the remote z/VM guest virtual machine failed because the + * maximum number of IUCV connections for the remote z/VM guest virtual + * machine has been reached. + * User action: + * Close some of the established IUCV connections on the remote z/VM guest + * virtual machine; then try again to establish the connection. + */ + +/*? + * Text: "%s: The IUCV device cannot connect to a z/VM guest with no IUCV authorization\n" + * Severity: Error + * Parameter: + * @1: bus ID of the IUCV device + * Description: + * Because the remote z/VM guest virtual machine is not authorized for IUCV + * connections, the connection cannot be established. + * User action: + * Add the statements 'IUCV ALLOW' and 'IUCV ANY' to the z/VM directory + * entry of the remote z/VM guest virtual machine; then try again to + * establish the connection. See "z/VM CP Planning and Administration" + * for details about the IUCV statements. + */ + +/*? + * Text: "%s: Connecting the IUCV device failed with error %d\n" + * Severity: Error + * Parameter: + * @1: bus ID of the IUCV device + * @2: error code + * Description: + * The connection cannot be established because of an IUCV CONNECT error. + * User action: + * Report this problem to your support organization. + */ + +/*? + * Text: "%s: The IUCV device has been connected successfully to %s\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the IUCV device + * @2: remote z/VM user ID + * Description: + * The connection has been established and the interface is ready to + * transmit communication packages. + * User action: + * None. + */ + +/*? + * Text: "%s: The IUCV interface to %s has been established successfully\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the IUCV device + * @2: remote z/VM user ID + * Description: + * The IUCV interface to the remote z/VM guest virtual machine has been + * established and can be activated with "ifconfig up" or an equivalent + * command. + * User action: + * None. + */ + +/*? + * Text: "%s: The IUCV device is connected to %s and cannot be removed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the IUCV device + * @2: remote z/VM user ID + * Description: + * Removing a connection failed because the interface is active with a peer + * interface on a remote z/VM guest virtual machine. + * User action: + * Deactivate the interface with "ifconfig down" or an equivalent command; + * then try again to remove the interface. + */ + +/*? + * Text: "%s: The peer z/VM guest %s has closed the connection\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the IUCV device + * @2: remote z/VM user ID + * Description: + * The peer interface is no longer available. + * User action: + * Either deactivate and remove the interface, or wait for the peer + * z/VM guest to re-establish the interface. + */ + +/*? Text: "driver unloaded\n" */ +/*? Text: "driver initialized\n" */ +/*? Text: "netif_stop_queue() cannot be called before register_netdev()\n" */ +/*? Text: "flen=%u proglen=%u pass=%u image=%pK from=%s pid=%d\n" */ +/*? Text: "%s selects TX queue %d, but real number of TX queues is %d\n" */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/numa b/Documentation/kmsg/s390/numa new file mode 100644 index 0000000000000..3cd65796ee4e7 --- /dev/null +++ b/Documentation/kmsg/s390/numa @@ -0,0 +1,11 @@ +/*? + * Text: "NUMA mode: %s\n" + * Severity: Informational + * Parameter: + * @1: mode + * Description: + * Linux started with the specified NUMA mode. + * User action: + * None. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/numa_emu b/Documentation/kmsg/s390/numa_emu new file mode 100644 index 0000000000000..20df7a1bff1ea --- /dev/null +++ b/Documentation/kmsg/s390/numa_emu @@ -0,0 +1,50 @@ +/*? + * Text: "Not enough memory for %d nodes, reducing node count\n" + * Severity: Warning + * Parameter: + * @1: requested number of nodes + * Description: + * Using the requested memory stripe size for emulating the requested number of + * NUMA nodes requires more than the available memory. The number of nodes is + * specified with the emu_nodes= kernel parameter. The memory stripe size to + * be used for distributing the available memory among the nodes is specified + * with the emu_size= kernel parameter. Fewer nodes were created than the + * requested number; each node has one memory stripe of the requested size. + * User action: + * Specify fewer nodes, reduce the memory stripe size, or make more memory + * available to your Linux instance. + */ + +/*? + * Text: "Creating %d nodes with memory stripe size %ld MB\n" + * Severity: Informational + * Parameter: + * @1: number of nodes + * @2: stripe size + * Description: + * NUMA emulation is activated with the reported number of NUMA nodes. + * The specified memory stripe size is used to distribute, in round-robin + * fashion, the available memory among the nodes. + * User action: + * None. + */ + +/*? + * Text: "Increasing memory stripe size from %ld MB to %ld MB\n" + * Severity: Warning + * Parameter: + * @1: requested memory stripe size + * @2: adjusted memory stripe size + * Description: + * NUMA emulation could not use the requested memory stripe size and + * therefore has increased it to the next possible value. + * The requested memory stripe size is a default value or it was specified + * with the emu_size= kernel parameter. + * The memory stripe size must be a multiple of the memory block size that + * can be read in hexadecimal notation from + * /sys/devices/system/memory/block_size_bytes. + * User action: + * To avoid this message in the future, specify a valid memory stripe size + * with the emu_size= kernel parameter. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/os_info b/Documentation/kmsg/s390/os_info new file mode 100644 index 0000000000000..de533a0992d05 --- /dev/null +++ b/Documentation/kmsg/s390/os_info @@ -0,0 +1,36 @@ +/*? + * Text: "entry %i: %s (addr=0x%lx size=%lu)\n" + * Severity: Informational + * Parameter: + * @1: entry ID + * @2: entry state + * @3: entry address + * @4: entry size + * Description: + * Linux is running in kdump mode and reports information defined by the + * previously running production kernel. Possible values for + * "entry state" are: + * + * - copied: The entry has been found, verified, and copied + * + * - not available: The entry has not been defined + * + * - checksum failed: The entry has been found, but it is not valid + * User action: + * If kdump fails, contact your service organization and include this message + * in the error report. + */ + +/*? + * Text: "crashkernel: addr=0x%lx size=%lu\n" + * Severity: Informational + * Parameter: + * @1: address + * @2: size + * Description: + * Linux is running in kdump mode and reports the address and size of + * the memory area that was reserved for kdump by the previously running + * production kernel. + * User action: + * None. + */ diff --git a/Documentation/kmsg/s390/perf b/Documentation/kmsg/s390/perf new file mode 100644 index 0000000000000..13325a6f51212 --- /dev/null +++ b/Documentation/kmsg/s390/perf @@ -0,0 +1,90 @@ +/*? + * Text: "CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n" + * Severity: Informational + * Parameter: + * @1: cpu number + * @2: first version number + * @3: second version number + * @4: counter set authorization + * @5: counter set enable controls + * @6: counter set activation controls + * Description: + * This message displays information about the CPU-measurement counter facility + * (CPUM_CF) on a particular CPU. For details, see + * "The Load-Program-Parameter and the CPU-Measurement Facilities", SA23-2260. + * User action: + * None. + */ + +/*? + * Text: "CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n" + * Severity: Informational + * Parameter: + * @1: cpu number + * @2: authorization status for the basic-sampling function + * @3: authorization status for the diagnostic-sampling function + * @4: minimum sampling interval + * @5: maximum sampling interval + * @6: cpu speed + * Description: + * This message displays generic information about the CPU-measurement sampling + * facility (CPUM_SF) on a particular CPU. For details, see + * "The Load-Program-Parameter and the CPU-Measurement Facilities", SA23-2260. + * User action: + * None. + */ + +/*? + * Text: "CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i bsdes=%i tear=%016lx dear=%016lx\n" + * Severity: Informational + * Parameter: + * @1: cpu number + * @2: authorization control + * @3: enable control + * @4: activation control + * @5: basic-sampling-data-entry size + * @6: tear register contents + * @7: dear register contents + * Description: + * This message displays information about the basic-sampling function of the + * CPU-measurement sampling facility (CPUM_SF) on a particular CPU. + * For details, see + * "The Load-Program-Parameter and the CPU-Measurement Facilities", SA23-2260. + * User action: + * None. + */ + +/*? + * Text: "CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i dsdes=%i tear=%016lx dear=%016lx\n" + * Severity: Informational + * Parameter: + * @1: cpu number + * @2: authorization control + * @3: enable control + * @4: activation control + * @5: diagnostic-sampling-data-entry size + * @6: tear register contents + * @7: dear register contents + * Description: + * This message displays information about the diagnostic-sampling function of the + * CPU-measurement sampling facility (CPUM_SF) on a particular CPU. + * For details, see + * "The Load-Program-Parameter and the CPU-Measurement Facilities", SA23-2260. + * User action: + * None. + */ + +/*? + * Text: "The sampling facility is already reserved by %p\n" + * Severity: Warning + * Parameter: + * @1: address of perf sampling support owner + * Description: + * A process tried to reserve the sampling facility support, but it was already + * reserved by another process. + * User action: + * Check whether another process, for example, the perf program or OProfile is + * currently active. Retry activating the sampling facility after the other + * process has ended. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/prng b/Documentation/kmsg/s390/prng new file mode 100644 index 0000000000000..84c243f06b74d --- /dev/null +++ b/Documentation/kmsg/s390/prng @@ -0,0 +1,103 @@ +/* prng */ + +/*? + * Text: "prng runs in TDES mode with chunksize=%d and reseed_limit=%u\n" + * Severity: Informational + * Parameter: + * @1: read chunk size in bytes + * @2: reseed limit + * Description: + * The pseudo-random number device driver started in triple DES mode. + * For IBM mainframes earlier than IBM zEnterprise EC12 (zEC12), + * triple DES is the only available mode. + * As of zEC12, the preferred mode is SHA-512. + * User action: + * If triple DES is the expected mode, no action is required. + * Otherwise, verify that the prng started with the mode= module or + * prng.mode= kernel parameter set to a value other than 1. + * The value 1 forces triple DES mode. Also ensure that the mainframe + * runs with the latest firmware level. + */ + +/*? + * Text: "The prng module stopped after running in triple DES mode\n" + * Severity: Informational + * Description: + * The pseudo-random number device driver was running in triple DES mode. + * The device driver module, prng, was unloaded, or it stopped + * because Linux shut down. + * User action: + * None. + */ + +/*? + * Text: "The prng module cannot start in SHA-512 mode\n" + * Severity: Error + * Description: + * The pseudo-random number device driver was loaded with the mode= module parameter + * or the prng.mode= kernel parameter set to 2. This setting forces SHA-512 mode, + * but the required support for MSA 5 is not available. This support requires an IBM + * zEnterprise EC12 (zEC12) or later mainframe. + * User action: + * If your mainframe is earlier than zEC12, set the mode= module or + * prng.mode= kernel parameter to 0 or 1 to run the + * pseudo-random number device driver in triple DES mode. + * Otherwise, ensure that MSA 5 support available. + */ + +/*? + * Text: "prng runs in SHA-512 mode with chunksize=%d and reseed_limit=%u\n" + * Severity: Informational + * Parameter: + * @1: read chunk size in bytes + * @2: reseed limit + * Description: + * The pseudo-random number device driver started in SHA-512 mode. + * As of IBM zEnterprise EC12, this is the preferred mode. + * User action: + * None. + */ + +/*? + * Text: "The prng module stopped after running in SHA-512 mode\n" + * Severity: Informational + * Description: + * The pseudo-random number device driver was running in SHA-512 mode. + * The device driver module, prng, was unloaded, or stopped + * because Linux shut down. + * User action: + * None. + */ + +/*? + * Text: "The prng self test state test for the SHA-512 mode failed\n" + * Severity: Error + * Description: + * The pseudo-random number device driver is not operational because the self test failed. + * After processing a published National Institute of Standards and Technology (NIST) test vector for the + * Deterministic Random Bit Generator (DRBG) algorithm, the device driver + * was not in the expected working state. This failure might indicate + * that the cryptographic software or hardware is not working correctly. + * The processed NIST test vector was: Hash Drbg, Sha-512, Count #0. + * User action: + * Unload and reload the prng module, or + * if prng was compiled into the kernel, restart Linux. + * If the error persists, contact your support organization. + */ + +/*? + * Text: "The prng self test data test for the SHA-512 mode failed\n" + * Severity: Error + * Description: + * The pseudo-random number device driver is not operational because the self test failed. + * After processing a published National Institute of Standards and Technology (NIST) test vector for the + * Deterministic Random Bit Generator (DRBG) algorithm, the device driver + * did not produce the expected pseudo-random data. This failure might indicate + * that the cryptographic software or hardware is not working correctly. + * The processed NIST test vector was: Hash Drbg, Sha-512, Count #0. + * User action: + * Unload and reload the prng module, or + * if prng was compiled into the kernel, restart Linux. + * If the error persists, contact your support organization. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/qeth b/Documentation/kmsg/s390/qeth new file mode 100644 index 0000000000000..3145186196186 --- /dev/null +++ b/Documentation/kmsg/s390/qeth @@ -0,0 +1,929 @@ +/*? + * Text: "%s: The LAN is offline\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * Description: + * A start LAN command was sent by the qeth device driver but the physical or + * virtual adapter has not started the LAN. The LAN might take a few seconds + * to become available. + * User action: + * Check the status of the qeth device, for example, with the lsqeth command. + * If the device does not become operational within a few seconds, initiate a + * recovery process, for example, by writing '1' to the 'recover' sysfs + * attribute of the device. + */ + +/*? + * Text: "%s: A recovery process has been started for the device\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * Description: + * A recovery process was started either by the qeth device driver or through + * a user command. + * User action: + * Wait until a message indicates the completion of the recovery process. + */ + +/*? + * Text: "%s: The qeth device driver failed to recover an error on the device\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * Description: + * The qeth device driver performed an automatic recovery operation to recover + * an error on a qeth device. The recovery operation failed. + * User action: + * Try the following actions in the given order: i) Check the status of the + * qeth device, for example, with the lsqeth command. ii) Initiate a recovery + * process by writing '1' to the 'recover' sysfs attribute of the device. + * iii) Ungroup and regroup the subchannel triplet of the device. vi) Reboot + * Linux. v) If the problem persists, gather Linux debug data and report the + * problem to your support organization. + */ + +/*? + * Text: "%s: Device recovery failed to restore all offload features\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * Description: + * The qeth device driver performed a recovery operation on a qeth device. Part + * of the recovery is to restore the offload features that were enabled before + * the recovery. At least one of those offload features could not be restored. + * User action: + * Check which offload features are enabled on the device, for example with + * the "ethtool -k" command. Try to explicitly re-enable the missing offload + * features for the device, for example with the "ethtool -K" command. + */ + +/*? + * Text: "%s: The link for interface %s on CHPID 0x%X failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * @2: network interface name + * @3: CHPID + * Description: + * A network link failed. A possible reason for this error is that a physical + * network cable has been disconnected. + * User action: + * Ensure that the network cable on the adapter hardware is connected properly. + * If the connection is to a guest LAN, ensure that the device is still coupled + * to the guest LAN. + */ + +/*? + * Text: "%s: The link for %s on CHPID 0x%X has been restored\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the qeth device + * @2: network interface name + * @3: CHPID + * Description: + * A failed network link has been re-established. A device recovery is in + * progress. + * User action: + * Wait until a message indicates the completion of the recovery process. + */ + +/*? + * Text: "%s: A hardware operation timed out on the device\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * Description: + * A hardware operation timed out on the qeth device. + * User action: + * Check the status of the qeth device, for example, with the lsqeth command. + * If the device is not operational, initiate a recovery process, for example, + * by writing '1' to the 'recover' sysfs attribute of the device. + */ + +/*? + * Text: "%s: The adapter hardware is of an unknown type\n" + * Severity: Error + * Parameter: + * @1: bus ID of the qeth device + * Description: + * The qeth device driver does not recognize the adapter hardware. The cause + * of this problem could be a hardware error or a Linux level that does not + * support your adapter hardware. + * User action: + * i) Investigate if your adapter hardware is supported by your Linux level. + * Consider using hardware that is supported by your Linux level or upgrading + * to a Linux level that supports your hardware. ii) Install the latest + * firmware on your adapter hardware. iii) If the problem persists and is not + * caused by a version mismatch, contact IBM support. + */ + +/*? + * Text: "%s: The adapter is used exclusively by another host\n" + * Severity: Error + * Parameter: + * @1: bus ID of the qeth device + * Description: + * The qeth adapter is exclusively used by another host. + * User action: + * Use another qeth adapter or configure this one not exclusively to a + * particular host. + */ + +/*? + * Text: "%s: QDIO reported an error, rc=%i\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * @2: return code + * Description: + * The QDIO subsystem reported an error. + * User action: + * Check for related QDIO errors. Check the status of the qeth device, for + * example, with the lsqeth command. If the device is not operational, initiate + * a recovery process, for example, by writing '1' to the 'recover' sysfs + * attribute of the device. + */ + +/*? + * Text: "%s: There is no kernel module to support discipline %d\n" + * Severity: Error + * Parameter: + * @1: bus ID of the qeth device + * @2: discipline + * Description: + * The qeth device driver or a user command requested a kernel module for a + * particular qeth discipline. Either the discipline is not supported by the + * qeth device driver or the requested module is not available to your Linux + * system. + * User action: + * Check if the requested discipline module has been compiled into the kernel + * or is present in /lib/modules//kernel/drivers/s390/net. + */ + +/*? + * Text: "Initializing the qeth device driver failed\n" + * Severity: Error + * Parameter: + * Description: + * The base module of the qeth device driver could not be initialized. + * User action: + * See errno.h to determine the reason for the error. + * i) Reboot Linux. ii) If the problem persists, gather Linux debug data and + * report the problem to your support organization. + */ + +/*? + * Text: "%s: Registering IP address %s failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * @2: IP address + * Description: + * An IP address could not be registered with the network adapter. + * User action: + * Check if another operating system instance has already registered the + * IP address with the same network adapter or at the same logical IP subnet. + */ + +/*? + * Text: "%s: Reading the adapter MAC address failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * Description: + * The qeth device driver could not read the MAC address from the network + * adapter. + * User action: + * Ungroup and regroup the subchannel triplet of the device. If this does not + * resolve the problem, reboot Linux. If the problem persists, gather Linux + * debug data and report the problem to your support organization. + */ + +/*? + * Text: "%s: Starting ARP processing support for %s failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * @2: network interface name + * Description: + * The qeth device driver could not start ARP support on the network adapter. + * User action: + * Ungroup and regroup the subchannel triplet of the device. If this does not + * resolve the problem, reboot Linux. If the problem persists, gather Linux + * debug data and report the problem to your support organization. + */ + +/*? + * Text: "%s: Starting IP fragmentation support for %s failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * @2: network interface name + * Description: + * The qeth device driver could not start IP fragmentation support on the + * network adapter. + * User action: + * Ungroup and regroup the subchannel triplet of the device. If this does not + * resolve the problem, reboot Linux. If the problem persists, gather Linux + * debug data and report the problem to your support organization. + */ + +/*? + * Text: "%s: Starting VLAN support for %s failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * @2: network interface name + * Description: + * The qeth device driver could not start VLAN support on the network adapter. + * User action: + * None if you do not require VLAN support. If you need VLAN support, + * ungroup and regroup the subchannel triplet of the device. If this does not + * resolve the problem, reboot Linux. If the problem persists, gather Linux + * debug data and report the problem to your support organization. + */ + +/*? + * Text: "%s: Starting multicast support for %s failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * @2: network interface name + * Description: + * The qeth device driver could not start multicast support on the network + * adapter. + * User action: + * Ungroup and regroup the subchannel triplet of the device. If this does not + * resolve the problem, reboot Linux. If the problem persists, gather Linux + * debug data and report the problem to your support organization. + */ + +/*? + * Text: "%s: Activating IPv6 support for %s failed\n" + * Severity: Error + * Parameter: + * @1: bus ID of the qeth device + * @2: network interface name + * Description: + * The qeth device driver could not activate IPv6 support on the network + * adapter. + * User action: + * None if you do not require IPv6 communication. If you need IPv6 support, + * ungroup and regroup the subchannel triplet of the device. If this does not + * resolve the problem, reboot Linux. If the problem persists, gather Linux + * debug data and report the problem to your support organization. + */ + +/*? + * Text: "%s: Enabling the passthrough mode for %s failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * @2: network interface name + * Description: + * The qeth device driver could not enable the passthrough mode on the + * network adapter. The passthrough mode is required for all network traffic + * other than IPv4. In particular, the passthrough mode is required for IPv6 + * traffic. + * User action: + * None if all you want to support is IPv4 communication. If you want to support + * IPv6 or other network traffic apart from IPv4, ungroup and regroup the + * subchannel triplet of the device. If this does not resolve the problem, + * reboot Linux. If the problem persists, gather Linux debug data and report + * the problem to your support organization. + */ + +/*? + * Text: "%s: Enabling broadcast filtering for %s failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * @2: network interface name + * Description: + * The qeth device driver could not enable broadcast filtering on the network + * adapter. + * User action: + * Ungroup and regroup the subchannel triplet of the device. If this does not + * resolve the problem, reboot Linux. If the problem persists, gather Linux + * debug data and report the problem to your support organization. + */ + +/*? + * Text: "%s: Setting up broadcast filtering for %s failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * @2: network interface name + * Description: + * The qeth device driver could not set up broadcast filtering on the network + * adapter. + * User action: + * Ungroup and regroup the subchannel triplet of the device. If this does not + * resolve the problem, reboot Linux. If the problem persists, gather Linux + * debug data and report the problem to your support organization. + */ + +/*? + * Text: "%s: Setting up broadcast echo filtering for %s failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * @2: network interface name + * Description: + * The qeth device driver could not set up broadcast echo filtering on the + * network adapter. + * User action: + * Ungroup and regroup the subchannel triplet of the device. If this does not + * resolve the problem, reboot Linux. If the problem persists, gather Linux + * debug data and report the problem to your support organization. + */ + +/*? + * Text: "%s: Starting HW checksumming for %s failed, using SW checksumming\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * @2: network interface name + * Description: + * The network adapter supports hardware checksumming for IP packages + * but the qeth device driver could not start hardware checksumming on the + * adapter. The qeth device driver continues to use software checksumming for + * IP packages. + * User action: + * None if you do not require hardware checksumming for network + * traffic. If you want to enable hardware checksumming, ungroup and regroup + * the subchannel triplet of the device. If this does not resolve the problem, + * reboot Linux. If the problem persists, gather Linux debug data and report + * the problem to your support organization. + */ + +/*? + * Text: "%s: Enabling HW checksumming for %s failed, using SW checksumming\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * @2: network interface name + * Description: + * The network adapter supports hardware checksumming for IP packages + * but the qeth device driver could not enable hardware checksumming on the + * adapter. The qeth device driver continues to use software checksumming for + * IP packages. + * User action: + * None if you do not require hardware checksumming for network + * traffic. If you want to enable hardware checksumming, ungroup and regroup + * the subchannel triplet of the device. If this does not resolve the problem, + * reboot Linux. If the problem persists, gather Linux debug data and report + * the problem to your support organization. + */ + +/*? + * Text: "%s: Starting outbound TCP segmentation offload for %s failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * @2: network interface name + * Description: + * The network adapter supports TCP segmentation offload, but the qeth device + * driver could not start this support on the adapter. + * User action: + * None if you do not require TCP segmentation offload. If you want to + * enable TCP segmentation offload, ungroup and regroup the subchannel triplet + * of the device. If this does not resolve the problem, reboot Linux. If the + * problem persists, gather Linux debug data and report the problem to your + * support organization. + */ + +/*? + * Text: "%s: The network adapter failed to generate a unique ID\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * Description: + * In IBM mainframe environments, network interfaces are not identified by + * a specific MAC address. Therefore, the network adapters provide the network + * interfaces with unique IDs to be used in their IPv6 link local addresses. + * Without such a unique ID, duplicate addresses might be assigned in other + * LPARs. + * User action: + * Install the latest firmware on the adapter hardware. Manually, configure + * an IPv6 link local address for this device. + */ + +/*? + * Text: "There is no IPv6 support for the layer 3 discipline\n" + * Severity: Warning + * Description: + * If you want to use IPv6 with the layer 3 discipline, you need a Linux kernel + * with IPv6 support. Because your Linux kernel has not been compiled with + * IPv6 support, you cannot use IPv6 with the layer 3 discipline, even if your + * adapter supports IPv6. + * User action: + * Use a Linux kernel that has been complied to include IPv6 support if you + * want to use IPv6 with layer 3 qeth devices. + */ + +/*? + * Text: "%s: The qeth device is not configured for the OSI layer required by z/VM\n" + * Severity: Error + * Parameter: + * @1: bus ID of the qeth device + * Description: + * A qeth device that connects to a virtual network on z/VM must be configured for the + * same Open Systems Interconnection (OSI) layer as the virtual network. An ETHERNET + * guest LAN or VSWITCH uses the data link layer (layer 2) while an IP guest LAN + * or VSWITCH uses the network layer (layer 3). + * User action: + * If you are connecting to an ETHERNET guest LAN or VSWITCH, set the layer2 sysfs + * attribute of the qeth device to 1. If you are connecting to an IP guest LAN or + * VSWITCH, set the layer2 sysfs attribute of the qeth device to 0. + */ + +/*? + * Text: "%s: Starting source MAC-address support for %s failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * @2: network interface name + * Description: + * The qeth device driver could not enable source MAC-address on the network + * adapter. + * User action: + * Ungroup and regroup the subchannel triplet of the device. If this does not + * resolve the problem, reboot Linux. If the problem persists, gather Linux + * debug data and report the problem to your support organization. + */ + +/*? + * Text: "%s: MAC address %pM already exists\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * @2: MAC-address + * Description: + * Setting the MAC address for the qeth device fails, because this + * MAC address is already defined on the OSA CHPID. + * User action: + * Use a different MAC address for this qeth device. + */ + +/*? + * Text: "%s: MAC address %pM is not authorized\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * @2: MAC-address + * Description: + * This qeth device is a virtual network interface card (NIC), to which z/VM + * has already assigned a MAC address. z/VM MAC address verification does + * not allow you to change this predefined address. + * User action: + * None; use the MAC address that has been assigned by z/VM. + */ + +/*? + * Text: "%s: The HiperSockets network traffic analyzer is activated\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the qeth device + * Description: + * The sysfs 'sniffer' attribute of the HiperSockets device has the value '1'. + * The corresponding HiperSockets interface has been switched into promiscuous mode. + * As a result, the HiperSockets network traffic analyzer is started on the device. + * User action: + * None. + */ + + /*? + * Text: "%s: The HiperSockets network traffic analyzer is deactivated\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the qeth device + * Description: + * The sysfs 'sniffer' attribute of the HiperSockets device has the value '1'. + * Promiscuous mode has been switched off for the corresponding HiperSockets interface + * As a result, the HiperSockets network traffic analyzer is stopped on the device. + * User action: + * None. + */ + +/*? + * Text: "%s: The device is not authorized to run as a HiperSockets network traffic analyzer\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * Description: + * The sysfs 'sniffer' attribute of the HiperSockets device has the value '1'. + * The corresponding HiperSockets interface is switched into promiscuous mode + * but the network traffic analyzer (NTA) rules configured at the Support Element (SE) + * do not allow tracing. Possible reasons are: + * - Tracing is not authorized for all HiperSockets LANs in the mainframe system + * - Tracing is not authorized for this HiperSockets LAN + * - LPAR is not authorized to enable an NTA + * User action: + * Configure appropriate HiperSockets NTA rules at the SE. + */ + +/*? + * Text: "%s: A HiperSockets network traffic analyzer is already active in the HiperSockets LAN\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * Description: + * The sysfs 'sniffer' attribute of the HiperSockets device has the value '1'. + * The HiperSockets interface is switched into promiscuous mode but another + * HiperSockets device on the same HiperSockets LAN is already running as + * a network traffic analyzer. + * A HiperSockets LAN can only have one active network traffic analyzer. + * User action: + * Do not configure multiple HiperSockets devices in the same HiperSockets LAN as + * tracing devices. + */ + +/*? + * Text: "%s: Enabling HW TX checksumming for %s failed, using SW TX checksumming\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * @2: network interface name + * Description: + * The network adapter supports hardware checksumming for outgoing IP packages + * but the qeth device driver could not enable hardware TX checksumming on the + * adapter. The qeth device driver continues to use software checksumming for + * outgoing IP packages. + * User action: + * None if you do not require hardware checksumming for outgoing network + * traffic. If you want to enable hardware checksumming, ungroup and regroup + * the subchannel triplet of the device. If this does not resolve the problem, + * reboot Linux. If the problem persists, gather Linux debug data and report + * the problem to your support organization. + */ + +/*? + * Text: "%s: A connection could not be established because of an OLM limit\n" + * Severity: Error + * Parameter: + * @1: bus ID of the qeth device + * Description: + * z/OS has activated Optimized Latency Mode (OLM) for a connection through an OSA Express3 adapter. + * This reduces the maximum number of concurrent connections per physical port for shared adapters. + * The new connection would exceed the maximum. Linux cannot establish further connections using + * this adapter. + * User action: + * If possible, deactivate an existing connection that uses this adapter and try again to establish + * the new connection. If you cannot free an existing connection, use a different adapter for the + * new connection. + */ + +/*? + * Text: "%s: Setting the device online failed because of insufficient authorization\n" + * Severity: Error + * Parameter: + * @1: bus ID of the qeth device + * Description: + * The qeth device is configured with OSX CHPIDs. An OSX CHPID cannot be activated unless the LPAR is explicitly authorized to access it. + * For z/VM guest operating systems, the z/VM user ID must be explicitly authorized in addition to the LPAR. + * You grant these authorizations through the Service Element. + * User action: + * At the Service Element, authorize the LPAR and, if applicable, the z/VM user ID for using the OSX CHPIDs with which the qeth device has been configured. + * Then try again to set the device online. + */ + +/*? + * Text: "%s: portname is deprecated and is ignored\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * Description: + * An OSA-Express port name was required to identify a shared OSA port. + * All operating system instances that shared the port had to use the same port name. + * This requirement no longer applies, and the specified portname attribute is ignored. + * User action: + * For future upgrades, remove OSA port name specifications from your + * network configuration. + */ + +/*? Text: "core functions removed\n" */ +/*? Text: "%s: Device is a%s card%s%s%s\nwith link type %s.\n" */ +/*? Text: "%s: issue_next_read failed: no iob available!\n" */ +/*? Text: "%s: Priority Queueing not supported\n" */ +/*? Text: "%s: sense data available. cstat 0x%X dstat 0x%X\n" */ +/*? Text: "loading core functions\n" */ +/*? Text: "%s: MAC address %pM successfully registered on device %s\n" */ +/*? Text: "%s: Device successfully recovered!\n" */ +/*? Text: "register layer 2 discipline\n" */ +/*? Text: "unregister layer 2 discipline\n" */ +/*? Text: "%s: Hardware IP fragmentation not supported on %s\n" */ +/*? Text: "%s: IPv6 not supported on %s\n" */ +/*? Text: "%s: VLAN not supported on %s\n" */ +/*? Text: "%s: Inbound source MAC-address not supported on %s\n" */ +/*? Text: "%s: IPV6 enabled\n" */ +/*? Text: "%s: ARP processing not supported on %s!\n" */ +/*? Text: "%s: Hardware IP fragmentation enabled \n" */ +/*? Text: "%s: set adapter parameters not supported.\n" */ +/*? Text: "%s: VLAN enabled\n" */ +/*? Text: "register layer 3 discipline\n" */ +/*? Text: "%s: Outbound TSO enabled\n" */ +/*? Text: "%s: Broadcast not supported on %s\n" */ +/*? Text: "%s: Outbound TSO not supported on %s\n" */ +/*? Text: "%s: Inbound HW Checksumming not supported on %s,\ncontinuing using Inbound SW Checksumming\n" */ +/*? Text: "%s: Using no checksumming on %s.\n" */ +/*? Text: "%s: Broadcast enabled\n" */ +/*? Text: "%s: Multicast not supported on %s\n" */ +/*? Text: "%s: Using SW checksumming on %s.\n" */ +/*? Text: "%s: HW Checksumming (%sbound) enabled\n" */ +/*? Text: "unregister layer 3 discipline\n" */ +/*? Text: "%s: Multicast enabled\n" */ +/*? Text: "%s: QDIO data connection isolation is deactivated\n" */ +/*? Text: "%s: QDIO data connection isolation is activated\n" */ +/*? Text: "%s: Adapter does not support QDIO data connection isolation\n" */ +/*? Text: "%s: Adapter is dedicated. QDIO data connection isolation not supported\n" */ +/*? Text: "%s: TSO does not permit QDIO data connection isolation\n" */ +/*? Text: "%s: HW TX Checksumming enabled\n" */ +/*? Text: "netif_stop_queue() cannot be called before register_netdev()\n" */ +/*? Text: "qeth_l3: ignoring TR device\n" */ +/*? Text: "flen=%u proglen=%u pass=%u image=%pK from=%s pid=%d\n" */ +/*? Text: "%s selects TX queue %d, but real number of TX queues is %d\n" */ + +/*? + * Text: "%s: Turning off reflective relay mode at the adjacent switch failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * Description: + * The policy for the QDIO data connection isolation was + * changed successfully, and communications are now handled according to the + * new policy. The ISOLATION_FORWARD policy is no longer used, but the qeth + * device driver could not turn off the reflective relay mode on the adjacent + * switch port. + * User action: + * Check the adjacent switch for errors and correct the problem. + */ + +/*? + * Text: "%s: The adjacent switch port does not support reflective relay mode\n" + * Severity: Error + * Parameter: + * @1: bus ID of the qeth device + * Description: + * The 'isolation' sysfs attribute of the qeth device could not be set to 'forward'. + * This setting selects the ISOLATION_FORWARD policy for the QDIO data connection + * isolation. The ISOLATION_FORWARD policy requires a network adapter in Virtual + * Ethernet Port Aggregator (VEPA) mode with an adjacent switch port in reflective + * relay mode. + * User action: + * Use a switch port that supports reflective relay mode if you want to use the + * ISOLATION_FORWARD policy for the qeth device. + */ + +/*? + * Text: "%s: The reflective relay mode cannot be enabled at the adjacent switch port" + * Severity: Error + * Parameter: + * @1: bus ID of the qeth device + * Description: + * The 'isolation' sysfs attribute of the qeth device could not be set to 'forward'. + * This setting selects the ISOLATION_FORWARD policy for the QDIO data connection + * isolation. The ISOLATION_FORWARD policy requires a network adapter in Virtual + * Ethernet Port Aggregator (VEPA) mode with an adjacent switch port in reflective relay + * mode. The qeth device driver failed to enable the required reflective relay mode on + * the adjacent switch port although the switch port supports this mode. + * User action: + * Enable reflective relay mode on the switch for the adjacent port and try again. + */ + +/*? + * Text: "%s: Interface %s is down because the adjacent port is no longer in reflective relay mode\n" + * Severity: Error + * Parameter: + * @1: bus ID of the qeth device + * @2: interface name + * Description: + * The ISOLATION_FORWARD policy is active for the QDIO data connection isolation + * of the qeth device. This policy requires a network adapter in Virtual Ethernet + * Port Aggregator (VEPA) mode with an adjacent switch port in reflective relay mode. + * The reflective relay mode on the adjacent switch port was disabled. The qeth device + * was set offline and the interface was deactivated to prevent any unintended network traffic. + * User action: + * Enable the reflective relay mode again on the adjacent port or use the 'isolation' + * sysfs attribute of the qeth device to set a different policy for the QDIO data connection + * isolation. You can then resume operations by setting the qeth device back + * online and activating the interface. + */ + +/*? + * Text: "%s: Failed to create completion queue\n" + * Severity: Error + * Parameter: + * @1: bus ID of the qeth device + * Description: + * The HiperSockets device could not be configured with a completion queue. + * A completion queue is required to operate AF_IUCV communication in an LPAR. + * User action: + * i) Investigate if you have the latest firmware level in place. + * ii) If the problem persists and is not caused by a version mismatch, contact IBM + * support. + */ + +/*? + * Text: "%s: Completion Queueing supported\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the qeth device + * Description: + * The HiperSockets device supports completion queueing. This is required to + * set up AF_IUCV communication in an LPAR. + */ + +/*? + * Text: "%s: Completion Queue support enabled" + * Severity: Informational + * Parameter: + * @1: bus ID of the qeth device + * Description: + * The HiperSockets device is enabled for completion queueing. This is part of + * the process to set up AF_IUCV communication in an LPAR. + */ + +/*? + * Text: "%s: Completion Queue support disabled" + * Severity: Informational + * Parameter: + * @1: bus ID of the qeth device + * Description: + * The HiperSockets device is disabled for completion queueing. This device + * cannot or no longer be used to set up AF_IUCV communication in an LPAR. + */ + +/*? + * Text: "%s: The device represents a Bridge Capable Port\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the qeth device + * Description: + * You can configure this device as a Bridge Port. + * User action: + * None. + */ + +/*? + * Text: "%s: The device is not configured as a Bridge Port\n" + * Severity: Error + * Parameter: + * @1: bus ID of the qeth device + * Description: + * The Bridge Port role cannot be withdrawn from a device + * that is not configured as a Bridge Port. + * User action: + * None. + */ + +/*? + * Text: "%s: The LAN already has a primary Bridge Port\n" + * Severity: Error + * Parameter: + * @1: bus ID of the qeth device + * Description: + * A LAN can have multiple secondary Bridge Ports, but only + * one primary Bridge Port. Configuring the device as a + * primary Bridge Port failed because another port on the + * LAN has been configured as the primary Bridge Port. + * User action: + * Find out which operating system instance has configured the primary + * Bridge Port. Assure that the primary role for this port is withdrawn + * before trying again to configure your device as the primary Bridge + * Port. Alternatively, consider configuring your device as a secondary + * Bridge Port. + */ + +/*? + * Text: "%s: The device is already a secondary Bridge Port\n" + * Severity: Error + * Parameter: + * @1: bus ID of the qeth device + * Description: + * A device cannot be configured as a primary or secondary + * Bridge Port if it is already configured as a secondary Bridge Port. + * User action: + * None, if you want the device to be a secondary Bridge Port. + * If you want to configure the device as the primary Bridge Port, + * withdraw the secondary role by writing 'none' to the 'bridgeport_role' + * sysfs attribute of the device. Then try again to configure the + * device as the primary Bridge Port. + */ + +/*? + * Text: "%s: The LAN cannot have more secondary Bridge Ports\n" + * Severity: Error + * Parameter: + * @1: bus ID of the qeth device + * Description: + * A LAN can have up to five secondary Bridge Ports. + * You cannot configure a further device as a secondary + * Bridge Port unless the Bridge Ports role is withdrawn from one of + * the existing secondary Bridge Ports. + * User action: + * Assure that the Bridge Port role is withdrawn from one of the + * existing secondary Bridge Ports before trying again to configure your + * device as a secondary Bridge Port. + */ + +/*? + * Text: "%s: The device is already a primary Bridge Port\n" + * Severity: Error + * Parameter: + * @1: bus ID of the qeth device + * Description: + * A device cannot be configured as a primary or secondary + * Bridge Port if it is already configured as a primary Bridge Port. + * User action: + * None, if you want the device to be a primary Bridge Port. + * If you want to configure the device as a secondary Bridge Port, + * withdraw the primary role by writing 'none' to the 'bridgeport_role' + * sysfs attribute of the device. Then try again to configure the + * device as the secondary Bridge Port. + */ + +/*? + * Text: "%s: The device is not authorized to be a Bridge Port\n" + * Severity: Error + * Parameter: + * @1: bus ID of the qeth device + * Description: + * The device cannot be configured as a Bridge Port because + * the required authorizations in the hardware are not in place. + * User action: + * See your hardware documentation about how to authorize + * ports for becoming a Bridge Port. + */ + +/*? + * Text: "%s: A Bridge Port is already configured by a different operating system\n" + * Severity: Error + * Parameter: + * @1: bus ID of the qeth device + * Description: + * Linux instances cannot configure the target port as a Bridge Port. + * Another operating system already uses a Bridge Port on the HiperSockets + * or on the OSA adapter. For example, a z/VM instance might be using + * a port in a VSWITCH configuration. Multiple Bridge Ports on the same + * HiperSockets or OSA adapter must be configured by instances of the same + * operating system, for example, all Linux or all z/VM. + * User action: + * Reconsider your network topology. Configure Bridge Ports only for ports + * on adapters where any other Bridge Ports are configured by other Linux + * instances. + */ + +/*? + * Text: "%s: Setting address notification failed\n" + * Severity: Error + * Parameter: + * @1: bus ID of the qeth device + * Description: + * Enabling or disabling the address notification feature of a + * HiperSockets device failed. The device might not be configured as a + * Bridge Port. + * User action: + * None, unless you need address notifications for this device. + * If you need notifications, confirm that your device is attached to a + * HiperSockets LAN that supports Bridge Capable Ports and that your + * device is configured as a Bridge Port. If the 'bridgeport_role' + * sysfs attribute of the device contains, one of the values 'primary' + * or 'secondary' and you cannot set the address notification, contact + * your support organization. + */ + +/*? + * Text: "%s: Address notification from the Bridge Port stopped %s (%s)\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the qeth device + * @2: network interface name + * @3: error reported by the hardware + * Description: + * A Bridge Port no longer provides address notifications. + * Possible reasons include traffic overflow and that the device is no + * longer configured as a Bridge Port. A udev event with + * BRIDGEDHOST=abort was emitted to alert applications that rely on the + * address notifications. + * User action: + * None. + */ + +/*? + * Text: "%s: The qeth driver ran out of channel command buffers\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the qeth device + * Description: + * Command buffers can temporarily run out during periods of + * intense network configuration activities. + * The device driver recovers from this condition as outstanding + * commands are completed. + * User action: + * Wait for a short time. If the problem persists, + * initiate a recovery process by writing '1' to the 'recover' + * sysfs attribute of the device. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/s390dbf b/Documentation/kmsg/s390/s390dbf new file mode 100644 index 0000000000000..b9286cf237a23 --- /dev/null +++ b/Documentation/kmsg/s390/s390dbf @@ -0,0 +1,83 @@ +/*? + * Text: "Root becomes the owner of all s390dbf files in sysfs\n" + * Severity: Warning + * Description: + * The S/390 debug feature you are using only supports uid/gid = 0. + * User action: + * None. + */ + +/*? + * Text: "Registering debug feature %s failed\n" + * Severity: Error + * Parameter: + * @1: feature name + * Description: + * The initialization of an S/390 debug feature failed. A likely cause of this + * problem is memory constraints. The system keeps running, but the debug + * data for this feature will not be available in sysfs. + * User action: + * Consider assigning more memory to your LPAR or z/VM guest virtual machine. + */ + +/*? + * Text: "Registering view %s/%s would exceed the maximum number of views %i\n" + * Severity: Error + * Parameter: + * @1: feature name + * @2: view name + * @3: maximum + * Description: + * The maximum number of allowed debug feature views has been reached. The + * view has not been registered. The system keeps running but the new view + * will not be available in sysfs. This is a program error. + * User action: + * Report this problem to your support partner. + */ + +/*? + * Text: "%s is not a valid level for a debug feature\n" + * Severity: Warning + * Parameter: + * @1: level + * Description: + * Setting a new level for a debug feature by using the 'level' sysfs attribute + * failed. Valid levels are the minus sign (-) and the integers in the + * range 0 to 6. The minus sign switches off the feature. The numbers switch + * the feature on, where higher numbers produce more debug output. + * User action: + * Write a valid value to the 'level' sysfs attribute. + */ + +/*? + * Text: "Flushing debug data failed because %c is not a valid area\n" + * Severity: Informational + * Parameter: + * @1: debug area number + * Description: + * Flushing a debug area by using the 'flush' sysfs attribute failed. Valid + * values are the minus sign (-) for flushing all areas, or the number of the + * respective area for flushing a single area. + * User action: + * Write a valid area number or the minus sign (-) to the 'flush' sysfs + * attribute. + */ + +/*? + * Text: "Allocating memory for %i pages failed\n" + * Severity: Informational + * Parameter: + * @1: number of pages + * Description: + * Setting the debug feature size by using the 'page' sysfs attribute failed. + * Linux did not have enough memory for expanding the debug feature to the + * requested size. + * User action: + * Use a smaller number of pages for the debug feature or allocate more + * memory to your LPAR or z/VM guest virtual machine. + */ + +/*? Text: "%s: set new size (%i pages)\n" */ +/*? Text: "%s: switched off\n" */ +/*? Text: "%s: level %i is out of range (%i - %i)\n" */ +/*? Text: "Registering view %s/%s failed due to out of memory\n" */ diff --git a/Documentation/kmsg/s390/sclp_cmd b/Documentation/kmsg/s390/sclp_cmd new file mode 100644 index 0000000000000..0729e350e2231 --- /dev/null +++ b/Documentation/kmsg/s390/sclp_cmd @@ -0,0 +1,44 @@ +/*? Text: "sync request failed (cmd=0x%08x, status=0x%02x)\n" */ +/*? Text: "readcpuinfo failed (response=0x%04x)\n" */ +/*? Text: "configure cpu failed (cmd=0x%08x, response=0x%04x)\n" */ +/*? Text: "configure channel-path failed (cmd=0x%08x, response=0x%04x)\n" */ +/*? Text: "read channel-path info failed (response=0x%04x)\n" */ +/*? Text: "assign storage failed (cmd=0x%08x, response=0x%04x, rn=0x%04x)\n" */ +/*? Text: "configure PCI I/O adapter failed: cmd=0x%08x response=0x%04x\n" */ +/*? Text: "request failed (status=0x%02x)\n" */ +/*? Text: "request failed with response code 0x%x\n" */ + +/*? + * Text: "Memory hotplug state changed, suspend refused.\n" + * Severity: Error + * Description: + * Suspend is refused after a memory hotplug operation was performed. + * User action: + * The system needs to be restarted and no memory hotplug operation must be + * performed in order to allow suspend. + */ + +/*? + * Text: "Standby memory at 0x%llx (%lluM of %lluM usable)\n" + * Severity: Informational + * Parameter: + * @1: start address of standby memory + * @2: usable memory in MB + * @3: total detected memory in MB + * Description: + * Standby memory was detected. It can be used for memory hotplug only + * if it is aligned to the Linux hotplug memory block size. + * If the aligned amount of memory matches the total amount, + * all detected standby memory can be used. Otherwise, some of the detected + * memory is unaligned and cannot be used. + * User action: + * None, if the usable and the total amount of detected standby memory match. + * If the amounts of memory do not match, + * check the memory setup of your guest virtual machine and ensure that + * the standby memory start and end + * address is aligned to the Linux hotplug memory block size. + * On Linux, issue "cat /sys/devices/system/memory/block_size_bytes" + * to find the hotplug memory block size value in hexadecimal notation. + * On z/VM, query your memory setup with "vmcp q v store". + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/sclp_config b/Documentation/kmsg/s390/sclp_config new file mode 100644 index 0000000000000..4e1f3b26edb20 --- /dev/null +++ b/Documentation/kmsg/s390/sclp_config @@ -0,0 +1,15 @@ +/*? + * Text: "CPU capability may have changed\n" + * Severity: Informational + * Description: + * The capability of the CPUs in the configuration may have been upgraded + * or downgraded. This message may also appear if the capability of the + * CPUs in the configuration did not change. + * For details see the STORE SYSTEM INFORMATION description in the + * "Principles of Operation." + * User action: + * The user can examine /proc/sysinfo for CPU capability values. + */ +/*? Text: "Open for Business request failed with response code 0x%04x\n" */ +/*? Text: "SCLP receiver did not register to receive Configuration Management Data Events.\n" */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/sclp_cpi b/Documentation/kmsg/s390/sclp_cpi new file mode 100644 index 0000000000000..d52724fd1f52d --- /dev/null +++ b/Documentation/kmsg/s390/sclp_cpi @@ -0,0 +1,3 @@ +/*? Text: "request failed (status=0x%02x)\n" */ +/*? Text: "request failed with response code 0x%x\n" */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/sclp_ocf b/Documentation/kmsg/s390/sclp_ocf new file mode 100644 index 0000000000000..4553d8b236cc8 --- /dev/null +++ b/Documentation/kmsg/s390/sclp_ocf @@ -0,0 +1 @@ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/sclp_sdias b/Documentation/kmsg/s390/sclp_sdias new file mode 100644 index 0000000000000..081046c8b505f --- /dev/null +++ b/Documentation/kmsg/s390/sclp_sdias @@ -0,0 +1,4 @@ +/*? Text: "sclp_send failed for get_nr_blocks\n" */ +/*? Text: "SCLP error: %x\n" */ +/*? Text: "sclp_send failed: %x\n" */ +/*? Text: "Error from SCLP while copying hsa. Event status = %x\n" */ diff --git a/Documentation/kmsg/s390/scm_block b/Documentation/kmsg/s390/scm_block new file mode 100644 index 0000000000000..aee44072e24be --- /dev/null +++ b/Documentation/kmsg/s390/scm_block @@ -0,0 +1,51 @@ +/*? + * Text: "%lx: The capabilities of the SCM increment changed\n" + * Severity: Informational + * Parameter: + * @1: start address of the SCM increment + * Description: + * A configuration change is in progress for the storage class memory (SCM) + * increment. + * User action: + * Verify that the capability of the SCM increment is as intended; for + * example, with lsscm. + */ + +/*? + * Text: "An I/O operation to SCM failed with rc=%d\n" + * Severity: Error + * Parameter: + * @1: return code + * Description: + * An error occurred during I/O to storage class memory (SCM). The operation + * was repeated, but the maximum number of retries was exceeded before the + * request could be fulfilled. + * User action: + * Contact your support organization. + */ + +/*? + * Text: "%lx: Write access to the SCM increment is suspended\n" + * Severity: Informational + * Parameter: + * @1: start address of the SCM increment + * Description: + * A concurrent firmware upgrade is in progress. For the duration of the + * upgrade, write access to the storage class memory (SCM) increment has been + * suspended. + * User action: + * None. + */ + +/*? + * Text: "%lx: Write access to the SCM increment is restored\n" + * Severity: Informational + * Parameter: + * @1: start address of the SCM increment + * Description: + * Write access to the storage class memory (SCM) increment was restored + * after a temporary suspension during a concurrent firmware upgrade. + * User action: + * None. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/setup b/Documentation/kmsg/s390/setup new file mode 100644 index 0000000000000..c90e603176486 --- /dev/null +++ b/Documentation/kmsg/s390/setup @@ -0,0 +1,165 @@ +/*? + * Text: "The initial RAM disk does not fit into the memory\n" + * Severity: Error + * Description: + * The load address and the size of the initial RAM disk specify a memory + * area that is not available. + * User action: + * Lower the load address of the initial RAM disk, reduce the size of the + * initial RAM disk, or increase the size of the system memory to make the + * initial RAM disk fit into the memory. + */ + +/*? + * Text: "The maximum memory size is %luMB\n" + * Severity: Notice + * Parameter: + * @1: size in MB + * Description: + * The system memory size cannot exceed the amount of memory that is + * provided by the real or virtual hardware. It can be further reduced + * through an upper memory address limit that is specified with the + * mem= kernel parameter. + * User action: + * None. + */ + +/*? + * Text: "Linux is running as a z/VM guest operating system in 31-bit mode\n" + * Severity: Informational + * Description: + * The 31-bit Linux kernel detected that it is running as a guest operating + * system of the z/VM hypervisor. + * User action: + * None. + */ + +/*? + * Text: "Linux is running natively in 31-bit mode\n" + * Severity: Informational + * Description: + * The 31-bit Linux kernel detected that it is running on an IBM mainframe, + * either as the sole operating system in an LPAR or as the sole operating + * system on the entire mainframe. The Linux kernel is not running as a + * guest operating system of the z/VM hypervisor. + * User action: + * None. + */ + +/*? + * Text: "The hardware system has IEEE compatible floating point units\n" + * Severity: Informational + * Description: + * The Linux kernel detected that it is running on a hardware system with + * CPUs that have IEEE compatible floating point units. + * User action: + * None. + */ + +/*? + * Text: "The hardware system has no IEEE compatible floating point units\n" + * Severity: Informational + * Description: + * The Linux kernel detected that it is running on a hardware system with + * CPUs that do not have IEEE compatible floating point units. + * User action: + * None. + */ + +/*? + * Text: "Linux is running as a z/VM guest operating system in 64-bit mode\n" + * Severity: Informational + * Description: + * The 64-bit Linux kernel detected that it is running as a guest operating + * system of the z/VM hypervisor. + * User action: + * None. + */ + +/*? + * Text: "Linux is running under KVM in 64-bit mode\n" + * Severity: Informational + * Description: + * The 64-bit Linux kernel detected that it is running as a guest operating + * system of the KVM hypervisor. + * User action: + * None. + */ + +/*? + * Text: "Linux is running natively in 64-bit mode\n" + * Severity: Informational + * Description: + * The 64-bit Linux kernel detected that it is running on an IBM mainframe, + * either as the sole operating system in an LPAR or as the sole operating + * system on the entire mainframe. The Linux kernel is not running as a + * guest operating system of the z/VM hypervisor. + * User action: + * None. + */ + +/*? + * Text: "Defining the Linux kernel NSS failed with rc=%d\n" + * Severity: Error + * Parameter: + * @1: return code + * Description: + * The Linux kernel could not define the named saved system (NSS) with + * the z/VM CP DEFSYS command. The return code represents the numeric + * portion of the CP DEFSYS error message. + * User action: + * For return code 1, the z/VM guest virtual machine is not authorized + * to define named saved systems. + * Ensure that the z/VM guest virtual machine is authorized to issue + * the CP DEFSYS command (typically privilege class E). + * For other return codes, see the help and message documentation for + * the CP DEFSYS command. + */ + +/*? + * Text: "Saving the Linux kernel NSS failed with rc=%d\n" + * Severity: Error + * Parameter: + * @1: return code + * Description: + * The Linux kernel could not save the named saved system (NSS) with + * the z/VM CP SAVESYS command. The return code represents the numeric + * portion of the CP SAVESYS error message. + * User action: + * For return code 1, the z/VM guest virtual machine is not authorized + * to save named saved systems. + * Ensure that the z/VM guest virtual machine is authorized to issue + * the CP SAVESYS command (typically privilege class E). + * For other return codes, see the help and message documentation for + * the CP SAVESYS command. + */ + +/*? + * Text: "crashkernel reservation failed: %s\n" + * Severity: Informational + * Parameter: + * @1: reason string + * Description: + * The memory reservation for the kdump "crashkernel" parameter was not + * successful. The Linux kernel was either not able to find a free memory + * area or an invalid area has been defined. The reason string describes the + * cause of the failure in more detail. + * User action: + * Increase the memory footprint of your virtual machine or adjust the values + * for the "crashkernel" kernel parameter. Then boot your Linux system again. + */ + +/*? + * Text: "Reserving %lluMB of memory at %lluMB for crashkernel (System RAM: %luMB)\n" + * Severity: Informational + * Parameter: + * @1: amount of reserved memory + * @2: storage location of reserved memory + * @3: amount of system RAM + * Description: + * The memory reservation for the kdump "crashkernel" parameter was successful + * and a kdump kernel can now be loaded with the kexec tool. + * User action: + * None. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/smsgiucv b/Documentation/kmsg/s390/smsgiucv new file mode 100644 index 0000000000000..4553d8b236cc8 --- /dev/null +++ b/Documentation/kmsg/s390/smsgiucv @@ -0,0 +1 @@ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/smsgiucv_app b/Documentation/kmsg/s390/smsgiucv_app new file mode 100644 index 0000000000000..4553d8b236cc8 --- /dev/null +++ b/Documentation/kmsg/s390/smsgiucv_app @@ -0,0 +1 @@ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/tape b/Documentation/kmsg/s390/tape new file mode 100644 index 0000000000000..980f2bf1298d9 --- /dev/null +++ b/Documentation/kmsg/s390/tape @@ -0,0 +1,63 @@ +/*? + * Text: "%s: A tape unit was detached while in use\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * A tape unit has been detached from the I/O configuration while a tape + * was being accessed. This typically results in I/O error messages and + * potentially in damaged data on the tape. + * User action: + * Check the output of the application that accesses the tape device. + * If this problem occurred during a write-type operation, consider repeating + * the operation after bringing the tape device back online. + */ + +/*? + * Text: "%s: A tape cartridge has been mounted\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the tape device + * Description: + * A tape cartridge has been inserted into the tape unit. The tape in the + * tape unit is ready to be accessed. + * User action: + * None. + */ + +/*? + * Text: "%s: The tape cartridge has been successfully unloaded\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the tape device + * Description: + * The tape cartridge has been unloaded from the tape unit. Insert a tape + * cartridge before accessing the tape device. + * User action: + * None. + */ + +/*? + * Text: "A cartridge is loaded in tape device %s, refusing to suspend\n" + * Severity: Error + * Parameter: + * @1: bus ID of the tape device + * Description: + * A request to suspend a tape device currently loaded with a cartridge is + * rejected. + * User action: + * Unload the tape device. Then try to suspend the system again. + */ + +/*? + * Text: "Tape device %s is busy, refusing to suspend\n" + * Severity: Error + * Parameter: + * @1: bus ID of the tape device + * Description: + * A request to suspend a tape device being currently in use is rejected. + * User action: + * Terminate applications performing tape operations + * and then try to suspend the system again. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/tape_34xx b/Documentation/kmsg/s390/tape_34xx new file mode 100644 index 0000000000000..b61b7b044b322 --- /dev/null +++ b/Documentation/kmsg/s390/tape_34xx @@ -0,0 +1,418 @@ +/*? + * Text: "%s: An unexpected condition %d occurred in tape error recovery\n" + * Severity: Error + * Parameter: + * @1: bus ID of the tape device + * @2: number + * Description: + * The control unit has reported an error condition that is not recognized by + * the error recovery process of the tape device driver. + * User action: + * Report this problem and the condition number from the message to your + * support organization. + */ + +/*? + * Text: "%s: A data overrun occurred between the control unit and tape unit\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * A data overrun error has occurred on the connection between the control + * unit and the tape unit. If this problem occurred during a write-type + * operation, the integrity of the data on the tape might be compromised. + * User action: + * Use a faster connection. If this problem occurred during a write-type + * operation, consider repositioning the tape and repeating the operation. + */ + +/*? + * Text: "%s: The block ID sequence on the tape is incorrect\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The control unit has detected an incorrect block ID sequence on the tape. + * This problem typically indicates that the data on the tape is damaged. + * User action: + * If this problem occurred during a write-type operation reposition the tape + * and repeat the operation. + */ + +/*? + * Text: "%s: A read error occurred that cannot be recovered\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * A read error has occurred that cannot be recovered. The current tape might + * be damaged. + * User action: + * None. + */ + +/*? + * Text: "%s: A write error on the tape cannot be recovered\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * A write error has occurred that could not be recovered by the automatic + * error recovery process. + * User action: + * Use a different tape cartridge. + */ + +/*? + * Text: "%s: Writing the ID-mark failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The ID-mark at the beginning of tape could not be written. The tape medium + * might be write-protected. + * User action: + * Try a different tape cartridge. Ensure that the write-protection on the + * cartridge is switched off. + */ + +/*? + * Text: "%s: Reading the tape beyond the end of the recorded area failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * A read-type operation failed because it extended beyond the end of the + * recorded area on the tape medium. + * User action: + * None. + */ + +/*? + * Text: "%s: The tape contains an incorrect block ID sequence\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The control unit has detected an incorrect block ID sequence on the tape. + * This problem typically indicates that the data on the tape is damaged. + * User action: + * If this problem occurred during a write-type operation reposition the tape + * and repeat the operation. + */ + +/*? + * Text: "%s: A path equipment check occurred for the tape device\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * A path equipment check has occurred. This check indicates problems with the + * connection between the mainframe system and the tape control unit. + * User action: + * Ensure that the cable connections between the mainframe system and the + * control unit are securely in place and not damaged. + */ + +/*? + * Text: "%s: The tape unit cannot process the tape format\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * Either the tape unit is not able to read the format ID mark, or the + * specified format is not supported by the tape unit. + * User action: + * If you do not need the data recorded on the current tape, use a different + * tape or write a new format ID mark at the beginning of the tape. Be aware + * that writing a new ID mark leads to a loss of all data that has been + * recorded on the tape. If you need the data on the current tape, use a tape + * unit that supports the tape format. + */ + +/*? + * Text: "%s: The tape medium is write-protected\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * A write-type operation failed because the tape medium is write-protected. + * User action: + * Eject the tape cartridge, switch off the write protection on the cartridge, + * insert the cartridge, and try the operation again. + */ + +/*? + * Text: "%s: The tape does not have the required tape tension\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The tape does not have the required tape tension. + * User action: + * Rewind and reposition the tape, then repeat the operation. + */ + +/*? + * Text: "%s: The tape unit failed to load the cartridge\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * An error has occurred while loading the tape cartridge. + * User action: + * Unload the cartridge and load it again. + */ + +/*? + * Text: "%s: Automatic unloading of the tape cartridge failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The tape unit failed to unload the cartridge. + * User action: + * Unload the cartridge manually by using the eject button on the tape unit. + */ + +/*? + * Text: "%s: An equipment check has occurred on the tape unit\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * Possible reasons for the check condition are a unit adapter error, a buffer + * error on the lower interface, an unusable internal path, or an error that + * has occurred while loading the cartridge. + * User action: + * Examine the tape unit and the cartridge loader. Consult the tape unit + * documentation for details. + */ + +/*? + * Text: "%s: The tape information states an incorrect length\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The tape is shorter than stated at the beginning of the tape data. A + * possible reason for this problem is that the tape might have been physically + * truncated. Data written to the tape might be incomplete or damaged. + * User action: + * If this problem occurred during a write-type operation, consider repeating + * the operation with a different tape cartridge. + */ + +/*? + * Text: "%s: The tape unit is not ready\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The tape unit is online but not ready. + * User action: + * Turn the ready switch on the tape unit to the ready position and try the + * operation again. + */ + +/*? + * Text: "%s: The tape medium has been rewound or unloaded manually\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The tape unit rewind button, unload button, or both have been used to + * rewind or unload the tape cartridge. A tape cartridge other than the + * intended cartridge might have been inserted or the tape medium might not + * be at the expected position. + * User action: + * Verify that the correct tape cartridge has been inserted and that the tape + * medium is at the required position before continuing to work with the tape. + */ + +/*? + * Text: "%s: The tape subsystem is running in degraded mode\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The tape subsystem is not operating at its maximum performance. + * User action: + * Contact your service representative for the tape unit and report this + * problem. + */ + +/*? + * Text: "%s: The tape unit is already assigned\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The tape unit is already assigned to another channel path. + * User action: + * Free the tape unit from the operating system instance to which it is + * currently assigned then try again. + */ + +/*? + * Text: "%s: The tape unit is not online\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The tape unit is not online to the tape device driver. + * User action: + * Ensure that the tape unit is operational and that the cable connections + * between the control unit and the tape unit are securely in place and not + * damaged. + */ + +/*? + * Text: "%s: The control unit has fenced access to the tape volume\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The control unit fences further access to the current tape volume. The data + * integrity on the tape volume might have been compromised. + * User action: + * Rewind and unload the tape cartridge. + */ + +/*? + * Text: "%s: A parity error occurred on the tape bus\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * A data parity check error occurred on the bus. Data that was read or written + * while the error occurred is not valid. + * User action: + * Reposition the tape and repeat the read-type or write-type operation. + */ + +/*? + * Text: "%s: I/O error recovery failed on the tape control unit\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * An I/O error occurred that cannot be recovered by the automatic error + * recovery process of the tape control unit. The application that operates + * the tape unit will receive a return value of -EIO which indicates an + * I/O error. The data on the tape might be damaged. + * User action: + * If this problem occurred during a write-type operation, consider + * repositioning the tape and repeating the operation. + */ + +/*? + * Text: "%s: The tape unit requires a firmware update\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The tape unit requires firmware patches from the tape control unit but the + * required patches are not available on the control unit. + * User action: + * Make the require patches available on the control unit then reposition the + * tape and retry the operation. For details about obtaining and installing + * firmware updates see the control unit documentation. + */ + +/*? + * Text: "%s: The maximum block size for buffered mode is exceeded\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The block to be written is larger than allowed for the buffered mode. + * User action: + * Use a smaller block size. + */ + +/*? + * Text: "%s: A channel interface error cannot be recovered\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * An error has occurred on the channel interface. This error cannot + * be recovered by the control unit error recovery process. + * User action: + * See the documentation of the control unit. + */ + +/*? + * Text: "%s: A channel protocol error occurred\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * An error was detected in the channel protocol. + * User action: + * Reposition the tape and try the operation again. + */ + +/*? + * Text: "%s: The tape unit does not support the compaction algorithm\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The tape unit cannot read the current tape. The data on the tape has been + * compressed with an algorithm that is not supported by the tape unit. + * User action: + * Use a tape unit that supports the compaction algorithm used for the + * current tape. + */ + +/*? + * Text: "%s: The tape unit does not support tape format 3480-2 XF\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The tape unit does not support tapes recorded in the 3480-2 XF format. + * User action: + * If you do not need the data recorded on the current tape, rewind the tape + * and overwrite it with a supported format. If you need the data on the + * current tape, use a tape unit that supports the tape format. + */ + +/*? + * Text: "%s: The tape unit does not support format 3480 XF\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The tape unit does not support tapes recorded in the 3480 XF format. + * User action: + * If you do not need the data recorded on the current tape, rewind the tape + * and overwrite it with a supported format. If you need the data on the + * current tape, use a tape unit that supports the tape format. + */ + +/*? + * Text: "%s: The tape unit does not support the current tape length\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The length of the tape in the cartridge is incompatible with the tape unit. + * User action: + * Either use a different tape unit or use a tape with a supported length. + */ + +/*? + * Text: "%s: The tape unit does not support the tape length\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The length of the tape in the cartridge is incompatible with the tape + * unit. + * User action: + * Either use a different tape unit or use a tape with a supported length. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/tape_3590 b/Documentation/kmsg/s390/tape_3590 new file mode 100644 index 0000000000000..afcebac9f77d8 --- /dev/null +++ b/Documentation/kmsg/s390/tape_3590 @@ -0,0 +1,183 @@ +/*? + * Text: "%s: The tape medium must be loaded into a different tape unit\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * The tape device has indicated an error condition that requires loading + * the tape cartridge into a different tape unit to recover. + * User action: + * Unload the cartridge and use a different tape unit to retry the operation. + */ + +/*? + * Text: "%s: Tape media information: exception %s, service %s\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * @2: exception + * @3: service + * Description: + * This is an operating system independent tape medium information message + * that was issued by the tape unit. The information in the message is + * intended for the IBM customer engineer. + * User action: + * See the documentation for the tape unit for further information. + */ + +/*? + * Text: "%s: Device subsystem information: exception %s, service %s\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * @2: exception + * @3: required service action + * Description: + * This is an operating system independent device subsystem information message + * that was issued by the tape unit. The information in the message is + * intended for the IBM customer engineer. + * User action: + * See the documentation for the tape unit for further information. + */ + +/*? + * Text: "%s: I/O subsystem information: exception %s, service %s\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * @2: exception + * @3: required service action + * Description: + * This is an operating system independent I/O subsystem information message + * that was issued by the tape unit. The information in the message is + * intended for the IBM customer engineer. + * User action: + * See the documentation for the tape unit for further information. + */ + +/*? + * Text: "%s: The tape unit has issued sense message %s\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * @2: sense message code + * Description: + * The tape unit has issued an operating system independent sense message. + * User action: + * See the documentation for the tape unit for further information. + */ + +/*? + * Text: "%s: The tape unit has issued an unknown sense message code 0x%x\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * @2: code + * Description: + * The tape device driver has received an unknown sense message from the + * tape unit. + * User action: + * See the documentation for the tape unit for further information. + */ + +/*? + * Text: "%s: MIM SEV=%i, MC=%02x, ES=%x/%x, RC=%02x-%04x-%02x\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * @2: SEV + * @3: message code + * @4: exception + * @5: required service action + * @6: refcode + * @7: mid + * @8: fid + * Description: + * This is an operating system independent information message that was + * issued by the tape unit. The information in the message is intended for + * the IBM customer engineer. + * User action: + * See to the documentation for the tape unit for further information. + */ + +/*? + * Text: "%s: IOSIM SEV=%i, DEVTYPE=3590/%02x, MC=%02x, ES=%x/%x, REF=0x%04x-0x%04x-0x%04x\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * @2: SEV + * @3: model + * @4: message code + * @5: exception + * @6: required service action + * @7: refcode1 + * @8: refcode2 + * @9: refcode3 + * Description: + * This is an operating system independent I/O subsystem information message + * that was issued by the tape unit. The information in the message is + * intended for the IBM customer engineer. + * User action: + * See the documentation for the tape unit for further information. + */ + +/*? + * Text: "%s: DEVSIM SEV=%i, DEVTYPE=3590/%02x, MC=%02x, ES=%x/%x, REF=0x%04x-0x%04x-0x%04x\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * @2: SEV + * @3: model + * @4: message code + * @5: exception + * @6: required service action + * @7: refcode1 + * @8: refcode2 + * @9: refcode3 + * Description: + * This is an operating system independent device subsystem information message + * issued by the tape unit. The information in the message is intended for + * the IBM customer engineer. + * User action: + * See the documentation for the tape unit for further information. + */ + +/*? + * Text: "%s: The tape unit has issued an unknown sense message code %x\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * @2: code + * Description: + * The tape device has issued a sense message, that is unknown to the device + * driver. + * User action: + * Use the message code printed as hexadecimal value and see the documentation + * for the tape unit for further information. + */ + +/*? + * Text: "%s: The tape unit failed to obtain the encryption key from EKM\n" + * Severity: Error + * Parameter: + * @1: bus ID of the tape device + * Description: + * The tape unit was unable to retrieve the encryption key required to decode + * the data on the tape from the enterprise key manager (EKM). + * User action: + * See the EKM and tape unit documentation for information about how to enable + * the tape unit to retrieve the encryption key. + */ + +/*? + * Text: "%s: A different host has privileged access to the tape unit\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the tape device + * Description: + * You cannot access the tape unit because a different operating system + * instance has privileged access to the unit. + * User action: + * Unload the current cartridge to solve this problem. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/time b/Documentation/kmsg/s390/time new file mode 100644 index 0000000000000..aab05d224ea58 --- /dev/null +++ b/Documentation/kmsg/s390/time @@ -0,0 +1,36 @@ +/*? + * Text: "The ETR interface has adjusted the clock by %li microseconds\n" + * Severity: Notice + * Parameter: + * @1: number of microseconds + * Description: + * The external time reference (ETR) interface has synchronized the system + * clock with the external reference and set it to a new value. The time + * difference between the old and new clock value has been passed to the + * network time protocol (NTP) as a single shot adjustment. + * User action: + * None. + */ + +/*? + * Text: "The real or virtual hardware system does not provide an ETR interface\n" + * Severity: Warning + * Description: + * The 'etr=' parameter has been passed on the kernel parameter line for + * a Linux instance that does not have access to the external time reference + * (ETR) facility. + * User action: + * To avoid this warning remove the 'etr=' kernel parameter. + */ + +/*? + * Text: "The real or virtual hardware system does not provide an STP interface\n" + * Severity: Warning + * Description: + * The 'stp=' parameter has been passed on the kernel parameter line for + * a Linux instance that does not have access to the server time protocol + * (STP) facility. + * User action: + * To avoid this warning remove the 'stp=' kernel parameter. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/vmlogrdr b/Documentation/kmsg/s390/vmlogrdr new file mode 100644 index 0000000000000..ddea8968ddf8a --- /dev/null +++ b/Documentation/kmsg/s390/vmlogrdr @@ -0,0 +1,19 @@ +/*? Text: "vmlogrdr: failed to start recording automatically\n" */ +/*? Text: "vmlogrdr: connection severed with reason %i\n" */ +/*? Text: "vmlogrdr: iucv connection to %s failed with rc %i \n" */ +/*? Text: "vmlogrdr: failed to stop recording automatically\n" */ +/*? Text: "not running under VM, driver not loaded.\n" */ + +/*? + * Text: "vmlogrdr: device %s is busy. Refuse to suspend.\n" + * Severity: Error + * Parameter: + * @1: device name + * Description: + * Suspending vmlogrdr devices that are in uses is not supported. + * A request to suspend such a device is refused. + * User action: + * Close all applications that use any of the vmlogrdr devices + * and then try to suspend the system again. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/vmur b/Documentation/kmsg/s390/vmur new file mode 100644 index 0000000000000..50ef00d48ffa2 --- /dev/null +++ b/Documentation/kmsg/s390/vmur @@ -0,0 +1,48 @@ +/*? + * Text: "The %s cannot be loaded without z/VM\n" + * Severity: Error + * Parameter: + * @1: z/VM virtual unit record device driver + * Description: + * The z/VM virtual unit record device driver provides Linux with access to + * z/VM virtual unit record devices like punch card readers, card punches, and + * line printers. On Linux instances that run in environments other than the + * z/VM hypervisor, the device driver does not provide any useful function and + * the corresponding vmur module cannot be loaded. + * User action: + * Load the vmur module only on Linux instances that run as guest operating + * systems of the z/VM hypervisor. If the z/VM virtual unit record device + * has been compiled into the kernel, ignore this message. + */ + +/*? + * Text: "Kernel function alloc_chrdev_region failed with error code %d\n" + * Severity: Error + * Parameter: + * @1: error code according to errno definitions + * Description: + * The z/VM virtual unit record device driver (vmur) needs to register a range + * of character device minor numbers from 0x0000 to 0xffff. + * This registration failed, probably because of memory constraints. + * User action: + * Free some memory and reload the vmur module. If the z/VM virtual unit + * record device driver has been compiled into the kernel reboot Linux. + * Consider assigning more memory to your LPAR or z/VM guest virtual machine. + */ + +/*? + * Text: "Unit record device %s is busy, %s refusing to suspend.\n" + * Severity: Error + * Parameter: + * @1: bus ID of the unit record device + * @1: z/VM virtual unit record device driver + * Description: + * Linux cannot be suspended while a unit record device is in use. + * User action: + * Stop all applications that work on z/VM spool file queues, for example, the + * vmur tool. Then try again to suspend Linux. + */ + +/*? Text: "%s loaded.\n" */ +/*? Text: "%s unloaded.\n" */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/xpram b/Documentation/kmsg/s390/xpram new file mode 100644 index 0000000000000..029372657496b --- /dev/null +++ b/Documentation/kmsg/s390/xpram @@ -0,0 +1,74 @@ +/*? + * Text: "%d is not a valid number of XPRAM devices\n" + * Severity: Error + * Parameter: + * @1: number of partitions + * Description: + * The number of XPRAM partitions specified for the 'devs' module parameter + * or with the 'xpram.parts' kernel parameter must be an integer in the + * range 1 to 32. The XPRAM device driver created a maximum of 32 partitions + * that are probably not configured as intended. + * User action: + * If the XPRAM device driver has been compiled as a separate module, + * unload the module and load it again with a correct value for the 'devs' + * module parameter. If the XPRAM device driver has been compiled + * into the kernel, correct the 'xpram.parts' parameter in the kernel + * command line and restart Linux. + */ + +/*? + * Text: "Not enough expanded memory available\n" + * Severity: Error + * Description: + * The amount of expanded memory required to set up your XPRAM partitions + * depends on the 'sizes' parameter specified for the xpram module or on + * the specifications for the 'xpram.parts' parameter if the XPRAM device + * driver has been compiled into the kernel. Your + * current specification exceed the amount of available expanded memory. + * Your XPRAM partitions are probably not configured as intended. + * User action: + * If the XPRAM device driver has been compiled as a separate module, + * unload the xpram module and load it again with an appropriate value + * for the 'sizes' module parameter. If the XPRAM device driver has been + * compiled into the kernel, adjust the 'xpram.parts' parameter in the + * kernel command line and restart Linux. If you need more than the + * available expanded memory, increase the expanded memory allocation for + * your virtual hardware or LPAR. + */ + +/*? + * Text: "No expanded memory available\n" + * Severity: Error + * Description: + * The XPRAM device driver has been loaded in a Linux instance that runs + * in an LPAR or virtual hardware without expanded memory. + * No XPRAM partitions are created. + * User action: + * Allocate expanded memory for your LPAR or virtual hardware or do not + * load the xpram module. You can ignore this message, if you do not want + * to create XPRAM partitions. + */ + +/*? + * Text: "Resuming the system failed: %s\n" + * Severity: Error + * Parameter: + * @1: cause of the failure + * Description: + * A system cannot be resumed if the expanded memory setup changes + * after hibernation. Possible reasons for the failure are: + * - Expanded memory was removed after hibernation. + * - Size of the expanded memory changed after hibernation. + * The system is stopped with a kernel panic. + * User action: + * Reboot Linux. + */ + +/*? Text: " number of devices (partitions): %d \n" */ +/*? Text: " size of partition %d: %u kB\n" */ +/*? Text: " size of partition %d to be set automatically\n" */ +/*? Text: " memory needed (for sized partitions): %lu kB\n" */ +/*? Text: " partitions to be sized automatically: %d\n" */ +/*? Text: " automatically determined partition size: %lu kB\n" */ +/*? Text: " %u pages expanded memory found (%lu KB).\n" */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/zcrypt b/Documentation/kmsg/s390/zcrypt new file mode 100644 index 0000000000000..62eb7fd724613 --- /dev/null +++ b/Documentation/kmsg/s390/zcrypt @@ -0,0 +1,22 @@ +/*? + * Text: "Cryptographic device %02x.%04x failed and was set offline\n" + * Severity: Error + * Parameter: + * @1: AP device ID + * @2: AP queue + * Description: + * A cryptographic device failed to process a cryptographic request. + * The cryptographic device driver could not correct the error and + * set the device offline. The application that issued the + * request received an indication that the request has failed. + * User action: + * Use the lszcrypt command to confirm that the cryptographic + * hardware is still configured to your LPAR or z/VM guest virtual + * machine. If the device is available to your Linux instance the + * command output contains a line that begins with 'card', + * where is the two-digit decimal number in the message text. + * After ensuring that the device is available, use the chzcrypt command to + * set it online again. + * If the error persists, contact your support organization. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/zdump b/Documentation/kmsg/s390/zdump new file mode 100644 index 0000000000000..da4f7ef73e3ef --- /dev/null +++ b/Documentation/kmsg/s390/zdump @@ -0,0 +1,27 @@ +/*? + * Text: "The 32-bit dump tool cannot be used for a 64-bit system\n" + * Severity: Alert + * Description: + * The dump process ends without creating a system dump. + * User action: + * Use a 64-bit dump tool to obtain a system dump for 64-bit Linux instance. + */ +/*? + * Text: "The 64-bit dump tool cannot be used for a 32-bit system\n" + * Severity: Alert + * Description: + * The dump process ends without creating a system dump. + * User action: + * Use a 32-bit dump tool to obtain a system dump for 32-bit Linux instance. + */ +/*? + * Text: "The dump process started for a 64-bit operating system\n" + * Severity: Alert + * Description: + * The SCSI dump process started to create a dump for a 64-bit operating + * system instance. + * User action: + * None. + */ +/*? Text: "0x%x is an unknown architecture.\n" */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/zfcp b/Documentation/kmsg/s390/zfcp new file mode 100644 index 0000000000000..9d271b2e4c80a --- /dev/null +++ b/Documentation/kmsg/s390/zfcp @@ -0,0 +1,709 @@ +/*? + * Text: "%s is not a valid SCSI device\n" + * Severity: Error + * Parameter: + * @1: device specification + * Description: + * The specification for an initial SCSI device provided with the 'zfcp.device' + * kernel parameter or with the 'device' module parameter is syntactically + * incorrect. The specified SCSI device could not be attached to the Linux + * system. + * User action: + * Correct the value for the 'zfcp.device' or 'device' parameter and reboot + * Linux. See "Device Drivers, Features, and Commands" for information about + * the syntax. + */ + +/*? + * Text: "The zfcp device driver could not register with the common I/O layer\n" + * Severity: Error + * Description: + * The device driver initialization failed. A possible cause of this problem is + * memory constraints. + * User action: + * Free some memory and try again to load the zfcp device driver. If the zfcp + * device driver has been compiled into the kernel, reboot Linux. Consider + * assigning more memory to your LPAR or z/VM guest virtual machine. If the + * problem persists, contact your support organization. + */ + +/*? + * Text: "%s: Setting up data structures for the FCP adapter failed\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The zfcp device driver could not allocate data structures for an FCP adapter. + * A possible reason for this problem is memory constraints. + * User action: + * Set the FCP adapter offline or detach it from the Linux system, free some + * memory and set the FCP adapter online again or attach it again. If this + * problem persists, gather Linux debug data, collect the FCP adapter + * hardware logs, and report the problem to your support organization. + */ + +/*? + * Text: "%s: The FCP device is operational again\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * An FCP device has been unavailable because it had been detached from the + * Linux system or because the corresponding CHPID was offline. The FCP device + * is now available again and the zfcp device driver resumes all operations to + * the FCP device. + * User action: + * None. + */ + +/*? + * Text: "%s: The CHPID for the FCP device is offline\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The CHPID for an FCP device has been set offline, either logically in Linux + * or on the hardware. + * User action: + * Find out which CHPID corresponds to the FCP device, for example, with the + * lscss command. Check if the CHPID has been set logically offline in sysfs. + * Write 'on' to the CHPID's status attribute to set it online. If the CHPID is + * online in sysfs, find out if it has been varied offline through a hardware + * management interface, for example the service element (SE). + */ + +/*? + * Text: "%s: The FCP device has been detached\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * An FCP device is no longer available to Linux. + * User action: + * Ensure that the FCP adapter is operational and attached to the LPAR or z/VM + * virtual machine. + */ + +/*? + * Text: "%s: The FCP device did not respond within the specified time\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The common I/O layer waited for a response from the FCP adapter but + * no response was received within the specified time limit. This might + * indicate a hardware problem. + * User action: + * Consult your hardware administrator. If this problem persists, + * gather Linux debug data, collect the FCP adapter hardware logs, and + * report the problem to your support organization. + */ + +/*? + * Text: "%s: Registering the FCP device with the SCSI stack failed\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The FCP adapter could not be registered with the Linux SCSI + * stack. A possible reason for this problem is memory constraints. + * User action: + * Set the FCP adapter offline or detach it from the Linux system, free some + * memory and set the FCP adapter online again or attach it again. If this + * problem persists, gather Linux debug data, collect the FCP adapter + * hardware logs, and report the problem to your support organization. + */ + +/*? + * Text: "%s: ERP cannot recover an error on the FCP device\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * An error occurred on an FCP device. The error recovery procedure (ERP) + * could not resolve the error. The FCP device driver cannot use the FCP device. + * User action: + * Check for previous error messages for the same FCP device to find the + * cause of the problem. + */ + +/*? + * Text: "%s: Creating an ERP thread for the FCP device failed.\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The zfcp device driver could not set up error recovery procedure (ERP) + * processing for the FCP device. The FCP device is not available for use + * in Linux. + * User action: + * Free some memory and try again to load the zfcp device driver. If the zfcp + * device driver has been compiled into the kernel, reboot Linux. Consider + * assigning more memory to your LPAR or z/VM guest virtual machine. If the + * problem persists, contact your support organization. + */ + +/*? + * Text: "%s: ERP failed for LUN 0x%016Lx on port 0x%016Lx\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * @2: LUN + * @3: WWPN + * Description: + * An error occurred on the SCSI device at the specified LUN. The error recovery + * procedure (ERP) could not resolve the error. The SCSI device is not + * available. + * User action: + * Verify that the LUN is correct. Check the fibre channel fabric for errors + * related to the specified WWPN and LUN, the storage server, and Linux. + */ + +/*? + * Text: "%s: ERP failed for remote port 0x%016Lx\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * @2: WWPN + * Description: + * An error occurred on a remote port. The error recovery procedure (ERP) + * could not resolve the error. The port is not available. + * User action: + * Verify that the WWPN is correct and check the fibre channel fabric for + * errors related to the WWPN. + */ + +/*? + * Text: "%s: Registering port 0x%016Lx failed\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * @2: WWPN + * Description: + * The Linux kernel could not allocate enough memory to register the + * remote port with the indicated WWPN with the SCSI stack. The remote + * port is not available. + * User action: + * Free some memory and trigger the rescan for ports. + */ + +/*? + * Text: "%s: A QDIO problem occurred\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * QDIO reported a problem to the zfcp device driver. The zfcp device driver + * tries to recover this problem. + * User action: + * Check for related error messages. If this problem occurs frequently, gather + * Linux debug data and contact your support organization. + */ + +/*? + * Text: "%s: Setting up the QDIO connection to the FCP adapter failed\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The zfcp device driver failed to establish a QDIO connection with the FCP + * adapter. + * User action: + * Set the FCP adapter offline or detach it from the Linux system, free some + * memory and set the FCP adapter online again or attach it again. If this + * problem persists, gather Linux debug data, collect the FCP adapter + * hardware logs, and report the problem to your support organization. + */ + +/*? + * Text: "%s: The FCP adapter reported a problem that cannot be recovered\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The FCP adapter has a problem that cannot be recovered by the zfcp device + * driver. The zfcp device driver stopped using the FCP device. + * User action: + * Gather Linux debug data, collect the FCP adapter hardware logs, and report + * this problem to your support organization. + */ + +/*? + * Text: "%s: There is a wrap plug instead of a fibre channel cable\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The FCP adapter is not physically connected to the fibre channel fabric. + * User action: + * Remove the wrap plug from the FCP adapter and connect the adapter with the + * fibre channel fabric. + */ + +/*? + * Text: "%s: FCP device not operational because of an unsupported FC class\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The FCP adapter hardware does not support the fibre channel service class + * requested by the zfcp device driver. This problem indicates a program error + * in the zfcp device driver. + * User action: + * Gather Linux debug data, collect the FCP adapter hardware logs, and report + * this problem to your support organization. + */ + +/*? + * Text: "%s: 0x%Lx is an ambiguous request identifier\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * @2: request ID + * Description: + * The FCP adapter reported that it received the same request ID twice. This is + * an error. The zfcp device driver stopped using the FCP device. + * User action: + * Gather Linux debug data, collect the FCP adapter hardware logs, and report + * this problem to your support organization. + */ + +/*? + * Text: "%s: QTCB version 0x%x not supported by FCP adapter (0x%x to 0x%x)\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * @2: requested version + * @3: lowest supported version + * @4: highest supported version + * Description: + * See message text. + * The queue transfer control block (QTCB) version requested by the zfcp device + * driver is not supported by the FCP adapter hardware. + * User action: + * If the requested version is higher than the highest version supported by the + * hardware, install more recent firmware on the FCP adapter. If the requested + * version is lower then the lowest version supported by the hardware, upgrade + * to a Linux level with a more recent zfcp device driver. + */ + +/*? + * Text: "%s: The FCP adapter could not log in to the fibre channel fabric\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The fibre channel switch rejected the login request from the FCP adapter. + * User action: + * Check the fibre channel fabric or switch logs for possible errors. + */ + +/*? + * Text: "%s: The FCP device is suspended because of a firmware update\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The FCP device is not available while a firmware update is in progress. This + * problem is temporary. The FCP device will resume operations when the + * firmware update is completed. + * User action: + * Wait 10 seconds and try the operation again. + */ + +/*? + * Text: "%s: All NPIV ports on the FCP adapter have been assigned\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The number of N_Port ID Virtualization (NPIV) ports that can be assigned + * on an FCP adapter is limited. Once assigned, NPIV ports are not released + * automatically but have to be released explicitly through the support + * element (SE). + * User action: + * Identify NPIV ports that have been assigned but are no longer in use and + * release them from the SE. + */ + +/*? + * Text: "%s: The link between the FCP adapter and the FC fabric is down\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The FCP adapter is not usable. Specific error information is not available. + * User action: + * Check the cabling and the fibre channel fabric configuration. If this + * problem persists, gather Linux debug data, collect the FCP adapter + * hardware logs, and report the problem to your support organization. + */ + +/*? + * Text: "%s: The QTCB type is not supported by the FCP adapter\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The queue transfer control block (QTCB) type requested by the zfcp device + * driver is not supported by the FCP adapter hardware. + * User action: + * Install the latest firmware on your FCP adapter hardware. If this does not + * resolve the problem, upgrade to a Linux level with a more recent zfcp device + * driver. If the problem persists, contact your support organization. + */ + +/*? + * Text: "%s: The error threshold for checksum statistics has been exceeded\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The FCP adapter has reported a large number of bit errors. This might + * indicate a problem with the physical components of the fibre channel fabric. + * Details about the errors have been written to the HBA trace for the FCP + * adapter. + * User action: + * Check for problems in the fibre channel fabric and ensure that all cables + * are properly plugged. + */ + +/*? + * Text: "%s: The local link has been restored\n" + * Severity: Informational + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * A problem with the connection between the FCP adapter and the adjacent node + * on the fibre channel fabric has been resolved. The FCP adapter is now + * available again. + * User action: + * None. + */ + +/*? + * Text: "%s: The mode table on the FCP adapter has been damaged\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * This is an FCP adapter hardware problem. + * User action: + * Report this problem with FCP hardware logs to IBM support. + */ + +/*? + * Text: "%s: The adjacent fibre channel node does not support FCP\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The fibre channel switch or storage system that is connected to the FCP + * channel does not support the fibre channel protocol (FCP). The zfcp + * device driver stopped using the FCP device. + * User action: + * Check the adjacent fibre channel node. + */ + +/*? + * Text: "%s: The FCP adapter does not recognize the command 0x%x\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * @2: command + * Description: + * A command code that was sent from the zfcp device driver to the FCP adapter + * is not valid. The zfcp device driver stopped using the FCP device. + * User action: + * Gather Linux debug data, collect the FCP adapter hardware logs, and report + * this problem to your support organization. + */ + +/*? + * Text: "%s: There is no light signal from the local fibre channel cable\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * There is no signal on the fibre channel cable that connects the FCP adapter + * to the fibre channel fabric. + * User action: + * Ensure that the cable is in place and connected properly to the FCP adapter + * and to the adjacent fibre channel switch or storage system. + */ + +/*? + * Text: "%s: The WWPN assignment file on the FCP adapter has been damaged\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * This is an FCP adapter hardware problem. + * User action: + * Report this problem with FCP hardware logs to IBM support. + */ + +/*? + * Text: "%s: The FCP device detected a WWPN that is duplicate or not valid\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * This condition indicates an error in the FCP adapter hardware or in the z/VM + * hypervisor. + * User action: + * Gather Linux debug data, collect the FCP adapter hardware logs, and report + * this problem to IBM support. + */ + +/*? + * Text: "%s: The fibre channel fabric does not support NPIV\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The FCP adapter requires N_Port ID Virtualization (NPIV) from the adjacent + * fibre channel node. Either the FCP adapter is connected to a fibre channel + * switch that does not support NPIV or the FCP adapter tries to use NPIV in a + * point-to-point setup. The connection is not operational. + * User action: + * Verify that NPIV is correctly used for this connection. Check the FCP adapter + * configuration and the fibre channel switch configuration. If necessary, + * update the fibre channel switch firmware. + */ + +/*? + * Text: "%s: The FCP adapter cannot support more NPIV ports\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * N_Port ID Virtualization (NPIV) ports consume physical resources on the FCP + * adapter. The FCP adapter resources are exhausted. The connection is not + * operational. + * User action: + * Analyze the number of available NPIV ports and which operating system + * instances use them. If necessary, reconfigure your setup to move some + * NPIV ports to an FCP adapter with free resources. + */ + +/*? + * Text: "%s: The adjacent switch cannot support more NPIV ports\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * N_Port ID Virtualization (NPIV) ports consume physical resources. The + * resources of the fibre channel switch that is connected to the FCP adapter + * are exhausted. The connection is not operational. + * User action: + * Analyze the number of available NPIV ports on the adjacent fibre channel + * switch and how they are used. If necessary, reconfigure your fibre channel + * fabric to accommodate the required NPIV ports. + */ + +/*? + * Text: "%s: 0x%x is not a valid transfer protocol status\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * @2: status information + * Description: + * The transfer protocol status information reported by the FCP adapter is not + * a valid status for the zfcp device driver. The zfcp device driver stopped + * using the FCP device. + * User action: + * Gather Linux debug data, collect the FCP adapter hardware logs, and report + * this problem to your support organization. + */ + +/*? + * Text: "%s: Unknown or unsupported arbitrated loop fibre channel topology detected\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The FCP device is connected to a fibre channel arbitrated loop or the FCP adapter + * reported an unknown fibre channel topology. The zfcp device driver supports + * point-to-point connections and switched fibre channel fabrics but not arbitrated + * loop topologies. The FCP device cannot be used. + * User action: + * Check the fibre channel setup and ensure that only supported topologies are + * connected to the FCP adapter. + */ + +/*? + * Text: "%s: FCP adapter maximum QTCB size (%d bytes) is too small\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * @2: maximum supported size + * @3: requested QTCB size + * Description: + * The queue transfer control block (QTCB) size requested by the zfcp + * device driver is not supported by the FCP adapter hardware. + * User action: + * Update the firmware on your FCP adapter hardware to the latest + * available level and update the Linux kernel to the latest supported + * level. If the problem persists, contact your support organization. + */ + +/*? + * Text: "%s: The FCP adapter only supports newer control block versions\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The protocol supported by the FCP adapter is not compatible with the zfcp + * device driver. + * User action: + * Upgrade your Linux kernel to a level that includes a zfcp device driver + * with support for the control block version required by your FCP adapter. + */ + +/*? + * Text: "%s: The FCP adapter only supports older control block versions\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * Description: + * The protocol supported by the FCP adapter is not compatible with the zfcp + * device driver. + * User action: + * Install the latest firmware on your FCP adapter. + */ + +/*? + * Text: "%s: Not enough FCP adapter resources to open remote port 0x%016Lx\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * @2: WWPN + * Description: + * Each port that is opened consumes physical resources of the FCP adapter to + * which it is attached. These resources are exhausted and the specified port + * cannot be opened. + * User action: + * Reduce the total number of remote ports that are attached to the + * FCP adapter. + */ + +/*? + * Text: "%s: LUN 0x%Lx on port 0x%Lx is already in use by CSS%d, MIF Image ID %x\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * @2: LUN + * @3: remote port WWPN + * @4: channel subsystem ID + * @5: MIF Image ID of the LPAR + * Description: + * The SCSI device at the indicated LUN is already in use by another system. + * Only one system at a time can use the SCSI device. + * User action: + * Ensure that the other system stops using the device before trying to use it. + */ + +/*? + * Text: "%s: No handle is available for LUN 0x%016Lx on port 0x%016Lx\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * @2: LUN + * @3: WWPN + * Description: + * The FCP adapter can only open a limited number of SCSI devices. This limit + * has been reached and the SCSI device at the indicated LUN cannot be opened. + * User action: + * For FCP subchannels running in non-NPIV mode, check all SCSI + * devices opened through the FCP adapter and close some of them. For + * FCP subchannels running in NPIV mode, verify the SAN zoning and + * host connections on the storage systems. Ensure that the zoning and + * host connections only allow access to the required LUNs. As a + * workaround, disable the automatic LUN scanning by setting the + * zfcp.allow_lun_scan kernel parameter or the allow_lun_scan module + * parameter to 0. + */ + +/*? + * Text: "%s: Incorrect direction %d, LUN 0x%016Lx on port 0x%016Lx closed\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * @2: value in direction field + * @3: LUN + * @4: WWPN + * Description: + * The direction field in a SCSI request contains an incorrect value. The zfcp + * device driver closed down the SCSI device at the indicated LUN. + * User action: + * Gather Linux debug data and report this problem to your support organization. + */ + +/*? + * Text: "%s: Incorrect CDB length %d, LUN 0x%016Lx on port 0x%016Lx closed\n" + * Severity: Error + * Parameter: + * @1: bus ID of the zfcp device + * @2: value in length field + * @3: LUN + * @4: WWPN + * Description: + * The control-data-block (CDB) length field in a SCSI request is not valid or + * too large for the FCP adapter. The zfcp device driver closed down the SCSI + * device at the indicated LUN. + * User action: + * Gather Linux debug data and report this problem to your support organization. + */ + +/*? + * Text: "%s: Opening WKA port 0x%x failed\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * @2: destination ID of the WKA port + * Description: + * The FCP adapter rejected a request to open the specified + * well-known address (WKA) port. No retry is possible. + * User action: + * Verify the setup and check if the maximum number of remote ports + * used through this adapter is below the maximum allowed. If the + * problem persists, gather Linux debug data, collect the FCP adapter + * hardware logs, and report the problem to your support organization. + */ + +/*? + * Text: "%s: The name server reported %d words residual data\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * @2: number of words in residual data + * Description: + * The fibre channel name server sent too much information about remote ports. + * The zfcp device driver did not receive sufficient information to attach all + * available remote ports in the SAN. + * User action: + * Verify that you are running the latest firmware level on the FCP + * adapter. Check your SAN setup and consider reducing the number of ports + * visible to the FCP adapter by using more restrictive zoning in the SAN. + */ + +/*? + * Text: "%s: A port opened with WWPN 0x%016Lx returned data that identifies it as WWPN 0x%016Lx\n" + * Severity: Warning + * Parameter: + * @1: bus ID of the zfcp device + * @2: expected WWPN + * @3: reported WWPN + * Description: + * A remote port was opened successfully, but it reported an + * unexpected WWPN in the returned port login (PLOGI) data. This + * condition might have been caused by a change applied to the SAN + * configuration while the port was being opened. + * User action: + * If this condition is only temporary and access to the remote port + * is possible, no action is required. If the condition persists, + * identify the storage system with the specified WWPN and contact the + * support organization of the storage system. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/s390/zpci b/Documentation/kmsg/s390/zpci new file mode 100644 index 0000000000000..aa3310206a626 --- /dev/null +++ b/Documentation/kmsg/s390/zpci @@ -0,0 +1,42 @@ +/*? + * Text: "%s: Event 0x%x reconfigured PCI function 0x%x\n" + * Severity: Informational + * Parameter: + * @1: device name of the function + * @2: PCI event code + * @3: function ID + * Description: + * The availability of a PCI function has changed. + * Possible reasons for the change include PCI configuration actions on the + * Hardware Management Console or hypervisor. + * For shared PCI functions, the function might also have been reserved or + * released by another system. + * If the device name of a function is shown as 'n/a', the device registration + * with the PCI device driver has not completed. + * The function ID identifies the function to the I/O configuration (IOCDS). + * The PCI event code can be useful diagnostic information for your support + * organization. + * User action: + * None. + */ + +/*? + * Text: "%s: Event 0x%x reports an error for PCI function 0x%x\n" + * Severity: Error + * Parameter: + * @1: device name of the function + * @2: PCI event code + * @3: function ID + * Description: + * A PCI function entered an error state from which it cannot recover + * automatically. + * User action: + * Trigger a recovery action by writing '1' to the 'recover' sysfs attribute + * of the PCI function. + * In sysfs, PCI functions are represented as /sys/bus/pci/devices/, + * where is the device name of the function. + * If the device name of a function is shown as 'n/a', the device + * registration with the PCI device driver has not completed. + * If the problem persists, contact your support organization. + */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ diff --git a/Documentation/kmsg/sbp_target b/Documentation/kmsg/sbp_target new file mode 100644 index 0000000000000..e9356cf802ace --- /dev/null +++ b/Documentation/kmsg/sbp_target @@ -0,0 +1,49 @@ +/*? Text: "ABORT TASK SET not implemented\n" */ +/*? Text: "ABORT TASK not implemented\n" */ +/*? Text: "Cannot change the directory_id on an active target.\n" */ +/*? Text: "Cannot enable a target with no LUNs!\n" */ +/*? Text: "Could not update Config ROM\n" */ +/*? Text: "Ignoring ORB_POINTER write while active.\n" */ +/*? Text: "LOGICAL UNIT RESET not implemented\n" */ +/*? Text: "Node ACL not found for %s\n" */ +/*? Text: "Only one TPG per Unit is possible.\n" */ +/*? Text: "QUERY LOGINS not implemented\n" */ +/*? Text: "Reconnect timer expired for node: %016llx\n" */ +/*? Text: "SET PASSWORD not implemented\n" */ +/*? Text: "TARGET RESET not implemented\n" */ +/*? Text: "Unable to allocate struct sbp_nacl\n" */ +/*? Text: "Unable to allocate struct sbp_tpg\n" */ +/*? Text: "Unable to allocate struct sbp_tport\n" */ +/*? Text: "Waiting for reconnect from node: %016llx\n" */ +/*? Text: "cannot find login: %d\n" */ +/*? Text: "failed to allocate login descriptor\n" */ +/*? Text: "failed to allocate login response block\n" */ +/*? Text: "failed to allocate session descriptor\n" */ +/*? Text: "failed to init se_session\n" */ +/*? Text: "failed to map command block handler: %d\n" */ +/*? Text: "failed to read peer GUID: %d\n" */ +/*? Text: "ignoring management request while busy\n" */ +/*? Text: "ignoring request from foreign node (%x != %x)\n" */ +/*? Text: "ignoring request with wrong generation\n" */ +/*? Text: "initiator already logged-in\n" */ +/*? Text: "login to unknown LUN: %d\n" */ +/*? Text: "logout from different node ID\n" */ +/*? Text: "max number of logins reached\n" */ +/*? Text: "mgt_agent LOGIN to LUN %d from %016llx\n" */ +/*? Text: "mgt_agent LOGOUT from LUN %d session %d\n" */ +/*? Text: "mgt_agent RECONNECT from %016llx\n" */ +/*? Text: "mgt_agent RECONNECT login GUID doesn't match\n" */ +/*? Text: "mgt_agent RECONNECT unknown login ID\n" */ +/*? Text: "mgt_orb bad request\n" */ +/*? Text: "netif_stop_queue() cannot be called before register_netdev()\n" */ +/*? Text: "refusing exclusive login with other active logins\n" */ +/*? Text: "refusing login while another exclusive login present\n" */ +/*? Text: "sbp_run_transaction: page size ignored\n" */ +/*? Text: "sbp_send_sense: unknown sense format: 0x%x\n" */ +/*? Text: "target_fabric_configfs_init() failed\n" */ +/*? Text: "target_fabric_configfs_register() failed for SBP\n" */ +/*? Text: "unknown management function 0x%x\n" */ +/*? Text: "unlink LUN: failed to update unit directory\n" */ +/*? Text: "flen=%u proglen=%u pass=%u image=%pK from=%s pid=%d\n" */ +/*? Text: "%s selects TX queue %d, but real number of TX queues is %d\n" */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ \ No newline at end of file diff --git a/Documentation/kmsg/zram b/Documentation/kmsg/zram new file mode 100644 index 0000000000000..f6c13a03c23a1 --- /dev/null +++ b/Documentation/kmsg/zram @@ -0,0 +1,34 @@ +/*? Text: "Error allocating compressor buffer space\n" */ +/*? Text: "Error allocating memory for compressed page: %u, size=%zu\n" */ +/*? Text: "Error creating memory pool\n" */ +/*? Text: "num_devices not specified. Using default: 1\n" */ +/*? Text: "Error allocating compressor working memory!\n" */ +/*? Text: "Error allocating zram address table\n" */ +/*? Text: "Unable to get major number\n" */ +/*? Text: "Compression failed! err=%d\n" */ +/*? Text: "Decompression failed! err=%d, page=%u\n" */ +/*? Text: "There is little point creating a zram of greater than twice the size of memory since we expect a 2:1 compression ratio. Note that zram uses about 0.1%% of the size of the disk when not in use so a huge zram is wasteful.\n\tMemory Size: %zu kB\n\tSize you selected: %llu kB\nContinuing anyway ...\n" */ +/*? Text: "disk size not provided. You can use disksize_kb module param to specify size.\nUsing default: (%u%% of RAM).\n" */ +/*? Text: "Error creating sysfs group" */ +/*? Text: "Error allocating memory for incompressible page: %u\n" */ +/*? Text: "Creating %u devices ...\n" */ +/*? Text: "Initialization failed: err=%d\n" */ +/*? Text: "Error allocating disk queue for device %d\n" */ +/*? Text: "Error allocating disk structure for device %d\n" */ +/*? Text: "Invalid value for num_devices: %u\n" */ +/*? Text: "Error allocating temp memory!\n" */ +/*? Text: "Unable to allocate temp memory\n" */ +/*? Text: "Created %u device(s) ...\n" */ +/*? Text: "There is little point creating a zram of greater than twice the size of memory since we expect a 2:1 compression ratio. Note that zram uses about 0.1%% of the size of the disk when not in use so a huge zram is wasteful.\n\tMemory Size: %lu kB\n\tSize you selected: %llu kB\nContinuing anyway ...\n" */ +/*? Text: "Cannot change disksize for initialized device\n" */ +/*? Text: "Can't change algorithm for initialized device\n" */ +/*? Text: "Cannot initialise %s compressing backend\n" */ +/*? Text: "Cannot change max compression streams\n" */ +/*? Text: "Destroyed %u device(s)\n" */ +/*? Text: "Created %u device(s)\n" */ +/*? Text: "Unable to register zram-control class\n" */ +/*? Text: "Removed device: %s\n" */ +/*? Text: "Added device: %s\n" */ +/*? Text: "Error creating sysfs group for device %d\n" */ +/*? Text: "Error allocating memory for compressed page: %u, size=%u\n" */ +/*? Text: "%s: %d output lines suppressed due to ratelimiting\n" */ \ No newline at end of file diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt index ca983328976bc..f65b515230146 100644 --- a/Documentation/lzo.txt +++ b/Documentation/lzo.txt @@ -159,11 +159,15 @@ Byte sequences distance = 16384 + (H << 14) + D state = S (copy S literals after this block) End of stream is reached if distance == 16384 + In version 1 only, to prevent ambiguity with the RLE case when + ((distance & 0x803f) == 0x803f) && (261 <= length <= 264), the + compressor must not emit block copies where distance and length + meet these conditions. In version 1 only, this instruction is also used to encode a run of - zeros if distance = 0xbfff, i.e. H = 1 and the D bits are all 1. + zeros if distance = 0xbfff, i.e. H = 1 and the D bits are all 1. In this case, it is followed by a fourth byte, X. - run length = ((X << 3) | (0 0 0 0 0 L L L)) + 4. + run length = ((X << 3) | (0 0 0 0 0 L L L)) + 4 0 0 1 L L L L L (32..63) Copy of small block within 16kB distance (preferably less than 34B) diff --git a/Documentation/media/uapi/v4l/colorspaces-defs.rst b/Documentation/media/uapi/v4l/colorspaces-defs.rst index e122bbe3d799d..aabb08130354a 100644 --- a/Documentation/media/uapi/v4l/colorspaces-defs.rst +++ b/Documentation/media/uapi/v4l/colorspaces-defs.rst @@ -36,8 +36,7 @@ whole range, 0-255, dividing the angular value by 1.41. The enum :c:type:`v4l2_hsv_encoding` specifies which encoding is used. .. note:: The default R'G'B' quantization is full range for all - colorspaces except for BT.2020 which uses limited range R'G'B' - quantization. + colorspaces. HSV formats are always full range. .. tabularcolumns:: |p{6.7cm}|p{10.8cm}| @@ -169,8 +168,8 @@ whole range, 0-255, dividing the angular value by 1.41. The enum - Details * - ``V4L2_QUANTIZATION_DEFAULT`` - Use the default quantization encoding as defined by the - colorspace. This is always full range for R'G'B' (except for the - BT.2020 colorspace) and HSV. It is usually limited range for Y'CbCr. + colorspace. This is always full range for R'G'B' and HSV. + It is usually limited range for Y'CbCr. * - ``V4L2_QUANTIZATION_FULL_RANGE`` - Use the full range quantization encoding. I.e. the range [0…1] is mapped to [0…255] (with possible clipping to [1…254] to avoid the @@ -180,4 +179,4 @@ whole range, 0-255, dividing the angular value by 1.41. The enum * - ``V4L2_QUANTIZATION_LIM_RANGE`` - Use the limited range quantization encoding. I.e. the range [0…1] is mapped to [16…235]. Cb and Cr are mapped from [-0.5…0.5] to - [16…240]. + [16…240]. Limited Range cannot be used with HSV. diff --git a/Documentation/media/uapi/v4l/colorspaces-details.rst b/Documentation/media/uapi/v4l/colorspaces-details.rst index 8b0ba3668101d..fd0cf57691d87 100644 --- a/Documentation/media/uapi/v4l/colorspaces-details.rst +++ b/Documentation/media/uapi/v4l/colorspaces-details.rst @@ -377,9 +377,8 @@ Colorspace BT.2020 (V4L2_COLORSPACE_BT2020) The :ref:`itu2020` standard defines the colorspace used by Ultra-high definition television (UHDTV). The default transfer function is ``V4L2_XFER_FUNC_709``. The default Y'CbCr encoding is -``V4L2_YCBCR_ENC_BT2020``. The default R'G'B' quantization is limited -range (!), and so is the default Y'CbCr quantization. The chromaticities -of the primary colors and the white reference are: +``V4L2_YCBCR_ENC_BT2020``. The default Y'CbCr quantization is limited range. +The chromaticities of the primary colors and the white reference are: diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index e3abfbd32f71e..b020e6ce6dd49 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -191,11 +191,12 @@ ad_actor_sys_prio ad_actor_system In an AD system, this specifies the mac-address for the actor in - protocol packet exchanges (LACPDUs). The value cannot be NULL or - multicast. It is preferred to have the local-admin bit set for this - mac but driver does not enforce it. If the value is not given then - system defaults to using the masters' mac address as actors' system - address. + protocol packet exchanges (LACPDUs). The value cannot be a multicast + address. If the all-zeroes MAC is specified, bonding will internally + use the MAC of the bond itself. It is preferred to have the + local-admin bit set for this mac but driver does not enforce it. If + the value is not given then system defaults to using the masters' + mac address as actors' system address. This parameter has effect only in 802.3ad mode and is available through SysFs interface. diff --git a/Documentation/networking/device_drivers/amazon/ena.txt b/Documentation/networking/device_drivers/amazon/ena.txt index 1bb55c7b604ce..bc2093825ef9d 100644 --- a/Documentation/networking/device_drivers/amazon/ena.txt +++ b/Documentation/networking/device_drivers/amazon/ena.txt @@ -248,7 +248,7 @@ RSS: inputs for hash functions. - The driver configures RSS settings using the AQ SetFeature command (ENA_ADMIN_RSS_HASH_FUNCTION, ENA_ADMIN_RSS_HASH_INPUT and - ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG properties). + ENA_ADMIN_RSS_INDIRECTION_TABLE_CONFIG properties). - If the NETIF_F_RXHASH flag is set, the 32-bit result of the hash function delivered in the Rx CQ descriptor is set in the received SKB. diff --git a/Documentation/networking/device_drivers/mellanox/mlx5.rst b/Documentation/networking/device_drivers/mellanox/mlx5.rst index d071c6b49e1ff..7599dceba9f1b 100644 --- a/Documentation/networking/device_drivers/mellanox/mlx5.rst +++ b/Documentation/networking/device_drivers/mellanox/mlx5.rst @@ -154,6 +154,27 @@ User command examples: values: cmode runtime value smfs +enable_roce: RoCE enablement state +---------------------------------- +RoCE enablement state controls driver support for RoCE traffic. +When RoCE is disabled, there is no gid table, only raw ethernet QPs are supported and traffic on the well known UDP RoCE port is handled as raw ethernet traffic. + +To change RoCE enablement state a user must change the driverinit cmode value and run devlink reload. + +User command examples: + +- Disable RoCE:: + + $ devlink dev param set pci/0000:06:00.0 name enable_roce value false cmode driverinit + $ devlink dev reload pci/0000:06:00.0 + +- Read RoCE enablement state:: + + $ devlink dev param show pci/0000:06:00.0 name enable_roce + pci/0000:06:00.0: + name enable_roce type generic + values: + cmode driverinit value true Devlink health reporters ======================== diff --git a/Documentation/networking/devlink-params-mlx5.txt b/Documentation/networking/devlink-params-mlx5.txt new file mode 100644 index 0000000000000..5071467118bd2 --- /dev/null +++ b/Documentation/networking/devlink-params-mlx5.txt @@ -0,0 +1,17 @@ +flow_steering_mode [DEVICE, DRIVER-SPECIFIC] + Controls the flow steering mode of the driver. + Two modes are supported: + 1. 'dmfs' - Device managed flow steering. + 2. 'smfs - Software/Driver managed flow steering. + In DMFS mode, the HW steering entities are created and + managed through the Firmware. + In SMFS mode, the HW steering entities are created and + managed though by the driver directly into Hardware + without firmware intervention. + Type: String + Configuration mode: runtime + +enable_roce [DEVICE, GENERIC] + Enable handling of RoCE traffic in the device. + Defaultly enabled. + Configuration mode: driverinit diff --git a/Documentation/networking/devlink-params.txt b/Documentation/networking/devlink-params.txt index ddba3e9b55b1f..04e234e9acc9b 100644 --- a/Documentation/networking/devlink-params.txt +++ b/Documentation/networking/devlink-params.txt @@ -65,3 +65,7 @@ reset_dev_on_drv_probe [DEVICE, GENERIC] Reset only if device firmware can be found in the filesystem. Type: u8 + +enable_roce [DEVICE, GENERIC] + Enable handling of RoCE traffic in the device. + Type: Boolean diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 8d4ad1d1ae26f..5cf601c94e354 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -876,7 +876,7 @@ cipso_cache_enable - BOOLEAN cipso_cache_bucket_size - INTEGER The CIPSO label cache consists of a fixed size hash table with each hash bucket containing a number of cache entries. This variable limits - the number of entries in each hash bucket; the larger the value the + the number of entries in each hash bucket; the larger the value is, the more CIPSO label mappings that can be cached. When the number of entries in a given hash bucket reaches this limit adding new entries causes the oldest entry in the bucket to be removed to make room. @@ -953,7 +953,7 @@ ip_nonlocal_bind - BOOLEAN which can be quite useful - but may break some applications. Default: 0 -ip_dynaddr - BOOLEAN +ip_dynaddr - INTEGER If set non-zero, enables support for dynamic addresses. If set to a non-zero value larger than 1, a kernel log message will be printed when dynamic address rewriting @@ -1000,12 +1000,14 @@ icmp_ratelimit - INTEGER icmp_msgs_per_sec - INTEGER Limit maximal number of ICMP packets sent per second from this host. Only messages whose type matches icmp_ratemask (see below) are - controlled by this limit. + controlled by this limit. For security reasons, the precise count + of messages per second is randomized. Default: 1000 icmp_msgs_burst - INTEGER icmp_msgs_per_sec controls number of ICMP packets sent per second, while icmp_msgs_burst controls the burst size of these packets. + For security reasons, the precise burst size is randomized. Default: 50 icmp_ratemask - INTEGER @@ -2282,7 +2284,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max Default: 4K sctp_wmem - vector of 3 INTEGERs: min, default, max - Currently this tunable has no effect. + Only the first value ("min") is used, "default" and "max" are + ignored. + + min: Minimum size of send buffer that can be used by SCTP sockets. + It is guaranteed to each SCTP socket (but not association) even + under moderate memory pressure. + + Default: 4K addr_scope_policy - INTEGER Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00 diff --git a/Documentation/networking/ipvs-sysctl.txt b/Documentation/networking/ipvs-sysctl.txt index 056898685d408..fc531c29a2e83 100644 --- a/Documentation/networking/ipvs-sysctl.txt +++ b/Documentation/networking/ipvs-sysctl.txt @@ -30,8 +30,7 @@ conn_reuse_mode - INTEGER 0: disable any special handling on port reuse. The new connection will be delivered to the same real server that was - servicing the previous connection. This will effectively - disable expire_nodest_conn. + servicing the previous connection. bit 1: enable rescheduling of new connections when it is safe. That is, whenever expire_nodest_conn and for TCP sockets, when diff --git a/Documentation/networking/j1939.rst b/Documentation/networking/j1939.rst index dc60b13fcd096..4b0db514b2010 100644 --- a/Documentation/networking/j1939.rst +++ b/Documentation/networking/j1939.rst @@ -339,7 +339,7 @@ To claim an address following code example can be used: .pgn = J1939_PGN_ADDRESS_CLAIMED, .pgn_mask = J1939_PGN_PDU1_MAX, }, { - .pgn = J1939_PGN_ADDRESS_REQUEST, + .pgn = J1939_PGN_REQUEST, .pgn_mask = J1939_PGN_PDU1_MAX, }, { .pgn = J1939_PGN_ADDRESS_COMMANDED, @@ -414,8 +414,8 @@ Send: .can_family = AF_CAN, .can_addr.j1939 = { .name = J1939_NO_NAME; - .pgn = 0x30, - .addr = 0x12300, + .addr = 0x30, + .pgn = 0x12300, }, }; diff --git a/Documentation/networking/nf_flowtable.txt b/Documentation/networking/nf_flowtable.txt index ca2136c76042c..0bf32d1121bec 100644 --- a/Documentation/networking/nf_flowtable.txt +++ b/Documentation/networking/nf_flowtable.txt @@ -76,7 +76,7 @@ flowtable and add one rule to your forward chain. table inet x { flowtable f { - hook ingress priority 0 devices = { eth0, eth1 }; + hook ingress priority 0; devices = { eth0, eth1 }; } chain y { type filter hook forward priority 0; policy accept; diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst index 06f743b612c48..bb53707f72cc5 100644 --- a/Documentation/process/stable-kernel-rules.rst +++ b/Documentation/process/stable-kernel-rules.rst @@ -174,7 +174,16 @@ Trees - The finalized and tagged releases of all stable kernels can be found in separate branches per version at: - https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git + https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git + + - The release candidate of all stable kernel versions can be found at: + + https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git/ + + .. warning:: + The -stable-rc tree is a snapshot in time of the stable-queue tree and + will change frequently, hence will be rebased often. It should only be + used for testing purposes (e.g. to be consumed by CI systems). Review committee diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index fb56297f70dc8..857be0d44e809 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -133,7 +133,7 @@ as you intend it to. The maintainer will thank you if you write your patch description in a form which can be easily pulled into Linux's source code management -system, ``git``, as a "commit log". See :ref:`explicit_in_reply_to`. +system, ``git``, as a "commit log". See :ref:`the_canonical_patch_format`. Solve only one problem per patch. If your description starts to get long, that's a sign that you probably need to split up your patch. diff --git a/Documentation/s390/index.rst b/Documentation/s390/index.rst index f7af2061e4065..cf71df5776b48 100644 --- a/Documentation/s390/index.rst +++ b/Documentation/s390/index.rst @@ -15,6 +15,7 @@ s390 Architecture vfio-ccw zfcpdump common_io + pci text_files diff --git a/Documentation/s390/pci.rst b/Documentation/s390/pci.rst new file mode 100644 index 0000000000000..75e043d4da853 --- /dev/null +++ b/Documentation/s390/pci.rst @@ -0,0 +1,126 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========= +S/390 PCI +========= + +Authors: + - Pierre Morel + +Copyright, IBM Corp. 2020 + + +command line parameters and debugfs entries +=========================================== + +Command line parameters +----------------------- + +* nomio + + Do not use MIO instructions. + +* norid + + Ignore the RID field and force use of one PCI domain per PCI function. + +debugfs entries +--------------- + +* /sys/kernel/debug/s390dbf/pci_*/ (S/390 debug feature) + + Some views generated by the debug feature to hold various debug outputs. + + - /sys/kernel/debug/s390dbf/pci_msg/sprintf + Messages from the processing of PCI events like machine check handling + and setting of global functionality like UID checking. + + The level of logging can be changed to be more or less verbose by piping to + /sys/kernel/debug/s390dbf/pci_*/level a number between 0 and 6; see the + documentation on the S/390 debug feature (Documentation/s390/s390dbf.rst) + for details. + +Sysfs entries +============= + +Specific entries, or entries specificities for zPCI functions. + +* /sys/bus/pci/slots/XXXXXXXX + + The slot entries are setup using the FID (Function Identifier) of the + PCI function. + + - /sys/bus/pci/slots/XXXXXXXX/power + + A physical function currently supporting virtual function can not be + powered-off until all virtual-function have been removed with + echo 0 > /sys/bus/pci/devices/XXXX:XX:XX.X/sriov_numvf + +* /sys/bus/pci/devices/XXXX:XX:XX.X/ + + - function_id + zPCI function identifier unique for the complete Z System. + It define uniquely a function in the system. + + - function_handle + Low level identifier used for a configured PCI function. + It may be useful for debuging. + + - pchid + Model dependent location of the I/O adapter. + + - pfgid + PCI Function Group ID, functions sharing identical functionality + are using a common identifier. + A PCI group defines interrupts, IOMMU, IOTLB and DMA specifics. + + - vfn + The Virtual Function Number, from 1 to N for virtual functions. + 0 for physical functions. + + - pft + PCI function type specifies the type of the PCI function. + + - port + The port correspond to the physical port the function is attached to. + It also gives an indication on the physical function a virtual function + is attached to. + + - uid + The UID, Unique Identifier is defined when configuring a LPAR and is + unique inside an LPAR. + + - pfip/segmentX + The segments are used to determine the isolation of a function. + They corresponds to the physical path to the function. + The more the segment are different the more the functions are isolated. + +Enumeration and hotplug +======================= + +The PCI address is made of 4 parts: domain, bus, device and function, +like in DDDD:BB:dd.f + +* When not using multi-functions (norid is set or firmware does not support + multi-functions) + + - There is only one function per domain. + + - the domain is set from the zPCI function's UID as defined during the + LPAR creation. + + - Addresses look like DDDD:00:00.0 + +* When using multi-functions (norid parameter is not set), there are some + change in the way zPCI functions are addressed: + + - There is still only one bus per domain. + + - There can be up to 256 functions per bus. + + - The domain part of the address of all functions of all functions for + a multi-Function device is set from the zPCI function's UID as defined + in the LPAR creation for the function zero. + + - New functions will only be ready to be used after the function zero + (the function with devfn 0) has been enumerated. diff --git a/Documentation/scsi/smartpqi.txt b/Documentation/scsi/smartpqi.txt index 201f80c7c0506..df129f55ace5e 100644 --- a/Documentation/scsi/smartpqi.txt +++ b/Documentation/scsi/smartpqi.txt @@ -29,7 +29,7 @@ smartpqi specific entries in /sys smartpqi host attributes: ------------------------- /sys/class/scsi_host/host*/rescan - /sys/class/scsi_host/host*/version + /sys/class/scsi_host/host*/driver_version The host rescan attribute is a write only attribute. Writing to this attribute will trigger the driver to scan for new, changed, or removed diff --git a/Documentation/sound/hd-audio/index.rst b/Documentation/sound/hd-audio/index.rst index f8a72ffffe66f..6e12de9fc34e2 100644 --- a/Documentation/sound/hd-audio/index.rst +++ b/Documentation/sound/hd-audio/index.rst @@ -8,3 +8,4 @@ HD-Audio models controls dp-mst + realtek-pc-beep diff --git a/Documentation/sound/hd-audio/models.rst b/Documentation/sound/hd-audio/models.rst index 11298f0ce44db..4c91abad7b35c 100644 --- a/Documentation/sound/hd-audio/models.rst +++ b/Documentation/sound/hd-audio/models.rst @@ -216,8 +216,6 @@ alc298-dell-aio ALC298 fixups on Dell AIO machines alc275-dell-xps ALC275 fixups on Dell XPS models -alc256-dell-xps13 - ALC256 fixups on Dell XPS13 lenovo-spk-noise Workaround for speaker noise on Lenovo machines lenovo-hotkey @@ -263,6 +261,10 @@ alc-sense-combo huawei-mbx-stereo Enable initialization verbs for Huawei MBX stereo speakers; might be risky, try this at your own risk +alc298-samsung-headphone + Samsung laptops with ALC298 +alc256-samsung-headphone + Samsung laptops with ALC256 ALC66x/67x/892 ============== diff --git a/Documentation/sound/hd-audio/realtek-pc-beep.rst b/Documentation/sound/hd-audio/realtek-pc-beep.rst new file mode 100644 index 0000000000000..be47c6f76a6e9 --- /dev/null +++ b/Documentation/sound/hd-audio/realtek-pc-beep.rst @@ -0,0 +1,129 @@ +=============================== +Realtek PC Beep Hidden Register +=============================== + +This file documents the "PC Beep Hidden Register", which is present in certain +Realtek HDA codecs and controls a muxer and pair of passthrough mixers that can +route audio between pins but aren't themselves exposed as HDA widgets. As far +as I can tell, these hidden routes are designed to allow flexible PC Beep output +for codecs that don't have mixer widgets in their output paths. Why it's easier +to hide a mixer behind an undocumented vendor register than to just expose it +as a widget, I have no idea. + +Register Description +==================== + +The register is accessed via processing coefficient 0x36 on NID 20h. Bits not +identified below have no discernible effect on my machine, a Dell XPS 13 9350:: + + MSB LSB + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | |h|S|L| | B |R| | Known bits + +=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+ + |0|0|1|1| 0x7 |0|0x0|1| 0x7 | Reset value + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +1Ah input select (B): 2 bits + When zero, expose the PC Beep line (from the internal beep generator, when + enabled with the Set Beep Generation verb on NID 01h, or else from the + external PCBEEP pin) on the 1Ah pin node. When nonzero, expose the headphone + jack (or possibly Line In on some machines) input instead. If PC Beep is + selected, the 1Ah boost control has no effect. + +Amplify 1Ah loopback, left (L): 1 bit + Amplify the left channel of 1Ah before mixing it into outputs as specified + by h and S bits. Does not affect the level of 1Ah exposed to other widgets. + +Amplify 1Ah loopback, right (R): 1 bit + Amplify the right channel of 1Ah before mixing it into outputs as specified + by h and S bits. Does not affect the level of 1Ah exposed to other widgets. + +Loopback 1Ah to 21h [active low] (h): 1 bit + When zero, mix 1Ah (possibly with amplification, depending on L and R bits) + into 21h (headphone jack on my machine). Mixed signal respects the mute + setting on 21h. + +Loopback 1Ah to 14h (S): 1 bit + When one, mix 1Ah (possibly with amplification, depending on L and R bits) + into 14h (internal speaker on my machine). Mixed signal **ignores** the mute + setting on 14h and is present whenever 14h is configured as an output. + +Path diagrams +============= + +1Ah input selection (DIV is the PC Beep divider set on NID 01h):: + + + | | | + +--DIV--+--!DIV--+ {1Ah boost control} + | | + +--(b == 0)--+--(b != 0)--+ + | + >1Ah (Beep/Headphone Mic/Line In)< + +Loopback of 1Ah to 21h/14h:: + + <1Ah (Beep/Headphone Mic/Line In)> + | + {amplify if L/R} + | + +-----!h-----+-----S-----+ + | | + {21h mute control} | + | | + >21h (Headphone)< >14h (Internal Speaker)< + +Background +========== + +All Realtek HDA codecs have a vendor-defined widget with node ID 20h which +provides access to a bank of registers that control various codec functions. +Registers are read and written via the standard HDA processing coefficient +verbs (Set/Get Coefficient Index, Set/Get Processing Coefficient). The node is +named "Realtek Vendor Registers" in public datasheets' verb listings and, +apart from that, is entirely undocumented. + +This particular register, exposed at coefficient 0x36 and named in commits from +Realtek, is of note: unlike most registers, which seem to control detailed +amplifier parameters not in scope of the HDA specification, it controls audio +routing which could just as easily have been defined using standard HDA mixer +and selector widgets. + +Specifically, it selects between two sources for the input pin widget with Node +ID (NID) 1Ah: the widget's signal can come either from an audio jack (on my +laptop, a Dell XPS 13 9350, it's the headphone jack, but comments in Realtek +commits indicate that it might be a Line In on some machines) or from the PC +Beep line (which is itself multiplexed between the codec's internal beep +generator and external PCBEEP pin, depending on if the beep generator is +enabled via verbs on NID 01h). Additionally, it can mix (with optional +amplification) that signal onto the 21h and/or 14h output pins. + +The register's reset value is 0x3717, corresponding to PC Beep on 1Ah that is +then amplified and mixed into both the headphones and the speakers. Not only +does this violate the HDA specification, which says that "[a vendor defined +beep input pin] connection may be maintained *only* while the Link reset +(**RST#**) is asserted", it means that we cannot ignore the register if we care +about the input that 1Ah would otherwise expose or if the PCBEEP trace is +poorly shielded and picks up chassis noise (both of which are the case on my +machine). + +Unfortunately, there are lots of ways to get this register configuration wrong. +Linux, it seems, has gone through most of them. For one, the register resets +after S3 suspend: judging by existing code, this isn't the case for all vendor +registers, and it's led to some fixes that improve behavior on cold boot but +don't last after suspend. Other fixes have successfully switched the 1Ah input +away from PC Beep but have failed to disable both loopback paths. On my +machine, this means that the headphone input is amplified and looped back to +the headphone output, which uses the exact same pins! As you might expect, this +causes terrible headphone noise, the character of which is controlled by the +1Ah boost control. (If you've seen instructions online to fix XPS 13 headphone +noise by changing "Headphone Mic Boost" in ALSA, now you know why.) + +The information here has been obtained through black-box reverse engineering of +the ALC256 codec's behavior and is not guaranteed to be correct. It likely +also applies for the ALC255, ALC257, ALC235, and ALC236, since those codecs +seem to be close relatives of the ALC256. (They all share one initialization +function.) Additionally, other codecs like the ALC225 and ALC285 also have this +register, judging by existing fixups in ``patch_realtek.c``, but specific +data (e.g. node IDs, bit positions, pin mappings) for those codecs may differ +from what I've described here. diff --git a/Documentation/sphinx/parse-headers.pl b/Documentation/sphinx/parse-headers.pl index c518050ffc3fb..0dcf369ab9040 100755 --- a/Documentation/sphinx/parse-headers.pl +++ b/Documentation/sphinx/parse-headers.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl use strict; use Text::Tabs; use Getopt::Long; diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py index 95d6e31f1e3a1..85e496086d6ec 100755 --- a/Documentation/target/tcm_mod_builder.py +++ b/Documentation/target/tcm_mod_builder.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # The TCM v4 multi-protocol fabric module generation script for drivers/target/$NEW_MOD # # Copyright (c) 2010 Rising Tide Systems diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst index 8408670d03282..3f3d1b960fe79 100644 --- a/Documentation/trace/histogram.rst +++ b/Documentation/trace/histogram.rst @@ -191,7 +191,7 @@ Documentation written by Tom Zanussi with the event, in nanoseconds. May be modified by .usecs to have timestamps interpreted as microseconds. - cpu int the cpu on which the event occurred. + common_cpu int the cpu on which the event occurred. ====================== ==== ======================================= Extended error information diff --git a/Documentation/trace/postprocess/decode_msr.py b/Documentation/trace/postprocess/decode_msr.py index 0ab40e0db5809..aa9cc7abd5c2b 100644 --- a/Documentation/trace/postprocess/decode_msr.py +++ b/Documentation/trace/postprocess/decode_msr.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # add symbolic names to read_msr / write_msr in trace # decode_msr msr-index.h < trace import sys diff --git a/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl b/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl index 0a120aae33ce5..b9b7d80c2f9d2 100644 --- a/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl +++ b/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # This is a POC (proof of concept or piece of crap, take your pick) for reading the # text representation of trace output related to page allocation. It makes an attempt # to extract some high-level information on what is going on. The accuracy of the parser diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl index 995da15b16cab..2f4e39875fb39 100644 --- a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl +++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # This is a POC for reading the text representation of trace output related to # page reclaim. It makes an attempt to extract some high-level information on # what is going on. The accuracy of the parser may vary diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst index bd9165241b6c8..6efb41cc80725 100644 --- a/Documentation/userspace-api/seccomp_filter.rst +++ b/Documentation/userspace-api/seccomp_filter.rst @@ -250,14 +250,14 @@ Users can read via ``ioctl(SECCOMP_IOCTL_NOTIF_RECV)`` (or ``poll()``) on a seccomp notification fd to receive a ``struct seccomp_notif``, which contains five members: the input length of the structure, a unique-per-filter ``id``, the ``pid`` of the task which triggered this request (which may be 0 if the -task is in a pid ns not visible from the listener's pid namespace), a ``flags`` -member which for now only has ``SECCOMP_NOTIF_FLAG_SIGNALED``, representing -whether or not the notification is a result of a non-fatal signal, and the -``data`` passed to seccomp. Userspace can then make a decision based on this -information about what to do, and ``ioctl(SECCOMP_IOCTL_NOTIF_SEND)`` a -response, indicating what should be returned to userspace. The ``id`` member of -``struct seccomp_notif_resp`` should be the same ``id`` as in ``struct -seccomp_notif``. +task is in a pid ns not visible from the listener's pid namespace). The +notification also contains the ``data`` passed to seccomp, and a filters flag. +The structure should be zeroed out prior to calling the ioctl. + +Userspace can then make a decision based on this information about what to do, +and ``ioctl(SECCOMP_IOCTL_NOTIF_SEND)`` a response, indicating what should be +returned to userspace. The ``id`` member of ``struct seccomp_notif_resp`` should +be the same ``id`` as in ``struct seccomp_notif``. It is worth noting that ``struct seccomp_data`` contains the values of register arguments to the syscall, but does not contain pointers to memory. The task's diff --git a/Documentation/virt/index.rst b/Documentation/virt/index.rst index 062ffb5270438..ce114d1ffba17 100644 --- a/Documentation/virt/index.rst +++ b/Documentation/virt/index.rst @@ -9,6 +9,7 @@ Linux Virtualization Support kvm/index paravirt_ops + ne_overview .. only:: html and subproject diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.txt index 4833904d32a5a..1a5afb6f5ce23 100644 --- a/Documentation/virt/kvm/api.txt +++ b/Documentation/virt/kvm/api.txt @@ -172,6 +172,9 @@ is dependent on the CPU capability and the kernel configuration. The limit can be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION ioctl() at run-time. +Creation of the VM will fail if the requested IPA size (whether it is +implicit or explicit) is unsupported on the host. + Please note that configuring the IPA size does not affect the capability exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects size of the address translated by the stage2 level (guest physical to @@ -1132,6 +1135,9 @@ field userspace_addr, which must point at user addressable memory for the entire memory slot size. Any object may back this memory, including anonymous memory, ordinary files, and hugetlbfs. +On architectures that support a form of address tagging, userspace_addr must +be an untagged address. + It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr be identical. This allows large pages in the guest to be backed by large pages in the host. @@ -1897,9 +1903,11 @@ Type: vcpu ioctl Parameters: struct kvm_one_reg (in) Returns: 0 on success, negative value on failure Errors: -  ENOENT:   no such register -  EINVAL:   invalid register ID, or no such register -  EPERM:    (arm64) register access not allowed before vcpu finalization +  ENOENT   no such register +  EINVAL   invalid register ID, or no such register or used with VMs in + protected virtualization mode on s390 +  EPERM    (arm64) register access not allowed before vcpu finalization + (These error codes are indicative only: do not rely on a specific error code being returned in a specific situation.) @@ -2290,9 +2298,11 @@ Type: vcpu ioctl Parameters: struct kvm_one_reg (in and out) Returns: 0 on success, negative value on failure Errors include: -  ENOENT:   no such register -  EINVAL:   invalid register ID, or no such register -  EPERM:    (arm64) register access not allowed before vcpu finalization +  ENOENT   no such register +  EINVAL   invalid register ID, or no such register or used with VMs in + protected virtualization mode on s390 +  EPERM    (arm64) register access not allowed before vcpu finalization + (These error codes are indicative only: do not rely on a specific error code being returned in a specific situation.) @@ -3058,51 +3068,116 @@ The fields in each entry are defined as follows: 4.89 KVM_S390_MEM_OP -Capability: KVM_CAP_S390_MEM_OP -Architectures: s390 -Type: vcpu ioctl -Parameters: struct kvm_s390_mem_op (in) -Returns: = 0 on success, - < 0 on generic error (e.g. -EFAULT or -ENOMEM), - > 0 if an exception occurred while walking the page tables +:Capability: KVM_CAP_S390_MEM_OP, KVM_CAP_S390_PROTECTED, KVM_CAP_S390_MEM_OP_EXTENSION +:Architectures: s390 +:Type: vm ioctl, vcpu ioctl +:Parameters: struct kvm_s390_mem_op (in) +:Returns: = 0 on success, + < 0 on generic error (e.g. -EFAULT or -ENOMEM), + > 0 if an exception occurred while walking the page tables -Read or write data from/to the logical (virtual) memory of a VCPU. +Read or write data from/to the VM's memory. +The KVM_CAP_S390_MEM_OP_EXTENSION capability specifies what functionality is +supported. -Parameters are specified via the following structure: +Parameters are specified via the following structure:: -struct kvm_s390_mem_op { + struct kvm_s390_mem_op { __u64 gaddr; /* the guest address */ __u64 flags; /* flags */ __u32 size; /* amount of bytes */ __u32 op; /* type of operation */ __u64 buf; /* buffer in userspace */ - __u8 ar; /* the access register number */ - __u8 reserved[31]; /* should be set to 0 */ -}; - -The type of operation is specified in the "op" field. It is either -KVM_S390_MEMOP_LOGICAL_READ for reading from logical memory space or -KVM_S390_MEMOP_LOGICAL_WRITE for writing to logical memory space. The -KVM_S390_MEMOP_F_CHECK_ONLY flag can be set in the "flags" field to check -whether the corresponding memory access would create an access exception -(without touching the data in the memory at the destination). In case an -access exception occurred while walking the MMU tables of the guest, the -ioctl returns a positive error number to indicate the type of exception. -This exception is also raised directly at the corresponding VCPU if the -flag KVM_S390_MEMOP_F_INJECT_EXCEPTION is set in the "flags" field. + union { + struct { + __u8 ar; /* the access register number */ + __u8 key; /* access key, ignored if flag unset */ + }; + __u32 sida_offset; /* offset into the sida */ + __u8 reserved[32]; /* ignored */ + }; + }; The start address of the memory region has to be specified in the "gaddr" field, and the length of the region in the "size" field (which must not be 0). The maximum value for "size" can be obtained by checking the KVM_CAP_S390_MEM_OP capability. "buf" is the buffer supplied by the userspace application where the read data should be written to for -KVM_S390_MEMOP_LOGICAL_READ, or where the data that should be written is -stored for a KVM_S390_MEMOP_LOGICAL_WRITE. When KVM_S390_MEMOP_F_CHECK_ONLY -is specified, "buf" is unused and can be NULL. "ar" designates the access -register number to be used; the valid range is 0..15. - -The "reserved" field is meant for future extensions. It is not used by -KVM with the currently defined set of flags. +a read access, or where the data that should be written is stored for +a write access. The "reserved" field is meant for future extensions. +Reserved and unused values are ignored. Future extension that add members must +introduce new flags. + +The type of operation is specified in the "op" field. Flags modifying +their behavior can be set in the "flags" field. Undefined flag bits must +be set to 0. + +Possible operations are: + * ``KVM_S390_MEMOP_LOGICAL_READ`` + * ``KVM_S390_MEMOP_LOGICAL_WRITE`` + * ``KVM_S390_MEMOP_ABSOLUTE_READ`` + * ``KVM_S390_MEMOP_ABSOLUTE_WRITE`` + * ``KVM_S390_MEMOP_SIDA_READ`` + * ``KVM_S390_MEMOP_SIDA_WRITE`` + +Logical read/write: + +Access logical memory, i.e. translate the given guest address to an absolute +address given the state of the VCPU and use the absolute address as target of +the access. "ar" designates the access register number to be used; the valid +range is 0..15. +Logical accesses are permitted for the VCPU ioctl only. +Logical accesses are permitted for non-protected guests only. + +Supported flags: + * ``KVM_S390_MEMOP_F_CHECK_ONLY`` + * ``KVM_S390_MEMOP_F_INJECT_EXCEPTION`` + * ``KVM_S390_MEMOP_F_SKEY_PROTECTION`` + +The KVM_S390_MEMOP_F_CHECK_ONLY flag can be set to check whether the +corresponding memory access would cause an access exception; however, +no actual access to the data in memory at the destination is performed. +In this case, "buf" is unused and can be NULL. + +In case an access exception occurred during the access (or would occur +in case of KVM_S390_MEMOP_F_CHECK_ONLY), the ioctl returns a positive +error number indicating the type of exception. This exception is also +raised directly at the corresponding VCPU if the flag +KVM_S390_MEMOP_F_INJECT_EXCEPTION is set. + +If the KVM_S390_MEMOP_F_SKEY_PROTECTION flag is set, storage key +protection is also in effect and may cause exceptions if accesses are +prohibited given the access key designated by "key"; the valid range is 0..15. +KVM_S390_MEMOP_F_SKEY_PROTECTION is available if KVM_CAP_S390_MEM_OP_EXTENSION +is > 0. + +Absolute read/write: + +Access absolute memory. This operation is intended to be used with the +KVM_S390_MEMOP_F_SKEY_PROTECTION flag, to allow accessing memory and performing +the checks required for storage key protection as one operation (as opposed to +user space getting the storage keys, performing the checks, and accessing +memory thereafter, which could lead to a delay between check and access). +Absolute accesses are permitted for the VM ioctl if KVM_CAP_S390_MEM_OP_EXTENSION +is > 0. +Currently absolute accesses are not permitted for VCPU ioctls. +Absolute accesses are permitted for non-protected guests only. + +Supported flags: + * ``KVM_S390_MEMOP_F_CHECK_ONLY`` + * ``KVM_S390_MEMOP_F_SKEY_PROTECTION`` + +The semantics of the flags are as for logical accesses. + +SIDA read/write: + +Access the secure instruction data area which contains memory operands necessary +for instruction emulation for protected guests. +SIDA accesses are available if the KVM_CAP_S390_PROTECTED capability is available. +SIDA accesses are permitted for the VCPU ioctl only. +SIDA accesses are permitted for protected guests only. + +No flags are supported. 4.90 KVM_S390_GET_SKEYS @@ -4127,6 +4202,90 @@ Valid values for 'action': #define KVM_PMU_EVENT_DENY 1 +4.122 KVM_S390_NORMAL_RESET + +Capability: KVM_CAP_S390_VCPU_RESETS +Architectures: s390 +Type: vcpu ioctl +Parameters: none +Returns: 0 + +This ioctl resets VCPU registers and control structures according to +the cpu reset definition in the POP (Principles Of Operation). + +4.123 KVM_S390_INITIAL_RESET + +Capability: none +Architectures: s390 +Type: vcpu ioctl +Parameters: none +Returns: 0 + +This ioctl resets VCPU registers and control structures according to +the initial cpu reset definition in the POP. However, the cpu is not +put into ESA mode. This reset is a superset of the normal reset. + +4.124 KVM_S390_CLEAR_RESET + +Capability: KVM_CAP_S390_VCPU_RESETS +Architectures: s390 +Type: vcpu ioctl +Parameters: none +Returns: 0 + +This ioctl resets VCPU registers and control structures according to +the clear cpu reset definition in the POP. However, the cpu is not put +into ESA mode. This reset is a superset of the initial reset. + + +4.125 KVM_S390_PV_COMMAND +------------------------- + +:Capability: KVM_CAP_S390_PROTECTED +:Architectures: s390 +:Type: vm ioctl +:Parameters: struct kvm_pv_cmd +:Returns: 0 on success, < 0 on error + +:: + + struct kvm_pv_cmd { + __u32 cmd; /* Command to be executed */ + __u16 rc; /* Ultravisor return code */ + __u16 rrc; /* Ultravisor return reason code */ + __u64 data; /* Data or address */ + __u32 flags; /* flags for future extensions. Must be 0 for now */ + __u32 reserved[3]; + }; + +cmd values: + +KVM_PV_ENABLE + Allocate memory and register the VM with the Ultravisor, thereby + donating memory to the Ultravisor that will become inaccessible to + KVM. All existing CPUs are converted to protected ones. After this + command has succeeded, any CPU added via hotplug will become + protected during its creation as well. + +KVM_PV_DISABLE + + Deregister the VM from the Ultravisor and reclaim the memory that + had been donated to the Ultravisor, making it usable by the kernel + again. All registered VCPUs are converted back to non-protected + ones. + +KVM_PV_VM_SET_SEC_PARMS + Pass the image header from VM memory to the Ultravisor in + preparation of image unpacking and verification. + +KVM_PV_VM_UNPACK + Unpack (protect and decrypt) a page of the encrypted boot image. + +KVM_PV_VM_VERIFY + Verify the integrity of the unpacked image. Only if this succeeds, + KVM is allowed to start protected VCPUs. + + 5. The kvm_run structure ------------------------ @@ -4444,9 +4603,11 @@ EOI was received. #define KVM_EXIT_HYPERV_SYNIC 1 #define KVM_EXIT_HYPERV_HCALL 2 __u32 type; + __u32 pad1; union { struct { __u32 msr; + __u32 pad2; __u64 control; __u64 evt_page; __u64 msg_page; @@ -5322,3 +5483,21 @@ handling by KVM (as some KVM hypercall may be mistakenly treated as TLB flush hypercalls by Hyper-V) so userspace should disable KVM identification in CPUID and only exposes Hyper-V identification. In this case, guest thinks it's running on Hyper-V and only use Hyper-V hypercalls. + +8.22 KVM_CAP_S390_VCPU_RESETS + +Architectures: s390 + +This capability indicates that the KVM_S390_NORMAL_RESET and +KVM_S390_CLEAR_RESET ioctls are available. + +8.23 KVM_CAP_S390_PROTECTED + +Architecture: s390 + + +This capability indicates that the Ultravisor has been initialized and +KVM can therefore start protected VMs. +This capability governs the KVM_S390_PV_COMMAND ioctl and the +KVM_MP_STATE_LOAD MP_STATE. KVM_SET_MP_STATE can fail for protected +guests when the state change is invalid. diff --git a/Documentation/virt/kvm/arm/pvtime.rst b/Documentation/virt/kvm/arm/pvtime.rst new file mode 100644 index 0000000000000..2357dd2d86553 --- /dev/null +++ b/Documentation/virt/kvm/arm/pvtime.rst @@ -0,0 +1,80 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Paravirtualized time support for arm64 +====================================== + +Arm specification DEN0057/A defines a standard for paravirtualised time +support for AArch64 guests: + +https://developer.arm.com/docs/den0057/a + +KVM/arm64 implements the stolen time part of this specification by providing +some hypervisor service calls to support a paravirtualized guest obtaining a +view of the amount of time stolen from its execution. + +Two new SMCCC compatible hypercalls are defined: + +* PV_TIME_FEATURES: 0xC5000020 +* PV_TIME_ST: 0xC5000021 + +These are only available in the SMC64/HVC64 calling convention as +paravirtualized time is not available to 32 bit Arm guests. The existence of +the PV_FEATURES hypercall should be probed using the SMCCC 1.1 ARCH_FEATURES +mechanism before calling it. + +PV_TIME_FEATURES + ============= ======== ========== + Function ID: (uint32) 0xC5000020 + PV_call_id: (uint32) The function to query for support. + Currently only PV_TIME_ST is supported. + Return value: (int64) NOT_SUPPORTED (-1) or SUCCESS (0) if the relevant + PV-time feature is supported by the hypervisor. + ============= ======== ========== + +PV_TIME_ST + ============= ======== ========== + Function ID: (uint32) 0xC5000021 + Return value: (int64) IPA of the stolen time data structure for this + VCPU. On failure: + NOT_SUPPORTED (-1) + ============= ======== ========== + +The IPA returned by PV_TIME_ST should be mapped by the guest as normal memory +with inner and outer write back caching attributes, in the inner shareable +domain. A total of 16 bytes from the IPA returned are guaranteed to be +meaningfully filled by the hypervisor (see structure below). + +PV_TIME_ST returns the structure for the calling VCPU. + +Stolen Time +----------- + +The structure pointed to by the PV_TIME_ST hypercall is as follows: + ++-------------+-------------+-------------+----------------------------+ +| Field | Byte Length | Byte Offset | Description | ++=============+=============+=============+============================+ +| Revision | 4 | 0 | Must be 0 for version 1.0 | ++-------------+-------------+-------------+----------------------------+ +| Attributes | 4 | 4 | Must be 0 | ++-------------+-------------+-------------+----------------------------+ +| Stolen time | 8 | 8 | Stolen time in unsigned | +| | | | nanoseconds indicating how | +| | | | much time this VCPU thread | +| | | | was involuntarily not | +| | | | running on a physical CPU. | ++-------------+-------------+-------------+----------------------------+ + +All values in the structure are stored little-endian. + +The structure will be updated by the hypervisor prior to scheduling a VCPU. It +will be present within a reserved region of the normal memory given to the +guest. The guest should not attempt to write into this memory. There is a +structure per VCPU of the guest. + +It is advisable that one or more 64k pages are set aside for the purpose of +these structures and not used for other purposes, this enables the guest to map +the region using 64k pages and avoids conflicting attributes with other memory. + +For the user space interface see Documentation/virt/kvm/devices/vcpu.txt +section "3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL". diff --git a/Documentation/virt/kvm/devices/s390_flic.txt b/Documentation/virt/kvm/devices/s390_flic.txt index a4e20a0901746..a347ce06af4d6 100644 --- a/Documentation/virt/kvm/devices/s390_flic.txt +++ b/Documentation/virt/kvm/devices/s390_flic.txt @@ -100,15 +100,9 @@ struct kvm_s390_io_adapter_req { mask or unmask the adapter, as specified in mask KVM_S390_IO_ADAPTER_MAP - perform a gmap translation for the guest address provided in addr, - pin a userspace page for the translated address and add it to the - list of mappings - Note: A new mapping will be created unconditionally; therefore, - the calling code should avoid making duplicate mappings. - + This is now a no-op. The mapping is purely done by the irq route. KVM_S390_IO_ADAPTER_UNMAP - release a userspace page for the translated address specified in addr - from the list of mappings + This is now a no-op. The mapping is purely done by the irq route. KVM_DEV_FLIC_AISM modify the adapter-interruption-suppression mode for a given isc if the diff --git a/Documentation/virt/kvm/devices/vcpu.txt b/Documentation/virt/kvm/devices/vcpu.txt index 2b5dab16c4f20..6f3bd64a05b0f 100644 --- a/Documentation/virt/kvm/devices/vcpu.txt +++ b/Documentation/virt/kvm/devices/vcpu.txt @@ -60,3 +60,17 @@ time to use the number provided for a given timer, overwriting any previously configured values on other VCPUs. Userspace should configure the interrupt numbers on at least one VCPU after creating all VCPUs and before running any VCPUs. + +3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL +Architectures: ARM64 + +3.1 ATTRIBUTE: KVM_ARM_VCPU_PVTIME_IPA +Parameters: 64-bit base address +Returns: -ENXIO: Stolen time not implemented + -EEXIST: Base address already set for this VCPU + -EINVAL: Base address not 64 byte aligned + +Specifies the base address of the stolen time structure for this VCPU. The +base address must be 64 byte aligned and exist within a valid guest memory +region. See Documentation/virt/kvm/arm/pvtime.txt for more information +including the layout of the stolen time structure. diff --git a/Documentation/virt/kvm/index.rst b/Documentation/virt/kvm/index.rst index ada224a511fec..a0dfab7ca85ec 100644 --- a/Documentation/virt/kvm/index.rst +++ b/Documentation/virt/kvm/index.rst @@ -9,4 +9,6 @@ KVM amd-memory-encryption cpuid + s390-pv + s390-pv-boot vcpu-requests diff --git a/Documentation/virt/kvm/mmu.txt b/Documentation/virt/kvm/mmu.txt index dadb29e8738fe..da1ac6a6398f6 100644 --- a/Documentation/virt/kvm/mmu.txt +++ b/Documentation/virt/kvm/mmu.txt @@ -152,8 +152,8 @@ Shadow pages contain the following information: shadow pages) so role.quadrant takes values in the range 0..3. Each quadrant maps 1GB virtual address space. role.access: - Inherited guest access permissions in the form uwx. Note execute - permission is positive, not negative. + Inherited guest access permissions from the parent ptes in the form uwx. + Note execute permission is positive, not negative. role.invalid: The page is invalid and should not be used. It is a root page that is currently pinned (by a cpu hardware register pointing to it); once it is @@ -420,7 +420,7 @@ If the generation number of the spte does not equal the global generation number, it will ignore the cached MMIO information and handle the page fault through the slow path. -Since only 19 bits are used to store generation-number on mmio spte, all +Since only 18 bits are used to store generation-number on mmio spte, all pages are zapped when there is an overflow. Unfortunately, a single memory access might access kvm_memslots(kvm) multiple diff --git a/Documentation/virt/kvm/s390-pv-boot.rst b/Documentation/virt/kvm/s390-pv-boot.rst new file mode 100644 index 0000000000000..8b8fa03904099 --- /dev/null +++ b/Documentation/virt/kvm/s390-pv-boot.rst @@ -0,0 +1,84 @@ +.. SPDX-License-Identifier: GPL-2.0 + +====================================== +s390 (IBM Z) Boot/IPL of Protected VMs +====================================== + +Summary +------- +The memory of Protected Virtual Machines (PVMs) is not accessible to +I/O or the hypervisor. In those cases where the hypervisor needs to +access the memory of a PVM, that memory must be made accessible. +Memory made accessible to the hypervisor will be encrypted. See +:doc:`s390-pv` for details." + +On IPL (boot) a small plaintext bootloader is started, which provides +information about the encrypted components and necessary metadata to +KVM to decrypt the protected virtual machine. + +Based on this data, KVM will make the protected virtual machine known +to the Ultravisor (UV) and instruct it to secure the memory of the +PVM, decrypt the components and verify the data and address list +hashes, to ensure integrity. Afterwards KVM can run the PVM via the +SIE instruction which the UV will intercept and execute on KVM's +behalf. + +As the guest image is just like an opaque kernel image that does the +switch into PV mode itself, the user can load encrypted guest +executables and data via every available method (network, dasd, scsi, +direct kernel, ...) without the need to change the boot process. + + +Diag308 +------- +This diagnose instruction is the basic mechanism to handle IPL and +related operations for virtual machines. The VM can set and retrieve +IPL information blocks, that specify the IPL method/devices and +request VM memory and subsystem resets, as well as IPLs. + +For PVMs this concept has been extended with new subcodes: + +Subcode 8: Set an IPL Information Block of type 5 (information block +for PVMs) +Subcode 9: Store the saved block in guest memory +Subcode 10: Move into Protected Virtualization mode + +The new PV load-device-specific-parameters field specifies all data +that is necessary to move into PV mode. + +* PV Header origin +* PV Header length +* List of Components composed of + * AES-XTS Tweak prefix + * Origin + * Size + +The PV header contains the keys and hashes, which the UV will use to +decrypt and verify the PV, as well as control flags and a start PSW. + +The components are for instance an encrypted kernel, kernel parameters +and initrd. The components are decrypted by the UV. + +After the initial import of the encrypted data, all defined pages will +contain the guest content. All non-specified pages will start out as +zero pages on first access. + + +When running in protected virtualization mode, some subcodes will result in +exceptions or return error codes. + +Subcodes 4 and 7, which specify operations that do not clear the guest +memory, will result in specification exceptions. This is because the +UV will clear all memory when a secure VM is removed, and therefore +non-clearing IPL subcodes are not allowed. + +Subcodes 8, 9, 10 will result in specification exceptions. +Re-IPL into a protected mode is only possible via a detour into non +protected mode. + +Keys +---- +Every CEC will have a unique public key to enable tooling to build +encrypted images. +See `s390-tools `_ +for the tooling. diff --git a/Documentation/virt/kvm/s390-pv.rst b/Documentation/virt/kvm/s390-pv.rst new file mode 100644 index 0000000000000..774a8c6060918 --- /dev/null +++ b/Documentation/virt/kvm/s390-pv.rst @@ -0,0 +1,116 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================================= +s390 (IBM Z) Ultravisor and Protected VMs +========================================= + +Summary +------- +Protected virtual machines (PVM) are KVM VMs that do not allow KVM to +access VM state like guest memory or guest registers. Instead, the +PVMs are mostly managed by a new entity called Ultravisor (UV). The UV +provides an API that can be used by PVMs and KVM to request management +actions. + +Each guest starts in non-protected mode and then may make a request to +transition into protected mode. On transition, KVM registers the guest +and its VCPUs with the Ultravisor and prepares everything for running +it. + +The Ultravisor will secure and decrypt the guest's boot memory +(i.e. kernel/initrd). It will safeguard state changes like VCPU +starts/stops and injected interrupts while the guest is running. + +As access to the guest's state, such as the SIE state description, is +normally needed to be able to run a VM, some changes have been made in +the behavior of the SIE instruction. A new format 4 state description +has been introduced, where some fields have different meanings for a +PVM. SIE exits are minimized as much as possible to improve speed and +reduce exposed guest state. + + +Interrupt injection +------------------- +Interrupt injection is safeguarded by the Ultravisor. As KVM doesn't +have access to the VCPUs' lowcores, injection is handled via the +format 4 state description. + +Machine check, external, IO and restart interruptions each can be +injected on SIE entry via a bit in the interrupt injection control +field (offset 0x54). If the guest cpu is not enabled for the interrupt +at the time of injection, a validity interception is recognized. The +format 4 state description contains fields in the interception data +block where data associated with the interrupt can be transported. + +Program and Service Call exceptions have another layer of +safeguarding; they can only be injected for instructions that have +been intercepted into KVM. The exceptions need to be a valid outcome +of an instruction emulation by KVM, e.g. we can never inject a +addressing exception as they are reported by SIE since KVM has no +access to the guest memory. + + +Mask notification interceptions +------------------------------- +KVM cannot intercept lctl(g) and lpsw(e) anymore in order to be +notified when a PVM enables a certain class of interrupt. As a +replacement, two new interception codes have been introduced: One +indicating that the contents of CRs 0, 6, or 14 have been changed, +indicating different interruption subclasses; and one indicating that +PSW bit 13 has been changed, indicating that a machine check +intervention was requested and those are now enabled. + +Instruction emulation +--------------------- +With the format 4 state description for PVMs, the SIE instruction already +interprets more instructions than it does with format 2. It is not able +to interpret every instruction, but needs to hand some tasks to KVM; +therefore, the SIE and the ultravisor safeguard emulation inputs and outputs. + +The control structures associated with SIE provide the Secure +Instruction Data Area (SIDA), the Interception Parameters (IP) and the +Secure Interception General Register Save Area. Guest GRs and most of +the instruction data, such as I/O data structures, are filtered. +Instruction data is copied to and from the SIDA when needed. Guest +GRs are put into / retrieved from the Secure Interception General +Register Save Area. + +Only GR values needed to emulate an instruction will be copied into this +save area and the real register numbers will be hidden. + +The Interception Parameters state description field still contains the +the bytes of the instruction text, but with pre-set register values +instead of the actual ones. I.e. each instruction always uses the same +instruction text, in order not to leak guest instruction text. +This also implies that the register content that a guest had in r +may be in r from the hypervisor's point of view. + +The Secure Instruction Data Area contains instruction storage +data. Instruction data, i.e. data being referenced by an instruction +like the SCCB for sclp, is moved via the SIDA. When an instruction is +intercepted, the SIE will only allow data and program interrupts for +this instruction to be moved to the guest via the two data areas +discussed before. Other data is either ignored or results in validity +interceptions. + + +Instruction emulation interceptions +----------------------------------- +There are two types of SIE secure instruction intercepts: the normal +and the notification type. Normal secure instruction intercepts will +make the guest pending for instruction completion of the intercepted +instruction type, i.e. on SIE entry it is attempted to complete +emulation of the instruction with the data provided by KVM. That might +be a program exception or instruction completion. + +The notification type intercepts inform KVM about guest environment +changes due to guest instruction interpretation. Such an interception +is recognized, for example, for the store prefix instruction to provide +the new lowcore location. On SIE reentry, any KVM data in the data areas +is ignored and execution continues as if the guest instruction had +completed. For that reason KVM is not allowed to inject a program +interrupt. + +Links +----- +`KVM Forum 2019 presentation `_ diff --git a/Documentation/virt/ne_overview.rst b/Documentation/virt/ne_overview.rst new file mode 100644 index 0000000000000..74c2f5919c886 --- /dev/null +++ b/Documentation/virt/ne_overview.rst @@ -0,0 +1,100 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============== +Nitro Enclaves +============== + +Overview +======== + +Nitro Enclaves (NE) is a new Amazon Elastic Compute Cloud (EC2) capability +that allows customers to carve out isolated compute environments within EC2 +instances [1]. + +For example, an application that processes sensitive data and runs in a VM, +can be separated from other applications running in the same VM. This +application then runs in a separate VM than the primary VM, namely an enclave. +It runs alongside the VM that spawned it. This setup matches low latency +applications needs. + +The current supported architectures for the NE kernel driver, available in the +upstream Linux kernel, are x86 and ARM64. + +The resources that are allocated for the enclave, such as memory and CPUs, are +carved out of the primary VM. Each enclave is mapped to a process running in the +primary VM, that communicates with the NE kernel driver via an ioctl interface. + +In this sense, there are two components: + +1. An enclave abstraction process - a user space process running in the primary +VM guest that uses the provided ioctl interface of the NE driver to spawn an +enclave VM (that's 2 below). + +There is a NE emulated PCI device exposed to the primary VM. The driver for this +new PCI device is included in the NE driver. + +The ioctl logic is mapped to PCI device commands e.g. the NE_START_ENCLAVE ioctl +maps to an enclave start PCI command. The PCI device commands are then +translated into actions taken on the hypervisor side; that's the Nitro +hypervisor running on the host where the primary VM is running. The Nitro +hypervisor is based on core KVM technology. + +2. The enclave itself - a VM running on the same host as the primary VM that +spawned it. Memory and CPUs are carved out of the primary VM and are dedicated +for the enclave VM. An enclave does not have persistent storage attached. + +The memory regions carved out of the primary VM and given to an enclave need to +be aligned 2 MiB / 1 GiB physically contiguous memory regions (or multiple of +this size e.g. 8 MiB). The memory can be allocated e.g. by using hugetlbfs from +user space [2][3][7]. The memory size for an enclave needs to be at least +64 MiB. The enclave memory and CPUs need to be from the same NUMA node. + +An enclave runs on dedicated cores. CPU 0 and its CPU siblings need to remain +available for the primary VM. A CPU pool has to be set for NE purposes by an +user with admin capability. See the cpu list section from the kernel +documentation [4] for how a CPU pool format looks. + +An enclave communicates with the primary VM via a local communication channel, +using virtio-vsock [5]. The primary VM has virtio-pci vsock emulated device, +while the enclave VM has a virtio-mmio vsock emulated device. The vsock device +uses eventfd for signaling. The enclave VM sees the usual interfaces - local +APIC and IOAPIC - to get interrupts from virtio-vsock device. The virtio-mmio +device is placed in memory below the typical 4 GiB. + +The application that runs in the enclave needs to be packaged in an enclave +image together with the OS ( e.g. kernel, ramdisk, init ) that will run in the +enclave VM. The enclave VM has its own kernel and follows the standard Linux +boot protocol [6][8]. + +The kernel bzImage, the kernel command line, the ramdisk(s) are part of the +Enclave Image Format (EIF); plus an EIF header including metadata such as magic +number, eif version, image size and CRC. + +Hash values are computed for the entire enclave image (EIF), the kernel and +ramdisk(s). That's used, for example, to check that the enclave image that is +loaded in the enclave VM is the one that was intended to be run. + +These crypto measurements are included in a signed attestation document +generated by the Nitro Hypervisor and further used to prove the identity of the +enclave; KMS is an example of service that NE is integrated with and that checks +the attestation doc. + +The enclave image (EIF) is loaded in the enclave memory at offset 8 MiB. The +init process in the enclave connects to the vsock CID of the primary VM and a +predefined port - 9000 - to send a heartbeat value - 0xb7. This mechanism is +used to check in the primary VM that the enclave has booted. The CID of the +primary VM is 3. + +If the enclave VM crashes or gracefully exits, an interrupt event is received by +the NE driver. This event is sent further to the user space enclave process +running in the primary VM via a poll notification mechanism. Then the user space +enclave process can exit. + +[1] https://aws.amazon.com/ec2/nitro/nitro-enclaves/ +[2] https://www.kernel.org/doc/html/latest/admin-guide/mm/hugetlbpage.html +[3] https://lwn.net/Articles/807108/ +[4] https://www.kernel.org/doc/html/latest/admin-guide/kernel-parameters.html +[5] https://man7.org/linux/man-pages/man7/vsock.7.html +[6] https://www.kernel.org/doc/html/latest/x86/boot.html +[7] https://www.kernel.org/doc/html/latest/arm64/hugetlbpage.html +[8] https://www.kernel.org/doc/html/latest/arm64/booting.html diff --git a/Documentation/vm/memory-model.rst b/Documentation/vm/memory-model.rst index 58a12376b7df7..94db75ba7fbe2 100644 --- a/Documentation/vm/memory-model.rst +++ b/Documentation/vm/memory-model.rst @@ -52,8 +52,7 @@ wrapper :c:func:`free_area_init`. Yet, the mappings array is not usable until the call to :c:func:`memblock_free_all` that hands all the memory to the page allocator. -If an architecture enables `CONFIG_ARCH_HAS_HOLES_MEMORYMODEL` option, -it may free parts of the `mem_map` array that do not cover the +An architecture may free parts of the `mem_map` array that do not cover the actual physical pages. In such case, the architecture specific :c:func:`pfn_valid` implementation should take the holes in the `mem_map` into account. diff --git a/Documentation/vm/slub.rst b/Documentation/vm/slub.rst index 933ada4368ff3..309c1acb414b7 100644 --- a/Documentation/vm/slub.rst +++ b/Documentation/vm/slub.rst @@ -160,7 +160,7 @@ SLUB Debug output Here is a sample of slub debug output:: ==================================================================== - BUG kmalloc-8: Redzone overwritten + BUG kmalloc-8: Right Redzone overwritten -------------------------------------------------------------------- INFO: 0xc90f6d28-0xc90f6d2b. First byte 0x00 instead of 0xcc @@ -168,10 +168,10 @@ Here is a sample of slub debug output:: INFO: Object 0xc90f6d20 @offset=3360 fp=0xc90f6d58 INFO: Allocated in get_modalias+0x61/0xf5 age=53 cpu=1 pid=554 - Bytes b4 0xc90f6d10: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ - Object 0xc90f6d20: 31 30 31 39 2e 30 30 35 1019.005 - Redzone 0xc90f6d28: 00 cc cc cc . - Padding 0xc90f6d50: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ + Bytes b4 (0xc90f6d10): 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ + Object (0xc90f6d20): 31 30 31 39 2e 30 30 35 1019.005 + Redzone (0xc90f6d28): 00 cc cc cc . + Padding (0xc90f6d50): 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ [] dump_trace+0x63/0x1eb [] show_trace_log_lvl+0x1a/0x2f diff --git a/Documentation/x86/topology.rst b/Documentation/x86/topology.rst index e29739904e37e..7f58010ea86af 100644 --- a/Documentation/x86/topology.rst +++ b/Documentation/x86/topology.rst @@ -41,6 +41,8 @@ Package Packages contain a number of cores plus shared resources, e.g. DRAM controller, shared caches etc. +Modern systems may also use the term 'Die' for package. + AMD nomenclature for package is 'Node'. Package-related topology information in the kernel: @@ -53,11 +55,18 @@ Package-related topology information in the kernel: The number of dies in a package. This information is retrieved via CPUID. + - cpuinfo_x86.cpu_die_id: + + The physical ID of the die. This information is retrieved via CPUID. + - cpuinfo_x86.phys_proc_id: The physical ID of the package. This information is retrieved via CPUID and deduced from the APIC IDs of the cores in the package. + Modern systems use this value for the socket. There may be multiple + packages within a socket. This value may differ from cpu_die_id. + - cpuinfo_x86.logical_proc_id: The logical ID of the package. As we do not trust BIOSes to enumerate the diff --git a/Documentation/xtensa/mmu.rst b/Documentation/xtensa/mmu.rst index e52a12960fdc4..450573afa31a6 100644 --- a/Documentation/xtensa/mmu.rst +++ b/Documentation/xtensa/mmu.rst @@ -82,7 +82,8 @@ Default MMUv2-compatible layout:: +------------------+ | VMALLOC area | VMALLOC_START 0xc0000000 128MB - 64KB +------------------+ VMALLOC_END - | Cache aliasing | TLBTEMP_BASE_1 0xc7ff0000 DCACHE_WAY_SIZE + +------------------+ + | Cache aliasing | TLBTEMP_BASE_1 0xc8000000 DCACHE_WAY_SIZE | remap area 1 | +------------------+ | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE @@ -124,7 +125,8 @@ Default MMUv2-compatible layout:: +------------------+ | VMALLOC area | VMALLOC_START 0xa0000000 128MB - 64KB +------------------+ VMALLOC_END - | Cache aliasing | TLBTEMP_BASE_1 0xa7ff0000 DCACHE_WAY_SIZE + +------------------+ + | Cache aliasing | TLBTEMP_BASE_1 0xa8000000 DCACHE_WAY_SIZE | remap area 1 | +------------------+ | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE @@ -167,7 +169,8 @@ Default MMUv2-compatible layout:: +------------------+ | VMALLOC area | VMALLOC_START 0x90000000 128MB - 64KB +------------------+ VMALLOC_END - | Cache aliasing | TLBTEMP_BASE_1 0x97ff0000 DCACHE_WAY_SIZE + +------------------+ + | Cache aliasing | TLBTEMP_BASE_1 0x98000000 DCACHE_WAY_SIZE | remap area 1 | +------------------+ | Cache aliasing | TLBTEMP_BASE_2 DCACHE_WAY_SIZE diff --git a/Kconfig b/Kconfig index e10b3ee084d4d..efa79986a2b80 100644 --- a/Kconfig +++ b/Kconfig @@ -21,6 +21,8 @@ source "net/Kconfig" source "drivers/Kconfig" +source "ubuntu/Kconfig" + source "fs/Kconfig" source "security/Kconfig" diff --git a/MAINTAINERS b/MAINTAINERS index 9d3a5c54a41d2..679c132ea85fe 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -537,6 +537,17 @@ S: Maintained F: Documentation/scsi/advansys.txt F: drivers/scsi/advansys.c +ADVANTECH AHC1EC0 EMBEDDED CONTROLLER DRIVER +M: Campion Kang +L: linux-kernel@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/mfd/ahc1ec0.yaml +F: drivers/hwmon/ahc1ec0-hwmon.c +F: drivers/mfd/ahc1ec0.c +F: drivers/watchdog/ahc1ec0-wdt.c +F: include/dt-bindings/mfd/ahc1ec0-dt.h +F: include/linux/mfd/ahc1ec0.h + ADXL34X THREE-AXIS DIGITAL ACCELEROMETER DRIVER (ADXL345/ADXL346) M: Michael Hennerich W: http://wiki.analog.com/ADXL345 @@ -2832,6 +2843,19 @@ F: include/linux/audit.h F: include/uapi/linux/audit.h F: kernel/audit* +AUFS (advanced multi layered unification filesystem) FILESYSTEM +M: "J. R. Okajima" +L: aufs-users@lists.sourceforge.net (members only) +L: linux-unionfs@vger.kernel.org +W: http://aufs.sourceforge.net +T: git://github.com/sfjro/aufs4-linux.git +S: Supported +F: Documentation/filesystems/aufs/ +F: Documentation/ABI/testing/debugfs-aufs +F: Documentation/ABI/testing/sysfs-aufs +F: fs/aufs/ +F: include/uapi/linux/aufs_type.h + AUXILIARY DISPLAY DRIVERS M: Miguel Ojeda Sandonis S: Maintained @@ -4028,6 +4052,7 @@ B: https://github.com/ClangBuiltLinux/linux/issues C: irc://chat.freenode.net/clangbuiltlinux S: Supported K: \b(?i:clang|llvm)\b +F: Documentation/kbuild/llvm.rst CLEANCACHE API M: Konrad Rzeszutek Wilk @@ -6973,6 +6998,7 @@ L: linux-acpi@vger.kernel.org S: Maintained F: Documentation/firmware-guide/acpi/gpio-properties.rst F: drivers/gpio/gpiolib-acpi.c +F: drivers/gpio/gpiolib-acpi.h GPIO IR Transmitter M: Sean Young @@ -7364,6 +7390,25 @@ F: include/uapi/linux/if_hippi.h F: net/802/hippi.c F: drivers/net/hippi/ +HISILICON SECURITY ENGINE V2 DRIVER (SEC2) +M: Zaibo Xu +L: linux-crypto@vger.kernel.org +S: Maintained +F: drivers/crypto/hisilicon/sec2/sec_crypto.c +F: drivers/crypto/hisilicon/sec2/sec_main.c +F: drivers/crypto/hisilicon/sec2/sec_crypto.h +F: drivers/crypto/hisilicon/sec2/sec.h +F: Documentation/ABI/testing/debugfs-hisi-sec + +HISILICON HIGH PERFORMANCE RSA ENGINE DRIVER (HPRE) +M: Zaibo Xu +L: linux-crypto@vger.kernel.org +S: Maintained +F: drivers/crypto/hisilicon/hpre/hpre_crypto.c +F: drivers/crypto/hisilicon/hpre/hpre_main.c +F: drivers/crypto/hisilicon/hpre/hpre.h +F: Documentation/ABI/testing/debugfs-hisi-hpre + HISILICON NETWORK SUBSYSTEM 3 DRIVER (HNS3) M: Yisen Zhuang M: Salil Mehta @@ -7372,6 +7417,11 @@ W: http://www.hisilicon.com S: Maintained F: drivers/net/ethernet/hisilicon/hns3/ +HISILICON TRUE RANDOM NUMBER GENERATOR V2 SUPPORT +M: Zaibo Xu +S: Maintained +F: drivers/char/hw_random/hisi-trng-v2.c + HISILICON LPC BUS DRIVER M: john.garry@huawei.com W: http://www.hisilicon.com @@ -7410,6 +7460,12 @@ S: Supported F: drivers/scsi/hisi_sas/ F: Documentation/devicetree/bindings/scsi/hisilicon-sas.txt +HISILICON V3XX SPI NOR FLASH Controller Driver +M: John Garry +W: http://www.hisilicon.com +S: Maintained +F: drivers/spi/spi-hisi-sfc-v3xx.c + HISILICON QM AND ZIP Controller DRIVER M: Zhou Wang L: linux-crypto@vger.kernel.org @@ -7417,7 +7473,6 @@ S: Maintained F: drivers/crypto/hisilicon/qm.c F: drivers/crypto/hisilicon/qm.h F: drivers/crypto/hisilicon/sgl.c -F: drivers/crypto/hisilicon/sgl.h F: drivers/crypto/hisilicon/zip/ F: Documentation/ABI/testing/debugfs-hisi-zip @@ -8200,7 +8255,7 @@ M: Joonas Lahtinen M: Rodrigo Vivi L: intel-gfx@lists.freedesktop.org W: https://01.org/linuxgraphics/ -B: https://01.org/linuxgraphics/documentation/how-report-bugs +B: https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs C: irc://chat.freenode.net/intel-gfx Q: http://patchwork.freedesktop.org/project/intel-gfx/ T: git git://anongit.freedesktop.org/drm-intel @@ -8703,8 +8758,10 @@ L: isdn4linux@listserv.isdn4linux.de (subscribers-only) L: netdev@vger.kernel.org W: http://www.isdn4linux.de S: Maintained -F: drivers/isdn/mISDN -F: drivers/isdn/hardware +F: drivers/isdn/mISDN/ +F: drivers/isdn/hardware/ +F: drivers/isdn/Kconfig +F: drivers/isdn/Makefile ISDN/CAPI SUBSYSTEM M: Karsten Keil @@ -8991,6 +9048,7 @@ L: kvm@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git S: Supported +F: Documentation/virt/kvm/s390* F: arch/s390/include/uapi/asm/kvm* F: arch/s390/include/asm/gmap.h F: arch/s390/include/asm/kvm* @@ -11511,6 +11569,19 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/lftan/nios2.git S: Maintained F: arch/nios2/ +NITRO ENCLAVES (NE) +M: Andra Paraschiv +M: Alexandru Vasile +M: Alexandru Ciobotaru +L: linux-kernel@vger.kernel.org +S: Supported +W: https://aws.amazon.com/ec2/nitro/nitro-enclaves/ +F: Documentation/virt/ne_overview.rst +F: drivers/virt/nitro_enclaves/ +F: include/linux/nitro_enclaves.h +F: include/uapi/linux/nitro_enclaves.h +F: samples/nitro_enclaves/ + NOHZ, DYNTICKS SUPPORT M: Frederic Weisbecker M: Thomas Gleixner @@ -13636,6 +13707,7 @@ F: arch/mips/configs/generic/board-ranchu.config RANDOM NUMBER DRIVER M: "Theodore Ts'o" +M: Jason A. Donenfeld S: Maintained F: drivers/char/random.c @@ -14131,6 +14203,7 @@ W: http://www.ibm.com/developerworks/linux/linux390/ S: Supported F: arch/s390/pci/ F: drivers/pci/hotplug/s390_pci_hpc.c +F: Documentation/s390/pci.rst S390 VFIO-CCW DRIVER M: Cornelia Huck diff --git a/Makefile b/Makefile index d4d36c61940bc..9a8fa995f92a9 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 0 +SUBLEVEL = 212 EXTRAVERSION = NAME = Kleptomaniac Octopus @@ -206,6 +206,20 @@ ifndef KBUILD_CHECKSRC KBUILD_CHECKSRC = 0 endif +# Call message checker as part of the C compilation +# +# Use 'make D=1' to enable checking +# Use 'make D=2' to create the message catalog + +ifdef D + ifeq ("$(origin D)", "command line") + KBUILD_KMSG_CHECK = $(D) + endif +endif +ifndef KBUILD_KMSG_CHECK + KBUILD_KMSG_CHECK = 0 +endif + # Use make M=dir or set the environment variable KBUILD_EXTMOD to specify the # directory of external module to build. Setting M= takes precedence. ifeq ("$(origin M)", "command line") @@ -394,8 +408,13 @@ HOST_LFS_CFLAGS := $(shell getconf LFS_CFLAGS 2>/dev/null) HOST_LFS_LDFLAGS := $(shell getconf LFS_LDFLAGS 2>/dev/null) HOST_LFS_LIBS := $(shell getconf LFS_LIBS 2>/dev/null) -HOSTCC = gcc -HOSTCXX = g++ +ifneq ($(LLVM),) +HOSTCC = clang +HOSTCXX = clang++ +else +HOSTCC = gcc +HOSTCXX = g++ +endif KBUILD_HOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 \ -fomit-frame-pointer -std=gnu89 $(HOST_LFS_CFLAGS) \ $(HOSTCFLAGS) @@ -404,31 +423,49 @@ KBUILD_HOSTLDFLAGS := $(HOST_LFS_LDFLAGS) $(HOSTLDFLAGS) KBUILD_HOSTLDLIBS := $(HOST_LFS_LIBS) $(HOSTLDLIBS) # Make variables (CC, etc...) -AS = $(CROSS_COMPILE)as -LD = $(CROSS_COMPILE)ld -CC = $(CROSS_COMPILE)gcc CPP = $(CC) -E +ifneq ($(LLVM),) +CC = clang +LD = ld.lld +AR = llvm-ar +NM = llvm-nm +OBJCOPY = llvm-objcopy +OBJDUMP = llvm-objdump +READELF = llvm-readelf +OBJSIZE = llvm-size +STRIP = llvm-strip +else +CC = $(CROSS_COMPILE)gcc +LD = $(CROSS_COMPILE)ld AR = $(CROSS_COMPILE)ar NM = $(CROSS_COMPILE)nm -STRIP = $(CROSS_COMPILE)strip OBJCOPY = $(CROSS_COMPILE)objcopy OBJDUMP = $(CROSS_COMPILE)objdump +READELF = $(CROSS_COMPILE)readelf OBJSIZE = $(CROSS_COMPILE)size +STRIP = $(CROSS_COMPILE)strip +endif PAHOLE = pahole LEX = flex YACC = bison AWK = awk INSTALLKERNEL := installkernel -DEPMOD = /sbin/depmod +DEPMOD = depmod PERL = perl PYTHON = python -PYTHON2 = python2 PYTHON3 = python3 CHECK = sparse BASH = bash +KGZIP = gzip +KBZIP2 = bzip2 +KLZOP = lzop +LZMA = lzma +LZ4 = lz4c +XZ = xz CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ -Wbitwise -Wno-return-void -Wno-unknown-attribute $(CF) +KMSG_CHECK = $(srctree)/scripts/kmsg-doc NOSTDINC_FLAGS := CFLAGS_MODULE = AFLAGS_MODULE = @@ -437,6 +474,13 @@ CFLAGS_KERNEL = AFLAGS_KERNEL = LDFLAGS_vmlinux = +# Prefer linux-backports-modules +ifneq ($(KBUILD_SRC),) +ifneq ($(shell if test -e $(KBUILD_OUTPUT)/ubuntu-build; then echo yes; fi),yes) +UBUNTUINCLUDE := -I/usr/src/linux-headers-lbm-$(KERNELRELEASE) +endif +endif + # Use USERINCLUDE when you must reference the UAPI directories only. USERINCLUDE := \ -I$(srctree)/arch/$(SRCARCH)/include/uapi \ @@ -448,6 +492,7 @@ USERINCLUDE := \ # Use LINUXINCLUDE when you must reference the include/ directory. # Needed to be compatible with the O= option LINUXINCLUDE := \ + $(UBUNTUINCLUDE) \ -I$(srctree)/arch/$(SRCARCH)/include \ -I$(objtree)/arch/$(SRCARCH)/include/generated \ $(if $(building_out_of_srctree),-I$(srctree)/include) \ @@ -458,7 +503,7 @@ KBUILD_AFLAGS := -D__ASSEMBLY__ -fno-PIE KBUILD_CFLAGS := -Wall -Wundef -Werror=strict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common -fshort-wchar -fno-PIE \ -Werror=implicit-function-declaration -Werror=implicit-int \ - -Wno-format-security \ + -Werror=return-type -Wno-format-security \ -std=gnu89 KBUILD_CPPFLAGS := -D__KERNEL__ KBUILD_AFLAGS_KERNEL := @@ -471,15 +516,17 @@ KBUILD_LDFLAGS := GCC_PLUGINS_CFLAGS := CLANG_FLAGS := -export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE AS LD CC -export CPP AR NM STRIP OBJCOPY OBJDUMP OBJSIZE PAHOLE LEX YACC AWK INSTALLKERNEL -export PERL PYTHON PYTHON2 PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX +export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC +export CPP AR NM STRIP OBJCOPY OBJDUMP OBJSIZE READELF PAHOLE LEX YACC AWK INSTALLKERNEL +export PERL PYTHON PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX +export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS KBUILD_LDFLAGS export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE export CFLAGS_KASAN CFLAGS_KASAN_NOSANITIZE CFLAGS_UBSAN export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE +export KBUILD_KMSG_CHECK KMSG_CHECK export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL @@ -528,13 +575,13 @@ ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),) ifneq ($(CROSS_COMPILE),) CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%)) GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)elfedit)) -CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR) +CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..) endif ifneq ($(GCC_TOOLCHAIN),) CLANG_FLAGS += --gcc-toolchain=$(GCC_TOOLCHAIN) endif -ifeq ($(shell $(AS) --version 2>&1 | head -n 1 | grep clang),) +ifneq ($(LLVM_IAS),1) CLANG_FLAGS += -no-integrated-as endif CLANG_FLAGS += -Werror=unknown-warning-option @@ -587,12 +634,8 @@ KBUILD_MODULES := KBUILD_BUILTIN := 1 # If we have only "make modules", don't compile built-in objects. -# When we're building modules with modversions, we need to consider -# the built-in objects during the descend as well, in order to -# make sure the checksums are up to date before we record them. - ifeq ($(MAKECMDGOALS),modules) - KBUILD_BUILTIN := $(if $(CONFIG_MODVERSIONS),1) + KBUILD_BUILTIN := endif # If we have "make modules", compile modules @@ -616,9 +659,8 @@ endif ifeq ($(KBUILD_EXTMOD),) # Objects we will link into vmlinux / subdirs we need to visit init-y := init/ -drivers-y := drivers/ sound/ +drivers-y := drivers/ drivers-$(CONFIG_SAMPLES) += samples/ -drivers-$(CONFIG_KERNEL_HEADER_TEST) += include/ net-y := net/ libs-y := lib/ core-y := usr/ @@ -708,12 +750,9 @@ else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE KBUILD_CFLAGS += -Os endif -ifdef CONFIG_CC_DISABLE_WARN_MAYBE_UNINITIALIZED -KBUILD_CFLAGS += -Wno-maybe-uninitialized -endif - # Tell gcc to never replace conditional load with a non-conditional one KBUILD_CFLAGS += $(call cc-option,--param=allow-store-data-races=0) +KBUILD_CFLAGS += $(call cc-option,-fno-allow-store-data-races) include scripts/Makefile.kcov include scripts/Makefile.gcc-plugins @@ -750,16 +789,16 @@ KBUILD_CFLAGS += -Wno-tautological-compare KBUILD_CFLAGS += -mno-global-merge else -# These warnings generated too much noise in a regular build. -# Use make W=1 to enable them (see scripts/Makefile.extrawarn) -KBUILD_CFLAGS += -Wno-unused-but-set-variable - # Warn about unmarked fall-throughs in switch statement. # Disabled for clang while comment to attribute conversion happens and # https://github.com/ClangBuiltLinux/linux/issues/636 is discussed. KBUILD_CFLAGS += $(call cc-option,-Wimplicit-fallthrough,) endif +# These warnings generated too much noise in a regular build. +# Use make W=1 to enable them (see scripts/Makefile.extrawarn) +KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) + KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) ifdef CONFIG_FRAME_POINTER KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls @@ -787,8 +826,11 @@ DEBUG_CFLAGS += -gsplit-dwarf else DEBUG_CFLAGS += -g endif +ifneq ($(LLVM_IAS),1) KBUILD_AFLAGS += -Wa,-gdwarf-2 endif +endif + ifdef CONFIG_DEBUG_INFO_DWARF4 DEBUG_CFLAGS += -gdwarf-4 endif @@ -861,6 +903,17 @@ KBUILD_CFLAGS += -Wno-pointer-sign # disable stringop warnings in gcc 8+ KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation) +# We'll want to enable this eventually, but it's not going away for 5.7 at least +KBUILD_CFLAGS += $(call cc-disable-warning, zero-length-bounds) +KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds) +KBUILD_CFLAGS += $(call cc-disable-warning, stringop-overflow) + +# Another good warning that we'll want to enable eventually +KBUILD_CFLAGS += $(call cc-disable-warning, restrict) + +# Enabled with W=2, disabled by default as noisy +KBUILD_CFLAGS += $(call cc-disable-warning, maybe-uninitialized) + # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) @@ -891,12 +944,6 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) # change __FILE__ to the relative path from the srctree KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) -# ensure -fcf-protection is disabled when using retpoline as it is -# incompatible with -mindirect-branch=thunk-extern -ifdef CONFIG_RETPOLINE -KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) -endif - include scripts/Makefile.kasan include scripts/Makefile.extrawarn include scripts/Makefile.ubsan @@ -909,12 +956,15 @@ KBUILD_CFLAGS += $(KCFLAGS) KBUILD_LDFLAGS_MODULE += --build-id LDFLAGS_vmlinux += --build-id +KBUILD_LDFLAGS += -z noexecstack +KBUILD_LDFLAGS += $(call ld-option,--no-warn-rwx-segments) + ifeq ($(CONFIG_STRIP_ASM_SYMS),y) LDFLAGS_vmlinux += $(call ld-option, -X,) endif ifeq ($(CONFIG_RELR),y) -LDFLAGS_vmlinux += --pack-dyn-relocs=relr +LDFLAGS_vmlinux += --pack-dyn-relocs=relr --use-android-relr-tags endif # make the checker run with the right architecture @@ -978,10 +1028,10 @@ export mod_strip_cmd mod_compress_cmd = true ifdef CONFIG_MODULE_COMPRESS ifdef CONFIG_MODULE_COMPRESS_GZIP - mod_compress_cmd = gzip -n -f + mod_compress_cmd = $(KGZIP) -n -f endif # CONFIG_MODULE_COMPRESS_GZIP ifdef CONFIG_MODULE_COMPRESS_XZ - mod_compress_cmd = xz -f + mod_compress_cmd = $(XZ) -f endif # CONFIG_MODULE_COMPRESS_XZ endif # CONFIG_MODULE_COMPRESS export mod_compress_cmd @@ -999,7 +1049,7 @@ HOST_LIBELF_LIBS = $(shell pkg-config libelf --libs 2>/dev/null || echo -lelf) ifdef CONFIG_STACK_VALIDATION has_libelf := $(call try-run,\ - echo "int main() {}" | $(HOSTCC) -xc -o /dev/null $(HOST_LIBELF_LIBS) -,1,0) + echo "int main() {}" | $(HOSTCC) $(KBUILD_HOSTLDFLAGS) -xc -o /dev/null $(HOST_LIBELF_LIBS) -,1,0) ifeq ($(has_libelf),1) objtool_target := tools/objtool FORCE else @@ -1050,7 +1100,7 @@ PHONY += autoksyms_recursive ifdef CONFIG_TRIM_UNUSED_KSYMS autoksyms_recursive: descend modules.order $(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh \ - "$(MAKE) -f $(srctree)/Makefile vmlinux" + "$(MAKE) -f $(srctree)/Makefile autoksyms_recursive" endif # For the kernel to actually contain only the needed exported symbols, @@ -1152,11 +1202,19 @@ define filechk_utsrelease.h endef define filechk_version.h - echo \#define LINUX_VERSION_CODE $(shell \ - expr $(VERSION) \* 65536 + 0$(PATCHLEVEL) \* 256 + 0$(SUBLEVEL)); \ - echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))' + if [ $(SUBLEVEL) -gt 255 ]; then \ + echo \#define LINUX_VERSION_CODE $(shell \ + expr $(VERSION) \* 65536 + $(PATCHLEVEL) \* 256 + 255); \ + else \ + echo \#define LINUX_VERSION_CODE $(shell \ + expr $(VERSION) \* 65536 + $(PATCHLEVEL) \* 256 + $(SUBLEVEL)); \ + fi; \ + echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + \ + ((c) > 255 ? 255 : (c)))' endef +$(version_h): PATCHLEVEL := $(if $(PATCHLEVEL), $(PATCHLEVEL), 0) +$(version_h): SUBLEVEL := $(if $(SUBLEVEL), $(SUBLEVEL), 0) $(version_h): FORCE $(call filechk,version.h) $(Q)rm -f $(old_version_h) @@ -1196,19 +1254,15 @@ headers: $(version_h) scripts_unifdef uapi-asm-generic archheaders archscripts $(Q)$(MAKE) $(hdr-inst)=include/uapi $(Q)$(MAKE) $(hdr-inst)=arch/$(SRCARCH)/include/uapi +# Deprecated. It is no-op now. PHONY += headers_check -headers_check: headers - $(Q)$(MAKE) $(hdr-inst)=include/uapi HDRCHECK=1 - $(Q)$(MAKE) $(hdr-inst)=arch/$(SRCARCH)/include/uapi HDRCHECK=1 +headers_check: + @: ifdef CONFIG_HEADERS_INSTALL prepare: headers endif -ifdef CONFIG_HEADERS_CHECK -all: headers_check -endif - PHONY += scripts_unifdef scripts_unifdef: scripts_basic $(Q)$(MAKE) $(build)=scripts scripts/unifdef @@ -1242,12 +1296,16 @@ ifneq ($(dtstree),) %.dtb: include/config/kernel.release scripts_dtc $(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@ -PHONY += dtbs dtbs_install dt_binding_check -dtbs dtbs_check: include/config/kernel.release scripts_dtc +PHONY += dtbs dtbs_install dtbs_check +dtbs: include/config/kernel.release scripts_dtc $(Q)$(MAKE) $(build)=$(dtstree) +ifneq ($(filter dtbs_check, $(MAKECMDGOALS)),) +dtbs: dt_binding_check +endif + dtbs_check: export CHECK_DTBS=1 -dtbs_check: dt_binding_check +dtbs_check: dtbs dtbs_install: $(Q)$(MAKE) $(dtbinst)=$(dtstree) @@ -1262,6 +1320,7 @@ PHONY += scripts_dtc scripts_dtc: scripts_basic $(Q)$(MAKE) $(build)=scripts/dtc +PHONY += dt_binding_check dt_binding_check: scripts_dtc $(Q)$(MAKE) $(build)=Documentation/devicetree/bindings @@ -1274,6 +1333,13 @@ ifdef CONFIG_MODULES all: modules +# When we're building modules with modversions, we need to consider +# the built-in objects during the descend as well, in order to +# make sure the checksums are up to date before we record them. +ifdef CONFIG_MODVERSIONS + KBUILD_BUILTIN := 1 +endif + # Build modules # # A module can be listed more than once in obj-m resulting in @@ -1476,7 +1542,6 @@ help: @echo ' versioncheck - Sanity check on version.h usage' @echo ' includecheck - Check for duplicate included header files' @echo ' export_report - List the usages of all exported symbols' - @echo ' headers_check - Sanity check on exported headers' @echo ' headerdep - Detect inclusion cycles in headers' @echo ' coccicheck - Check with Coccinelle' @echo '' @@ -1641,6 +1706,50 @@ help: PHONY += prepare endif # KBUILD_EXTMOD +# Single targets +# --------------------------------------------------------------------------- +# To build individual files in subdirectories, you can do like this: +# +# make foo/bar/baz.s +# +# The supported suffixes for single-target are listed in 'single-targets' +# +# To build only under specific subdirectories, you can do like this: +# +# make foo/bar/baz/ + +ifdef single-build + +# .ko is special because modpost is needed +single-ko := $(sort $(filter %.ko, $(MAKECMDGOALS))) +single-no-ko := $(sort $(patsubst %.ko,%.mod, $(MAKECMDGOALS))) + +$(single-ko): single_modpost + @: +$(single-no-ko): descend + @: + +ifeq ($(KBUILD_EXTMOD),) +# For the single build of in-tree modules, use a temporary file to avoid +# the situation of modules_install installing an invalid modules.order. +MODORDER := .modules.tmp +endif + +PHONY += single_modpost +single_modpost: $(single-no-ko) + $(Q){ $(foreach m, $(single-ko), echo $(extmod-prefix)$m;) } > $(MODORDER) + $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost + +KBUILD_MODULES := 1 + +export KBUILD_SINGLE_TARGETS := $(addprefix $(extmod-prefix), $(single-no-ko)) + +# trim unrelated directories +build-dirs := $(foreach d, $(build-dirs), \ + $(if $(filter $(d)/%, $(KBUILD_SINGLE_TARGETS)), $(d))) + +endif + # Handle descending into subdirectories listed in $(build-dirs) # Preset locale variables to speed up the build process. Limit locale # tweaks to this spot to avoid wrong language settings when running @@ -1649,7 +1758,9 @@ endif # KBUILD_EXTMOD PHONY += descend $(build-dirs) descend: $(build-dirs) $(build-dirs): prepare - $(Q)$(MAKE) $(build)=$@ single-build=$(single-build) need-builtin=1 need-modorder=1 + $(Q)$(MAKE) $(build)=$@ \ + single-build=$(if $(filter-out $@/, $(single-no-ko)),1) \ + need-builtin=1 need-modorder=1 clean-dirs := $(addprefix _clean_, $(clean-dirs)) PHONY += $(clean-dirs) clean @@ -1753,50 +1864,6 @@ tools/%: FORCE $(Q)mkdir -p $(objtree)/tools $(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(tools_silent) $(filter --j% -j,$(MAKEFLAGS))" O=$(abspath $(objtree)) subdir=tools -C $(srctree)/tools/ $* -# Single targets -# --------------------------------------------------------------------------- -# To build individual files in subdirectories, you can do like this: -# -# make foo/bar/baz.s -# -# The supported suffixes for single-target are listed in 'single-targets' -# -# To build only under specific subdirectories, you can do like this: -# -# make foo/bar/baz/ - -ifdef single-build - -single-all := $(filter $(single-targets), $(MAKECMDGOALS)) - -# .ko is special because modpost is needed -single-ko := $(sort $(filter %.ko, $(single-all))) -single-no-ko := $(sort $(patsubst %.ko,%.mod, $(single-all))) - -$(single-ko): single_modpost - @: -$(single-no-ko): descend - @: - -ifeq ($(KBUILD_EXTMOD),) -# For the single build of in-tree modules, use a temporary file to avoid -# the situation of modules_install installing an invalid modules.order. -MODORDER := .modules.tmp -endif - -PHONY += single_modpost -single_modpost: $(single-no-ko) - $(Q){ $(foreach m, $(single-ko), echo $(extmod-prefix)$m;) } > $(MODORDER) - $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost - -KBUILD_MODULES := 1 - -export KBUILD_SINGLE_TARGETS := $(addprefix $(extmod-prefix), $(single-no-ko)) - -single-build = $(if $(filter-out $@/, $(single-no-ko)),1) - -endif - # FIXME Should go into a make.lib or something # =========================================================================== diff --git a/Ubuntu.md b/Ubuntu.md new file mode 100644 index 0000000000000..0cd42210336e6 --- /dev/null +++ b/Ubuntu.md @@ -0,0 +1,8 @@ +Name: linux-aws +Version: 5.4.0 +Series: 20.04 (focal) +Description: + This is the source code for the Ubuntu linux kernel for the 20.04 series. This + source tree is used to produce the flavours: aws. + This kernel is configured to support the widest range of desktop, laptop and + server configurations. diff --git a/arch/Kconfig b/arch/Kconfig index 5f8a5d84dbbe9..2219a07dca1ef 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -131,6 +131,22 @@ config UPROBES managed by the kernel and kept transparent to the probed application. ) +config HAVE_64BIT_ALIGNED_ACCESS + def_bool 64BIT && !HAVE_EFFICIENT_UNALIGNED_ACCESS + help + Some architectures require 64 bit accesses to be 64 bit + aligned, which also requires structs containing 64 bit values + to be 64 bit aligned too. This includes some 32 bit + architectures which can do 64 bit accesses, as well as 64 bit + architectures without unaligned access. + + This symbol should be selected by an architecture if 64 bit + accesses are required to be 64 bit aligned in this way even + though it is not a 64 bit architecture. + + See Documentation/unaligned-memory-access.txt for more + information on the topic of unaligned memory accesses. + config HAVE_EFFICIENT_UNALIGNED_ACCESS bool help @@ -396,15 +412,22 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE config HAVE_RCU_TABLE_FREE bool -config HAVE_RCU_TABLE_NO_INVALIDATE +config HAVE_MMU_GATHER_PAGE_SIZE bool -config HAVE_MMU_GATHER_PAGE_SIZE +config MMU_GATHER_NO_RANGE bool config HAVE_MMU_GATHER_NO_GATHER bool +config ARCH_WANT_IRQS_OFF_ACTIVATE_MM + bool + help + Temporary select until all architectures can be converted to have + irqs disabled over activate_mm. Architectures that do IPI based TLB + shootdowns should enable this. + config ARCH_HAVE_NMI_SAFE_CMPXCHG bool @@ -892,27 +915,6 @@ config STRICT_MODULE_RWX config ARCH_HAS_PHYS_TO_DMA bool -config ARCH_HAS_REFCOUNT - bool - help - An architecture selects this when it has implemented refcount_t - using open coded assembly primitives that provide an optimized - refcount_t implementation, possibly at the expense of some full - refcount state checks of CONFIG_REFCOUNT_FULL=y. - - The refcount overflow check behavior, however, must be retained. - Catching overflows is the primary security concern for protecting - against bugs in reference counts. - -config REFCOUNT_FULL - bool "Perform full reference count validation at the expense of speed" - help - Enabling this switches the refcounting infrastructure from a fast - unchecked atomic_t implementation to a fully state checked - implementation, which can be (slightly) slower but provides protections - against various use-after-free conditions that can be used in - security flaw exploits. - config HAVE_ARCH_COMPILER_H bool help diff --git a/arch/alpha/configs/defconfig b/arch/alpha/configs/defconfig index f4ec420d7f2df..3a132c91d45bc 100644 --- a/arch/alpha/configs/defconfig +++ b/arch/alpha/configs/defconfig @@ -36,7 +36,6 @@ CONFIG_BLK_DEV_CY82C693=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_SCSI_AIC7XXX=m CONFIG_AIC7XXX_CMDS_PER_DEVICE=253 # CONFIG_AIC7XXX_DEBUG_ENABLE is not set diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index af2c0063dc750..66a384a4ddbad 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -61,7 +61,7 @@ extern inline void set_hae(unsigned long new_hae) * Change virtual addresses to physical addresses and vv. */ #ifdef USE_48_BIT_KSEG -static inline unsigned long virt_to_phys(void *address) +static inline unsigned long virt_to_phys(volatile void *address) { return (unsigned long)address - IDENT_ADDR; } @@ -71,7 +71,7 @@ static inline void * phys_to_virt(unsigned long address) return (void *) (address + IDENT_ADDR); } #else -static inline unsigned long virt_to_phys(void *address) +static inline unsigned long virt_to_phys(volatile void *address) { unsigned long phys = (unsigned long)address; @@ -107,7 +107,7 @@ static inline void * phys_to_virt(unsigned long address) extern unsigned long __direct_map_base; extern unsigned long __direct_map_size; -static inline unsigned long __deprecated virt_to_bus(void *address) +static inline unsigned long __deprecated virt_to_bus(volatile void *address) { unsigned long phys = virt_to_phys(address); unsigned long bus = phys + __direct_map_base; @@ -322,14 +322,18 @@ static inline int __is_mmio(const volatile void __iomem *addr) #if IO_CONCAT(__IO_PREFIX,trivial_io_bw) extern inline unsigned int ioread8(void __iomem *addr) { - unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread8)(addr); + unsigned int ret; + mb(); + ret = IO_CONCAT(__IO_PREFIX,ioread8)(addr); mb(); return ret; } extern inline unsigned int ioread16(void __iomem *addr) { - unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread16)(addr); + unsigned int ret; + mb(); + ret = IO_CONCAT(__IO_PREFIX,ioread16)(addr); mb(); return ret; } @@ -370,7 +374,9 @@ extern inline void outw(u16 b, unsigned long port) #if IO_CONCAT(__IO_PREFIX,trivial_io_lq) extern inline unsigned int ioread32(void __iomem *addr) { - unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread32)(addr); + unsigned int ret; + mb(); + ret = IO_CONCAT(__IO_PREFIX,ioread32)(addr); mb(); return ret; } @@ -415,14 +421,18 @@ extern inline void __raw_writew(u16 b, volatile void __iomem *addr) extern inline u8 readb(const volatile void __iomem *addr) { - u8 ret = __raw_readb(addr); + u8 ret; + mb(); + ret = __raw_readb(addr); mb(); return ret; } extern inline u16 readw(const volatile void __iomem *addr) { - u16 ret = __raw_readw(addr); + u16 ret; + mb(); + ret = __raw_readw(addr); mb(); return ret; } @@ -463,14 +473,18 @@ extern inline void __raw_writeq(u64 b, volatile void __iomem *addr) extern inline u32 readl(const volatile void __iomem *addr) { - u32 ret = __raw_readl(addr); + u32 ret; + mb(); + ret = __raw_readl(addr); mb(); return ret; } extern inline u64 readq(const volatile void __iomem *addr) { - u64 ret = __raw_readq(addr); + u64 ret; + mb(); + ret = __raw_readq(addr); mb(); return ret; } @@ -488,10 +502,10 @@ extern inline void writeq(u64 b, volatile void __iomem *addr) } #endif -#define ioread16be(p) be16_to_cpu(ioread16(p)) -#define ioread32be(p) be32_to_cpu(ioread32(p)) -#define iowrite16be(v,p) iowrite16(cpu_to_be16(v), (p)) -#define iowrite32be(v,p) iowrite32(cpu_to_be32(v), (p)) +#define ioread16be(p) swab16(ioread16(p)) +#define ioread32be(p) swab32(ioread32(p)) +#define iowrite16be(v,p) iowrite16(swab16(v), (p)) +#define iowrite32be(v,p) iowrite32(swab32(v), (p)) #define inb_p inb #define inw_p inw @@ -499,14 +513,44 @@ extern inline void writeq(u64 b, volatile void __iomem *addr) #define outb_p outb #define outw_p outw #define outl_p outl -#define readb_relaxed(addr) __raw_readb(addr) -#define readw_relaxed(addr) __raw_readw(addr) -#define readl_relaxed(addr) __raw_readl(addr) -#define readq_relaxed(addr) __raw_readq(addr) -#define writeb_relaxed(b, addr) __raw_writeb(b, addr) -#define writew_relaxed(b, addr) __raw_writew(b, addr) -#define writel_relaxed(b, addr) __raw_writel(b, addr) -#define writeq_relaxed(b, addr) __raw_writeq(b, addr) + +extern u8 readb_relaxed(const volatile void __iomem *addr); +extern u16 readw_relaxed(const volatile void __iomem *addr); +extern u32 readl_relaxed(const volatile void __iomem *addr); +extern u64 readq_relaxed(const volatile void __iomem *addr); + +#if IO_CONCAT(__IO_PREFIX,trivial_io_bw) +extern inline u8 readb_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readb(addr); +} + +extern inline u16 readw_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readw(addr); +} +#endif + +#if IO_CONCAT(__IO_PREFIX,trivial_io_lq) +extern inline u32 readl_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readl(addr); +} + +extern inline u64 readq_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readq(addr); +} +#endif + +#define writeb_relaxed writeb +#define writew_relaxed writew +#define writel_relaxed writel +#define writeq_relaxed writeq /* * String version of IO memory access ops: diff --git a/arch/alpha/include/asm/timex.h b/arch/alpha/include/asm/timex.h index b565cc6f408e9..f89798da8a147 100644 --- a/arch/alpha/include/asm/timex.h +++ b/arch/alpha/include/asm/timex.h @@ -28,5 +28,6 @@ static inline cycles_t get_cycles (void) __asm__ __volatile__ ("rpcc %0" : "=r"(ret)); return ret; } +#define get_cycles get_cycles #endif diff --git a/arch/alpha/kernel/io.c b/arch/alpha/kernel/io.c index c025a3e5e3578..938de13adfbfe 100644 --- a/arch/alpha/kernel/io.c +++ b/arch/alpha/kernel/io.c @@ -16,21 +16,27 @@ unsigned int ioread8(void __iomem *addr) { - unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread8)(addr); + unsigned int ret; + mb(); + ret = IO_CONCAT(__IO_PREFIX,ioread8)(addr); mb(); return ret; } unsigned int ioread16(void __iomem *addr) { - unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread16)(addr); + unsigned int ret; + mb(); + ret = IO_CONCAT(__IO_PREFIX,ioread16)(addr); mb(); return ret; } unsigned int ioread32(void __iomem *addr) { - unsigned int ret = IO_CONCAT(__IO_PREFIX,ioread32)(addr); + unsigned int ret; + mb(); + ret = IO_CONCAT(__IO_PREFIX,ioread32)(addr); mb(); return ret; } @@ -148,28 +154,36 @@ EXPORT_SYMBOL(__raw_writeq); u8 readb(const volatile void __iomem *addr) { - u8 ret = __raw_readb(addr); + u8 ret; + mb(); + ret = __raw_readb(addr); mb(); return ret; } u16 readw(const volatile void __iomem *addr) { - u16 ret = __raw_readw(addr); + u16 ret; + mb(); + ret = __raw_readw(addr); mb(); return ret; } u32 readl(const volatile void __iomem *addr) { - u32 ret = __raw_readl(addr); + u32 ret; + mb(); + ret = __raw_readl(addr); mb(); return ret; } u64 readq(const volatile void __iomem *addr) { - u64 ret = __raw_readq(addr); + u64 ret; + mb(); + ret = __raw_readq(addr); mb(); return ret; } @@ -207,6 +221,38 @@ EXPORT_SYMBOL(writew); EXPORT_SYMBOL(writel); EXPORT_SYMBOL(writeq); +/* + * The _relaxed functions must be ordered w.r.t. each other, but they don't + * have to be ordered w.r.t. other memory accesses. + */ +u8 readb_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readb(addr); +} + +u16 readw_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readw(addr); +} + +u32 readl_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readl(addr); +} + +u64 readq_relaxed(const volatile void __iomem *addr) +{ + mb(); + return __raw_readq(addr); +} + +EXPORT_SYMBOL(readb_relaxed); +EXPORT_SYMBOL(readw_relaxed); +EXPORT_SYMBOL(readl_relaxed); +EXPORT_SYMBOL(readq_relaxed); /* * Read COUNT 8-bit bytes from port PORT into memory starting at SRC. diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 5f90df30be20a..06fd42417aa96 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -585,7 +585,7 @@ void smp_send_stop(void) { cpumask_t to_whom; - cpumask_copy(&to_whom, cpu_possible_mask); + cpumask_copy(&to_whom, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &to_whom); #ifdef DEBUG_IPI_MSG if (hard_smp_processor_id() != boot_cpu_id) diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c index 438b10c44d732..2b7a314b84522 100644 --- a/arch/alpha/kernel/srmcons.c +++ b/arch/alpha/kernel/srmcons.c @@ -59,7 +59,7 @@ srmcons_do_receive_chars(struct tty_port *port) } while((result.bits.status & 1) && (++loops < 10)); if (count) - tty_schedule_flip(port); + tty_flip_buffer_push(port); return count; } diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 8383155c8c824..a9d0b5310165f 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -29,6 +29,7 @@ config ARC select GENERIC_SMP_IDLE_THREAD select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK + select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_STACKOVERFLOW select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_IOREMAP_PROT diff --git a/arch/arc/Makefile b/arch/arc/Makefile index f1c44cccf8d6c..6f05e509889f6 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -90,16 +90,22 @@ libs-y += arch/arc/lib/ $(LIBGCC) boot := arch/arc/boot -#default target for make without any arguments. -KBUILD_IMAGE := $(boot)/bootpImage - -all: bootpImage -bootpImage: vmlinux - -boot_targets += uImage uImage.bin uImage.gz +boot_targets := uImage.bin uImage.gz uImage.lzma +PHONY += $(boot_targets) $(boot_targets): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ +uimage-default-y := uImage.bin +uimage-default-$(CONFIG_KERNEL_GZIP) := uImage.gz +uimage-default-$(CONFIG_KERNEL_LZMA) := uImage.lzma + +PHONY += uImage +uImage: $(uimage-default-y) + @ln -sf $< $(boot)/uImage + @$(kecho) ' Image $(boot)/uImage is ready' + +CLEAN_FILES += $(boot)/uImage + archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile index 538b92f4dd253..3b1f8a69a89ef 100644 --- a/arch/arc/boot/Makefile +++ b/arch/arc/boot/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -targets := vmlinux.bin vmlinux.bin.gz uImage +targets := vmlinux.bin vmlinux.bin.gz # uImage build relies on mkimage being availble on your host for ARC target # You will need to build u-boot for ARC, rename mkimage to arc-elf32-mkimage @@ -13,11 +13,6 @@ LINUX_START_TEXT = $$(readelf -h vmlinux | \ UIMAGE_LOADADDR = $(CONFIG_LINUX_LINK_BASE) UIMAGE_ENTRYADDR = $(LINUX_START_TEXT) -suffix-y := bin -suffix-$(CONFIG_KERNEL_GZIP) := gz -suffix-$(CONFIG_KERNEL_LZMA) := lzma - -targets += uImage targets += uImage.bin targets += uImage.gz targets += uImage.lzma @@ -42,7 +37,3 @@ $(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE $(obj)/uImage.lzma: $(obj)/vmlinux.bin.lzma FORCE $(call if_changed,uimage,lzma) - -$(obj)/uImage: $(obj)/uImage.$(suffix-y) - @ln -sf $(notdir $<) $@ - @echo ' Image $@ is ready' diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi index 6ec1fcdfc0d7f..92247288d0562 100644 --- a/arch/arc/boot/dts/axc001.dtsi +++ b/arch/arc/boot/dts/axc001.dtsi @@ -85,7 +85,7 @@ * avoid duplicating the MB dtsi file given that IRQ from * this intc to cpu intc are different for axs101 and axs103 */ - mb_intc: dw-apb-ictl@e0012000 { + mb_intc: interrupt-controller@e0012000 { #interrupt-cells = <1>; compatible = "snps,dw-apb-ictl"; reg = < 0x0 0xe0012000 0x0 0x200 >; diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index ac8e1b463a709..cd1edcf4f95ef 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -129,7 +129,7 @@ * avoid duplicating the MB dtsi file given that IRQ from * this intc to cpu intc are different for axs101 and axs103 */ - mb_intc: dw-apb-ictl@e0012000 { + mb_intc: interrupt-controller@e0012000 { #interrupt-cells = <1>; compatible = "snps,dw-apb-ictl"; reg = < 0x0 0xe0012000 0x0 0x200 >; diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index 9da21e7fd246f..70779386ca796 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -135,7 +135,7 @@ * avoid duplicating the MB dtsi file given that IRQ from * this intc to cpu intc are different for axs101 and axs103 */ - mb_intc: dw-apb-ictl@e0012000 { + mb_intc: interrupt-controller@e0012000 { #interrupt-cells = <1>; compatible = "snps,dw-apb-ictl"; reg = < 0x0 0xe0012000 0x0 0x200 >; diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index 08bcfed6b80f5..134cc223ea81b 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -77,6 +77,7 @@ interrupt-names = "macirq"; phy-mode = "rgmii"; snps,pbl = < 32 >; + snps,multicast-filter-bins = <256>; clocks = <&apbclk>; clock-names = "stmmaceth"; max-speed = <100>; diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 9acbeba832c0b..dcaa44e408ace 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -88,6 +88,8 @@ arcpct: pct { compatible = "snps,archs-pct"; + interrupt-parent = <&cpu_intc>; + interrupts = <20>; }; /* TIMER0 with interrupt for clockevent */ @@ -208,7 +210,7 @@ reg = <0x8000 0x2000>; interrupts = <10>; interrupt-names = "macirq"; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; snps,pbl = <32>; snps,multicast-filter-bins = <256>; clocks = <&gmacclk>; @@ -226,7 +228,7 @@ #address-cells = <1>; #size-cells = <0>; compatible = "snps,dwmac-mdio"; - phy0: ethernet-phy@0 { + phy0: ethernet-phy@0 { /* Micrel KSZ9031 */ reg = <0>; }; }; diff --git a/arch/arc/boot/dts/vdk_axc003.dtsi b/arch/arc/boot/dts/vdk_axc003.dtsi index f8be7ba8dad49..c21d0eb07bf67 100644 --- a/arch/arc/boot/dts/vdk_axc003.dtsi +++ b/arch/arc/boot/dts/vdk_axc003.dtsi @@ -46,7 +46,7 @@ }; - mb_intc: dw-apb-ictl@e0012000 { + mb_intc: interrupt-controller@e0012000 { #interrupt-cells = <1>; compatible = "snps,dw-apb-ictl"; reg = < 0xe0012000 0x200 >; diff --git a/arch/arc/boot/dts/vdk_axc003_idu.dtsi b/arch/arc/boot/dts/vdk_axc003_idu.dtsi index 0afa3e53a4e39..4d348853ac7c5 100644 --- a/arch/arc/boot/dts/vdk_axc003_idu.dtsi +++ b/arch/arc/boot/dts/vdk_axc003_idu.dtsi @@ -54,7 +54,7 @@ }; - mb_intc: dw-apb-ictl@e0012000 { + mb_intc: interrupt-controller@e0012000 { #interrupt-cells = <1>; compatible = "snps,dw-apb-ictl"; reg = < 0xe0012000 0x200 >; diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h index c77a0e3671acc..0284ace0e1ab4 100644 --- a/arch/arc/include/asm/elf.h +++ b/arch/arc/include/asm/elf.h @@ -19,7 +19,7 @@ #define R_ARC_32_PCREL 0x31 /*to set parameters in the core dumps */ -#define ELF_ARCH EM_ARCOMPACT +#define ELF_ARCH EM_ARC_INUSE #define ELF_CLASS ELFCLASS32 #ifdef CONFIG_CPU_BIG_ENDIAN diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h index d9ee43c6b7dbc..fe19f1d412e71 100644 --- a/arch/arc/include/asm/linkage.h +++ b/arch/arc/include/asm/linkage.h @@ -29,6 +29,8 @@ .endm #define ASM_NL ` /* use '`' to mark new line in macro */ +#define __ALIGN .align 4 +#define __ALIGN_STR __stringify(__ALIGN) /* annotation for data we want in DCCM - if enabled in .config */ .macro ARCFP_DATA nm diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index 0a32e8cfd074d..956f78ecf1938 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -7,9 +7,22 @@ #include +#ifdef CONFIG_ARC_HAS_PAE40 + +#define MAX_POSSIBLE_PHYSMEM_BITS 40 +#define PAGE_MASK_PHYS (0xff00000000ull | PAGE_MASK) + +#else /* CONFIG_ARC_HAS_PAE40 */ + +#define MAX_POSSIBLE_PHYSMEM_BITS 32 +#define PAGE_MASK_PHYS PAGE_MASK + +#endif /* CONFIG_ARC_HAS_PAE40 */ + #ifndef __ASSEMBLY__ #define clear_page(paddr) memset((paddr), 0, PAGE_SIZE) +#define copy_user_page(to, from, vaddr, pg) copy_page(to, from) #define copy_page(to, from) memcpy((to), (from), PAGE_SIZE) struct vm_area_struct; diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 7addd0301c51a..a1987d07d08c1 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -108,8 +108,8 @@ #define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE) /* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL) - +#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \ + _PAGE_SPECIAL) /* More Abbrevaited helpers */ #define PAGE_U_NONE __pgprot(___DEF) #define PAGE_U_R __pgprot(___DEF | _PAGE_READ) @@ -133,11 +133,7 @@ #define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ) #define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ) -#ifdef CONFIG_ARC_HAS_PAE40 -#define PTE_BITS_NON_RWX_IN_PD1 (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE) -#else -#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE) -#endif +#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE) /************************************************************************** * Mapping of vm_flags (Generic VM) to PTE flags (arch specific) diff --git a/arch/arc/include/asm/syscalls.h b/arch/arc/include/asm/syscalls.h index 7ddba13e9b599..c3f4714a4f5cd 100644 --- a/arch/arc/include/asm/syscalls.h +++ b/arch/arc/include/asm/syscalls.h @@ -11,6 +11,7 @@ #include int sys_clone_wrapper(int, int, int, int, int); +int sys_clone3_wrapper(void *, size_t); int sys_cacheflush(uint32_t, uint32_t uint32_t); int sys_arc_settls(void *); int sys_arc_gettls(void); diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h index 2a97e2718a219..2a4ad619abfba 100644 --- a/arch/arc/include/uapi/asm/page.h +++ b/arch/arc/include/uapi/asm/page.h @@ -33,5 +33,4 @@ #define PAGE_MASK (~(PAGE_SIZE-1)) - #endif /* _UAPI__ASM_ARC_PAGE_H */ diff --git a/arch/arc/include/uapi/asm/sigcontext.h b/arch/arc/include/uapi/asm/sigcontext.h index 95f8a4380e110..7a5449dfcb290 100644 --- a/arch/arc/include/uapi/asm/sigcontext.h +++ b/arch/arc/include/uapi/asm/sigcontext.h @@ -18,6 +18,7 @@ */ struct sigcontext { struct user_regs_struct regs; + struct user_regs_arcv2 v2abi; }; #endif /* _ASM_ARC_SIGCONTEXT_H */ diff --git a/arch/arc/include/uapi/asm/unistd.h b/arch/arc/include/uapi/asm/unistd.h index 5eafa11151623..fa2713ae6bea5 100644 --- a/arch/arc/include/uapi/asm/unistd.h +++ b/arch/arc/include/uapi/asm/unistd.h @@ -21,6 +21,7 @@ #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_SYS_EXECVE #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_TIME32_SYSCALLS diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 72be01270e246..beb39930eedbe 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -35,6 +35,18 @@ ENTRY(sys_clone_wrapper) b .Lret_from_system_call END(sys_clone_wrapper) +ENTRY(sys_clone3_wrapper) + SAVE_CALLEE_SAVED_USER + bl @sys_clone3 + DISCARD_CALLEE_SAVED_USER + + GET_CURR_THR_INFO_FLAGS r10 + btst r10, TIF_SYSCALL_TRACE + bnz tracesys_exit + + b .Lret_from_system_call +END(sys_clone3_wrapper) + ENTRY(ret_from_fork) ; when the forked child comes here from the __switch_to function ; r0 has the last task pointer. @@ -153,7 +165,6 @@ END(EV_Extension) tracesys: ; save EFA in case tracer wants the PC of traced task ; using ERET won't work since next-PC has already committed - lr r12, [efa] GET_CURR_TASK_FIELD_PTR TASK_THREAD, r11 st r12, [r11, THREAD_FAULT_ADDR] ; thread.fault_address @@ -166,7 +177,7 @@ tracesys: ; Do the Sys Call as we normally would. ; Validate the Sys Call number - cmp r8, NR_syscalls + cmp r8, NR_syscalls - 1 mov.hi r0, -ENOSYS bhi tracesys_exit @@ -188,6 +199,7 @@ tracesys_exit: st r0, [sp, PT_r0] ; sys call return value in pt_regs ;POST Sys Call Ptrace Hook + mov r0, sp ; pt_regs needed bl @syscall_trace_exit b ret_from_exception ; NOT ret_from_system_call at is saves r0 which ; we'd done before calling post hook above @@ -196,15 +208,9 @@ tracesys_exit: ; Breakpoint TRAP ; --------------------------------------------- trap_with_param: - - ; stop_pc info by gdb needs this info - lr r0, [efa] + mov r0, r12 ; EFA in case ptracer/gdb wants stop_pc mov r1, sp - ; Now that we have read EFA, it is safe to do "fake" rtie - ; and get out of CPU exception mode - FAKE_RET_FROM_EXCPN - ; Save callee regs in case gdb wants to have a look ; SP will grow up by size of CALLEE Reg-File ; NOTE: clobbers r12 @@ -231,6 +237,10 @@ ENTRY(EV_Trap) EXCEPTION_PROLOGUE + lr r12, [efa] + + FAKE_RET_FROM_EXCPN + ;============ TRAP 1 :breakpoints ; Check ECR for trap with arg (PROLOGUE ensures r10 has ECR) bmsk.f 0, r10, 7 @@ -238,9 +248,6 @@ ENTRY(EV_Trap) ;============ TRAP (no param): syscall top level - ; First return from Exception to pure K mode (Exception/IRQs renabled) - FAKE_RET_FROM_EXCPN - ; If syscall tracing ongoing, invoke pre-post-hooks GET_CURR_THR_INFO_FLAGS r10 btst r10, TIF_SYSCALL_TRACE @@ -249,7 +256,7 @@ ENTRY(EV_Trap) ;============ Normal syscall case ; syscall num shd not exceed the total system calls avail - cmp r8, NR_syscalls + cmp r8, NR_syscalls - 1 mov.hi r0, -ENOSYS bhi .Lret_from_system_call diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 661fd842ea97d..145722f80c9b7 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -562,7 +562,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev) { struct arc_reg_pct_build pct_bcr; struct arc_reg_cc_build cc_bcr; - int i, has_interrupts; + int i, has_interrupts, irq = -1; int counter_size; /* in bits */ union cc_name { @@ -638,22 +638,25 @@ static int arc_pmu_device_probe(struct platform_device *pdev) }; if (has_interrupts) { - int irq = platform_get_irq(pdev, 0); + irq = platform_get_irq(pdev, 0); + if (irq >= 0) { + int ret; - if (irq < 0) { - pr_err("Cannot get IRQ number for the platform\n"); - return -ENODEV; - } + arc_pmu->irq = irq; - arc_pmu->irq = irq; + /* intc map function ensures irq_set_percpu_devid() called */ + ret = request_percpu_irq(irq, arc_pmu_intr, "ARC perf counters", + this_cpu_ptr(&arc_pmu_cpu)); - /* intc map function ensures irq_set_percpu_devid() called */ - request_percpu_irq(irq, arc_pmu_intr, "ARC perf counters", - this_cpu_ptr(&arc_pmu_cpu)); + if (!ret) + on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1); + else + irq = -1; + } - on_each_cpu(arc_cpu_pmu_irq_init, &irq, 1); + } - } else + if (irq == -1) arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; /* diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index e1889ce3faf96..bfd4cbe74aa36 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -171,9 +171,8 @@ asmlinkage void ret_from_fork(void); * | user_r25 | * ------------------ <===== END of PAGE */ -int copy_thread(unsigned long clone_flags, - unsigned long usp, unsigned long kthread_arg, - struct task_struct *p) +int copy_thread_tls(unsigned long clone_flags, unsigned long usp, + unsigned long kthread_arg, struct task_struct *p, unsigned long tls) { struct pt_regs *c_regs; /* child's pt_regs */ unsigned long *childksp; /* to unwind out of __switch_to() */ @@ -231,7 +230,7 @@ int copy_thread(unsigned long clone_flags, * set task's userland tls data ptr from 4th arg * clone C-lib call is difft from clone sys-call */ - task_thread_info(p)->thr_ptr = regs->r3; + task_thread_info(p)->thr_ptr = tls; } else { /* Normal fork case: set parent's TLS ptr in child */ task_thread_info(p)->thr_ptr = diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 7ee89dc61f6e5..23dc002aa5749 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -409,12 +410,12 @@ static void arc_chk_core_config(void) if ((unsigned int)__arc_dccm_base != cpu->dccm.base_addr) panic("Linux built with incorrect DCCM Base address\n"); - if (CONFIG_ARC_DCCM_SZ != cpu->dccm.sz) + if (CONFIG_ARC_DCCM_SZ * SZ_1K != cpu->dccm.sz) panic("Linux built with incorrect DCCM Size\n"); #endif #ifdef CONFIG_ARC_HAS_ICCM - if (CONFIG_ARC_ICCM_SZ != cpu->iccm.sz) + if (CONFIG_ARC_ICCM_SZ * SZ_1K != cpu->iccm.sz) panic("Linux built with incorrect ICCM Size\n"); #endif diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 3d57ed0d85350..8877de0dfe6cf 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -61,6 +61,41 @@ struct rt_sigframe { unsigned int sigret_magic; }; +static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) +{ + int err = 0; +#ifndef CONFIG_ISA_ARCOMPACT + struct user_regs_arcv2 v2abi; + + v2abi.r30 = regs->r30; +#ifdef CONFIG_ARC_HAS_ACCL_REGS + v2abi.r58 = regs->r58; + v2abi.r59 = regs->r59; +#else + v2abi.r58 = v2abi.r59 = 0; +#endif + err = __copy_to_user(&mctx->v2abi, &v2abi, sizeof(v2abi)); +#endif + return err; +} + +static int restore_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) +{ + int err = 0; +#ifndef CONFIG_ISA_ARCOMPACT + struct user_regs_arcv2 v2abi; + + err = __copy_from_user(&v2abi, &mctx->v2abi, sizeof(v2abi)); + + regs->r30 = v2abi.r30; +#ifdef CONFIG_ARC_HAS_ACCL_REGS + regs->r58 = v2abi.r58; + regs->r59 = v2abi.r59; +#endif +#endif + return err; +} + static int stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs, sigset_t *set) @@ -94,9 +129,13 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs, err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), &uregs.scratch, sizeof(sf->uc.uc_mcontext.regs.scratch)); + + if (is_isa_arcv2()) + err |= save_arcv2_regs(&(sf->uc.uc_mcontext), regs); + err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t)); - return err; + return err ? -EFAULT : 0; } static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf) @@ -109,8 +148,12 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf) err |= __copy_from_user(&uregs.scratch, &(sf->uc.uc_mcontext.regs.scratch), sizeof(sf->uc.uc_mcontext.regs.scratch)); + + if (is_isa_arcv2()) + err |= restore_arcv2_regs(&(sf->uc.uc_mcontext), regs); + if (err) - return err; + return -EFAULT; set_current_blocked(&set); regs->bta = uregs.scratch.bta; diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index 1e440bbfa876b..fc3054c34db19 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -38,15 +38,15 @@ #ifdef CONFIG_ARC_DW2_UNWIND -static void seed_unwind_frame_info(struct task_struct *tsk, - struct pt_regs *regs, - struct unwind_frame_info *frame_info) +static int +seed_unwind_frame_info(struct task_struct *tsk, struct pt_regs *regs, + struct unwind_frame_info *frame_info) { /* * synchronous unwinding (e.g. dump_stack) * - uses current values of SP and friends */ - if (tsk == NULL && regs == NULL) { + if (regs == NULL && (tsk == NULL || tsk == current)) { unsigned long fp, sp, blink, ret; frame_info->task = current; @@ -65,11 +65,15 @@ static void seed_unwind_frame_info(struct task_struct *tsk, frame_info->call_frame = 0; } else if (regs == NULL) { /* - * Asynchronous unwinding of sleeping task - * - Gets SP etc from task's pt_regs (saved bottom of kernel - * mode stack of task) + * Asynchronous unwinding of a likely sleeping task + * - first ensure it is actually sleeping + * - if so, it will be in __switch_to, kernel mode SP of task + * is safe-kept and BLINK at a well known location in there */ + if (tsk->state == TASK_RUNNING) + return -1; + frame_info->task = tsk; frame_info->regs.r27 = TSK_K_FP(tsk); @@ -103,6 +107,8 @@ static void seed_unwind_frame_info(struct task_struct *tsk, frame_info->regs.r63 = regs->ret; frame_info->call_frame = 0; } + + return 0; } #endif @@ -112,11 +118,12 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, int (*consumer_fn) (unsigned int, void *), void *arg) { #ifdef CONFIG_ARC_DW2_UNWIND - int ret = 0; + int ret = 0, cnt = 0; unsigned int address; struct unwind_frame_info frame_info; - seed_unwind_frame_info(tsk, regs, &frame_info); + if (seed_unwind_frame_info(tsk, regs, &frame_info)) + return 0; while (1) { address = UNW_PC(&frame_info); @@ -132,6 +139,11 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, break; frame_info.regs.r63 = frame_info.regs.r31; + + if (cnt++ > 128) { + printk("unwinder looping too long, aborting !\n"); + return 0; + } } return address; /* return the last address it saw */ diff --git a/arch/arc/kernel/sys.c b/arch/arc/kernel/sys.c index fddecc76efb7c..1069446bdc589 100644 --- a/arch/arc/kernel/sys.c +++ b/arch/arc/kernel/sys.c @@ -7,6 +7,7 @@ #include #define sys_clone sys_clone_wrapper +#define sys_clone3 sys_clone3_wrapper #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index 6c693a9d29b6d..0391b8293ad85 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -88,6 +88,8 @@ SECTIONS CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) } diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index a2fbea3ee07c7..102418ac5ff4a 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -1123,7 +1123,7 @@ void clear_user_page(void *to, unsigned long u_vaddr, struct page *page) clear_page(to); clear_bit(PG_dc_clean, &page->flags); } - +EXPORT_SYMBOL(clear_user_page); /********************************************************************** * Explicit Cache flush request from user space via syscall diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c index fac4adc902044..95c649fbc95af 100644 --- a/arch/arc/mm/ioremap.c +++ b/arch/arc/mm/ioremap.c @@ -53,9 +53,10 @@ EXPORT_SYMBOL(ioremap); void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size, unsigned long flags) { + unsigned int off; unsigned long vaddr; struct vm_struct *area; - phys_addr_t off, end; + phys_addr_t end; pgprot_t prot = __pgprot(flags); /* Don't allow wraparound, zero size */ @@ -72,7 +73,7 @@ void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size, /* Mappings have to be page-aligned */ off = paddr & ~PAGE_MASK; - paddr &= PAGE_MASK; + paddr &= PAGE_MASK_PHYS; size = PAGE_ALIGN(end + 1) - paddr; /* diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 10025e1993533..2430d537f2d38 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -597,7 +597,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, pte_t *ptep) { unsigned long vaddr = vaddr_unaligned & PAGE_MASK; - phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK; + phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK_PHYS; struct page *page = pfn_to_page(pte_pfn(*ptep)); create_tlb(vma, vaddr, ptep); diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig index a376a50d3fea8..a645bca5899a0 100644 --- a/arch/arc/plat-eznps/Kconfig +++ b/arch/arc/plat-eznps/Kconfig @@ -6,8 +6,9 @@ menuconfig ARC_PLAT_EZNPS bool "\"EZchip\" ARC dev platform" + depends on ISA_ARCOMPACT select CPU_BIG_ENDIAN - select CLKSRC_NPS + select CLKSRC_NPS if !PHYS_ADDR_T_64BIT select EZNPS_GIC select EZCHIP_NPS_MANAGEMENT_ENET if ETHERNET help diff --git a/arch/arc/plat-eznps/include/plat/ctop.h b/arch/arc/plat-eznps/include/plat/ctop.h index a4a61531c7fb9..77712c5ffe848 100644 --- a/arch/arc/plat-eznps/include/plat/ctop.h +++ b/arch/arc/plat-eznps/include/plat/ctop.h @@ -33,7 +33,6 @@ #define CTOP_AUX_DPC (CTOP_AUX_BASE + 0x02C) #define CTOP_AUX_LPC (CTOP_AUX_BASE + 0x030) #define CTOP_AUX_EFLAGS (CTOP_AUX_BASE + 0x080) -#define CTOP_AUX_IACK (CTOP_AUX_BASE + 0x088) #define CTOP_AUX_GPA1 (CTOP_AUX_BASE + 0x08C) #define CTOP_AUX_UDMC (CTOP_AUX_BASE + 0x300) diff --git a/arch/arc/plat-hsdk/Kconfig b/arch/arc/plat-hsdk/Kconfig index ce81018345184..6b5c54576f54d 100644 --- a/arch/arc/plat-hsdk/Kconfig +++ b/arch/arc/plat-hsdk/Kconfig @@ -8,5 +8,6 @@ menuconfig ARC_SOC_HSDK select ARC_HAS_ACCL_REGS select ARC_IRQ_NO_AUTOSAVE select CLK_HSDK + select RESET_CONTROLLER select RESET_HSDK select HAVE_PCI diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 8a50efb559f35..a4364cce85f8d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -26,7 +26,7 @@ config ARM select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAS_GCOV_PROFILE_ALL - select ARCH_KEEP_MEMBLOCK if HAVE_ARCH_PFN_VALID || KEXEC + select ARCH_KEEP_MEMBLOCK select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_NO_SG_CHAIN if !ARM_HAS_SG_CHAIN select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX @@ -73,8 +73,9 @@ config ARM select HAVE_ARM_SMCCC if CPU_V7 select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32 select HAVE_CONTEXT_TRACKING + select HAVE_COPY_THREAD_TLS select HAVE_C_RECORDMCOUNT - select HAVE_DEBUG_KMEMLEAK + select HAVE_DEBUG_KMEMLEAK if !XIP_KERNEL select HAVE_DMA_CONTIGUOUS if MMU select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE @@ -84,6 +85,7 @@ config ARM select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG select HAVE_FUNCTION_TRACER if !XIP_KERNEL && (CC_IS_GCC || CLANG_VERSION >= 100000) + select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7) select HAVE_IDE if PCI || ISA || PCMCIA @@ -117,7 +119,6 @@ config ARM select OLD_SIGSUSPEND3 select PCI_SYSCALL if PCI select PERF_USE_VMALLOC - select REFCOUNT_FULL select RTC_LIB select SYS_SUPPORTS_APM_EMULATION # Above selects are sorted alphabetically; please add new ones @@ -506,8 +507,10 @@ config ARCH_S3C24XX select HAVE_S3C2410_WATCHDOG if WATCHDOG select HAVE_S3C_RTC if RTC_CLASS select NEED_MACH_IO_H + select S3C2410_WATCHDOG select SAMSUNG_ATAGS select USE_OF + select WATCHDOG help Samsung S3C2410, S3C2412, S3C2413, S3C2416, S3C2440, S3C2442, S3C2443 and S3C2450 SoCs based systems, such as the Simtec Electronics BAST @@ -517,7 +520,6 @@ config ARCH_S3C24XX config ARCH_OMAP1 bool "TI OMAP1" depends on MMU - select ARCH_HAS_HOLES_MEMORYMODEL select ARCH_OMAP select CLKDEV_LOOKUP select CLKSRC_MMIO @@ -1514,9 +1516,6 @@ config OABI_COMPAT UNPREDICTABLE (in fact it can be predicted that it won't work at all). If in doubt say N. -config ARCH_HAS_HOLES_MEMORYMODEL - bool - config ARCH_SPARSEMEM_ENABLE bool @@ -1524,7 +1523,7 @@ config ARCH_SPARSEMEM_DEFAULT def_bool ARCH_SPARSEMEM_ENABLE config HAVE_ARCH_PFN_VALID - def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM + def_bool y config HIGHMEM bool "High Memory Support" @@ -1906,7 +1905,7 @@ config XIP_DEFLATED_DATA config KEXEC bool "Kexec system call (EXPERIMENTAL)" depends on (!SMP || PM_SLEEP_SMP) - depends on !CPU_V7M + depends on MMU select KEXEC_CORE help kexec is a system call that implements the ability to shutdown your diff --git a/arch/arm/Makefile b/arch/arm/Makefile index db857d07114f1..4f098edfbf202 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -66,15 +66,15 @@ KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra) # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes # testing for a specific architecture or later rather impossible. -arch-$(CONFIG_CPU_32v7M) =-D__LINUX_ARM_ARCH__=7 -march=armv7-m -Wa,-march=armv7-m -arch-$(CONFIG_CPU_32v7) =-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a) -arch-$(CONFIG_CPU_32v6) =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6) +arch-$(CONFIG_CPU_32v7M) =-D__LINUX_ARM_ARCH__=7 -march=armv7-m +arch-$(CONFIG_CPU_32v7) =-D__LINUX_ARM_ARCH__=7 -march=armv7-a +arch-$(CONFIG_CPU_32v6) =-D__LINUX_ARM_ARCH__=6 -march=armv6 # Only override the compiler option if ARMv6. The ARMv6K extensions are # always available in ARMv7 ifeq ($(CONFIG_CPU_32v6),y) -arch-$(CONFIG_CPU_32v6K) =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6k,-march=armv5t -Wa$(comma)-march=armv6k) +arch-$(CONFIG_CPU_32v6K) =-D__LINUX_ARM_ARCH__=6 -march=armv6k endif -arch-$(CONFIG_CPU_32v5) =-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t) +arch-$(CONFIG_CPU_32v5) =-D__LINUX_ARM_ARCH__=5 -march=armv5te arch-$(CONFIG_CPU_32v4T) =-D__LINUX_ARM_ARCH__=4 -march=armv4t arch-$(CONFIG_CPU_32v4) =-D__LINUX_ARM_ARCH__=4 -march=armv4 arch-$(CONFIG_CPU_32v3) =-D__LINUX_ARM_ARCH__=3 -march=armv3m @@ -88,7 +88,7 @@ tune-$(CONFIG_CPU_ARM720T) =-mtune=arm7tdmi tune-$(CONFIG_CPU_ARM740T) =-mtune=arm7tdmi tune-$(CONFIG_CPU_ARM9TDMI) =-mtune=arm9tdmi tune-$(CONFIG_CPU_ARM940T) =-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM946E) =$(call cc-option,-mtune=arm9e,-mtune=arm9tdmi) +tune-$(CONFIG_CPU_ARM946E) =-mtune=arm9e tune-$(CONFIG_CPU_ARM920T) =-mtune=arm9tdmi tune-$(CONFIG_CPU_ARM922T) =-mtune=arm9tdmi tune-$(CONFIG_CPU_ARM925T) =-mtune=arm9tdmi @@ -96,11 +96,11 @@ tune-$(CONFIG_CPU_ARM926T) =-mtune=arm9tdmi tune-$(CONFIG_CPU_FA526) =-mtune=arm9tdmi tune-$(CONFIG_CPU_SA110) =-mtune=strongarm110 tune-$(CONFIG_CPU_SA1100) =-mtune=strongarm1100 -tune-$(CONFIG_CPU_XSCALE) =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale -tune-$(CONFIG_CPU_XSC3) =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale -tune-$(CONFIG_CPU_FEROCEON) =$(call cc-option,-mtune=marvell-f,-mtune=xscale) -tune-$(CONFIG_CPU_V6) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) -tune-$(CONFIG_CPU_V6K) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) +tune-$(CONFIG_CPU_XSCALE) =-mtune=xscale +tune-$(CONFIG_CPU_XSC3) =-mtune=xscale +tune-$(CONFIG_CPU_FEROCEON) =-mtune=xscale +tune-$(CONFIG_CPU_V6) =-mtune=arm1136j-s +tune-$(CONFIG_CPU_V6K) =-mtune=arm1136j-s # Evaluate tune cc-option calls now tune-y := $(tune-y) @@ -307,13 +307,15 @@ endif ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y) prepare: stack_protector_prepare stack_protector_prepare: prepare0 - $(eval KBUILD_CFLAGS += \ + $(eval SSP_PLUGIN_CFLAGS := \ -fplugin-arg-arm_ssp_per_task_plugin-tso=$(shell \ awk '{if ($$2 == "THREAD_SZ_ORDER") print $$3;}'\ include/generated/asm-offsets.h) \ -fplugin-arg-arm_ssp_per_task_plugin-offset=$(shell \ awk '{if ($$2 == "TI_STACK_CANARY") print $$3;}'\ include/generated/asm-offsets.h)) + $(eval KBUILD_CFLAGS += $(SSP_PLUGIN_CFLAGS)) + $(eval GCC_PLUGINS_CFLAGS += $(SSP_PLUGIN_CFLAGS)) endif all: $(notdir $(KBUILD_IMAGE)) diff --git a/arch/arm/boot/bootp/init.S b/arch/arm/boot/bootp/init.S index 5c476bd2b4ce9..b562da2f70408 100644 --- a/arch/arm/boot/bootp/init.S +++ b/arch/arm/boot/bootp/init.S @@ -13,7 +13,7 @@ * size immediately following the kernel, we could build this into * a binary blob, and concatenate the zImage using the cat command. */ - .section .start,#alloc,#execinstr + .section .start, "ax" .type _start, #function .globl _start diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 9219389bbe612..fb6cb24bde5c9 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -90,6 +90,8 @@ $(addprefix $(obj)/,$(libfdt_objs) atags_to_fdt.o): \ $(addprefix $(obj)/,$(libfdt_hdrs)) ifeq ($(CONFIG_ARM_ATAG_DTB_COMPAT),y) +CFLAGS_REMOVE_atags_to_fdt.o += -Wframe-larger-than=${CONFIG_FRAME_WARN} +CFLAGS_atags_to_fdt.o += -Wframe-larger-than=1280 OBJS += $(libfdt_objs) atags_to_fdt.o endif @@ -101,7 +103,6 @@ clean-files += piggy_data lib1funcs.S ashldi3.S bswapsdi2.S \ $(libfdt) $(libfdt_hdrs) hyp-stub.S KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS += $(DISABLE_ARM_SSP_PER_TASK_PLUGIN) ifeq ($(CONFIG_FUNCTION_TRACER),y) ORIG_CFLAGS := $(KBUILD_CFLAGS) @@ -117,13 +118,14 @@ CFLAGS_fdt_ro.o := $(nossp_flags) CFLAGS_fdt_rw.o := $(nossp_flags) CFLAGS_fdt_wip.o := $(nossp_flags) -ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin -I$(obj) +ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin \ + -I$(obj) $(DISABLE_ARM_SSP_PER_TASK_PLUGIN) asflags-y := -DZIMAGE # Supply kernel BSS size to the decompressor via a linker symbol. -KBSS_SZ = $(shell echo $$(($$($(CROSS_COMPILE)nm $(obj)/../../../../vmlinux | \ - sed -n -e 's/^\([^ ]*\) [AB] __bss_start$$/-0x\1/p' \ - -e 's/^\([^ ]*\) [AB] __bss_stop$$/+0x\1/p') )) ) +KBSS_SZ = $(shell echo $$(($$($(NM) $(obj)/../../../../vmlinux | \ + sed -n -e 's/^\([^ ]*\) [ABD] __bss_start$$/-0x\1/p' \ + -e 's/^\([^ ]*\) [ABD] __bss_stop$$/+0x\1/p') )) ) LDFLAGS_vmlinux = --defsym _kernel_bss_size=$(KBSS_SZ) # Supply ZRELADDR to the decompressor via a linker symbol. ifneq ($(CONFIG_AUTO_ZRELADDR),y) @@ -165,7 +167,7 @@ $(obj)/bswapsdi2.S: $(srctree)/arch/$(SRCARCH)/lib/bswapsdi2.S # The .data section is already discarded by the linker script so no need # to bother about it here. check_for_bad_syms = \ -bad_syms=$$($(CROSS_COMPILE)nm $@ | sed -n 's/^.\{8\} [bc] \(.*\)/\1/p') && \ +bad_syms=$$($(NM) $@ | sed -n 's/^.\{8\} [bc] \(.*\)/\1/p') && \ [ -z "$$bad_syms" ] || \ ( echo "following symbols must have non local/private scope:" >&2; \ echo "$$bad_syms" >&2; false ) diff --git a/arch/arm/boot/compressed/big-endian.S b/arch/arm/boot/compressed/big-endian.S index 88e2a88d324b2..0e092c36da2f2 100644 --- a/arch/arm/boot/compressed/big-endian.S +++ b/arch/arm/boot/compressed/big-endian.S @@ -6,7 +6,7 @@ * Author: Nicolas Pitre */ - .section ".start", #alloc, #execinstr + .section ".start", "ax" mrc p15, 0, r0, c1, c0, 0 @ read control reg orr r0, r0, #(1 << 7) @ enable big endian mode diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c index aa075d8372ea2..74255e8198314 100644 --- a/arch/arm/boot/compressed/decompress.c +++ b/arch/arm/boot/compressed/decompress.c @@ -47,7 +47,10 @@ extern char * strchrnul(const char *, int); #endif #ifdef CONFIG_KERNEL_XZ +/* Prevent KASAN override of string helpers in decompressor */ +#undef memmove #define memmove memmove +#undef memcpy #define memcpy memcpy #include "../../../../lib/decompress_unxz.c" #endif diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S index a5983588f96b8..dd53d6eb53ade 100644 --- a/arch/arm/boot/compressed/efi-header.S +++ b/arch/arm/boot/compressed/efi-header.S @@ -9,16 +9,22 @@ #include .macro __nop -#ifdef CONFIG_EFI_STUB - @ This is almost but not quite a NOP, since it does clobber the - @ condition flags. But it is the best we can do for EFI, since - @ PE/COFF expects the magic string "MZ" at offset 0, while the - @ ARM/Linux boot protocol expects an executable instruction - @ there. - .inst MZ_MAGIC | (0x1310 << 16) @ tstne r0, #0x4d000 -#else AR_CLASS( mov r0, r0 ) M_CLASS( nop.w ) + .endm + + .macro __initial_nops +#ifdef CONFIG_EFI_STUB + @ This is a two-instruction NOP, which happens to bear the + @ PE/COFF signature "MZ" in the first two bytes, so the kernel + @ is accepted as an EFI binary. Booting via the UEFI stub + @ will not execute those instructions, but the ARM/Linux + @ boot protocol does, so we need some NOPs here. + .inst MZ_MAGIC | (0xe225 << 16) @ eor r5, r5, 0x4d000 + eor r5, r5, 0x4d000 @ undo previous insn +#else + __nop + __nop #endif .endm diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 93dffed0ac6e0..17f87f4c74f51 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -140,7 +140,7 @@ #endif .endm - .section ".start", #alloc, #execinstr + .section ".start", "ax" /* * sort out different calling conventions */ @@ -165,7 +165,8 @@ start: * were patching the initial instructions of the kernel, i.e * had started to exploit this "patch area". */ - .rept 7 + __initial_nops + .rept 5 __nop .endr #ifndef CONFIG_THUMB2_KERNEL @@ -1142,9 +1143,9 @@ __armv4_mmu_cache_off: __armv7_mmu_cache_off: mrc p15, 0, r0, c1, c0 #ifdef CONFIG_MMU - bic r0, r0, #0x000d + bic r0, r0, #0x0005 #else - bic r0, r0, #0x000c + bic r0, r0, #0x0004 #endif mcr p15, 0, r0, c1, c0 @ turn MMU and cache off mov r12, lr @@ -1273,7 +1274,7 @@ iflush: __armv5tej_mmu_cache_flush: tst r4, #1 movne pc, lr -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate D cache +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test,clean,invalidate D cache bne 1b mcr p15, 0, r0, c7, c5, 0 @ flush I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB diff --git a/arch/arm/boot/compressed/libfdt_env.h b/arch/arm/boot/compressed/libfdt_env.h index b36c0289a308e..6a0f1f524466e 100644 --- a/arch/arm/boot/compressed/libfdt_env.h +++ b/arch/arm/boot/compressed/libfdt_env.h @@ -2,11 +2,13 @@ #ifndef _ARM_LIBFDT_ENV_H #define _ARM_LIBFDT_ENV_H +#include #include #include #include -#define INT_MAX ((int)(~0U>>1)) +#define INT32_MAX S32_MAX +#define UINT32_MAX U32_MAX typedef __be16 fdt16_t; typedef __be32 fdt32_t; diff --git a/arch/arm/boot/compressed/piggy.S b/arch/arm/boot/compressed/piggy.S index 0284f84dcf380..27577644ee721 100644 --- a/arch/arm/boot/compressed/piggy.S +++ b/arch/arm/boot/compressed/piggy.S @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ - .section .piggydata,#alloc + .section .piggydata, "a" .globl input_data input_data: .incbin "arch/arm/boot/compressed/piggy_data" diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S index fc7ed03d8b932..51b078604978b 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.S +++ b/arch/arm/boot/compressed/vmlinux.lds.S @@ -43,7 +43,7 @@ SECTIONS } .table : ALIGN(4) { _table_start = .; - LONG(ZIMAGE_MAGIC(2)) + LONG(ZIMAGE_MAGIC(4)) LONG(ZIMAGE_MAGIC(0x5a534c4b)) LONG(ZIMAGE_MAGIC(__piggy_size_addr - _start)) LONG(ZIMAGE_MAGIC(_kernel_bss_size)) diff --git a/arch/arm/boot/deflate_xip_data.sh b/arch/arm/boot/deflate_xip_data.sh index 40937248cebe3..304495c3c2c5d 100755 --- a/arch/arm/boot/deflate_xip_data.sh +++ b/arch/arm/boot/deflate_xip_data.sh @@ -56,7 +56,7 @@ trap 'rm -f "$XIPIMAGE.tmp"; exit 1' 1 2 3 # substitute the data section by a compressed version $DD if="$XIPIMAGE" count=$data_start iflag=count_bytes of="$XIPIMAGE.tmp" $DD if="$XIPIMAGE" skip=$data_start iflag=skip_bytes | -gzip -9 >> "$XIPIMAGE.tmp" +$KGZIP -9 >> "$XIPIMAGE.tmp" # replace kernel binary mv -f "$XIPIMAGE.tmp" "$XIPIMAGE" diff --git a/arch/arm/boot/dts/am335x-boneblack-common.dtsi b/arch/arm/boot/dts/am335x-boneblack-common.dtsi index 7ad079861efd6..91f93bc89716d 100644 --- a/arch/arm/boot/dts/am335x-boneblack-common.dtsi +++ b/arch/arm/boot/dts/am335x-boneblack-common.dtsi @@ -131,6 +131,11 @@ }; / { + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0x20000000>; /* 512 MB */ + }; + clk_mcasp0_fixed: clk_mcasp0_fixed { #clock-cells = <0>; compatible = "fixed-clock"; diff --git a/arch/arm/boot/dts/am335x-cm-t335.dts b/arch/arm/boot/dts/am335x-cm-t335.dts index 1fe3b566ba3df..09b36df140e5a 100644 --- a/arch/arm/boot/dts/am335x-cm-t335.dts +++ b/arch/arm/boot/dts/am335x-cm-t335.dts @@ -516,7 +516,7 @@ status = "okay"; status = "okay"; pinctrl-names = "default"; pinctrl-0 = <&spi0_pins>; - ti,pindir-d0-out-d1-in = <1>; + ti,pindir-d0-out-d1-in; /* WLS1271 WiFi */ wlcore: wlcore@1 { compatible = "ti,wl1271"; diff --git a/arch/arm/boot/dts/am335x-pocketbeagle.dts b/arch/arm/boot/dts/am335x-pocketbeagle.dts index ff4f919d22f62..abf2badce53d9 100644 --- a/arch/arm/boot/dts/am335x-pocketbeagle.dts +++ b/arch/arm/boot/dts/am335x-pocketbeagle.dts @@ -88,7 +88,6 @@ AM33XX_PADCONF(AM335X_PIN_MMC0_DAT3, PIN_INPUT_PULLUP, MUX_MODE0) AM33XX_PADCONF(AM335X_PIN_MMC0_CMD, PIN_INPUT_PULLUP, MUX_MODE0) AM33XX_PADCONF(AM335X_PIN_MMC0_CLK, PIN_INPUT_PULLUP, MUX_MODE0) - AM33XX_PADCONF(AM335X_PIN_MCASP0_ACLKR, PIN_INPUT, MUX_MODE4) /* (B12) mcasp0_aclkr.mmc0_sdwp */ >; }; diff --git a/arch/arm/boot/dts/am335x-sancloud-bbe.dts b/arch/arm/boot/dts/am335x-sancloud-bbe.dts index 8678e6e35493f..e5fdb7abb0d54 100644 --- a/arch/arm/boot/dts/am335x-sancloud-bbe.dts +++ b/arch/arm/boot/dts/am335x-sancloud-bbe.dts @@ -108,7 +108,7 @@ &cpsw_emac0 { phy-handle = <ðphy0>; - phy-mode = "rgmii-txid"; + phy-mode = "rgmii-id"; }; &i2c0 { diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index fb6b8aa12cc56..77fa7c0f21046 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -40,6 +40,9 @@ ethernet1 = &cpsw_emac1; spi0 = &spi0; spi1 = &spi1; + mmc0 = &mmc1; + mmc1 = &mmc2; + mmc2 = &mmc3; }; cpus { diff --git a/arch/arm/boot/dts/am3517-evm.dts b/arch/arm/boot/dts/am3517-evm.dts index a1fd3e63e86ec..3db30ce134b95 100644 --- a/arch/arm/boot/dts/am3517-evm.dts +++ b/arch/arm/boot/dts/am3517-evm.dts @@ -160,6 +160,8 @@ /* HS USB Host PHY on PORT 1 */ hsusb1_phy: hsusb1_phy { + pinctrl-names = "default"; + pinctrl-0 = <&hsusb1_rst_pins>; compatible = "usb-nop-xceiv"; reset-gpios = <&gpio2 25 GPIO_ACTIVE_LOW>; /* gpio_57 */ #phy-cells = <0>; @@ -167,7 +169,9 @@ }; &davinci_emac { - status = "okay"; + pinctrl-names = "default"; + pinctrl-0 = <ðernet_pins>; + status = "okay"; }; &davinci_mdio { @@ -192,6 +196,8 @@ }; &i2c2 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c2_pins>; clock-frequency = <400000>; /* User DIP swithes [1:8] / User LEDS [1:2] */ tca6416: gpio@21 { @@ -204,6 +210,8 @@ }; &i2c3 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c3_pins>; clock-frequency = <400000>; }; @@ -222,6 +230,8 @@ }; &usbhshost { + pinctrl-names = "default"; + pinctrl-0 = <&hsusb1_pins>; port1-mode = "ehci-phy"; }; @@ -230,8 +240,35 @@ }; &omap3_pmx_core { - pinctrl-names = "default"; - pinctrl-0 = <&hsusb1_rst_pins>; + + ethernet_pins: pinmux_ethernet_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21fe, PIN_INPUT | MUX_MODE0) /* rmii_mdio_data */ + OMAP3_CORE1_IOPAD(0x2200, MUX_MODE0) /* rmii_mdio_clk */ + OMAP3_CORE1_IOPAD(0x2202, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_rxd0 */ + OMAP3_CORE1_IOPAD(0x2204, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_rxd1 */ + OMAP3_CORE1_IOPAD(0x2206, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_crs_dv */ + OMAP3_CORE1_IOPAD(0x2208, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* rmii_rxer */ + OMAP3_CORE1_IOPAD(0x220a, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* rmii_txd0 */ + OMAP3_CORE1_IOPAD(0x220c, PIN_OUTPUT_PULLDOWN | MUX_MODE0) /* rmii_txd1 */ + OMAP3_CORE1_IOPAD(0x220e, PIN_OUTPUT_PULLDOWN |MUX_MODE0) /* rmii_txen */ + OMAP3_CORE1_IOPAD(0x2210, PIN_INPUT_PULLDOWN | MUX_MODE0) /* rmii_50mhz_clk */ + >; + }; + + i2c2_pins: pinmux_i2c2_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21be, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c2_scl */ + OMAP3_CORE1_IOPAD(0x21c0, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c2_sda */ + >; + }; + + i2c3_pins: pinmux_i2c3_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21c2, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c3_scl */ + OMAP3_CORE1_IOPAD(0x21c4, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c3_sda */ + >; + }; leds_pins: pinmux_leds_pins { pinctrl-single,pins = < @@ -299,8 +336,6 @@ }; &omap3_pmx_core2 { - pinctrl-names = "default"; - pinctrl-0 = <&hsusb1_pins>; hsusb1_pins: pinmux_hsusb1_pins { pinctrl-single,pins = < diff --git a/arch/arm/boot/dts/am3517-som.dtsi b/arch/arm/boot/dts/am3517-som.dtsi index 8b669e2eafec4..f7b680f6c48ad 100644 --- a/arch/arm/boot/dts/am3517-som.dtsi +++ b/arch/arm/boot/dts/am3517-som.dtsi @@ -69,6 +69,8 @@ }; &i2c1 { + pinctrl-names = "default"; + pinctrl-0 = <&i2c1_pins>; clock-frequency = <400000>; s35390a: s35390a@30 { @@ -179,6 +181,13 @@ &omap3_pmx_core { + i2c1_pins: pinmux_i2c1_pins { + pinctrl-single,pins = < + OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c1_scl */ + OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT_PULLUP | MUX_MODE0) /* i2c1_sda */ + >; + }; + wl12xx_buffer_pins: pinmux_wl12xx_buffer_pins { pinctrl-single,pins = < OMAP3_CORE1_IOPAD(0x2156, PIN_OUTPUT | MUX_MODE4) /* mmc1_dat7.gpio_129 */ diff --git a/arch/arm/boot/dts/am437x-gp-evm.dts b/arch/arm/boot/dts/am437x-gp-evm.dts index cae4500194fec..126965a34841e 100644 --- a/arch/arm/boot/dts/am437x-gp-evm.dts +++ b/arch/arm/boot/dts/am437x-gp-evm.dts @@ -86,7 +86,7 @@ }; lcd0: display { - compatible = "osddisplays,osd057T0559-34ts", "panel-dpi"; + compatible = "osddisplays,osd070t1718-19ts", "panel-dpi"; label = "lcd"; backlight = <&lcd_bl>; @@ -829,11 +829,14 @@ status = "okay"; }; +&gpio5_target { + ti,no-reset-on-init; +}; + &gpio5 { pinctrl-names = "default"; pinctrl-0 = <&display_mux_pins>; status = "okay"; - ti,no-reset-on-init; p8 { /* diff --git a/arch/arm/boot/dts/am437x-idk-evm.dts b/arch/arm/boot/dts/am437x-idk-evm.dts index f3ced6df0c9b2..9f66f96d09c91 100644 --- a/arch/arm/boot/dts/am437x-idk-evm.dts +++ b/arch/arm/boot/dts/am437x-idk-evm.dts @@ -526,11 +526,11 @@ * Supply voltage supervisor on board will not allow opp50 so * disable it and set opp100 as suspend OPP. */ - opp50@300000000 { + opp50-300000000 { status = "disabled"; }; - opp100@600000000 { + opp100-600000000 { opp-suspend; }; }; diff --git a/arch/arm/boot/dts/am437x-l4.dtsi b/arch/arm/boot/dts/am437x-l4.dtsi index 59770dd3785ee..64fdd5079d49b 100644 --- a/arch/arm/boot/dts/am437x-l4.dtsi +++ b/arch/arm/boot/dts/am437x-l4.dtsi @@ -1576,8 +1576,9 @@ reg-names = "rev"; ti,hwmods = "d_can0"; /* Domains (P, C): per_pwrdm, l4ls_clkdm */ - clocks = <&l4ls_clkctrl AM4_L4LS_D_CAN0_CLKCTRL 0>; - clock-names = "fck"; + clocks = <&l4ls_clkctrl AM4_L4LS_D_CAN0_CLKCTRL 0>, + <&dcan0_fck>; + clock-names = "fck", "osc"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0xcc000 0x2000>; @@ -1585,6 +1586,8 @@ dcan0: can@0 { compatible = "ti,am4372-d_can", "ti,am3352-d_can"; reg = <0x0 0x2000>; + clocks = <&dcan0_fck>; + clock-names = "fck"; syscon-raminit = <&scm_conf 0x644 0>; interrupts = ; status = "disabled"; @@ -1597,8 +1600,9 @@ reg-names = "rev"; ti,hwmods = "d_can1"; /* Domains (P, C): per_pwrdm, l4ls_clkdm */ - clocks = <&l4ls_clkctrl AM4_L4LS_D_CAN1_CLKCTRL 0>; - clock-names = "fck"; + clocks = <&l4ls_clkctrl AM4_L4LS_D_CAN1_CLKCTRL 0>, + <&dcan1_fck>; + clock-names = "fck", "osc"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0xd0000 0x2000>; @@ -1606,6 +1610,8 @@ dcan1: can@0 { compatible = "ti,am4372-d_can", "ti,am3352-d_can"; reg = <0x0 0x2000>; + clocks = <&dcan1_fck>; + clock-names = "fck"; syscon-raminit = <&scm_conf 0x644 1>; interrupts = ; status = "disabled"; @@ -2071,7 +2077,7 @@ }; }; - target-module@22000 { /* 0x48322000, ap 116 64.0 */ + gpio5_target: target-module@22000 { /* 0x48322000, ap 116 64.0 */ compatible = "ti,sysc-omap2", "ti,sysc"; ti,hwmods = "gpio6"; reg = <0x22000 0x4>, diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts index 95314121d1115..d0ea95830d454 100644 --- a/arch/arm/boot/dts/am43x-epos-evm.dts +++ b/arch/arm/boot/dts/am43x-epos-evm.dts @@ -42,7 +42,7 @@ }; lcd0: display { - compatible = "osddisplays,osd057T0559-34ts", "panel-dpi"; + compatible = "osddisplays,osd070t1718-19ts", "panel-dpi"; label = "lcd"; backlight = <&lcd_bl>; @@ -589,7 +589,7 @@ status = "okay"; pinctrl-names = "default"; pinctrl-0 = <&i2c0_pins>; - clock-frequency = <400000>; + clock-frequency = <100000>; tps65218: tps65218@24 { reg = <0x24>; @@ -848,6 +848,7 @@ pinctrl-names = "default", "sleep"; pinctrl-0 = <&spi0_pins_default>; pinctrl-1 = <&spi0_pins_sleep>; + ti,pindir-d0-out-d1-in; }; &spi1 { @@ -855,6 +856,7 @@ pinctrl-names = "default", "sleep"; pinctrl-0 = <&spi1_pins_default>; pinctrl-1 = <&spi1_pins_sleep>; + ti,pindir-d0-out-d1-in; }; &usb2_phy1 { diff --git a/arch/arm/boot/dts/am43xx-clocks.dtsi b/arch/arm/boot/dts/am43xx-clocks.dtsi index 091356f2a8c16..c726cd8dbdf1b 100644 --- a/arch/arm/boot/dts/am43xx-clocks.dtsi +++ b/arch/arm/boot/dts/am43xx-clocks.dtsi @@ -704,6 +704,60 @@ ti,bit-shift = <8>; reg = <0x2a48>; }; + + clkout1_osc_div_ck: clkout1-osc-div-ck { + #clock-cells = <0>; + compatible = "ti,divider-clock"; + clocks = <&sys_clkin_ck>; + ti,bit-shift = <20>; + ti,max-div = <4>; + reg = <0x4100>; + }; + + clkout1_src2_mux_ck: clkout1-src2-mux-ck { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&clk_rc32k_ck>, <&sysclk_div>, <&dpll_ddr_m2_ck>, + <&dpll_per_m2_ck>, <&dpll_disp_m2_ck>, + <&dpll_mpu_m2_ck>; + reg = <0x4100>; + }; + + clkout1_src2_pre_div_ck: clkout1-src2-pre-div-ck { + #clock-cells = <0>; + compatible = "ti,divider-clock"; + clocks = <&clkout1_src2_mux_ck>; + ti,bit-shift = <4>; + ti,max-div = <8>; + reg = <0x4100>; + }; + + clkout1_src2_post_div_ck: clkout1-src2-post-div-ck { + #clock-cells = <0>; + compatible = "ti,divider-clock"; + clocks = <&clkout1_src2_pre_div_ck>; + ti,bit-shift = <8>; + ti,max-div = <32>; + ti,index-power-of-two; + reg = <0x4100>; + }; + + clkout1_mux_ck: clkout1-mux-ck { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&clkout1_osc_div_ck>, <&clk_rc32k_ck>, + <&clkout1_src2_post_div_ck>, <&dpll_extdev_m2_ck>; + ti,bit-shift = <16>; + reg = <0x4100>; + }; + + clkout1_ck: clkout1-ck { + #clock-cells = <0>; + compatible = "ti,gate-clock"; + clocks = <&clkout1_mux_ck>; + ti,bit-shift = <23>; + reg = <0x4100>; + }; }; &prcm { diff --git a/arch/arm/boot/dts/am571x-idk.dts b/arch/arm/boot/dts/am571x-idk.dts index 0aaacea1d887b..10105a497c1ac 100644 --- a/arch/arm/boot/dts/am571x-idk.dts +++ b/arch/arm/boot/dts/am571x-idk.dts @@ -167,11 +167,7 @@ &pcie1_rc { status = "okay"; - gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>; -}; - -&pcie1_ep { - gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>; + gpios = <&gpio5 18 GPIO_ACTIVE_HIGH>; }; &mmc1 { diff --git a/arch/arm/boot/dts/am572x-idk-common.dtsi b/arch/arm/boot/dts/am572x-idk-common.dtsi index a064f13b38802..ddf123620e962 100644 --- a/arch/arm/boot/dts/am572x-idk-common.dtsi +++ b/arch/arm/boot/dts/am572x-idk-common.dtsi @@ -147,10 +147,6 @@ gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>; }; -&pcie1_ep { - gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>; -}; - &mailbox5 { status = "okay"; mbox_ipu1_ipc3x: mbox_ipu1_ipc3x { diff --git a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi index bc76f1705c0f6..a813a0cf3ff39 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi +++ b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi @@ -29,6 +29,27 @@ reg = <0x0 0x80000000 0x0 0x80000000>; }; + main_12v0: fixedregulator-main_12v0 { + /* main supply */ + compatible = "regulator-fixed"; + regulator-name = "main_12v0"; + regulator-min-microvolt = <12000000>; + regulator-max-microvolt = <12000000>; + regulator-always-on; + regulator-boot-on; + }; + + evm_5v0: fixedregulator-evm_5v0 { + /* Output of TPS54531D */ + compatible = "regulator-fixed"; + regulator-name = "evm_5v0"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + vin-supply = <&main_12v0>; + regulator-always-on; + regulator-boot-on; + }; + vdd_3v3: fixedregulator-vdd_3v3 { compatible = "regulator-fixed"; regulator-name = "vdd_3v3"; @@ -547,10 +568,6 @@ gpios = <&gpio2 8 GPIO_ACTIVE_LOW>; }; -&pcie1_ep { - gpios = <&gpio2 8 GPIO_ACTIVE_LOW>; -}; - &mcasp3 { #sound-dai-cells = <0>; assigned-clocks = <&l4per2_clkctrl DRA7_L4PER2_MCASP3_CLKCTRL 24>; diff --git a/arch/arm/boot/dts/am57xx-cl-som-am57x.dts b/arch/arm/boot/dts/am57xx-cl-som-am57x.dts index 34ca761aeded2..e86d4795e0244 100644 --- a/arch/arm/boot/dts/am57xx-cl-som-am57x.dts +++ b/arch/arm/boot/dts/am57xx-cl-som-am57x.dts @@ -611,12 +611,11 @@ >; }; -&gpio3 { - status = "okay"; +&gpio3_target { ti,no-reset-on-init; }; -&gpio2 { +&gpio2_target { status = "okay"; ti,no-reset-on-init; }; diff --git a/arch/arm/boot/dts/armada-385-turris-omnia.dts b/arch/arm/boot/dts/armada-385-turris-omnia.dts index 768b6c5d2129a..fde4c302f08ec 100644 --- a/arch/arm/boot/dts/armada-385-turris-omnia.dts +++ b/arch/arm/boot/dts/armada-385-turris-omnia.dts @@ -236,6 +236,7 @@ status = "okay"; compatible = "ethernet-phy-id0141.0DD1", "ethernet-phy-ieee802.3-c22"; reg = <1>; + marvell,reg-init = <3 18 0 0x4985>; /* irq is connected to &pcawan pin 7 */ }; diff --git a/arch/arm/boot/dts/armada-388-helios4.dts b/arch/arm/boot/dts/armada-388-helios4.dts index 705adfa8c680f..a94758090fb0d 100644 --- a/arch/arm/boot/dts/armada-388-helios4.dts +++ b/arch/arm/boot/dts/armada-388-helios4.dts @@ -70,6 +70,9 @@ system-leds { compatible = "gpio-leds"; + pinctrl-names = "default"; + pinctrl-0 = <&helios_system_led_pins>; + status-led { label = "helios4:green:status"; gpios = <&gpio0 24 GPIO_ACTIVE_LOW>; @@ -86,6 +89,9 @@ io-leds { compatible = "gpio-leds"; + pinctrl-names = "default"; + pinctrl-0 = <&helios_io_led_pins>; + sata1-led { label = "helios4:green:ata1"; gpios = <&gpio1 17 GPIO_ACTIVE_LOW>; @@ -121,11 +127,15 @@ fan1: j10-pwm { compatible = "pwm-fan"; pwms = <&gpio1 9 40000>; /* Target freq:25 kHz */ + pinctrl-names = "default"; + pinctrl-0 = <&helios_fan1_pins>; }; fan2: j17-pwm { compatible = "pwm-fan"; pwms = <&gpio1 23 40000>; /* Target freq:25 kHz */ + pinctrl-names = "default"; + pinctrl-0 = <&helios_fan2_pins>; }; usb2_phy: usb2-phy { @@ -291,16 +301,22 @@ "mpp39", "mpp40"; marvell,function = "sd0"; }; - helios_led_pins: helios-led-pins { - marvell,pins = "mpp24", "mpp25", - "mpp49", "mpp50", + helios_system_led_pins: helios-system-led-pins { + marvell,pins = "mpp24", "mpp25"; + marvell,function = "gpio"; + }; + helios_io_led_pins: helios-io-led-pins { + marvell,pins = "mpp49", "mpp50", "mpp52", "mpp53", "mpp54"; marvell,function = "gpio"; }; - helios_fan_pins: helios-fan-pins { - marvell,pins = "mpp41", "mpp43", - "mpp48", "mpp55"; + helios_fan1_pins: helios_fan1_pins { + marvell,pins = "mpp41", "mpp43"; + marvell,function = "gpio"; + }; + helios_fan2_pins: helios_fan2_pins { + marvell,pins = "mpp48", "mpp55"; marvell,function = "gpio"; }; microsom_spi1_cs_pins: spi1-cs-pins { diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi index 3f4bb44d85f00..5b82e58a1cf06 100644 --- a/arch/arm/boot/dts/armada-38x.dtsi +++ b/arch/arm/boot/dts/armada-38x.dtsi @@ -165,7 +165,7 @@ }; uart0: serial@12000 { - compatible = "marvell,armada-38x-uart"; + compatible = "marvell,armada-38x-uart", "ns16550a"; reg = <0x12000 0x100>; reg-shift = <2>; interrupts = ; @@ -175,7 +175,7 @@ }; uart1: serial@12100 { - compatible = "marvell,armada-38x-uart"; + compatible = "marvell,armada-38x-uart", "ns16550a"; reg = <0x12100 0x100>; reg-shift = <2>; interrupts = ; @@ -339,7 +339,8 @@ comphy: phy@18300 { compatible = "marvell,armada-380-comphy"; - reg = <0x18300 0x100>; + reg-names = "comphy", "conf"; + reg = <0x18300 0x100>, <0x18460 4>; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi index 267d0c178e55c..30abb4b64a1b6 100644 --- a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi +++ b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi @@ -266,11 +266,6 @@ reg = <0x11000 0x100>; }; -&i2c1 { - compatible = "marvell,mv78230-i2c", "marvell,mv64xxx-i2c"; - reg = <0x11100 0x100>; -}; - &mpic { reg = <0x20a00 0x2d0>, <0x21070 0x58>; }; diff --git a/arch/arm/boot/dts/aspeed-ast2500-evb.dts b/arch/arm/boot/dts/aspeed-ast2500-evb.dts index c9d88c90135e6..9db4d42d0deb3 100644 --- a/arch/arm/boot/dts/aspeed-ast2500-evb.dts +++ b/arch/arm/boot/dts/aspeed-ast2500-evb.dts @@ -5,7 +5,7 @@ / { model = "AST2500 EVB"; - compatible = "aspeed,ast2500"; + compatible = "aspeed,ast2500-evb", "aspeed,ast2500"; aliases { serial4 = &uart5; diff --git a/arch/arm/boot/dts/aspeed-ast2600-evb.dts b/arch/arm/boot/dts/aspeed-ast2600-evb.dts index 9870553919b7c..f00b19ad4fa62 100644 --- a/arch/arm/boot/dts/aspeed-ast2600-evb.dts +++ b/arch/arm/boot/dts/aspeed-ast2600-evb.dts @@ -7,7 +7,7 @@ / { model = "AST2600 EVB"; - compatible = "aspeed,ast2600"; + compatible = "aspeed,ast2600-evb-a1", "aspeed,ast2600"; aliases { serial4 = &uart5; diff --git a/arch/arm/boot/dts/aspeed-bmc-facebook-tiogapass.dts b/arch/arm/boot/dts/aspeed-bmc-facebook-tiogapass.dts index 682f729ea25e1..c58230fea45f8 100644 --- a/arch/arm/boot/dts/aspeed-bmc-facebook-tiogapass.dts +++ b/arch/arm/boot/dts/aspeed-bmc-facebook-tiogapass.dts @@ -81,11 +81,6 @@ status = "okay"; }; -&vuart { - // VUART Host Console - status = "okay"; -}; - &uart1 { // Host Console status = "okay"; diff --git a/arch/arm/boot/dts/aspeed-bmc-intel-s2600wf.dts b/arch/arm/boot/dts/aspeed-bmc-intel-s2600wf.dts index 22dade6393d06..d1dbe3b6ad5a7 100644 --- a/arch/arm/boot/dts/aspeed-bmc-intel-s2600wf.dts +++ b/arch/arm/boot/dts/aspeed-bmc-intel-s2600wf.dts @@ -22,9 +22,9 @@ #size-cells = <1>; ranges; - vga_memory: framebuffer@7f000000 { + vga_memory: framebuffer@9f000000 { no-map; - reg = <0x7f000000 0x01000000>; + reg = <0x9f000000 0x01000000>; /* 16M */ }; }; diff --git a/arch/arm/boot/dts/aspeed-g4.dtsi b/arch/arm/boot/dts/aspeed-g4.dtsi index dffb595d30e40..679d04d585a4a 100644 --- a/arch/arm/boot/dts/aspeed-g4.dtsi +++ b/arch/arm/boot/dts/aspeed-g4.dtsi @@ -371,6 +371,7 @@ compatible = "aspeed,ast2400-ibt-bmc"; reg = <0xc0 0x18>; interrupts = <8>; + clocks = <&syscon ASPEED_CLK_GATE_LCLK>; status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/aspeed-g5.dtsi b/arch/arm/boot/dts/aspeed-g5.dtsi index e8feb8b66a2f7..412c96b3c3ac0 100644 --- a/arch/arm/boot/dts/aspeed-g5.dtsi +++ b/arch/arm/boot/dts/aspeed-g5.dtsi @@ -464,6 +464,7 @@ compatible = "aspeed,ast2500-ibt-bmc"; reg = <0xc0 0x18>; interrupts = <8>; + clocks = <&syscon ASPEED_CLK_GATE_LCLK>; status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi index 5b8bf58e89cb4..ac723fe898c76 100644 --- a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi +++ b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi @@ -117,11 +117,6 @@ groups = "FWSPID"; }; - pinctrl_fwqspid_default: fwqspid_default { - function = "FWQSPID"; - groups = "FWQSPID"; - }; - pinctrl_fwspiwp_default: fwspiwp_default { function = "FWSPIWP"; groups = "FWSPIWP"; @@ -208,12 +203,12 @@ }; pinctrl_hvi3c3_default: hvi3c3_default { - function = "HVI3C3"; + function = "I3C3"; groups = "HVI3C3"; }; pinctrl_hvi3c4_default: hvi3c4_default { - function = "HVI3C4"; + function = "I3C4"; groups = "HVI3C4"; }; @@ -653,12 +648,12 @@ }; pinctrl_qspi1_default: qspi1_default { - function = "QSPI1"; + function = "SPI1"; groups = "QSPI1"; }; pinctrl_qspi2_default: qspi2_default { - function = "QSPI2"; + function = "SPI2"; groups = "QSPI2"; }; diff --git a/arch/arm/boot/dts/at91-sama5d27_som1.dtsi b/arch/arm/boot/dts/at91-sama5d27_som1.dtsi index 7788d5db65c25..ae6d07dc02832 100644 --- a/arch/arm/boot/dts/at91-sama5d27_som1.dtsi +++ b/arch/arm/boot/dts/at91-sama5d27_som1.dtsi @@ -44,8 +44,8 @@ pinctrl-0 = <&pinctrl_macb0_default>; phy-mode = "rmii"; - ethernet-phy@0 { - reg = <0x0>; + ethernet-phy@7 { + reg = <0x7>; interrupt-parent = <&pioA>; interrupts = ; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts b/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts index 89f0c9979b89c..4f63158d6b9bd 100644 --- a/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts +++ b/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts @@ -69,7 +69,6 @@ isc: isc@f0008000 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_isc_base &pinctrl_isc_data_8bit &pinctrl_isc_data_9_10 &pinctrl_isc_data_11_12>; - status = "okay"; }; qspi1: spi@f0024000 { diff --git a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts index ba7f3e646c260..b8db77b7f5d80 100644 --- a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts +++ b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts @@ -40,7 +40,7 @@ ahb { usb0: gadget@300000 { - atmel,vbus-gpio = <&pioA PIN_PA27 GPIO_ACTIVE_HIGH>; + atmel,vbus-gpio = <&pioA PIN_PB11 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usba_vbus>; status = "okay"; @@ -125,8 +125,6 @@ bus-width = <8>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sdmmc0_default>; - non-removable; - mmc-ddr-1_8v; status = "okay"; }; diff --git a/arch/arm/boot/dts/at91-sama5d3_xplained.dts b/arch/arm/boot/dts/at91-sama5d3_xplained.dts index 61f068a7b362a..400eaf640fe42 100644 --- a/arch/arm/boot/dts/at91-sama5d3_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d3_xplained.dts @@ -242,6 +242,11 @@ atmel,pins = ; /* PE9, conflicts with A9 */ }; + pinctrl_usb_default: usb_default { + atmel,pins = + ; + }; }; }; }; @@ -259,6 +264,8 @@ &pioE 3 GPIO_ACTIVE_LOW &pioE 4 GPIO_ACTIVE_LOW >; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usb_default>; status = "okay"; }; diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts index fdfc37d716e01..1d101067371b4 100644 --- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts @@ -133,6 +133,11 @@ atmel,pins = ; }; + pinctrl_usb_default: usb_default { + atmel,pins = + ; + }; pinctrl_key_gpio: key_gpio_0 { atmel,pins = ; @@ -158,6 +163,8 @@ &pioE 11 GPIO_ACTIVE_HIGH &pioE 14 GPIO_ACTIVE_HIGH >; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usb_default>; status = "okay"; }; diff --git a/arch/arm/boot/dts/at91-tse850-3.dts b/arch/arm/boot/dts/at91-tse850-3.dts index 3ca97b47c69ce..7e5c598e7e68f 100644 --- a/arch/arm/boot/dts/at91-tse850-3.dts +++ b/arch/arm/boot/dts/at91-tse850-3.dts @@ -262,7 +262,7 @@ &macb1 { status = "okay"; - phy-mode = "rgmii"; + phy-mode = "rmii"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi index dee9c0c8a0964..16c6fd3c42462 100644 --- a/arch/arm/boot/dts/at91sam9260.dtsi +++ b/arch/arm/boot/dts/at91sam9260.dtsi @@ -187,7 +187,7 @@ usart0 { pinctrl_usart0: usart0-0 { atmel,pins = - ; }; @@ -221,7 +221,7 @@ usart1 { pinctrl_usart1: usart1-0 { atmel,pins = - ; }; @@ -239,7 +239,7 @@ usart2 { pinctrl_usart2: usart2-0 { atmel,pins = - ; }; @@ -257,7 +257,7 @@ usart3 { pinctrl_usart3: usart3-0 { atmel,pins = - ; }; @@ -275,7 +275,7 @@ uart0 { pinctrl_uart0: uart0-0 { atmel,pins = - ; }; }; @@ -283,7 +283,7 @@ uart1 { pinctrl_uart1: uart1-0 { atmel,pins = - ; }; }; diff --git a/arch/arm/boot/dts/at91sam9261.dtsi b/arch/arm/boot/dts/at91sam9261.dtsi index dba025a985270..5ed3d745ac867 100644 --- a/arch/arm/boot/dts/at91sam9261.dtsi +++ b/arch/arm/boot/dts/at91sam9261.dtsi @@ -329,7 +329,7 @@ usart0 { pinctrl_usart0: usart0-0 { atmel,pins = - , + , ; }; @@ -347,7 +347,7 @@ usart1 { pinctrl_usart1: usart1-0 { atmel,pins = - , + , ; }; @@ -365,7 +365,7 @@ usart2 { pinctrl_usart2: usart2-0 { atmel,pins = - , + , ; }; diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi index 99678abdda930..5c990cfae254e 100644 --- a/arch/arm/boot/dts/at91sam9263.dtsi +++ b/arch/arm/boot/dts/at91sam9263.dtsi @@ -183,7 +183,7 @@ usart0 { pinctrl_usart0: usart0-0 { atmel,pins = - ; }; @@ -201,7 +201,7 @@ usart1 { pinctrl_usart1: usart1-0 { atmel,pins = - ; }; @@ -219,7 +219,7 @@ usart2 { pinctrl_usart2: usart2-0 { atmel,pins = - ; }; diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi index bda22700110cd..287566e09a673 100644 --- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi +++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi @@ -217,6 +217,12 @@ wm8731: wm8731@1b { compatible = "wm8731"; reg = <0x1b>; + + /* PCK0 at 12MHz */ + clocks = <&pmc PMC_TYPE_SYSTEM 8>; + clock-names = "mclk"; + assigned-clocks = <&pmc PMC_TYPE_SYSTEM 8>; + assigned-clock-rates = <12000000>; }; }; diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi index 691c95ea61754..fd179097a4bfd 100644 --- a/arch/arm/boot/dts/at91sam9g45.dtsi +++ b/arch/arm/boot/dts/at91sam9g45.dtsi @@ -556,7 +556,7 @@ usart0 { pinctrl_usart0: usart0-0 { atmel,pins = - ; }; @@ -574,7 +574,7 @@ usart1 { pinctrl_usart1: usart1-0 { atmel,pins = - ; }; @@ -592,7 +592,7 @@ usart2 { pinctrl_usart2: usart2-0 { atmel,pins = - ; }; @@ -610,7 +610,7 @@ usart3 { pinctrl_usart3: usart3-0 { atmel,pins = - ; }; diff --git a/arch/arm/boot/dts/at91sam9rl.dtsi b/arch/arm/boot/dts/at91sam9rl.dtsi index 8643b71515650..0121bb0ecde16 100644 --- a/arch/arm/boot/dts/at91sam9rl.dtsi +++ b/arch/arm/boot/dts/at91sam9rl.dtsi @@ -278,23 +278,26 @@ atmel,adc-use-res = "highres"; trigger0 { - trigger-name = "timer-counter-0"; + trigger-name = "external-rising"; trigger-value = <0x1>; + trigger-external; }; + trigger1 { - trigger-name = "timer-counter-1"; - trigger-value = <0x3>; + trigger-name = "external-falling"; + trigger-value = <0x2>; + trigger-external; }; trigger2 { - trigger-name = "timer-counter-2"; - trigger-value = <0x5>; + trigger-name = "external-any"; + trigger-value = <0x3>; + trigger-external; }; trigger3 { - trigger-name = "external"; - trigger-value = <0x13>; - trigger-external; + trigger-name = "continuous"; + trigger-value = <0x6>; }; }; @@ -682,7 +685,7 @@ usart0 { pinctrl_usart0: usart0-0 { atmel,pins = - , + , ; }; @@ -721,7 +724,7 @@ usart1 { pinctrl_usart1: usart1-0 { atmel,pins = - , + , ; }; @@ -744,7 +747,7 @@ usart2 { pinctrl_usart2: usart2-0 { atmel,pins = - , + , ; }; @@ -767,7 +770,7 @@ usart3 { pinctrl_usart3: usart3-0 { atmel,pins = - , + , ; }; diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi index 2dac3efc76405..9ca7b4241c972 100644 --- a/arch/arm/boot/dts/bcm-cygnus.dtsi +++ b/arch/arm/boot/dts/bcm-cygnus.dtsi @@ -174,8 +174,8 @@ mdio: mdio@18002000 { compatible = "brcm,iproc-mdio"; reg = <0x18002000 0x8>; - #size-cells = <1>; - #address-cells = <0>; + #size-cells = <0>; + #address-cells = <1>; status = "disabled"; gphy0: ethernet-phy@0 { @@ -460,7 +460,7 @@ status = "disabled"; }; - nand: nand@18046000 { + nand_controller: nand-controller@18046000 { compatible = "brcm,nand-iproc", "brcm,brcmnand-v6.1"; reg = <0x18046000 0x600>, <0xf8105408 0x600>, <0x18046f00 0x20>; diff --git a/arch/arm/boot/dts/bcm-hr2.dtsi b/arch/arm/boot/dts/bcm-hr2.dtsi index e4d49731287f6..30574101471a5 100644 --- a/arch/arm/boot/dts/bcm-hr2.dtsi +++ b/arch/arm/boot/dts/bcm-hr2.dtsi @@ -75,7 +75,7 @@ timer@20200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x20200 0x100>; - interrupts = ; + interrupts = ; clocks = <&periph_clk>; }; @@ -83,7 +83,7 @@ compatible = "arm,cortex-a9-twd-timer"; reg = <0x20600 0x20>; interrupts = ; + IRQ_TYPE_EDGE_RISING)>; clocks = <&periph_clk>; }; @@ -91,7 +91,7 @@ compatible = "arm,cortex-a9-twd-wdt"; reg = <0x20620 0x20>; interrupts = ; + IRQ_TYPE_EDGE_RISING)>; clocks = <&periph_clk>; }; @@ -179,7 +179,7 @@ status = "disabled"; }; - nand: nand@26000 { + nand_controller: nand-controller@26000 { compatible = "brcm,nand-iproc", "brcm,brcmnand-v6.1"; reg = <0x26000 0x600>, <0x11b408 0x600>, @@ -217,7 +217,7 @@ }; qspi: spi@27200 { - compatible = "brcm,spi-bcm-qspi", "brcm,spi-nsp-qspi"; + compatible = "brcm,spi-nsp-qspi", "brcm,spi-bcm-qspi"; reg = <0x027200 0x184>, <0x027000 0x124>, <0x11c408 0x004>, diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index da6d70f09ef19..5a1352fd90d16 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -77,7 +77,7 @@ interrupt-affinity = <&cpu0>, <&cpu1>; }; - mpcore@19000000 { + mpcore-bus@19000000 { compatible = "simple-bus"; ranges = <0x00000000 0x19000000 0x00023000>; #address-cells = <1>; @@ -217,7 +217,7 @@ #dma-cells = <1>; }; - sdio: sdhci@21000 { + sdio: mmc@21000 { compatible = "brcm,sdhci-iproc-cygnus"; reg = <0x21000 0x100>; interrupts = ; @@ -257,17 +257,17 @@ status = "disabled"; }; - mailbox: mailbox@25000 { + mailbox: mailbox@25c00 { compatible = "brcm,iproc-fa2-mbox"; - reg = <0x25000 0x445>; - interrupts = ; + reg = <0x25c00 0x400>; + interrupts = ; #mbox-cells = <1>; brcm,rx-status-len = <32>; brcm,use-bcm-hdr; dma-coherent; }; - nand: nand@26000 { + nand_controller: nand-controller@26000 { compatible = "brcm,nand-iproc", "brcm,brcmnand-v6.1"; reg = <0x026000 0x600>, <0x11b408 0x600>, @@ -282,7 +282,7 @@ }; qspi: spi@27200 { - compatible = "brcm,spi-bcm-qspi", "brcm,spi-nsp-qspi"; + compatible = "brcm,spi-nsp-qspi", "brcm,spi-bcm-qspi"; reg = <0x027200 0x184>, <0x027000 0x124>, <0x11c408 0x004>, diff --git a/arch/arm/boot/dts/bcm2835-rpi-b.dts b/arch/arm/boot/dts/bcm2835-rpi-b.dts index 2b69957e0113e..1838e0fa0ff59 100644 --- a/arch/arm/boot/dts/bcm2835-rpi-b.dts +++ b/arch/arm/boot/dts/bcm2835-rpi-b.dts @@ -53,18 +53,17 @@ "GPIO18", "NC", /* GPIO19 */ "NC", /* GPIO20 */ - "GPIO21", + "CAM_GPIO0", "GPIO22", "GPIO23", "GPIO24", "GPIO25", "NC", /* GPIO26 */ - "CAM_GPIO0", - /* Binary number representing build/revision */ - "CONFIG0", - "CONFIG1", - "CONFIG2", - "CONFIG3", + "GPIO27", + "GPIO28", + "GPIO29", + "GPIO30", + "GPIO31", "NC", /* GPIO32 */ "NC", /* GPIO33 */ "NC", /* GPIO34 */ diff --git a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts index b75af21069f95..34a85ad9f03c2 100644 --- a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts +++ b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts @@ -24,7 +24,7 @@ leds { act { - gpios = <&gpio 47 GPIO_ACTIVE_HIGH>; + gpios = <&gpio 47 GPIO_ACTIVE_LOW>; }; }; @@ -74,16 +74,18 @@ "GPIO27", "SDA0", "SCL0", - "NC", /* GPIO30 */ - "NC", /* GPIO31 */ - "NC", /* GPIO32 */ - "NC", /* GPIO33 */ - "NC", /* GPIO34 */ - "NC", /* GPIO35 */ - "NC", /* GPIO36 */ - "NC", /* GPIO37 */ - "NC", /* GPIO38 */ - "NC", /* GPIO39 */ + /* Used by BT module */ + "CTS0", + "RTS0", + "TXD0", + "RXD0", + /* Used by Wifi */ + "SD1_CLK", + "SD1_CMD", + "SD1_DATA0", + "SD1_DATA1", + "SD1_DATA2", + "SD1_DATA3", "CAM_GPIO1", /* GPIO40 */ "WL_ON", /* GPIO41 */ "NC", /* GPIO42 */ @@ -112,6 +114,7 @@ &sdhci { #address-cells = <1>; #size-cells = <0>; + pinctrl-names = "default"; pinctrl-0 = <&emmc_gpio34 &gpclk2_gpio43>; bus-width = <4>; mmc-pwrseq = <&wifi_pwrseq>; diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts index 74ed6d0478070..d9f63fc59f165 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts @@ -43,7 +43,7 @@ #gpio-cells = <2>; gpio-line-names = "BT_ON", "WL_ON", - "STATUS_LED_R", + "PWR_LED_R", "LAN_RUN", "", "CAM_GPIO0", diff --git a/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts b/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts index 588d9411ceb61..3dfce4312dfc4 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts @@ -63,8 +63,8 @@ "GPIO43", "GPIO44", "GPIO45", - "GPIO46", - "GPIO47", + "SMPS_SCL", + "SMPS_SDA", /* Used by eMMC */ "SD_CLK_R", "SD_CMD_R", diff --git a/arch/arm/boot/dts/bcm2837.dtsi b/arch/arm/boot/dts/bcm2837.dtsi index beb6c502dadc7..bcad098a7fccb 100644 --- a/arch/arm/boot/dts/bcm2837.dtsi +++ b/arch/arm/boot/dts/bcm2837.dtsi @@ -38,12 +38,26 @@ #size-cells = <0>; enable-method = "brcm,bcm2836-smp"; // for ARM 32-bit + /* Source for d/i-cache-line-size and d/i-cache-sets + * https://developer.arm.com/documentation/ddi0500/e/level-1-memory-system + * /about-the-l1-memory-system?lang=en + * + * Source for d/i-cache-size + * https://magpi.raspberrypi.com/articles/raspberry-pi-3-specs-benchmarks + */ cpu0: cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a53"; reg = <0>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000d8>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set + i-cache-size = <0x8000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + next-level-cache = <&l2>; }; cpu1: cpu@1 { @@ -52,6 +66,13 @@ reg = <1>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000e0>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set + i-cache-size = <0x8000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + next-level-cache = <&l2>; }; cpu2: cpu@2 { @@ -60,6 +81,13 @@ reg = <2>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000e8>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set + i-cache-size = <0x8000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + next-level-cache = <&l2>; }; cpu3: cpu@3 { @@ -68,6 +96,27 @@ reg = <3>; enable-method = "spin-table"; cpu-release-addr = <0x0 0x000000f0>; + d-cache-size = <0x8000>; + d-cache-line-size = <64>; + d-cache-sets = <128>; // 32KiB(size)/64(line-size)=512ways/4-way set + i-cache-size = <0x8000>; + i-cache-line-size = <64>; + i-cache-sets = <256>; // 32KiB(size)/64(line-size)=512ways/2-way set + next-level-cache = <&l2>; + }; + + /* Source for cache-line-size + cache-sets + * https://developer.arm.com/documentation/ddi0500 + * /e/level-2-memory-system/about-the-l2-memory-system?lang=en + * Source for cache-size + * https://datasheets.raspberrypi.com/cm/cm1-and-cm3-datasheet.pdf + */ + l2: l2-cache0 { + compatible = "cache"; + cache-size = <0x80000>; + cache-line-size = <64>; + cache-sets = <512>; // 512KiB(size)/64(line-size)=8192ways/16-way set + cache-level = <2>; }; }; }; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 2d191fcbc2cc1..af81f386793ca 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -40,7 +40,7 @@ trips { cpu-crit { - temperature = <80000>; + temperature = <90000>; hysteresis = <0>; type = "critical"; }; @@ -183,6 +183,7 @@ interrupt-controller; #interrupt-cells = <2>; + gpio-ranges = <&gpio 0 0 54>; /* Defines pin muxing groups according to * BCM2835-ARM-Peripherals.pdf page 102. @@ -488,6 +489,7 @@ "dsi0_ddr2", "dsi0_ddr"; + status = "disabled"; }; thermal: thermal@7e212000 { diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi index 372dc1eb88a0e..05d67f9769118 100644 --- a/arch/arm/boot/dts/bcm5301x.dtsi +++ b/arch/arm/boot/dts/bcm5301x.dtsi @@ -242,6 +242,8 @@ gpio-controller; #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; }; pcie0: pcie@12000 { @@ -353,8 +355,8 @@ mdio: mdio@18003000 { compatible = "brcm,iproc-mdio"; reg = <0x18003000 0x8>; - #size-cells = <1>; - #address-cells = <0>; + #size-cells = <0>; + #address-cells = <1>; }; mdio-bus-mux@18003000 { @@ -387,7 +389,7 @@ i2c0: i2c@18009000 { compatible = "brcm,iproc-i2c"; reg = <0x18009000 0x50>; - interrupts = ; + interrupts = ; #address-cells = <1>; #size-cells = <0>; clock-frequency = <100000>; @@ -488,33 +490,33 @@ }; spi@18029200 { - compatible = "brcm,spi-bcm-qspi", "brcm,spi-nsp-qspi"; + compatible = "brcm,spi-nsp-qspi", "brcm,spi-bcm-qspi"; reg = <0x18029200 0x184>, <0x18029000 0x124>, <0x1811b408 0x004>, <0x180293a0 0x01c>; reg-names = "mspi", "bspi", "intr_regs", "intr_status_reg"; - interrupts = , + interrupts = , + , + , , , , - , - , - ; - interrupt-names = "spi_lr_fullness_reached", + ; + interrupt-names = "mspi_done", + "mspi_halted", + "spi_lr_fullness_reached", "spi_lr_session_aborted", "spi_lr_impatient", "spi_lr_session_done", - "spi_lr_overhead", - "mspi_done", - "mspi_halted"; + "spi_lr_overread"; clocks = <&iprocmed>; clock-names = "iprocmed"; num-cs = <2>; #address-cells = <1>; #size-cells = <0>; - spi_nor: spi-nor@0 { + spi_nor: flash@0 { compatible = "jedec,spi-nor"; reg = <0>; spi-max-frequency = <20000000>; diff --git a/arch/arm/boot/dts/bcm63138.dtsi b/arch/arm/boot/dts/bcm63138.dtsi index 9c0325cf9e22e..cca49a2e2d623 100644 --- a/arch/arm/boot/dts/bcm63138.dtsi +++ b/arch/arm/boot/dts/bcm63138.dtsi @@ -203,7 +203,7 @@ status = "disabled"; }; - nand: nand@2000 { + nand_controller: nand-controller@2000 { #address-cells = <1>; #size-cells = <0>; compatible = "brcm,nand-bcm63138", "brcm,brcmnand-v7.0", "brcm,brcmnand"; diff --git a/arch/arm/boot/dts/bcm7445-bcm97445svmb.dts b/arch/arm/boot/dts/bcm7445-bcm97445svmb.dts index 8313b7cad5427..f92d2cf859726 100644 --- a/arch/arm/boot/dts/bcm7445-bcm97445svmb.dts +++ b/arch/arm/boot/dts/bcm7445-bcm97445svmb.dts @@ -14,10 +14,10 @@ }; }; -&nand { +&nand_controller { status = "okay"; - nandcs@1 { + nand@1 { compatible = "brcm,nandcs"; reg = <1>; nand-ecc-step-size = <512>; diff --git a/arch/arm/boot/dts/bcm7445.dtsi b/arch/arm/boot/dts/bcm7445.dtsi index 58f67c9b830b8..5ac2042515b8f 100644 --- a/arch/arm/boot/dts/bcm7445.dtsi +++ b/arch/arm/boot/dts/bcm7445.dtsi @@ -148,7 +148,7 @@ reg-names = "aon-ctrl", "aon-sram"; }; - nand: nand@3e2800 { + nand_controller: nand-controller@3e2800 { status = "disabled"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/bcm911360_entphn.dts b/arch/arm/boot/dts/bcm911360_entphn.dts index b2d323f4a5aba..a76c74b44bbaf 100644 --- a/arch/arm/boot/dts/bcm911360_entphn.dts +++ b/arch/arm/boot/dts/bcm911360_entphn.dts @@ -82,8 +82,8 @@ status = "okay"; }; -&nand { - nandcs@1 { +&nand_controller { + nand@1 { compatible = "brcm,nandcs"; reg = <0>; nand-on-flash-bbt; diff --git a/arch/arm/boot/dts/bcm958300k.dts b/arch/arm/boot/dts/bcm958300k.dts index b4a1392bd5a6c..dda3e11b711f6 100644 --- a/arch/arm/boot/dts/bcm958300k.dts +++ b/arch/arm/boot/dts/bcm958300k.dts @@ -60,8 +60,8 @@ status = "okay"; }; -&nand { - nandcs@1 { +&nand_controller { + nand@1 { compatible = "brcm,nandcs"; reg = <0>; nand-on-flash-bbt; diff --git a/arch/arm/boot/dts/bcm958305k.dts b/arch/arm/boot/dts/bcm958305k.dts index 3378683321d3c..ea3c6b88b313b 100644 --- a/arch/arm/boot/dts/bcm958305k.dts +++ b/arch/arm/boot/dts/bcm958305k.dts @@ -68,8 +68,8 @@ status = "okay"; }; -&nand { - nandcs@1 { +&nand_controller { + nand@1 { compatible = "brcm,nandcs"; reg = <0>; nand-on-flash-bbt; diff --git a/arch/arm/boot/dts/bcm958522er.dts b/arch/arm/boot/dts/bcm958522er.dts index 8c388eb8a08f8..e9b2d3b37ca45 100644 --- a/arch/arm/boot/dts/bcm958522er.dts +++ b/arch/arm/boot/dts/bcm958522er.dts @@ -70,8 +70,8 @@ status = "okay"; }; -&nand { - nandcs@0 { +&nand_controller { + nand@0 { compatible = "brcm,nandcs"; reg = <0>; nand-on-flash-bbt; diff --git a/arch/arm/boot/dts/bcm958525er.dts b/arch/arm/boot/dts/bcm958525er.dts index c339771bb22e0..dfe145a3d05a2 100644 --- a/arch/arm/boot/dts/bcm958525er.dts +++ b/arch/arm/boot/dts/bcm958525er.dts @@ -70,8 +70,8 @@ status = "okay"; }; -&nand { - nandcs@0 { +&nand_controller { + nand@0 { compatible = "brcm,nandcs"; reg = <0>; nand-on-flash-bbt; diff --git a/arch/arm/boot/dts/bcm958525xmc.dts b/arch/arm/boot/dts/bcm958525xmc.dts index 1c72ec8288de4..17e6a683e678a 100644 --- a/arch/arm/boot/dts/bcm958525xmc.dts +++ b/arch/arm/boot/dts/bcm958525xmc.dts @@ -86,8 +86,8 @@ }; }; -&nand { - nandcs@0 { +&nand_controller { + nand@0 { compatible = "brcm,nandcs"; reg = <0>; nand-on-flash-bbt; diff --git a/arch/arm/boot/dts/bcm958622hr.dts b/arch/arm/boot/dts/bcm958622hr.dts index 96a021cebd97b..1d1bc8dbb3425 100644 --- a/arch/arm/boot/dts/bcm958622hr.dts +++ b/arch/arm/boot/dts/bcm958622hr.dts @@ -74,8 +74,8 @@ status = "okay"; }; -&nand { - nandcs@0 { +&nand_controller { + nand@0 { compatible = "brcm,nandcs"; reg = <0>; nand-on-flash-bbt; diff --git a/arch/arm/boot/dts/bcm958623hr.dts b/arch/arm/boot/dts/bcm958623hr.dts index b2c7f21d471e6..d5d9a273bb6d1 100644 --- a/arch/arm/boot/dts/bcm958623hr.dts +++ b/arch/arm/boot/dts/bcm958623hr.dts @@ -74,8 +74,8 @@ status = "okay"; }; -&nand { - nandcs@0 { +&nand_controller { + nand@0 { compatible = "brcm,nandcs"; reg = <0>; nand-on-flash-bbt; diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts index a2c9de35ddfbd..670363bca9173 100644 --- a/arch/arm/boot/dts/bcm958625hr.dts +++ b/arch/arm/boot/dts/bcm958625hr.dts @@ -90,8 +90,8 @@ status = "okay"; }; -&nand { - nandcs@0 { +&nand_controller { + nand@0 { compatible = "brcm,nandcs"; reg = <0>; nand-on-flash-bbt; diff --git a/arch/arm/boot/dts/bcm958625k.dts b/arch/arm/boot/dts/bcm958625k.dts index 3fcca12d83c2d..f15cd38c849e4 100644 --- a/arch/arm/boot/dts/bcm958625k.dts +++ b/arch/arm/boot/dts/bcm958625k.dts @@ -64,8 +64,8 @@ status = "okay"; }; -&nand { - nandcs@0 { +&nand_controller { + nand@0 { compatible = "brcm,nandcs"; reg = <0>; nand-on-flash-bbt; diff --git a/arch/arm/boot/dts/bcm963138dvt.dts b/arch/arm/boot/dts/bcm963138dvt.dts index 5b177274f1826..df5c8ab906273 100644 --- a/arch/arm/boot/dts/bcm963138dvt.dts +++ b/arch/arm/boot/dts/bcm963138dvt.dts @@ -31,10 +31,10 @@ status = "okay"; }; -&nand { +&nand_controller { status = "okay"; - nandcs@0 { + nand@0 { compatible = "brcm,nandcs"; reg = <0>; nand-ecc-strength = <4>; diff --git a/arch/arm/boot/dts/bcm988312hr.dts b/arch/arm/boot/dts/bcm988312hr.dts index edd0f630e0251..16b212cc8a2a0 100644 --- a/arch/arm/boot/dts/bcm988312hr.dts +++ b/arch/arm/boot/dts/bcm988312hr.dts @@ -74,8 +74,8 @@ status = "okay"; }; -&nand { - nandcs@0 { +&nand_controller { + nand@0 { compatible = "brcm,nandcs"; reg = <0>; nand-on-flash-bbt; diff --git a/arch/arm/boot/dts/dm8148-evm.dts b/arch/arm/boot/dts/dm8148-evm.dts index 3931fb068ff09..91d1018ab75fc 100644 --- a/arch/arm/boot/dts/dm8148-evm.dts +++ b/arch/arm/boot/dts/dm8148-evm.dts @@ -24,12 +24,12 @@ &cpsw_emac0 { phy-handle = <ðphy0>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; }; &cpsw_emac1 { phy-handle = <ðphy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; }; &davinci_mdio { diff --git a/arch/arm/boot/dts/dm8148-t410.dts b/arch/arm/boot/dts/dm8148-t410.dts index 9e43d5ec0bb2f..79ccdd4470f4c 100644 --- a/arch/arm/boot/dts/dm8148-t410.dts +++ b/arch/arm/boot/dts/dm8148-t410.dts @@ -33,12 +33,12 @@ &cpsw_emac0 { phy-handle = <ðphy0>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; }; &cpsw_emac1 { phy-handle = <ðphy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; }; &davinci_mdio { diff --git a/arch/arm/boot/dts/dra62x-j5eco-evm.dts b/arch/arm/boot/dts/dra62x-j5eco-evm.dts index 861ab90a3f3aa..c16e183822bee 100644 --- a/arch/arm/boot/dts/dra62x-j5eco-evm.dts +++ b/arch/arm/boot/dts/dra62x-j5eco-evm.dts @@ -24,12 +24,12 @@ &cpsw_emac0 { phy-handle = <ðphy0>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; }; &cpsw_emac1 { phy-handle = <ðphy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; }; &davinci_mdio { diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi index 5cac2dd58241f..3f845a8531f40 100644 --- a/arch/arm/boot/dts/dra7-l4.dtsi +++ b/arch/arm/boot/dts/dra7-l4.dtsi @@ -1176,7 +1176,7 @@ }; }; - target-module@34000 { /* 0x48034000, ap 7 46.0 */ + timer3_target: target-module@34000 { /* 0x48034000, ap 7 46.0 */ compatible = "ti,sysc-omap4-timer", "ti,sysc"; ti,hwmods = "timer3"; reg = <0x34000 0x4>, @@ -1204,7 +1204,7 @@ }; }; - target-module@36000 { /* 0x48036000, ap 9 4e.0 */ + timer4_target: target-module@36000 { /* 0x48036000, ap 9 4e.0 */ compatible = "ti,sysc-omap4-timer", "ti,sysc"; ti,hwmods = "timer4"; reg = <0x36000 0x4>, @@ -1326,7 +1326,7 @@ }; }; - target-module@55000 { /* 0x48055000, ap 13 0e.0 */ + gpio2_target: target-module@55000 { /* 0x48055000, ap 13 0e.0 */ compatible = "ti,sysc-omap2", "ti,sysc"; reg = <0x55000 0x4>, <0x55010 0x4>, @@ -1359,7 +1359,7 @@ }; }; - target-module@57000 { /* 0x48057000, ap 15 06.0 */ + gpio3_target: target-module@57000 { /* 0x48057000, ap 15 06.0 */ compatible = "ti,sysc-omap2", "ti,sysc"; reg = <0x57000 0x4>, <0x57010 0x4>, @@ -3059,7 +3059,7 @@ davinci_mdio: mdio@1000 { compatible = "ti,cpsw-mdio","ti,davinci_mdio"; - clocks = <&gmac_clkctrl DRA7_GMAC_GMAC_CLKCTRL 0>; + clocks = <&gmac_main_clk>; clock-names = "fck"; #address-cells = <1>; #size-cells = <0>; @@ -3413,6 +3413,7 @@ clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER13_CLKCTRL 24>; clock-names = "fck"; interrupts = ; + ti,timer-pwm; }; }; @@ -3441,6 +3442,7 @@ clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER14_CLKCTRL 24>; clock-names = "fck"; interrupts = ; + ti,timer-pwm; }; }; @@ -3469,6 +3471,7 @@ clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER15_CLKCTRL 24>; clock-names = "fck"; interrupts = ; + ti,timer-pwm; }; }; @@ -3497,6 +3500,7 @@ clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER16_CLKCTRL 24>; clock-names = "fck"; interrupts = ; + ti,timer-pwm; }; }; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 953f0ffce2a90..f73324cb31f31 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -46,6 +46,7 @@ timer { compatible = "arm,armv7-timer"; + status = "disabled"; /* See ARM architected timer wrap erratum i940 */ interrupts = , , , @@ -148,6 +149,7 @@ #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x0 0x0 0xc0000000>; + dma-ranges = <0x80000000 0x0 0x80000000 0x80000000>; ti,hwmods = "l3_main_1", "l3_main_2"; reg = <0x0 0x44000000 0x0 0x1000000>, <0x0 0x45000000 0x0 0x1000>; @@ -171,6 +173,7 @@ #address-cells = <1>; ranges = <0x51000000 0x51000000 0x3000 0x0 0x20000000 0x10000000>; + dma-ranges; /** * To enable PCI endpoint mode, disable the pcie1_rc * node and enable pcie1_ep mode. @@ -228,6 +231,7 @@ #address-cells = <1>; ranges = <0x51800000 0x51800000 0x3000 0x0 0x30000000 0x10000000>; + dma-ranges; status = "disabled"; pcie2_rc: pcie@51800000 { reg = <0x51800000 0x2000>, <0x51802000 0x14c>, <0x1000 0x2000>; @@ -763,3 +767,22 @@ #include "dra7-l4.dtsi" #include "dra7xx-clocks.dtsi" + +/* Local timers, see ARM architected timer wrap erratum i940 */ +&timer3_target { + ti,no-reset-on-init; + ti,no-idle; + timer@0 { + assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER3_CLKCTRL 24>; + assigned-clock-parents = <&timer_sys_clk_div>; + }; +}; + +&timer4_target { + ti,no-reset-on-init; + ti,no-idle; + timer@0 { + assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER4_CLKCTRL 24>; + assigned-clock-parents = <&timer_sys_clk_div>; + }; +}; diff --git a/arch/arm/boot/dts/dra76x.dtsi b/arch/arm/boot/dts/dra76x.dtsi index cdcba3f561c4b..859e4382ac4bb 100644 --- a/arch/arm/boot/dts/dra76x.dtsi +++ b/arch/arm/boot/dts/dra76x.dtsi @@ -32,8 +32,8 @@ interrupts = , ; interrupt-names = "int0", "int1"; - clocks = <&mcan_clk>, <&l3_iclk_div>; - clock-names = "cclk", "hclk"; + clocks = <&l3_iclk_div>, <&mcan_clk>; + clock-names = "hclk", "cclk"; bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>; }; }; @@ -86,3 +86,8 @@ &usb4_tm { status = "disabled"; }; + +&mmc3 { + /* dra76x is not affected by i887 */ + max-frequency = <96000000>; +}; diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi index 93e1eb83bed9a..d7d98d2069dff 100644 --- a/arch/arm/boot/dts/dra7xx-clocks.dtsi +++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi @@ -796,16 +796,6 @@ clock-div = <1>; }; - ipu1_gfclk_mux: ipu1_gfclk_mux@520 { - #clock-cells = <0>; - compatible = "ti,mux-clock"; - clocks = <&dpll_abe_m2x2_ck>, <&dpll_core_h22x2_ck>; - ti,bit-shift = <24>; - reg = <0x0520>; - assigned-clocks = <&ipu1_gfclk_mux>; - assigned-clock-parents = <&dpll_core_h22x2_ck>; - }; - dummy_ck: dummy_ck { #clock-cells = <0>; compatible = "fixed-clock"; @@ -1564,6 +1554,8 @@ compatible = "ti,clkctrl"; reg = <0x20 0x4>; #clock-cells = <2>; + assigned-clocks = <&ipu1_clkctrl DRA7_IPU1_MMU_IPU1_CLKCTRL 24>; + assigned-clock-parents = <&dpll_core_h22x2_ck>; }; ipu_clkctrl: ipu-clkctrl@50 { diff --git a/arch/arm/boot/dts/exynos3250-artik5.dtsi b/arch/arm/boot/dts/exynos3250-artik5.dtsi index dee35e3a5c4ba..69d134db6e94e 100644 --- a/arch/arm/boot/dts/exynos3250-artik5.dtsi +++ b/arch/arm/boot/dts/exynos3250-artik5.dtsi @@ -75,7 +75,7 @@ s2mps14_pmic@66 { compatible = "samsung,s2mps14-pmic"; interrupt-parent = <&gpx3>; - interrupts = <5 IRQ_TYPE_NONE>; + interrupts = <5 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; pinctrl-0 = <&s2mps14_irq>; reg = <0x66>; diff --git a/arch/arm/boot/dts/exynos3250-monk.dts b/arch/arm/boot/dts/exynos3250-monk.dts index 248bd372fe705..a23a8749c94e4 100644 --- a/arch/arm/boot/dts/exynos3250-monk.dts +++ b/arch/arm/boot/dts/exynos3250-monk.dts @@ -195,7 +195,7 @@ s2mps14_pmic@66 { compatible = "samsung,s2mps14-pmic"; interrupt-parent = <&gpx0>; - interrupts = <7 IRQ_TYPE_NONE>; + interrupts = <7 IRQ_TYPE_LEVEL_LOW>; reg = <0x66>; wakeup-source; diff --git a/arch/arm/boot/dts/exynos3250-rinato.dts b/arch/arm/boot/dts/exynos3250-rinato.dts index 86c26a4edfd72..468932f452895 100644 --- a/arch/arm/boot/dts/exynos3250-rinato.dts +++ b/arch/arm/boot/dts/exynos3250-rinato.dts @@ -260,7 +260,7 @@ s2mps14_pmic@66 { compatible = "samsung,s2mps14-pmic"; interrupt-parent = <&gpx0>; - interrupts = <7 IRQ_TYPE_NONE>; + interrupts = <7 IRQ_TYPE_LEVEL_LOW>; reg = <0x66>; wakeup-source; diff --git a/arch/arm/boot/dts/exynos4210-universal_c210.dts b/arch/arm/boot/dts/exynos4210-universal_c210.dts index 09d3d54d09ff8..1b5578381d782 100644 --- a/arch/arm/boot/dts/exynos4210-universal_c210.dts +++ b/arch/arm/boot/dts/exynos4210-universal_c210.dts @@ -115,7 +115,7 @@ gpio-sck = <&gpy3 1 GPIO_ACTIVE_HIGH>; gpio-mosi = <&gpy3 3 GPIO_ACTIVE_HIGH>; num-chipselects = <1>; - cs-gpios = <&gpy4 3 GPIO_ACTIVE_HIGH>; + cs-gpios = <&gpy4 3 GPIO_ACTIVE_LOW>; lcd@0 { compatible = "samsung,ld9040"; @@ -124,8 +124,6 @@ vci-supply = <&ldo17_reg>; reset-gpios = <&gpy4 5 GPIO_ACTIVE_HIGH>; spi-max-frequency = <1200000>; - spi-cpol; - spi-cpha; power-on-delay = <10>; reset-delay = <10>; panel-width-mm = <90>; diff --git a/arch/arm/boot/dts/exynos4412-galaxy-s3.dtsi b/arch/arm/boot/dts/exynos4412-galaxy-s3.dtsi index ce87d2ff27aab..4b9c4cab0314b 100644 --- a/arch/arm/boot/dts/exynos4412-galaxy-s3.dtsi +++ b/arch/arm/boot/dts/exynos4412-galaxy-s3.dtsi @@ -68,7 +68,7 @@ i2c_cm36651: i2c-gpio-2 { compatible = "i2c-gpio"; - gpios = <&gpf0 0 GPIO_ACTIVE_LOW>, <&gpf0 1 GPIO_ACTIVE_LOW>; + gpios = <&gpf0 0 GPIO_ACTIVE_HIGH>, <&gpf0 1 GPIO_ACTIVE_HIGH>; i2c-gpio,delay-us = <2>; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/exynos4412-midas.dtsi b/arch/arm/boot/dts/exynos4412-midas.dtsi index 83be3a797411e..fedb21377c66c 100644 --- a/arch/arm/boot/dts/exynos4412-midas.dtsi +++ b/arch/arm/boot/dts/exynos4412-midas.dtsi @@ -139,7 +139,7 @@ max77693@66 { compatible = "maxim,max77693"; interrupt-parent = <&gpx1>; - interrupts = <5 IRQ_TYPE_EDGE_FALLING>; + interrupts = <5 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; pinctrl-0 = <&max77693_irq>; reg = <0x66>; @@ -187,7 +187,7 @@ max77693-fuel-gauge@36 { compatible = "maxim,max17047"; interrupt-parent = <&gpx2>; - interrupts = <3 IRQ_TYPE_EDGE_FALLING>; + interrupts = <3 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; pinctrl-0 = <&max77693_fuel_irq>; reg = <0x36>; @@ -588,7 +588,7 @@ max77686: max77686_pmic@9 { compatible = "maxim,max77686"; interrupt-parent = <&gpx0>; - interrupts = <7 IRQ_TYPE_NONE>; + interrupts = <7 IRQ_TYPE_LEVEL_LOW>; pinctrl-0 = <&max77686_irq>; pinctrl-names = "default"; reg = <0x09>; diff --git a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi index ea55f377d17c0..424d12ecc9018 100644 --- a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi +++ b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi @@ -274,7 +274,7 @@ max77686: pmic@9 { compatible = "maxim,max77686"; interrupt-parent = <&gpx3>; - interrupts = <2 IRQ_TYPE_NONE>; + interrupts = <2 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; pinctrl-0 = <&max77686_irq>; reg = <0x09>; diff --git a/arch/arm/boot/dts/exynos5250-pinctrl.dtsi b/arch/arm/boot/dts/exynos5250-pinctrl.dtsi index d31a68672bfac..d7d756614edd1 100644 --- a/arch/arm/boot/dts/exynos5250-pinctrl.dtsi +++ b/arch/arm/boot/dts/exynos5250-pinctrl.dtsi @@ -260,7 +260,7 @@ }; uart3_data: uart3-data { - samsung,pins = "gpa1-4", "gpa1-4"; + samsung,pins = "gpa1-4", "gpa1-5"; samsung,pin-function = ; samsung,pin-pud = ; samsung,pin-drv = ; diff --git a/arch/arm/boot/dts/exynos5250-smdk5250.dts b/arch/arm/boot/dts/exynos5250-smdk5250.dts index 6dc96948a9ccc..c7e350ea03fe3 100644 --- a/arch/arm/boot/dts/exynos5250-smdk5250.dts +++ b/arch/arm/boot/dts/exynos5250-smdk5250.dts @@ -117,6 +117,9 @@ status = "okay"; ddc = <&i2c_2>; hpd-gpios = <&gpx3 7 GPIO_ACTIVE_HIGH>; + vdd-supply = <&ldo8_reg>; + vdd_osc-supply = <&ldo10_reg>; + vdd_pll-supply = <&ldo8_reg>; }; &i2c_0 { @@ -125,7 +128,7 @@ samsung,i2c-max-bus-freq = <20000>; eeprom@50 { - compatible = "samsung,s524ad0xd1"; + compatible = "samsung,s524ad0xd1", "atmel,24c128"; reg = <0x50>; }; @@ -133,7 +136,7 @@ compatible = "maxim,max77686"; reg = <0x09>; interrupt-parent = <&gpx3>; - interrupts = <2 IRQ_TYPE_NONE>; + interrupts = <2 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; pinctrl-0 = <&max77686_irq>; wakeup-source; @@ -284,7 +287,7 @@ samsung,i2c-max-bus-freq = <20000>; eeprom@51 { - compatible = "samsung,s524ad0xd1"; + compatible = "samsung,s524ad0xd1", "atmel,24c128"; reg = <0x51>; }; diff --git a/arch/arm/boot/dts/exynos5250-snow-common.dtsi b/arch/arm/boot/dts/exynos5250-snow-common.dtsi index c952a615148e5..737f0e20a4525 100644 --- a/arch/arm/boot/dts/exynos5250-snow-common.dtsi +++ b/arch/arm/boot/dts/exynos5250-snow-common.dtsi @@ -292,7 +292,7 @@ max77686: max77686@9 { compatible = "maxim,max77686"; interrupt-parent = <&gpx3>; - interrupts = <2 IRQ_TYPE_NONE>; + interrupts = <2 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; pinctrl-0 = <&max77686_irq>; wakeup-source; diff --git a/arch/arm/boot/dts/exynos5250-spring.dts b/arch/arm/boot/dts/exynos5250-spring.dts index 3d501926c2278..2355c53164840 100644 --- a/arch/arm/boot/dts/exynos5250-spring.dts +++ b/arch/arm/boot/dts/exynos5250-spring.dts @@ -108,7 +108,7 @@ compatible = "samsung,s5m8767-pmic"; reg = <0x66>; interrupt-parent = <&gpx3>; - interrupts = <2 IRQ_TYPE_NONE>; + interrupts = <2 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; pinctrl-0 = <&s5m8767_irq &s5m8767_dvs &s5m8767_ds>; wakeup-source; diff --git a/arch/arm/boot/dts/exynos5410-odroidxu.dts b/arch/arm/boot/dts/exynos5410-odroidxu.dts index e0db251e253f0..f68baaf58f9e3 100644 --- a/arch/arm/boot/dts/exynos5410-odroidxu.dts +++ b/arch/arm/boot/dts/exynos5410-odroidxu.dts @@ -327,6 +327,8 @@ regulator-name = "vddq_lcd"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + /* Supplies also GPK and GPJ */ + regulator-always-on; }; ldo8_reg: LDO8 { @@ -637,11 +639,11 @@ }; &usbdrd_dwc3_0 { - dr_mode = "host"; + dr_mode = "peripheral"; }; &usbdrd_dwc3_1 { - dr_mode = "peripheral"; + dr_mode = "host"; }; &usbdrd3_0 { diff --git a/arch/arm/boot/dts/exynos5410-pinctrl.dtsi b/arch/arm/boot/dts/exynos5410-pinctrl.dtsi index 369a8a7f21050..481ee99aa9c97 100644 --- a/arch/arm/boot/dts/exynos5410-pinctrl.dtsi +++ b/arch/arm/boot/dts/exynos5410-pinctrl.dtsi @@ -560,6 +560,34 @@ interrupt-controller; #interrupt-cells = <2>; }; + + usb3_1_oc: usb3-1-oc { + samsung,pins = "gpk2-4", "gpk2-5"; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; + }; + + usb3_1_vbusctrl: usb3-1-vbusctrl { + samsung,pins = "gpk2-6", "gpk2-7"; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; + }; + + usb3_0_oc: usb3-0-oc { + samsung,pins = "gpk3-0", "gpk3-1"; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; + }; + + usb3_0_vbusctrl: usb3-0-vbusctrl { + samsung,pins = "gpk3-2", "gpk3-3"; + samsung,pin-function = ; + samsung,pin-pud = ; + samsung,pin-drv = ; + }; }; &pinctrl_2 { diff --git a/arch/arm/boot/dts/exynos5410.dtsi b/arch/arm/boot/dts/exynos5410.dtsi index e6f78b1cee7c8..d077373cf872d 100644 --- a/arch/arm/boot/dts/exynos5410.dtsi +++ b/arch/arm/boot/dts/exynos5410.dtsi @@ -398,6 +398,8 @@ &usbdrd3_0 { clocks = <&clock CLK_USBD300>; clock-names = "usbdrd30"; + pinctrl-names = "default"; + pinctrl-0 = <&usb3_0_oc>, <&usb3_0_vbusctrl>; }; &usbdrd_phy0 { @@ -409,6 +411,8 @@ &usbdrd3_1 { clocks = <&clock CLK_USBD301>; clock-names = "usbdrd30"; + pinctrl-names = "default"; + pinctrl-0 = <&usb3_1_oc>, <&usb3_1_vbusctrl>; }; &usbdrd_dwc3_1 { diff --git a/arch/arm/boot/dts/exynos5420-arndale-octa.dts b/arch/arm/boot/dts/exynos5420-arndale-octa.dts index 592d7b45ecc87..53bf988855e0d 100644 --- a/arch/arm/boot/dts/exynos5420-arndale-octa.dts +++ b/arch/arm/boot/dts/exynos5420-arndale-octa.dts @@ -349,7 +349,7 @@ reg = <0x66>; interrupt-parent = <&gpx3>; - interrupts = <2 IRQ_TYPE_EDGE_FALLING>; + interrupts = <2 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; pinctrl-0 = <&s2mps11_irq>; diff --git a/arch/arm/boot/dts/exynos5420-smdk5420.dts b/arch/arm/boot/dts/exynos5420-smdk5420.dts index 8240e51869729..fb92d87d8dedc 100644 --- a/arch/arm/boot/dts/exynos5420-smdk5420.dts +++ b/arch/arm/boot/dts/exynos5420-smdk5420.dts @@ -132,6 +132,9 @@ hpd-gpios = <&gpx3 7 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&hdmi_hpd_irq>; + vdd-supply = <&ldo6_reg>; + vdd_osc-supply = <&ldo7_reg>; + vdd_pll-supply = <&ldo6_reg>; }; &hsi2c_4 { diff --git a/arch/arm/boot/dts/exynos5422-odroid-core.dtsi b/arch/arm/boot/dts/exynos5422-odroid-core.dtsi index 829147e320e08..9e64a4ab94940 100644 --- a/arch/arm/boot/dts/exynos5422-odroid-core.dtsi +++ b/arch/arm/boot/dts/exynos5422-odroid-core.dtsi @@ -141,7 +141,7 @@ samsung,s2mps11-acokb-ground; interrupt-parent = <&gpx0>; - interrupts = <4 IRQ_TYPE_EDGE_FALLING>; + interrupts = <4 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; pinctrl-0 = <&s2mps11_irq>; diff --git a/arch/arm/boot/dts/exynos5422-odroidhc1.dts b/arch/arm/boot/dts/exynos5422-odroidhc1.dts index d271e75488262..fce5a4579693e 100644 --- a/arch/arm/boot/dts/exynos5422-odroidhc1.dts +++ b/arch/arm/boot/dts/exynos5422-odroidhc1.dts @@ -22,7 +22,7 @@ label = "blue:heartbeat"; pwms = <&pwm 2 2000000 0>; pwm-names = "pwm2"; - max_brightness = <255>; + max-brightness = <255>; linux,default-trigger = "heartbeat"; }; }; diff --git a/arch/arm/boot/dts/exynos5422-odroidxu4.dts b/arch/arm/boot/dts/exynos5422-odroidxu4.dts index 892d389d6d091..1fc3544854030 100644 --- a/arch/arm/boot/dts/exynos5422-odroidxu4.dts +++ b/arch/arm/boot/dts/exynos5422-odroidxu4.dts @@ -24,7 +24,7 @@ label = "blue:heartbeat"; pwms = <&pwm 2 2000000 0>; pwm-names = "pwm2"; - max_brightness = <255>; + max-brightness = <255>; linux,default-trigger = "heartbeat"; }; }; diff --git a/arch/arm/boot/dts/exynos54xx-odroidxu-leds.dtsi b/arch/arm/boot/dts/exynos54xx-odroidxu-leds.dtsi index 56acd832f0b3c..16e1087ec7172 100644 --- a/arch/arm/boot/dts/exynos54xx-odroidxu-leds.dtsi +++ b/arch/arm/boot/dts/exynos54xx-odroidxu-leds.dtsi @@ -22,7 +22,7 @@ * Green LED is much brighter than the others * so limit its max brightness */ - max_brightness = <127>; + max-brightness = <127>; linux,default-trigger = "mmc0"; }; @@ -30,7 +30,7 @@ label = "blue:heartbeat"; pwms = <&pwm 2 2000000 0>; pwm-names = "pwm2"; - max_brightness = <255>; + max-brightness = <255>; linux,default-trigger = "heartbeat"; }; }; diff --git a/arch/arm/boot/dts/gemini-dlink-dns-313.dts b/arch/arm/boot/dts/gemini-dlink-dns-313.dts index 360642a02a488..d0bbf2b970dfc 100644 --- a/arch/arm/boot/dts/gemini-dlink-dns-313.dts +++ b/arch/arm/boot/dts/gemini-dlink-dns-313.dts @@ -140,7 +140,7 @@ }; }; - mdio0: ethernet-phy { + mdio0: mdio { compatible = "virtual,mdio-gpio"; /* Uses MDC and MDIO */ gpios = <&gpio0 22 GPIO_ACTIVE_HIGH>, /* MDC */ diff --git a/arch/arm/boot/dts/gemini-nas4220b.dts b/arch/arm/boot/dts/gemini-nas4220b.dts index 521714f38eeda..60cec653ac7c6 100644 --- a/arch/arm/boot/dts/gemini-nas4220b.dts +++ b/arch/arm/boot/dts/gemini-nas4220b.dts @@ -62,7 +62,7 @@ }; }; - mdio0: ethernet-phy { + mdio0: mdio { compatible = "virtual,mdio-gpio"; gpios = <&gpio0 22 GPIO_ACTIVE_HIGH>, /* MDC */ <&gpio0 21 GPIO_ACTIVE_HIGH>; /* MDIO */ @@ -84,7 +84,7 @@ partitions { compatible = "redboot-fis"; /* Eraseblock at 0xfe0000 */ - fis-index-block = <0x1fc>; + fis-index-block = <0x7f>; }; }; diff --git a/arch/arm/boot/dts/gemini-rut1xx.dts b/arch/arm/boot/dts/gemini-rut1xx.dts index 9611ddf067927..0ebda4efd9d0f 100644 --- a/arch/arm/boot/dts/gemini-rut1xx.dts +++ b/arch/arm/boot/dts/gemini-rut1xx.dts @@ -56,7 +56,7 @@ }; }; - mdio0: ethernet-phy { + mdio0: mdio { compatible = "virtual,mdio-gpio"; gpios = <&gpio0 22 GPIO_ACTIVE_HIGH>, /* MDC */ <&gpio0 21 GPIO_ACTIVE_HIGH>; /* MDIO */ @@ -125,18 +125,6 @@ }; }; - ethernet@60000000 { - status = "okay"; - - ethernet-port@0 { - phy-mode = "rgmii"; - phy-handle = <&phy0>; - }; - ethernet-port@1 { - /* Not used in this platform */ - }; - }; - usb@68000000 { status = "okay"; }; diff --git a/arch/arm/boot/dts/gemini-wbd111.dts b/arch/arm/boot/dts/gemini-wbd111.dts index 3a2761dd460f9..5602ba8f30f2f 100644 --- a/arch/arm/boot/dts/gemini-wbd111.dts +++ b/arch/arm/boot/dts/gemini-wbd111.dts @@ -68,7 +68,7 @@ }; }; - mdio0: ethernet-phy { + mdio0: mdio { compatible = "virtual,mdio-gpio"; gpios = <&gpio0 22 GPIO_ACTIVE_HIGH>, /* MDC */ <&gpio0 21 GPIO_ACTIVE_HIGH>; /* MDIO */ diff --git a/arch/arm/boot/dts/gemini-wbd222.dts b/arch/arm/boot/dts/gemini-wbd222.dts index 52b4dbc0c0723..a4a260c36d752 100644 --- a/arch/arm/boot/dts/gemini-wbd222.dts +++ b/arch/arm/boot/dts/gemini-wbd222.dts @@ -67,7 +67,7 @@ }; }; - mdio0: ethernet-phy { + mdio0: mdio { compatible = "virtual,mdio-gpio"; gpios = <&gpio0 22 GPIO_ACTIVE_HIGH>, /* MDC */ <&gpio0 21 GPIO_ACTIVE_HIGH>; /* MDIO */ diff --git a/arch/arm/boot/dts/gemini.dtsi b/arch/arm/boot/dts/gemini.dtsi index 8cf67b11751f7..ef4f1c5323bd7 100644 --- a/arch/arm/boot/dts/gemini.dtsi +++ b/arch/arm/boot/dts/gemini.dtsi @@ -286,6 +286,7 @@ clock-names = "PCLK", "PCICLK"; pinctrl-names = "default"; pinctrl-0 = <&pci_default_pins>; + device_type = "pci"; #address-cells = <3>; #size-cells = <2>; #interrupt-cells = <1>; diff --git a/arch/arm/boot/dts/imx23-evk.dts b/arch/arm/boot/dts/imx23-evk.dts index 0b2701ca29212..858b8057e8540 100644 --- a/arch/arm/boot/dts/imx23-evk.dts +++ b/arch/arm/boot/dts/imx23-evk.dts @@ -79,7 +79,6 @@ MX23_PAD_LCD_RESET__GPIO_1_18 MX23_PAD_PWM3__GPIO_1_29 MX23_PAD_PWM4__GPIO_1_30 - MX23_PAD_SSP1_DETECT__SSP1_DETECT >; fsl,drive-strength = ; fsl,voltage = ; diff --git a/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts b/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts index 0cd75dadf292c..188639738dc3e 100644 --- a/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts +++ b/arch/arm/boot/dts/imx27-phytec-phycard-s-rdk.dts @@ -75,8 +75,8 @@ imx27-phycard-s-rdk { pinctrl_i2c1: i2c1grp { fsl,pins = < - MX27_PAD_I2C2_SDA__I2C2_SDA 0x0 - MX27_PAD_I2C2_SCL__I2C2_SCL 0x0 + MX27_PAD_I2C_DATA__I2C_DATA 0x0 + MX27_PAD_I2C_CLK__I2C_CLK 0x0 >; }; diff --git a/arch/arm/boot/dts/imx50-evk.dts b/arch/arm/boot/dts/imx50-evk.dts index a25da415cb02e..907339bc81e54 100644 --- a/arch/arm/boot/dts/imx50-evk.dts +++ b/arch/arm/boot/dts/imx50-evk.dts @@ -59,7 +59,7 @@ MX50_PAD_CSPI_MISO__CSPI_MISO 0x00 MX50_PAD_CSPI_MOSI__CSPI_MOSI 0x00 MX50_PAD_CSPI_SS0__GPIO4_11 0xc4 - MX50_PAD_ECSPI1_MOSI__CSPI_SS1 0xf4 + MX50_PAD_ECSPI1_MOSI__GPIO4_13 0x84 >; }; diff --git a/arch/arm/boot/dts/imx53-m53menlo.dts b/arch/arm/boot/dts/imx53-m53menlo.dts index 719ed5ca454af..d002c8f738b53 100644 --- a/arch/arm/boot/dts/imx53-m53menlo.dts +++ b/arch/arm/boot/dts/imx53-m53menlo.dts @@ -53,14 +53,40 @@ }; }; + lvds-decoder { + compatible = "ti,ds90cf364a", "lvds-decoder"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + lvds_decoder_in: endpoint { + remote-endpoint = <&lvds0_out>; + }; + }; + + port@1 { + reg = <1>; + + lvds_decoder_out: endpoint { + remote-endpoint = <&panel_in>; + }; + }; + }; + }; + panel { compatible = "edt,etm0700g0dh6"; pinctrl-0 = <&pinctrl_display_gpio>; + pinctrl-names = "default"; enable-gpios = <&gpio6 0 GPIO_ACTIVE_HIGH>; port { panel_in: endpoint { - remote-endpoint = <&lvds0_out>; + remote-endpoint = <&lvds_decoder_out>; }; }; }; @@ -76,8 +102,7 @@ regulator-name = "vbus"; regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; - gpio = <&gpio1 2 GPIO_ACTIVE_HIGH>; - enable-active-high; + gpio = <&gpio1 2 0>; }; }; @@ -388,13 +413,13 @@ pinctrl_power_button: powerbutgrp { fsl,pins = < - MX53_PAD_SD2_DATA2__GPIO1_13 0x1e4 + MX53_PAD_SD2_DATA0__GPIO1_15 0x1e4 >; }; pinctrl_power_out: poweroutgrp { fsl,pins = < - MX53_PAD_SD2_DATA0__GPIO1_15 0x1e4 + MX53_PAD_SD2_DATA2__GPIO1_13 0x1e4 >; }; @@ -450,7 +475,7 @@ reg = <2>; lvds0_out: endpoint { - remote-endpoint = <&panel_in>; + remote-endpoint = <&lvds_decoder_in>; }; }; }; diff --git a/arch/arm/boot/dts/imx53-ppd.dts b/arch/arm/boot/dts/imx53-ppd.dts index 5ff9a179c83c3..c80d1700e0949 100644 --- a/arch/arm/boot/dts/imx53-ppd.dts +++ b/arch/arm/boot/dts/imx53-ppd.dts @@ -70,6 +70,12 @@ clock-frequency = <11289600>; }; + achc_24M: achc-clock { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <24000000>; + }; + sgtlsound: sound { compatible = "fsl,imx53-cpuvo-sgtl5000", "fsl,imx-audio-sgtl5000"; @@ -287,16 +293,13 @@ &gpio4 12 GPIO_ACTIVE_LOW>; status = "okay"; - spidev0: spi@0 { - compatible = "ge,achc"; - reg = <0>; - spi-max-frequency = <1000000>; - }; - - spidev1: spi@1 { - compatible = "ge,achc"; - reg = <1>; - spi-max-frequency = <1000000>; + spidev0: spi@1 { + compatible = "ge,achc", "nxp,kinetis-k20"; + reg = <1>, <0>; + vdd-supply = <®_3v3>; + vdda-supply = <®_3v3>; + clocks = <&achc_24M>; + reset-gpios = <&gpio3 6 GPIO_ACTIVE_LOW>; }; gpioxra0: gpio@2 { diff --git a/arch/arm/boot/dts/imx6dl-icore-mipi.dts b/arch/arm/boot/dts/imx6dl-icore-mipi.dts index e43bccb78ab2b..d8f3821a0ffdc 100644 --- a/arch/arm/boot/dts/imx6dl-icore-mipi.dts +++ b/arch/arm/boot/dts/imx6dl-icore-mipi.dts @@ -8,7 +8,7 @@ /dts-v1/; #include "imx6dl.dtsi" -#include "imx6qdl-icore.dtsi" +#include "imx6qdl-icore-1.5.dtsi" / { model = "Engicam i.CoreM6 DualLite/Solo MIPI Starter Kit"; diff --git a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi index e8d800fec6379..ce4a5a8074422 100644 --- a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi +++ b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi @@ -99,9 +99,13 @@ phy-reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>; phy-reset-duration = <20>; phy-supply = <&sw2_reg>; - phy-handle = <ðphy0>; status = "okay"; + fixed-link { + speed = <1000>; + full-duplex; + }; + mdio { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/imx6dl-yapp4-ursa.dts b/arch/arm/boot/dts/imx6dl-yapp4-ursa.dts index 0d594e4bd559d..a1173bf5bff5e 100644 --- a/arch/arm/boot/dts/imx6dl-yapp4-ursa.dts +++ b/arch/arm/boot/dts/imx6dl-yapp4-ursa.dts @@ -38,7 +38,7 @@ }; &switch_ports { - /delete-node/ port@2; + /delete-node/ port@3; }; &touchscreen { diff --git a/arch/arm/boot/dts/imx6q-b450v3.dts b/arch/arm/boot/dts/imx6q-b450v3.dts index 95b8f2d718214..fb0980190aa07 100644 --- a/arch/arm/boot/dts/imx6q-b450v3.dts +++ b/arch/arm/boot/dts/imx6q-b450v3.dts @@ -65,13 +65,6 @@ }; }; -&clks { - assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>, - <&clks IMX6QDL_CLK_LDB_DI1_SEL>; - assigned-clock-parents = <&clks IMX6QDL_CLK_PLL3_USB_OTG>, - <&clks IMX6QDL_CLK_PLL3_USB_OTG>; -}; - &ldb { status = "okay"; diff --git a/arch/arm/boot/dts/imx6q-b650v3.dts b/arch/arm/boot/dts/imx6q-b650v3.dts index 611cb7ae7e556..8f762d9c5ae99 100644 --- a/arch/arm/boot/dts/imx6q-b650v3.dts +++ b/arch/arm/boot/dts/imx6q-b650v3.dts @@ -65,13 +65,6 @@ }; }; -&clks { - assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>, - <&clks IMX6QDL_CLK_LDB_DI1_SEL>; - assigned-clock-parents = <&clks IMX6QDL_CLK_PLL3_USB_OTG>, - <&clks IMX6QDL_CLK_PLL3_USB_OTG>; -}; - &ldb { status = "okay"; diff --git a/arch/arm/boot/dts/imx6q-b850v3.dts b/arch/arm/boot/dts/imx6q-b850v3.dts index e4cb118f88c6c..1ea64ecf4291c 100644 --- a/arch/arm/boot/dts/imx6q-b850v3.dts +++ b/arch/arm/boot/dts/imx6q-b850v3.dts @@ -53,17 +53,6 @@ }; }; -&clks { - assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>, - <&clks IMX6QDL_CLK_LDB_DI1_SEL>, - <&clks IMX6QDL_CLK_IPU1_DI0_PRE_SEL>, - <&clks IMX6QDL_CLK_IPU2_DI0_PRE_SEL>; - assigned-clock-parents = <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, - <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, - <&clks IMX6QDL_CLK_PLL2_PFD2_396M>, - <&clks IMX6QDL_CLK_PLL2_PFD2_396M>; -}; - &ldb { fsl,dual-channel; status = "okay"; diff --git a/arch/arm/boot/dts/imx6q-bx50v3.dtsi b/arch/arm/boot/dts/imx6q-bx50v3.dtsi index fa27dcdf06f1b..1938b04199c48 100644 --- a/arch/arm/boot/dts/imx6q-bx50v3.dtsi +++ b/arch/arm/boot/dts/imx6q-bx50v3.dtsi @@ -377,3 +377,18 @@ #interrupt-cells = <1>; }; }; + +&clks { + assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>, + <&clks IMX6QDL_CLK_LDB_DI1_SEL>, + <&clks IMX6QDL_CLK_IPU1_DI0_PRE_SEL>, + <&clks IMX6QDL_CLK_IPU1_DI1_PRE_SEL>, + <&clks IMX6QDL_CLK_IPU2_DI0_PRE_SEL>, + <&clks IMX6QDL_CLK_IPU2_DI1_PRE_SEL>; + assigned-clock-parents = <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, + <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, + <&clks IMX6QDL_CLK_PLL2_PFD0_352M>, + <&clks IMX6QDL_CLK_PLL2_PFD0_352M>, + <&clks IMX6QDL_CLK_PLL2_PFD0_352M>, + <&clks IMX6QDL_CLK_PLL2_PFD0_352M>; +}; diff --git a/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts b/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts index 9c61e3be2d9a3..1c46df6827f50 100644 --- a/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts +++ b/arch/arm/boot/dts/imx6q-dhcom-pdk2.dts @@ -55,7 +55,7 @@ #sound-dai-cells = <0>; clocks = <&clk_ext_audio_codec>; VDDA-supply = <®_3p3v>; - VDDIO-supply = <®_3p3v>; + VDDIO-supply = <&sw2_reg>; }; }; diff --git a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi index 387801dde02e2..3233927d33b4c 100644 --- a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi +++ b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi @@ -98,30 +98,40 @@ reg = <0>; max-speed = <100>; reset-gpios = <&gpio5 0 GPIO_ACTIVE_LOW>; - reset-delay-us = <1000>; - reset-post-delay-us = <1000>; + reset-assert-us = <1000>; + reset-deassert-us = <1000>; + smsc,disable-energy-detect; /* Make plugin detection reliable */ }; }; }; &i2c1 { clock-frequency = <100000>; - pinctrl-names = "default"; + pinctrl-names = "default", "gpio"; pinctrl-0 = <&pinctrl_i2c1>; + pinctrl-1 = <&pinctrl_i2c1_gpio>; + scl-gpios = <&gpio3 21 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; + sda-gpios = <&gpio3 28 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; status = "okay"; }; &i2c2 { clock-frequency = <100000>; - pinctrl-names = "default"; + pinctrl-names = "default", "gpio"; pinctrl-0 = <&pinctrl_i2c2>; + pinctrl-1 = <&pinctrl_i2c2_gpio>; + scl-gpios = <&gpio4 12 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; + sda-gpios = <&gpio4 13 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; status = "okay"; }; &i2c3 { clock-frequency = <100000>; - pinctrl-names = "default"; + pinctrl-names = "default", "gpio"; pinctrl-0 = <&pinctrl_i2c3>; + pinctrl-1 = <&pinctrl_i2c3_gpio>; + scl-gpios = <&gpio1 3 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; + sda-gpios = <&gpio1 6 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; status = "okay"; ltc3676: pmic@3c { @@ -206,7 +216,7 @@ }; rtc@56 { - compatible = "rv3029c2"; + compatible = "microcrystal,rv3029"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_rtc_hw300>; reg = <0x56>; @@ -287,6 +297,13 @@ >; }; + pinctrl_i2c1_gpio: i2c1-gpio-grp { + fsl,pins = < + MX6QDL_PAD_EIM_D21__GPIO3_IO21 0x4001b8b1 + MX6QDL_PAD_EIM_D28__GPIO3_IO28 0x4001b8b1 + >; + }; + pinctrl_i2c2: i2c2-grp { fsl,pins = < MX6QDL_PAD_KEY_COL3__I2C2_SCL 0x4001b8b1 @@ -294,6 +311,13 @@ >; }; + pinctrl_i2c2_gpio: i2c2-gpio-grp { + fsl,pins = < + MX6QDL_PAD_KEY_COL3__GPIO4_IO12 0x4001b8b1 + MX6QDL_PAD_KEY_ROW3__GPIO4_IO13 0x4001b8b1 + >; + }; + pinctrl_i2c3: i2c3-grp { fsl,pins = < MX6QDL_PAD_GPIO_3__I2C3_SCL 0x4001b8b1 @@ -301,6 +325,13 @@ >; }; + pinctrl_i2c3_gpio: i2c3-gpio-grp { + fsl,pins = < + MX6QDL_PAD_GPIO_3__GPIO1_IO03 0x4001b8b1 + MX6QDL_PAD_GPIO_6__GPIO1_IO06 0x4001b8b1 + >; + }; + pinctrl_pmic_hw300: pmic-hw300-grp { fsl,pins = < MX6QDL_PAD_EIM_A25__GPIO5_IO02 0x1B0B0 @@ -408,6 +439,18 @@ vin-supply = <&sw1_reg>; }; +®_pu { + vin-supply = <&sw1_reg>; +}; + +®_vdd1p1 { + vin-supply = <&sw2_reg>; +}; + +®_vdd2p5 { + vin-supply = <&sw2_reg>; +}; + &uart1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_uart1>; diff --git a/arch/arm/boot/dts/imx6qdl-apalis.dtsi b/arch/arm/boot/dts/imx6qdl-apalis.dtsi index 7c4ad541c3f5e..b165bd6ea53b6 100644 --- a/arch/arm/boot/dts/imx6qdl-apalis.dtsi +++ b/arch/arm/boot/dts/imx6qdl-apalis.dtsi @@ -314,6 +314,8 @@ codec: sgtl5000@a { compatible = "fsl,sgtl5000"; reg = <0x0a>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_sgtl5000>; clocks = <&clks IMX6QDL_CLK_CKO>; VDDA-supply = <®_module_3v3_audio>; VDDIO-supply = <®_module_3v3>; @@ -543,8 +545,6 @@ MX6QDL_PAD_DISP0_DAT21__AUD4_TXD 0x130b0 MX6QDL_PAD_DISP0_DAT22__AUD4_TXFS 0x130b0 MX6QDL_PAD_DISP0_DAT23__AUD4_RXD 0x130b0 - /* SGTL5000 sys_mclk */ - MX6QDL_PAD_GPIO_5__CCM_CLKO1 0x130b0 >; }; @@ -810,6 +810,12 @@ >; }; + pinctrl_sgtl5000: sgtl5000grp { + fsl,pins = < + MX6QDL_PAD_GPIO_5__CCM_CLKO1 0x130b0 + >; + }; + pinctrl_spdif: spdifgrp { fsl,pins = < MX6QDL_PAD_GPIO_16__SPDIF_IN 0x1b0b0 diff --git a/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi b/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi index 828cf3e39784a..c4e146f3341bb 100644 --- a/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi +++ b/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi @@ -126,7 +126,7 @@ compatible = "nxp,pca8574"; reg = <0x3a>; gpio-controller; - #gpio-cells = <1>; + #gpio-cells = <2>; }; }; diff --git a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi index 1a9a9d98f2848..14d6fec50dee2 100644 --- a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi @@ -273,7 +273,7 @@ /* VDD_AUD_1P8: Audio codec */ reg_aud_1p8v: ldo3 { - regulator-name = "vdd1p8"; + regulator-name = "vdd1p8a"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-boot-on; diff --git a/arch/arm/boot/dts/imx6qdl-gw551x.dtsi b/arch/arm/boot/dts/imx6qdl-gw551x.dtsi index c23ba229fd057..8c33510c9519d 100644 --- a/arch/arm/boot/dts/imx6qdl-gw551x.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw551x.dtsi @@ -105,19 +105,16 @@ sound-digital { compatible = "simple-audio-card"; simple-audio-card,name = "tda1997x-audio"; + simple-audio-card,format = "i2s"; + simple-audio-card,bitclock-master = <&sound_codec>; + simple-audio-card,frame-master = <&sound_codec>; - simple-audio-card,dai-link@0 { - format = "i2s"; - - cpu { - sound-dai = <&ssi2>; - }; + sound_cpu: simple-audio-card,cpu { + sound-dai = <&ssi1>; + }; - codec { - bitclock-master; - frame-master; - sound-dai = <&hdmi_receiver>; - }; + sound_codec: simple-audio-card,codec { + sound-dai = <&hdmi_receiver>; }; }; }; diff --git a/arch/arm/boot/dts/imx6qdl-icore.dtsi b/arch/arm/boot/dts/imx6qdl-icore.dtsi index 7814f1ef08043..fde56f98398dd 100644 --- a/arch/arm/boot/dts/imx6qdl-icore.dtsi +++ b/arch/arm/boot/dts/imx6qdl-icore.dtsi @@ -384,7 +384,7 @@ pinctrl_usbotg: usbotggrp { fsl,pins = < - MX6QDL_PAD_GPIO_1__USB_OTG_ID 0x17059 + MX6QDL_PAD_ENET_RX_ER__USB_OTG_ID 0x17059 >; }; @@ -396,6 +396,7 @@ MX6QDL_PAD_SD1_DAT1__SD1_DATA1 0x17070 MX6QDL_PAD_SD1_DAT2__SD1_DATA2 0x17070 MX6QDL_PAD_SD1_DAT3__SD1_DATA3 0x17070 + MX6QDL_PAD_GPIO_1__GPIO1_IO01 0x1b0b0 >; }; diff --git a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi index 81c7ebb4b3fbe..eea317b41020d 100644 --- a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi +++ b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi @@ -167,7 +167,7 @@ i2c-gpio,delay-us = <2>; /* ~100 kHz */ #address-cells = <1>; #size-cells = <0>; - status = "disabld"; + status = "disabled"; }; i2c_cam: i2c-gpio-cam { @@ -179,7 +179,7 @@ i2c-gpio,delay-us = <2>; /* ~100 kHz */ #address-cells = <1>; #size-cells = <0>; - status = "disabld"; + status = "disabled"; }; }; @@ -551,7 +551,7 @@ pinctrl_i2c3: i2c3grp { fsl,pins = < - MX6QDL_PAD_GPIO_3__I2C3_SCL 0x4001b8b1 + MX6QDL_PAD_GPIO_5__I2C3_SCL 0x4001b8b1 MX6QDL_PAD_GPIO_16__I2C3_SDA 0x4001b8b1 >; }; diff --git a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi index bc43c75f17450..3617089dbe36d 100644 --- a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi +++ b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi @@ -315,8 +315,8 @@ fsl,pins = < MX6QDL_PAD_EIM_D24__UART3_TX_DATA 0x1b0b1 MX6QDL_PAD_EIM_D25__UART3_RX_DATA 0x1b0b1 - MX6QDL_PAD_EIM_D30__UART3_RTS_B 0x1b0b1 - MX6QDL_PAD_EIM_D31__UART3_CTS_B 0x1b0b1 + MX6QDL_PAD_EIM_D31__UART3_RTS_B 0x1b0b1 + MX6QDL_PAD_EIM_D30__UART3_CTS_B 0x1b0b1 >; }; @@ -403,6 +403,7 @@ &uart3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_uart3>; + uart-has-rtscts; status = "disabled"; }; @@ -432,6 +433,7 @@ pinctrl-0 = <&pinctrl_usdhc2>; cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; wp-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>; + vmmc-supply = <&vdd_sd1_reg>; status = "disabled"; }; @@ -441,5 +443,6 @@ &pinctrl_usdhc3_cdwp>; cd-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>; wp-gpios = <&gpio1 29 GPIO_ACTIVE_HIGH>; + vmmc-supply = <&vdd_sd0_reg>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi b/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi index 6486df3e2942d..31fa37d2fe471 100644 --- a/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi +++ b/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi @@ -107,14 +107,14 @@ regulators { vdd_arm: buck1 { regulator-name = "vdd_arm"; - regulator-min-microvolt = <730000>; + regulator-min-microvolt = <925000>; regulator-max-microvolt = <1380000>; regulator-always-on; }; vdd_soc: buck2 { regulator-name = "vdd_soc"; - regulator-min-microvolt = <730000>; + regulator-min-microvolt = <1150000>; regulator-max-microvolt = <1380000>; regulator-always-on; }; @@ -183,7 +183,6 @@ pinctrl-0 = <&pinctrl_usdhc4>; bus-width = <8>; non-removable; - vmmc-supply = <&vdd_emmc_1p8>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi index 71ca76a5e4a51..fe59dde41b649 100644 --- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi @@ -749,10 +749,6 @@ vin-supply = <&vgen5_reg>; }; -®_vdd3p0 { - vin-supply = <&sw2_reg>; -}; - ®_vdd2p5 { vin-supply = <&vgen5_reg>; }; diff --git a/arch/arm/boot/dts/imx6qdl-sr-som-ti.dtsi b/arch/arm/boot/dts/imx6qdl-sr-som-ti.dtsi index 44a97ba93a957..352ac585ca6b8 100644 --- a/arch/arm/boot/dts/imx6qdl-sr-som-ti.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sr-som-ti.dtsi @@ -153,6 +153,7 @@ bus-width = <4>; keep-power-in-suspend; mmc-pwrseq = <&pwrseq_ti_wifi>; + cap-power-off-card; non-removable; vmmc-supply = <&vcc_3v3>; /* vqmmc-supply = <&nvcc_sd1>; - MMC layer doesn't like it! */ diff --git a/arch/arm/boot/dts/imx6qdl-sr-som.dtsi b/arch/arm/boot/dts/imx6qdl-sr-som.dtsi index 6d7f6b9035bc1..f2649241167ec 100644 --- a/arch/arm/boot/dts/imx6qdl-sr-som.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sr-som.dtsi @@ -54,7 +54,13 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_microsom_enet_ar8035>; phy-mode = "rgmii-id"; - phy-reset-duration = <2>; + + /* + * The PHY seems to require a long-enough reset duration to avoid + * some rare issues where the PHY gets stuck in an inconsistent and + * non-functional state at boot-up. 10ms proved to be fine . + */ + phy-reset-duration = <10>; phy-reset-gpios = <&gpio4 15 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi index f0be516dc28e1..9181fbeb833d3 100644 --- a/arch/arm/boot/dts/imx6qdl-ts7970.dtsi +++ b/arch/arm/boot/dts/imx6qdl-ts7970.dtsi @@ -226,7 +226,7 @@ reg = <0x28>; #gpio-cells = <2>; gpio-controller; - ngpio = <32>; + ngpios = <62>; }; sgtl5000: codec@a { diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi b/arch/arm/boot/dts/imx6qdl-udoo.dtsi index 776bfc77f89d0..6c8da3f037335 100644 --- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi +++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi @@ -5,6 +5,8 @@ * Author: Fabio Estevam */ +#include + / { aliases { backlight = &backlight; @@ -98,7 +100,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; @@ -218,6 +220,7 @@ MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x17059 MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x17059 MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x17059 + MX6QDL_PAD_SD3_DAT5__GPIO7_IO00 0x1b0b0 >; }; @@ -290,7 +293,7 @@ &usdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; - non-removable; + cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi b/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi index 93909796885a0..b9b698f72b261 100644 --- a/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi +++ b/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi @@ -166,7 +166,6 @@ MX6QDL_PAD_RGMII_RD2__RGMII_RD2 0x1b030 MX6QDL_PAD_RGMII_RD3__RGMII_RD3 0x1b030 MX6QDL_PAD_RGMII_RX_CTL__RGMII_RX_CTL 0x1b030 - MX6QDL_PAD_GPIO_6__ENET_IRQ 0x000b1 >; }; diff --git a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi index 93be00a60c887..a66c4fac6baf0 100644 --- a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi +++ b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi @@ -627,7 +627,7 @@ pinctrl-0 = <&pinctrl_usdhc2>; bus-width = <4>; cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 3 GPIO_ACTIVE_HIGH>; + disable-wp; vmmc-supply = <®_3p3v_sd>; vqmmc-supply = <®_3p3v>; no-1-8-v; @@ -640,7 +640,7 @@ pinctrl-0 = <&pinctrl_usdhc3>; bus-width = <4>; cd-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>; + disable-wp; vmmc-supply = <®_3p3v_sd>; vqmmc-supply = <®_3p3v>; no-1-8-v; @@ -774,6 +774,7 @@ &usbh1 { vbus-supply = <®_5p0v_main>; disable-over-current; + maximum-speed = "full-speed"; status = "okay"; }; @@ -1055,7 +1056,6 @@ MX6QDL_PAD_SD2_DAT1__SD2_DATA1 0x17059 MX6QDL_PAD_SD2_DAT2__SD2_DATA2 0x17059 MX6QDL_PAD_SD2_DAT3__SD2_DATA3 0x17059 - MX6QDL_PAD_NANDF_D3__GPIO2_IO03 0x40010040 MX6QDL_PAD_NANDF_D2__GPIO2_IO02 0x40010040 >; }; @@ -1068,7 +1068,6 @@ MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x17059 MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x17059 MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x17059 - MX6QDL_PAD_NANDF_D1__GPIO2_IO01 0x40010040 MX6QDL_PAD_NANDF_D0__GPIO2_IO00 0x40010040 >; diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi index e6b4b8525f989..e9955ef12e02d 100644 --- a/arch/arm/boot/dts/imx6qdl.dtsi +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -766,7 +766,7 @@ regulator-name = "vddpu"; regulator-min-microvolt = <725000>; regulator-max-microvolt = <1450000>; - regulator-enable-ramp-delay = <150>; + regulator-enable-ramp-delay = <380>; anatop-reg-offset = <0x140>; anatop-vol-bit-shift = <9>; anatop-vol-bit-width = <5>; @@ -1039,9 +1039,8 @@ compatible = "fsl,imx6q-fec"; reg = <0x02188000 0x4000>; interrupt-names = "int0", "pps"; - interrupts-extended = - <&intc 0 118 IRQ_TYPE_LEVEL_HIGH>, - <&intc 0 119 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <0 118 IRQ_TYPE_LEVEL_HIGH>, + <0 119 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6QDL_CLK_ENET>, <&clks IMX6QDL_CLK_ENET>, <&clks IMX6QDL_CLK_ENET_REF>; diff --git a/arch/arm/boot/dts/imx6qp.dtsi b/arch/arm/boot/dts/imx6qp.dtsi index 5f51f8e5c1fae..d91f92f944c53 100644 --- a/arch/arm/boot/dts/imx6qp.dtsi +++ b/arch/arm/boot/dts/imx6qp.dtsi @@ -77,7 +77,6 @@ }; &fec { - /delete-property/interrupts-extended; interrupts = <0 118 IRQ_TYPE_LEVEL_HIGH>, <0 119 IRQ_TYPE_LEVEL_HIGH>; }; diff --git a/arch/arm/boot/dts/imx6sl-evk.dts b/arch/arm/boot/dts/imx6sl-evk.dts index 4829aa682aeb0..bc86cfaaa9c27 100644 --- a/arch/arm/boot/dts/imx6sl-evk.dts +++ b/arch/arm/boot/dts/imx6sl-evk.dts @@ -584,10 +584,6 @@ vin-supply = <&sw2_reg>; }; -®_vdd3p0 { - vin-supply = <&sw2_reg>; -}; - ®_vdd2p5 { vin-supply = <&sw2_reg>; }; diff --git a/arch/arm/boot/dts/imx6sl.dtsi b/arch/arm/boot/dts/imx6sl.dtsi index 3a96b5538a2a1..540880f0413fd 100644 --- a/arch/arm/boot/dts/imx6sl.dtsi +++ b/arch/arm/boot/dts/imx6sl.dtsi @@ -936,8 +936,10 @@ }; rngb: rngb@21b4000 { + compatible = "fsl,imx6sl-rngb", "fsl,imx25-rngb"; reg = <0x021b4000 0x4000>; interrupts = <0 5 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clks IMX6SL_CLK_DUMMY>; }; weim: weim@21b8000 { diff --git a/arch/arm/boot/dts/imx6sll-evk.dts b/arch/arm/boot/dts/imx6sll-evk.dts index 3e1d32fdf4b85..5ace9e6acf85c 100644 --- a/arch/arm/boot/dts/imx6sll-evk.dts +++ b/arch/arm/boot/dts/imx6sll-evk.dts @@ -265,10 +265,6 @@ status = "okay"; }; -®_3p0 { - vin-supply = <&sw2_reg>; -}; - &snvs_poweroff { status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6sx-sabreauto.dts b/arch/arm/boot/dts/imx6sx-sabreauto.dts index 315044ccd65fc..e4719566133c8 100644 --- a/arch/arm/boot/dts/imx6sx-sabreauto.dts +++ b/arch/arm/boot/dts/imx6sx-sabreauto.dts @@ -99,7 +99,7 @@ &fec2 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet2>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <ðphy0>; fsl,magic-packet; status = "okay"; diff --git a/arch/arm/boot/dts/imx6sx-sdb-reva.dts b/arch/arm/boot/dts/imx6sx-sdb-reva.dts index f1830ed387a55..91a7548fdb8db 100644 --- a/arch/arm/boot/dts/imx6sx-sdb-reva.dts +++ b/arch/arm/boot/dts/imx6sx-sdb-reva.dts @@ -159,10 +159,6 @@ vin-supply = <&vgen6_reg>; }; -®_vdd3p0 { - vin-supply = <&sw2_reg>; -}; - ®_vdd2p5 { vin-supply = <&vgen6_reg>; }; diff --git a/arch/arm/boot/dts/imx6sx-sdb.dts b/arch/arm/boot/dts/imx6sx-sdb.dts index a8ee7087af5a5..5a63ca6157229 100644 --- a/arch/arm/boot/dts/imx6sx-sdb.dts +++ b/arch/arm/boot/dts/imx6sx-sdb.dts @@ -141,10 +141,6 @@ vin-supply = <&vgen6_reg>; }; -®_vdd3p0 { - vin-supply = <&sw2_reg>; -}; - ®_vdd2p5 { vin-supply = <&vgen6_reg>; }; diff --git a/arch/arm/boot/dts/imx6sx-sdb.dtsi b/arch/arm/boot/dts/imx6sx-sdb.dtsi index f6972deb5e39c..865528b134d80 100644 --- a/arch/arm/boot/dts/imx6sx-sdb.dtsi +++ b/arch/arm/boot/dts/imx6sx-sdb.dtsi @@ -213,7 +213,7 @@ &fec2 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet2>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <ðphy2>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi b/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi index c2a9dd57e56ad..aa86341adaaa4 100644 --- a/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi +++ b/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi @@ -215,7 +215,7 @@ flash0: n25q256a@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "micron,n25q256a"; + compatible = "micron,n25q256a", "jedec,spi-nor"; spi-max-frequency = <29000000>; spi-rx-bus-width = <4>; spi-tx-bus-width = <4>; diff --git a/arch/arm/boot/dts/imx6ul-kontron-n6310-s.dts b/arch/arm/boot/dts/imx6ul-kontron-n6310-s.dts index 0205fd56d9754..4e99e6c79a68b 100644 --- a/arch/arm/boot/dts/imx6ul-kontron-n6310-s.dts +++ b/arch/arm/boot/dts/imx6ul-kontron-n6310-s.dts @@ -157,10 +157,6 @@ status = "okay"; }; -&snvs_poweroff { - status = "okay"; -}; - &uart1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_uart1>; diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi index f008036e92948..ae0722b93b9d7 100644 --- a/arch/arm/boot/dts/imx6ul.dtsi +++ b/arch/arm/boot/dts/imx6ul.dtsi @@ -62,20 +62,18 @@ clock-frequency = <696000000>; clock-latency = <61036>; /* two CLK32 periods */ #cooling-cells = <2>; - operating-points = < + operating-points = /* kHz uV */ - 696000 1275000 - 528000 1175000 - 396000 1025000 - 198000 950000 - >; - fsl,soc-operating-points = < + <696000 1275000>, + <528000 1175000>, + <396000 1025000>, + <198000 950000>; + fsl,soc-operating-points = /* KHz uV */ - 696000 1275000 - 528000 1175000 - 396000 1175000 - 198000 1175000 - >; + <696000 1275000>, + <528000 1175000>, + <396000 1175000>, + <198000 1175000>; clocks = <&clks IMX6UL_CLK_ARM>, <&clks IMX6UL_CLK_PLL2_BUS>, <&clks IMX6UL_CLK_PLL2_PFD2>, @@ -157,6 +155,9 @@ ocram: sram@900000 { compatible = "mmio-sram"; reg = <0x00900000 0x20000>; + ranges = <0 0x00900000 0x20000>; + #address-cells = <1>; + #size-cells = <1>; }; intc: interrupt-controller@a01000 { @@ -966,7 +967,7 @@ }; csi: csi@21c4000 { - compatible = "fsl,imx6ul-csi", "fsl,imx7-csi"; + compatible = "fsl,imx6ul-csi"; reg = <0x021c4000 0x4000>; interrupts = ; clocks = <&clks IMX6UL_CLK_CSI>; @@ -975,7 +976,7 @@ }; lcdif: lcdif@21c8000 { - compatible = "fsl,imx6ul-lcdif", "fsl,imx28-lcdif"; + compatible = "fsl,imx6ul-lcdif", "fsl,imx6sx-lcdif"; reg = <0x021c8000 0x4000>; interrupts = ; clocks = <&clks IMX6UL_CLK_LCDIF_PIX>, @@ -996,7 +997,7 @@ qspi: spi@21e0000 { #address-cells = <1>; #size-cells = <0>; - compatible = "fsl,imx6ul-qspi", "fsl,imx6sx-qspi"; + compatible = "fsl,imx6ul-qspi"; reg = <0x021e0000 0x4000>, <0x60000000 0x10000000>; reg-names = "QuadSPI", "QuadSPI-memory"; interrupts = ; diff --git a/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi b/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi index 038d8c90f6dfe..621396884c318 100644 --- a/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi +++ b/arch/arm/boot/dts/imx6ull-colibri-wifi.dtsi @@ -43,6 +43,7 @@ assigned-clock-rates = <0>, <198000000>; cap-power-off-card; keep-power-in-suspend; + max-frequency = <25000000>; mmc-pwrseq = <&wifi_pwrseq>; no-1-8-v; non-removable; diff --git a/arch/arm/boot/dts/imx6ull-colibri.dtsi b/arch/arm/boot/dts/imx6ull-colibri.dtsi index d56728f03c35f..c83323b9ea53c 100644 --- a/arch/arm/boot/dts/imx6ull-colibri.dtsi +++ b/arch/arm/boot/dts/imx6ull-colibri.dtsi @@ -37,7 +37,7 @@ reg_sd1_vmmc: regulator-sd1-vmmc { compatible = "regulator-gpio"; - gpio = <&gpio5 9 GPIO_ACTIVE_HIGH>; + gpios = <&gpio5 9 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_snvs_reg_sd>; regulator-always-on; diff --git a/arch/arm/boot/dts/imx6ull-pinfunc.h b/arch/arm/boot/dts/imx6ull-pinfunc.h index eb025a9d47592..7328d4ef8559f 100644 --- a/arch/arm/boot/dts/imx6ull-pinfunc.h +++ b/arch/arm/boot/dts/imx6ull-pinfunc.h @@ -82,6 +82,6 @@ #define MX6ULL_PAD_CSI_DATA04__ESAI_TX_FS 0x01F4 0x0480 0x0000 0x9 0x0 #define MX6ULL_PAD_CSI_DATA05__ESAI_TX_CLK 0x01F8 0x0484 0x0000 0x9 0x0 #define MX6ULL_PAD_CSI_DATA06__ESAI_TX5_RX0 0x01FC 0x0488 0x0000 0x9 0x0 -#define MX6ULL_PAD_CSI_DATA07__ESAI_T0 0x0200 0x048C 0x0000 0x9 0x0 +#define MX6ULL_PAD_CSI_DATA07__ESAI_TX0 0x0200 0x048C 0x0000 0x9 0x0 #endif /* __DTS_IMX6ULL_PINFUNC_H */ diff --git a/arch/arm/boot/dts/imx7-colibri.dtsi b/arch/arm/boot/dts/imx7-colibri.dtsi index 917eb0b58b132..1e61e38621066 100644 --- a/arch/arm/boot/dts/imx7-colibri.dtsi +++ b/arch/arm/boot/dts/imx7-colibri.dtsi @@ -77,7 +77,7 @@ dailink_master: simple-audio-card,codec { sound-dai = <&codec>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -152,7 +152,7 @@ compatible = "fsl,sgtl5000"; #sound-dai-cells = <0>; reg = <0x0a>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sai1_mclk>; VDDA-supply = <®_module_3v3_avdd>; @@ -337,7 +337,6 @@ assigned-clock-rates = <400000000>; bus-width = <8>; fsl,tuning-step = <2>; - max-frequency = <100000000>; vmmc-supply = <®_module_3v3>; vqmmc-supply = <®_DCDC3>; non-removable; @@ -347,7 +346,7 @@ &iomuxc { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpio1 &pinctrl_gpio2 &pinctrl_gpio3 &pinctrl_gpio4 - &pinctrl_gpio7>; + &pinctrl_gpio7 &pinctrl_usbc_det>; pinctrl_gpio1: gpio1-grp { fsl,pins = < @@ -452,7 +451,6 @@ pinctrl_enet1: enet1grp { fsl,pins = < - MX7D_PAD_ENET1_CRS__GPIO7_IO14 0x14 MX7D_PAD_ENET1_RGMII_RX_CTL__ENET1_RGMII_RX_CTL 0x73 MX7D_PAD_ENET1_RGMII_RD0__ENET1_RGMII_RD0 0x73 MX7D_PAD_ENET1_RGMII_RD1__ENET1_RGMII_RD1 0x73 @@ -650,6 +648,12 @@ >; }; + pinctrl_usbc_det: gpio-usbc-det { + fsl,pins = < + MX7D_PAD_ENET1_CRS__GPIO7_IO14 0x14 + >; + }; + pinctrl_usbh_reg: gpio-usbh-vbus { fsl,pins = < MX7D_PAD_UART3_CTS_B__GPIO4_IO7 0x14 /* SODIMM 129 USBH PEN */ diff --git a/arch/arm/boot/dts/imx7-mba7.dtsi b/arch/arm/boot/dts/imx7-mba7.dtsi index 50abf18ad30b2..887497e3bb4b8 100644 --- a/arch/arm/boot/dts/imx7-mba7.dtsi +++ b/arch/arm/boot/dts/imx7-mba7.dtsi @@ -250,7 +250,7 @@ tlv320aic32x4: audio-codec@18 { compatible = "ti,tlv320aic32x4"; reg = <0x18>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; clock-names = "mclk"; ldoin-supply = <®_audio_3v3>; iov-supply = <®_audio_3v3>; diff --git a/arch/arm/boot/dts/imx7d-meerkat96.dts b/arch/arm/boot/dts/imx7d-meerkat96.dts index 5339210b63d0f..dd8003bd1fc09 100644 --- a/arch/arm/boot/dts/imx7d-meerkat96.dts +++ b/arch/arm/boot/dts/imx7d-meerkat96.dts @@ -193,7 +193,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc1>; keep-power-in-suspend; - tuning-step = <2>; + fsl,tuning-step = <2>; vmmc-supply = <®_3p3v>; no-1-8-v; broken-cd; diff --git a/arch/arm/boot/dts/imx7d-nitrogen7.dts b/arch/arm/boot/dts/imx7d-nitrogen7.dts index 6b4acea1ef795..ecfa179ccab1c 100644 --- a/arch/arm/boot/dts/imx7d-nitrogen7.dts +++ b/arch/arm/boot/dts/imx7d-nitrogen7.dts @@ -284,7 +284,7 @@ codec: wm8960@1a { compatible = "wlf,wm8960"; reg = <0x1a>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; clock-names = "mclk"; wlf,shared-lrclk; }; diff --git a/arch/arm/boot/dts/imx7d-pico-hobbit.dts b/arch/arm/boot/dts/imx7d-pico-hobbit.dts index 7b2198a9372c6..d917dc4f2f227 100644 --- a/arch/arm/boot/dts/imx7d-pico-hobbit.dts +++ b/arch/arm/boot/dts/imx7d-pico-hobbit.dts @@ -31,7 +31,7 @@ dailink_master: simple-audio-card,codec { sound-dai = <&sgtl5000>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -41,7 +41,7 @@ #sound-dai-cells = <0>; reg = <0x0a>; compatible = "fsl,sgtl5000"; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; VDDA-supply = <®_2p5v>; VDDIO-supply = <®_vref_1v8>; }; diff --git a/arch/arm/boot/dts/imx7d-pico-pi.dts b/arch/arm/boot/dts/imx7d-pico-pi.dts index 70bea95c06d83..f263e391e24cb 100644 --- a/arch/arm/boot/dts/imx7d-pico-pi.dts +++ b/arch/arm/boot/dts/imx7d-pico-pi.dts @@ -31,7 +31,7 @@ dailink_master: simple-audio-card,codec { sound-dai = <&sgtl5000>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -41,7 +41,7 @@ #sound-dai-cells = <0>; reg = <0x0a>; compatible = "fsl,sgtl5000"; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; VDDA-supply = <®_2p5v>; VDDIO-supply = <®_vref_1v8>; }; diff --git a/arch/arm/boot/dts/imx7d-pico.dtsi b/arch/arm/boot/dts/imx7d-pico.dtsi index 6f50ebf31a0ab..8a8df54ff5639 100644 --- a/arch/arm/boot/dts/imx7d-pico.dtsi +++ b/arch/arm/boot/dts/imx7d-pico.dtsi @@ -307,7 +307,7 @@ pinctrl-2 = <&pinctrl_usdhc1_200mhz>; cd-gpios = <&gpio5 0 GPIO_ACTIVE_LOW>; bus-width = <4>; - tuning-step = <2>; + fsl,tuning-step = <2>; vmmc-supply = <®_3p3v>; wakeup-source; no-1-8-v; diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts index 869efbc4af42c..a97cda17e484e 100644 --- a/arch/arm/boot/dts/imx7d-sdb.dts +++ b/arch/arm/boot/dts/imx7d-sdb.dts @@ -356,7 +356,7 @@ codec: wm8960@1a { compatible = "wlf,wm8960"; reg = <0x1a>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; clock-names = "mclk"; wlf,shared-lrclk; }; diff --git a/arch/arm/boot/dts/imx7s-colibri.dtsi b/arch/arm/boot/dts/imx7s-colibri.dtsi index 1fb1ec5d3d707..6d16e32aed899 100644 --- a/arch/arm/boot/dts/imx7s-colibri.dtsi +++ b/arch/arm/boot/dts/imx7s-colibri.dtsi @@ -49,3 +49,7 @@ reg = <0x80000000 0x10000000>; }; }; + +&gpmi { + status = "okay"; +}; diff --git a/arch/arm/boot/dts/imx7s-warp.dts b/arch/arm/boot/dts/imx7s-warp.dts index d6b4888fa686b..e035dd5bf4f62 100644 --- a/arch/arm/boot/dts/imx7s-warp.dts +++ b/arch/arm/boot/dts/imx7s-warp.dts @@ -75,7 +75,7 @@ dailink_master: simple-audio-card,codec { sound-dai = <&codec>; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; }; }; }; @@ -232,7 +232,7 @@ #sound-dai-cells = <0>; reg = <0x0a>; compatible = "fsl,sgtl5000"; - clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_CLK>; + clocks = <&clks IMX7D_AUDIO_MCLK_ROOT_DIV>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sai1_mclk>; VDDA-supply = <&vgen4_reg>; diff --git a/arch/arm/boot/dts/imx7ulp.dtsi b/arch/arm/boot/dts/imx7ulp.dtsi index 6859a3a83750c..0108b63df77d3 100644 --- a/arch/arm/boot/dts/imx7ulp.dtsi +++ b/arch/arm/boot/dts/imx7ulp.dtsi @@ -37,10 +37,10 @@ #address-cells = <1>; #size-cells = <0>; - cpu0: cpu@0 { + cpu0: cpu@f00 { compatible = "arm,cortex-a7"; device_type = "cpu"; - reg = <0>; + reg = <0xf00>; }; }; @@ -397,7 +397,7 @@ clocks = <&pcc2 IMX7ULP_CLK_RGPIO2P1>, <&pcc3 IMX7ULP_CLK_PCTLC>; clock-names = "gpio", "port"; - gpio-ranges = <&iomuxc1 0 0 32>; + gpio-ranges = <&iomuxc1 0 0 20>; }; gpio_ptd: gpio@40af0000 { @@ -411,7 +411,7 @@ clocks = <&pcc2 IMX7ULP_CLK_RGPIO2P1>, <&pcc3 IMX7ULP_CLK_PCTLD>; clock-names = "gpio", "port"; - gpio-ranges = <&iomuxc1 0 32 32>; + gpio-ranges = <&iomuxc1 0 32 12>; }; gpio_pte: gpio@40b00000 { @@ -425,7 +425,7 @@ clocks = <&pcc2 IMX7ULP_CLK_RGPIO2P1>, <&pcc3 IMX7ULP_CLK_PCTLE>; clock-names = "gpio", "port"; - gpio-ranges = <&iomuxc1 0 64 32>; + gpio-ranges = <&iomuxc1 0 64 16>; }; gpio_ptf: gpio@40b10000 { @@ -439,7 +439,7 @@ clocks = <&pcc2 IMX7ULP_CLK_RGPIO2P1>, <&pcc3 IMX7ULP_CLK_PCTLF>; clock-names = "gpio", "port"; - gpio-ranges = <&iomuxc1 0 96 32>; + gpio-ranges = <&iomuxc1 0 96 20>; }; }; diff --git a/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts index f7a841a288654..270e4986b6e64 100644 --- a/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts @@ -11,3 +11,18 @@ model = "LogicPD Zoom OMAP35xx SOM-LV Development Kit"; compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3"; }; + +&omap3_pmx_core2 { + pinctrl-names = "default"; + pinctrl-0 = <&hsusb2_2_pins>; + hsusb2_2_pins: pinmux_hsusb2_2_pins { + pinctrl-single,pins = < + OMAP3430_CORE2_IOPAD(0x25f0, PIN_OUTPUT | MUX_MODE3) /* etk_d10.hsusb2_clk */ + OMAP3430_CORE2_IOPAD(0x25f2, PIN_OUTPUT | MUX_MODE3) /* etk_d11.hsusb2_stp */ + OMAP3430_CORE2_IOPAD(0x25f4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d12.hsusb2_dir */ + OMAP3430_CORE2_IOPAD(0x25f6, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d13.hsusb2_nxt */ + OMAP3430_CORE2_IOPAD(0x25f8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d14.hsusb2_data0 */ + OMAP3430_CORE2_IOPAD(0x25fa, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d15.hsusb2_data1 */ + >; + }; +}; diff --git a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts index a604d92221a4f..c757f0d7781c1 100644 --- a/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-som-lv-37xx-devkit.dts @@ -11,3 +11,18 @@ model = "LogicPD Zoom DM3730 SOM-LV Development Kit"; compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3630", "ti,omap3"; }; + +&omap3_pmx_core2 { + pinctrl-names = "default"; + pinctrl-0 = <&hsusb2_2_pins>; + hsusb2_2_pins: pinmux_hsusb2_2_pins { + pinctrl-single,pins = < + OMAP3630_CORE2_IOPAD(0x25f0, PIN_OUTPUT | MUX_MODE3) /* etk_d10.hsusb2_clk */ + OMAP3630_CORE2_IOPAD(0x25f2, PIN_OUTPUT | MUX_MODE3) /* etk_d11.hsusb2_stp */ + OMAP3630_CORE2_IOPAD(0x25f4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d12.hsusb2_dir */ + OMAP3630_CORE2_IOPAD(0x25f6, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d13.hsusb2_nxt */ + OMAP3630_CORE2_IOPAD(0x25f8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d14.hsusb2_data0 */ + OMAP3630_CORE2_IOPAD(0x25fa, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d15.hsusb2_data1 */ + >; + }; +}; diff --git a/arch/arm/boot/dts/logicpd-som-lv-baseboard.dtsi b/arch/arm/boot/dts/logicpd-som-lv-baseboard.dtsi index 100396f6c2feb..395e05f10d36c 100644 --- a/arch/arm/boot/dts/logicpd-som-lv-baseboard.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv-baseboard.dtsi @@ -51,6 +51,8 @@ &mcbsp2 { status = "okay"; + pinctrl-names = "default"; + pinctrl-0 = <&mcbsp2_pins>; }; &charger { @@ -102,35 +104,18 @@ regulator-max-microvolt = <3300000>; }; - lcd0: display@0 { - compatible = "panel-dpi"; - label = "28"; - status = "okay"; - /* default-on; */ + lcd0: display { + /* This isn't the exact LCD, but the timings meet spec */ + compatible = "logicpd,type28"; pinctrl-names = "default"; pinctrl-0 = <&lcd_enable_pin>; - enable-gpios = <&gpio5 27 GPIO_ACTIVE_HIGH>; /* gpio155, lcd INI */ + backlight = <&bl>; + enable-gpios = <&gpio5 27 GPIO_ACTIVE_HIGH>; port { lcd_in: endpoint { remote-endpoint = <&dpi_out>; }; }; - - panel-timing { - clock-frequency = <9000000>; - hactive = <480>; - vactive = <272>; - hfront-porch = <3>; - hback-porch = <2>; - hsync-len = <42>; - vback-porch = <3>; - vfront-porch = <2>; - vsync-len = <11>; - hsync-active = <1>; - vsync-active = <1>; - de-active = <1>; - pixelclk-active = <0>; - }; }; bl: backlight { diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index b56524cc7fe27..55b619c99e24d 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -265,21 +265,6 @@ }; }; -&omap3_pmx_core2 { - pinctrl-names = "default"; - pinctrl-0 = <&hsusb2_2_pins>; - hsusb2_2_pins: pinmux_hsusb2_2_pins { - pinctrl-single,pins = < - OMAP3630_CORE2_IOPAD(0x25f0, PIN_OUTPUT | MUX_MODE3) /* etk_d10.hsusb2_clk */ - OMAP3630_CORE2_IOPAD(0x25f2, PIN_OUTPUT | MUX_MODE3) /* etk_d11.hsusb2_stp */ - OMAP3630_CORE2_IOPAD(0x25f4, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d12.hsusb2_dir */ - OMAP3630_CORE2_IOPAD(0x25f6, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d13.hsusb2_nxt */ - OMAP3630_CORE2_IOPAD(0x25f8, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d14.hsusb2_data0 */ - OMAP3630_CORE2_IOPAD(0x25fa, PIN_INPUT_PULLDOWN | MUX_MODE3) /* etk_d15.hsusb2_data1 */ - >; - }; -}; - &uart2 { interrupts-extended = <&intc 73 &omap3_pmx_core OMAP3_UART2_RX>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit-28.dts b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit-28.dts index 07ac99b9cda66..cdb89b3e2a9bd 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit-28.dts +++ b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit-28.dts @@ -11,22 +11,6 @@ #include "logicpd-torpedo-37xx-devkit.dts" &lcd0 { - - label = "28"; - - panel-timing { - clock-frequency = <9000000>; - hactive = <480>; - vactive = <272>; - hfront-porch = <3>; - hback-porch = <2>; - hsync-len = <42>; - vback-porch = <3>; - vfront-porch = <2>; - vsync-len = <11>; - hsync-active = <1>; - vsync-active = <1>; - de-active = <1>; - pixelclk-active = <0>; - }; + /* To make it work, set CONFIG_OMAP2_DSS_MIN_FCK_PER_PCK=4 */ + compatible = "logicpd,type28"; }; diff --git a/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi b/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi index 449cc7616da63..e7a8f8addb6e0 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi @@ -80,6 +80,8 @@ }; &mcbsp2 { + pinctrl-names = "default"; + pinctrl-0 = <&mcbsp2_pins>; status = "okay"; }; diff --git a/arch/arm/boot/dts/lpc32xx.dtsi b/arch/arm/boot/dts/lpc32xx.dtsi index 7b7ec7b1217b8..824393e1bcfb7 100644 --- a/arch/arm/boot/dts/lpc32xx.dtsi +++ b/arch/arm/boot/dts/lpc32xx.dtsi @@ -329,9 +329,6 @@ clocks = <&xtal_32k>, <&xtal>; clock-names = "xtal_32k", "xtal"; - - assigned-clocks = <&clk LPC32XX_CLK_HCLK_PLL>; - assigned-clock-rates = <208000000>; }; }; diff --git a/arch/arm/boot/dts/ls1021a-tsn.dts b/arch/arm/boot/dts/ls1021a-tsn.dts index 5b7689094b70e..7235ce2a32936 100644 --- a/arch/arm/boot/dts/ls1021a-tsn.dts +++ b/arch/arm/boot/dts/ls1021a-tsn.dts @@ -247,7 +247,7 @@ flash@0 { /* Rev. A uses 64MB flash, Rev. B & C use 32MB flash */ - compatible = "jedec,spi-nor", "s25fl256s1", "s25fl512s"; + compatible = "jedec,spi-nor"; spi-max-frequency = <20000000>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi index 2f6977ada4476..aeb8a40b6b601 100644 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@ -181,7 +181,7 @@ #address-cells = <1>; #size-cells = <0>; reg = <0x0 0x1550000 0x0 0x10000>, - <0x0 0x40000000 0x0 0x40000000>; + <0x0 0x40000000 0x0 0x20000000>; reg-names = "QuadSPI", "QuadSPI-memory"; interrupts = ; clock-names = "qspi_en", "qspi"; @@ -311,39 +311,6 @@ #thermal-sensor-cells = <1>; }; - thermal-zones { - cpu_thermal: cpu-thermal { - polling-delay-passive = <1000>; - polling-delay = <5000>; - - thermal-sensors = <&tmu 0>; - - trips { - cpu_alert: cpu-alert { - temperature = <85000>; - hysteresis = <2000>; - type = "passive"; - }; - cpu_crit: cpu-crit { - temperature = <95000>; - hysteresis = <2000>; - type = "critical"; - }; - }; - - cooling-maps { - map0 { - trip = <&cpu_alert>; - cooling-device = - <&cpu0 THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu1 THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>; - }; - }; - }; - }; - dspi0: spi@2100000 { compatible = "fsl,ls1021a-v1.0-dspi"; #address-cells = <1>; @@ -728,7 +695,7 @@ }; mdio0: mdio@2d24000 { - compatible = "fsl,etsec2-mdio"; + compatible = "gianfar"; device_type = "mdio"; #address-cells = <1>; #size-cells = <0>; @@ -737,7 +704,7 @@ }; mdio1: mdio@2d64000 { - compatible = "fsl,etsec2-mdio"; + compatible = "gianfar"; device_type = "mdio"; #address-cells = <1>; #size-cells = <0>; @@ -753,7 +720,7 @@ fsl,tmr-prsc = <2>; fsl,tmr-add = <0xaaaaaaab>; fsl,tmr-fiper1 = <999999995>; - fsl,tmr-fiper2 = <99990>; + fsl,tmr-fiper2 = <999999995>; fsl,max-adj = <499999999>; fsl,extts-fifo; }; @@ -984,4 +951,37 @@ }; }; + + thermal-zones { + cpu_thermal: cpu-thermal { + polling-delay-passive = <1000>; + polling-delay = <5000>; + + thermal-sensors = <&tmu 0>; + + trips { + cpu_alert: cpu-alert { + temperature = <85000>; + hysteresis = <2000>; + type = "passive"; + }; + cpu_crit: cpu-crit { + temperature = <95000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + + cooling-maps { + map0 { + trip = <&cpu_alert>; + cooling-device = + <&cpu0 THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu1 THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + }; + }; + }; + }; }; diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi index c4447f6c8b2cb..e141ce7484841 100644 --- a/arch/arm/boot/dts/meson.dtsi +++ b/arch/arm/boot/dts/meson.dtsi @@ -49,14 +49,14 @@ }; uart_A: serial@84c0 { - compatible = "amlogic,meson6-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart"; reg = <0x84c0 0x18>; interrupts = ; status = "disabled"; }; uart_B: serial@84dc { - compatible = "amlogic,meson6-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart"; reg = <0x84dc 0x18>; interrupts = ; status = "disabled"; @@ -94,7 +94,7 @@ }; uart_C: serial@8700 { - compatible = "amlogic,meson6-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart"; reg = <0x8700 0x18>; interrupts = ; status = "disabled"; @@ -196,7 +196,7 @@ }; uart_AO: serial@4c0 { - compatible = "amlogic,meson6-uart", "amlogic,meson-ao-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart", "amlogic,meson-ao-uart"; reg = <0x4c0 0x18>; interrupts = ; status = "disabled"; diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi index 5a7e3e5caebe2..d7c9dbee0f016 100644 --- a/arch/arm/boot/dts/meson8.dtsi +++ b/arch/arm/boot/dts/meson8.dtsi @@ -129,8 +129,8 @@ gpu_opp_table: gpu-opp-table { compatible = "operating-points-v2"; - opp-182150000 { - opp-hz = /bits/ 64 <182150000>; + opp-182142857 { + opp-hz = /bits/ 64 <182142857>; opp-microvolt = <1150000>; }; opp-318750000 { @@ -230,8 +230,6 @@ , , , - , - , , , , @@ -243,8 +241,13 @@ "pp2", "ppmmu2", "pp4", "ppmmu4", "pp5", "ppmmu5", "pp6", "ppmmu6"; resets = <&reset RESET_MALI>; + clocks = <&clkc CLKID_CLK81>, <&clkc CLKID_MALI>; clock-names = "bus", "core"; + + assigned-clocks = <&clkc CLKID_MALI>; + assigned-clock-rates = <318750000>; + operating-points-v2 = <&gpu_opp_table>; }; }; @@ -253,7 +256,7 @@ &aobus { pmu: pmu@e0 { compatible = "amlogic,meson8-pmu", "syscon"; - reg = <0xe0 0x8>; + reg = <0xe0 0x18>; }; pinctrl_aobus: pinctrl@84 { diff --git a/arch/arm/boot/dts/meson8b-ec100.dts b/arch/arm/boot/dts/meson8b-ec100.dts index bed1dfef19857..32d1c322dbc65 100644 --- a/arch/arm/boot/dts/meson8b-ec100.dts +++ b/arch/arm/boot/dts/meson8b-ec100.dts @@ -148,7 +148,7 @@ regulator-min-microvolt = <860000>; regulator-max-microvolt = <1140000>; - vin-supply = <&vcc_5v>; + pwm-supply = <&vcc_5v>; pwms = <&pwm_cd 0 1148 0>; pwm-dutycycle-range = <100 0>; @@ -232,7 +232,7 @@ regulator-min-microvolt = <860000>; regulator-max-microvolt = <1140000>; - vin-supply = <&vcc_5v>; + pwm-supply = <&vcc_5v>; pwms = <&pwm_cd 1 1148 0>; pwm-dutycycle-range = <100 0>; diff --git a/arch/arm/boot/dts/meson8b-mxq.dts b/arch/arm/boot/dts/meson8b-mxq.dts index 6e39ad52e42d3..ab8fe55963f7c 100644 --- a/arch/arm/boot/dts/meson8b-mxq.dts +++ b/arch/arm/boot/dts/meson8b-mxq.dts @@ -39,6 +39,8 @@ regulator-min-microvolt = <860000>; regulator-max-microvolt = <1140000>; + pwm-supply = <&vcc_5v>; + pwms = <&pwm_cd 0 1148 0>; pwm-dutycycle-range = <100 0>; @@ -84,7 +86,7 @@ regulator-min-microvolt = <860000>; regulator-max-microvolt = <1140000>; - vin-supply = <&vcc_5v>; + pwm-supply = <&vcc_5v>; pwms = <&pwm_cd 1 1148 0>; pwm-dutycycle-range = <100 0>; diff --git a/arch/arm/boot/dts/meson8b-odroidc1.dts b/arch/arm/boot/dts/meson8b-odroidc1.dts index a24eccc354b95..c413af9a7af8e 100644 --- a/arch/arm/boot/dts/meson8b-odroidc1.dts +++ b/arch/arm/boot/dts/meson8b-odroidc1.dts @@ -130,7 +130,7 @@ regulator-min-microvolt = <860000>; regulator-max-microvolt = <1140000>; - vin-supply = <&p5v0>; + pwm-supply = <&p5v0>; pwms = <&pwm_cd 0 12218 0>; pwm-dutycycle-range = <91 0>; @@ -162,7 +162,7 @@ regulator-min-microvolt = <860000>; regulator-max-microvolt = <1140000>; - vin-supply = <&p5v0>; + pwm-supply = <&p5v0>; pwms = <&pwm_cd 1 12218 0>; pwm-dutycycle-range = <91 0>; @@ -219,7 +219,7 @@ reg = <0>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOH_4 GPIO_ACTIVE_LOW>; interrupt-parent = <&gpio_intc>; diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi index 099bf8e711c94..1e8c5d7bc824a 100644 --- a/arch/arm/boot/dts/meson8b.dtsi +++ b/arch/arm/boot/dts/meson8b.dtsi @@ -125,8 +125,8 @@ opp-hz = /bits/ 64 <255000000>; opp-microvolt = <1100000>; }; - opp-364300000 { - opp-hz = /bits/ 64 <364300000>; + opp-364285714 { + opp-hz = /bits/ 64 <364285714>; opp-microvolt = <1100000>; }; opp-425000000 { diff --git a/arch/arm/boot/dts/meson8m2-mxiii-plus.dts b/arch/arm/boot/dts/meson8m2-mxiii-plus.dts index d54477b1001ca..84b6ed51099db 100644 --- a/arch/arm/boot/dts/meson8m2-mxiii-plus.dts +++ b/arch/arm/boot/dts/meson8m2-mxiii-plus.dts @@ -83,7 +83,7 @@ reg = <0>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOH_4 GPIO_ACTIVE_LOW>; }; }; diff --git a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi index d1eae47b83f63..ab91c4ebb1463 100644 --- a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi +++ b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi @@ -13,8 +13,10 @@ #interrupt-cells = <2>; #address-cells = <1>; #size-cells = <0>; - spi-max-frequency = <3000000>; + spi-max-frequency = <9600000>; spi-cs-high; + spi-cpol; + spi-cpha; cpcap_adc: adc { compatible = "motorola,mapphone-cpcap-adc"; @@ -160,12 +162,12 @@ regulator-enable-ramp-delay = <1000>; }; - /* Used by DSS */ + /* Used by DSS and is the "zerov_regulator" trigger for SoC off mode */ vcsi: VCSI { regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; regulator-enable-ramp-delay = <1000>; - regulator-boot-on; + regulator-always-on; }; vdac: VDAC { diff --git a/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts b/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts index 2b760f90f38c8..5375c6699843f 100644 --- a/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts +++ b/arch/arm/boot/dts/mt7623n-bananapi-bpi-r2.dts @@ -192,6 +192,7 @@ fixed-link { speed = <1000>; full-duplex; + pause; }; }; }; diff --git a/arch/arm/boot/dts/mt7623n-rfb-emmc.dts b/arch/arm/boot/dts/mt7623n-rfb-emmc.dts index b7606130ade9e..0447748f9fa04 100644 --- a/arch/arm/boot/dts/mt7623n-rfb-emmc.dts +++ b/arch/arm/boot/dts/mt7623n-rfb-emmc.dts @@ -138,6 +138,7 @@ mac@1 { compatible = "mediatek,eth-mac"; reg = <1>; + phy-mode = "rgmii"; phy-handle = <&phy5>; }; diff --git a/arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi b/arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi index 7f6aefd134514..e7534fe9c53cf 100644 --- a/arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi +++ b/arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi @@ -29,7 +29,7 @@ compatible = "smsc,lan9221","smsc,lan9115"; bank-width = <2>; - gpmc,mux-add-data; + gpmc,mux-add-data = <0>; gpmc,cs-on-ns = <0>; gpmc,cs-rd-off-ns = <42>; gpmc,cs-wr-off-ns = <36>; diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index b6ef1a7ac8a4e..089e9f1f442b5 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -31,6 +31,8 @@ aliases { display0 = &lcd; display1 = &tv0; + /delete-property/ mmc2; + /delete-property/ mmc3; }; ldo_3v3: fixedregulator { @@ -515,7 +517,7 @@ compatible = "bosch,bma180"; reg = <0x41>; pinctrl-names = "default"; - pintcrl-0 = <&bma180_pins>; + pinctrl-0 = <&bma180_pins>; interrupt-parent = <&gpio4>; interrupts = <19 IRQ_TYPE_LEVEL_HIGH>; /* GPIO_115 */ }; diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 84a5ade1e865b..4227da71cc626 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -105,6 +105,14 @@ linux,code = ; linux,can-disable; }; + + machine_cover { + label = "Machine Cover"; + gpios = <&gpio6 0 GPIO_ACTIVE_LOW>; /* 160 */ + linux,input-type = ; + linux,code = ; + linux,can-disable; + }; }; isp1707: isp1707 { @@ -155,6 +163,12 @@ pwms = <&pwm9 0 26316 0>; /* 38000 Hz */ }; + rom_rng: rng { + compatible = "nokia,n900-rom-rng"; + clocks = <&rng_ick>; + clock-names = "ick"; + }; + /* controlled (enabled/disabled) directly by bcm2048 and wl1251 */ vctcxo: vctcxo { compatible = "fixed-clock"; @@ -808,10 +822,6 @@ pinctrl-0 = <&mmc1_pins>; vmmc-supply = <&vmmc1>; bus-width = <4>; - /* For debugging, it is often good idea to remove this GPIO. - It means you can remove back cover (to reboot by removing - battery) and still use the MMC card. */ - cd-gpios = <&gpio6 0 GPIO_ACTIVE_LOW>; /* 160 */ }; /* most boards use vaux3, only some old versions use vmmc2 instead */ @@ -843,34 +853,46 @@ compatible = "ti,omap2-onenand"; reg = <0 0 0x20000>; /* CS0, offset 0, IO size 128K */ + /* + * These timings are based on CONFIG_OMAP_GPMC_DEBUG=y reported + * bootloader set values when booted with v5.1 + * (OneNAND Manufacturer: Samsung): + * + * cs0 GPMC_CS_CONFIG1: 0xfb001202 + * cs0 GPMC_CS_CONFIG2: 0x00111100 + * cs0 GPMC_CS_CONFIG3: 0x00020200 + * cs0 GPMC_CS_CONFIG4: 0x11001102 + * cs0 GPMC_CS_CONFIG5: 0x03101616 + * cs0 GPMC_CS_CONFIG6: 0x90060000 + */ gpmc,sync-read; gpmc,sync-write; gpmc,burst-length = <16>; gpmc,burst-read; gpmc,burst-wrap; gpmc,burst-write; - gpmc,device-width = <2>; /* GPMC_DEVWIDTH_16BIT */ - gpmc,mux-add-data = <2>; /* GPMC_MUX_AD */ + gpmc,device-width = <2>; + gpmc,mux-add-data = <2>; gpmc,cs-on-ns = <0>; - gpmc,cs-rd-off-ns = <87>; - gpmc,cs-wr-off-ns = <87>; + gpmc,cs-rd-off-ns = <102>; + gpmc,cs-wr-off-ns = <102>; gpmc,adv-on-ns = <0>; - gpmc,adv-rd-off-ns = <10>; - gpmc,adv-wr-off-ns = <10>; - gpmc,oe-on-ns = <15>; - gpmc,oe-off-ns = <87>; + gpmc,adv-rd-off-ns = <12>; + gpmc,adv-wr-off-ns = <12>; + gpmc,oe-on-ns = <12>; + gpmc,oe-off-ns = <102>; gpmc,we-on-ns = <0>; - gpmc,we-off-ns = <87>; - gpmc,rd-cycle-ns = <112>; - gpmc,wr-cycle-ns = <112>; - gpmc,access-ns = <81>; - gpmc,page-burst-access-ns = <15>; + gpmc,we-off-ns = <102>; + gpmc,rd-cycle-ns = <132>; + gpmc,wr-cycle-ns = <132>; + gpmc,access-ns = <96>; + gpmc,page-burst-access-ns = <18>; gpmc,bus-turnaround-ns = <0>; gpmc,cycle2cycle-delay-ns = <0>; gpmc,wait-monitoring-ns = <0>; - gpmc,clk-activation-ns = <5>; - gpmc,wr-data-mux-bus-ns = <30>; - gpmc,wr-access-ns = <81>; + gpmc,clk-activation-ns = <6>; + gpmc,wr-data-mux-bus-ns = <36>; + gpmc,wr-access-ns = <96>; gpmc,sync-clk-ps = <15000>; /* diff --git a/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi b/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi index e5da3bc6f1050..218a10c0d8159 100644 --- a/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi +++ b/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi @@ -22,7 +22,7 @@ compatible = "smsc,lan9221","smsc,lan9115"; bank-width = <2>; - gpmc,mux-add-data; + gpmc,mux-add-data = <0>; gpmc,cs-on-ns = <0>; gpmc,cs-rd-off-ns = <42>; gpmc,cs-wr-off-ns = <36>; diff --git a/arch/arm/boot/dts/omap3-pandora-common.dtsi b/arch/arm/boot/dts/omap3-pandora-common.dtsi index ec5891718ae69..150d5be42d278 100644 --- a/arch/arm/boot/dts/omap3-pandora-common.dtsi +++ b/arch/arm/boot/dts/omap3-pandora-common.dtsi @@ -226,6 +226,17 @@ gpio = <&gpio6 4 GPIO_ACTIVE_HIGH>; /* GPIO_164 */ }; + /* wl1251 wifi+bt module */ + wlan_en: fixed-regulator-wg7210_en { + compatible = "regulator-fixed"; + regulator-name = "vwlan"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + startup-delay-us = <50000>; + enable-active-high; + gpio = <&gpio1 23 GPIO_ACTIVE_HIGH>; + }; + /* wg7210 (wifi+bt module) 32k clock buffer */ wg7210_32k: fixed-regulator-wg7210_32k { compatible = "regulator-fixed"; @@ -522,9 +533,30 @@ /*wp-gpios = <&gpio4 31 GPIO_ACTIVE_HIGH>;*/ /* GPIO_127 */ }; -/* mmc3 is probed using pdata-quirks to pass wl1251 card data */ &mmc3 { - status = "disabled"; + vmmc-supply = <&wlan_en>; + + bus-width = <4>; + non-removable; + ti,non-removable; + cap-power-off-card; + + pinctrl-names = "default"; + pinctrl-0 = <&mmc3_pins>; + + #address-cells = <1>; + #size-cells = <0>; + + wlan: wifi@1 { + compatible = "ti,wl1251"; + + reg = <1>; + + interrupt-parent = <&gpio1>; + interrupts = <21 IRQ_TYPE_LEVEL_HIGH>; /* GPIO_21 */ + + ti,wl1251-has-eeprom; + }; }; /* bluetooth*/ diff --git a/arch/arm/boot/dts/omap3-tao3530.dtsi b/arch/arm/boot/dts/omap3-tao3530.dtsi index a7a04d78deebf..f24e2326cfa7c 100644 --- a/arch/arm/boot/dts/omap3-tao3530.dtsi +++ b/arch/arm/boot/dts/omap3-tao3530.dtsi @@ -222,7 +222,7 @@ pinctrl-0 = <&mmc1_pins>; vmmc-supply = <&vmmc1>; vqmmc-supply = <&vsim>; - cd-gpios = <&twl_gpio 0 GPIO_ACTIVE_HIGH>; + cd-gpios = <&twl_gpio 0 GPIO_ACTIVE_LOW>; bus-width = <8>; }; diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi index 4043ecb380168..0c8fcfb292bf9 100644 --- a/arch/arm/boot/dts/omap3.dtsi +++ b/arch/arm/boot/dts/omap3.dtsi @@ -23,6 +23,9 @@ i2c0 = &i2c1; i2c1 = &i2c2; i2c2 = &i2c3; + mmc0 = &mmc1; + mmc1 = &mmc2; + mmc2 = &mmc3; serial0 = &uart1; serial1 = &uart2; serial2 = &uart3; diff --git a/arch/arm/boot/dts/omap3430-sdp.dts b/arch/arm/boot/dts/omap3430-sdp.dts index 0abd61108a539..ec16979825378 100644 --- a/arch/arm/boot/dts/omap3430-sdp.dts +++ b/arch/arm/boot/dts/omap3430-sdp.dts @@ -101,7 +101,7 @@ nand@1,0 { compatible = "ti,omap2-nand"; - reg = <0 0 4>; /* CS0, offset 0, IO size 4 */ + reg = <1 0 4>; /* CS1, offset 0, IO size 4 */ interrupt-parent = <&gpmc>; interrupts = <0 IRQ_TYPE_NONE>, /* fifoevent */ <1 IRQ_TYPE_NONE>; /* termcount */ diff --git a/arch/arm/boot/dts/omap4-duovero-parlor.dts b/arch/arm/boot/dts/omap4-duovero-parlor.dts index 8047e8cdb3af0..4548d87534e37 100644 --- a/arch/arm/boot/dts/omap4-duovero-parlor.dts +++ b/arch/arm/boot/dts/omap4-duovero-parlor.dts @@ -139,7 +139,7 @@ ethernet@gpmc { reg = <5 0 0xff>; interrupt-parent = <&gpio2>; - interrupts = <12 IRQ_TYPE_EDGE_FALLING>; /* gpio_44 */ + interrupts = <12 IRQ_TYPE_LEVEL_LOW>; /* gpio_44 */ phy-mode = "mii"; diff --git a/arch/arm/boot/dts/omap4-panda-es.dts b/arch/arm/boot/dts/omap4-panda-es.dts index 9dd307b526048..468ad1b641380 100644 --- a/arch/arm/boot/dts/omap4-panda-es.dts +++ b/arch/arm/boot/dts/omap4-panda-es.dts @@ -46,7 +46,7 @@ button_pins: pinmux_button_pins { pinctrl-single,pins = < - OMAP4_IOPAD(0x11b, PIN_INPUT_PULLUP | MUX_MODE3) /* gpio_113 */ + OMAP4_IOPAD(0x0fc, PIN_INPUT_PULLUP | MUX_MODE3) /* gpio_113 */ >; }; }; diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index 7cc95bc1598b6..0a36b8fe3fa92 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -22,6 +22,11 @@ i2c1 = &i2c2; i2c2 = &i2c3; i2c3 = &i2c4; + mmc0 = &mmc1; + mmc1 = &mmc2; + mmc2 = &mmc3; + mmc3 = &mmc4; + mmc4 = &mmc5; serial0 = &uart1; serial1 = &uart2; serial2 = &uart3; @@ -328,10 +333,10 @@ status = "disabled"; }; - target-module@56000000 { + sgx_module: target-module@56000000 { compatible = "ti,sysc-omap4", "ti,sysc"; - reg = <0x5601fc00 0x4>, - <0x5601fc10 0x4>; + reg = <0x5600fe00 0x4>, + <0x5600fe10 0x4>; reg-names = "rev", "sysc"; ti,sysc-midle = , , diff --git a/arch/arm/boot/dts/omap443x.dtsi b/arch/arm/boot/dts/omap443x.dtsi index cbcdcb4e7d1c2..6e320efd9fc1d 100644 --- a/arch/arm/boot/dts/omap443x.dtsi +++ b/arch/arm/boot/dts/omap443x.dtsi @@ -33,10 +33,12 @@ }; ocp { + /* 4430 has only gpio_86 tshut and no talert interrupt */ bandgap: bandgap@4a002260 { reg = <0x4a002260 0x4 0x4a00232C 0x4>; compatible = "ti,omap4430-bandgap"; + gpios = <&gpio3 22 GPIO_ACTIVE_HIGH>; #thermal-sensor-cells = <0>; }; @@ -74,3 +76,13 @@ }; /include/ "omap443x-clocks.dtsi" + +/* + * Use dpll_per for sgx at 153.6MHz like droid4 stock v3.0.8 Android kernel + */ +&sgx_module { + assigned-clocks = <&l3_gfx_clkctrl OMAP4_GPU_CLKCTRL 24>, + <&dpll_per_m7x2_ck>; + assigned-clock-rates = <0>, <153600000>; + assigned-clock-parents = <&dpll_per_m7x2_ck>; +}; diff --git a/arch/arm/boot/dts/omap44xx-clocks.dtsi b/arch/arm/boot/dts/omap44xx-clocks.dtsi index e9d9c8460682c..68ab6a95f222d 100644 --- a/arch/arm/boot/dts/omap44xx-clocks.dtsi +++ b/arch/arm/boot/dts/omap44xx-clocks.dtsi @@ -770,14 +770,6 @@ ti,max-div = <2>; }; - sha2md5_fck: sha2md5_fck@15c8 { - #clock-cells = <0>; - compatible = "ti,gate-clock"; - clocks = <&l3_div_ck>; - ti,bit-shift = <1>; - reg = <0x15c8>; - }; - usb_phy_cm_clk32k: usb_phy_cm_clk32k@640 { #clock-cells = <0>; compatible = "ti,gate-clock"; diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 68ac04641bdb1..c7bf68c90ea8f 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -30,14 +30,6 @@ regulator-max-microvolt = <5000000>; }; - vdds_1v8_main: fixedregulator-vdds_1v8_main { - compatible = "regulator-fixed"; - regulator-name = "vdds_1v8_main"; - vin-supply = <&smps7_reg>; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; - }; - vmmcsd_fixed: fixedregulator-mmcsd { compatible = "regulator-fixed"; regulator-name = "vmmcsd_fixed"; @@ -487,6 +479,7 @@ regulator-boot-on; }; + vdds_1v8_main: smps7_reg: smps7 { /* VDDS_1v8_OMAP over VDDS_1v8_MAIN */ regulator-name = "smps7"; diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index 1fb7937638f07..3b56e993326d0 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -25,6 +25,11 @@ i2c2 = &i2c3; i2c3 = &i2c4; i2c4 = &i2c5; + mmc0 = &mmc1; + mmc1 = &mmc2; + mmc2 = &mmc3; + mmc3 = &mmc4; + mmc4 = &mmc5; serial0 = &uart1; serial1 = &uart2; serial2 = &uart3; @@ -143,6 +148,7 @@ #address-cells = <1>; #size-cells = <1>; ranges = <0 0 0 0xc0000000>; + dma-ranges = <0x80000000 0x0 0x80000000 0x80000000>; ti,hwmods = "l3_main_1", "l3_main_2", "l3_main_3"; reg = <0 0x44000000 0 0x2000>, <0 0x44800000 0 0x3000>, diff --git a/arch/arm/boot/dts/owl-s500.dtsi b/arch/arm/boot/dts/owl-s500.dtsi index 5ceb6cc4451d2..1dbe4e8b38ac7 100644 --- a/arch/arm/boot/dts/owl-s500.dtsi +++ b/arch/arm/boot/dts/owl-s500.dtsi @@ -84,21 +84,21 @@ global_timer: timer@b0020200 { compatible = "arm,cortex-a9-global-timer"; reg = <0xb0020200 0x100>; - interrupts = ; + interrupts = ; status = "disabled"; }; twd_timer: timer@b0020600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0xb0020600 0x20>; - interrupts = ; + interrupts = ; status = "disabled"; }; twd_wdt: wdt@b0020620 { compatible = "arm,cortex-a9-twd-wdt"; reg = <0xb0020620 0xe0>; - interrupts = ; + interrupts = ; status = "disabled"; }; diff --git a/arch/arm/boot/dts/ox810se.dtsi b/arch/arm/boot/dts/ox810se.dtsi index 9f6c2b660ed39..0755e5864c4a3 100644 --- a/arch/arm/boot/dts/ox810se.dtsi +++ b/arch/arm/boot/dts/ox810se.dtsi @@ -323,8 +323,8 @@ interrupt-controller; reg = <0 0x200>; #interrupt-cells = <1>; - valid-mask = <0xFFFFFFFF>; - clear-mask = <0>; + valid-mask = <0xffffffff>; + clear-mask = <0xffffffff>; }; timer0: timer@200 { diff --git a/arch/arm/boot/dts/ox820.dtsi b/arch/arm/boot/dts/ox820.dtsi index c9b3277320639..dde4364892bf0 100644 --- a/arch/arm/boot/dts/ox820.dtsi +++ b/arch/arm/boot/dts/ox820.dtsi @@ -240,8 +240,8 @@ reg = <0 0x200>; interrupts = ; #interrupt-cells = <1>; - valid-mask = <0xFFFFFFFF>; - clear-mask = <0>; + valid-mask = <0xffffffff>; + clear-mask = <0xffffffff>; }; timer0: timer@200 { @@ -287,7 +287,7 @@ clocks = <&armclk>; }; - gic: gic@1000 { + gic: interrupt-controller@1000 { compatible = "arm,arm11mp-gic"; interrupt-controller; #interrupt-cells = <3>; diff --git a/arch/arm/boot/dts/picoxcell-pc3x2.dtsi b/arch/arm/boot/dts/picoxcell-pc3x2.dtsi index 5ae8607883395..3fcc86d7b735f 100644 --- a/arch/arm/boot/dts/picoxcell-pc3x2.dtsi +++ b/arch/arm/boot/dts/picoxcell-pc3x2.dtsi @@ -45,18 +45,21 @@ emac: gem@30000 { compatible = "cadence,gem"; reg = <0x30000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <31>; }; dmac1: dmac@40000 { compatible = "snps,dw-dmac"; reg = <0x40000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <25>; }; dmac2: dmac@50000 { compatible = "snps,dw-dmac"; reg = <0x50000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <26>; }; @@ -234,6 +237,7 @@ axi2pico@c0000000 { compatible = "picochip,axi2pico-pc3x2"; reg = <0xc0000000 0x10000>; + interrupt-parent = <&vic0>; interrupts = <13 14 15 16 17 18 19 20 21>; }; }; diff --git a/arch/arm/boot/dts/qcom-apq8064.dtsi b/arch/arm/boot/dts/qcom-apq8064.dtsi index 8b79b4112ee1a..764984c95c686 100644 --- a/arch/arm/boot/dts/qcom-apq8064.dtsi +++ b/arch/arm/boot/dts/qcom-apq8064.dtsi @@ -198,7 +198,7 @@ clock-frequency = <19200000>; }; - pxo_board { + pxo_board: pxo_board { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <27000000>; @@ -1147,7 +1147,7 @@ }; gpu: adreno-3xx@4300000 { - compatible = "qcom,adreno-3xx"; + compatible = "qcom,adreno-320.2", "qcom,adreno"; reg = <0x04300000 0x20000>; reg-names = "kgsl_3d0_reg_memory"; interrupts = ; @@ -1162,7 +1162,6 @@ <&mmcc GFX3D_AHB_CLK>, <&mmcc GFX3D_AXI_CLK>, <&mmcc MMSS_IMEM_AHB_CLK>; - qcom,chipid = <0x03020002>; iommus = <&gfx3d 0 &gfx3d 1 @@ -1261,9 +1260,9 @@ <&mmcc DSI1_BYTE_CLK>, <&mmcc DSI_PIXEL_CLK>, <&mmcc DSI1_ESC_CLK>; - clock-names = "iface_clk", "bus_clk", "core_mmss_clk", - "src_clk", "byte_clk", "pixel_clk", - "core_clk"; + clock-names = "iface", "bus", "core_mmss", + "src", "byte", "pixel", + "core"; assigned-clocks = <&mmcc DSI1_BYTE_SRC>, <&mmcc DSI1_ESC_SRC>, @@ -1305,7 +1304,7 @@ reg-names = "dsi_pll", "dsi_phy", "dsi_phy_regulator"; clock-names = "iface_clk", "ref"; clocks = <&mmcc DSI_M_AHB_CLK>, - <&cxo_board>; + <&pxo_board>; }; diff --git a/arch/arm/boot/dts/qcom-ipq4019.dtsi b/arch/arm/boot/dts/qcom-ipq4019.dtsi index 56f51599852d0..338256c59ca5a 100644 --- a/arch/arm/boot/dts/qcom-ipq4019.dtsi +++ b/arch/arm/boot/dts/qcom-ipq4019.dtsi @@ -141,7 +141,8 @@ clocks { sleep_clk: sleep_clk { compatible = "fixed-clock"; - clock-frequency = <32768>; + clock-frequency = <32000>; + clock-output-names = "gcc_sleep_clk_src"; #clock-cells = <0>; }; diff --git a/arch/arm/boot/dts/qcom-mdm9615.dtsi b/arch/arm/boot/dts/qcom-mdm9615.dtsi index 356e9535f7a68..ffb4dcdb62d2b 100644 --- a/arch/arm/boot/dts/qcom-mdm9615.dtsi +++ b/arch/arm/boot/dts/qcom-mdm9615.dtsi @@ -323,6 +323,7 @@ pmicgpio: gpio@150 { compatible = "qcom,pm8018-gpio", "qcom,ssbi-gpio"; + reg = <0x150>; interrupt-controller; #interrupt-cells = <2>; gpio-controller; diff --git a/arch/arm/boot/dts/qcom-msm8960.dtsi b/arch/arm/boot/dts/qcom-msm8960.dtsi index f2aeaccdc1ad6..15ff0e8fd0d30 100644 --- a/arch/arm/boot/dts/qcom-msm8960.dtsi +++ b/arch/arm/boot/dts/qcom-msm8960.dtsi @@ -145,7 +145,9 @@ reg = <0x108000 0x1000>; qcom,ipc = <&l2cc 0x8 2>; - interrupts = <0 19 0>, <0 21 0>, <0 22 0>; + interrupts = , + , + ; interrupt-names = "ack", "err", "wakeup"; regulators { @@ -191,7 +193,7 @@ compatible = "qcom,msm-uartdm-v1.3", "qcom,msm-uartdm"; reg = <0x16440000 0x1000>, <0x16400000 0x1000>; - interrupts = <0 154 0x0>; + interrupts = ; clocks = <&gcc GSBI5_UART_CLK>, <&gcc GSBI5_H_CLK>; clock-names = "core", "iface"; status = "disabled"; @@ -317,7 +319,7 @@ #address-cells = <1>; #size-cells = <0>; reg = <0x16080000 0x1000>; - interrupts = <0 147 0>; + interrupts = ; spi-max-frequency = <24000000>; cs-gpios = <&msmgpio 8 0>; diff --git a/arch/arm/boot/dts/qcom-msm8974.dtsi b/arch/arm/boot/dts/qcom-msm8974.dtsi index 369e58f64145d..9de4f17955e31 100644 --- a/arch/arm/boot/dts/qcom-msm8974.dtsi +++ b/arch/arm/boot/dts/qcom-msm8974.dtsi @@ -1213,8 +1213,8 @@ #phy-cells = <0>; qcom,dsi-phy-index = <0>; - clocks = <&mmcc MDSS_AHB_CLK>; - clock-names = "iface"; + clocks = <&mmcc MDSS_AHB_CLK>, <&xo_board>; + clock-names = "iface", "ref"; }; }; }; diff --git a/arch/arm/boot/dts/qcom-pm8841.dtsi b/arch/arm/boot/dts/qcom-pm8841.dtsi index 2fd59c440903d..c73e5b149ac5e 100644 --- a/arch/arm/boot/dts/qcom-pm8841.dtsi +++ b/arch/arm/boot/dts/qcom-pm8841.dtsi @@ -25,6 +25,7 @@ compatible = "qcom,spmi-temp-alarm"; reg = <0x2400>; interrupts = <4 0x24 0 IRQ_TYPE_EDGE_RISING>; + #thermal-sensor-cells = <0>; }; }; diff --git a/arch/arm/boot/dts/r8a73a4.dtsi b/arch/arm/boot/dts/r8a73a4.dtsi index dd865f3c2eda7..4447f45f0cba9 100644 --- a/arch/arm/boot/dts/r8a73a4.dtsi +++ b/arch/arm/boot/dts/r8a73a4.dtsi @@ -131,7 +131,14 @@ cmt1: timer@e6130000 { compatible = "renesas,r8a73a4-cmt1", "renesas,rcar-gen2-cmt1"; reg = <0 0xe6130000 0 0x1004>; - interrupts = ; + interrupts = , + , + , + , + , + , + , + ; clocks = <&mstp3_clks R8A73A4_CLK_CMT1>; clock-names = "fck"; power-domains = <&pd_c5>; diff --git a/arch/arm/boot/dts/r8a7740.dtsi b/arch/arm/boot/dts/r8a7740.dtsi index 12ffe73bf2bc4..155f58e6d4e81 100644 --- a/arch/arm/boot/dts/r8a7740.dtsi +++ b/arch/arm/boot/dts/r8a7740.dtsi @@ -479,7 +479,7 @@ cpg_clocks: cpg_clocks@e6150000 { compatible = "renesas,r8a7740-cpg-clocks"; reg = <0xe6150000 0x10000>; - clocks = <&extal1_clk>, <&extalr_clk>; + clocks = <&extal1_clk>, <&extal2_clk>, <&extalr_clk>; #clock-cells = <1>; clock-output-names = "system", "pllc0", "pllc1", "pllc2", "r", diff --git a/arch/arm/boot/dts/r8a7743.dtsi b/arch/arm/boot/dts/r8a7743.dtsi index de981d629bddd..fdd267819319e 100644 --- a/arch/arm/boot/dts/r8a7743.dtsi +++ b/arch/arm/boot/dts/r8a7743.dtsi @@ -338,7 +338,7 @@ #thermal-sensor-cells = <0>; }; - ipmmu_sy0: mmu@e6280000 { + ipmmu_sy0: iommu@e6280000 { compatible = "renesas,ipmmu-r8a7743", "renesas,ipmmu-vmsa"; reg = <0 0xe6280000 0 0x1000>; @@ -348,7 +348,7 @@ status = "disabled"; }; - ipmmu_sy1: mmu@e6290000 { + ipmmu_sy1: iommu@e6290000 { compatible = "renesas,ipmmu-r8a7743", "renesas,ipmmu-vmsa"; reg = <0 0xe6290000 0 0x1000>; @@ -357,7 +357,7 @@ status = "disabled"; }; - ipmmu_ds: mmu@e6740000 { + ipmmu_ds: iommu@e6740000 { compatible = "renesas,ipmmu-r8a7743", "renesas,ipmmu-vmsa"; reg = <0 0xe6740000 0 0x1000>; @@ -367,7 +367,7 @@ status = "disabled"; }; - ipmmu_mp: mmu@ec680000 { + ipmmu_mp: iommu@ec680000 { compatible = "renesas,ipmmu-r8a7743", "renesas,ipmmu-vmsa"; reg = <0 0xec680000 0 0x1000>; @@ -376,7 +376,7 @@ status = "disabled"; }; - ipmmu_mx: mmu@fe951000 { + ipmmu_mx: iommu@fe951000 { compatible = "renesas,ipmmu-r8a7743", "renesas,ipmmu-vmsa"; reg = <0 0xfe951000 0 0x1000>; @@ -386,7 +386,7 @@ status = "disabled"; }; - ipmmu_gp: mmu@e62a0000 { + ipmmu_gp: iommu@e62a0000 { compatible = "renesas,ipmmu-r8a7743", "renesas,ipmmu-vmsa"; reg = <0 0xe62a0000 0 0x1000>; diff --git a/arch/arm/boot/dts/r8a7744.dtsi b/arch/arm/boot/dts/r8a7744.dtsi index fa74a262107bc..8264481bf8764 100644 --- a/arch/arm/boot/dts/r8a7744.dtsi +++ b/arch/arm/boot/dts/r8a7744.dtsi @@ -338,7 +338,7 @@ #thermal-sensor-cells = <0>; }; - ipmmu_sy0: mmu@e6280000 { + ipmmu_sy0: iommu@e6280000 { compatible = "renesas,ipmmu-r8a7744", "renesas,ipmmu-vmsa"; reg = <0 0xe6280000 0 0x1000>; @@ -348,7 +348,7 @@ status = "disabled"; }; - ipmmu_sy1: mmu@e6290000 { + ipmmu_sy1: iommu@e6290000 { compatible = "renesas,ipmmu-r8a7744", "renesas,ipmmu-vmsa"; reg = <0 0xe6290000 0 0x1000>; @@ -357,7 +357,7 @@ status = "disabled"; }; - ipmmu_ds: mmu@e6740000 { + ipmmu_ds: iommu@e6740000 { compatible = "renesas,ipmmu-r8a7744", "renesas,ipmmu-vmsa"; reg = <0 0xe6740000 0 0x1000>; @@ -367,7 +367,7 @@ status = "disabled"; }; - ipmmu_mp: mmu@ec680000 { + ipmmu_mp: iommu@ec680000 { compatible = "renesas,ipmmu-r8a7744", "renesas,ipmmu-vmsa"; reg = <0 0xec680000 0 0x1000>; @@ -376,7 +376,7 @@ status = "disabled"; }; - ipmmu_mx: mmu@fe951000 { + ipmmu_mx: iommu@fe951000 { compatible = "renesas,ipmmu-r8a7744", "renesas,ipmmu-vmsa"; reg = <0 0xfe951000 0 0x1000>; @@ -386,7 +386,7 @@ status = "disabled"; }; - ipmmu_gp: mmu@e62a0000 { + ipmmu_gp: iommu@e62a0000 { compatible = "renesas,ipmmu-r8a7744", "renesas,ipmmu-vmsa"; reg = <0 0xe62a0000 0 0x1000>; diff --git a/arch/arm/boot/dts/r8a7745.dtsi b/arch/arm/boot/dts/r8a7745.dtsi index c53f7ff20695f..c306713f2ab7d 100644 --- a/arch/arm/boot/dts/r8a7745.dtsi +++ b/arch/arm/boot/dts/r8a7745.dtsi @@ -302,7 +302,7 @@ resets = <&cpg 407>; }; - ipmmu_sy0: mmu@e6280000 { + ipmmu_sy0: iommu@e6280000 { compatible = "renesas,ipmmu-r8a7745", "renesas,ipmmu-vmsa"; reg = <0 0xe6280000 0 0x1000>; @@ -312,7 +312,7 @@ status = "disabled"; }; - ipmmu_sy1: mmu@e6290000 { + ipmmu_sy1: iommu@e6290000 { compatible = "renesas,ipmmu-r8a7745", "renesas,ipmmu-vmsa"; reg = <0 0xe6290000 0 0x1000>; @@ -321,7 +321,7 @@ status = "disabled"; }; - ipmmu_ds: mmu@e6740000 { + ipmmu_ds: iommu@e6740000 { compatible = "renesas,ipmmu-r8a7745", "renesas,ipmmu-vmsa"; reg = <0 0xe6740000 0 0x1000>; @@ -331,7 +331,7 @@ status = "disabled"; }; - ipmmu_mp: mmu@ec680000 { + ipmmu_mp: iommu@ec680000 { compatible = "renesas,ipmmu-r8a7745", "renesas,ipmmu-vmsa"; reg = <0 0xec680000 0 0x1000>; @@ -340,7 +340,7 @@ status = "disabled"; }; - ipmmu_mx: mmu@fe951000 { + ipmmu_mx: iommu@fe951000 { compatible = "renesas,ipmmu-r8a7745", "renesas,ipmmu-vmsa"; reg = <0 0xfe951000 0 0x1000>; @@ -350,7 +350,7 @@ status = "disabled"; }; - ipmmu_gp: mmu@e62a0000 { + ipmmu_gp: iommu@e62a0000 { compatible = "renesas,ipmmu-r8a7745", "renesas,ipmmu-vmsa"; reg = <0 0xe62a0000 0 0x1000>; diff --git a/arch/arm/boot/dts/r8a7779-marzen.dts b/arch/arm/boot/dts/r8a7779-marzen.dts index c755f0b8fd0d7..29599adea2aba 100644 --- a/arch/arm/boot/dts/r8a7779-marzen.dts +++ b/arch/arm/boot/dts/r8a7779-marzen.dts @@ -146,7 +146,7 @@ status = "okay"; clocks = <&mstp1_clks R8A7779_CLK_DU>, <&x3_clk>; - clock-names = "du", "dclkin.0"; + clock-names = "du.0", "dclkin.0"; ports { port@0 { diff --git a/arch/arm/boot/dts/r8a7779.dtsi b/arch/arm/boot/dts/r8a7779.dtsi index ebf5b7cfe2159..b3cb6492fc27e 100644 --- a/arch/arm/boot/dts/r8a7779.dtsi +++ b/arch/arm/boot/dts/r8a7779.dtsi @@ -68,6 +68,14 @@ <0xf0000100 0x100>; }; + timer@f0000200 { + compatible = "arm,cortex-a9-global-timer"; + reg = <0xf0000200 0x100>; + interrupts = ; + clocks = <&cpg_clocks R8A7779_CLK_ZS>; + }; + timer@f0000600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0xf0000600 0x20>; @@ -455,6 +463,7 @@ reg = <0xfff80000 0x40000>; interrupts = ; clocks = <&mstp1_clks R8A7779_CLK_DU>; + clock-names = "du.0"; power-domains = <&sysc R8A7779_PD_ALWAYS_ON>; status = "disabled"; diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi index 5a2747758f676..e3ba00a22eebf 100644 --- a/arch/arm/boot/dts/r8a7790.dtsi +++ b/arch/arm/boot/dts/r8a7790.dtsi @@ -427,7 +427,7 @@ #thermal-sensor-cells = <0>; }; - ipmmu_sy0: mmu@e6280000 { + ipmmu_sy0: iommu@e6280000 { compatible = "renesas,ipmmu-r8a7790", "renesas,ipmmu-vmsa"; reg = <0 0xe6280000 0 0x1000>; @@ -437,7 +437,7 @@ status = "disabled"; }; - ipmmu_sy1: mmu@e6290000 { + ipmmu_sy1: iommu@e6290000 { compatible = "renesas,ipmmu-r8a7790", "renesas,ipmmu-vmsa"; reg = <0 0xe6290000 0 0x1000>; @@ -446,7 +446,7 @@ status = "disabled"; }; - ipmmu_ds: mmu@e6740000 { + ipmmu_ds: iommu@e6740000 { compatible = "renesas,ipmmu-r8a7790", "renesas,ipmmu-vmsa"; reg = <0 0xe6740000 0 0x1000>; @@ -456,7 +456,7 @@ status = "disabled"; }; - ipmmu_mp: mmu@ec680000 { + ipmmu_mp: iommu@ec680000 { compatible = "renesas,ipmmu-r8a7790", "renesas,ipmmu-vmsa"; reg = <0 0xec680000 0 0x1000>; @@ -465,7 +465,7 @@ status = "disabled"; }; - ipmmu_mx: mmu@fe951000 { + ipmmu_mx: iommu@fe951000 { compatible = "renesas,ipmmu-r8a7790", "renesas,ipmmu-vmsa"; reg = <0 0xfe951000 0 0x1000>; @@ -475,7 +475,7 @@ status = "disabled"; }; - ipmmu_rt: mmu@ffc80000 { + ipmmu_rt: iommu@ffc80000 { compatible = "renesas,ipmmu-r8a7790", "renesas,ipmmu-vmsa"; reg = <0 0xffc80000 0 0x1000>; diff --git a/arch/arm/boot/dts/r8a7791.dtsi b/arch/arm/boot/dts/r8a7791.dtsi index 6f875502453cf..a26f86ccc5794 100644 --- a/arch/arm/boot/dts/r8a7791.dtsi +++ b/arch/arm/boot/dts/r8a7791.dtsi @@ -350,7 +350,7 @@ #thermal-sensor-cells = <0>; }; - ipmmu_sy0: mmu@e6280000 { + ipmmu_sy0: iommu@e6280000 { compatible = "renesas,ipmmu-r8a7791", "renesas,ipmmu-vmsa"; reg = <0 0xe6280000 0 0x1000>; @@ -360,7 +360,7 @@ status = "disabled"; }; - ipmmu_sy1: mmu@e6290000 { + ipmmu_sy1: iommu@e6290000 { compatible = "renesas,ipmmu-r8a7791", "renesas,ipmmu-vmsa"; reg = <0 0xe6290000 0 0x1000>; @@ -369,7 +369,7 @@ status = "disabled"; }; - ipmmu_ds: mmu@e6740000 { + ipmmu_ds: iommu@e6740000 { compatible = "renesas,ipmmu-r8a7791", "renesas,ipmmu-vmsa"; reg = <0 0xe6740000 0 0x1000>; @@ -379,7 +379,7 @@ status = "disabled"; }; - ipmmu_mp: mmu@ec680000 { + ipmmu_mp: iommu@ec680000 { compatible = "renesas,ipmmu-r8a7791", "renesas,ipmmu-vmsa"; reg = <0 0xec680000 0 0x1000>; @@ -388,7 +388,7 @@ status = "disabled"; }; - ipmmu_mx: mmu@fe951000 { + ipmmu_mx: iommu@fe951000 { compatible = "renesas,ipmmu-r8a7791", "renesas,ipmmu-vmsa"; reg = <0 0xfe951000 0 0x1000>; @@ -398,7 +398,7 @@ status = "disabled"; }; - ipmmu_rt: mmu@ffc80000 { + ipmmu_rt: iommu@ffc80000 { compatible = "renesas,ipmmu-r8a7791", "renesas,ipmmu-vmsa"; reg = <0 0xffc80000 0 0x1000>; @@ -407,7 +407,7 @@ status = "disabled"; }; - ipmmu_gp: mmu@e62a0000 { + ipmmu_gp: iommu@e62a0000 { compatible = "renesas,ipmmu-r8a7791", "renesas,ipmmu-vmsa"; reg = <0 0xe62a0000 0 0x1000>; diff --git a/arch/arm/boot/dts/r8a7793-gose.dts b/arch/arm/boot/dts/r8a7793-gose.dts index 42f3313e6988a..9f507393c3752 100644 --- a/arch/arm/boot/dts/r8a7793-gose.dts +++ b/arch/arm/boot/dts/r8a7793-gose.dts @@ -339,7 +339,7 @@ reg = <0x20>; remote = <&vin1>; - port { + ports { #address-cells = <1>; #size-cells = <0>; @@ -399,7 +399,7 @@ interrupts = <2 IRQ_TYPE_LEVEL_LOW>; default-input = <0>; - port { + ports { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/r8a7793.dtsi b/arch/arm/boot/dts/r8a7793.dtsi index bf05110fac4e2..fa38397950188 100644 --- a/arch/arm/boot/dts/r8a7793.dtsi +++ b/arch/arm/boot/dts/r8a7793.dtsi @@ -336,7 +336,7 @@ #thermal-sensor-cells = <0>; }; - ipmmu_sy0: mmu@e6280000 { + ipmmu_sy0: iommu@e6280000 { compatible = "renesas,ipmmu-r8a7793", "renesas,ipmmu-vmsa"; reg = <0 0xe6280000 0 0x1000>; @@ -346,7 +346,7 @@ status = "disabled"; }; - ipmmu_sy1: mmu@e6290000 { + ipmmu_sy1: iommu@e6290000 { compatible = "renesas,ipmmu-r8a7793", "renesas,ipmmu-vmsa"; reg = <0 0xe6290000 0 0x1000>; @@ -355,7 +355,7 @@ status = "disabled"; }; - ipmmu_ds: mmu@e6740000 { + ipmmu_ds: iommu@e6740000 { compatible = "renesas,ipmmu-r8a7793", "renesas,ipmmu-vmsa"; reg = <0 0xe6740000 0 0x1000>; @@ -365,7 +365,7 @@ status = "disabled"; }; - ipmmu_mp: mmu@ec680000 { + ipmmu_mp: iommu@ec680000 { compatible = "renesas,ipmmu-r8a7793", "renesas,ipmmu-vmsa"; reg = <0 0xec680000 0 0x1000>; @@ -374,7 +374,7 @@ status = "disabled"; }; - ipmmu_mx: mmu@fe951000 { + ipmmu_mx: iommu@fe951000 { compatible = "renesas,ipmmu-r8a7793", "renesas,ipmmu-vmsa"; reg = <0 0xfe951000 0 0x1000>; @@ -384,7 +384,7 @@ status = "disabled"; }; - ipmmu_rt: mmu@ffc80000 { + ipmmu_rt: iommu@ffc80000 { compatible = "renesas,ipmmu-r8a7793", "renesas,ipmmu-vmsa"; reg = <0 0xffc80000 0 0x1000>; @@ -393,7 +393,7 @@ status = "disabled"; }; - ipmmu_gp: mmu@e62a0000 { + ipmmu_gp: iommu@e62a0000 { compatible = "renesas,ipmmu-r8a7793", "renesas,ipmmu-vmsa"; reg = <0 0xe62a0000 0 0x1000>; diff --git a/arch/arm/boot/dts/r8a7794.dtsi b/arch/arm/boot/dts/r8a7794.dtsi index 8d797d34816e3..9dd952479e680 100644 --- a/arch/arm/boot/dts/r8a7794.dtsi +++ b/arch/arm/boot/dts/r8a7794.dtsi @@ -290,7 +290,7 @@ resets = <&cpg 407>; }; - ipmmu_sy0: mmu@e6280000 { + ipmmu_sy0: iommu@e6280000 { compatible = "renesas,ipmmu-r8a7794", "renesas,ipmmu-vmsa"; reg = <0 0xe6280000 0 0x1000>; @@ -300,7 +300,7 @@ status = "disabled"; }; - ipmmu_sy1: mmu@e6290000 { + ipmmu_sy1: iommu@e6290000 { compatible = "renesas,ipmmu-r8a7794", "renesas,ipmmu-vmsa"; reg = <0 0xe6290000 0 0x1000>; @@ -309,7 +309,7 @@ status = "disabled"; }; - ipmmu_ds: mmu@e6740000 { + ipmmu_ds: iommu@e6740000 { compatible = "renesas,ipmmu-r8a7794", "renesas,ipmmu-vmsa"; reg = <0 0xe6740000 0 0x1000>; @@ -319,7 +319,7 @@ status = "disabled"; }; - ipmmu_mp: mmu@ec680000 { + ipmmu_mp: iommu@ec680000 { compatible = "renesas,ipmmu-r8a7794", "renesas,ipmmu-vmsa"; reg = <0 0xec680000 0 0x1000>; @@ -328,7 +328,7 @@ status = "disabled"; }; - ipmmu_mx: mmu@fe951000 { + ipmmu_mx: iommu@fe951000 { compatible = "renesas,ipmmu-r8a7794", "renesas,ipmmu-vmsa"; reg = <0 0xfe951000 0 0x1000>; @@ -338,7 +338,7 @@ status = "disabled"; }; - ipmmu_gp: mmu@e62a0000 { + ipmmu_gp: iommu@e62a0000 { compatible = "renesas,ipmmu-r8a7794", "renesas,ipmmu-vmsa"; reg = <0 0xe62a0000 0 0x1000>; diff --git a/arch/arm/boot/dts/rk3036-kylin.dts b/arch/arm/boot/dts/rk3036-kylin.dts index fb3cf005cc902..2ef47ebeb0cbe 100644 --- a/arch/arm/boot/dts/rk3036-kylin.dts +++ b/arch/arm/boot/dts/rk3036-kylin.dts @@ -390,7 +390,7 @@ }; }; - sleep { + suspend { global_pwroff: global-pwroff { rockchip,pins = <2 RK_PA7 1 &pcfg_pull_none>; }; diff --git a/arch/arm/boot/dts/rk3036.dtsi b/arch/arm/boot/dts/rk3036.dtsi index c776321b2cc4e..d282a7b638d81 100644 --- a/arch/arm/boot/dts/rk3036.dtsi +++ b/arch/arm/boot/dts/rk3036.dtsi @@ -128,7 +128,7 @@ assigned-clocks = <&cru SCLK_GPU>; assigned-clock-rates = <100000000>; clocks = <&cru SCLK_GPU>, <&cru SCLK_GPU>; - clock-names = "core", "bus"; + clock-names = "bus", "core"; resets = <&cru SRST_GPU>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/rk3066a.dtsi b/arch/arm/boot/dts/rk3066a.dtsi index 3d1b02f45ffd6..1ac9deb3bd39a 100644 --- a/arch/arm/boot/dts/rk3066a.dtsi +++ b/arch/arm/boot/dts/rk3066a.dtsi @@ -761,7 +761,7 @@ #address-cells = <1>; #size-cells = <0>; - pd_vio@RK3066_PD_VIO { + power-domain@RK3066_PD_VIO { reg = ; clocks = <&cru ACLK_LCDC0>, <&cru ACLK_LCDC1>, @@ -788,7 +788,7 @@ <&qos_rga>; }; - pd_video@RK3066_PD_VIDEO { + power-domain@RK3066_PD_VIDEO { reg = ; clocks = <&cru ACLK_VDPU>, <&cru ACLK_VEPU>, @@ -797,7 +797,7 @@ pm_qos = <&qos_vpu>; }; - pd_gpu@RK3066_PD_GPU { + power-domain@RK3066_PD_GPU { reg = ; clocks = <&cru ACLK_GPU>; pm_qos = <&qos_gpu>; diff --git a/arch/arm/boot/dts/rk3188-bqedison2qc.dts b/arch/arm/boot/dts/rk3188-bqedison2qc.dts index c8b62bbd6a4a4..66a0ff196eb1f 100644 --- a/arch/arm/boot/dts/rk3188-bqedison2qc.dts +++ b/arch/arm/boot/dts/rk3188-bqedison2qc.dts @@ -58,20 +58,25 @@ lvds-encoder { compatible = "ti,sn75lvds83", "lvds-encoder"; - #address-cells = <1>; - #size-cells = <0>; - port@0 { - reg = <0>; - lvds_in_vop0: endpoint { - remote-endpoint = <&vop0_out_lvds>; + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + lvds_in_vop0: endpoint { + remote-endpoint = <&vop0_out_lvds>; + }; }; - }; - port@1 { - reg = <1>; - lvds_out_panel: endpoint { - remote-endpoint = <&panel_in_lvds>; + port@1 { + reg = <1>; + + lvds_out_panel: endpoint { + remote-endpoint = <&panel_in_lvds>; + }; }; }; }; @@ -465,10 +470,13 @@ non-removable; pinctrl-names = "default"; pinctrl-0 = <&sd1_clk>, <&sd1_cmd>, <&sd1_bus4>; - vmmcq-supply = <&vccio_wl>; + vqmmc-supply = <&vccio_wl>; + #address-cells = <1>; + #size-cells = <0>; status = "okay"; brcmf: wifi@1 { + reg = <1>; compatible = "brcm,bcm4329-fmac"; interrupt-parent = <&gpio3>; interrupts = ; diff --git a/arch/arm/boot/dts/rk3188.dtsi b/arch/arm/boot/dts/rk3188.dtsi index 10ede65d90f38..ee8a24a0e3cb1 100644 --- a/arch/arm/boot/dts/rk3188.dtsi +++ b/arch/arm/boot/dts/rk3188.dtsi @@ -150,16 +150,16 @@ compatible = "rockchip,rk3188-timer", "rockchip,rk3288-timer"; reg = <0x2000e000 0x20>; interrupts = ; - clocks = <&cru SCLK_TIMER3>, <&cru PCLK_TIMER3>; - clock-names = "timer", "pclk"; + clocks = <&cru PCLK_TIMER3>, <&cru SCLK_TIMER3>; + clock-names = "pclk", "timer"; }; timer6: timer@200380a0 { compatible = "rockchip,rk3188-timer", "rockchip,rk3288-timer"; reg = <0x200380a0 0x20>; interrupts = ; - clocks = <&cru SCLK_TIMER6>, <&cru PCLK_TIMER0>; - clock-names = "timer", "pclk"; + clocks = <&cru PCLK_TIMER0>, <&cru SCLK_TIMER6>; + clock-names = "pclk", "timer"; }; i2s0: i2s@1011a000 { @@ -701,7 +701,7 @@ #address-cells = <1>; #size-cells = <0>; - pd_vio@RK3188_PD_VIO { + power-domain@RK3188_PD_VIO { reg = ; clocks = <&cru ACLK_LCDC0>, <&cru ACLK_LCDC1>, @@ -723,7 +723,7 @@ <&qos_rga>; }; - pd_video@RK3188_PD_VIDEO { + power-domain@RK3188_PD_VIDEO { reg = ; clocks = <&cru ACLK_VDPU>, <&cru ACLK_VEPU>, @@ -732,7 +732,7 @@ pm_qos = <&qos_vpu>; }; - pd_gpu@RK3188_PD_GPU { + power-domain@RK3188_PD_GPU { reg = ; clocks = <&cru ACLK_GPU>; pm_qos = <&qos_gpu>; diff --git a/arch/arm/boot/dts/rk3228-evb.dts b/arch/arm/boot/dts/rk3228-evb.dts index 5670b33fd1bd0..aed879db6c152 100644 --- a/arch/arm/boot/dts/rk3228-evb.dts +++ b/arch/arm/boot/dts/rk3228-evb.dts @@ -46,7 +46,7 @@ #address-cells = <1>; #size-cells = <0>; - phy: phy@0 { + phy: ethernet-phy@0 { compatible = "ethernet-phy-id1234.d400", "ethernet-phy-ieee802.3-c22"; reg = <0>; clocks = <&cru SCLK_MAC_PHY>; diff --git a/arch/arm/boot/dts/rk3229-xms6.dts b/arch/arm/boot/dts/rk3229-xms6.dts index 679fc2b00e5ac..933ef69da32ac 100644 --- a/arch/arm/boot/dts/rk3229-xms6.dts +++ b/arch/arm/boot/dts/rk3229-xms6.dts @@ -150,7 +150,7 @@ #address-cells = <1>; #size-cells = <0>; - phy: phy@0 { + phy: ethernet-phy@0 { compatible = "ethernet-phy-id1234.d400", "ethernet-phy-ieee802.3-c22"; reg = <0>; diff --git a/arch/arm/boot/dts/rk322x.dtsi b/arch/arm/boot/dts/rk322x.dtsi index 340ed6ccb08f8..d393bb481e747 100644 --- a/arch/arm/boot/dts/rk322x.dtsi +++ b/arch/arm/boot/dts/rk322x.dtsi @@ -561,7 +561,7 @@ "pp1", "ppmmu1"; clocks = <&cru ACLK_GPU>, <&cru ACLK_GPU>; - clock-names = "core", "bus"; + clock-names = "bus", "core"; resets = <&cru SRST_GPU_A>; status = "disabled"; }; @@ -570,10 +570,9 @@ compatible = "rockchip,iommu"; reg = <0x20020800 0x100>; interrupts = ; - interrupt-names = "vpu_mmu"; clocks = <&cru ACLK_VPU>, <&cru HCLK_VPU>; clock-names = "aclk", "iface"; - iommu-cells = <0>; + #iommu-cells = <0>; status = "disabled"; }; @@ -581,10 +580,9 @@ compatible = "rockchip,iommu"; reg = <0x20030480 0x40>, <0x200304c0 0x40>; interrupts = ; - interrupt-names = "vdec_mmu"; clocks = <&cru ACLK_RKVDEC>, <&cru HCLK_RKVDEC>; clock-names = "aclk", "iface"; - iommu-cells = <0>; + #iommu-cells = <0>; status = "disabled"; }; @@ -614,7 +612,6 @@ compatible = "rockchip,iommu"; reg = <0x20053f00 0x100>; interrupts = ; - interrupt-names = "vop_mmu"; clocks = <&cru ACLK_VOP>, <&cru HCLK_VOP>; clock-names = "aclk", "iface"; #iommu-cells = <0>; @@ -625,10 +622,9 @@ compatible = "rockchip,iommu"; reg = <0x20070800 0x100>; interrupts = ; - interrupt-names = "iep_mmu"; clocks = <&cru ACLK_IEP>, <&cru HCLK_IEP>; clock-names = "aclk", "iface"; - iommu-cells = <0>; + #iommu-cells = <0>; status = "disabled"; }; @@ -639,8 +635,8 @@ interrupts = ; assigned-clocks = <&cru SCLK_HDMI_PHY>; assigned-clock-parents = <&hdmi_phy>; - clocks = <&cru SCLK_HDMI_HDCP>, <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_CEC>; - clock-names = "isfr", "iahb", "cec"; + clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_HDCP>, <&cru SCLK_HDMI_CEC>; + clock-names = "iahb", "isfr", "cec"; pinctrl-names = "default"; pinctrl-0 = <&hdmii2c_xfer &hdmi_hpd &hdmi_cec>; resets = <&cru SRST_HDMI_P>; @@ -1033,7 +1029,7 @@ }; }; - spi-0 { + spi0 { spi0_clk: spi0-clk { rockchip,pins = <0 RK_PB1 2 &pcfg_pull_up>; }; @@ -1051,7 +1047,7 @@ }; }; - spi-1 { + spi1 { spi1_clk: spi1-clk { rockchip,pins = <0 RK_PC7 2 &pcfg_pull_up>; }; diff --git a/arch/arm/boot/dts/rk3288-rock2-som.dtsi b/arch/arm/boot/dts/rk3288-rock2-som.dtsi index 9f9e2bfd1295e..7b79a21f9bbb2 100644 --- a/arch/arm/boot/dts/rk3288-rock2-som.dtsi +++ b/arch/arm/boot/dts/rk3288-rock2-som.dtsi @@ -218,7 +218,7 @@ flash0-supply = <&vcc_flash>; flash1-supply = <&vccio_pmu>; gpio30-supply = <&vccio_pmu>; - gpio1830 = <&vcc_io>; + gpio1830-supply = <&vcc_io>; lcdc-supply = <&vcc_io>; sdcard-supply = <&vccio_sd>; wifi-supply = <&vcc_18>; diff --git a/arch/arm/boot/dts/rk3288-vyasa.dts b/arch/arm/boot/dts/rk3288-vyasa.dts index ba06e9f97ddce..acfb7dc2df560 100644 --- a/arch/arm/boot/dts/rk3288-vyasa.dts +++ b/arch/arm/boot/dts/rk3288-vyasa.dts @@ -357,10 +357,10 @@ audio-supply = <&vcc_18>; bb-supply = <&vcc_io>; dvp-supply = <&vcc_io>; - flash0-suuply = <&vcc_18>; + flash0-supply = <&vcc_18>; flash1-supply = <&vcc_lan>; gpio30-supply = <&vcc_io>; - gpio1830 = <&vcc_io>; + gpio1830-supply = <&vcc_io>; lcdc-supply = <&vcc_io>; sdcard-supply = <&vccio_sd>; wifi-supply = <&vcc_18>; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index cc893e154fe5a..7dcafd0833ba8 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -238,8 +238,8 @@ compatible = "rockchip,rk3288-timer"; reg = <0x0 0xff810000 0x0 0x20>; interrupts = ; - clocks = <&xin24m>, <&cru PCLK_TIMER>; - clock-names = "timer", "pclk"; + clocks = <&cru PCLK_TIMER>, <&xin24m>; + clock-names = "pclk", "timer"; }; display-subsystem { @@ -771,7 +771,7 @@ * *_HDMI HDMI * *_MIPI_* MIPI */ - pd_vio@RK3288_PD_VIO { + power-domain@RK3288_PD_VIO { reg = ; clocks = <&cru ACLK_IEP>, <&cru ACLK_ISP>, @@ -813,7 +813,7 @@ * Note: The following 3 are HEVC(H.265) clocks, * and on the ACLK_HEVC_NIU (NOC). */ - pd_hevc@RK3288_PD_HEVC { + power-domain@RK3288_PD_HEVC { reg = ; clocks = <&cru ACLK_HEVC>, <&cru SCLK_HEVC_CABAC>, @@ -827,7 +827,7 @@ * (video endecoder & decoder) clocks that on the * ACLK_VCODEC_NIU and HCLK_VCODEC_NIU (NOC). */ - pd_video@RK3288_PD_VIDEO { + power-domain@RK3288_PD_VIDEO { reg = ; clocks = <&cru ACLK_VCODEC>, <&cru HCLK_VCODEC>; @@ -838,7 +838,7 @@ * Note: ACLK_GPU is the GPU clock, * and on the ACLK_GPU_NIU (NOC). */ - pd_gpu@RK3288_PD_GPU { + power-domain@RK3288_PD_GPU { reg = ; clocks = <&cru ACLK_GPU>; pm_qos = <&qos_gpu_r>, @@ -975,7 +975,7 @@ status = "disabled"; }; - crypto: cypto-controller@ff8a0000 { + crypto: crypto@ff8a0000 { compatible = "rockchip,rk3288-crypto"; reg = <0x0 0xff8a0000 0x0 0x4000>; interrupts = ; @@ -1575,7 +1575,7 @@ drive-strength = <12>; }; - sleep { + suspend { global_pwroff: global-pwroff { rockchip,pins = <0 RK_PA0 1 &pcfg_pull_none>; }; diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi index 97307a405e60e..bce0b05ef7bfe 100644 --- a/arch/arm/boot/dts/rk3xxx.dtsi +++ b/arch/arm/boot/dts/rk3xxx.dtsi @@ -84,7 +84,7 @@ compatible = "arm,mali-400"; reg = <0x10090000 0x10000>; clocks = <&cru ACLK_GPU>, <&cru ACLK_GPU>; - clock-names = "core", "bus"; + clock-names = "bus", "core"; assigned-clocks = <&cru ACLK_GPU>; assigned-clock-rates = <100000000>; resets = <&cru SRST_GPU>; diff --git a/arch/arm/boot/dts/s3c6410-mini6410.dts b/arch/arm/boot/dts/s3c6410-mini6410.dts index 0e159c884f972..1aeac33b0d341 100644 --- a/arch/arm/boot/dts/s3c6410-mini6410.dts +++ b/arch/arm/boot/dts/s3c6410-mini6410.dts @@ -165,6 +165,10 @@ }; }; +&clocks { + clocks = <&fin_pll>; +}; + &sdhci0 { pinctrl-names = "default"; pinctrl-0 = <&sd0_clk>, <&sd0_cmd>, <&sd0_cd>, <&sd0_bus4>; diff --git a/arch/arm/boot/dts/s3c6410-smdk6410.dts b/arch/arm/boot/dts/s3c6410-smdk6410.dts index a9a5689dc462d..3bf6c450a26e5 100644 --- a/arch/arm/boot/dts/s3c6410-smdk6410.dts +++ b/arch/arm/boot/dts/s3c6410-smdk6410.dts @@ -69,6 +69,10 @@ }; }; +&clocks { + clocks = <&fin_pll>; +}; + &sdhci0 { pinctrl-names = "default"; pinctrl-0 = <&sd0_clk>, <&sd0_cmd>, <&sd0_cd>, <&sd0_bus4>; diff --git a/arch/arm/boot/dts/s5pv210-aries.dtsi b/arch/arm/boot/dts/s5pv210-aries.dtsi index 8ff70b8563340..d419b77201f7e 100644 --- a/arch/arm/boot/dts/s5pv210-aries.dtsi +++ b/arch/arm/boot/dts/s5pv210-aries.dtsi @@ -454,6 +454,7 @@ pinctrl-names = "default"; cap-sd-highspeed; cap-mmc-highspeed; + keep-power-in-suspend; mmc-pwrseq = <&wifi_pwrseq>; non-removable; diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi index 2ad642f51fd92..61822afa30ab3 100644 --- a/arch/arm/boot/dts/s5pv210.dtsi +++ b/arch/arm/boot/dts/s5pv210.dtsi @@ -52,34 +52,26 @@ }; }; + xxti: oscillator-0 { + compatible = "fixed-clock"; + clock-frequency = <0>; + clock-output-names = "xxti"; + #clock-cells = <0>; + }; + + xusbxti: oscillator-1 { + compatible = "fixed-clock"; + clock-frequency = <0>; + clock-output-names = "xusbxti"; + #clock-cells = <0>; + }; + soc { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; ranges; - external-clocks { - compatible = "simple-bus"; - #address-cells = <1>; - #size-cells = <0>; - - xxti: oscillator@0 { - compatible = "fixed-clock"; - reg = <0>; - clock-frequency = <0>; - clock-output-names = "xxti"; - #clock-cells = <0>; - }; - - xusbxti: oscillator@1 { - compatible = "fixed-clock"; - reg = <1>; - clock-frequency = <0>; - clock-output-names = "xusbxti"; - #clock-cells = <0>; - }; - }; - onenand: onenand@b0600000 { compatible = "samsung,s5pv210-onenand"; reg = <0xb0600000 0x2000>, @@ -100,19 +92,16 @@ }; clocks: clock-controller@e0100000 { - compatible = "samsung,s5pv210-clock", "simple-bus"; + compatible = "samsung,s5pv210-clock"; reg = <0xe0100000 0x10000>; clock-names = "xxti", "xusbxti"; clocks = <&xxti>, <&xusbxti>; #clock-cells = <1>; - #address-cells = <1>; - #size-cells = <1>; - ranges; + }; - pmu_syscon: syscon@e0108000 { - compatible = "samsung-s5pv210-pmu", "syscon"; - reg = <0xe0108000 0x8000>; - }; + pmu_syscon: syscon@e0108000 { + compatible = "samsung-s5pv210-pmu", "syscon"; + reg = <0xe0108000 0x8000>; }; pinctrl0: pinctrl@e0200000 { @@ -128,35 +117,28 @@ }; }; - amba { - #address-cells = <1>; - #size-cells = <1>; - compatible = "simple-bus"; - ranges; - - pdma0: dma@e0900000 { - compatible = "arm,pl330", "arm,primecell"; - reg = <0xe0900000 0x1000>; - interrupt-parent = <&vic0>; - interrupts = <19>; - clocks = <&clocks CLK_PDMA0>; - clock-names = "apb_pclk"; - #dma-cells = <1>; - #dma-channels = <8>; - #dma-requests = <32>; - }; + pdma0: dma@e0900000 { + compatible = "arm,pl330", "arm,primecell"; + reg = <0xe0900000 0x1000>; + interrupt-parent = <&vic0>; + interrupts = <19>; + clocks = <&clocks CLK_PDMA0>; + clock-names = "apb_pclk"; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; + }; - pdma1: dma@e0a00000 { - compatible = "arm,pl330", "arm,primecell"; - reg = <0xe0a00000 0x1000>; - interrupt-parent = <&vic0>; - interrupts = <20>; - clocks = <&clocks CLK_PDMA1>; - clock-names = "apb_pclk"; - #dma-cells = <1>; - #dma-channels = <8>; - #dma-requests = <32>; - }; + pdma1: dma@e0a00000 { + compatible = "arm,pl330", "arm,primecell"; + reg = <0xe0a00000 0x1000>; + interrupt-parent = <&vic0>; + interrupts = <20>; + clocks = <&clocks CLK_PDMA1>; + clock-names = "apb_pclk"; + #dma-cells = <1>; + #dma-channels = <8>; + #dma-requests = <32>; }; spi0: spi@e1300000 { @@ -229,43 +211,36 @@ status = "disabled"; }; - audio-subsystem { - compatible = "samsung,s5pv210-audss", "simple-bus"; - #address-cells = <1>; - #size-cells = <1>; - ranges; - - clk_audss: clock-controller@eee10000 { - compatible = "samsung,s5pv210-audss-clock"; - reg = <0xeee10000 0x1000>; - clock-names = "hclk", "xxti", - "fout_epll", - "sclk_audio0"; - clocks = <&clocks DOUT_HCLKP>, <&xxti>, - <&clocks FOUT_EPLL>, - <&clocks SCLK_AUDIO0>; - #clock-cells = <1>; - }; + clk_audss: clock-controller@eee10000 { + compatible = "samsung,s5pv210-audss-clock"; + reg = <0xeee10000 0x1000>; + clock-names = "hclk", "xxti", + "fout_epll", + "sclk_audio0"; + clocks = <&clocks DOUT_HCLKP>, <&xxti>, + <&clocks FOUT_EPLL>, + <&clocks SCLK_AUDIO0>; + #clock-cells = <1>; + }; - i2s0: i2s@eee30000 { - compatible = "samsung,s5pv210-i2s"; - reg = <0xeee30000 0x1000>; - interrupt-parent = <&vic2>; - interrupts = <16>; - dma-names = "rx", "tx", "tx-sec"; - dmas = <&pdma1 9>, <&pdma1 10>, <&pdma1 11>; - clock-names = "iis", - "i2s_opclk0", - "i2s_opclk1"; - clocks = <&clk_audss CLK_I2S>, - <&clk_audss CLK_I2S>, - <&clk_audss CLK_DOUT_AUD_BUS>; - samsung,idma-addr = <0xc0010000>; - pinctrl-names = "default"; - pinctrl-0 = <&i2s0_bus>; - #sound-dai-cells = <0>; - status = "disabled"; - }; + i2s0: i2s@eee30000 { + compatible = "samsung,s5pv210-i2s"; + reg = <0xeee30000 0x1000>; + interrupt-parent = <&vic2>; + interrupts = <16>; + dma-names = "rx", "tx", "tx-sec"; + dmas = <&pdma1 9>, <&pdma1 10>, <&pdma1 11>; + clock-names = "iis", + "i2s_opclk0", + "i2s_opclk1"; + clocks = <&clk_audss CLK_I2S>, + <&clk_audss CLK_I2S>, + <&clk_audss CLK_DOUT_AUD_BUS>; + samsung,idma-addr = <0xc0010000>; + pinctrl-names = "default"; + pinctrl-0 = <&i2s0_bus>; + #sound-dai-cells = <0>; + status = "disabled"; }; i2s1: i2s@e2100000 { diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 2e2c1a7b1d1dc..dbfdffd54003a 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -526,7 +526,7 @@ pmecc: ecc-engine@f8014070 { compatible = "atmel,sama5d2-pmecc"; reg = <0xf8014070 0x490>, - <0xf8014500 0x100>; + <0xf8014500 0x200>; }; }; @@ -648,6 +648,7 @@ clocks = <&pmc PMC_TYPE_PERIPHERAL 51>; #address-cells = <1>; #size-cells = <1>; + no-memory-wc; ranges = <0 0xf8044000 0x1420>; }; @@ -716,7 +717,7 @@ can0: can@f8054000 { compatible = "bosch,m_can"; - reg = <0xf8054000 0x4000>, <0x210000 0x4000>; + reg = <0xf8054000 0x4000>, <0x210000 0x1c00>; reg-names = "m_can", "message_ram"; interrupts = <56 IRQ_TYPE_LEVEL_HIGH 7>, <64 IRQ_TYPE_LEVEL_HIGH 7>; @@ -932,13 +933,13 @@ clocks = <&pmc PMC_TYPE_PERIPHERAL 55>, <&pmc PMC_TYPE_GCK 55>; clock-names = "pclk", "gclk"; assigned-clocks = <&pmc PMC_TYPE_CORE PMC_I2S1_MUX>; - assigned-parrents = <&pmc PMC_TYPE_GCK 55>; + assigned-clock-parents = <&pmc PMC_TYPE_GCK 55>; status = "disabled"; }; can1: can@fc050000 { compatible = "bosch,m_can"; - reg = <0xfc050000 0x4000>, <0x210000 0x4000>; + reg = <0xfc050000 0x4000>, <0x210000 0x3800>; reg-names = "m_can", "message_ram"; interrupts = <57 IRQ_TYPE_LEVEL_HIGH 7>, <65 IRQ_TYPE_LEVEL_HIGH 7>; @@ -948,7 +949,7 @@ assigned-clocks = <&pmc PMC_TYPE_GCK 57>; assigned-clock-parents = <&pmc PMC_TYPE_CORE PMC_UTMI>; assigned-clock-rates = <40000000>; - bosch,mram-cfg = <0x1100 0 0 64 0 0 32 32>; + bosch,mram-cfg = <0x1c00 0 0 64 0 0 32 32>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi index f770aace0efd6..203d40be70a51 100644 --- a/arch/arm/boot/dts/sama5d3.dtsi +++ b/arch/arm/boot/dts/sama5d3.dtsi @@ -1188,49 +1188,49 @@ usart0_clk: usart0_clk { #clock-cells = <0>; reg = <12>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; usart1_clk: usart1_clk { #clock-cells = <0>; reg = <13>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; usart2_clk: usart2_clk { #clock-cells = <0>; reg = <14>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; usart3_clk: usart3_clk { #clock-cells = <0>; reg = <15>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; uart0_clk: uart0_clk { #clock-cells = <0>; reg = <16>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; twi0_clk: twi0_clk { reg = <18>; #clock-cells = <0>; - atmel,clk-output-range = <0 16625000>; + atmel,clk-output-range = <0 41500000>; }; twi1_clk: twi1_clk { #clock-cells = <0>; reg = <19>; - atmel,clk-output-range = <0 16625000>; + atmel,clk-output-range = <0 41500000>; }; twi2_clk: twi2_clk { #clock-cells = <0>; reg = <20>; - atmel,clk-output-range = <0 16625000>; + atmel,clk-output-range = <0 41500000>; }; mci0_clk: mci0_clk { @@ -1246,19 +1246,19 @@ spi0_clk: spi0_clk { #clock-cells = <0>; reg = <24>; - atmel,clk-output-range = <0 133000000>; + atmel,clk-output-range = <0 166000000>; }; spi1_clk: spi1_clk { #clock-cells = <0>; reg = <25>; - atmel,clk-output-range = <0 133000000>; + atmel,clk-output-range = <0 166000000>; }; tcb0_clk: tcb0_clk { #clock-cells = <0>; reg = <26>; - atmel,clk-output-range = <0 133000000>; + atmel,clk-output-range = <0 166000000>; }; pwm_clk: pwm_clk { @@ -1269,7 +1269,7 @@ adc_clk: adc_clk { #clock-cells = <0>; reg = <29>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; dma0_clk: dma0_clk { @@ -1300,13 +1300,13 @@ ssc0_clk: ssc0_clk { #clock-cells = <0>; reg = <38>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; ssc1_clk: ssc1_clk { #clock-cells = <0>; reg = <39>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; sha_clk: sha_clk { diff --git a/arch/arm/boot/dts/sama5d3_can.dtsi b/arch/arm/boot/dts/sama5d3_can.dtsi index cf06a018ed0f2..2470dd3fff25e 100644 --- a/arch/arm/boot/dts/sama5d3_can.dtsi +++ b/arch/arm/boot/dts/sama5d3_can.dtsi @@ -36,13 +36,13 @@ can0_clk: can0_clk { #clock-cells = <0>; reg = <40>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; can1_clk: can1_clk { #clock-cells = <0>; reg = <41>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; }; }; diff --git a/arch/arm/boot/dts/sama5d3_tcb1.dtsi b/arch/arm/boot/dts/sama5d3_tcb1.dtsi index 1584035daf515..215802b8db301 100644 --- a/arch/arm/boot/dts/sama5d3_tcb1.dtsi +++ b/arch/arm/boot/dts/sama5d3_tcb1.dtsi @@ -22,6 +22,7 @@ tcb1_clk: tcb1_clk { #clock-cells = <0>; reg = <27>; + atmel,clk-output-range = <0 166000000>; }; }; }; diff --git a/arch/arm/boot/dts/sama5d3_uart.dtsi b/arch/arm/boot/dts/sama5d3_uart.dtsi index 4316bdbdc25dd..cb62adbd28ed6 100644 --- a/arch/arm/boot/dts/sama5d3_uart.dtsi +++ b/arch/arm/boot/dts/sama5d3_uart.dtsi @@ -41,13 +41,13 @@ uart0_clk: uart0_clk { #clock-cells = <0>; reg = <16>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; uart1_clk: uart1_clk { #clock-cells = <0>; reg = <17>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; }; }; diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi index 6ab27a7b388d4..a4cef07c38cbd 100644 --- a/arch/arm/boot/dts/sama5d4.dtsi +++ b/arch/arm/boot/dts/sama5d4.dtsi @@ -914,7 +914,7 @@ 0xffffffff 0x3ffcfe7c 0x1c010101 /* pioA */ 0x7fffffff 0xfffccc3a 0x3f00cc3a /* pioB */ 0xffffffff 0x3ff83fff 0xff00ffff /* pioC */ - 0x0003ff00 0x8002a800 0x00000000 /* pioD */ + 0xb003ff00 0x8002a800 0x00000000 /* pioD */ 0xffffffff 0x7fffffff 0x76fff1bf /* pioE */ >; diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi index 4f3993cc02279..4510308972209 100644 --- a/arch/arm/boot/dts/socfpga.dtsi +++ b/arch/arm/boot/dts/socfpga.dtsi @@ -710,7 +710,7 @@ }; }; - L2: l2-cache@fffef000 { + L2: cache-controller@fffef000 { compatible = "arm,pl310-cache"; reg = <0xfffef000 0x1000>; interrupts = <0 38 0x04>; diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi b/arch/arm/boot/dts/socfpga_arria10.dtsi index 2a86e72d97918..f261a33440710 100644 --- a/arch/arm/boot/dts/socfpga_arria10.dtsi +++ b/arch/arm/boot/dts/socfpga_arria10.dtsi @@ -636,7 +636,7 @@ reg = <0xffcfb100 0x80>; }; - L2: l2-cache@fffff000 { + L2: cache-controller@fffff000 { compatible = "arm,pl310-cache"; reg = <0xfffff000 0x1000>; interrupts = <0 18 IRQ_TYPE_LEVEL_HIGH>; @@ -819,7 +819,7 @@ timer3: timer3@ffd00100 { compatible = "snps,dw-apb-timer"; interrupts = <0 118 IRQ_TYPE_LEVEL_HIGH>; - reg = <0xffd01000 0x100>; + reg = <0xffd00100 0x100>; clocks = <&l4_sys_free_clk>; clock-names = "timer"; resets = <&rst L4SYSTIMER1_RESET>; diff --git a/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts b/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts index b4c0a76a4d1af..4c2fcfcc7baed 100644 --- a/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts +++ b/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts @@ -12,7 +12,7 @@ flash0: n25q00@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00aa"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_arria5_socdk.dts b/arch/arm/boot/dts/socfpga_arria5_socdk.dts index 90e676e7019f2..1b02d46496a85 100644 --- a/arch/arm/boot/dts/socfpga_arria5_socdk.dts +++ b/arch/arm/boot/dts/socfpga_arria5_socdk.dts @@ -119,7 +119,7 @@ flash: flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q256a"; + compatible = "micron,n25q256a", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts b/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts index 6f138b2b26163..51bb436784e24 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts @@ -124,7 +124,7 @@ flash0: n25q00@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; /* chip select */ spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts index c155ff02eb6e0..cae9ddd5ed38b 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts @@ -169,7 +169,7 @@ flash: flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts b/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts index 8d5d3996f6f27..ca18b959e6559 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts @@ -80,7 +80,7 @@ flash: flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q256a"; + compatible = "micron,n25q256a", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; m25p,fast-read; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts b/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts index 99a71757cdf46..3f7aa7bf0863a 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts @@ -116,7 +116,7 @@ flash0: n25q512a@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q512a"; + compatible = "micron,n25q512a", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts index a060718758b67..25874e1b9c829 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts @@ -224,7 +224,7 @@ n25q128@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q128"; + compatible = "micron,n25q128", "jedec,spi-nor"; reg = <0>; /* chip select */ spi-max-frequency = <100000000>; m25p,fast-read; @@ -241,7 +241,7 @@ n25q00@1 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <1>; /* chip select */ spi-max-frequency = <100000000>; m25p,fast-read; diff --git a/arch/arm/boot/dts/spear1340.dtsi b/arch/arm/boot/dts/spear1340.dtsi index 1a8f5e8b10e3a..66cd473ecb617 100644 --- a/arch/arm/boot/dts/spear1340.dtsi +++ b/arch/arm/boot/dts/spear1340.dtsi @@ -136,9 +136,9 @@ reg = <0xb4100000 0x1000>; interrupts = <0 105 0x4>; status = "disabled"; - dmas = <&dwdma0 12 0 1>, - <&dwdma0 13 1 0>; - dma-names = "tx", "rx"; + dmas = <&dwdma0 13 0 1>, + <&dwdma0 12 1 0>; + dma-names = "rx", "tx"; }; thermal@e07008c4 { diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi index f187da4485f46..78672db9068be 100644 --- a/arch/arm/boot/dts/spear13xx.dtsi +++ b/arch/arm/boot/dts/spear13xx.dtsi @@ -284,9 +284,9 @@ #size-cells = <0>; interrupts = <0 31 0x4>; status = "disabled"; - dmas = <&dwdma0 4 0 0>, - <&dwdma0 5 0 0>; - dma-names = "tx", "rx"; + dmas = <&dwdma0 5 0 0>, + <&dwdma0 4 0 0>; + dma-names = "rx", "tx"; }; rtc@e0580000 { diff --git a/arch/arm/boot/dts/spear3xx.dtsi b/arch/arm/boot/dts/spear3xx.dtsi index f266b7b034823..cc88ebe7a60ce 100644 --- a/arch/arm/boot/dts/spear3xx.dtsi +++ b/arch/arm/boot/dts/spear3xx.dtsi @@ -47,7 +47,7 @@ }; gmac: eth@e0800000 { - compatible = "st,spear600-gmac"; + compatible = "snps,dwmac-3.40a"; reg = <0xe0800000 0x8000>; interrupts = <23 22>; interrupt-names = "macirq", "eth_wake_irq"; diff --git a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi index f78b4eabd68c2..e7178a6db6bef 100644 --- a/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi +++ b/arch/arm/boot/dts/ste-nomadik-stn8815.dtsi @@ -755,14 +755,14 @@ status = "disabled"; }; - vica: intc@10140000 { + vica: interrupt-controller@10140000 { compatible = "arm,versatile-vic"; interrupt-controller; #interrupt-cells = <1>; reg = <0x10140000 0x20>; }; - vicb: intc@10140020 { + vicb: interrupt-controller@10140020 { compatible = "arm,versatile-vic"; interrupt-controller; #interrupt-cells = <1>; diff --git a/arch/arm/boot/dts/stihxxx-b2120.dtsi b/arch/arm/boot/dts/stihxxx-b2120.dtsi index 60e11045ad762..d051f080e52ec 100644 --- a/arch/arm/boot/dts/stihxxx-b2120.dtsi +++ b/arch/arm/boot/dts/stihxxx-b2120.dtsi @@ -46,7 +46,7 @@ /* DAC */ format = "i2s"; mclk-fs = <256>; - frame-inversion = <1>; + frame-inversion; cpu { sound-dai = <&sti_uni_player2>; }; diff --git a/arch/arm/boot/dts/stm32429i-eval.dts b/arch/arm/boot/dts/stm32429i-eval.dts index ba08624c6237d..4f45e71a1e4d3 100644 --- a/arch/arm/boot/dts/stm32429i-eval.dts +++ b/arch/arm/boot/dts/stm32429i-eval.dts @@ -112,17 +112,15 @@ }; }; - gpio_keys { + gpio-keys { compatible = "gpio-keys"; - #address-cells = <1>; - #size-cells = <0>; autorepeat; - button@0 { + button-0 { label = "Wake up"; linux,code = ; gpios = <&gpioa 0 0>; }; - button@1 { + button-1 { label = "Tamper"; linux,code = ; gpios = <&gpioc 13 0>; diff --git a/arch/arm/boot/dts/stm32746g-eval.dts b/arch/arm/boot/dts/stm32746g-eval.dts index 2b1664884ae7b..8d64b52838c06 100644 --- a/arch/arm/boot/dts/stm32746g-eval.dts +++ b/arch/arm/boot/dts/stm32746g-eval.dts @@ -81,12 +81,10 @@ }; }; - gpio_keys { + gpio-keys { compatible = "gpio-keys"; - #address-cells = <1>; - #size-cells = <0>; autorepeat; - button@0 { + button-0 { label = "Wake up"; linux,code = ; gpios = <&gpioc 13 0>; diff --git a/arch/arm/boot/dts/stm32f429-disco.dts b/arch/arm/boot/dts/stm32f429-disco.dts index e19d0fe7dbdac..49ae2d72afc95 100644 --- a/arch/arm/boot/dts/stm32f429-disco.dts +++ b/arch/arm/boot/dts/stm32f429-disco.dts @@ -79,12 +79,10 @@ }; }; - gpio_keys { + gpio-keys { compatible = "gpio-keys"; - #address-cells = <1>; - #size-cells = <0>; autorepeat; - button@0 { + button-0 { label = "User"; linux,code = ; gpios = <&gpioa 0 0>; diff --git a/arch/arm/boot/dts/stm32f429.dtsi b/arch/arm/boot/dts/stm32f429.dtsi index 5c8a826b31958..dd41342ef0175 100644 --- a/arch/arm/boot/dts/stm32f429.dtsi +++ b/arch/arm/boot/dts/stm32f429.dtsi @@ -283,8 +283,6 @@ }; timers13: timers@40001c00 { - #address-cells = <1>; - #size-cells = <0>; compatible = "st,stm32-timers"; reg = <0x40001C00 0x400>; clocks = <&rcc 0 STM32F4_APB1_CLOCK(TIM13)>; @@ -299,8 +297,6 @@ }; timers14: timers@40002000 { - #address-cells = <1>; - #size-cells = <0>; compatible = "st,stm32-timers"; reg = <0x40002000 0x400>; clocks = <&rcc 0 STM32F4_APB1_CLOCK(TIM14)>; @@ -623,8 +619,6 @@ }; timers10: timers@40014400 { - #address-cells = <1>; - #size-cells = <0>; compatible = "st,stm32-timers"; reg = <0x40014400 0x400>; clocks = <&rcc 0 STM32F4_APB2_CLOCK(TIM10)>; @@ -639,8 +633,6 @@ }; timers11: timers@40014800 { - #address-cells = <1>; - #size-cells = <0>; compatible = "st,stm32-timers"; reg = <0x40014800 0x400>; clocks = <&rcc 0 STM32F4_APB2_CLOCK(TIM11)>; @@ -696,7 +688,7 @@ status = "disabled"; }; - rcc: rcc@40023810 { + rcc: rcc@40023800 { #reset-cells = <1>; #clock-cells = <2>; compatible = "st,stm32f42xx-rcc", "st,stm32-rcc"; diff --git a/arch/arm/boot/dts/stm32f469-disco.dts b/arch/arm/boot/dts/stm32f469-disco.dts index a3ff04940aec1..0ce450123dda6 100644 --- a/arch/arm/boot/dts/stm32f469-disco.dts +++ b/arch/arm/boot/dts/stm32f469-disco.dts @@ -76,6 +76,13 @@ regulator-max-microvolt = <3300000>; }; + vdd_dsi: vdd-dsi { + compatible = "regulator-fixed"; + regulator-name = "vdd_dsi"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; + soc { dma-ranges = <0xc0000000 0x0 0x10000000>; }; @@ -97,12 +104,10 @@ }; }; - gpio_keys { + gpio-keys { compatible = "gpio-keys"; - #address-cells = <1>; - #size-cells = <0>; autorepeat; - button@0 { + button-0 { label = "User"; linux,code = ; gpios = <&gpioa 0 GPIO_ACTIVE_HIGH>; @@ -155,6 +160,7 @@ compatible = "orisetech,otm8009a"; reg = <0>; /* dsi virtual channel (0..3) */ reset-gpios = <&gpioh 7 GPIO_ACTIVE_LOW>; + power-supply = <&vdd_dsi>; status = "okay"; port { diff --git a/arch/arm/boot/dts/stm32f746.dtsi b/arch/arm/boot/dts/stm32f746.dtsi index d26f93f8b9c2a..60680fcf8eb3d 100644 --- a/arch/arm/boot/dts/stm32f746.dtsi +++ b/arch/arm/boot/dts/stm32f746.dtsi @@ -265,8 +265,6 @@ }; timers13: timers@40001c00 { - #address-cells = <1>; - #size-cells = <0>; compatible = "st,stm32-timers"; reg = <0x40001C00 0x400>; clocks = <&rcc 0 STM32F7_APB1_CLOCK(TIM13)>; @@ -281,8 +279,6 @@ }; timers14: timers@40002000 { - #address-cells = <1>; - #size-cells = <0>; compatible = "st,stm32-timers"; reg = <0x40002000 0x400>; clocks = <&rcc 0 STM32F7_APB1_CLOCK(TIM14)>; @@ -366,9 +362,9 @@ status = "disabled"; }; - i2c3: i2c@40005C00 { + i2c3: i2c@40005c00 { compatible = "st,stm32f7-i2c"; - reg = <0x40005C00 0x400>; + reg = <0x40005c00 0x400>; interrupts = <72>, <73>; resets = <&rcc STM32F7_APB1_RESET(I2C3)>; @@ -533,8 +529,6 @@ }; timers10: timers@40014400 { - #address-cells = <1>; - #size-cells = <0>; compatible = "st,stm32-timers"; reg = <0x40014400 0x400>; clocks = <&rcc 0 STM32F7_APB2_CLOCK(TIM10)>; @@ -549,8 +543,6 @@ }; timers11: timers@40014800 { - #address-cells = <1>; - #size-cells = <0>; compatible = "st,stm32-timers"; reg = <0x40014800 0x400>; clocks = <&rcc 0 STM32F7_APB2_CLOCK(TIM11)>; diff --git a/arch/arm/boot/dts/stm32f769-disco.dts b/arch/arm/boot/dts/stm32f769-disco.dts index 6f1d0ac8c31c7..a4284b761e7fe 100644 --- a/arch/arm/boot/dts/stm32f769-disco.dts +++ b/arch/arm/boot/dts/stm32f769-disco.dts @@ -75,12 +75,10 @@ }; }; - gpio_keys { + gpio-keys { compatible = "gpio-keys"; - #address-cells = <1>; - #size-cells = <0>; autorepeat; - button@0 { + button-0 { label = "User"; linux,code = ; gpios = <&gpioa 0 GPIO_ACTIVE_HIGH>; diff --git a/arch/arm/boot/dts/stm32h743.dtsi b/arch/arm/boot/dts/stm32h743.dtsi index c065266ee3776..82a234c64b8b9 100644 --- a/arch/arm/boot/dts/stm32h743.dtsi +++ b/arch/arm/boot/dts/stm32h743.dtsi @@ -438,8 +438,6 @@ }; lptimer4: timer@58002c00 { - #address-cells = <1>; - #size-cells = <0>; compatible = "st,stm32-lptimer"; reg = <0x58002c00 0x400>; clocks = <&rcc LPTIM4_CK>; @@ -454,8 +452,6 @@ }; lptimer5: timer@58003000 { - #address-cells = <1>; - #size-cells = <0>; compatible = "st,stm32-lptimer"; reg = <0x58003000 0x400>; clocks = <&rcc LPTIM5_CK>; diff --git a/arch/arm/boot/dts/stm32mp157a-avenger96.dts b/arch/arm/boot/dts/stm32mp157a-avenger96.dts index 2e4742c53d048..7b8c3f25861c9 100644 --- a/arch/arm/boot/dts/stm32mp157a-avenger96.dts +++ b/arch/arm/boot/dts/stm32mp157a-avenger96.dts @@ -91,6 +91,9 @@ #address-cells = <1>; #size-cells = <0>; compatible = "snps,dwmac-mdio"; + reset-gpios = <&gpioz 2 GPIO_ACTIVE_LOW>; + reset-delay-us = <1000>; + phy0: ethernet-phy@7 { reg = <7>; }; diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi index f98e0370c0bce..0e9e930c60f06 100644 --- a/arch/arm/boot/dts/stm32mp157c.dtsi +++ b/arch/arm/boot/dts/stm32mp157c.dtsi @@ -515,7 +515,7 @@ compatible = "st,stm32-cec"; reg = <0x40016000 0x400>; interrupts = ; - clocks = <&rcc CEC_K>, <&clk_lse>; + clocks = <&rcc CEC_K>, <&rcc CEC>; clock-names = "cec", "hdmi-cec"; status = "disabled"; }; @@ -773,7 +773,7 @@ #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-a"; - reg = <0x4 0x1c>; + reg = <0x4 0x20>; clocks = <&rcc SAI1_K>; clock-names = "sai_ck"; dmas = <&dmamux1 87 0x400 0x01>; @@ -783,7 +783,7 @@ sai1b: audio-controller@4400a024 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-b"; - reg = <0x24 0x1c>; + reg = <0x24 0x20>; clocks = <&rcc SAI1_K>; clock-names = "sai_ck"; dmas = <&dmamux1 88 0x400 0x01>; @@ -804,7 +804,7 @@ sai2a: audio-controller@4400b004 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-a"; - reg = <0x4 0x1c>; + reg = <0x4 0x20>; clocks = <&rcc SAI2_K>; clock-names = "sai_ck"; dmas = <&dmamux1 89 0x400 0x01>; @@ -814,7 +814,7 @@ sai2b: audio-controller@4400b024 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-b"; - reg = <0x24 0x1c>; + reg = <0x24 0x20>; clocks = <&rcc SAI2_K>; clock-names = "sai_ck"; dmas = <&dmamux1 90 0x400 0x01>; @@ -835,7 +835,7 @@ sai3a: audio-controller@4400c004 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-a"; - reg = <0x04 0x1c>; + reg = <0x04 0x20>; clocks = <&rcc SAI3_K>; clock-names = "sai_ck"; dmas = <&dmamux1 113 0x400 0x01>; @@ -845,7 +845,7 @@ sai3b: audio-controller@4400c024 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-b"; - reg = <0x24 0x1c>; + reg = <0x24 0x20>; clocks = <&rcc SAI3_K>; clock-names = "sai_ck"; dmas = <&dmamux1 114 0x400 0x01>; @@ -1191,7 +1191,7 @@ sai4a: audio-controller@50027004 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-a"; - reg = <0x04 0x1c>; + reg = <0x04 0x20>; clocks = <&rcc SAI4_K>; clock-names = "sai_ck"; dmas = <&dmamux1 99 0x400 0x01>; @@ -1201,7 +1201,7 @@ sai4b: audio-controller@50027024 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-b"; - reg = <0x24 0x1c>; + reg = <0x24 0x20>; clocks = <&rcc SAI4_K>; clock-names = "sai_ck"; dmas = <&dmamux1 100 0x400 0x01>; @@ -1311,12 +1311,6 @@ status = "disabled"; }; - stmmac_axi_config_0: stmmac-axi-config { - snps,wr_osr_lmt = <0x7>; - snps,rd_osr_lmt = <0x7>; - snps,blen = <0 0 0 0 16 8 4>; - }; - ethernet0: ethernet@5800a000 { compatible = "st,stm32mp1-dwmac", "snps,dwmac-4.20a"; reg = <0x5800a000 0x2000>; @@ -1339,6 +1333,12 @@ snps,axi-config = <&stmmac_axi_config_0>; snps,tso; status = "disabled"; + + stmmac_axi_config_0: stmmac-axi-config { + snps,wr_osr_lmt = <0x7>; + snps,rd_osr_lmt = <0x7>; + snps,blen = <0 0 0 0 16 8 4>; + }; }; usbh_ohci: usbh-ohci@5800c000 { diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 4c268b70b7357..2265ca24c0c71 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -143,7 +143,7 @@ trips { cpu_alert0: cpu-alert0 { /* milliCelsius */ - temperature = <850000>; + temperature = <85000>; hysteresis = <2000>; type = "passive"; }; @@ -198,7 +198,7 @@ default-pool { compatible = "shared-dma-pool"; size = <0x6000000>; - alloc-ranges = <0x4a000000 0x6000000>; + alloc-ranges = <0x40000000 0x10000000>; reusable; linux,cma-default; }; diff --git a/arch/arm/boot/dts/sun5i.dtsi b/arch/arm/boot/dts/sun5i.dtsi index 6befa236ba99d..fd31da8fd3112 100644 --- a/arch/arm/boot/dts/sun5i.dtsi +++ b/arch/arm/boot/dts/sun5i.dtsi @@ -117,7 +117,7 @@ default-pool { compatible = "shared-dma-pool"; size = <0x6000000>; - alloc-ranges = <0x4a000000 0x6000000>; + alloc-ranges = <0x40000000 0x10000000>; reusable; linux,cma-default; }; diff --git a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts index 049e6ab3cf56c..73de34ae37fdc 100644 --- a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts +++ b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts @@ -154,7 +154,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts b/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts index 32d5d45a35c03..8945dbb114a2a 100644 --- a/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts +++ b/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts @@ -130,7 +130,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_gmac_3v3>; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun7i-a20-bananapi.dts b/arch/arm/boot/dts/sun7i-a20-bananapi.dts index bb3987e101c29..0b3d9ae756503 100644 --- a/arch/arm/boot/dts/sun7i-a20-bananapi.dts +++ b/arch/arm/boot/dts/sun7i-a20-bananapi.dts @@ -132,7 +132,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_gmac_3v3>; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun7i-a20-bananapro.dts b/arch/arm/boot/dts/sun7i-a20-bananapro.dts index 01ccff756996d..5740f9442705c 100644 --- a/arch/arm/boot/dts/sun7i-a20-bananapro.dts +++ b/arch/arm/boot/dts/sun7i-a20-bananapro.dts @@ -110,7 +110,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_gmac_3v3>; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun7i-a20-cubietruck.dts b/arch/arm/boot/dts/sun7i-a20-cubietruck.dts index 8c8dee6ea461a..9109ca0919ade 100644 --- a/arch/arm/boot/dts/sun7i-a20-cubietruck.dts +++ b/arch/arm/boot/dts/sun7i-a20-cubietruck.dts @@ -151,7 +151,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts b/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts index 9ba62774e89a1..488933b87ad5a 100644 --- a/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts +++ b/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts @@ -112,7 +112,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts b/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts index fce2f7fcd084a..bf38c66c1815b 100644 --- a/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts +++ b/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts @@ -1,5 +1,5 @@ /* - * Copyright 2015 Adam Sampson + * Copyright 2015-2020 Adam Sampson * * This file is dual-licensed: you can use it either under the terms * of the GPL or the X11 license, at your option. Note that this dual @@ -115,7 +115,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 8aebefd6accfe..1f8b45f07e58f 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -180,7 +180,7 @@ default-pool { compatible = "shared-dma-pool"; size = <0x6000000>; - alloc-ranges = <0x4a000000 0x6000000>; + alloc-ranges = <0x40000000 0x10000000>; reusable; linux,cma-default; }; diff --git a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts index 9d34eabba1213..431f70234d364 100644 --- a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts +++ b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts @@ -131,7 +131,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_sw>; phy-handle = <&rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; allwinner,rx-delay-ps = <700>; allwinner,tx-delay-ps = <700>; status = "okay"; diff --git a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts index fb928503ad45d..d8326a5c681d4 100644 --- a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts +++ b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts @@ -101,7 +101,7 @@ initial-mode = <1>; /* initialize in HUB mode */ disabled-ports = <1>; intn-gpios = <&pio 7 5 GPIO_ACTIVE_HIGH>; /* PH5 */ - reset-gpios = <&pio 4 16 GPIO_ACTIVE_HIGH>; /* PE16 */ + reset-gpios = <&pio 4 16 GPIO_ACTIVE_LOW>; /* PE16 */ connect-gpios = <&pio 4 17 GPIO_ACTIVE_HIGH>; /* PE17 */ refclk-frequency = <19200000>; }; @@ -183,7 +183,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_dldo4>; phy-handle = <&rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts index 3bec3e0a81b2c..6bf93e5ed6817 100644 --- a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts +++ b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts @@ -358,8 +358,8 @@ }; ®_dldo3 { - regulator-min-microvolt = <2800000>; - regulator-max-microvolt = <2800000>; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; regulator-name = "vdd-csi"; }; @@ -482,7 +482,8 @@ }; &usbphy { - usb0_id_det-gpios = <&pio 7 11 GPIO_ACTIVE_HIGH>; /* PH11 */ + usb0_id_det-gpios = <&pio 7 11 (GPIO_ACTIVE_HIGH | GPIO_PULL_UP)>; /* PH11 */ + usb0_vbus_power-supply = <&usb_power_supply>; usb0_vbus-supply = <®_drivevbus>; usb1_vbus-supply = <®_vmain>; usb2_vbus-supply = <®_vmain>; diff --git a/arch/arm/boot/dts/sun8i-a83t.dtsi b/arch/arm/boot/dts/sun8i-a83t.dtsi index 74bb053cf23c6..4e485e45fbc55 100644 --- a/arch/arm/boot/dts/sun8i-a83t.dtsi +++ b/arch/arm/boot/dts/sun8i-a83t.dtsi @@ -313,7 +313,7 @@ display_clocks: clock@1000000 { compatible = "allwinner,sun8i-a83t-de2-clk"; - reg = <0x01000000 0x100000>; + reg = <0x01000000 0x10000>; clocks = <&ccu CLK_BUS_DE>, <&ccu CLK_PLL_DE>; clock-names = "bus", diff --git a/arch/arm/boot/dts/sun8i-h2-plus-bananapi-m2-zero.dts b/arch/arm/boot/dts/sun8i-h2-plus-bananapi-m2-zero.dts index d277d043031b2..4c6704e4c57ec 100644 --- a/arch/arm/boot/dts/sun8i-h2-plus-bananapi-m2-zero.dts +++ b/arch/arm/boot/dts/sun8i-h2-plus-bananapi-m2-zero.dts @@ -31,7 +31,7 @@ pwr_led { label = "bananapi-m2-zero:red:pwr"; - gpios = <&r_pio 0 10 GPIO_ACTIVE_HIGH>; /* PL10 */ + gpios = <&r_pio 0 10 GPIO_ACTIVE_LOW>; /* PL10 */ default-state = "on"; }; }; diff --git a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts index f19ed981da9d9..3706216ffb40b 100644 --- a/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts +++ b/arch/arm/boot/dts/sun8i-h2-plus-orangepi-zero.dts @@ -169,7 +169,7 @@ flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "mxicy,mx25l1606e", "winbond,w25q128"; + compatible = "mxicy,mx25l1606e", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <40000000>; }; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts index 71fb732089397..babf4cf1b2f68 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts @@ -53,11 +53,6 @@ }; }; -&emac { - /* LEDs changed to active high on the plus */ - /delete-property/ allwinner,leds-active-low; -}; - &mmc1 { vmmc-supply = <®_vcc3v3>; bus-width = <4>; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts index 6dbf7b2e0c13c..b6ca45d18e511 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts @@ -67,7 +67,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun8i-h3.dtsi b/arch/arm/boot/dts/sun8i-h3.dtsi index e37c30e811d3b..6056f206c9e39 100644 --- a/arch/arm/boot/dts/sun8i-h3.dtsi +++ b/arch/arm/boot/dts/sun8i-h3.dtsi @@ -80,7 +80,7 @@ #cooling-cells = <2>; }; - cpu@1 { + cpu1: cpu@1 { compatible = "arm,cortex-a7"; device_type = "cpu"; reg = <1>; @@ -90,7 +90,7 @@ #cooling-cells = <2>; }; - cpu@2 { + cpu2: cpu@2 { compatible = "arm,cortex-a7"; device_type = "cpu"; reg = <2>; @@ -100,7 +100,7 @@ #cooling-cells = <2>; }; - cpu@3 { + cpu3: cpu@3 { compatible = "arm,cortex-a7"; device_type = "cpu"; reg = <3>; @@ -111,6 +111,15 @@ }; }; + pmu { + compatible = "arm,cortex-a7-pmu"; + interrupts = , + , + , + ; + interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>; + }; + timer { compatible = "arm,armv7-timer"; interrupts = , diff --git a/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts b/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts index 42d62d1ba1dc7..7db89500f399c 100644 --- a/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts +++ b/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts @@ -129,7 +129,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_dc1sw>; status = "okay"; }; @@ -223,16 +223,16 @@ }; ®_dc1sw { - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3000000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-name = "vcc-gmac-phy"; }; ®_dcdc1 { regulator-always-on; - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3000000>; - regulator-name = "vcc-3v0"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-name = "vcc-3v3"; }; ®_dcdc2 { diff --git a/arch/arm/boot/dts/sun8i-r40.dtsi b/arch/arm/boot/dts/sun8i-r40.dtsi index c9c2688db66d9..339402601990d 100644 --- a/arch/arm/boot/dts/sun8i-r40.dtsi +++ b/arch/arm/boot/dts/sun8i-r40.dtsi @@ -118,7 +118,7 @@ display_clocks: clock@1000000 { compatible = "allwinner,sun8i-r40-de2-clk", "allwinner,sun8i-h3-de2-clk"; - reg = <0x01000000 0x100000>; + reg = <0x01000000 0x10000>; clocks = <&ccu CLK_BUS_DE>, <&ccu CLK_DE>; clock-names = "bus", @@ -266,6 +266,16 @@ #phy-cells = <1>; }; + ahci: sata@1c18000 { + compatible = "allwinner,sun8i-r40-ahci"; + reg = <0x01c18000 0x1000>; + interrupts = ; + clocks = <&ccu CLK_BUS_SATA>, <&ccu CLK_SATA>; + resets = <&ccu RST_BUS_SATA>; + reset-names = "ahci"; + status = "disabled"; + }; + ehci1: usb@1c19000 { compatible = "allwinner,sun8i-r40-ehci", "generic-ehci"; reg = <0x01c19000 0x100>; @@ -557,17 +567,6 @@ #size-cells = <0>; }; - ahci: sata@1c18000 { - compatible = "allwinner,sun8i-r40-ahci"; - reg = <0x01c18000 0x1000>; - interrupts = ; - clocks = <&ccu CLK_BUS_SATA>, <&ccu CLK_SATA>; - resets = <&ccu RST_BUS_SATA>; - reset-names = "ahci"; - status = "disabled"; - - }; - gmac: ethernet@1c50000 { compatible = "allwinner,sun8i-r40-gmac"; syscon = <&ccu>; diff --git a/arch/arm/boot/dts/sun8i-v3s.dtsi b/arch/arm/boot/dts/sun8i-v3s.dtsi index 23ba56df38f73..50c32cf72c65c 100644 --- a/arch/arm/boot/dts/sun8i-v3s.dtsi +++ b/arch/arm/boot/dts/sun8i-v3s.dtsi @@ -105,7 +105,7 @@ display_clocks: clock@1000000 { compatible = "allwinner,sun8i-v3s-de2-clk"; - reg = <0x01000000 0x100000>; + reg = <0x01000000 0x10000>; clocks = <&ccu CLK_BUS_DE>, <&ccu CLK_DE>; clock-names = "bus", @@ -423,7 +423,7 @@ gic: interrupt-controller@1c81000 { compatible = "arm,gic-400"; reg = <0x01c81000 0x1000>, - <0x01c82000 0x1000>, + <0x01c82000 0x2000>, <0x01c84000 0x2000>, <0x01c86000 0x2000>; interrupt-controller; diff --git a/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts b/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts index 15c22b06fc4b6..47954551f5735 100644 --- a/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts +++ b/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts @@ -120,7 +120,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_dc1sw>; status = "okay"; }; @@ -198,16 +198,16 @@ }; ®_dc1sw { - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3000000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-name = "vcc-gmac-phy"; }; ®_dcdc1 { regulator-always-on; - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3000000>; - regulator-name = "vcc-3v0"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-name = "vcc-3v3"; }; ®_dcdc2 { diff --git a/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts b/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts index d3b337b043a15..484b93df20cb6 100644 --- a/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts +++ b/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts @@ -129,7 +129,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_cldo1>; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun9i-a80-optimus.dts b/arch/arm/boot/dts/sun9i-a80-optimus.dts index bbc6335e56314..5c3580d712e40 100644 --- a/arch/arm/boot/dts/sun9i-a80-optimus.dts +++ b/arch/arm/boot/dts/sun9i-a80-optimus.dts @@ -124,7 +124,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_cldo1>; status = "okay"; }; diff --git a/arch/arm/boot/dts/suniv-f1c100s.dtsi b/arch/arm/boot/dts/suniv-f1c100s.dtsi index 6100d3b75f613..def8301014487 100644 --- a/arch/arm/boot/dts/suniv-f1c100s.dtsi +++ b/arch/arm/boot/dts/suniv-f1c100s.dtsi @@ -104,8 +104,10 @@ wdt: watchdog@1c20ca0 { compatible = "allwinner,suniv-f1c100s-wdt", - "allwinner,sun4i-a10-wdt"; + "allwinner,sun6i-a31-wdt"; reg = <0x01c20ca0 0x20>; + interrupts = <16>; + clocks = <&osc32k>; }; uart0: serial@1c25000 { diff --git a/arch/arm/boot/dts/sunxi-bananapi-m2-plus-v1.2.dtsi b/arch/arm/boot/dts/sunxi-bananapi-m2-plus-v1.2.dtsi index 22466afd38a3a..235994a4a2ebb 100644 --- a/arch/arm/boot/dts/sunxi-bananapi-m2-plus-v1.2.dtsi +++ b/arch/arm/boot/dts/sunxi-bananapi-m2-plus-v1.2.dtsi @@ -16,15 +16,27 @@ regulator-type = "voltage"; regulator-boot-on; regulator-always-on; - regulator-min-microvolt = <1100000>; - regulator-max-microvolt = <1300000>; + regulator-min-microvolt = <1108475>; + regulator-max-microvolt = <1308475>; regulator-ramp-delay = <50>; /* 4ms */ gpios = <&r_pio 0 1 GPIO_ACTIVE_HIGH>; /* PL1 */ gpios-states = <0x1>; - states = <1100000 0>, <1300000 1>; + states = <1108475 0>, <1308475 1>; }; }; &cpu0 { cpu-supply = <®_vdd_cpux>; }; + +&cpu1 { + cpu-supply = <®_vdd_cpux>; +}; + +&cpu2 { + cpu-supply = <®_vdd_cpux>; +}; + +&cpu3 { + cpu-supply = <®_vdd_cpux>; +}; diff --git a/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi b/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi index 39263e74fbb53..8e5cb3b3fd686 100644 --- a/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi +++ b/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi @@ -126,7 +126,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi index 107eeafad20a1..b3141c964c3a5 100644 --- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi +++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi @@ -113,7 +113,7 @@ display_clocks: clock@1000000 { /* compatible is in per SoC .dtsi file */ - reg = <0x01000000 0x100000>; + reg = <0x01000000 0x10000>; clocks = <&ccu CLK_BUS_DE>, <&ccu CLK_DE>; clock-names = "bus", diff --git a/arch/arm/boot/dts/tegra20-tamonten.dtsi b/arch/arm/boot/dts/tegra20-tamonten.dtsi index 20137fc578b1b..69cb65d86c467 100644 --- a/arch/arm/boot/dts/tegra20-tamonten.dtsi +++ b/arch/arm/boot/dts/tegra20-tamonten.dtsi @@ -183,10 +183,11 @@ }; conf_ata { nvidia,pins = "ata", "atb", "atc", "atd", "ate", - "cdev1", "cdev2", "dap1", "dtb", "gma", - "gmb", "gmc", "gmd", "gme", "gpu7", - "gpv", "i2cp", "pta", "rm", "slxa", - "slxk", "spia", "spib", "uac"; + "cdev1", "cdev2", "dap1", "dtb", "dtf", + "gma", "gmb", "gmc", "gmd", "gme", "gpu7", + "gpv", "i2cp", "irrx", "irtx", "pta", + "rm", "slxa", "slxk", "spia", "spib", + "uac"; nvidia,pull = ; nvidia,tristate = ; }; @@ -202,7 +203,7 @@ }; conf_crtp { nvidia,pins = "crtp", "dap2", "dap3", "dap4", - "dtc", "dte", "dtf", "gpu", "sdio1", + "dtc", "dte", "gpu", "sdio1", "slxc", "slxd", "spdi", "spdo", "spig", "uda"; nvidia,pull = ; @@ -211,7 +212,7 @@ conf_ddc { nvidia,pins = "ddc", "dta", "dtd", "kbca", "kbcb", "kbcc", "kbcd", "kbce", "kbcf", - "sdc"; + "sdc", "uad", "uca"; nvidia,pull = ; nvidia,tristate = ; }; @@ -221,10 +222,9 @@ "lvp0", "owc", "sdb"; nvidia,tristate = ; }; - conf_irrx { - nvidia,pins = "irrx", "irtx", "sdd", "spic", - "spie", "spih", "uaa", "uab", "uad", - "uca", "ucb"; + conf_sdd { + nvidia,pins = "sdd", "spic", "spie", "spih", + "uaa", "uab", "ucb"; nvidia,pull = ; nvidia,tristate = ; }; diff --git a/arch/arm/boot/dts/uniphier-pxs2.dtsi b/arch/arm/boot/dts/uniphier-pxs2.dtsi index 4eddbb8d7fcac..049ca941b4790 100644 --- a/arch/arm/boot/dts/uniphier-pxs2.dtsi +++ b/arch/arm/boot/dts/uniphier-pxs2.dtsi @@ -571,7 +571,7 @@ clocks = <&sys_clk 6>; reset-names = "ether"; resets = <&sys_rst 6>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; local-mac-address = [00 00 00 00 00 00]; socionext,syscon-phy-mode = <&soc_glue 0>; @@ -585,8 +585,8 @@ compatible = "socionext,uniphier-dwc3", "snps,dwc3"; status = "disabled"; reg = <0x65a00000 0xcd00>; - interrupt-names = "host", "peripheral"; - interrupts = <0 134 4>, <0 135 4>; + interrupt-names = "dwc_usb3"; + interrupts = <0 134 4>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb0>, <&pinctrl_usb2>; clock-names = "ref", "bus_early", "suspend"; @@ -681,8 +681,8 @@ compatible = "socionext,uniphier-dwc3", "snps,dwc3"; status = "disabled"; reg = <0x65c00000 0xcd00>; - interrupt-names = "host", "peripheral"; - interrupts = <0 137 4>, <0 138 4>; + interrupt-names = "dwc_usb3"; + interrupts = <0 137 4>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb1>, <&pinctrl_usb3>; clock-names = "ref", "bus_early", "suspend"; diff --git a/arch/arm/boot/dts/versatile-ab.dts b/arch/arm/boot/dts/versatile-ab.dts index 37bd41ff8dffa..151c0220047dd 100644 --- a/arch/arm/boot/dts/versatile-ab.dts +++ b/arch/arm/boot/dts/versatile-ab.dts @@ -195,16 +195,15 @@ #size-cells = <1>; ranges; - vic: intc@10140000 { + vic: interrupt-controller@10140000 { compatible = "arm,versatile-vic"; interrupt-controller; #interrupt-cells = <1>; reg = <0x10140000 0x1000>; - clear-mask = <0xffffffff>; valid-mask = <0xffffffff>; }; - sic: intc@10003000 { + sic: interrupt-controller@10003000 { compatible = "arm,versatile-sic"; interrupt-controller; #interrupt-cells = <1>; diff --git a/arch/arm/boot/dts/versatile-pb.dts b/arch/arm/boot/dts/versatile-pb.dts index 06a0fdf24026c..e7e751a858d81 100644 --- a/arch/arm/boot/dts/versatile-pb.dts +++ b/arch/arm/boot/dts/versatile-pb.dts @@ -7,7 +7,7 @@ amba { /* The Versatile PB is using more SIC IRQ lines than the AB */ - sic: intc@10003000 { + sic: interrupt-controller@10003000 { clear-mask = <0xffffffff>; /* * Valid interrupt lines mask according to diff --git a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi index dfae90adbb7ca..ce64bfb22f22a 100644 --- a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi +++ b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi @@ -31,7 +31,7 @@ #interrupt-cells = <1>; ranges; - nor_flash: flash@0,00000000 { + nor_flash: flash@0 { compatible = "arm,vexpress-flash", "cfi-flash"; reg = <0 0x00000000 0x04000000>, <4 0x00000000 0x04000000>; @@ -41,13 +41,13 @@ }; }; - psram@1,00000000 { + psram@100000000 { compatible = "arm,vexpress-psram", "mtd-ram"; reg = <1 0x00000000 0x02000000>; bank-width = <4>; }; - ethernet@2,02000000 { + ethernet@202000000 { compatible = "smsc,lan9118", "smsc,lan9115"; reg = <2 0x02000000 0x10000>; interrupts = <15>; @@ -59,14 +59,14 @@ vddvario-supply = <&v2m_fixed_3v3>; }; - usb@2,03000000 { + usb@203000000 { compatible = "nxp,usb-isp1761"; reg = <2 0x03000000 0x20000>; interrupts = <16>; port1-otg; }; - iofpga@3,00000000 { + iofpga@300000000 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/vfxxx.dtsi b/arch/arm/boot/dts/vfxxx.dtsi index 028e0ec30e0c0..fa248066d9d9b 100644 --- a/arch/arm/boot/dts/vfxxx.dtsi +++ b/arch/arm/boot/dts/vfxxx.dtsi @@ -495,7 +495,7 @@ }; ocotp: ocotp@400a5000 { - compatible = "fsl,vf610-ocotp"; + compatible = "fsl,vf610-ocotp", "syscon"; reg = <0x400a5000 0x1000>; clocks = <&clks VF610_CLK_OCOTP>; }; diff --git a/arch/arm/configs/aspeed_g5_defconfig b/arch/arm/configs/aspeed_g5_defconfig index 597536cc9573d..b87508c7056c9 100644 --- a/arch/arm/configs/aspeed_g5_defconfig +++ b/arch/arm/configs/aspeed_g5_defconfig @@ -139,6 +139,7 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=6 CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_ASPEED_VUART=y CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_ASPEED_KCS_IPMI_BMC=y CONFIG_ASPEED_BT_IPMI_BMC=y diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index 08db1c83eb2da..34d4acbcee347 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -38,6 +38,7 @@ CONFIG_CRYPTO_SHA256_ARM=m CONFIG_CRYPTO_SHA512_ARM=m CONFIG_CRYPTO_AES_ARM_BS=m CONFIG_CRYPTO_CHACHA20_NEON=m +CONFIG_KALLSYMS_ALL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_PARTITION_ADVANCED=y @@ -92,6 +93,7 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_CRYPTOLOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y CONFIG_ATA=y @@ -290,6 +292,7 @@ CONFIG_CROS_EC_SPI=y CONFIG_COMMON_CLK_MAX77686=y CONFIG_COMMON_CLK_S2MPS11=y CONFIG_EXYNOS_IOMMU=y +CONFIG_PM_DEVFREQ=y CONFIG_DEVFREQ_GOV_PERFORMANCE=y CONFIG_DEVFREQ_GOV_POWERSAVE=y CONFIG_DEVFREQ_GOV_USERSPACE=y @@ -348,9 +351,13 @@ CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_SOFTLOCKUP_DETECTOR=y # CONFIG_DETECT_HUNG_TASK is not set CONFIG_PROVE_LOCKING=y CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_RT_MUTEXES=y +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_MUTEXES=y CONFIG_DEBUG_USER=y diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 0f7381ee0c375..dabb804532495 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -460,6 +460,7 @@ CONFIG_FONT_8x8=y CONFIG_FONT_8x16=y CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y # CONFIG_SCHED_DEBUG is not set CONFIG_PROVE_LOCKING=y # CONFIG_DEBUG_BUGVERBOSE is not set diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 40d7f1a4fc458..4ec69fb8a698b 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -552,5 +552,6 @@ CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_SPLIT=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y CONFIG_SCHEDSTATS=y # CONFIG_DEBUG_BUGVERBOSE is not set diff --git a/arch/arm/configs/rpc_defconfig b/arch/arm/configs/rpc_defconfig index 3b82b64950d96..c090643b1ecbc 100644 --- a/arch/arm/configs/rpc_defconfig +++ b/arch/arm/configs/rpc_defconfig @@ -32,7 +32,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y diff --git a/arch/arm/configs/s3c2410_defconfig b/arch/arm/configs/s3c2410_defconfig index 73ed73a8785a0..153009130dab3 100644 --- a/arch/arm/configs/s3c2410_defconfig +++ b/arch/arm/configs/s3c2410_defconfig @@ -202,7 +202,6 @@ CONFIG_EEPROM_AT24=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_CHR_DEV_SCH=m CONFIG_SCSI_CONSTANTS=y diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 043b0b18bf7e0..f747caea10ffa 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -30,7 +30,7 @@ config CRYPTO_SHA1_ARM_NEON config CRYPTO_SHA1_ARM_CE tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_SHA1_ARM select CRYPTO_HASH help @@ -39,7 +39,7 @@ config CRYPTO_SHA1_ARM_CE config CRYPTO_SHA2_ARM_CE tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_SHA256_ARM select CRYPTO_HASH help @@ -96,7 +96,7 @@ config CRYPTO_AES_ARM_BS config CRYPTO_AES_ARM_CE tristate "Accelerated AES using ARMv8 Crypto Extensions" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_BLKCIPHER select CRYPTO_LIB_AES select CRYPTO_SIMD @@ -106,7 +106,7 @@ config CRYPTO_AES_ARM_CE config CRYPTO_GHASH_ARM_CE tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions" - depends on KERNEL_MODE_NEON + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) select CRYPTO_HASH select CRYPTO_CRYPTD select CRYPTO_GF128MUL @@ -118,12 +118,14 @@ config CRYPTO_GHASH_ARM_CE config CRYPTO_CRCT10DIF_ARM_CE tristate "CRCT10DIF digest algorithm using PMULL instructions" - depends on KERNEL_MODE_NEON && CRC_T10DIF + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + depends on CRC_T10DIF select CRYPTO_HASH config CRYPTO_CRC32_ARM_CE tristate "CRC32(C) digest algorithm using CRC and/or PMULL instructions" - depends on KERNEL_MODE_NEON && CRC32 + depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + depends on CRC32 select CRYPTO_HASH config CRYPTO_CHACHA20_NEON diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 4180f3a13512c..c0d36771a6934 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -12,32 +12,12 @@ obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o -ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o -ce-obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o -crc-obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o - -ifneq ($(crc-obj-y)$(crc-obj-m),) -ifeq ($(call as-instr,.arch armv8-a\n.arch_extension crc,y,n),y) -ce-obj-y += $(crc-obj-y) -ce-obj-m += $(crc-obj-m) -else -$(warning These CRC Extensions modules need binutils 2.23 or higher) -$(warning $(crc-obj-y) $(crc-obj-m)) -endif -endif - -ifneq ($(ce-obj-y)$(ce-obj-m),) -ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y) -obj-y += $(ce-obj-y) -obj-m += $(ce-obj-m) -else -$(warning These ARMv8 Crypto Extensions modules need binutils 2.23 or higher) -$(warning $(ce-obj-y) $(ce-obj-m)) -endif -endif +obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o +obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o +obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o +obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o +obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o +obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o aes-arm-y := aes-cipher-core.o aes-cipher-glue.o aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index 4d1707388d941..312428d83eedb 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -386,20 +386,32 @@ ENTRY(ce_aes_ctr_encrypt) .Lctrloop4x: subs r4, r4, #4 bmi .Lctr1x - add r6, r6, #1 + + /* + * NOTE: the sequence below has been carefully tweaked to avoid + * a silicon erratum that exists in Cortex-A57 (#1742098) and + * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs + * may produce an incorrect result if they take their input from a + * register of which a single 32-bit lane has been updated the last + * time it was modified. To work around this, the lanes of registers + * q0-q3 below are not manipulated individually, and the different + * counter values are prepared by successive manipulations of q7. + */ + add ip, r6, #1 vmov q0, q7 + rev ip, ip + add lr, r6, #2 + vmov s31, ip @ set lane 3 of q1 via q7 + add ip, r6, #3 + rev lr, lr vmov q1, q7 - rev ip, r6 - add r6, r6, #1 + vmov s31, lr @ set lane 3 of q2 via q7 + rev ip, ip vmov q2, q7 - vmov s7, ip - rev ip, r6 - add r6, r6, #1 + vmov s31, ip @ set lane 3 of q3 via q7 + add r6, r6, #4 vmov q3, q7 - vmov s11, ip - rev ip, r6 - add r6, r6, #1 - vmov s15, ip + vld1.8 {q4-q5}, [r1]! vld1.8 {q6}, [r1]! vld1.8 {q15}, [r1]! diff --git a/arch/arm/crypto/crct10dif-ce-core.S b/arch/arm/crypto/crct10dif-ce-core.S index 86be258a803fa..46c02c518a300 100644 --- a/arch/arm/crypto/crct10dif-ce-core.S +++ b/arch/arm/crypto/crct10dif-ce-core.S @@ -72,7 +72,7 @@ #endif .text - .arch armv7-a + .arch armv8-a .fpu crypto-neon-fp-armv8 init_crc .req r0 diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S index c47fe81abcb01..9f51e3fa45268 100644 --- a/arch/arm/crypto/ghash-ce-core.S +++ b/arch/arm/crypto/ghash-ce-core.S @@ -8,6 +8,9 @@ #include #include + .arch armv8-a + .fpu crypto-neon-fp-armv8 + SHASH .req q0 T1 .req q1 XL .req q2 @@ -88,7 +91,6 @@ T3_H .req d17 .text - .fpu crypto-neon-fp-armv8 .macro __pmull_p64, rd, rn, rm, b1, b2, b3, b4 vmull.p64 \rd, \rn, \rm diff --git a/arch/arm/crypto/nhpoly1305-neon-glue.c b/arch/arm/crypto/nhpoly1305-neon-glue.c index ae5aefc44a4d7..ffa8d73fe722c 100644 --- a/arch/arm/crypto/nhpoly1305-neon-glue.c +++ b/arch/arm/crypto/nhpoly1305-neon-glue.c @@ -30,7 +30,7 @@ static int nhpoly1305_neon_update(struct shash_desc *desc, return crypto_nhpoly1305_update(desc, src, srclen); do { - unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); + unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_neon_begin(); crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon); diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S index 49a74a441aec7..8a702e051738a 100644 --- a/arch/arm/crypto/sha1-ce-core.S +++ b/arch/arm/crypto/sha1-ce-core.S @@ -10,6 +10,7 @@ #include .text + .arch armv8-a .fpu crypto-neon-fp-armv8 k0 .req q0 diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S index 4ad517577e230..b6369d2440a19 100644 --- a/arch/arm/crypto/sha2-ce-core.S +++ b/arch/arm/crypto/sha2-ce-core.S @@ -10,6 +10,7 @@ #include .text + .arch armv8-a .fpu crypto-neon-fp-armv8 k0 .req q7 diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl index a03cf4dfb7818..d927483985c2d 100644 --- a/arch/arm/crypto/sha256-armv4.pl +++ b/arch/arm/crypto/sha256-armv4.pl @@ -175,7 +175,6 @@ sub BODY_16_XX { #else .syntax unified # ifdef __thumb2__ -# define adrl adr .thumb # else .code 32 @@ -471,7 +470,8 @@ () stmdb sp!,{r4-r12,lr} sub $H,sp,#16*4+16 - adrl $Ktbl,K256 + adr $Ktbl,.Lsha256_block_data_order + sub $Ktbl,$Ktbl,#.Lsha256_block_data_order-K256 bic $H,$H,#15 @ align for 128-bit stores mov $t2,sp mov sp,$H @ alloca diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped index 054aae0edfce5..9deb515f3c9f2 100644 --- a/arch/arm/crypto/sha256-core.S_shipped +++ b/arch/arm/crypto/sha256-core.S_shipped @@ -56,7 +56,6 @@ #else .syntax unified # ifdef __thumb2__ -# define adrl adr .thumb # else .code 32 @@ -1885,7 +1884,8 @@ sha256_block_data_order_neon: stmdb sp!,{r4-r12,lr} sub r11,sp,#16*4+16 - adrl r14,K256 + adr r14,.Lsha256_block_data_order + sub r14,r14,#.Lsha256_block_data_order-K256 bic r11,r11,#15 @ align for 128-bit stores mov r12,sp mov sp,r11 @ alloca diff --git a/arch/arm/crypto/sha512-armv4.pl b/arch/arm/crypto/sha512-armv4.pl index 788c17b56ecce..2a0bdf7dd87c3 100644 --- a/arch/arm/crypto/sha512-armv4.pl +++ b/arch/arm/crypto/sha512-armv4.pl @@ -212,7 +212,6 @@ () #else .syntax unified # ifdef __thumb2__ -# define adrl adr .thumb # else .code 32 @@ -602,7 +601,8 @@ () dmb @ errata #451034 on early Cortex A8 add $len,$inp,$len,lsl#7 @ len to point at the end of inp VFP_ABI_PUSH - adrl $Ktbl,K512 + adr $Ktbl,.Lsha512_block_data_order + sub $Ktbl,$Ktbl,.Lsha512_block_data_order-K512 vldmia $ctx,{$A-$H} @ load context .Loop_neon: ___ diff --git a/arch/arm/crypto/sha512-core.S_shipped b/arch/arm/crypto/sha512-core.S_shipped index 710ea309769e7..cf5a7a70ff008 100644 --- a/arch/arm/crypto/sha512-core.S_shipped +++ b/arch/arm/crypto/sha512-core.S_shipped @@ -79,7 +79,6 @@ #else .syntax unified # ifdef __thumb2__ -# define adrl adr .thumb # else .code 32 @@ -543,7 +542,8 @@ sha512_block_data_order_neon: dmb @ errata #451034 on early Cortex A8 add r2,r1,r2,lsl#7 @ len to point at the end of inp VFP_ABI_PUSH - adrl r3,K512 + adr r3,.Lsha512_block_data_order + sub r3,r3,.Lsha512_block_data_order-K512 vldmia r0,{d16-d23} @ load context .Loop_neon: vshr.u64 d24,d20,#14 @ 0 diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 99929122dad75..70e1c23feedb7 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -18,11 +18,11 @@ #endif #include -#include #include #include #include #include +#include #define IOMEM(x) (x) @@ -107,6 +107,16 @@ .endm #endif +#if __LINUX_ARM_ARCH__ < 7 + .macro dsb, args + mcr p15, 0, r0, c7, c10, 4 + .endm + + .macro isb, args + mcr p15, 0, r0, c7, c5, 4 + .endm +#endif + .macro asm_trace_hardirqs_off, save=1 #if defined(CONFIG_TRACE_IRQFLAGS) .if \save @@ -269,10 +279,9 @@ .endif ;\ .popsection #define ALT_UP_B(label) \ - .equ up_b_offset, label - 9998b ;\ .pushsection ".alt.smp.init", "a" ;\ .long 9998b ;\ - W(b) . + up_b_offset ;\ + W(b) . + (label - 9998b) ;\ .popsection #else #define ALT_SMP(instr...) @@ -446,79 +455,6 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) .size \name , . - \name .endm - .macro csdb -#ifdef CONFIG_THUMB2_KERNEL - .inst.w 0xf3af8014 -#else - .inst 0xe320f014 -#endif - .endm - - .macro check_uaccess, addr:req, size:req, limit:req, tmp:req, bad:req -#ifndef CONFIG_CPU_USE_DOMAINS - adds \tmp, \addr, #\size - 1 - sbcscc \tmp, \tmp, \limit - bcs \bad -#ifdef CONFIG_CPU_SPECTRE - movcs \addr, #0 - csdb -#endif -#endif - .endm - - .macro uaccess_mask_range_ptr, addr:req, size:req, limit:req, tmp:req -#ifdef CONFIG_CPU_SPECTRE - sub \tmp, \limit, #1 - subs \tmp, \tmp, \addr @ tmp = limit - 1 - addr - addhs \tmp, \tmp, #1 @ if (tmp >= 0) { - subshs \tmp, \tmp, \size @ tmp = limit - (addr + size) } - movlo \addr, #0 @ if (tmp < 0) addr = NULL - csdb -#endif - .endm - - .macro uaccess_disable, tmp, isb=1 -#ifdef CONFIG_CPU_SW_DOMAIN_PAN - /* - * Whenever we re-enter userspace, the domains should always be - * set appropriately. - */ - mov \tmp, #DACR_UACCESS_DISABLE - mcr p15, 0, \tmp, c3, c0, 0 @ Set domain register - .if \isb - instr_sync - .endif -#endif - .endm - - .macro uaccess_enable, tmp, isb=1 -#ifdef CONFIG_CPU_SW_DOMAIN_PAN - /* - * Whenever we re-enter userspace, the domains should always be - * set appropriately. - */ - mov \tmp, #DACR_UACCESS_ENABLE - mcr p15, 0, \tmp, c3, c0, 0 - .if \isb - instr_sync - .endif -#endif - .endm - - .macro uaccess_save, tmp -#ifdef CONFIG_CPU_SW_DOMAIN_PAN - mrc p15, 0, \tmp, c3, c0, 0 - str \tmp, [sp, #SVC_DACR] -#endif - .endm - - .macro uaccess_restore -#ifdef CONFIG_CPU_SW_DOMAIN_PAN - ldr r0, [sp, #SVC_DACR] - mcr p15, 0, r0, c3, c0, 0 -#endif - .endm - .irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo .macro ret\c, reg #if __LINUX_ARM_ARCH__ < 6 diff --git a/arch/arm/include/asm/clocksource.h b/arch/arm/include/asm/clocksource.h index 0b350a7e26f37..afb7a59828fef 100644 --- a/arch/arm/include/asm/clocksource.h +++ b/arch/arm/include/asm/clocksource.h @@ -1,8 +1,17 @@ #ifndef _ASM_CLOCKSOURCE_H #define _ASM_CLOCKSOURCE_H +enum vdso_arch_clockmode { + /* vdso clocksource not usable */ + VDSO_CLOCKMODE_NONE, + /* vdso clocksource usable */ + VDSO_CLOCKMODE_ARCHTIMER, + VDSO_CLOCKMODE_ARCHTIMER_NOCOMPAT = VDSO_CLOCKMODE_ARCHTIMER, +}; + struct arch_clocksource_data { - bool vdso_direct; /* Usable for direct VDSO access? */ + /* Usable for direct VDSO access? */ + enum vdso_arch_clockmode clock_mode; }; #endif diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h index 472c93db5dac5..763c3f65e30c6 100644 --- a/arch/arm/include/asm/fixmap.h +++ b/arch/arm/include/asm/fixmap.h @@ -2,7 +2,7 @@ #ifndef _ASM_FIXMAP_H #define _ASM_FIXMAP_H -#define FIXADDR_START 0xffc00000UL +#define FIXADDR_START 0xffc80000UL #define FIXADDR_END 0xfff00000UL #define FIXADDR_TOP (FIXADDR_END - PAGE_SIZE) diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index 18b0197f23848..15bd9af13497f 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -16,6 +16,9 @@ extern void __gnu_mcount_nc(void); #ifdef CONFIG_DYNAMIC_FTRACE struct dyn_arch_ftrace { +#ifdef CONFIG_ARM_MODULE_PLTS + struct module *mod; +#endif }; static inline unsigned long ftrace_call_adjust(unsigned long addr) diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 83c391b597d45..fdc4ae3e7378d 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -164,8 +164,13 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) preempt_enable(); #endif - if (!ret) - *oval = oldval; + /* + * Store unconditionally. If ret != 0 the extra store is the least + * of the worries but GCC cannot figure out that __futex_atomic_op() + * is either setting ret to -EFAULT or storing the old value in + * oldval which results in a uninitialized warning at the call site. + */ + *oval = oldval; return ret; } diff --git a/arch/arm/include/asm/insn.h b/arch/arm/include/asm/insn.h index f20e08ac85aeb..5475cbf9fb6b4 100644 --- a/arch/arm/include/asm/insn.h +++ b/arch/arm/include/asm/insn.h @@ -13,18 +13,18 @@ arm_gen_nop(void) } unsigned long -__arm_gen_branch(unsigned long pc, unsigned long addr, bool link); +__arm_gen_branch(unsigned long pc, unsigned long addr, bool link, bool warn); static inline unsigned long arm_gen_branch(unsigned long pc, unsigned long addr) { - return __arm_gen_branch(pc, addr, false); + return __arm_gen_branch(pc, addr, false, true); } static inline unsigned long -arm_gen_branch_link(unsigned long pc, unsigned long addr) +arm_gen_branch_link(unsigned long pc, unsigned long addr, bool warn) { - return __arm_gen_branch(pc, addr, true); + return __arm_gen_branch(pc, addr, true, warn); } #endif diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 7a0596fcb2e77..3cc0f6d508836 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -457,6 +457,9 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr); extern int valid_phys_addr_range(phys_addr_t addr, size_t size); extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); extern int devmem_is_allowed(unsigned long pfn); +extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, + unsigned long flags); +#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap #endif /* diff --git a/arch/arm/include/asm/kexec-internal.h b/arch/arm/include/asm/kexec-internal.h new file mode 100644 index 0000000000000..ecc2322db7aa1 --- /dev/null +++ b/arch/arm/include/asm/kexec-internal.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ARM_KEXEC_INTERNAL_H +#define _ARM_KEXEC_INTERNAL_H + +struct kexec_relocate_data { + unsigned long kexec_start_address; + unsigned long kexec_indirection_page; + unsigned long kexec_mach_type; + unsigned long kexec_r2; +}; + +#endif diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h index 213607a1f45c1..e26a278d301ab 100644 --- a/arch/arm/include/asm/kprobes.h +++ b/arch/arm/include/asm/kprobes.h @@ -44,20 +44,20 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); /* optinsn template addresses */ -extern __visible kprobe_opcode_t optprobe_template_entry; -extern __visible kprobe_opcode_t optprobe_template_val; -extern __visible kprobe_opcode_t optprobe_template_call; -extern __visible kprobe_opcode_t optprobe_template_end; -extern __visible kprobe_opcode_t optprobe_template_sub_sp; -extern __visible kprobe_opcode_t optprobe_template_add_sp; -extern __visible kprobe_opcode_t optprobe_template_restore_begin; -extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn; -extern __visible kprobe_opcode_t optprobe_template_restore_end; +extern __visible kprobe_opcode_t optprobe_template_entry[]; +extern __visible kprobe_opcode_t optprobe_template_val[]; +extern __visible kprobe_opcode_t optprobe_template_call[]; +extern __visible kprobe_opcode_t optprobe_template_end[]; +extern __visible kprobe_opcode_t optprobe_template_sub_sp[]; +extern __visible kprobe_opcode_t optprobe_template_add_sp[]; +extern __visible kprobe_opcode_t optprobe_template_restore_begin[]; +extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn[]; +extern __visible kprobe_opcode_t optprobe_template_restore_end[]; #define MAX_OPTIMIZED_LENGTH 4 #define MAX_OPTINSN_SIZE \ - ((unsigned long)&optprobe_template_end - \ - (unsigned long)&optprobe_template_entry) + ((unsigned long)optprobe_template_end - \ + (unsigned long)optprobe_template_entry) #define RELATIVEJUMP_SIZE 4 struct arch_optimized_insn { diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index f615830f9f57b..9d0b7e677faac 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -56,7 +56,7 @@ extern char __kvm_hyp_init_end[]; extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern void __kvm_tlb_flush_vmid(struct kvm *kvm); -extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); +extern void __kvm_flush_cpu_context(struct kvm_vcpu *vcpu); extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high); diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 40002416efec2..c1747fcb86d36 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -14,13 +14,25 @@ #include /* arm64 compatibility macros */ +#define PSR_AA32_MODE_FIQ FIQ_MODE +#define PSR_AA32_MODE_SVC SVC_MODE #define PSR_AA32_MODE_ABT ABT_MODE #define PSR_AA32_MODE_UND UND_MODE #define PSR_AA32_T_BIT PSR_T_BIT +#define PSR_AA32_F_BIT PSR_F_BIT #define PSR_AA32_I_BIT PSR_I_BIT #define PSR_AA32_A_BIT PSR_A_BIT #define PSR_AA32_E_BIT PSR_E_BIT #define PSR_AA32_IT_MASK PSR_IT_MASK +#define PSR_AA32_GE_MASK 0x000f0000 +#define PSR_AA32_DIT_BIT 0x00200000 +#define PSR_AA32_PAN_BIT 0x00400000 +#define PSR_AA32_SSBS_BIT 0x00800000 +#define PSR_AA32_Q_BIT PSR_Q_BIT +#define PSR_AA32_V_BIT PSR_V_BIT +#define PSR_AA32_C_BIT PSR_C_BIT +#define PSR_AA32_Z_BIT PSR_Z_BIT +#define PSR_AA32_N_BIT PSR_N_BIT unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); @@ -41,6 +53,11 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) *__vcpu_spsr(vcpu) = v; } +static inline unsigned long host_spsr_to_spsr32(unsigned long spsr) +{ + return spsr; +} + static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu, u8 reg_num) { @@ -177,12 +194,17 @@ static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_SSE; } +static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) +{ + return false; +} + static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT; } -static inline bool kvm_vcpu_dabt_iss1tw(struct kvm_vcpu *vcpu) +static inline bool kvm_vcpu_abt_iss1tw(const struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & HSR_DABT_S1PTW; } @@ -214,16 +236,21 @@ static inline bool kvm_vcpu_trap_il_is32bit(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_IL; } -static inline u8 kvm_vcpu_trap_get_class(struct kvm_vcpu *vcpu) +static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) >> HSR_EC_SHIFT; } -static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu) +static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) { return kvm_vcpu_trap_get_class(vcpu) == HSR_EC_IABT; } +static inline bool kvm_vcpu_trap_is_exec_fault(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_trap_is_iabt(vcpu) && !kvm_vcpu_abt_iss1tw(vcpu); +} + static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & HSR_FSC; @@ -341,6 +368,7 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, } } -static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu) {} +static inline bool vcpu_has_ptrauth(struct kvm_vcpu *vcpu) { return false; } +static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu) { } #endif /* __ARM_KVM_EMULATE_H__ */ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 8a37c8e89777f..5414d658a4e1d 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -7,6 +7,7 @@ #ifndef __ARM_KVM_HOST_H__ #define __ARM_KVM_HOST_H__ +#include #include #include #include @@ -38,6 +39,7 @@ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) #define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) +#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); @@ -266,7 +268,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva_range(struct kvm *kvm, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, unsigned flags); int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); @@ -323,6 +325,29 @@ static inline int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) int kvm_perf_init(void); int kvm_perf_teardown(void); +static inline long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu) +{ + return SMCCC_RET_NOT_SUPPORTED; +} + +static inline gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) +{ + return GPA_INVALID; +} + +static inline void kvm_update_stolen_time(struct kvm_vcpu *vcpu) +{ +} + +static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) +{ +} + +static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch) +{ + return false; +} + void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); @@ -335,6 +360,7 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_init_debug(void) {} +static inline void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} @@ -421,4 +447,6 @@ static inline bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu) return true; } +#define kvm_arm_vcpu_loaded(vcpu) (false) + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h index 7c0eddb0adb2a..32fbf82e3ebcd 100644 --- a/arch/arm/include/asm/kvm_mmio.h +++ b/arch/arm/include/asm/kvm_mmio.h @@ -14,6 +14,8 @@ struct kvm_decode { unsigned long rt; bool sign_extend; + /* Not used on 32-bit arm */ + bool sixty_four; }; void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h index 92282558caf7c..2b8970d8e5a2f 100644 --- a/arch/arm/include/asm/mach/map.h +++ b/arch/arm/include/asm/mach/map.h @@ -27,6 +27,7 @@ enum { MT_HIGH_VECTORS, MT_MEMORY_RWX, MT_MEMORY_RW, + MT_MEMORY_RO, MT_ROM, MT_MEMORY_RWX_NONCACHED, MT_MEMORY_RW_DTCM, diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 99035b5891ef4..f717d7122d9d1 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -67,6 +67,10 @@ */ #define XIP_VIRT_ADDR(physaddr) (MODULES_VADDR + ((physaddr) & 0x000fffff)) +#define FDT_FIXED_BASE UL(0xff800000) +#define FDT_FIXED_SIZE (2 * SECTION_SIZE) +#define FDT_VIRT_BASE(physbase) ((void *)(FDT_FIXED_BASE | (physbase) % SECTION_SIZE)) + #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) /* * Allow 16MB-aligned ioremap pages @@ -107,6 +111,7 @@ extern unsigned long vectors_base; #define MODULES_VADDR PAGE_OFFSET #define XIP_VIRT_ADDR(physaddr) (physaddr) +#define FDT_VIRT_BASE(physbase) ((void *)(physbase)) #endif /* !CONFIG_MMU */ diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h index 182163b55546c..961fedbd810ec 100644 --- a/arch/arm/include/asm/module.h +++ b/arch/arm/include/asm/module.h @@ -19,8 +19,18 @@ enum { }; #endif +#define PLT_ENT_STRIDE L1_CACHE_BYTES +#define PLT_ENT_COUNT (PLT_ENT_STRIDE / sizeof(u32)) +#define PLT_ENT_SIZE (sizeof(struct plt_entries) / PLT_ENT_COUNT) + +struct plt_entries { + u32 ldr[PLT_ENT_COUNT]; + u32 lit[PLT_ENT_COUNT]; +}; + struct mod_plt_sec { struct elf32_shdr *plt; + struct plt_entries *plt_ent; int plt_count; }; diff --git a/arch/arm/include/asm/percpu.h b/arch/arm/include/asm/percpu.h index f44f448537f21..1a3eedbac4a20 100644 --- a/arch/arm/include/asm/percpu.h +++ b/arch/arm/include/asm/percpu.h @@ -5,6 +5,8 @@ #ifndef _ASM_ARM_PERCPU_H_ #define _ASM_ARM_PERCPU_H_ +#include + /* * Same as asm-generic/percpu.h, except that we store the per cpu offset * in the TPIDRPRW. TPIDRPRW only exists on V6K and V7 diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 51beec41d48c8..50b51ac91fcbe 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -75,6 +75,8 @@ #define PTE_HWTABLE_OFF (PTE_HWTABLE_PTRS * sizeof(pte_t)) #define PTE_HWTABLE_SIZE (PTRS_PER_PTE * sizeof(u32)) +#define MAX_POSSIBLE_PHYSMEM_BITS 32 + /* * PMD_SHIFT determines the size of the area a second-level page table can map * PGDIR_SHIFT determines what a third-level page table entry can map diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 5b18295021a03..8006a56cc2ce2 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -25,6 +25,8 @@ #define PTE_HWTABLE_OFF (0) #define PTE_HWTABLE_SIZE (PTRS_PER_PTE * sizeof(u64)) +#define MAX_POSSIBLE_PHYSMEM_BITS 40 + /* * PGDIR_SHIFT determines the size a top-level page table entry can map. */ diff --git a/arch/arm/include/asm/prom.h b/arch/arm/include/asm/prom.h index 1e36c40533c16..402e3f34c7ed8 100644 --- a/arch/arm/include/asm/prom.h +++ b/arch/arm/include/asm/prom.h @@ -9,12 +9,12 @@ #ifdef CONFIG_OF -extern const struct machine_desc *setup_machine_fdt(unsigned int dt_phys); +extern const struct machine_desc *setup_machine_fdt(void *dt_virt); extern void __init arm_dt_init_cpu_maps(void); #else /* CONFIG_OF */ -static inline const struct machine_desc *setup_machine_fdt(unsigned int dt_phys) +static inline const struct machine_desc *setup_machine_fdt(void *dt_virt) { return NULL; } diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index 91d6b7856be4b..73c83f4d33b3b 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -164,5 +164,31 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) ((current_stack_pointer | (THREAD_SIZE - 1)) - 7) - 1; \ }) + +/* + * Update ITSTATE after normal execution of an IT block instruction. + * + * The 8 IT state bits are split into two parts in CPSR: + * ITSTATE<1:0> are in CPSR<26:25> + * ITSTATE<7:2> are in CPSR<15:10> + */ +static inline unsigned long it_advance(unsigned long cpsr) +{ + if ((cpsr & 0x06000400) == 0) { + /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */ + cpsr &= ~PSR_IT_MASK; + } else { + /* We need to shift left ITSTATE<4:0> */ + const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */ + unsigned long it = cpsr & mask; + it <<= 1; + it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */ + it &= mask; + cpsr &= ~mask; + cpsr |= it; + } + return cpsr; +} + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm/include/asm/spectre.h b/arch/arm/include/asm/spectre.h new file mode 100644 index 0000000000000..85f9e538fb325 --- /dev/null +++ b/arch/arm/include/asm/spectre.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_SPECTRE_H +#define __ASM_SPECTRE_H + +enum { + SPECTRE_UNAFFECTED, + SPECTRE_MITIGATED, + SPECTRE_VULNERABLE, +}; + +enum { + __SPECTRE_V2_METHOD_BPIALL, + __SPECTRE_V2_METHOD_ICIALLU, + __SPECTRE_V2_METHOD_SMC, + __SPECTRE_V2_METHOD_HVC, + __SPECTRE_V2_METHOD_LOOP8, +}; + +enum { + SPECTRE_V2_METHOD_BPIALL = BIT(__SPECTRE_V2_METHOD_BPIALL), + SPECTRE_V2_METHOD_ICIALLU = BIT(__SPECTRE_V2_METHOD_ICIALLU), + SPECTRE_V2_METHOD_SMC = BIT(__SPECTRE_V2_METHOD_SMC), + SPECTRE_V2_METHOD_HVC = BIT(__SPECTRE_V2_METHOD_HVC), + SPECTRE_V2_METHOD_LOOP8 = BIT(__SPECTRE_V2_METHOD_LOOP8), +}; + +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES +void spectre_v2_update_state(unsigned int state, unsigned int methods); +#else +static inline void spectre_v2_update_state(unsigned int state, + unsigned int methods) +{} +#endif + +int spectre_bhb_update_vectors(unsigned int method); + +#endif diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h index 7c3b3671d6c25..6d1337c169cd3 100644 --- a/arch/arm/include/asm/timex.h +++ b/arch/arm/include/asm/timex.h @@ -11,5 +11,6 @@ typedef unsigned long cycles_t; #define get_cycles() ({ cycles_t c; read_current_timer(&c) ? 0 : c; }) +#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback()) #endif diff --git a/arch/arm/include/asm/uaccess-asm.h b/arch/arm/include/asm/uaccess-asm.h new file mode 100644 index 0000000000000..907571fd05c65 --- /dev/null +++ b/arch/arm/include/asm/uaccess-asm.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_UACCESS_ASM_H__ +#define __ASM_UACCESS_ASM_H__ + +#include +#include +#include +#include + + .macro csdb +#ifdef CONFIG_THUMB2_KERNEL + .inst.w 0xf3af8014 +#else + .inst 0xe320f014 +#endif + .endm + + .macro check_uaccess, addr:req, size:req, limit:req, tmp:req, bad:req +#ifndef CONFIG_CPU_USE_DOMAINS + adds \tmp, \addr, #\size - 1 + sbcscc \tmp, \tmp, \limit + bcs \bad +#ifdef CONFIG_CPU_SPECTRE + movcs \addr, #0 + csdb +#endif +#endif + .endm + + .macro uaccess_mask_range_ptr, addr:req, size:req, limit:req, tmp:req +#ifdef CONFIG_CPU_SPECTRE + sub \tmp, \limit, #1 + subs \tmp, \tmp, \addr @ tmp = limit - 1 - addr + addhs \tmp, \tmp, #1 @ if (tmp >= 0) { + subshs \tmp, \tmp, \size @ tmp = limit - (addr + size) } + movlo \addr, #0 @ if (tmp < 0) addr = NULL + csdb +#endif + .endm + + .macro uaccess_disable, tmp, isb=1 +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + /* + * Whenever we re-enter userspace, the domains should always be + * set appropriately. + */ + mov \tmp, #DACR_UACCESS_DISABLE + mcr p15, 0, \tmp, c3, c0, 0 @ Set domain register + .if \isb + instr_sync + .endif +#endif + .endm + + .macro uaccess_enable, tmp, isb=1 +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + /* + * Whenever we re-enter userspace, the domains should always be + * set appropriately. + */ + mov \tmp, #DACR_UACCESS_ENABLE + mcr p15, 0, \tmp, c3, c0, 0 + .if \isb + instr_sync + .endif +#endif + .endm + +#if defined(CONFIG_CPU_SW_DOMAIN_PAN) || defined(CONFIG_CPU_USE_DOMAINS) +#define DACR(x...) x +#else +#define DACR(x...) +#endif + + /* + * Save the address limit on entry to a privileged exception. + * + * If we are using the DACR for kernel access by the user accessors + * (CONFIG_CPU_USE_DOMAINS=y), always reset the DACR kernel domain + * back to client mode, whether or not \disable is set. + * + * If we are using SW PAN, set the DACR user domain to no access + * if \disable is set. + */ + .macro uaccess_entry, tsk, tmp0, tmp1, tmp2, disable + ldr \tmp1, [\tsk, #TI_ADDR_LIMIT] + mov \tmp2, #TASK_SIZE + str \tmp2, [\tsk, #TI_ADDR_LIMIT] + DACR( mrc p15, 0, \tmp0, c3, c0, 0) + DACR( str \tmp0, [sp, #SVC_DACR]) + str \tmp1, [sp, #SVC_ADDR_LIMIT] + .if \disable && IS_ENABLED(CONFIG_CPU_SW_DOMAIN_PAN) + /* kernel=client, user=no access */ + mov \tmp2, #DACR_UACCESS_DISABLE + mcr p15, 0, \tmp2, c3, c0, 0 + instr_sync + .elseif IS_ENABLED(CONFIG_CPU_USE_DOMAINS) + /* kernel=client */ + bic \tmp2, \tmp0, #domain_mask(DOMAIN_KERNEL) + orr \tmp2, \tmp2, #domain_val(DOMAIN_KERNEL, DOMAIN_CLIENT) + mcr p15, 0, \tmp2, c3, c0, 0 + instr_sync + .endif + .endm + + /* Restore the user access state previously saved by uaccess_entry */ + .macro uaccess_exit, tsk, tmp0, tmp1 + ldr \tmp1, [sp, #SVC_ADDR_LIMIT] + DACR( ldr \tmp0, [sp, #SVC_DACR]) + str \tmp1, [\tsk, #TI_ADDR_LIMIT] + DACR( mcr p15, 0, \tmp0, c3, c0, 0) + .endm + +#undef DACR + +#endif /* __ASM_UACCESS_ASM_H__ */ diff --git a/arch/arm/include/asm/vfpmacros.h b/arch/arm/include/asm/vfpmacros.h index 628c336e8e3b2..947ee5395e1fb 100644 --- a/arch/arm/include/asm/vfpmacros.h +++ b/arch/arm/include/asm/vfpmacros.h @@ -19,23 +19,25 @@ @ read all the working registers back into the VFP .macro VFPFLDMIA, base, tmp + .fpu vfpv2 #if __LINUX_ARM_ARCH__ < 6 - LDC p11, cr0, [\base],#33*4 @ FLDMIAX \base!, {d0-d15} + fldmiax \base!, {d0-d15} #else - LDC p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d0-d15} + vldmia \base!, {d0-d15} #endif #ifdef CONFIG_VFPv3 + .fpu vfpv3 #if __LINUX_ARM_ARCH__ <= 6 ldr \tmp, =elf_hwcap @ may not have MVFR regs ldr \tmp, [\tmp, #0] tst \tmp, #HWCAP_VFPD32 - ldclne p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} + vldmiane \base!, {d16-d31} addeq \base, \base, #32*4 @ step over unused register space #else VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 and \tmp, \tmp, #MVFR0_A_SIMD_MASK @ A_SIMD field cmp \tmp, #2 @ 32 x 64bit registers? - ldcleq p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} + vldmiaeq \base!, {d16-d31} addne \base, \base, #32*4 @ step over unused register space #endif #endif @@ -44,22 +46,23 @@ @ write all the working registers out of the VFP .macro VFPFSTMIA, base, tmp #if __LINUX_ARM_ARCH__ < 6 - STC p11, cr0, [\base],#33*4 @ FSTMIAX \base!, {d0-d15} + fstmiax \base!, {d0-d15} #else - STC p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d0-d15} + vstmia \base!, {d0-d15} #endif #ifdef CONFIG_VFPv3 + .fpu vfpv3 #if __LINUX_ARM_ARCH__ <= 6 ldr \tmp, =elf_hwcap @ may not have MVFR regs ldr \tmp, [\tmp, #0] tst \tmp, #HWCAP_VFPD32 - stclne p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} + vstmiane \base!, {d16-d31} addeq \base, \base, #32*4 @ step over unused register space #else VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 and \tmp, \tmp, #MVFR0_A_SIMD_MASK @ A_SIMD field cmp \tmp, #2 @ 32 x 64bit registers? - stcleq p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} + vstmiaeq \base!, {d16-d31} addne \base, \base, #32*4 @ step over unused register space #endif #endif diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 8cad59465af39..dc31426cae6d8 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -17,10 +17,14 @@ CFLAGS_REMOVE_return_address.o = -pg # Object file lists. obj-y := elf.o entry-common.o irq.o opcodes.o \ - process.o ptrace.o reboot.o return_address.o \ + process.o ptrace.o reboot.o \ setup.o signal.o sigreturn_codes.o \ stacktrace.o sys_arm.o time.o traps.o +ifneq ($(CONFIG_ARM_UNWIND),y) +obj-$(CONFIG_FRAME_POINTER) += return_address.o +endif + obj-$(CONFIG_ATAGS) += atags_parse.o obj-$(CONFIG_ATAGS_PROC) += atags_proc.o obj-$(CONFIG_DEPRECATED_PARAM_STRUCT) += atags_compat.o @@ -102,4 +106,6 @@ endif obj-$(CONFIG_HAVE_ARM_SMCCC) += smccc-call.o +obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += spectre.o + extra-y := $(head-y) vmlinux.lds diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index c773b829ee8ee..4ce2e29da14de 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -15,6 +15,7 @@ #include #endif #include +#include #include #include #include @@ -26,6 +27,7 @@ #include #include #include +#include #include "signal.h" /* @@ -159,6 +161,8 @@ int main(void) DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys)); DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash)); #endif + DEFINE(ARM_SMCCC_QUIRK_ID_OFFS, offsetof(struct arm_smccc_quirk, id)); + DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS, offsetof(struct arm_smccc_quirk, state)); BLANK(); DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL); DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE); @@ -190,5 +194,9 @@ int main(void) DEFINE(MPU_RGN_PRBAR, offsetof(struct mpu_rgn, prbar)); DEFINE(MPU_RGN_PRLAR, offsetof(struct mpu_rgn, prlar)); #endif + DEFINE(KEXEC_START_ADDR, offsetof(struct kexec_relocate_data, kexec_start_address)); + DEFINE(KEXEC_INDIR_PAGE, offsetof(struct kexec_relocate_data, kexec_indirection_page)); + DEFINE(KEXEC_MACH_TYPE, offsetof(struct kexec_relocate_data, kexec_mach_type)); + DEFINE(KEXEC_R2, offsetof(struct kexec_relocate_data, kexec_r2)); return 0; } diff --git a/arch/arm/kernel/atags.h b/arch/arm/kernel/atags.h index 067e12edc3419..f2819c25b6029 100644 --- a/arch/arm/kernel/atags.h +++ b/arch/arm/kernel/atags.h @@ -2,11 +2,11 @@ void convert_to_tag_list(struct tag *tags); #ifdef CONFIG_ATAGS -const struct machine_desc *setup_machine_tags(phys_addr_t __atags_pointer, +const struct machine_desc *setup_machine_tags(void *__atags_vaddr, unsigned int machine_nr); #else static inline const struct machine_desc * __init __noreturn -setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr) +setup_machine_tags(void *__atags_vaddr, unsigned int machine_nr) { early_print("no ATAGS support: can't continue\n"); while (true); diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c index ce02f92f4ab26..8288151631fc4 100644 --- a/arch/arm/kernel/atags_parse.c +++ b/arch/arm/kernel/atags_parse.c @@ -176,7 +176,7 @@ static void __init squash_mem_tags(struct tag *tag) } const struct machine_desc * __init -setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr) +setup_machine_tags(void *atags_vaddr, unsigned int machine_nr) { struct tag *tags = (struct tag *)&default_tags; const struct machine_desc *mdesc = NULL, *p; @@ -197,8 +197,8 @@ setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr) if (!mdesc) return NULL; - if (__atags_pointer) - tags = phys_to_virt(__atags_pointer); + if (atags_vaddr) + tags = atags_vaddr; else if (mdesc->atag_offset) tags = (void *)(PAGE_OFFSET + mdesc->atag_offset); diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index 39c9786984062..4e09883c276d9 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -203,12 +203,12 @@ static const void * __init arch_get_next_mach(const char *const **match) /** * setup_machine_fdt - Machine setup when an dtb was passed to the kernel - * @dt_phys: physical address of dt blob + * @dt_virt: virtual address of dt blob * * If a dtb was passed to the kernel in r2, then use it to choose the * correct machine_desc and to setup the system. */ -const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys) +const struct machine_desc * __init setup_machine_fdt(void *dt_virt) { const struct machine_desc *mdesc, *mdesc_best = NULL; @@ -221,7 +221,7 @@ const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys) mdesc_best = &__mach_desc_GENERIC_DT; #endif - if (!dt_phys || !early_init_dt_verify(phys_to_virt(dt_phys))) + if (!dt_virt || !early_init_dt_verify(dt_virt)) return NULL; mdesc = of_flat_dt_match_machine(mdesc_best, arch_get_next_mach); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 858d4e5415326..8e8efe28d7995 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -27,6 +27,7 @@ #include #include #include +#include #include "entry-header.S" #include @@ -179,15 +180,7 @@ ENDPROC(__und_invalid) stmia r7, {r2 - r6} get_thread_info tsk - ldr r0, [tsk, #TI_ADDR_LIMIT] - mov r1, #TASK_SIZE - str r1, [tsk, #TI_ADDR_LIMIT] - str r0, [sp, #SVC_ADDR_LIMIT] - - uaccess_save r0 - .if \uaccess - uaccess_disable r0 - .endif + uaccess_entry tsk, r0, r1, r2, \uaccess .if \trace #ifdef CONFIG_TRACE_IRQFLAGS @@ -259,31 +252,10 @@ __und_svc: #else svc_entry #endif - @ - @ call emulation code, which returns using r9 if it has emulated - @ the instruction, or the more conventional lr if we are to treat - @ this as a real undefined instruction - @ - @ r0 - instruction - @ -#ifndef CONFIG_THUMB2_KERNEL - ldr r0, [r4, #-4] -#else - mov r1, #2 - ldrh r0, [r4, #-2] @ Thumb instruction at LR - 2 - cmp r0, #0xe800 @ 32-bit instruction if xx >= 0 - blo __und_svc_fault - ldrh r9, [r4] @ bottom 16 bits - add r4, r4, #2 - str r4, [sp, #S_PC] - orr r0, r9, r0, lsl #16 -#endif - badr r9, __und_svc_finish - mov r2, r4 - bl call_fpe mov r1, #4 @ PC correction to apply -__und_svc_fault: + THUMB( tst r5, #PSR_T_BIT ) @ exception taken in Thumb mode? + THUMB( movne r1, #2 ) @ if so, fix up PC correction mov r0, sp @ struct pt_regs *regs bl __und_fault @@ -624,11 +596,9 @@ call_fpe: tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2 reteq lr and r8, r0, #0x00000f00 @ mask out CP number - THUMB( lsr r8, r8, #8 ) mov r7, #1 - add r6, r10, #TI_USED_CP - ARM( strb r7, [r6, r8, lsr #8] ) @ set appropriate used_cp[] - THUMB( strb r7, [r6, r8] ) @ set appropriate used_cp[] + add r6, r10, r8, lsr #8 @ add used_cp[] array offset first + strb r7, [r6, #TI_USED_CP] @ set appropriate used_cp[] #ifdef CONFIG_IWMMXT @ Test if we need to give access to iWMMXt coprocessors ldr r5, [r10, #TI_FLAGS] @@ -637,7 +607,7 @@ call_fpe: bcs iwmmxt_task_enable #endif ARM( add pc, pc, r8, lsr #6 ) - THUMB( lsl r8, r8, #2 ) + THUMB( lsr r8, r8, #6 ) THUMB( add pc, r8 ) nop @@ -1035,12 +1005,11 @@ vector_\name: sub lr, lr, #\correction .endif - @ - @ Save r0, lr_ (parent PC) and spsr_ - @ (parent CPSR) - @ + @ Save r0, lr_ (parent PC) stmia sp, {r0, lr} @ save r0, lr - mrs lr, spsr + + @ Save spsr_ (parent CPSR) +2: mrs lr, spsr str lr, [sp, #8] @ save spsr @ @@ -1061,6 +1030,44 @@ vector_\name: movs pc, lr @ branch to handler in SVC mode ENDPROC(vector_\name) +#ifdef CONFIG_HARDEN_BRANCH_HISTORY + .subsection 1 + .align 5 +vector_bhb_loop8_\name: + .if \correction + sub lr, lr, #\correction + .endif + + @ Save r0, lr_ (parent PC) + stmia sp, {r0, lr} + + @ bhb workaround + mov r0, #8 +3: W(b) . + 4 + subs r0, r0, #1 + bne 3b + dsb + isb + b 2b +ENDPROC(vector_bhb_loop8_\name) + +vector_bhb_bpiall_\name: + .if \correction + sub lr, lr, #\correction + .endif + + @ Save r0, lr_ (parent PC) + stmia sp, {r0, lr} + + @ bhb workaround + mcr p15, 0, r0, c7, c5, 6 @ BPIALL + @ isb not needed due to "movs pc, lr" in the vector stub + @ which gives a "context synchronisation". + b 2b +ENDPROC(vector_bhb_bpiall_\name) + .previous +#endif + .align 2 @ handler addresses follow this label 1: @@ -1069,6 +1076,10 @@ ENDPROC(vector_\name) .section .stubs, "ax", %progbits @ This must be the first word .word vector_swi +#ifdef CONFIG_HARDEN_BRANCH_HISTORY + .word vector_bhb_loop8_swi + .word vector_bhb_bpiall_swi +#endif vector_rst: ARM( swi SYS_ERROR0 ) @@ -1183,8 +1194,10 @@ vector_addrexcptn: * FIQ "NMI" handler *----------------------------------------------------------------------------- * Handle a FIQ using the SVC stack allowing FIQ act like NMI on x86 - * systems. + * systems. This must be the last vector stub, so lets place it in its own + * subsection. */ + .subsection 2 vector_stub fiq, FIQ_MODE, 4 .long __fiq_usr @ 0 (USR_26 / USR_32) @@ -1217,6 +1230,30 @@ vector_addrexcptn: W(b) vector_irq W(b) vector_fiq +#ifdef CONFIG_HARDEN_BRANCH_HISTORY + .section .vectors.bhb.loop8, "ax", %progbits +.L__vectors_bhb_loop8_start: + W(b) vector_rst + W(b) vector_bhb_loop8_und + W(ldr) pc, .L__vectors_bhb_loop8_start + 0x1004 + W(b) vector_bhb_loop8_pabt + W(b) vector_bhb_loop8_dabt + W(b) vector_addrexcptn + W(b) vector_bhb_loop8_irq + W(b) vector_bhb_loop8_fiq + + .section .vectors.bhb.bpiall, "ax", %progbits +.L__vectors_bhb_bpiall_start: + W(b) vector_rst + W(b) vector_bhb_bpiall_und + W(ldr) pc, .L__vectors_bhb_bpiall_start + 0x1008 + W(b) vector_bhb_bpiall_pabt + W(b) vector_bhb_bpiall_dabt + W(b) vector_addrexcptn + W(b) vector_bhb_bpiall_irq + W(b) vector_bhb_bpiall_fiq +#endif + .data .align 2 diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 271cb8a1eba1e..bd619da73c84e 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -162,6 +162,29 @@ ENDPROC(ret_from_fork) *----------------------------------------------------------------------------- */ + .align 5 +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +ENTRY(vector_bhb_loop8_swi) + sub sp, sp, #PT_REGS_SIZE + stmia sp, {r0 - r12} + mov r8, #8 +1: b 2f +2: subs r8, r8, #1 + bne 1b + dsb + isb + b 3f +ENDPROC(vector_bhb_loop8_swi) + + .align 5 +ENTRY(vector_bhb_bpiall_swi) + sub sp, sp, #PT_REGS_SIZE + stmia sp, {r0 - r12} + mcr p15, 0, r8, c7, c5, 6 @ BPIALL + isb + b 3f +ENDPROC(vector_bhb_bpiall_swi) +#endif .align 5 ENTRY(vector_swi) #ifdef CONFIG_CPU_V7M @@ -169,6 +192,7 @@ ENTRY(vector_swi) #else sub sp, sp, #PT_REGS_SIZE stmia sp, {r0 - r12} @ Calling r0 - r12 +3: ARM( add r8, sp, #S_PC ) ARM( stmdb r8, {sp, lr}^ ) @ Calling sp, lr THUMB( mov r8, sp ) diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 32051ec5b33fa..40db0f9188b69 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -6,6 +6,7 @@ #include #include #include +#include #include @ Bad Abort numbers @@ -217,9 +218,7 @@ blne trace_hardirqs_off #endif .endif - ldr r1, [sp, #SVC_ADDR_LIMIT] - uaccess_restore - str r1, [tsk, #TI_ADDR_LIMIT] + uaccess_exit tsk, r0, r1 #ifndef CONFIG_THUMB2_KERNEL @ ARM mode SVC restore @@ -263,9 +262,7 @@ @ on the stack remains correct). @ .macro svc_exit_via_fiq - ldr r1, [sp, #SVC_ADDR_LIMIT] - uaccess_restore - str r1, [tsk, #TI_ADDR_LIMIT] + uaccess_exit tsk, r0, r1 #ifndef CONFIG_THUMB2_KERNEL @ ARM mode restore mov r0, sp diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index bda949fd84e8b..12b6da56f88dd 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -71,9 +71,10 @@ int ftrace_arch_code_modify_post_process(void) return 0; } -static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr, + bool warn) { - return arm_gen_branch_link(pc, addr); + return arm_gen_branch_link(pc, addr, warn); } static int ftrace_modify_code(unsigned long pc, unsigned long old, @@ -112,14 +113,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func) int ret; pc = (unsigned long)&ftrace_call; - new = ftrace_call_replace(pc, (unsigned long)func); + new = ftrace_call_replace(pc, (unsigned long)func, true); ret = ftrace_modify_code(pc, 0, new, false); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS if (!ret) { pc = (unsigned long)&ftrace_regs_call; - new = ftrace_call_replace(pc, (unsigned long)func); + new = ftrace_call_replace(pc, (unsigned long)func, true); ret = ftrace_modify_code(pc, 0, new, false); } @@ -132,10 +133,22 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long new, old; unsigned long ip = rec->ip; + unsigned long aaddr = adjust_address(rec, addr); + struct module *mod = NULL; + +#ifdef CONFIG_ARM_MODULE_PLTS + mod = rec->arch.mod; +#endif old = ftrace_nop_replace(rec); - new = ftrace_call_replace(ip, adjust_address(rec, addr)); + new = ftrace_call_replace(ip, aaddr, !mod); +#ifdef CONFIG_ARM_MODULE_PLTS + if (!new && mod) { + aaddr = get_module_plt(mod, ip, aaddr); + new = ftrace_call_replace(ip, aaddr, true); + } +#endif return ftrace_modify_code(rec->ip, old, new, true); } @@ -148,9 +161,9 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long new, old; unsigned long ip = rec->ip; - old = ftrace_call_replace(ip, adjust_address(rec, old_addr)); + old = ftrace_call_replace(ip, adjust_address(rec, old_addr), true); - new = ftrace_call_replace(ip, adjust_address(rec, addr)); + new = ftrace_call_replace(ip, adjust_address(rec, addr), true); return ftrace_modify_code(rec->ip, old, new, true); } @@ -160,12 +173,29 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { + unsigned long aaddr = adjust_address(rec, addr); unsigned long ip = rec->ip; unsigned long old; unsigned long new; int ret; - old = ftrace_call_replace(ip, adjust_address(rec, addr)); +#ifdef CONFIG_ARM_MODULE_PLTS + /* mod is only supplied during module loading */ + if (!mod) + mod = rec->arch.mod; + else + rec->arch.mod = mod; +#endif + + old = ftrace_call_replace(ip, aaddr, + !IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || !mod); +#ifdef CONFIG_ARM_MODULE_PLTS + if (!old && mod) { + aaddr = get_module_plt(mod, ip, aaddr); + old = ftrace_call_replace(ip, aaddr, true); + } +#endif + new = ftrace_nop_replace(rec); ret = ftrace_modify_code(ip, old, new, true); diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index c49b39340ddbd..5ceed4d9ee036 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -274,11 +274,10 @@ __create_page_tables: * We map 2 sections in case the ATAGs/DTB crosses a section boundary. */ mov r0, r2, lsr #SECTION_SHIFT - movs r0, r0, lsl #SECTION_SHIFT - subne r3, r0, r8 - addne r3, r3, #PAGE_OFFSET - addne r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER) - orrne r6, r7, r0 + cmp r2, #0 + ldrne r3, =FDT_FIXED_BASE >> (SECTION_SHIFT - PMD_ORDER) + addne r3, r3, r4 + orrne r6, r7, r0, lsl #SECTION_SHIFT strne r6, [r3], #1 << PMD_ORDER addne r6, r6, #1 << SECTION_SHIFT strne r6, [r3] @@ -671,12 +670,8 @@ ARM_BE8(rev16 ip, ip) ldrcc r7, [r4], #4 @ use branch for delay slot bcc 1b bx lr -#else -#ifdef CONFIG_CPU_ENDIAN_BE8 - moveq r0, #0x00004000 @ set bit 22, mov to mvn instruction #else moveq r0, #0x400000 @ set bit 22, mov to mvn instruction -#endif b 2f 1: ldr ip, [r7, r3] #ifdef CONFIG_CPU_ENDIAN_BE8 @@ -685,7 +680,7 @@ ARM_BE8(rev16 ip, ip) tst ip, #0x000f0000 @ check the rotation field orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 biceq ip, ip, #0x00004000 @ clear bit 22 - orreq ip, ip, r0 @ mask in offset bits 7-0 + orreq ip, ip, r0, ror #8 @ mask in offset bits 7-0 #else bic ip, ip, #0x000000ff tst ip, #0xf00 @ check the rotation field diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index b0c195e3a06df..b06d9ea07c846 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -680,26 +680,68 @@ static void disable_single_step(struct perf_event *bp) arch_install_hw_breakpoint(bp); } +/* + * Arm32 hardware does not always report a watchpoint hit address that matches + * one of the watchpoints set. It can also report an address "near" the + * watchpoint if a single instruction access both watched and unwatched + * addresses. There is no straight-forward way, short of disassembling the + * offending instruction, to map that address back to the watchpoint. This + * function computes the distance of the memory access from the watchpoint as a + * heuristic for the likelyhood that a given access triggered the watchpoint. + * + * See this same function in the arm64 platform code, which has the same + * problem. + * + * The function returns the distance of the address from the bytes watched by + * the watchpoint. In case of an exact match, it returns 0. + */ +static u32 get_distance_from_watchpoint(unsigned long addr, u32 val, + struct arch_hw_breakpoint_ctrl *ctrl) +{ + u32 wp_low, wp_high; + u32 lens, lene; + + lens = __ffs(ctrl->len); + lene = __fls(ctrl->len); + + wp_low = val + lens; + wp_high = val + lene; + if (addr < wp_low) + return wp_low - addr; + else if (addr > wp_high) + return addr - wp_high; + else + return 0; +} + +static int watchpoint_fault_on_uaccess(struct pt_regs *regs, + struct arch_hw_breakpoint *info) +{ + return !user_mode(regs) && info->ctrl.privilege == ARM_BREAKPOINT_USER; +} + static void watchpoint_handler(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { - int i, access; - u32 val, ctrl_reg, alignment_mask; + int i, access, closest_match = 0; + u32 min_dist = -1, dist; + u32 val, ctrl_reg; struct perf_event *wp, **slots; struct arch_hw_breakpoint *info; struct arch_hw_breakpoint_ctrl ctrl; slots = this_cpu_ptr(wp_on_reg); + /* + * Find all watchpoints that match the reported address. If no exact + * match is found. Attribute the hit to the closest watchpoint. + */ + rcu_read_lock(); for (i = 0; i < core_num_wrps; ++i) { - rcu_read_lock(); - wp = slots[i]; - if (wp == NULL) - goto unlock; + continue; - info = counter_arch_bp(wp); /* * The DFAR is an unknown value on debug architectures prior * to 7.1. Since we only allow a single watchpoint on these @@ -708,50 +750,69 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, */ if (debug_arch < ARM_DEBUG_ARCH_V7_1) { BUG_ON(i > 0); + info = counter_arch_bp(wp); info->trigger = wp->attr.bp_addr; } else { - if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) - alignment_mask = 0x7; - else - alignment_mask = 0x3; - - /* Check if the watchpoint value matches. */ - val = read_wb_reg(ARM_BASE_WVR + i); - if (val != (addr & ~alignment_mask)) - goto unlock; - - /* Possible match, check the byte address select. */ - ctrl_reg = read_wb_reg(ARM_BASE_WCR + i); - decode_ctrl_reg(ctrl_reg, &ctrl); - if (!((1 << (addr & alignment_mask)) & ctrl.len)) - goto unlock; - /* Check that the access type matches. */ if (debug_exception_updates_fsr()) { access = (fsr & ARM_FSR_ACCESS_MASK) ? HW_BREAKPOINT_W : HW_BREAKPOINT_R; if (!(access & hw_breakpoint_type(wp))) - goto unlock; + continue; } + val = read_wb_reg(ARM_BASE_WVR + i); + ctrl_reg = read_wb_reg(ARM_BASE_WCR + i); + decode_ctrl_reg(ctrl_reg, &ctrl); + dist = get_distance_from_watchpoint(addr, val, &ctrl); + if (dist < min_dist) { + min_dist = dist; + closest_match = i; + } + /* Is this an exact match? */ + if (dist != 0) + continue; + /* We have a winner. */ + info = counter_arch_bp(wp); info->trigger = addr; } pr_debug("watchpoint fired: address = 0x%x\n", info->trigger); + + /* + * If we triggered a user watchpoint from a uaccess routine, + * then handle the stepping ourselves since userspace really + * can't help us with this. + */ + if (watchpoint_fault_on_uaccess(regs, info)) + goto step; + perf_bp_event(wp, regs); /* - * If no overflow handler is present, insert a temporary - * mismatch breakpoint so we can single-step over the - * watchpoint trigger. + * Defer stepping to the overflow handler if one is installed. + * Otherwise, insert a temporary mismatch breakpoint so that + * we can single-step over the watchpoint trigger. */ + if (!is_default_overflow_handler(wp)) + continue; +step: + enable_single_step(wp, instruction_pointer(regs)); + } + + if (min_dist > 0 && min_dist != -1) { + /* No exact match found. */ + wp = slots[closest_match]; + info = counter_arch_bp(wp); + info->trigger = addr; + pr_debug("watchpoint fired: address = 0x%x\n", info->trigger); + perf_bp_event(wp, regs); if (is_default_overflow_handler(wp)) enable_single_step(wp, instruction_pointer(regs)); - -unlock: - rcu_read_unlock(); } + + rcu_read_unlock(); } static void watchpoint_single_step_handler(unsigned long pc) @@ -822,7 +883,7 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) info->trigger = addr; pr_debug("breakpoint fired: address = 0x%x\n", addr); perf_bp_event(bp, regs); - if (!bp->overflow_handler) + if (is_default_overflow_handler(bp)) enable_single_step(bp, addr); goto unlock; } diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index ae5020302de49..6607fa817bba9 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -146,10 +146,9 @@ ARM_BE8(orr r7, r7, #(1 << 25)) @ HSCTLR.EE #if !defined(ZIMAGE) && defined(CONFIG_ARM_ARCH_TIMER) @ make CNTP_* and CNTPCT accessible from PL1 mrc p15, 0, r7, c0, c1, 1 @ ID_PFR1 - lsr r7, #16 - and r7, #0xf - cmp r7, #1 - bne 1f + ubfx r7, r7, #16, #4 + teq r7, #0 + beq 1f mrc p15, 4, r7, c14, c1, 0 @ CNTHCTL orr r7, r7, #3 @ PL1PCEN | PL1PCTEN mcr p15, 4, r7, c14, c1, 0 @ CNTHCTL diff --git a/arch/arm/kernel/insn.c b/arch/arm/kernel/insn.c index 2e844b70386b3..db0acbb7d7a02 100644 --- a/arch/arm/kernel/insn.c +++ b/arch/arm/kernel/insn.c @@ -3,8 +3,9 @@ #include #include -static unsigned long -__arm_gen_branch_thumb2(unsigned long pc, unsigned long addr, bool link) +static unsigned long __arm_gen_branch_thumb2(unsigned long pc, + unsigned long addr, bool link, + bool warn) { unsigned long s, j1, j2, i1, i2, imm10, imm11; unsigned long first, second; @@ -12,7 +13,7 @@ __arm_gen_branch_thumb2(unsigned long pc, unsigned long addr, bool link) offset = (long)addr - (long)(pc + 4); if (offset < -16777216 || offset > 16777214) { - WARN_ON_ONCE(1); + WARN_ON_ONCE(warn); return 0; } @@ -33,8 +34,8 @@ __arm_gen_branch_thumb2(unsigned long pc, unsigned long addr, bool link) return __opcode_thumb32_compose(first, second); } -static unsigned long -__arm_gen_branch_arm(unsigned long pc, unsigned long addr, bool link) +static unsigned long __arm_gen_branch_arm(unsigned long pc, unsigned long addr, + bool link, bool warn) { unsigned long opcode = 0xea000000; long offset; @@ -44,7 +45,7 @@ __arm_gen_branch_arm(unsigned long pc, unsigned long addr, bool link) offset = (long)addr - (long)(pc + 8); if (unlikely(offset < -33554432 || offset > 33554428)) { - WARN_ON_ONCE(1); + WARN_ON_ONCE(warn); return 0; } @@ -54,10 +55,10 @@ __arm_gen_branch_arm(unsigned long pc, unsigned long addr, bool link) } unsigned long -__arm_gen_branch(unsigned long pc, unsigned long addr, bool link) +__arm_gen_branch(unsigned long pc, unsigned long addr, bool link, bool warn) { if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) - return __arm_gen_branch_thumb2(pc, addr, link); + return __arm_gen_branch_thumb2(pc, addr, link, warn); else - return __arm_gen_branch_arm(pc, addr, link); + return __arm_gen_branch_arm(pc, addr, link, warn); } diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S index 0dcae787b004d..d2b4ac06e4ed8 100644 --- a/arch/arm/kernel/iwmmxt.S +++ b/arch/arm/kernel/iwmmxt.S @@ -16,6 +16,7 @@ #include #include #include +#include "iwmmxt.h" #if defined(CONFIG_CPU_PJ4) || defined(CONFIG_CPU_PJ4B) #define PJ4(code...) code @@ -113,33 +114,33 @@ concan_save: concan_dump: - wstrw wCSSF, [r1, #MMX_WCSSF] - wstrw wCASF, [r1, #MMX_WCASF] - wstrw wCGR0, [r1, #MMX_WCGR0] - wstrw wCGR1, [r1, #MMX_WCGR1] - wstrw wCGR2, [r1, #MMX_WCGR2] - wstrw wCGR3, [r1, #MMX_WCGR3] + wstrw wCSSF, r1, MMX_WCSSF + wstrw wCASF, r1, MMX_WCASF + wstrw wCGR0, r1, MMX_WCGR0 + wstrw wCGR1, r1, MMX_WCGR1 + wstrw wCGR2, r1, MMX_WCGR2 + wstrw wCGR3, r1, MMX_WCGR3 1: @ MUP? wRn tst r2, #0x2 beq 2f - wstrd wR0, [r1, #MMX_WR0] - wstrd wR1, [r1, #MMX_WR1] - wstrd wR2, [r1, #MMX_WR2] - wstrd wR3, [r1, #MMX_WR3] - wstrd wR4, [r1, #MMX_WR4] - wstrd wR5, [r1, #MMX_WR5] - wstrd wR6, [r1, #MMX_WR6] - wstrd wR7, [r1, #MMX_WR7] - wstrd wR8, [r1, #MMX_WR8] - wstrd wR9, [r1, #MMX_WR9] - wstrd wR10, [r1, #MMX_WR10] - wstrd wR11, [r1, #MMX_WR11] - wstrd wR12, [r1, #MMX_WR12] - wstrd wR13, [r1, #MMX_WR13] - wstrd wR14, [r1, #MMX_WR14] - wstrd wR15, [r1, #MMX_WR15] + wstrd wR0, r1, MMX_WR0 + wstrd wR1, r1, MMX_WR1 + wstrd wR2, r1, MMX_WR2 + wstrd wR3, r1, MMX_WR3 + wstrd wR4, r1, MMX_WR4 + wstrd wR5, r1, MMX_WR5 + wstrd wR6, r1, MMX_WR6 + wstrd wR7, r1, MMX_WR7 + wstrd wR8, r1, MMX_WR8 + wstrd wR9, r1, MMX_WR9 + wstrd wR10, r1, MMX_WR10 + wstrd wR11, r1, MMX_WR11 + wstrd wR12, r1, MMX_WR12 + wstrd wR13, r1, MMX_WR13 + wstrd wR14, r1, MMX_WR14 + wstrd wR15, r1, MMX_WR15 2: teq r0, #0 @ anything to load? reteq lr @ if not, return @@ -147,30 +148,30 @@ concan_dump: concan_load: @ Load wRn - wldrd wR0, [r0, #MMX_WR0] - wldrd wR1, [r0, #MMX_WR1] - wldrd wR2, [r0, #MMX_WR2] - wldrd wR3, [r0, #MMX_WR3] - wldrd wR4, [r0, #MMX_WR4] - wldrd wR5, [r0, #MMX_WR5] - wldrd wR6, [r0, #MMX_WR6] - wldrd wR7, [r0, #MMX_WR7] - wldrd wR8, [r0, #MMX_WR8] - wldrd wR9, [r0, #MMX_WR9] - wldrd wR10, [r0, #MMX_WR10] - wldrd wR11, [r0, #MMX_WR11] - wldrd wR12, [r0, #MMX_WR12] - wldrd wR13, [r0, #MMX_WR13] - wldrd wR14, [r0, #MMX_WR14] - wldrd wR15, [r0, #MMX_WR15] + wldrd wR0, r0, MMX_WR0 + wldrd wR1, r0, MMX_WR1 + wldrd wR2, r0, MMX_WR2 + wldrd wR3, r0, MMX_WR3 + wldrd wR4, r0, MMX_WR4 + wldrd wR5, r0, MMX_WR5 + wldrd wR6, r0, MMX_WR6 + wldrd wR7, r0, MMX_WR7 + wldrd wR8, r0, MMX_WR8 + wldrd wR9, r0, MMX_WR9 + wldrd wR10, r0, MMX_WR10 + wldrd wR11, r0, MMX_WR11 + wldrd wR12, r0, MMX_WR12 + wldrd wR13, r0, MMX_WR13 + wldrd wR14, r0, MMX_WR14 + wldrd wR15, r0, MMX_WR15 @ Load wCx - wldrw wCSSF, [r0, #MMX_WCSSF] - wldrw wCASF, [r0, #MMX_WCASF] - wldrw wCGR0, [r0, #MMX_WCGR0] - wldrw wCGR1, [r0, #MMX_WCGR1] - wldrw wCGR2, [r0, #MMX_WCGR2] - wldrw wCGR3, [r0, #MMX_WCGR3] + wldrw wCSSF, r0, MMX_WCSSF + wldrw wCASF, r0, MMX_WCASF + wldrw wCGR0, r0, MMX_WCGR0 + wldrw wCGR1, r0, MMX_WCGR1 + wldrw wCGR2, r0, MMX_WCGR2 + wldrw wCGR3, r0, MMX_WCGR3 @ clear CUP/MUP (only if r1 != 0) teq r1, #0 diff --git a/arch/arm/kernel/iwmmxt.h b/arch/arm/kernel/iwmmxt.h new file mode 100644 index 0000000000000..fb627286f5bb9 --- /dev/null +++ b/arch/arm/kernel/iwmmxt.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __IWMMXT_H__ +#define __IWMMXT_H__ + +.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +.set .LwR\b, \b +.set .Lr\b, \b +.endr + +.set .LwCSSF, 0x2 +.set .LwCASF, 0x3 +.set .LwCGR0, 0x8 +.set .LwCGR1, 0x9 +.set .LwCGR2, 0xa +.set .LwCGR3, 0xb + +.macro wldrd, reg:req, base:req, offset:req +.inst 0xedd00100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +.macro wldrw, reg:req, base:req, offset:req +.inst 0xfd900100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +.macro wstrd, reg:req, base:req, offset:req +.inst 0xedc00100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +.macro wstrw, reg:req, base:req, offset:req +.inst 0xfd800100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +#ifdef __clang__ + +#define wCon c1 + +.macro tmrc, dest:req, control:req +mrc p1, 0, \dest, \control, c0, 0 +.endm + +.macro tmcr, control:req, src:req +mcr p1, 0, \src, \control, c0, 0 +.endm +#endif + +#endif diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 6a95b92966406..183a6f2f165ad 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -154,22 +154,38 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr) return 0; } -static struct undef_hook kgdb_brkpt_hook = { +static struct undef_hook kgdb_brkpt_arm_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_BREAKINST, - .cpsr_mask = MODE_MASK, + .cpsr_mask = PSR_T_BIT | MODE_MASK, .cpsr_val = SVC_MODE, .fn = kgdb_brk_fn }; -static struct undef_hook kgdb_compiled_brkpt_hook = { +static struct undef_hook kgdb_brkpt_thumb_hook = { + .instr_mask = 0xffff, + .instr_val = KGDB_BREAKINST & 0xffff, + .cpsr_mask = PSR_T_BIT | MODE_MASK, + .cpsr_val = PSR_T_BIT | SVC_MODE, + .fn = kgdb_brk_fn +}; + +static struct undef_hook kgdb_compiled_brkpt_arm_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_COMPILED_BREAK, - .cpsr_mask = MODE_MASK, + .cpsr_mask = PSR_T_BIT | MODE_MASK, .cpsr_val = SVC_MODE, .fn = kgdb_compiled_brk_fn }; +static struct undef_hook kgdb_compiled_brkpt_thumb_hook = { + .instr_mask = 0xffff, + .instr_val = KGDB_COMPILED_BREAK & 0xffff, + .cpsr_mask = PSR_T_BIT | MODE_MASK, + .cpsr_val = PSR_T_BIT | SVC_MODE, + .fn = kgdb_compiled_brk_fn +}; + static int __kgdb_notify(struct die_args *args, unsigned long cmd) { struct pt_regs *regs = args->regs; @@ -210,8 +226,10 @@ int kgdb_arch_init(void) if (ret != 0) return ret; - register_undef_hook(&kgdb_brkpt_hook); - register_undef_hook(&kgdb_compiled_brkpt_hook); + register_undef_hook(&kgdb_brkpt_arm_hook); + register_undef_hook(&kgdb_brkpt_thumb_hook); + register_undef_hook(&kgdb_compiled_brkpt_arm_hook); + register_undef_hook(&kgdb_compiled_brkpt_thumb_hook); return 0; } @@ -224,8 +242,10 @@ int kgdb_arch_init(void) */ void kgdb_arch_exit(void) { - unregister_undef_hook(&kgdb_brkpt_hook); - unregister_undef_hook(&kgdb_compiled_brkpt_hook); + unregister_undef_hook(&kgdb_brkpt_arm_hook); + unregister_undef_hook(&kgdb_brkpt_thumb_hook); + unregister_undef_hook(&kgdb_compiled_brkpt_arm_hook); + unregister_undef_hook(&kgdb_compiled_brkpt_thumb_hook); unregister_die_notifier(&kgdb_notifier); } diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 76300f3813e89..734adeb42df87 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -24,11 +25,6 @@ extern void relocate_new_kernel(void); extern const unsigned int relocate_new_kernel_size; -extern unsigned long kexec_start_address; -extern unsigned long kexec_indirection_page; -extern unsigned long kexec_mach_type; -extern unsigned long kexec_boot_atags; - static atomic_t waiting_for_crash_ipi; /* @@ -161,6 +157,7 @@ void (*kexec_reinit)(void); void machine_kexec(struct kimage *image) { unsigned long page_list, reboot_entry_phys; + struct kexec_relocate_data *data; void (*reboot_entry)(void); void *reboot_code_buffer; @@ -176,18 +173,17 @@ void machine_kexec(struct kimage *image) reboot_code_buffer = page_address(image->control_code_page); - /* Prepare parameters for reboot_code_buffer*/ - set_kernel_text_rw(); - kexec_start_address = image->start; - kexec_indirection_page = page_list; - kexec_mach_type = machine_arch_type; - kexec_boot_atags = image->arch.kernel_r2; - /* copy our kernel relocation code to the control code page */ reboot_entry = fncpy(reboot_code_buffer, &relocate_new_kernel, relocate_new_kernel_size); + data = reboot_code_buffer + relocate_new_kernel_size; + data->kexec_start_address = image->start; + data->kexec_indirection_page = page_list; + data->kexec_mach_type = machine_arch_type; + data->kexec_r2 = image->arch.kernel_r2; + /* get the identity mapping physical address for the reboot code */ reboot_entry_phys = virt_to_idmap(reboot_entry); diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c index b647741c0ab06..d1c2d3bd55b64 100644 --- a/arch/arm/kernel/module-plts.c +++ b/arch/arm/kernel/module-plts.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -11,10 +12,6 @@ #include #include -#define PLT_ENT_STRIDE L1_CACHE_BYTES -#define PLT_ENT_COUNT (PLT_ENT_STRIDE / sizeof(u32)) -#define PLT_ENT_SIZE (sizeof(struct plt_entries) / PLT_ENT_COUNT) - #ifdef CONFIG_THUMB2_KERNEL #define PLT_ENT_LDR __opcode_to_mem_thumb32(0xf8dff000 | \ (PLT_ENT_STRIDE - 4)) @@ -23,9 +20,11 @@ (PLT_ENT_STRIDE - 8)) #endif -struct plt_entries { - u32 ldr[PLT_ENT_COUNT]; - u32 lit[PLT_ENT_COUNT]; +static const u32 fixed_plts[] = { +#ifdef CONFIG_DYNAMIC_FTRACE + FTRACE_ADDR, + MCOUNT_ADDR, +#endif }; static bool in_init(const struct module *mod, unsigned long loc) @@ -33,14 +32,40 @@ static bool in_init(const struct module *mod, unsigned long loc) return loc - (u32)mod->init_layout.base < mod->init_layout.size; } +static void prealloc_fixed(struct mod_plt_sec *pltsec, struct plt_entries *plt) +{ + int i; + + if (!ARRAY_SIZE(fixed_plts) || pltsec->plt_count) + return; + pltsec->plt_count = ARRAY_SIZE(fixed_plts); + + for (i = 0; i < ARRAY_SIZE(plt->ldr); ++i) + plt->ldr[i] = PLT_ENT_LDR; + + BUILD_BUG_ON(sizeof(fixed_plts) > sizeof(plt->lit)); + memcpy(plt->lit, fixed_plts, sizeof(fixed_plts)); +} + u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val) { struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core : &mod->arch.init; + struct plt_entries *plt; + int idx; + + /* cache the address, ELF header is available only during module load */ + if (!pltsec->plt_ent) + pltsec->plt_ent = (struct plt_entries *)pltsec->plt->sh_addr; + plt = pltsec->plt_ent; - struct plt_entries *plt = (struct plt_entries *)pltsec->plt->sh_addr; - int idx = 0; + prealloc_fixed(pltsec, plt); + + for (idx = 0; idx < ARRAY_SIZE(fixed_plts); ++idx) + if (plt->lit[idx] == val) + return (u32)&plt->ldr[idx]; + idx = 0; /* * Look for an existing entry pointing to 'val'. Given that the * relocations are sorted, this will be the last entry we allocated. @@ -188,8 +213,8 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base, int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { - unsigned long core_plts = 0; - unsigned long init_plts = 0; + unsigned long core_plts = ARRAY_SIZE(fixed_plts); + unsigned long init_plts = ARRAY_SIZE(fixed_plts); Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum; Elf32_Sym *syms = NULL; @@ -244,6 +269,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.core.plt->sh_size = round_up(core_plts * PLT_ENT_SIZE, sizeof(struct plt_entries)); mod->arch.core.plt_count = 0; + mod->arch.core.plt_ent = NULL; mod->arch.init.plt->sh_type = SHT_NOBITS; mod->arch.init.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; @@ -251,6 +277,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.init.plt->sh_size = round_up(init_plts * PLT_ENT_SIZE, sizeof(struct plt_entries)); mod->arch.init.plt_count = 0; + mod->arch.init.plt_ent = NULL; pr_debug("%s: plt=%x, init.plt=%x\n", __func__, mod->arch.core.plt->sh_size, mod->arch.init.plt->sh_size); diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c index 3b69a76d341e7..1626dfc6f6ce6 100644 --- a/arch/arm/kernel/perf_callchain.c +++ b/arch/arm/kernel/perf_callchain.c @@ -62,9 +62,10 @@ user_backtrace(struct frame_tail __user *tail, void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct frame_tail __user *tail; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } @@ -98,9 +99,10 @@ callchain_trace(struct stackframe *fr, void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct stackframe fr; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } @@ -111,18 +113,21 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re unsigned long perf_instruction_pointer(struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + + if (guest_cbs && guest_cbs->is_in_guest()) + return guest_cbs->get_guest_ip(); return instruction_pointer(regs); } unsigned long perf_misc_flags(struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); int misc = 0; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) + if (guest_cbs && guest_cbs->is_in_guest()) { + if (guest_cbs->is_user_mode()) misc |= PERF_RECORD_MISC_GUEST_USER; else misc |= PERF_RECORD_MISC_GUEST_KERNEL; diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 2924d7910b106..eb2190477da10 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -773,10 +773,10 @@ static inline void armv7pmu_write_counter(struct perf_event *event, u64 value) pr_err("CPU%u writing wrong counter %d\n", smp_processor_id(), idx); } else if (idx == ARMV7_IDX_CYCLE_COUNTER) { - asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" ((u32)value)); } else { armv7_pmnc_select_counter(idx); - asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value)); + asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" ((u32)value)); } } diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 9485acc520a41..e7fac125ea0d5 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -224,8 +224,8 @@ void release_thread(struct task_struct *dead_task) asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); int -copy_thread(unsigned long clone_flags, unsigned long stack_start, - unsigned long stk_sz, struct task_struct *p) +copy_thread_tls(unsigned long clone_flags, unsigned long stack_start, + unsigned long stk_sz, struct task_struct *p, unsigned long tls) { struct thread_info *thread = task_thread_info(p); struct pt_regs *childregs = task_pt_regs(p); @@ -259,7 +259,7 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, clear_ptrace_hw_breakpoint(p); if (clone_flags & CLONE_SETTLS) - thread->tp_value[0] = childregs->ARM_r3; + thread->tp_value[0] = tls; thread->tp_value[1] = get_tpuser(); thread_notify(THREAD_NOTIFY_COPY, thread); diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 324352787aeaf..db9401581cd25 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -219,8 +219,8 @@ static struct undef_hook arm_break_hook = { }; static struct undef_hook thumb_break_hook = { - .instr_mask = 0xffff, - .instr_val = 0xde01, + .instr_mask = 0xffffffff, + .instr_val = 0x0000de01, .cpsr_mask = PSR_T_BIT, .cpsr_val = PSR_T_BIT, .fn = break_trap, diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S index 7eaa2ae7aff58..5e15b5912cb05 100644 --- a/arch/arm/kernel/relocate_kernel.S +++ b/arch/arm/kernel/relocate_kernel.S @@ -5,14 +5,16 @@ #include #include +#include #include .align 3 /* not needed for this code, but keeps fncpy() happy */ ENTRY(relocate_new_kernel) - ldr r0,kexec_indirection_page - ldr r1,kexec_start_address + adr r7, relocate_new_kernel_end + ldr r0, [r7, #KEXEC_INDIR_PAGE] + ldr r1, [r7, #KEXEC_START_ADDR] /* * If there is no indirection page (we are doing crashdumps) @@ -57,34 +59,16 @@ ENTRY(relocate_new_kernel) 2: /* Jump to relocated kernel */ - mov lr,r1 - mov r0,#0 - ldr r1,kexec_mach_type - ldr r2,kexec_boot_atags - ARM( ret lr ) - THUMB( bx lr ) - - .align - - .globl kexec_start_address -kexec_start_address: - .long 0x0 - - .globl kexec_indirection_page -kexec_indirection_page: - .long 0x0 - - .globl kexec_mach_type -kexec_mach_type: - .long 0x0 - - /* phy addr of the atags for the new kernel */ - .globl kexec_boot_atags -kexec_boot_atags: - .long 0x0 + mov lr, r1 + mov r0, #0 + ldr r1, [r7, #KEXEC_MACH_TYPE] + ldr r2, [r7, #KEXEC_R2] + ARM( ret lr ) + THUMB( bx lr ) ENDPROC(relocate_new_kernel) + .align 3 relocate_new_kernel_end: .globl relocate_new_kernel_size diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c index b0d2f1fe891d1..7b42ac010fdfd 100644 --- a/arch/arm/kernel/return_address.c +++ b/arch/arm/kernel/return_address.c @@ -7,8 +7,6 @@ */ #include #include - -#if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) #include #include @@ -53,6 +51,4 @@ void *return_address(unsigned int level) return NULL; } -#endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */ - EXPORT_SYMBOL_GPL(return_address); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index d0a464e317eac..43d6a6085d862 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -543,9 +544,11 @@ void notrace cpu_init(void) * In Thumb-2, msr with an immediate value is not allowed. */ #ifdef CONFIG_THUMB2_KERNEL -#define PLC "r" +#define PLC_l "l" +#define PLC_r "r" #else -#define PLC "I" +#define PLC_l "I" +#define PLC_r "I" #endif /* @@ -567,15 +570,15 @@ void notrace cpu_init(void) "msr cpsr_c, %9" : : "r" (stk), - PLC (PSR_F_BIT | PSR_I_BIT | IRQ_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | IRQ_MODE), "I" (offsetof(struct stack, irq[0])), - PLC (PSR_F_BIT | PSR_I_BIT | ABT_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | ABT_MODE), "I" (offsetof(struct stack, abt[0])), - PLC (PSR_F_BIT | PSR_I_BIT | UND_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | UND_MODE), "I" (offsetof(struct stack, und[0])), - PLC (PSR_F_BIT | PSR_I_BIT | FIQ_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | FIQ_MODE), "I" (offsetof(struct stack, fiq[0])), - PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE) + PLC_l (PSR_F_BIT | PSR_I_BIT | SVC_MODE) : "r14"); #endif } @@ -1075,19 +1078,27 @@ void __init hyp_mode_check(void) void __init setup_arch(char **cmdline_p) { - const struct machine_desc *mdesc; + const struct machine_desc *mdesc = NULL; + void *atags_vaddr = NULL; + + if (__atags_pointer) + atags_vaddr = FDT_VIRT_BASE(__atags_pointer); setup_processor(); - mdesc = setup_machine_fdt(__atags_pointer); + if (atags_vaddr) { + mdesc = setup_machine_fdt(atags_vaddr); + if (mdesc) + memblock_reserve(__atags_pointer, + fdt_totalsize(atags_vaddr)); + } if (!mdesc) - mdesc = setup_machine_tags(__atags_pointer, __machine_arch_type); + mdesc = setup_machine_tags(atags_vaddr, __machine_arch_type); if (!mdesc) { early_print("\nError: invalid dtb and unrecognized/unsupported machine ID\n"); early_print(" r1=0x%08x, r2=0x%08x\n", __machine_arch_type, __atags_pointer); if (__atags_pointer) - early_print(" r2[]=%*ph\n", 16, - phys_to_virt(__atags_pointer)); + early_print(" r2[]=%*ph\n", 16, atags_vaddr); dump_machine_table(); } diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index ab2568996ddb0..c01f76cd02422 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -694,18 +694,20 @@ struct page *get_signal_page(void) addr = page_address(page); + /* Poison the entire page */ + memset32(addr, __opcode_to_mem_arm(0xe7fddef1), + PAGE_SIZE / sizeof(u32)); + /* Give the signal return code some randomness */ offset = 0x200 + (get_random_int() & 0x7fc); signal_return_offset = offset; - /* - * Copy signal return handlers into the vector page, and - * set sigreturn to be a pointer to these. - */ + /* Copy signal return handlers into the page */ memcpy(addr + offset, sigreturn_codes, sizeof(sigreturn_codes)); - ptr = (unsigned long)addr + offset; - flush_icache_range(ptr, ptr + sizeof(sigreturn_codes)); + /* Flush out all instructions in this page */ + ptr = (unsigned long)addr; + flush_icache_range(ptr, ptr + PAGE_SIZE); return page; } diff --git a/arch/arm/kernel/smccc-call.S b/arch/arm/kernel/smccc-call.S index 00664c78facab..931df62a78312 100644 --- a/arch/arm/kernel/smccc-call.S +++ b/arch/arm/kernel/smccc-call.S @@ -3,7 +3,9 @@ * Copyright (c) 2015, Linaro Limited */ #include +#include +#include #include #include #include @@ -27,7 +29,14 @@ UNWIND( .fnstart) UNWIND( .save {r4-r7}) ldm r12, {r4-r7} \instr - pop {r4-r7} + ldr r4, [sp, #36] + cmp r4, #0 + beq 1f // No quirk structure + ldr r5, [r4, #ARM_SMCCC_QUIRK_ID_OFFS] + cmp r5, #ARM_SMCCC_QUIRK_QCOM_A6 + bne 1f // No quirk present + str r6, [r4, #ARM_SMCCC_QUIRK_STATE_OFFS] +1: pop {r4-r7} ldr r12, [sp, #(4 * 4)] stm r12, {r0-r3} bx lr diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 4b0bab2607e47..46e1be9e57a81 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -240,6 +240,10 @@ int __cpu_disable(void) if (ret) return ret; +#ifdef CONFIG_GENERIC_ARCH_TOPOLOGY + remove_cpu_topology(cpu); +#endif + /* * Take this CPU offline. Once we clear this, we can't return, * and we must not schedule until we're ready to give up the cpu. diff --git a/arch/arm/kernel/spectre.c b/arch/arm/kernel/spectre.c new file mode 100644 index 0000000000000..0dcefc36fb7a0 --- /dev/null +++ b/arch/arm/kernel/spectre.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include + +#include + +static bool _unprivileged_ebpf_enabled(void) +{ +#ifdef CONFIG_BPF_SYSCALL + return !sysctl_unprivileged_bpf_disabled; +#else + return false; +#endif +} + +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} + +static unsigned int spectre_v2_state; +static unsigned int spectre_v2_methods; + +void spectre_v2_update_state(unsigned int state, unsigned int method) +{ + if (state > spectre_v2_state) + spectre_v2_state = state; + spectre_v2_methods |= method; +} + +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, + char *buf) +{ + const char *method; + + if (spectre_v2_state == SPECTRE_UNAFFECTED) + return sprintf(buf, "%s\n", "Not affected"); + + if (spectre_v2_state != SPECTRE_MITIGATED) + return sprintf(buf, "%s\n", "Vulnerable"); + + if (_unprivileged_ebpf_enabled()) + return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n"); + + switch (spectre_v2_methods) { + case SPECTRE_V2_METHOD_BPIALL: + method = "Branch predictor hardening"; + break; + + case SPECTRE_V2_METHOD_ICIALLU: + method = "I-cache invalidation"; + break; + + case SPECTRE_V2_METHOD_SMC: + case SPECTRE_V2_METHOD_HVC: + method = "Firmware call"; + break; + + case SPECTRE_V2_METHOD_LOOP8: + method = "History overwrite"; + break; + + default: + method = "Multiple mitigations"; + break; + } + + return sprintf(buf, "Mitigation: %s\n", method); +} diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index 71778bb0475b3..8247749998259 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -22,6 +22,19 @@ * A simple function epilogue looks like this: * ldm sp, {fp, sp, pc} * + * When compiled with clang, pc and sp are not pushed. A simple function + * prologue looks like this when built with clang: + * + * stmdb {..., fp, lr} + * add fp, sp, #x + * sub sp, sp, #y + * + * A simple function epilogue looks like this when built with clang: + * + * sub sp, fp, #x + * ldm {..., fp, pc} + * + * * Note that with framepointer enabled, even the leaf functions have the same * prologue and epilogue, therefore we can ignore the LR value in this case. */ @@ -34,14 +47,24 @@ int notrace unwind_frame(struct stackframe *frame) low = frame->sp; high = ALIGN(low, THREAD_SIZE); +#ifdef CONFIG_CC_IS_CLANG + /* check current frame pointer is within bounds */ + if (fp < low + 4 || fp > high - 4) + return -EINVAL; + + frame->sp = frame->fp; + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 4)); +#else /* check current frame pointer is within bounds */ if (fp < low + 12 || fp > high - 4) return -EINVAL; /* restore the registers from the stack frame */ - frame->fp = *(unsigned long *)(fp - 12); - frame->sp = *(unsigned long *)(fp - 8); - frame->pc = *(unsigned long *)(fp - 4); + frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 12)); + frame->sp = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 8)); + frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp - 4)); +#endif return 0; } @@ -92,6 +115,8 @@ static int save_trace(struct stackframe *frame, void *d) return 0; regs = (struct pt_regs *)frame->sp; + if ((unsigned long)®s[1] > ALIGN(frame->sp, THREAD_SIZE)) + return 0; trace->entries[trace->nr_entries++] = regs->ARM_pc; diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index d08099269e35b..e126386fb78a8 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include @@ -26,6 +27,13 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) if (!idmap_pgd) return -EINVAL; + /* + * Function graph tracer state gets incosistent when the kernel + * calls functions that never return (aka suspend finishers) hence + * disable graph tracing during their execution. + */ + pause_graph_tracing(); + /* * Provide a temporary page table with an identity mapping for * the MMU-enable code, required for resuming. On successful @@ -33,6 +41,9 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) * back to the correct page tables. */ ret = __cpu_suspend(arg, fn, __mpidr); + + unpause_graph_tracing(); + if (ret == 0) { cpu_switch_mm(mm->pgd, mm); local_flush_bp_all(); @@ -46,7 +57,13 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) { u32 __mpidr = cpu_logical_map(smp_processor_id()); - return __cpu_suspend(arg, fn, __mpidr); + int ret; + + pause_graph_tracing(); + ret = __cpu_suspend(arg, fn, __mpidr); + unpause_graph_tracing(); + + return ret; } #define idmap_pgd NULL #endif diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 5b9faba03afba..8d2e61d9e7a63 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -196,9 +196,8 @@ void store_cpu_topology(unsigned int cpuid) struct cpu_topology *cpuid_topo = &cpu_topology[cpuid]; unsigned int mpidr; - /* If the cpu topology has been already set, just return */ - if (cpuid_topo->core_id != -1) - return; + if (cpuid_topo->package_id != -1) + goto topology_populated; mpidr = read_cpuid_mpidr(); @@ -231,14 +230,15 @@ void store_cpu_topology(unsigned int cpuid) cpuid_topo->package_id = -1; } - update_siblings_masks(cpuid); - update_cpu_capacity(cpuid); pr_info("CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n", cpuid, cpu_topology[cpuid].thread_id, cpu_topology[cpuid].core_id, cpu_topology[cpuid].package_id, mpidr); + +topology_populated: + update_siblings_masks(cpuid); } static inline int cpu_corepower_flags(void) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index c053abd1fb539..207ef9a797bd4 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -64,14 +65,16 @@ static void dump_mem(const char *, const char *, unsigned long, unsigned long); void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame) { + unsigned long end = frame + 4 + sizeof(struct pt_regs); + #ifdef CONFIG_KALLSYMS printk("[<%08lx>] (%ps) from [<%08lx>] (%pS)\n", where, (void *)where, from, (void *)from); #else printk("Function entered at [<%08lx>] from [<%08lx>]\n", where, from); #endif - if (in_entry_text(from)) - dump_mem("", "Exception stack", frame + 4, frame + 4 + sizeof(struct pt_regs)); + if (in_entry_text(from) && end <= ALIGN(frame, THREAD_SIZE)) + dump_mem("", "Exception stack", frame + 4, end); } void dump_backtrace_stm(u32 *stack, u32 instruction) @@ -797,10 +800,59 @@ static inline void __init kuser_init(void *vectors) } #endif +#ifndef CONFIG_CPU_V7M +static void copy_from_lma(void *vma, void *lma_start, void *lma_end) +{ + memcpy(vma, lma_start, lma_end - lma_start); +} + +static void flush_vectors(void *vma, size_t offset, size_t size) +{ + unsigned long start = (unsigned long)vma + offset; + unsigned long end = start + size; + + flush_icache_range(start, end); +} + +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +int spectre_bhb_update_vectors(unsigned int method) +{ + extern char __vectors_bhb_bpiall_start[], __vectors_bhb_bpiall_end[]; + extern char __vectors_bhb_loop8_start[], __vectors_bhb_loop8_end[]; + void *vec_start, *vec_end; + + if (system_state > SYSTEM_SCHEDULING) { + pr_err("CPU%u: Spectre BHB workaround too late - system vulnerable\n", + smp_processor_id()); + return SPECTRE_VULNERABLE; + } + + switch (method) { + case SPECTRE_V2_METHOD_LOOP8: + vec_start = __vectors_bhb_loop8_start; + vec_end = __vectors_bhb_loop8_end; + break; + + case SPECTRE_V2_METHOD_BPIALL: + vec_start = __vectors_bhb_bpiall_start; + vec_end = __vectors_bhb_bpiall_end; + break; + + default: + pr_err("CPU%u: unknown Spectre BHB state %d\n", + smp_processor_id(), method); + return SPECTRE_VULNERABLE; + } + + copy_from_lma(vectors_page, vec_start, vec_end); + flush_vectors(vectors_page, 0, vec_end - vec_start); + + return SPECTRE_MITIGATED; +} +#endif + void __init early_trap_init(void *vectors_base) { -#ifndef CONFIG_CPU_V7M - unsigned long vectors = (unsigned long)vectors_base; extern char __stubs_start[], __stubs_end[]; extern char __vectors_start[], __vectors_end[]; unsigned i; @@ -821,17 +873,20 @@ void __init early_trap_init(void *vectors_base) * into the vector page, mapped at 0xffff0000, and ensure these * are visible to the instruction stream. */ - memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start); - memcpy((void *)vectors + 0x1000, __stubs_start, __stubs_end - __stubs_start); + copy_from_lma(vectors_base, __vectors_start, __vectors_end); + copy_from_lma(vectors_base + 0x1000, __stubs_start, __stubs_end); kuser_init(vectors_base); - flush_icache_range(vectors, vectors + PAGE_SIZE * 2); + flush_vectors(vectors_base, 0, PAGE_SIZE * 2); +} #else /* ifndef CONFIG_CPU_V7M */ +void __init early_trap_init(void *vectors_base) +{ /* * on V7-M there is no need to copy the vector table to a dedicated * memory area. The address is configurable and so a table in the kernel * image can be used. */ -#endif } +#endif diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index 9bf16c93ee6a5..6c69a5548ba25 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -92,6 +92,8 @@ static bool __init cntvct_functional(void) * this. */ np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer"); + if (!np) + np = of_find_compatible_node(NULL, NULL, "arm,armv8-timer"); if (!np) goto out_put; @@ -279,7 +281,7 @@ static bool tk_is_cntvct(const struct timekeeper *tk) if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER)) return false; - if (!tk->tkr_mono.clock->archdata.vdso_direct) + if (tk->tkr_mono.clock->archdata.clock_mode != VDSO_CLOCKMODE_ARCHTIMER) return false; return true; diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 8c74037ade229..b1f366df620b0 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -180,7 +180,7 @@ ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE, ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA") #endif -#ifdef CONFIG_ARM_MPU +#if defined(CONFIG_ARM_MPU) && !defined(CONFIG_COMPILE_TEST) /* * Due to PMSAv7 restriction on base address and size we have to * enforce minimal alignment restrictions. It was seen that weaker diff --git a/arch/arm/kernel/vmlinux.lds.h b/arch/arm/kernel/vmlinux.lds.h index 8247bc15addc4..78d156e4f0088 100644 --- a/arch/arm/kernel/vmlinux.lds.h +++ b/arch/arm/kernel/vmlinux.lds.h @@ -25,6 +25,19 @@ #define ARM_MMU_DISCARD(x) x #endif +/* + * ld.lld does not support NOCROSSREFS: + * https://github.com/ClangBuiltLinux/linux/issues/1609 + */ +#ifdef CONFIG_LD_IS_LLD +#define NOCROSSREFS +#endif + +/* Set start/end symbol names to the LMA for the section */ +#define ARM_LMA(sym, section) \ + sym##_start = LOADADDR(section); \ + sym##_end = LOADADDR(section) + SIZEOF(section) + #define PROC_INFO \ . = ALIGN(4); \ __proc_info_begin = .; \ @@ -100,19 +113,31 @@ * only thing that matters is their relative offsets */ #define ARM_VECTORS \ - __vectors_start = .; \ - .vectors 0xffff0000 : AT(__vectors_start) { \ - *(.vectors) \ + __vectors_lma = .; \ + OVERLAY 0xffff0000 : NOCROSSREFS AT(__vectors_lma) { \ + .vectors { \ + *(.vectors) \ + } \ + .vectors.bhb.loop8 { \ + *(.vectors.bhb.loop8) \ + } \ + .vectors.bhb.bpiall { \ + *(.vectors.bhb.bpiall) \ + } \ } \ - . = __vectors_start + SIZEOF(.vectors); \ - __vectors_end = .; \ + ARM_LMA(__vectors, .vectors); \ + ARM_LMA(__vectors_bhb_loop8, .vectors.bhb.loop8); \ + ARM_LMA(__vectors_bhb_bpiall, .vectors.bhb.bpiall); \ + . = __vectors_lma + SIZEOF(.vectors) + \ + SIZEOF(.vectors.bhb.loop8) + \ + SIZEOF(.vectors.bhb.bpiall); \ \ - __stubs_start = .; \ - .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) { \ + __stubs_lma = .; \ + .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_lma) { \ *(.stubs) \ } \ - . = __stubs_start + SIZEOF(.stubs); \ - __stubs_end = .; \ + ARM_LMA(__stubs, .stubs); \ + . = __stubs_lma + SIZEOF(.stubs); \ \ PROVIDE(vector_fiq_offset = vector_fiq - ADDR(.vectors)); diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index b76b75bd9e005..e442d82821df5 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -24,7 +24,7 @@ obj-y += kvm-arm.o init.o interrupts.o obj-y += handle_exit.o guest.o emulate.o reset.o obj-y += coproc.o coproc_a15.o coproc_a7.o vgic-v3-coproc.o obj-y += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o -obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o +obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o $(KVM)/arm/hypercalls.o obj-y += $(KVM)/arm/aarch32.o obj-y += $(KVM)/arm/vgic/vgic.o diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 2a6a1394d26ea..e58a89d2f13ff 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include "trace.h" diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c index 848f27bbad9db..80e67108d39d1 100644 --- a/arch/arm/kvm/hyp/tlb.c +++ b/arch/arm/kvm/hyp/tlb.c @@ -45,7 +45,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) __kvm_tlb_flush_vmid(kvm); } -void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) +void __hyp_text __kvm_flush_cpu_context(struct kvm_vcpu *vcpu) { struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); @@ -54,6 +54,7 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) isb(); write_sysreg(0, TLBIALL); + write_sysreg(0, ICIALLU); dsb(nsh); isb(); diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index 95b2e1ce559cb..f8016e3db65d7 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -118,7 +118,7 @@ ENTRY(arm_copy_from_user) ENDPROC(arm_copy_from_user) - .pushsection .fixup,"ax" + .pushsection .text.fixup,"ax" .align 0 copy_abort_preamble ldmfd sp!, {r1, r2, r3} diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S index b5e8b9ae4c7d4..7fd3600db8efd 100644 --- a/arch/arm/lib/findbit.S +++ b/arch/arm/lib/findbit.S @@ -40,8 +40,8 @@ ENDPROC(_find_first_zero_bit_le) * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) */ ENTRY(_find_next_zero_bit_le) - teq r1, #0 - beq 3b + cmp r2, r1 + bhs 3b ands ip, r2, #7 beq 1b @ If new byte, goto old routine ARM( ldrb r3, [r0, r2, lsr #3] ) @@ -81,8 +81,8 @@ ENDPROC(_find_first_bit_le) * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset) */ ENTRY(_find_next_bit_le) - teq r1, #0 - beq 3b + cmp r2, r1 + bhs 3b ands ip, r2, #7 beq 1b @ If new byte, goto old routine ARM( ldrb r3, [r0, r2, lsr #3] ) @@ -115,8 +115,8 @@ ENTRY(_find_first_zero_bit_be) ENDPROC(_find_first_zero_bit_be) ENTRY(_find_next_zero_bit_be) - teq r1, #0 - beq 3b + cmp r2, r1 + bhs 3b ands ip, r2, #7 beq 1b @ If new byte, goto old routine eor r3, r2, #0x18 @ big endian byte ordering @@ -149,8 +149,8 @@ ENTRY(_find_first_bit_be) ENDPROC(_find_first_bit_be) ENTRY(_find_next_bit_be) - teq r1, #0 - beq 3b + cmp r2, r1 + bhs 3b ands ip, r2, #7 beq 1b @ If new byte, goto old routine eor r3, r2, #0x18 @ big endian byte ordering diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c index b99dd8e1c93f1..7ba6cf8261626 100644 --- a/arch/arm/lib/xor-neon.c +++ b/arch/arm/lib/xor-neon.c @@ -26,8 +26,9 @@ MODULE_LICENSE("GPL"); * While older versions of GCC do not generate incorrect code, they fail to * recognize the parallel nature of these functions, and emit plain ARM code, * which is known to be slower than the optimized ARM code in asm-arm/xor.h. + * + * #warning This code requires at least version 4.6 of GCC */ -#warning This code requires at least version 4.6 of GCC #endif #pragma GCC diagnostic ignored "-Wunused-variable" diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index d5af6aedc02c4..5d75ab82d5a6a 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -103,7 +103,7 @@ static const struct wakeup_source_info ws_info[] = { static const struct of_device_id sama5d2_ws_ids[] = { { .compatible = "atmel,sama5d2-gem", .data = &ws_info[0] }, - { .compatible = "atmel,at91rm9200-rtc", .data = &ws_info[1] }, + { .compatible = "atmel,sama5d2-rtc", .data = &ws_info[1] }, { .compatible = "atmel,sama5d3-udc", .data = &ws_info[2] }, { .compatible = "atmel,at91rm9200-ohci", .data = &ws_info[2] }, { .compatible = "usb-ohci", .data = &ws_info[2] }, @@ -114,12 +114,12 @@ static const struct of_device_id sama5d2_ws_ids[] = { }; static const struct of_device_id sam9x60_ws_ids[] = { - { .compatible = "atmel,at91sam9x5-rtc", .data = &ws_info[1] }, + { .compatible = "microchip,sam9x60-rtc", .data = &ws_info[1] }, { .compatible = "atmel,at91rm9200-ohci", .data = &ws_info[2] }, { .compatible = "usb-ohci", .data = &ws_info[2] }, { .compatible = "atmel,at91sam9g45-ehci", .data = &ws_info[2] }, { .compatible = "usb-ehci", .data = &ws_info[2] }, - { .compatible = "atmel,at91sam9260-rtt", .data = &ws_info[4] }, + { .compatible = "microchip,sam9x60-rtt", .data = &ws_info[4] }, { .compatible = "cdns,sam9x60-macb", .data = &ws_info[5] }, { /* sentinel */ } }; @@ -592,13 +592,13 @@ static void __init at91_pm_sram_init(void) sram_pool = gen_pool_get(&pdev->dev, NULL); if (!sram_pool) { pr_warn("%s: sram pool unavailable!\n", __func__); - return; + goto out_put_device; } sram_base = gen_pool_alloc(sram_pool, at91_pm_suspend_in_sram_sz); if (!sram_base) { pr_warn("%s: unable to alloc sram!\n", __func__); - return; + goto out_put_device; } sram_pbase = gen_pool_virt_to_phys(sram_pool, sram_base); @@ -606,12 +606,17 @@ static void __init at91_pm_sram_init(void) at91_pm_suspend_in_sram_sz, false); if (!at91_suspend_sram_fn) { pr_warn("SRAM: Could not map\n"); - return; + goto out_put_device; } /* Copy the pm suspend handler to SRAM */ at91_suspend_sram_fn = fncpy(at91_suspend_sram_fn, &at91_pm_suspend_in_sram, at91_pm_suspend_in_sram_sz); + return; + +out_put_device: + put_device(&pdev->dev); + return; } static bool __init at91_is_pm_mode_active(int pm_mode) @@ -691,6 +696,12 @@ static void __init at91_pm_use_default_mode(int pm_mode) soc_pm.data.suspend_mode = AT91_PM_ULP0; } +static const struct of_device_id atmel_shdwc_ids[] = { + { .compatible = "atmel,sama5d2-shdwc" }, + { .compatible = "microchip,sam9x60-shdwc" }, + { /* sentinel. */ } +}; + static void __init at91_pm_modes_init(void) { struct device_node *np; @@ -700,7 +711,7 @@ static void __init at91_pm_modes_init(void) !at91_is_pm_mode_active(AT91_PM_ULP1)) return; - np = of_find_compatible_node(NULL, NULL, "atmel,sama5d2-shdwc"); + np = of_find_matching_node(NULL, atmel_shdwc_ids); if (!np) { pr_warn("%s: failed to find shdwc!\n", __func__); goto ulp1_default; @@ -751,6 +762,7 @@ static const struct of_device_id atmel_pmc_ids[] __initconst = { { .compatible = "atmel,sama5d3-pmc", .data = &pmc_infos[1] }, { .compatible = "atmel,sama5d4-pmc", .data = &pmc_infos[1] }, { .compatible = "atmel,sama5d2-pmc", .data = &pmc_infos[1] }, + { .compatible = "microchip,sam9x60-pmc", .data = &pmc_infos[1] }, { /* sentinel */ }, }; @@ -765,6 +777,7 @@ static void __init at91_pm_init(void (*pm_idle)(void)) pmc_np = of_find_matching_node_and_match(NULL, atmel_pmc_ids, &of_id); soc_pm.data.pmc = of_iomap(pmc_np, 0); + of_node_put(pmc_np); if (!soc_pm.data.pmc) { pr_err("AT91: PM not supported, PMC not found\n"); return; diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S index ed57c879d4e17..2591cba61937b 100644 --- a/arch/arm/mach-at91/pm_suspend.S +++ b/arch/arm/mach-at91/pm_suspend.S @@ -268,6 +268,10 @@ ENDPROC(at91_backup_mode) orr tmp1, tmp1, #AT91_PMC_KEY str tmp1, [pmc, #AT91_CKGR_MOR] + /* Quirk for SAM9X60's PMC */ + nop + nop + wait_mckrdy /* Enable the crystal oscillator */ diff --git a/arch/arm/mach-axxia/platsmp.c b/arch/arm/mach-axxia/platsmp.c index 512943eae30a5..2e203626eda52 100644 --- a/arch/arm/mach-axxia/platsmp.c +++ b/arch/arm/mach-axxia/platsmp.c @@ -39,6 +39,7 @@ static int axxia_boot_secondary(unsigned int cpu, struct task_struct *idle) return -ENOENT; syscon = of_iomap(syscon_np, 0); + of_node_put(syscon_np); if (!syscon) return -ENOMEM; diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index 5e5f1fabc3d40..634d1bc3c0114 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -214,7 +214,6 @@ config ARCH_BRCMSTB select HAVE_ARM_ARCH_TIMER select BRCMSTB_L2_IRQ select BCM7120_L2_IRQ - select ARCH_HAS_HOLES_MEMORYMODEL select ZONE_DMA if ARM_LPAE select SOC_BRCMSTB select SOC_BUS diff --git a/arch/arm/mach-bcm/bcm_kona_smc.c b/arch/arm/mach-bcm/bcm_kona_smc.c index 541e850a736c9..9175c130967ed 100644 --- a/arch/arm/mach-bcm/bcm_kona_smc.c +++ b/arch/arm/mach-bcm/bcm_kona_smc.c @@ -54,6 +54,7 @@ int __init bcm_kona_smc_init(void) return -ENODEV; prop_val = of_get_address(node, 0, &prop_size, NULL); + of_node_put(node); if (!prop_val) return -EINVAL; diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c index 1d61a7701c11f..d7e63f57b426a 100644 --- a/arch/arm/mach-cns3xxx/core.c +++ b/arch/arm/mach-cns3xxx/core.c @@ -376,6 +376,7 @@ static void __init cns3xxx_init(void) /* De-Asscer SATA Reset */ cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SATA)); } + of_node_put(dn); dn = of_find_compatible_node(NULL, NULL, "cavium,cns3420-sdhci"); if (of_device_is_available(dn)) { @@ -389,6 +390,7 @@ static void __init cns3xxx_init(void) cns3xxx_pwr_clk_en(CNS3XXX_PWR_CLK_EN(SDIO)); cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SDIO)); } + of_node_put(dn); pm_power_off = cns3xxx_power_off; diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig index dd427bd2768c3..4d3b7d0418c40 100644 --- a/arch/arm/mach-davinci/Kconfig +++ b/arch/arm/mach-davinci/Kconfig @@ -5,10 +5,10 @@ menuconfig ARCH_DAVINCI depends on ARCH_MULTI_V5 select DAVINCI_TIMER select ZONE_DMA - select ARCH_HAS_HOLES_MEMORYMODEL select PM_GENERIC_DOMAINS if PM select PM_GENERIC_DOMAINS_OF if PM && OF select REGMAP_MMIO + select RESET_CONTROLLER select HAVE_IDE select PINCTRL_SINGLE diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index 5b3549f1236c5..b2ede9bf82dff 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -1101,11 +1101,13 @@ static int __init da850_evm_config_emac(void) int ret; u32 val; struct davinci_soc_info *soc_info = &davinci_soc_info; - u8 rmii_en = soc_info->emac_pdata->rmii_en; + u8 rmii_en; if (!machine_is_davinci_da850_evm()) return 0; + rmii_en = soc_info->emac_pdata->rmii_en; + cfg_chip3_base = DA8XX_SYSCFG0_VIRT(DA8XX_CFGCHIP3_REG); val = __raw_readl(cfg_chip3_base); diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 9dab1f50a02f8..fc01137628e4b 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -8,7 +8,6 @@ menuconfig ARCH_EXYNOS bool "Samsung EXYNOS" depends on ARCH_MULTI_V7 - select ARCH_HAS_HOLES_MEMORYMODEL select ARCH_SUPPORTS_BIG_ENDIAN select ARM_AMBA select ARM_GIC diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c index 9aa483366ebcb..9025067ef6f57 100644 --- a/arch/arm/mach-exynos/exynos.c +++ b/arch/arm/mach-exynos/exynos.c @@ -46,6 +46,7 @@ void __init exynos_sysram_init(void) sysram_base_addr = of_iomap(node, 0); sysram_base_phys = of_translate_address(node, of_get_address(node, 0, NULL, NULL)); + of_node_put(node); break; } @@ -53,6 +54,7 @@ void __init exynos_sysram_init(void) if (!of_device_is_available(node)) continue; sysram_ns_base_addr = of_iomap(node, 0); + of_node_put(node); break; } } @@ -134,6 +136,7 @@ static void exynos_map_pmu(void) np = of_find_matching_node(NULL, exynos_dt_pmu_match); if (np) pmu_base_addr = of_iomap(np, 0); + of_node_put(np); } static void __init exynos_init_irq(void) diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c index 9a681b421ae11..cd861c57d5adf 100644 --- a/arch/arm/mach-exynos/mcpm-exynos.c +++ b/arch/arm/mach-exynos/mcpm-exynos.c @@ -26,6 +26,7 @@ #define EXYNOS5420_USE_L2_COMMON_UP_STATE BIT(30) static void __iomem *ns_sram_base_addr __ro_after_init; +static bool secure_firmware __ro_after_init; /* * The common v7_exit_coherency_flush API could not be used because of the @@ -58,15 +59,16 @@ static void __iomem *ns_sram_base_addr __ro_after_init; static int exynos_cpu_powerup(unsigned int cpu, unsigned int cluster) { unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER); + bool state; pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); if (cpu >= EXYNOS5420_CPUS_PER_CLUSTER || cluster >= EXYNOS5420_NR_CLUSTERS) return -EINVAL; - if (!exynos_cpu_power_state(cpunr)) { - exynos_cpu_power_up(cpunr); - + state = exynos_cpu_power_state(cpunr); + exynos_cpu_power_up(cpunr); + if (!state && secure_firmware) { /* * This assumes the cluster number of the big cores(Cortex A15) * is 0 and the Little cores(Cortex A7) is 1. @@ -258,6 +260,8 @@ static int __init exynos_mcpm_init(void) return -ENOMEM; } + secure_firmware = exynos_secure_firmware_available(); + /* * To increase the stability of KFC reset we need to program * the PMU SPARE3 register diff --git a/arch/arm/mach-footbridge/cats-pci.c b/arch/arm/mach-footbridge/cats-pci.c index 0b2fd7e2e9b42..90b1e9be430e9 100644 --- a/arch/arm/mach-footbridge/cats-pci.c +++ b/arch/arm/mach-footbridge/cats-pci.c @@ -15,14 +15,14 @@ #include /* cats host-specific stuff */ -static int irqmap_cats[] __initdata = { IRQ_PCI, IRQ_IN0, IRQ_IN1, IRQ_IN3 }; +static int irqmap_cats[] = { IRQ_PCI, IRQ_IN0, IRQ_IN1, IRQ_IN3 }; static u8 cats_no_swizzle(struct pci_dev *dev, u8 *pin) { return 0; } -static int __init cats_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +static int cats_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { if (dev->irq >= 255) return -1; /* not a valid interrupt. */ diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c index 8b81a17f675d9..e17ec92b90dd8 100644 --- a/arch/arm/mach-footbridge/dc21285.c +++ b/arch/arm/mach-footbridge/dc21285.c @@ -66,15 +66,15 @@ dc21285_read_config(struct pci_bus *bus, unsigned int devfn, int where, if (addr) switch (size) { case 1: - asm("ldrb %0, [%1, %2]" + asm volatile("ldrb %0, [%1, %2]" : "=r" (v) : "r" (addr), "r" (where) : "cc"); break; case 2: - asm("ldrh %0, [%1, %2]" + asm volatile("ldrh %0, [%1, %2]" : "=r" (v) : "r" (addr), "r" (where) : "cc"); break; case 4: - asm("ldr %0, [%1, %2]" + asm volatile("ldr %0, [%1, %2]" : "=r" (v) : "r" (addr), "r" (where) : "cc"); break; } @@ -100,17 +100,17 @@ dc21285_write_config(struct pci_bus *bus, unsigned int devfn, int where, if (addr) switch (size) { case 1: - asm("strb %0, [%1, %2]" + asm volatile("strb %0, [%1, %2]" : : "r" (value), "r" (addr), "r" (where) : "cc"); break; case 2: - asm("strh %0, [%1, %2]" + asm volatile("strh %0, [%1, %2]" : : "r" (value), "r" (addr), "r" (where) : "cc"); break; case 4: - asm("str %0, [%1, %2]" + asm volatile("str %0, [%1, %2]" : : "r" (value), "r" (addr), "r" (where) : "cc"); break; diff --git a/arch/arm/mach-footbridge/ebsa285-pci.c b/arch/arm/mach-footbridge/ebsa285-pci.c index 6f28aaa9ca79b..c3f280d08fa7f 100644 --- a/arch/arm/mach-footbridge/ebsa285-pci.c +++ b/arch/arm/mach-footbridge/ebsa285-pci.c @@ -14,9 +14,9 @@ #include #include -static int irqmap_ebsa285[] __initdata = { IRQ_IN3, IRQ_IN1, IRQ_IN0, IRQ_PCI }; +static int irqmap_ebsa285[] = { IRQ_IN3, IRQ_IN1, IRQ_IN0, IRQ_PCI }; -static int __init ebsa285_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +static int ebsa285_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { if (dev->vendor == PCI_VENDOR_ID_CONTAQ && dev->device == PCI_DEVICE_ID_CONTAQ_82C693) diff --git a/arch/arm/mach-footbridge/netwinder-pci.c b/arch/arm/mach-footbridge/netwinder-pci.c index 9473aa0305e5f..e8304392074b8 100644 --- a/arch/arm/mach-footbridge/netwinder-pci.c +++ b/arch/arm/mach-footbridge/netwinder-pci.c @@ -18,7 +18,7 @@ * We now use the slot ID instead of the device identifiers to select * which interrupt is routed where. */ -static int __init netwinder_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +static int netwinder_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { switch (slot) { case 0: /* host bridge */ diff --git a/arch/arm/mach-footbridge/personal-pci.c b/arch/arm/mach-footbridge/personal-pci.c index 4391e433a4b2f..9d19aa98a663e 100644 --- a/arch/arm/mach-footbridge/personal-pci.c +++ b/arch/arm/mach-footbridge/personal-pci.c @@ -14,13 +14,12 @@ #include #include -static int irqmap_personal_server[] __initdata = { +static int irqmap_personal_server[] = { IRQ_IN0, IRQ_IN1, IRQ_IN2, IRQ_IN3, 0, 0, 0, IRQ_DOORBELLHOST, IRQ_DMA1, IRQ_DMA2, IRQ_PCI }; -static int __init personal_server_map_irq(const struct pci_dev *dev, u8 slot, - u8 pin) +static int personal_server_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { unsigned char line; diff --git a/arch/arm/mach-highbank/Kconfig b/arch/arm/mach-highbank/Kconfig index 1bc68913d62c1..9de38ce8124f2 100644 --- a/arch/arm/mach-highbank/Kconfig +++ b/arch/arm/mach-highbank/Kconfig @@ -2,7 +2,6 @@ config ARCH_HIGHBANK bool "Calxeda ECX-1000/2000 (Highbank/Midway)" depends on ARCH_MULTI_V7 - select ARCH_HAS_HOLES_MEMORYMODEL select ARCH_SUPPORTS_BIG_ENDIAN select ARM_AMBA select ARM_ERRATA_764369 if SMP diff --git a/arch/arm/mach-hisi/platsmp.c b/arch/arm/mach-hisi/platsmp.c index da7a09c1dae56..1cd1d9b0aabf9 100644 --- a/arch/arm/mach-hisi/platsmp.c +++ b/arch/arm/mach-hisi/platsmp.c @@ -67,14 +67,17 @@ static void __init hi3xxx_smp_prepare_cpus(unsigned int max_cpus) } ctrl_base = of_iomap(np, 0); if (!ctrl_base) { + of_node_put(np); pr_err("failed to map address\n"); return; } if (of_property_read_u32(np, "smp-offset", &offset) < 0) { + of_node_put(np); pr_err("failed to find smp-offset property\n"); return; } ctrl_base += offset; + of_node_put(np); } } @@ -160,6 +163,7 @@ static int hip01_boot_secondary(unsigned int cpu, struct task_struct *idle) if (WARN_ON(!node)) return -1; ctrl_base = of_iomap(node, 0); + of_node_put(node); /* set the secondary core boot from DDR */ remap_reg_value = readl_relaxed(ctrl_base + REG_SC_CTRL); diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig index 593bf1519608c..95584ee02b559 100644 --- a/arch/arm/mach-imx/Kconfig +++ b/arch/arm/mach-imx/Kconfig @@ -520,6 +520,7 @@ config SOC_IMX6UL bool "i.MX6 UltraLite support" select PINCTRL_IMX6UL select SOC_IMX6 + select ARM_ERRATA_814220 help This enables support for Freescale i.MX6 UltraLite processor. @@ -556,6 +557,7 @@ config SOC_IMX7D select PINCTRL_IMX7D select SOC_IMX7D_CA7 if ARCH_MULTI_V7 select SOC_IMX7D_CM4 if ARM_SINGLE_ARMV7M + select ARM_ERRATA_814220 if ARCH_MULTI_V7 help This enables support for Freescale i.MX7 Dual processor. diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index 35ff620537e62..e7364e6c8c6b0 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -91,6 +91,10 @@ AFLAGS_suspend-imx6.o :=-Wa,-march=armv7-a obj-$(CONFIG_SOC_IMX6) += suspend-imx6.o obj-$(CONFIG_SOC_IMX53) += suspend-imx53.o endif +ifeq ($(CONFIG_ARM_CPU_SUSPEND),y) +AFLAGS_resume-imx6.o :=-Wa,-march=armv7-a +obj-$(CONFIG_SOC_IMX6) += resume-imx6.o +endif obj-$(CONFIG_SOC_IMX6) += pm-imx6.o obj-$(CONFIG_SOC_IMX1) += mach-imx1.o diff --git a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h index 912aeceb4ff81..5aa5796cff0e1 100644 --- a/arch/arm/mach-imx/common.h +++ b/arch/arm/mach-imx/common.h @@ -109,17 +109,17 @@ void imx_cpu_die(unsigned int cpu); int imx_cpu_kill(unsigned int cpu); #ifdef CONFIG_SUSPEND -void v7_cpu_resume(void); void imx53_suspend(void __iomem *ocram_vbase); extern const u32 imx53_suspend_sz; void imx6_suspend(void __iomem *ocram_vbase); #else -static inline void v7_cpu_resume(void) {} static inline void imx53_suspend(void __iomem *ocram_vbase) {} static const u32 imx53_suspend_sz; static inline void imx6_suspend(void __iomem *ocram_vbase) {} #endif +void v7_cpu_resume(void); + void imx6_pm_ccm_init(const char *ccm_compat); void imx6q_pm_init(void); void imx6dl_pm_init(void); diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index 0dfd0ae7a63dd..af12668d0bf51 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -103,6 +103,7 @@ struct mmdc_pmu { struct perf_event *mmdc_events[MMDC_NUM_COUNTERS]; struct hlist_node node; struct fsl_mmdc_devtype_data *devtype_data; + struct clk *mmdc_ipg_clk; }; /* @@ -462,11 +463,14 @@ static int imx_mmdc_remove(struct platform_device *pdev) cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node); perf_pmu_unregister(&pmu_mmdc->pmu); + iounmap(pmu_mmdc->mmdc_base); + clk_disable_unprepare(pmu_mmdc->mmdc_ipg_clk); kfree(pmu_mmdc); return 0; } -static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_base) +static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_base, + struct clk *mmdc_ipg_clk) { struct mmdc_pmu *pmu_mmdc; char *name; @@ -494,6 +498,7 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b } mmdc_num = mmdc_pmu_init(pmu_mmdc, mmdc_base, &pdev->dev); + pmu_mmdc->mmdc_ipg_clk = mmdc_ipg_clk; if (mmdc_num == 0) name = "mmdc"; else @@ -529,7 +534,7 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b #else #define imx_mmdc_remove NULL -#define imx_mmdc_perf_init(pdev, mmdc_base) 0 +#define imx_mmdc_perf_init(pdev, mmdc_base, mmdc_ipg_clk) 0 #endif static int imx_mmdc_probe(struct platform_device *pdev) @@ -567,7 +572,13 @@ static int imx_mmdc_probe(struct platform_device *pdev) val &= ~(1 << BP_MMDC_MAPSR_PSD); writel_relaxed(val, reg); - return imx_mmdc_perf_init(pdev, mmdc_base); + err = imx_mmdc_perf_init(pdev, mmdc_base, mmdc_ipg_clk); + if (err) { + iounmap(mmdc_base); + clk_disable_unprepare(mmdc_ipg_clk); + } + + return err; } int imx_mmdc_get_ddr_type(void) diff --git a/arch/arm/mach-imx/pm-imx5.c b/arch/arm/mach-imx/pm-imx5.c index f057df813f83a..e9962b48e30cb 100644 --- a/arch/arm/mach-imx/pm-imx5.c +++ b/arch/arm/mach-imx/pm-imx5.c @@ -295,14 +295,14 @@ static int __init imx_suspend_alloc_ocram( if (!ocram_pool) { pr_warn("%s: ocram pool unavailable!\n", __func__); ret = -ENODEV; - goto put_node; + goto put_device; } ocram_base = gen_pool_alloc(ocram_pool, size); if (!ocram_base) { pr_warn("%s: unable to alloc ocram!\n", __func__); ret = -ENOMEM; - goto put_node; + goto put_device; } phys = gen_pool_virt_to_phys(ocram_pool, ocram_base); @@ -312,6 +312,8 @@ static int __init imx_suspend_alloc_ocram( if (virt_out) *virt_out = virt; +put_device: + put_device(&pdev->dev); put_node: of_node_put(node); diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c index 1c0ecad3620e0..1b73e4e76310c 100644 --- a/arch/arm/mach-imx/pm-imx6.c +++ b/arch/arm/mach-imx/pm-imx6.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -493,14 +494,14 @@ static int __init imx6q_suspend_init(const struct imx6_pm_socdata *socdata) if (!ocram_pool) { pr_warn("%s: ocram pool unavailable!\n", __func__); ret = -ENODEV; - goto put_node; + goto put_device; } ocram_base = gen_pool_alloc(ocram_pool, MX6Q_SUSPEND_OCRAM_SIZE); if (!ocram_base) { pr_warn("%s: unable to alloc ocram!\n", __func__); ret = -ENOMEM; - goto put_node; + goto put_device; } ocram_pbase = gen_pool_virt_to_phys(ocram_pool, ocram_base); @@ -523,7 +524,7 @@ static int __init imx6q_suspend_init(const struct imx6_pm_socdata *socdata) ret = imx6_pm_get_base(&pm_info->mmdc_base, socdata->mmdc_compat); if (ret) { pr_warn("%s: failed to get mmdc base %d!\n", __func__, ret); - goto put_node; + goto put_device; } ret = imx6_pm_get_base(&pm_info->src_base, socdata->src_compat); @@ -570,7 +571,7 @@ static int __init imx6q_suspend_init(const struct imx6_pm_socdata *socdata) &imx6_suspend, MX6Q_SUSPEND_OCRAM_SIZE - sizeof(*pm_info)); - goto put_node; + goto put_device; pl310_cache_map_failed: iounmap(pm_info->gpc_base.vbase); @@ -580,6 +581,8 @@ static int __init imx6q_suspend_init(const struct imx6_pm_socdata *socdata) iounmap(pm_info->src_base.vbase); src_map_failed: iounmap(pm_info->mmdc_base.vbase); +put_device: + put_device(&pdev->dev); put_node: of_node_put(node); @@ -616,6 +619,7 @@ static void __init imx6_pm_common_init(const struct imx6_pm_socdata static void imx6_pm_stby_poweroff(void) { + gic_cpu_if_down(0); imx6_set_lpm(STOP_POWER_OFF); imx6q_suspend_finish(0); diff --git a/arch/arm/mach-imx/resume-imx6.S b/arch/arm/mach-imx/resume-imx6.S new file mode 100644 index 0000000000000..5bd1ba7ef15b6 --- /dev/null +++ b/arch/arm/mach-imx/resume-imx6.S @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2014 Freescale Semiconductor, Inc. + */ + +#include +#include +#include +#include +#include "hardware.h" + +/* + * The following code must assume it is running from physical address + * where absolute virtual addresses to the data section have to be + * turned into relative ones. + */ + +ENTRY(v7_cpu_resume) + bl v7_invalidate_l1 +#ifdef CONFIG_CACHE_L2X0 + bl l2c310_early_resume +#endif + b cpu_resume +ENDPROC(v7_cpu_resume) diff --git a/arch/arm/mach-imx/suspend-imx53.S b/arch/arm/mach-imx/suspend-imx53.S index 41b8aad653634..46570ec2fbcfe 100644 --- a/arch/arm/mach-imx/suspend-imx53.S +++ b/arch/arm/mach-imx/suspend-imx53.S @@ -28,11 +28,11 @@ * ^ * ^ * imx53_suspend code - * PM_INFO structure(imx53_suspend_info) + * PM_INFO structure(imx5_cpu_suspend_info) * ======================== low address ======================= */ -/* Offsets of members of struct imx53_suspend_info */ +/* Offsets of members of struct imx5_cpu_suspend_info */ #define SUSPEND_INFO_MX53_M4IF_V_OFFSET 0x0 #define SUSPEND_INFO_MX53_IOMUXC_V_OFFSET 0x4 #define SUSPEND_INFO_MX53_IO_COUNT_OFFSET 0x8 diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S index 062391ff13dae..e06f946b75b96 100644 --- a/arch/arm/mach-imx/suspend-imx6.S +++ b/arch/arm/mach-imx/suspend-imx6.S @@ -67,6 +67,7 @@ #define MX6Q_CCM_CCR 0x0 .align 3 + .arm .macro sync_l2_cache @@ -327,17 +328,3 @@ resume: ret lr ENDPROC(imx6_suspend) - -/* - * The following code must assume it is running from physical address - * where absolute virtual addresses to the data section have to be - * turned into relative ones. - */ - -ENTRY(v7_cpu_resume) - bl v7_invalidate_l1 -#ifdef CONFIG_CACHE_L2X0 - bl l2c310_early_resume -#endif - b cpu_resume -ENDPROC(v7_cpu_resume) diff --git a/arch/arm/mach-integrator/Kconfig b/arch/arm/mach-integrator/Kconfig index 982eabc361635..2406cab73835a 100644 --- a/arch/arm/mach-integrator/Kconfig +++ b/arch/arm/mach-integrator/Kconfig @@ -4,6 +4,8 @@ menuconfig ARCH_INTEGRATOR depends on ARCH_MULTI_V4T || ARCH_MULTI_V5 || ARCH_MULTI_V6 select ARM_AMBA select COMMON_CLK_VERSATILE + select CMA + select DMA_CMA select HAVE_TCM select ICST select MFD_SYSCON @@ -35,14 +37,13 @@ config INTEGRATOR_IMPD1 select ARM_VIC select GPIO_PL061 select GPIOLIB + select REGULATOR + select REGULATOR_FIXED_VOLTAGE help The IM-PD1 is an add-on logic module for the Integrator which allows ARM(R) Ltd PrimeCells to be developed and evaluated. The IM-PD1 can be found on the Integrator/PP2 platform. - To compile this driver as a module, choose M here: the - module will be called impd1. - config INTEGRATOR_CM7TDMI bool "Integrator/CM7TDMI core module" depends on ARCH_INTEGRATOR_AP diff --git a/arch/arm/mach-iop32x/include/mach/entry-macro.S b/arch/arm/mach-iop32x/include/mach/entry-macro.S index 8e6766d4621eb..341e5d9a6616d 100644 --- a/arch/arm/mach-iop32x/include/mach/entry-macro.S +++ b/arch/arm/mach-iop32x/include/mach/entry-macro.S @@ -20,7 +20,7 @@ mrc p6, 0, \irqstat, c8, c0, 0 @ Read IINTSRC cmp \irqstat, #0 clzne \irqnr, \irqstat - rsbne \irqnr, \irqnr, #31 + rsbne \irqnr, \irqnr, #32 .endm .macro arch_ret_to_user, tmp1, tmp2 diff --git a/arch/arm/mach-iop32x/include/mach/irqs.h b/arch/arm/mach-iop32x/include/mach/irqs.h index c4e78df428e86..e09ae5f48aec5 100644 --- a/arch/arm/mach-iop32x/include/mach/irqs.h +++ b/arch/arm/mach-iop32x/include/mach/irqs.h @@ -9,6 +9,6 @@ #ifndef __IRQS_H #define __IRQS_H -#define NR_IRQS 32 +#define NR_IRQS 33 #endif diff --git a/arch/arm/mach-iop32x/irq.c b/arch/arm/mach-iop32x/irq.c index 2d48bf1398c10..d1e8824cbd824 100644 --- a/arch/arm/mach-iop32x/irq.c +++ b/arch/arm/mach-iop32x/irq.c @@ -32,14 +32,14 @@ static void intstr_write(u32 val) static void iop32x_irq_mask(struct irq_data *d) { - iop32x_mask &= ~(1 << d->irq); + iop32x_mask &= ~(1 << (d->irq - 1)); intctl_write(iop32x_mask); } static void iop32x_irq_unmask(struct irq_data *d) { - iop32x_mask |= 1 << d->irq; + iop32x_mask |= 1 << (d->irq - 1); intctl_write(iop32x_mask); } @@ -65,7 +65,7 @@ void __init iop32x_init_irq(void) machine_is_em7210()) *IOP3XX_PCIIRSR = 0x0f; - for (i = 0; i < NR_IRQS; i++) { + for (i = 1; i < NR_IRQS; i++) { irq_set_chip_and_handler(i, &ext_chip, handle_level_irq); irq_clear_status_flags(i, IRQ_NOREQUEST | IRQ_NOPROBE); } diff --git a/arch/arm/mach-iop32x/irqs.h b/arch/arm/mach-iop32x/irqs.h index 69858e4e905d1..e1dfc8b4e7d7e 100644 --- a/arch/arm/mach-iop32x/irqs.h +++ b/arch/arm/mach-iop32x/irqs.h @@ -7,36 +7,40 @@ #ifndef __IOP32X_IRQS_H #define __IOP32X_IRQS_H +/* Interrupts in Linux start at 1, hardware starts at 0 */ + +#define IOP_IRQ(x) ((x) + 1) + /* * IOP80321 chipset interrupts */ -#define IRQ_IOP32X_DMA0_EOT 0 -#define IRQ_IOP32X_DMA0_EOC 1 -#define IRQ_IOP32X_DMA1_EOT 2 -#define IRQ_IOP32X_DMA1_EOC 3 -#define IRQ_IOP32X_AA_EOT 6 -#define IRQ_IOP32X_AA_EOC 7 -#define IRQ_IOP32X_CORE_PMON 8 -#define IRQ_IOP32X_TIMER0 9 -#define IRQ_IOP32X_TIMER1 10 -#define IRQ_IOP32X_I2C_0 11 -#define IRQ_IOP32X_I2C_1 12 -#define IRQ_IOP32X_MESSAGING 13 -#define IRQ_IOP32X_ATU_BIST 14 -#define IRQ_IOP32X_PERFMON 15 -#define IRQ_IOP32X_CORE_PMU 16 -#define IRQ_IOP32X_BIU_ERR 17 -#define IRQ_IOP32X_ATU_ERR 18 -#define IRQ_IOP32X_MCU_ERR 19 -#define IRQ_IOP32X_DMA0_ERR 20 -#define IRQ_IOP32X_DMA1_ERR 21 -#define IRQ_IOP32X_AA_ERR 23 -#define IRQ_IOP32X_MSG_ERR 24 -#define IRQ_IOP32X_SSP 25 -#define IRQ_IOP32X_XINT0 27 -#define IRQ_IOP32X_XINT1 28 -#define IRQ_IOP32X_XINT2 29 -#define IRQ_IOP32X_XINT3 30 -#define IRQ_IOP32X_HPI 31 +#define IRQ_IOP32X_DMA0_EOT IOP_IRQ(0) +#define IRQ_IOP32X_DMA0_EOC IOP_IRQ(1) +#define IRQ_IOP32X_DMA1_EOT IOP_IRQ(2) +#define IRQ_IOP32X_DMA1_EOC IOP_IRQ(3) +#define IRQ_IOP32X_AA_EOT IOP_IRQ(6) +#define IRQ_IOP32X_AA_EOC IOP_IRQ(7) +#define IRQ_IOP32X_CORE_PMON IOP_IRQ(8) +#define IRQ_IOP32X_TIMER0 IOP_IRQ(9) +#define IRQ_IOP32X_TIMER1 IOP_IRQ(10) +#define IRQ_IOP32X_I2C_0 IOP_IRQ(11) +#define IRQ_IOP32X_I2C_1 IOP_IRQ(12) +#define IRQ_IOP32X_MESSAGING IOP_IRQ(13) +#define IRQ_IOP32X_ATU_BIST IOP_IRQ(14) +#define IRQ_IOP32X_PERFMON IOP_IRQ(15) +#define IRQ_IOP32X_CORE_PMU IOP_IRQ(16) +#define IRQ_IOP32X_BIU_ERR IOP_IRQ(17) +#define IRQ_IOP32X_ATU_ERR IOP_IRQ(18) +#define IRQ_IOP32X_MCU_ERR IOP_IRQ(19) +#define IRQ_IOP32X_DMA0_ERR IOP_IRQ(20) +#define IRQ_IOP32X_DMA1_ERR IOP_IRQ(21) +#define IRQ_IOP32X_AA_ERR IOP_IRQ(23) +#define IRQ_IOP32X_MSG_ERR IOP_IRQ(24) +#define IRQ_IOP32X_SSP IOP_IRQ(25) +#define IRQ_IOP32X_XINT0 IOP_IRQ(27) +#define IRQ_IOP32X_XINT1 IOP_IRQ(28) +#define IRQ_IOP32X_XINT2 IOP_IRQ(29) +#define IRQ_IOP32X_XINT3 IOP_IRQ(30) +#define IRQ_IOP32X_HPI IOP_IRQ(31) #endif diff --git a/arch/arm/mach-ixp4xx/Kconfig b/arch/arm/mach-ixp4xx/Kconfig index f7211b57b1e78..165c184801e19 100644 --- a/arch/arm/mach-ixp4xx/Kconfig +++ b/arch/arm/mach-ixp4xx/Kconfig @@ -13,7 +13,6 @@ config MACH_IXP4XX_OF select I2C select I2C_IOP3XX select PCI - select TIMER_OF select USE_OF help Say 'Y' here to support Device Tree-based IXP4xx platforms. diff --git a/arch/arm/mach-keystone/keystone.c b/arch/arm/mach-keystone/keystone.c index 638808c4e1224..697adedaced49 100644 --- a/arch/arm/mach-keystone/keystone.c +++ b/arch/arm/mach-keystone/keystone.c @@ -62,7 +62,7 @@ static void __init keystone_init(void) static long long __init keystone_pv_fixup(void) { long long offset; - phys_addr_t mem_start, mem_end; + u64 mem_start, mem_end; mem_start = memblock_start_of_DRAM(); mem_end = memblock_end_of_DRAM(); @@ -75,7 +75,7 @@ static long long __init keystone_pv_fixup(void) if (mem_start < KEYSTONE_HIGH_PHYS_START || mem_end > KEYSTONE_HIGH_PHYS_END) { pr_crit("Invalid address space for memory (%08llx-%08llx)\n", - (u64)mem_start, (u64)mem_end); + mem_start, mem_end); return 0; } diff --git a/arch/arm/mach-mediatek/Kconfig b/arch/arm/mach-mediatek/Kconfig index 9e0f592d87d8e..35a3430c7942d 100644 --- a/arch/arm/mach-mediatek/Kconfig +++ b/arch/arm/mach-mediatek/Kconfig @@ -30,6 +30,7 @@ config MACH_MT7623 config MACH_MT7629 bool "MediaTek MT7629 SoCs support" default ARCH_MEDIATEK + select HAVE_ARM_ARCH_TIMER config MACH_MT8127 bool "MediaTek MT8127 SoCs support" diff --git a/arch/arm/mach-meson/platsmp.c b/arch/arm/mach-meson/platsmp.c index 4b8ad728bb42a..32ac60b89fdcc 100644 --- a/arch/arm/mach-meson/platsmp.c +++ b/arch/arm/mach-meson/platsmp.c @@ -71,6 +71,7 @@ static void __init meson_smp_prepare_cpus(const char *scu_compatible, } sram_base = of_iomap(node, 0); + of_node_put(node); if (!sram_base) { pr_err("Couldn't map SRAM registers\n"); return; @@ -91,6 +92,7 @@ static void __init meson_smp_prepare_cpus(const char *scu_compatible, } scu_base = of_iomap(node, 0); + of_node_put(node); if (!scu_base) { pr_err("Couldn't map SCU registers\n"); return; diff --git a/arch/arm/mach-mmp/sram.c b/arch/arm/mach-mmp/sram.c index 6794e2db1ad5f..ecc46c31004f6 100644 --- a/arch/arm/mach-mmp/sram.c +++ b/arch/arm/mach-mmp/sram.c @@ -72,6 +72,8 @@ static int sram_probe(struct platform_device *pdev) if (!info) return -ENOMEM; + platform_set_drvdata(pdev, info); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (res == NULL) { dev_err(&pdev->dev, "no memory resource defined\n"); @@ -107,8 +109,6 @@ static int sram_probe(struct platform_device *pdev) list_add(&info->node, &sram_bank_list); mutex_unlock(&sram_lock); - platform_set_drvdata(pdev, info); - dev_info(&pdev->dev, "initialized\n"); return 0; @@ -127,17 +127,19 @@ static int sram_remove(struct platform_device *pdev) struct sram_bank_info *info; info = platform_get_drvdata(pdev); - if (info == NULL) - return -ENODEV; - mutex_lock(&sram_lock); - list_del(&info->node); - mutex_unlock(&sram_lock); + if (info->sram_size) { + mutex_lock(&sram_lock); + list_del(&info->node); + mutex_unlock(&sram_lock); + + gen_pool_destroy(info->gpool); + iounmap(info->sram_virt); + kfree(info->pool_name); + } - gen_pool_destroy(info->gpool); - iounmap(info->sram_virt); - kfree(info->pool_name); kfree(info); + return 0; } diff --git a/arch/arm/mach-npcm/Kconfig b/arch/arm/mach-npcm/Kconfig index 880bc2a5cadaa..7f7002dc2b21f 100644 --- a/arch/arm/mach-npcm/Kconfig +++ b/arch/arm/mach-npcm/Kconfig @@ -11,7 +11,7 @@ config ARCH_NPCM7XX depends on ARCH_MULTI_V7 select PINCTRL_NPCM7XX select NPCM7XX_TIMER - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select CACHE_L2X0 select ARM_GIC select HAVE_ARM_TWD if SMP diff --git a/arch/arm/mach-omap1/ams-delta-fiq-handler.S b/arch/arm/mach-omap1/ams-delta-fiq-handler.S index 14a6c3eb32985..f745a65d3bd7a 100644 --- a/arch/arm/mach-omap1/ams-delta-fiq-handler.S +++ b/arch/arm/mach-omap1/ams-delta-fiq-handler.S @@ -15,6 +15,7 @@ #include #include +#include #include "ams-delta-fiq.h" #include "board-ams-delta.h" diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c index bd5be82101f32..d89bda12bf3cd 100644 --- a/arch/arm/mach-omap1/clock.c +++ b/arch/arm/mach-omap1/clock.c @@ -41,7 +41,7 @@ static DEFINE_SPINLOCK(clockfw_lock); unsigned long omap1_uart_recalc(struct clk *clk) { unsigned int val = __raw_readl(clk->enable_reg); - return val & clk->enable_bit ? 48000000 : 12000000; + return val & 1 << clk->enable_bit ? 48000000 : 12000000; } unsigned long omap1_sossi_recalc(struct clk *clk) diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c index d068958d6f8a4..2c1e2b32b9b36 100644 --- a/arch/arm/mach-omap1/pm.c +++ b/arch/arm/mach-omap1/pm.c @@ -596,11 +596,6 @@ static irqreturn_t omap_wakeup_interrupt(int irq, void *dev) return IRQ_HANDLED; } -static struct irqaction omap_wakeup_irq = { - .name = "peripheral wakeup", - .handler = omap_wakeup_interrupt -}; - static const struct platform_suspend_ops omap_pm_ops = { @@ -613,6 +608,7 @@ static const struct platform_suspend_ops omap_pm_ops = { static int __init omap_pm_init(void) { int error = 0; + int irq; if (!cpu_class_is_omap1()) return -ENODEV; @@ -656,9 +652,12 @@ static int __init omap_pm_init(void) arm_pm_idle = omap1_pm_idle; if (cpu_is_omap7xx()) - setup_irq(INT_7XX_WAKE_UP_REQ, &omap_wakeup_irq); + irq = INT_7XX_WAKE_UP_REQ; else if (cpu_is_omap16xx()) - setup_irq(INT_1610_WAKE_UP_REQ, &omap_wakeup_irq); + irq = INT_1610_WAKE_UP_REQ; + if (request_irq(irq, omap_wakeup_interrupt, 0, "peripheral wakeup", + NULL)) + pr_err("Failed to request irq %d (peripheral wakeup)\n", irq); /* Program new power ramp-up time * (0 for most boards since we don't lower voltage when in deep sleep) diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c index 524977a31a49c..de590a85a42b3 100644 --- a/arch/arm/mach-omap1/time.c +++ b/arch/arm/mach-omap1/time.c @@ -155,15 +155,11 @@ static irqreturn_t omap_mpu_timer1_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static struct irqaction omap_mpu_timer1_irq = { - .name = "mpu_timer1", - .flags = IRQF_TIMER | IRQF_IRQPOLL, - .handler = omap_mpu_timer1_interrupt, -}; - static __init void omap_init_mpu_timer(unsigned long rate) { - setup_irq(INT_TIMER1, &omap_mpu_timer1_irq); + if (request_irq(INT_TIMER1, omap_mpu_timer1_interrupt, + IRQF_TIMER | IRQF_IRQPOLL, "mpu_timer1", NULL)) + pr_err("Failed to request irq %d (mpu_timer1)\n", INT_TIMER1); omap_mpu_timer_start(0, (rate / HZ) - 1, 1); clockevent_mpu_timer1.cpumask = cpumask_of(0); diff --git a/arch/arm/mach-omap1/timer32k.c b/arch/arm/mach-omap1/timer32k.c index 0ae6c52a7d70b..780fdf03c3cee 100644 --- a/arch/arm/mach-omap1/timer32k.c +++ b/arch/arm/mach-omap1/timer32k.c @@ -148,15 +148,11 @@ static irqreturn_t omap_32k_timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static struct irqaction omap_32k_timer_irq = { - .name = "32KHz timer", - .flags = IRQF_TIMER | IRQF_IRQPOLL, - .handler = omap_32k_timer_interrupt, -}; - static __init void omap_init_32k_timer(void) { - setup_irq(INT_OS_TIMER, &omap_32k_timer_irq); + if (request_irq(INT_OS_TIMER, omap_32k_timer_interrupt, + IRQF_TIMER | IRQF_IRQPOLL, "32KHz timer", NULL)) + pr_err("Failed to request irq %d(32KHz timer)\n", INT_OS_TIMER); clockevent_32k_timer.cpumask = cpumask_of(0); clockevents_config_and_register(&clockevent_32k_timer, diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index fdb6743760a2e..0211f4aa8cc75 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -94,7 +94,7 @@ config SOC_DRA7XX config ARCH_OMAP2PLUS bool select ARCH_HAS_BANDGAP - select ARCH_HAS_HOLES_MEMORYMODEL + select ARCH_HAS_RESET_CONTROLLER select ARCH_OMAP select CLKSRC_MMIO select GENERIC_IRQ_CHIP diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index ff992f8895ee4..ad512f07d5689 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -327,7 +327,7 @@ DT_MACHINE_START(DRA74X_DT, "Generic DRA74X (Flattened Device Tree)") .init_late = dra7xx_init_late, .init_irq = omap_gic_of_init, .init_machine = omap_generic_init, - .init_time = omap5_realtime_timer_init, + .init_time = omap3_gptimer_timer_init, .dt_compat = dra74x_boards_compat, .restart = omap44xx_restart, MACHINE_END @@ -350,7 +350,7 @@ DT_MACHINE_START(DRA72X_DT, "Generic DRA72X (Flattened Device Tree)") .init_late = dra7xx_init_late, .init_irq = omap_gic_of_init, .init_machine = omap_generic_init, - .init_time = omap5_realtime_timer_init, + .init_time = omap3_gptimer_timer_init, .dt_compat = dra72x_boards_compat, .restart = omap44xx_restart, MACHINE_END diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c index 418a61ecb8275..5e86145db0e2a 100644 --- a/arch/arm/mach-omap2/board-n8x0.c +++ b/arch/arm/mach-omap2/board-n8x0.c @@ -322,6 +322,7 @@ static int n8x0_mmc_get_cover_state(struct device *dev, int slot) static void n8x0_mmc_callback(void *data, u8 card_mask) { +#ifdef CONFIG_MMC_OMAP int bit, *openp, index; if (board_is_n800()) { @@ -339,7 +340,6 @@ static void n8x0_mmc_callback(void *data, u8 card_mask) else *openp = 0; -#ifdef CONFIG_MMC_OMAP omap_mmc_notify_cover_event(mmc_device, index, *openp); #else pr_warn("MMC: notify cover event not available\n"); diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c index 532a3e4b98c6f..090a8aafb25e1 100644 --- a/arch/arm/mach-omap2/cpuidle34xx.c +++ b/arch/arm/mach-omap2/cpuidle34xx.c @@ -109,6 +109,7 @@ static int omap3_enter_idle(struct cpuidle_device *dev, int index) { struct omap3_idle_statedata *cx = &omap3_idle_data[index]; + int error; if (omap_irq_pending() || need_resched()) goto return_sleep_time; @@ -125,8 +126,11 @@ static int omap3_enter_idle(struct cpuidle_device *dev, * Call idle CPU PM enter notifier chain so that * VFP context is saved. */ - if (cx->mpu_state == PWRDM_POWER_OFF) - cpu_pm_enter(); + if (cx->mpu_state == PWRDM_POWER_OFF) { + error = cpu_pm_enter(); + if (error) + goto out_clkdm_set; + } /* Execute ARM wfi */ omap_sram_idle(); @@ -139,6 +143,7 @@ static int omap3_enter_idle(struct cpuidle_device *dev, pwrdm_read_prev_pwrst(mpu_pd) == PWRDM_POWER_OFF) cpu_pm_exit(); +out_clkdm_set: /* Re-allow idle for C1 */ if (cx->flags & OMAP_CPUIDLE_CX_NO_CLKDM_IDLE) clkdm_allow_idle(mpu_pd->pwrdm_clkdms[0]); diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index fe75d4fa60738..de37027ad7587 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -122,6 +122,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, { struct idle_statedata *cx = state_ptr + index; u32 mpuss_can_lose_context = 0; + int error; /* * CPU0 has to wait and stay ON until CPU1 is OFF state. @@ -150,27 +151,37 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, (cx->mpu_logic_state == PWRDM_POWER_OFF); /* Enter broadcast mode for periodic timers */ - tick_broadcast_enable(); + RCU_NONIDLE(tick_broadcast_enable()); /* Enter broadcast mode for one-shot timers */ - tick_broadcast_enter(); + RCU_NONIDLE(tick_broadcast_enter()); /* * Call idle CPU PM enter notifier chain so that * VFP and per CPU interrupt context is saved. */ - cpu_pm_enter(); + error = cpu_pm_enter(); + if (error) + goto cpu_pm_out; if (dev->cpu == 0) { pwrdm_set_logic_retst(mpu_pd, cx->mpu_logic_state); - omap_set_pwrdm_state(mpu_pd, cx->mpu_state); + RCU_NONIDLE(omap_set_pwrdm_state(mpu_pd, cx->mpu_state)); /* * Call idle CPU cluster PM enter notifier chain * to save GIC and wakeupgen context. */ - if (mpuss_can_lose_context) - cpu_cluster_pm_enter(); + if (mpuss_can_lose_context) { + error = cpu_cluster_pm_enter(); + if (error) { + index = 0; + cx = state_ptr + index; + pwrdm_set_logic_retst(mpu_pd, cx->mpu_logic_state); + RCU_NONIDLE(omap_set_pwrdm_state(mpu_pd, cx->mpu_state)); + mpuss_can_lose_context = 0; + } + } } omap4_enter_lowpower(dev->cpu, cx->cpu_state); @@ -183,9 +194,9 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, mpuss_can_lose_context) gic_dist_disable(); - clkdm_deny_idle(cpu_clkdm[1]); - omap_set_pwrdm_state(cpu_pd[1], PWRDM_POWER_ON); - clkdm_allow_idle(cpu_clkdm[1]); + RCU_NONIDLE(clkdm_deny_idle(cpu_clkdm[1])); + RCU_NONIDLE(omap_set_pwrdm_state(cpu_pd[1], PWRDM_POWER_ON)); + RCU_NONIDLE(clkdm_allow_idle(cpu_clkdm[1])); if (IS_PM44XX_ERRATUM(PM_OMAP4_ROM_SMP_BOOT_ERRATUM_GICD) && mpuss_can_lose_context) { @@ -197,12 +208,6 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, } } - /* - * Call idle CPU PM exit notifier chain to restore - * VFP and per CPU IRQ context. - */ - cpu_pm_exit(); - /* * Call idle CPU cluster PM exit notifier chain * to restore GIC and wakeupgen context. @@ -210,7 +215,14 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, if (dev->cpu == 0 && mpuss_can_lose_context) cpu_cluster_pm_exit(); - tick_broadcast_exit(); + /* + * Call idle CPU PM exit notifier chain to restore + * VFP and per CPU IRQ context. + */ + cpu_pm_exit(); + +cpu_pm_out: + RCU_NONIDLE(tick_broadcast_exit()); fail: cpuidle_coupled_parallel_barrier(dev, &abort_barrier); diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 439e143cad7b5..1de25166414e8 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -211,6 +211,7 @@ static int __init omapdss_init_fbdev(void) node = of_find_node_by_name(NULL, "omap4_padconf_global"); if (node) omap4_dsi_mux_syscon = syscon_node_to_regmap(node); + of_node_put(node); return 0; } @@ -259,10 +260,13 @@ static int __init omapdss_init_of(void) if (!pdev) { pr_err("Unable to find DSS platform device\n"); + of_node_put(node); return -ENODEV; } r = of_platform_populate(node, NULL, NULL, &pdev->dev); + put_device(&pdev->dev); + of_node_put(node); if (r) { pr_err("Unable to populate DSS submodule devices\n"); return r; diff --git a/arch/arm/mach-omap2/omap-iommu.c b/arch/arm/mach-omap2/omap-iommu.c index f1a6ece8108e4..78247e6f4a720 100644 --- a/arch/arm/mach-omap2/omap-iommu.c +++ b/arch/arm/mach-omap2/omap-iommu.c @@ -11,14 +11,43 @@ #include "omap_hwmod.h" #include "omap_device.h" +#include "clockdomain.h" #include "powerdomain.h" +static void omap_iommu_dra7_emu_swsup_config(struct platform_device *pdev, + bool enable) +{ + static struct clockdomain *emu_clkdm; + static DEFINE_SPINLOCK(emu_lock); + static atomic_t count; + struct device_node *np = pdev->dev.of_node; + + if (!of_device_is_compatible(np, "ti,dra7-dsp-iommu")) + return; + + if (!emu_clkdm) { + emu_clkdm = clkdm_lookup("emu_clkdm"); + if (WARN_ON_ONCE(!emu_clkdm)) + return; + } + + spin_lock(&emu_lock); + + if (enable && (atomic_inc_return(&count) == 1)) + clkdm_deny_idle(emu_clkdm); + else if (!enable && (atomic_dec_return(&count) == 0)) + clkdm_allow_idle(emu_clkdm); + + spin_unlock(&emu_lock); +} + int omap_iommu_set_pwrdm_constraint(struct platform_device *pdev, bool request, u8 *pwrst) { struct powerdomain *pwrdm; struct omap_device *od; u8 next_pwrst; + int ret = 0; od = to_omap_device(pdev); if (!od) @@ -31,13 +60,21 @@ int omap_iommu_set_pwrdm_constraint(struct platform_device *pdev, bool request, if (!pwrdm) return -EINVAL; - if (request) + if (request) { *pwrst = pwrdm_read_next_pwrst(pwrdm); + omap_iommu_dra7_emu_swsup_config(pdev, true); + } if (*pwrst > PWRDM_POWER_RET) - return 0; + goto out; next_pwrst = request ? PWRDM_POWER_ON : *pwrst; - return pwrdm_set_next_pwrst(pwrdm, next_pwrst); + ret = pwrdm_set_next_pwrst(pwrdm, next_pwrst); + +out: + if (!request) + omap_iommu_dra7_emu_swsup_config(pdev, false); + + return ret; } diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c index 5c3845730dbf5..0b80f8bcd3047 100644 --- a/arch/arm/mach-omap2/omap4-common.c +++ b/arch/arm/mach-omap2/omap4-common.c @@ -314,10 +314,12 @@ void __init omap_gic_of_init(void) np = of_find_compatible_node(NULL, NULL, "arm,cortex-a9-gic"); gic_dist_base_addr = of_iomap(np, 0); + of_node_put(np); WARN_ON(!gic_dist_base_addr); np = of_find_compatible_node(NULL, NULL, "arm,cortex-a9-twd-timer"); twd_base = of_iomap(np, 0); + of_node_put(np); WARN_ON(!twd_base); skip_errata_init: diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c index 3acb4192918df..f85a0fd6aca5c 100644 --- a/arch/arm/mach-omap2/omap_device.c +++ b/arch/arm/mach-omap2/omap_device.c @@ -234,10 +234,12 @@ static int _omap_device_notifier_call(struct notifier_block *nb, break; case BUS_NOTIFY_BIND_DRIVER: od = to_omap_device(pdev); - if (od && (od->_state == OMAP_DEVICE_STATE_ENABLED) && - pm_runtime_status_suspended(dev)) { + if (od) { od->_driver_status = BUS_NOTIFY_BIND_DRIVER; - pm_runtime_set_active(dev); + if (od->_state == OMAP_DEVICE_STATE_ENABLED && + pm_runtime_status_suspended(dev)) { + pm_runtime_set_active(dev); + } } break; case BUS_NOTIFY_ADD_DEVICE: diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 203664c40d3d2..202b740adee0e 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -782,8 +782,10 @@ static int __init _init_clkctrl_providers(void) for_each_matching_node(np, ti_clkctrl_match_table) { ret = _setup_clkctrl_provider(np); - if (ret) + if (ret) { + of_node_put(np); break; + } } return ret; @@ -3535,7 +3537,7 @@ static const struct omap_hwmod_reset dra7_reset_quirks[] = { }; static const struct omap_hwmod_reset omap_reset_quirks[] = { - { .match = "dss", .len = 3, .reset = omap_dss_reset, }, + { .match = "dss_core", .len = 8, .reset = omap_dss_reset, }, { .match = "hdq1w", .len = 5, .reset = omap_hdq1w_reset, }, { .match = "i2c", .len = 3, .reset = omap_i2c_reset, }, { .match = "wd_timer", .len = 8, .reset = omap2_wd_timer_reset, }, @@ -3656,6 +3658,8 @@ int omap_hwmod_init_module(struct device *dev, oh->flags |= HWMOD_SWSUP_SIDLE_ACT; if (data->cfg->quirks & SYSC_QUIRK_SWSUP_MSTANDBY) oh->flags |= HWMOD_SWSUP_MSTANDBY; + if (data->cfg->quirks & SYSC_QUIRK_CLKDM_NOAUTO) + oh->flags |= HWMOD_CLKDM_NOAUTO; error = omap_hwmod_check_module(dev, oh, data, sysc_fields, rev_offs, sysc_offs, syss_offs, diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 2efd18e8824c7..ca07e310d9ed5 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -45,6 +44,17 @@ struct pdata_init { static struct of_dev_auxdata omap_auxdata_lookup[]; static struct twl4030_gpio_platform_data twl_gpio_auxdata; +#if IS_ENABLED(CONFIG_OMAP_IOMMU) +int omap_iommu_set_pwrdm_constraint(struct platform_device *pdev, bool request, + u8 *pwrst); +#else +static inline int omap_iommu_set_pwrdm_constraint(struct platform_device *pdev, + bool request, u8 *pwrst) +{ + return 0; +} +#endif + #ifdef CONFIG_MACH_NOKIA_N8X0 static void __init omap2420_n8x0_legacy_init(void) { @@ -269,14 +279,6 @@ static void __init am3517_evm_legacy_init(void) am35xx_emac_reset(); } -static struct platform_device omap3_rom_rng_device = { - .name = "omap3-rom-rng", - .id = -1, - .dev = { - .platform_data = rx51_secure_rng_call, - }, -}; - static void __init nokia_n900_legacy_init(void) { hsmmc2_internal_input_clk(); @@ -292,9 +294,6 @@ static void __init nokia_n900_legacy_init(void) pr_warn("RX-51: Not enabling ARM errata 430973 workaround\n"); pr_warn("Thumb binaries may crash randomly without this workaround\n"); } - - pr_info("RX-51: Registering OMAP3 HWRNG device\n"); - platform_device_register(&omap3_rom_rng_device); } } @@ -311,131 +310,18 @@ static void __init omap3_logicpd_torpedo_init(void) } /* omap3pandora legacy devices */ -#define PANDORA_WIFI_IRQ_GPIO 21 -#define PANDORA_WIFI_NRESET_GPIO 23 static struct platform_device pandora_backlight = { .name = "pandora-backlight", .id = -1, }; -static struct regulator_consumer_supply pandora_vmmc3_supply[] = { - REGULATOR_SUPPLY("vmmc", "omap_hsmmc.2"), -}; - -static struct regulator_init_data pandora_vmmc3 = { - .constraints = { - .valid_ops_mask = REGULATOR_CHANGE_STATUS, - }, - .num_consumer_supplies = ARRAY_SIZE(pandora_vmmc3_supply), - .consumer_supplies = pandora_vmmc3_supply, -}; - -static struct fixed_voltage_config pandora_vwlan = { - .supply_name = "vwlan", - .microvolts = 1800000, /* 1.8V */ - .startup_delay = 50000, /* 50ms */ - .init_data = &pandora_vmmc3, -}; - -static struct platform_device pandora_vwlan_device = { - .name = "reg-fixed-voltage", - .id = 1, - .dev = { - .platform_data = &pandora_vwlan, - }, -}; - -static struct gpiod_lookup_table pandora_vwlan_gpiod_table = { - .dev_id = "reg-fixed-voltage.1", - .table = { - /* - * As this is a low GPIO number it should be at the first - * GPIO bank. - */ - GPIO_LOOKUP("gpio-0-31", PANDORA_WIFI_NRESET_GPIO, - NULL, GPIO_ACTIVE_HIGH), - { }, - }, -}; - -static void pandora_wl1251_init_card(struct mmc_card *card) -{ - /* - * We have TI wl1251 attached to MMC3. Pass this information to - * SDIO core because it can't be probed by normal methods. - */ - if (card->type == MMC_TYPE_SDIO || card->type == MMC_TYPE_SD_COMBO) { - card->quirks |= MMC_QUIRK_NONSTD_SDIO; - card->cccr.wide_bus = 1; - card->cis.vendor = 0x104c; - card->cis.device = 0x9066; - card->cis.blksize = 512; - card->cis.max_dtr = 24000000; - card->ocr = 0x80; - } -} - -static struct omap2_hsmmc_info pandora_mmc3[] = { - { - .mmc = 3, - .caps = MMC_CAP_4_BIT_DATA | MMC_CAP_POWER_OFF_CARD, - .init_card = pandora_wl1251_init_card, - }, - {} /* Terminator */ -}; - -static void __init pandora_wl1251_init(void) -{ - struct wl1251_platform_data pandora_wl1251_pdata; - int ret; - - memset(&pandora_wl1251_pdata, 0, sizeof(pandora_wl1251_pdata)); - - pandora_wl1251_pdata.power_gpio = -1; - - ret = gpio_request_one(PANDORA_WIFI_IRQ_GPIO, GPIOF_IN, "wl1251 irq"); - if (ret < 0) - goto fail; - - pandora_wl1251_pdata.irq = gpio_to_irq(PANDORA_WIFI_IRQ_GPIO); - if (pandora_wl1251_pdata.irq < 0) - goto fail_irq; - - pandora_wl1251_pdata.use_eeprom = true; - ret = wl1251_set_platform_data(&pandora_wl1251_pdata); - if (ret < 0) - goto fail_irq; - - return; - -fail_irq: - gpio_free(PANDORA_WIFI_IRQ_GPIO); -fail: - pr_err("wl1251 board initialisation failed\n"); -} - static void __init omap3_pandora_legacy_init(void) { platform_device_register(&pandora_backlight); - gpiod_add_lookup_table(&pandora_vwlan_gpiod_table); - platform_device_register(&pandora_vwlan_device); - omap_hsmmc_init(pandora_mmc3); - omap_hsmmc_late_init(pandora_mmc3); - pandora_wl1251_init(); } #endif /* CONFIG_ARCH_OMAP3 */ -#if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5) -static struct iommu_platform_data omap4_iommu_pdata = { - .reset_name = "mmu_cache", - .assert_reset = omap_device_assert_hardreset, - .deassert_reset = omap_device_deassert_hardreset, - .device_enable = omap_device_enable, - .device_idle = omap_device_idle, -}; -#endif - #if defined(CONFIG_SOC_AM33XX) || defined(CONFIG_SOC_AM43XX) static struct wkup_m3_platform_data wkup_m3_data = { .reset_name = "wkup_m3", @@ -451,6 +337,10 @@ static void __init omap5_uevm_legacy_init(void) #endif #ifdef CONFIG_SOC_DRA7XX +static struct iommu_platform_data dra7_ipu1_dsp_iommu_pdata = { + .set_pwrdm_constraint = omap_iommu_set_pwrdm_constraint, +}; + static struct omap_hsmmc_platform_data dra7_hsmmc_data_mmc1; static struct omap_hsmmc_platform_data dra7_hsmmc_data_mmc2; static struct omap_hsmmc_platform_data dra7_hsmmc_data_mmc3; @@ -472,10 +362,14 @@ static void __init dra7x_evm_mmc_quirk(void) static struct clockdomain *ti_sysc_find_one_clockdomain(struct clk *clk) { + struct clk_hw *hw = __clk_get_hw(clk); struct clockdomain *clkdm = NULL; struct clk_hw_omap *hwclk; - hwclk = to_clk_hw_omap(__clk_get_hw(clk)); + hwclk = to_clk_hw_omap(hw); + if (!omap2_clk_is_hw_omap(hw)) + return NULL; + if (hwclk && hwclk->clkdm_name) clkdm = clkdm_lookup(hwclk->clkdm_name); @@ -638,6 +532,7 @@ static struct of_dev_auxdata omap_auxdata_lookup[] = { OF_DEV_AUXDATA("ti,davinci_mdio", 0x5c030000, "davinci_mdio.0", NULL), OF_DEV_AUXDATA("ti,am3517-emac", 0x5c000000, "davinci_emac.0", &am35xx_emac_pdata), + OF_DEV_AUXDATA("nokia,n900-rom-rng", 0, NULL, rx51_secure_rng_call), /* McBSP modules with sidetone core */ #if IS_ENABLED(CONFIG_SND_SOC_OMAP_MCBSP) OF_DEV_AUXDATA("ti,omap3-mcbsp", 0x49022000, "49022000.mcbsp", &mcbsp_pdata), @@ -653,10 +548,6 @@ static struct of_dev_auxdata omap_auxdata_lookup[] = { &wkup_m3_data), #endif #if defined(CONFIG_ARCH_OMAP4) || defined(CONFIG_SOC_OMAP5) - OF_DEV_AUXDATA("ti,omap4-iommu", 0x4a066000, "4a066000.mmu", - &omap4_iommu_pdata), - OF_DEV_AUXDATA("ti,omap4-iommu", 0x55082000, "55082000.mmu", - &omap4_iommu_pdata), OF_DEV_AUXDATA("ti,omap4-smartreflex-iva", 0x4a0db000, "4a0db000.smartreflex", &omap_sr_pdata[OMAP_SR_IVA]), OF_DEV_AUXDATA("ti,omap4-smartreflex-core", 0x4a0dd000, @@ -671,6 +562,12 @@ static struct of_dev_auxdata omap_auxdata_lookup[] = { &dra7_hsmmc_data_mmc2), OF_DEV_AUXDATA("ti,dra7-hsmmc", 0x480ad000, "480ad000.mmc", &dra7_hsmmc_data_mmc3), + OF_DEV_AUXDATA("ti,dra7-dsp-iommu", 0x40d01000, "40d01000.mmu", + &dra7_ipu1_dsp_iommu_pdata), + OF_DEV_AUXDATA("ti,dra7-dsp-iommu", 0x41501000, "41501000.mmu", + &dra7_ipu1_dsp_iommu_pdata), + OF_DEV_AUXDATA("ti,dra7-iommu", 0x58882000, "58882000.mmu", + &dra7_ipu1_dsp_iommu_pdata), #endif /* Common auxdata */ OF_DEV_AUXDATA("ti,sysc", 0, NULL, &ti_sysc_pdata), diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 54254fc92c2ed..fa66534a7ae22 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -194,6 +194,7 @@ void omap_sram_idle(void) int per_next_state = PWRDM_POWER_ON; int core_next_state = PWRDM_POWER_ON; u32 sdrc_pwr = 0; + int error; mpu_next_state = pwrdm_read_next_pwrst(mpu_pwrdm); switch (mpu_next_state) { @@ -222,8 +223,11 @@ void omap_sram_idle(void) pwrdm_pre_transition(NULL); /* PER */ - if (per_next_state == PWRDM_POWER_OFF) - cpu_cluster_pm_enter(); + if (per_next_state == PWRDM_POWER_OFF) { + error = cpu_cluster_pm_enter(); + if (error) + return; + } /* CORE */ if (core_next_state < PWRDM_POWER_ON) { diff --git a/arch/arm/mach-omap2/prm3xxx.c b/arch/arm/mach-omap2/prm3xxx.c index 1b442b1285693..63e73e9b82bc6 100644 --- a/arch/arm/mach-omap2/prm3xxx.c +++ b/arch/arm/mach-omap2/prm3xxx.c @@ -708,6 +708,7 @@ static int omap3xxx_prm_late_init(void) } irq_num = of_irq_get(np, 0); + of_node_put(np); if (irq_num == -EPROBE_DEFER) return irq_num; diff --git a/arch/arm/mach-omap2/sleep34xx.S b/arch/arm/mach-omap2/sleep34xx.S index ac1324c6453b5..c4e97d35c310d 100644 --- a/arch/arm/mach-omap2/sleep34xx.S +++ b/arch/arm/mach-omap2/sleep34xx.S @@ -72,7 +72,7 @@ ENTRY(enable_omap3630_toggle_l2_on_restore) stmfd sp!, {lr} @ save registers on stack /* Setup so that we will disable and enable l2 */ mov r1, #0x1 - adrl r3, l2dis_3630_offset @ may be too distant for plain adr + adr r3, l2dis_3630_offset ldr r2, [r3] @ value for offset str r1, [r2, r3] @ write to l2dis_3630 ldmfd sp!, {pc} @ restore regs and return diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index 07bea84c5d6e4..1defb838eae3a 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -42,6 +42,7 @@ #include #include #include +#include #include @@ -63,15 +64,28 @@ /* Clockevent code */ -static struct omap_dm_timer clkev; -static struct clock_event_device clockevent_gpt; - /* Clockevent hwmod for am335x and am437x suspend */ static struct omap_hwmod *clockevent_gpt_hwmod; /* Clockesource hwmod for am437x suspend */ static struct omap_hwmod *clocksource_gpt_hwmod; +struct dmtimer_clockevent { + struct clock_event_device dev; + struct omap_dm_timer timer; +}; + +static struct dmtimer_clockevent clockevent; + +static struct omap_dm_timer *to_dmtimer(struct clock_event_device *clockevent) +{ + struct dmtimer_clockevent *clkevt = + container_of(clockevent, struct dmtimer_clockevent, dev); + struct omap_dm_timer *timer = &clkevt->timer; + + return timer; +} + #ifdef CONFIG_SOC_HAS_REALTIME_COUNTER static unsigned long arch_timer_freq; @@ -83,24 +97,21 @@ void set_cntfreq(void) static irqreturn_t omap2_gp_timer_interrupt(int irq, void *dev_id) { - struct clock_event_device *evt = &clockevent_gpt; - - __omap_dm_timer_write_status(&clkev, OMAP_TIMER_INT_OVERFLOW); + struct dmtimer_clockevent *clkevt = dev_id; + struct clock_event_device *evt = &clkevt->dev; + struct omap_dm_timer *timer = &clkevt->timer; + __omap_dm_timer_write_status(timer, OMAP_TIMER_INT_OVERFLOW); evt->event_handler(evt); return IRQ_HANDLED; } -static struct irqaction omap2_gp_timer_irq = { - .name = "gp_timer", - .flags = IRQF_TIMER | IRQF_IRQPOLL, - .handler = omap2_gp_timer_interrupt, -}; - static int omap2_gp_timer_set_next_event(unsigned long cycles, struct clock_event_device *evt) { - __omap_dm_timer_load_start(&clkev, OMAP_TIMER_CTRL_ST, + struct omap_dm_timer *timer = to_dmtimer(evt); + + __omap_dm_timer_load_start(timer, OMAP_TIMER_CTRL_ST, 0xffffffff - cycles, OMAP_TIMER_POSTED); return 0; @@ -108,22 +119,26 @@ static int omap2_gp_timer_set_next_event(unsigned long cycles, static int omap2_gp_timer_shutdown(struct clock_event_device *evt) { - __omap_dm_timer_stop(&clkev, OMAP_TIMER_POSTED, clkev.rate); + struct omap_dm_timer *timer = to_dmtimer(evt); + + __omap_dm_timer_stop(timer, OMAP_TIMER_POSTED, timer->rate); + return 0; } static int omap2_gp_timer_set_periodic(struct clock_event_device *evt) { + struct omap_dm_timer *timer = to_dmtimer(evt); u32 period; - __omap_dm_timer_stop(&clkev, OMAP_TIMER_POSTED, clkev.rate); + __omap_dm_timer_stop(timer, OMAP_TIMER_POSTED, timer->rate); - period = clkev.rate / HZ; + period = timer->rate / HZ; period -= 1; /* Looks like we need to first set the load value separately */ - __omap_dm_timer_write(&clkev, OMAP_TIMER_LOAD_REG, 0xffffffff - period, + __omap_dm_timer_write(timer, OMAP_TIMER_LOAD_REG, 0xffffffff - period, OMAP_TIMER_POSTED); - __omap_dm_timer_load_start(&clkev, + __omap_dm_timer_load_start(timer, OMAP_TIMER_CTRL_AR | OMAP_TIMER_CTRL_ST, 0xffffffff - period, OMAP_TIMER_POSTED); return 0; @@ -137,25 +152,16 @@ static void omap_clkevt_idle(struct clock_event_device *unused) omap_hwmod_idle(clockevent_gpt_hwmod); } -static void omap_clkevt_unidle(struct clock_event_device *unused) +static void omap_clkevt_unidle(struct clock_event_device *evt) { + struct omap_dm_timer *timer = to_dmtimer(evt); + if (!clockevent_gpt_hwmod) return; omap_hwmod_enable(clockevent_gpt_hwmod); - __omap_dm_timer_int_enable(&clkev, OMAP_TIMER_INT_OVERFLOW); -} - -static struct clock_event_device clockevent_gpt = { - .features = CLOCK_EVT_FEAT_PERIODIC | - CLOCK_EVT_FEAT_ONESHOT, - .rating = 300, - .set_next_event = omap2_gp_timer_set_next_event, - .set_state_shutdown = omap2_gp_timer_shutdown, - .set_state_periodic = omap2_gp_timer_set_periodic, - .set_state_oneshot = omap2_gp_timer_shutdown, - .tick_resume = omap2_gp_timer_shutdown, -}; + __omap_dm_timer_int_enable(timer, OMAP_TIMER_INT_OVERFLOW); +} static const struct of_device_id omap_timer_match[] __initconst = { { .compatible = "ti,omap2420-timer", }, @@ -362,47 +368,104 @@ void tick_broadcast(const struct cpumask *mask) } #endif -static void __init omap2_gp_clockevent_init(int gptimer_id, - const char *fck_source, - const char *property) +static void __init dmtimer_clkevt_init_common(struct dmtimer_clockevent *clkevt, + int gptimer_id, + const char *fck_source, + unsigned int features, + const struct cpumask *cpumask, + const char *property, + int rating, const char *name) { + struct omap_dm_timer *timer = &clkevt->timer; int res; - clkev.id = gptimer_id; - clkev.errata = omap_dm_timer_get_errata(); + timer->id = gptimer_id; + timer->errata = omap_dm_timer_get_errata(); + clkevt->dev.features = features; + clkevt->dev.rating = rating; + clkevt->dev.set_next_event = omap2_gp_timer_set_next_event; + clkevt->dev.set_state_shutdown = omap2_gp_timer_shutdown; + clkevt->dev.set_state_periodic = omap2_gp_timer_set_periodic; + clkevt->dev.set_state_oneshot = omap2_gp_timer_shutdown; + clkevt->dev.tick_resume = omap2_gp_timer_shutdown; /* * For clock-event timers we never read the timer counter and * so we are not impacted by errata i103 and i767. Therefore, * we can safely ignore this errata for clock-event timers. */ - __omap_dm_timer_override_errata(&clkev, OMAP_TIMER_ERRATA_I103_I767); + __omap_dm_timer_override_errata(timer, OMAP_TIMER_ERRATA_I103_I767); - res = omap_dm_timer_init_one(&clkev, fck_source, property, - &clockevent_gpt.name, OMAP_TIMER_POSTED); + res = omap_dm_timer_init_one(timer, fck_source, property, + &clkevt->dev.name, OMAP_TIMER_POSTED); BUG_ON(res); - omap2_gp_timer_irq.dev_id = &clkev; - setup_irq(clkev.irq, &omap2_gp_timer_irq); + clkevt->dev.cpumask = cpumask; + clkevt->dev.irq = omap_dm_timer_get_irq(timer); - __omap_dm_timer_int_enable(&clkev, OMAP_TIMER_INT_OVERFLOW); + if (request_irq(clkevt->dev.irq, omap2_gp_timer_interrupt, + IRQF_TIMER | IRQF_IRQPOLL, name, clkevt)) + pr_err("Failed to request irq %d (gp_timer)\n", clkevt->dev.irq); - clockevent_gpt.cpumask = cpu_possible_mask; - clockevent_gpt.irq = omap_dm_timer_get_irq(&clkev); - clockevents_config_and_register(&clockevent_gpt, clkev.rate, - 3, /* Timer internal resynch latency */ - 0xffffffff); + __omap_dm_timer_int_enable(timer, OMAP_TIMER_INT_OVERFLOW); if (soc_is_am33xx() || soc_is_am43xx()) { - clockevent_gpt.suspend = omap_clkevt_idle; - clockevent_gpt.resume = omap_clkevt_unidle; + clkevt->dev.suspend = omap_clkevt_idle; + clkevt->dev.resume = omap_clkevt_unidle; clockevent_gpt_hwmod = - omap_hwmod_lookup(clockevent_gpt.name); + omap_hwmod_lookup(clkevt->dev.name); + } + + pr_info("OMAP clockevent source: %s at %lu Hz\n", clkevt->dev.name, + timer->rate); +} + +static DEFINE_PER_CPU(struct dmtimer_clockevent, dmtimer_percpu_timer); + +static int omap_gptimer_starting_cpu(unsigned int cpu) +{ + struct dmtimer_clockevent *clkevt = per_cpu_ptr(&dmtimer_percpu_timer, cpu); + struct clock_event_device *dev = &clkevt->dev; + struct omap_dm_timer *timer = &clkevt->timer; + + clockevents_config_and_register(dev, timer->rate, 3, ULONG_MAX); + irq_force_affinity(dev->irq, cpumask_of(cpu)); + + return 0; +} + +static int __init dmtimer_percpu_quirk_init(void) +{ + struct dmtimer_clockevent *clkevt; + struct clock_event_device *dev; + struct device_node *arm_timer; + struct omap_dm_timer *timer; + int cpu = 0; + + arm_timer = of_find_compatible_node(NULL, NULL, "arm,armv7-timer"); + if (of_device_is_available(arm_timer)) { + pr_warn_once("ARM architected timer wrap issue i940 detected\n"); + return 0; + } + + for_each_possible_cpu(cpu) { + clkevt = per_cpu_ptr(&dmtimer_percpu_timer, cpu); + dev = &clkevt->dev; + timer = &clkevt->timer; + + dmtimer_clkevt_init_common(clkevt, 0, "timer_sys_ck", + CLOCK_EVT_FEAT_ONESHOT, + cpumask_of(cpu), + "assigned-clock-parents", + 500, "percpu timer"); } - pr_info("OMAP clockevent source: %s at %lu Hz\n", clockevent_gpt.name, - clkev.rate); + cpuhp_setup_state(CPUHP_AP_OMAP_DM_TIMER_STARTING, + "clockevents/omap/gptimer:starting", + omap_gptimer_starting_cpu, NULL); + + return 0; } /* Clocksource code */ @@ -542,7 +605,15 @@ static void __init __omap_sync32k_timer_init(int clkev_nr, const char *clkev_src { omap_clk_init(); omap_dmtimer_init(); - omap2_gp_clockevent_init(clkev_nr, clkev_src, clkev_prop); + dmtimer_clkevt_init_common(&clockevent, clkev_nr, clkev_src, + CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, + cpu_possible_mask, clkev_prop, 300, "clockevent"); + clockevents_config_and_register(&clockevent.dev, clockevent.timer.rate, + 3, /* Timer internal resynch latency */ + 0xffffffff); + + if (soc_is_dra7xx()) + dmtimer_percpu_quirk_init(); /* Enable the use of clocksource="gp_timer" kernel parameter */ if (use_gptimer_clksrc || gptimer) @@ -571,7 +642,7 @@ void __init omap3_secure_sync32k_timer_init(void) #endif /* CONFIG_ARCH_OMAP3 */ #if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM33XX) || \ - defined(CONFIG_SOC_AM43XX) + defined(CONFIG_SOC_AM43XX) || defined(CONFIG_SOC_DRA7XX) void __init omap3_gptimer_timer_init(void) { __omap_sync32k_timer_init(2, "timer_sys_ck", NULL, diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c index 425855f456f2b..719e6395797cb 100644 --- a/arch/arm/mach-pxa/cm-x300.c +++ b/arch/arm/mach-pxa/cm-x300.c @@ -355,13 +355,13 @@ static struct platform_device cm_x300_spi_gpio = { static struct gpiod_lookup_table cm_x300_spi_gpiod_table = { .dev_id = "spi_gpio", .table = { - GPIO_LOOKUP("gpio-pxa", GPIO_LCD_SCL, + GPIO_LOOKUP("pca9555.1", GPIO_LCD_SCL - GPIO_LCD_BASE, "sck", GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("gpio-pxa", GPIO_LCD_DIN, + GPIO_LOOKUP("pca9555.1", GPIO_LCD_DIN - GPIO_LCD_BASE, "mosi", GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("gpio-pxa", GPIO_LCD_DOUT, + GPIO_LOOKUP("pca9555.1", GPIO_LCD_DOUT - GPIO_LCD_BASE, "miso", GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("gpio-pxa", GPIO_LCD_CS, + GPIO_LOOKUP("pca9555.1", GPIO_LCD_CS - GPIO_LCD_BASE, "cs", GPIO_ACTIVE_HIGH), { }, }, diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c index e1a394ac3eea7..8f2d4faa26120 100644 --- a/arch/arm/mach-pxa/magician.c +++ b/arch/arm/mach-pxa/magician.c @@ -675,7 +675,7 @@ static struct platform_device bq24022 = { static struct gpiod_lookup_table bq24022_gpiod_table = { .dev_id = "gpio-regulator", .table = { - GPIO_LOOKUP("gpio-pxa", EGPIO_MAGICIAN_BQ24022_ISET2, + GPIO_LOOKUP("htc-egpio-0", EGPIO_MAGICIAN_BQ24022_ISET2 - MAGICIAN_EGPIO_BASE, NULL, GPIO_ACTIVE_HIGH), GPIO_LOOKUP("gpio-pxa", GPIO30_MAGICIAN_BQ24022_nCHARGE_EN, "enable", GPIO_ACTIVE_LOW), diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c index f537ff1c3ba7e..3fbcaa3b4e182 100644 --- a/arch/arm/mach-pxa/tosa.c +++ b/arch/arm/mach-pxa/tosa.c @@ -295,9 +295,9 @@ static struct gpiod_lookup_table tosa_mci_gpio_table = { .table = { GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_nSD_DETECT, "cd", GPIO_ACTIVE_LOW), - GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_SD_WP, + GPIO_LOOKUP("sharp-scoop.0", TOSA_GPIO_SD_WP - TOSA_SCOOP_GPIO_BASE, "wp", GPIO_ACTIVE_LOW), - GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_PWR_ON, + GPIO_LOOKUP("sharp-scoop.0", TOSA_GPIO_PWR_ON - TOSA_SCOOP_GPIO_BASE, "power", GPIO_ACTIVE_HIGH), { }, }, diff --git a/arch/arm/mach-s3c24xx/mach-at2440evb.c b/arch/arm/mach-s3c24xx/mach-at2440evb.c index 58c5ef3cf1d7e..2d370f7f75fa2 100644 --- a/arch/arm/mach-s3c24xx/mach-at2440evb.c +++ b/arch/arm/mach-s3c24xx/mach-at2440evb.c @@ -143,7 +143,7 @@ static struct gpiod_lookup_table at2440evb_mci_gpio_table = { .dev_id = "s3c2410-sdi", .table = { /* Card detect S3C2410_GPG(10) */ - GPIO_LOOKUP("GPG", 10, "cd", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("GPIOG", 10, "cd", GPIO_ACTIVE_LOW), { }, }, }; diff --git a/arch/arm/mach-s3c24xx/mach-h1940.c b/arch/arm/mach-s3c24xx/mach-h1940.c index 74d6b68e91c74..8d9d8e7c71d4c 100644 --- a/arch/arm/mach-s3c24xx/mach-h1940.c +++ b/arch/arm/mach-s3c24xx/mach-h1940.c @@ -468,9 +468,9 @@ static struct gpiod_lookup_table h1940_mmc_gpio_table = { .dev_id = "s3c2410-sdi", .table = { /* Card detect S3C2410_GPF(5) */ - GPIO_LOOKUP("GPF", 5, "cd", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("GPIOF", 5, "cd", GPIO_ACTIVE_LOW), /* Write protect S3C2410_GPH(8) */ - GPIO_LOOKUP("GPH", 8, "wp", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("GPIOH", 8, "wp", GPIO_ACTIVE_LOW), { }, }, }; diff --git a/arch/arm/mach-s3c24xx/mach-jive.c b/arch/arm/mach-s3c24xx/mach-jive.c index 885e8f12e4b91..eedc9f8ed2109 100644 --- a/arch/arm/mach-s3c24xx/mach-jive.c +++ b/arch/arm/mach-s3c24xx/mach-jive.c @@ -237,11 +237,11 @@ static int __init jive_mtdset(char *options) unsigned long set; if (options == NULL || options[0] == '\0') - return 0; + return 1; if (kstrtoul(options, 10, &set)) { printk(KERN_ERR "failed to parse mtdset=%s\n", options); - return 0; + return 1; } switch (set) { @@ -256,7 +256,7 @@ static int __init jive_mtdset(char *options) "using default.", set); } - return 0; + return 1; } /* parse the mtdset= option given to the kernel command line */ diff --git a/arch/arm/mach-s3c24xx/mach-mini2440.c b/arch/arm/mach-s3c24xx/mach-mini2440.c index 9035f868fb34e..3a5b1124037b2 100644 --- a/arch/arm/mach-s3c24xx/mach-mini2440.c +++ b/arch/arm/mach-s3c24xx/mach-mini2440.c @@ -244,9 +244,9 @@ static struct gpiod_lookup_table mini2440_mmc_gpio_table = { .dev_id = "s3c2410-sdi", .table = { /* Card detect S3C2410_GPG(8) */ - GPIO_LOOKUP("GPG", 8, "cd", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("GPIOG", 8, "cd", GPIO_ACTIVE_LOW), /* Write protect S3C2410_GPH(8) */ - GPIO_LOOKUP("GPH", 8, "wp", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("GPIOH", 8, "wp", GPIO_ACTIVE_HIGH), { }, }, }; diff --git a/arch/arm/mach-s3c24xx/mach-n30.c b/arch/arm/mach-s3c24xx/mach-n30.c index d856f23939aff..ffa20f52aa832 100644 --- a/arch/arm/mach-s3c24xx/mach-n30.c +++ b/arch/arm/mach-s3c24xx/mach-n30.c @@ -359,9 +359,9 @@ static struct gpiod_lookup_table n30_mci_gpio_table = { .dev_id = "s3c2410-sdi", .table = { /* Card detect S3C2410_GPF(1) */ - GPIO_LOOKUP("GPF", 1, "cd", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("GPIOF", 1, "cd", GPIO_ACTIVE_LOW), /* Write protect S3C2410_GPG(10) */ - GPIO_LOOKUP("GPG", 10, "wp", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("GPIOG", 10, "wp", GPIO_ACTIVE_LOW), { }, }, }; diff --git a/arch/arm/mach-s3c24xx/mach-rx1950.c b/arch/arm/mach-s3c24xx/mach-rx1950.c index 29f9b345a5311..534e9c1d8161f 100644 --- a/arch/arm/mach-s3c24xx/mach-rx1950.c +++ b/arch/arm/mach-s3c24xx/mach-rx1950.c @@ -567,9 +567,9 @@ static struct gpiod_lookup_table rx1950_mmc_gpio_table = { .dev_id = "s3c2410-sdi", .table = { /* Card detect S3C2410_GPF(5) */ - GPIO_LOOKUP("GPF", 5, "cd", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("GPIOF", 5, "cd", GPIO_ACTIVE_LOW), /* Write protect S3C2410_GPH(8) */ - GPIO_LOOKUP("GPH", 8, "wp", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("GPIOH", 8, "wp", GPIO_ACTIVE_LOW), { }, }, }; diff --git a/arch/arm/mach-s5pv210/Kconfig b/arch/arm/mach-s5pv210/Kconfig index 03984a7918791..69ff1bb89f38f 100644 --- a/arch/arm/mach-s5pv210/Kconfig +++ b/arch/arm/mach-s5pv210/Kconfig @@ -8,7 +8,6 @@ config ARCH_S5PV210 bool "Samsung S5PV210/S5PC110" depends on ARCH_MULTI_V7 - select ARCH_HAS_HOLES_MEMORYMODEL select ARM_VIC select CLKSRC_SAMSUNG_PWM select COMMON_CLK_SAMSUNG diff --git a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c index ee949255ced3f..ba44cec5e59ac 100644 --- a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c +++ b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c @@ -125,6 +125,7 @@ static int regulator_quirk_notify(struct notifier_block *nb, list_for_each_entry_safe(pos, tmp, &quirk_list, list) { list_del(&pos->list); + of_node_put(pos->np); kfree(pos); } @@ -154,8 +155,10 @@ static int __init rcar_gen2_regulator_quirk(void) return -ENODEV; for_each_matching_node_and_match(np, rcar_gen2_quirk_match, &id) { - if (!of_device_is_available(np)) + if (!of_device_is_available(np)) { + of_node_put(np); break; + } ret = of_property_read_u32(np, "reg", &addr); if (ret) /* Skip invalid entry and continue */ @@ -164,6 +167,7 @@ static int __init rcar_gen2_regulator_quirk(void) quirk = kzalloc(sizeof(*quirk), GFP_KERNEL); if (!quirk) { ret = -ENOMEM; + of_node_put(np); goto err_mem; } @@ -171,11 +175,12 @@ static int __init rcar_gen2_regulator_quirk(void) memcpy(&quirk->i2c_msg, id->data, sizeof(quirk->i2c_msg)); quirk->id = id; - quirk->np = np; + quirk->np = of_node_get(np); quirk->i2c_msg.addr = addr; ret = of_irq_parse_one(np, 0, argsa); if (ret) { /* Skip invalid entry and continue */ + of_node_put(np); kfree(quirk); continue; } @@ -222,6 +227,7 @@ static int __init rcar_gen2_regulator_quirk(void) err_mem: list_for_each_entry_safe(pos, tmp, &quirk_list, list) { list_del(&pos->list); + of_node_put(pos->np); kfree(pos); } diff --git a/arch/arm/mach-socfpga/Kconfig b/arch/arm/mach-socfpga/Kconfig index 22af5e308db6c..61b872792c4cb 100644 --- a/arch/arm/mach-socfpga/Kconfig +++ b/arch/arm/mach-socfpga/Kconfig @@ -2,6 +2,7 @@ menuconfig ARCH_SOCFPGA bool "Altera SOCFPGA family" depends on ARCH_MULTI_V7 + select ARCH_HAS_RESET_CONTROLLER select ARCH_SUPPORTS_BIG_ENDIAN select ARM_AMBA select ARM_GIC @@ -19,6 +20,7 @@ menuconfig ARCH_SOCFPGA select PL310_ERRATA_727915 select PL310_ERRATA_753970 if PL310 select PL310_ERRATA_769419 + select RESET_CONTROLLER if ARCH_SOCFPGA config SOCFPGA_SUSPEND diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h index fc2608b18a0d0..18f01190dcfd4 100644 --- a/arch/arm/mach-socfpga/core.h +++ b/arch/arm/mach-socfpga/core.h @@ -33,7 +33,7 @@ extern void __iomem *sdr_ctl_base_addr; u32 socfpga_sdram_self_refresh(u32 sdr_base); extern unsigned int socfpga_sdram_self_refresh_sz; -extern char secondary_trampoline, secondary_trampoline_end; +extern char secondary_trampoline[], secondary_trampoline_end[]; extern unsigned long socfpga_cpu1start_addr; diff --git a/arch/arm/mach-socfpga/platsmp.c b/arch/arm/mach-socfpga/platsmp.c index fbb80b883e5dd..201191cf68f32 100644 --- a/arch/arm/mach-socfpga/platsmp.c +++ b/arch/arm/mach-socfpga/platsmp.c @@ -20,14 +20,14 @@ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle) { - int trampoline_size = &secondary_trampoline_end - &secondary_trampoline; + int trampoline_size = secondary_trampoline_end - secondary_trampoline; if (socfpga_cpu1start_addr) { /* This will put CPU #1 into reset. */ writel(RSTMGR_MPUMODRST_CPU1, rst_manager_base_addr + SOCFPGA_RSTMGR_MODMPURST); - memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size); + memcpy(phys_to_virt(0), secondary_trampoline, trampoline_size); writel(__pa_symbol(secondary_startup), sys_manager_base_addr + (socfpga_cpu1start_addr & 0x000000ff)); @@ -45,12 +45,12 @@ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle) static int socfpga_a10_boot_secondary(unsigned int cpu, struct task_struct *idle) { - int trampoline_size = &secondary_trampoline_end - &secondary_trampoline; + int trampoline_size = secondary_trampoline_end - secondary_trampoline; if (socfpga_cpu1start_addr) { writel(RSTMGR_MPUMODRST_CPU1, rst_manager_base_addr + SOCFPGA_A10_RSTMGR_MODMPURST); - memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size); + memcpy(phys_to_virt(0), secondary_trampoline, trampoline_size); writel(__pa_symbol(secondary_startup), sys_manager_base_addr + (socfpga_cpu1start_addr & 0x00000fff)); diff --git a/arch/arm/mach-socfpga/pm.c b/arch/arm/mach-socfpga/pm.c index 6ed887cf8dc9a..365c0428b21b6 100644 --- a/arch/arm/mach-socfpga/pm.c +++ b/arch/arm/mach-socfpga/pm.c @@ -49,14 +49,14 @@ static int socfpga_setup_ocram_self_refresh(void) if (!ocram_pool) { pr_warn("%s: ocram pool unavailable!\n", __func__); ret = -ENODEV; - goto put_node; + goto put_device; } ocram_base = gen_pool_alloc(ocram_pool, socfpga_sdram_self_refresh_sz); if (!ocram_base) { pr_warn("%s: unable to alloc ocram!\n", __func__); ret = -ENOMEM; - goto put_node; + goto put_device; } ocram_pbase = gen_pool_virt_to_phys(ocram_pool, ocram_base); @@ -67,7 +67,7 @@ static int socfpga_setup_ocram_self_refresh(void) if (!suspend_ocram_base) { pr_warn("%s: __arm_ioremap_exec failed!\n", __func__); ret = -ENOMEM; - goto put_node; + goto put_device; } /* Copy the code that puts DDR in self refresh to ocram */ @@ -81,6 +81,8 @@ static int socfpga_setup_ocram_self_refresh(void) if (!socfpga_sdram_self_refresh_in_ocram) ret = -EFAULT; +put_device: + put_device(&pdev->dev); put_node: of_node_put(np); diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c index 933b6930f024f..a0ca5e7a68de2 100644 --- a/arch/arm/mach-sunxi/sunxi.c +++ b/arch/arm/mach-sunxi/sunxi.c @@ -66,6 +66,7 @@ static const char * const sun8i_board_dt_compat[] = { "allwinner,sun8i-h2-plus", "allwinner,sun8i-h3", "allwinner,sun8i-r40", + "allwinner,sun8i-v3", "allwinner,sun8i-v3s", NULL, }; diff --git a/arch/arm/mach-tango/Kconfig b/arch/arm/mach-tango/Kconfig index 25b2fd4348617..a9eeda36aeb15 100644 --- a/arch/arm/mach-tango/Kconfig +++ b/arch/arm/mach-tango/Kconfig @@ -3,7 +3,6 @@ config ARCH_TANGO bool "Sigma Designs Tango4 (SMP87xx)" depends on ARCH_MULTI_V7 # Cortex-A9 MPCore r3p0, PL310 r3p2 - select ARCH_HAS_HOLES_MEMORYMODEL select ARM_ERRATA_754322 select ARM_ERRATA_764369 if SMP select ARM_ERRATA_775420 diff --git a/arch/arm/mach-tegra/reset-handler.S b/arch/arm/mach-tegra/reset-handler.S index 67b763fea005d..e3f34815c9da7 100644 --- a/arch/arm/mach-tegra/reset-handler.S +++ b/arch/arm/mach-tegra/reset-handler.S @@ -44,16 +44,16 @@ ENTRY(tegra_resume) cmp r6, #TEGRA20 beq 1f @ Yes /* Clear the flow controller flags for this CPU. */ - cpu_to_csr_reg r1, r0 + cpu_to_csr_reg r3, r0 mov32 r2, TEGRA_FLOW_CTRL_BASE - ldr r1, [r2, r1] + ldr r1, [r2, r3] /* Clear event & intr flag */ orr r1, r1, \ #FLOW_CTRL_CSR_INTR_FLAG | FLOW_CTRL_CSR_EVENT_FLAG movw r0, #0x3FFD @ enable, cluster_switch, immed, bitmaps @ & ext flags for CPU power mgnt bic r1, r1, r0 - str r1, [r2] + str r1, [r2, r3] 1: mov32 r9, 0xc09 diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S index b408fa56eb892..6922dd8d3e2d9 100644 --- a/arch/arm/mach-tegra/sleep-tegra30.S +++ b/arch/arm/mach-tegra/sleep-tegra30.S @@ -370,6 +370,14 @@ _pll_m_c_x_done: pll_locked r1, r0, CLK_RESET_PLLC_BASE pll_locked r1, r0, CLK_RESET_PLLX_BASE + tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 + cmp r1, #TEGRA30 + beq 1f + ldr r1, [r0, #CLK_RESET_PLLP_BASE] + bic r1, r1, #(1<<31) @ disable PllP bypass + str r1, [r0, #CLK_RESET_PLLP_BASE] +1: + mov32 r7, TEGRA_TMRUS_BASE ldr r1, [r7] add r1, r1, #LOCK_DELAY @@ -630,7 +638,10 @@ tegra30_switch_cpu_to_clk32k: str r0, [r4, #PMC_PLLP_WB0_OVERRIDE] /* disable PLLP, PLLA, PLLC and PLLX */ + tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 + cmp r1, #TEGRA30 ldr r0, [r5, #CLK_RESET_PLLP_BASE] + orrne r0, r0, #(1 << 31) @ enable PllP bypass on fast cluster bic r0, r0, #(1 << 30) str r0, [r5, #CLK_RESET_PLLP_BASE] ldr r0, [r5, #CLK_RESET_PLLA_BASE] diff --git a/arch/arm/mach-tegra/tegra.c b/arch/arm/mach-tegra/tegra.c index e512e606eabdd..5ea3421fa1e8c 100644 --- a/arch/arm/mach-tegra/tegra.c +++ b/arch/arm/mach-tegra/tegra.c @@ -106,8 +106,8 @@ static const char * const tegra_dt_board_compat[] = { }; DT_MACHINE_START(TEGRA_DT, "NVIDIA Tegra SoC (Flattened Device Tree)") - .l2c_aux_val = 0x3c400001, - .l2c_aux_mask = 0xc20fc3fe, + .l2c_aux_val = 0x3c400000, + .l2c_aux_mask = 0xc20fc3ff, .smp = smp_ops(tegra_smp_ops), .map_io = tegra_map_common_io, .init_early = tegra_init_early, diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c index 46a903c88c6a0..f553cde614f92 100644 --- a/arch/arm/mach-vexpress/dcscb.c +++ b/arch/arm/mach-vexpress/dcscb.c @@ -143,6 +143,7 @@ static int __init dcscb_init(void) if (!node) return -ENODEV; dcscb_base = of_iomap(node, 0); + of_node_put(node); if (!dcscb_base) return -EADDRNOTAVAIL; cfg = readl_relaxed(dcscb_base + DCS_CFG_R); diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c index 354e0e7025aec..1c6500c4e6a17 100644 --- a/arch/arm/mach-vexpress/spc.c +++ b/arch/arm/mach-vexpress/spc.c @@ -551,8 +551,9 @@ static struct clk *ve_spc_clk_register(struct device *cpu_dev) static int __init ve_spc_clk_init(void) { - int cpu; + int cpu, cluster; struct clk *clk; + bool init_opp_table[MAX_CLUSTERS] = { false }; if (!info) return 0; /* Continue only if SPC is initialised */ @@ -578,8 +579,17 @@ static int __init ve_spc_clk_init(void) continue; } + cluster = topology_physical_package_id(cpu_dev->id); + if (cluster < 0 || init_opp_table[cluster]) + continue; + if (ve_init_opp_table(cpu_dev)) pr_warn("failed to initialise cpu%d opp table\n", cpu); + else if (dev_pm_opp_set_sharing_cpus(cpu_dev, + topology_core_cpumask(cpu_dev->id))) + pr_warn("failed to mark OPPs shared for cpu%d\n", cpu); + else + init_opp_table[cluster] = true; } platform_device_register_simple("vexpress-spc-cpufreq", -1, NULL, 0); diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c index 3a4248fd79628..25530ddae1fa1 100644 --- a/arch/arm/mach-zynq/common.c +++ b/arch/arm/mach-zynq/common.c @@ -77,6 +77,7 @@ static int __init zynq_get_revision(void) } zynq_devcfg_base = of_iomap(np, 0); + of_node_put(np); if (!zynq_devcfg_base) { pr_err("%s: Unable to map I/O memory\n", __func__); return -1; diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 0ab3a86b1f523..00ffee644372e 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -743,6 +743,7 @@ config SWP_EMULATE config CPU_BIG_ENDIAN bool "Build big-endian kernel" depends on ARCH_SUPPORTS_BIG_ENDIAN + depends on !LD_IS_LLD help Say Y if you plan on running a kernel in big-endian mode. Note that your board must be properly built and your board @@ -752,7 +753,7 @@ config CPU_BIG_ENDIAN config CPU_ENDIAN_BE8 bool depends on CPU_BIG_ENDIAN - default CPU_V6 || CPU_V6K || CPU_V7 + default CPU_V6 || CPU_V6K || CPU_V7 || CPU_V7M help Support for the BE-8 (big-endian) mode on ARMv6 and ARMv7 processors. @@ -832,6 +833,7 @@ config CPU_BPREDICT_DISABLE config CPU_SPECTRE bool + select GENERIC_CPU_VULNERABILITIES config HARDEN_BRANCH_PREDICTOR bool "Harden the branch predictor against aliasing attacks" if EXPERT @@ -852,6 +854,16 @@ config HARDEN_BRANCH_PREDICTOR If unsure, say Y. +config HARDEN_BRANCH_HISTORY + bool "Harden Spectre style attacks against branch history" if EXPERT + depends on CPU_SPECTRE + default y + help + Speculation attacks against some high-performance processors can + make use of branch history to influence future speculation. When + taking an exception, a sequence of branches overwrites the branch + history, or branch history is invalidated. + config TLS_REG_EMUL bool select NEED_KUSER_HELPERS diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 788c5cf46de59..1fc6c4810a5c3 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -935,6 +935,9 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (type == TYPE_LDST) do_alignment_finish_ldst(addr, instr, regs, offset); + if (thumb_mode(regs)) + regs->ARM_cpsr = it_advance(regs->ARM_cpsr); + return 0; bad_or_fault: diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 12c26eb88afbc..43d91bfd23600 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -1249,20 +1249,28 @@ static void __init l2c310_of_parse(const struct device_node *np, ret = of_property_read_u32(np, "prefetch-data", &val); if (ret == 0) { - if (val) + if (val) { prefetch |= L310_PREFETCH_CTRL_DATA_PREFETCH; - else + *aux_val |= L310_PREFETCH_CTRL_DATA_PREFETCH; + } else { prefetch &= ~L310_PREFETCH_CTRL_DATA_PREFETCH; + *aux_val &= ~L310_PREFETCH_CTRL_DATA_PREFETCH; + } + *aux_mask &= ~L310_PREFETCH_CTRL_DATA_PREFETCH; } else if (ret != -EINVAL) { pr_err("L2C-310 OF prefetch-data property value is missing\n"); } ret = of_property_read_u32(np, "prefetch-instr", &val); if (ret == 0) { - if (val) + if (val) { prefetch |= L310_PREFETCH_CTRL_INSTR_PREFETCH; - else + *aux_val |= L310_PREFETCH_CTRL_INSTR_PREFETCH; + } else { prefetch &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH; + *aux_val &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH; + } + *aux_mask &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH; } else if (ret != -EINVAL) { pr_err("L2C-310 OF prefetch-instr property value is missing\n"); } diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index db92478983005..287ef898a55e1 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -35,7 +35,7 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size, unsigned long attrs) { - void *ret = dma_alloc_from_global_coherent(size, dma_handle); + void *ret = dma_alloc_from_global_coherent(dev, size, dma_handle); /* * dma_alloc_from_global_coherent() may fail because: diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 7d042d5c43e32..27576c7b836ee 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -221,7 +221,7 @@ EXPORT_SYMBOL(arm_coherent_dma_ops); static int __dma_supported(struct device *dev, u64 mask, bool warn) { - unsigned long max_dma_pfn = min(max_pfn, arm_dma_pfn_limit); + unsigned long max_dma_pfn = min(max_pfn - 1, arm_dma_pfn_limit); /* * Translate the device's DMA mask to a PFN limit. This diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index b4be3baa83d4d..ff2cd985d20e0 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -176,11 +176,22 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max_low, int pfn_valid(unsigned long pfn) { phys_addr_t addr = __pfn_to_phys(pfn); + unsigned long pageblock_size = PAGE_SIZE * pageblock_nr_pages; if (__phys_to_pfn(addr) != pfn) return 0; - return memblock_is_map_memory(__pfn_to_phys(pfn)); + /* + * If address less than pageblock_size bytes away from a present + * memory chunk there still will be a memory map entry for it + * because we round freed memory map to the pageblock boundaries. + */ + if (memblock_overlaps_region(&memblock.memory, + ALIGN_DOWN(addr, pageblock_size), + pageblock_size)) + return 1; + + return 0; } EXPORT_SYMBOL(pfn_valid); #endif @@ -274,7 +285,6 @@ void __init arm_memblock_init(const struct machine_desc *mdesc) if (mdesc->reserve) mdesc->reserve(); - early_init_fdt_reserve_self(); early_init_fdt_scan_reserved_mem(); /* reserve memory for DMA contiguous allocations */ @@ -323,7 +333,7 @@ static inline void poison_init_mem(void *s, size_t count) *p++ = 0xe7fddef0; } -static inline void +static inline void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn) { struct page *start_pg, *end_pg; @@ -372,14 +382,14 @@ static void __init free_unused_memmap(void) */ start = min(start, ALIGN(prev_end, PAGES_PER_SECTION)); -#else +#endif /* - * Align down here since the VM subsystem insists that the - * memmap entries are valid from the bank start aligned to - * MAX_ORDER_NR_PAGES. + * Align down here since many operations in VM subsystem + * presume that there are no holes in the memory map inside + * a pageblock */ - start = round_down(start, MAX_ORDER_NR_PAGES); -#endif + start = round_down(start, pageblock_nr_pages); + /* * If we had a previous bank, and there is a space * between the current bank and the previous, free it. @@ -388,18 +398,20 @@ static void __init free_unused_memmap(void) free_memmap(prev_end, start); /* - * Align up here since the VM subsystem insists that the - * memmap entries are valid from the bank end aligned to - * MAX_ORDER_NR_PAGES. + * Align up here since many operations in VM subsystem + * presume that there are no holes in the memory map inside + * a pageblock */ prev_end = ALIGN(memblock_region_memory_end_pfn(reg), - MAX_ORDER_NR_PAGES); + pageblock_nr_pages); } #ifdef CONFIG_SPARSEMEM - if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) + if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) { + prev_end = ALIGN(prev_end, pageblock_nr_pages); free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION)); + } #endif } @@ -470,7 +482,11 @@ static void __init free_highpages(void) void __init mem_init(void) { #ifdef CONFIG_ARM_LPAE - swiotlb_init(1); + if (swiotlb_force == SWIOTLB_FORCE || + max_pfn > arm_dma_pfn_limit) + swiotlb_init(1); + else + swiotlb_force = SWIOTLB_NO_FORCE; #endif set_max_mapnr(pfn_to_page(max_pfn) - mem_map); diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index d42b933161832..841b66515b379 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -301,7 +302,8 @@ static void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, * Don't allow RAM to be mapped with mismatched attributes - this * causes problems with ARMv6+ */ - if (WARN_ON(pfn_valid(pfn) && mtype != MT_MEMORY_RW)) + if (WARN_ON(memblock_is_map_memory(PFN_PHYS(pfn)) && + mtype != MT_MEMORY_RW)) return NULL; area = get_vm_area_caller(size, VM_IOREMAP, caller); @@ -498,3 +500,11 @@ void __init early_ioremap_init(void) { early_ioremap_setup(); } + +bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, + unsigned long flags) +{ + unsigned long pfn = PHYS_PFN(offset); + + return memblock_is_map_memory(pfn); +} diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 48c2888297dd9..463cbb0631be2 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -39,6 +39,8 @@ #include "mm.h" #include "tcm.h" +extern unsigned long __atags_pointer; + /* * empty_zero_page is a special page that is used for * zero-initialized data and COW. @@ -227,12 +229,14 @@ early_param("ecc", early_ecc); static int __init early_cachepolicy(char *p) { pr_warn("cachepolicy kernel parameter not supported without cp15\n"); + return 0; } early_param("cachepolicy", early_cachepolicy); static int __init noalign_setup(char *__unused) { pr_warn("noalign kernel parameter not supported without cp15\n"); + return 1; } __setup("noalign", noalign_setup); @@ -312,6 +316,13 @@ static struct mem_type mem_types[] __ro_after_init = { .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, .domain = DOMAIN_KERNEL, }, + [MT_MEMORY_RO] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_XN | L_PTE_RDONLY, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT, + .domain = DOMAIN_KERNEL, + }, [MT_ROM] = { .prot_sect = PMD_TYPE_SECT, .domain = DOMAIN_KERNEL, @@ -413,9 +424,9 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) FIXADDR_END); BUG_ON(idx >= __end_of_fixed_addresses); - /* we only support device mappings until pgprot_kernel has been set */ + /* We support only device mappings before pgprot_kernel is set. */ if (WARN_ON(pgprot_val(prot) != pgprot_val(FIXMAP_PAGE_IO) && - pgprot_val(pgprot_kernel) == 0)) + pgprot_val(prot) && pgprot_val(pgprot_kernel) == 0)) return; if (pgprot_val(prot)) @@ -511,6 +522,7 @@ static void __init build_mem_type_table(void) /* Also setup NX memory mapping */ mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_XN; } if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { /* @@ -593,6 +605,7 @@ static void __init build_mem_type_table(void) mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; #endif /* @@ -613,6 +626,8 @@ static void __init build_mem_type_table(void) mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_RO].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED; @@ -676,6 +691,8 @@ static void __init build_mem_type_table(void) mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd; mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_RO].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_MEMORY_RO].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask; mem_types[MT_ROM].prot_sect |= cp->pmd; @@ -962,7 +979,7 @@ static void __init create_mapping(struct map_desc *md) return; } - if ((md->type == MT_DEVICE || md->type == MT_ROM) && + if (md->type == MT_DEVICE && md->virtual >= PAGE_OFFSET && md->virtual < FIXADDR_START && (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) { pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n", @@ -1352,6 +1369,15 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) for (addr = VMALLOC_START; addr < (FIXADDR_TOP & PMD_MASK); addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); + if (__atags_pointer) { + /* create a read-only mapping of the device tree */ + map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK); + map.virtual = FDT_FIXED_BASE; + map.length = FDT_FIXED_SIZE; + map.type = MT_MEMORY_RO; + create_mapping(&map); + } + /* * Map the kernel if it is XIP. * It is always first in the modulearea. @@ -1512,8 +1538,7 @@ static void __init map_lowmem(void) } #ifdef CONFIG_ARM_PV_FIXUP -extern unsigned long __atags_pointer; -typedef void pgtables_remap(long long offset, unsigned long pgd, void *bdata); +typedef void pgtables_remap(long long offset, unsigned long pgd); pgtables_remap lpae_pgtables_remap_asm; /* @@ -1526,7 +1551,6 @@ static void __init early_paging_init(const struct machine_desc *mdesc) unsigned long pa_pgd; unsigned int cr, ttbcr; long long offset; - void *boot_data; if (!mdesc->pv_fixup) return; @@ -1543,7 +1567,6 @@ static void __init early_paging_init(const struct machine_desc *mdesc) */ lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm); pa_pgd = __pa(swapper_pg_dir); - boot_data = __va(__atags_pointer); barrier(); pr_info("Switching physical address space to 0x%08llx\n", @@ -1579,7 +1602,7 @@ static void __init early_paging_init(const struct machine_desc *mdesc) * needs to be assembly. It's fairly simple, as we're using the * temporary tables setup by the initial assembly code. */ - lpae_pgtables_remap(offset, pa_pgd, boot_data); + lpae_pgtables_remap(offset, pa_pgd); /* Re-enable the caches and cacheable TLB walks */ asm volatile("mcr p15, 0, %0, c2, c0, 2" : : "r" (ttbcr)); diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index 4fa5371bc6624..2785da387c910 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -491,7 +491,7 @@ cpu_arm1020_name: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm1020_proc_info,#object __arm1020_proc_info: diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index 5d8a8339e09a4..e9ea237ed7852 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -449,7 +449,7 @@ arm1020e_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm1020e_proc_info,#object __arm1020e_proc_info: diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index b3dd95c345e48..920c279e7879d 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -443,7 +443,7 @@ arm1022_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm1022_proc_info,#object __arm1022_proc_info: diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index ac5afde12f35c..0bdf25a95b107 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -138,7 +138,7 @@ ENTRY(arm1026_flush_kern_cache_all) mov ip, #0 __flush_whole_cache: #ifndef CONFIG_CPU_DCACHE_DISABLE -1: mrc p15, 0, r15, c7, c14, 3 @ test, clean, invalidate +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test, clean, invalidate bne 1b #endif tst r2, #VM_EXEC @@ -363,7 +363,7 @@ ENTRY(cpu_arm1026_switch_mm) #ifdef CONFIG_MMU mov r1, #0 #ifndef CONFIG_CPU_DCACHE_DISABLE -1: mrc p15, 0, r15, c7, c14, 3 @ test, clean, invalidate +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test, clean, invalidate bne 1b #endif #ifndef CONFIG_CPU_ICACHE_DISABLE @@ -437,7 +437,7 @@ arm1026_crval: string cpu_arm1026_name, "ARM1026EJ-S" .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm1026_proc_info,#object __arm1026_proc_info: diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S index c99d24363f32e..39361e196d61b 100644 --- a/arch/arm/mm/proc-arm720.S +++ b/arch/arm/mm/proc-arm720.S @@ -172,7 +172,7 @@ arm720_crval: * See for a definition of this structure. */ - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro arm720_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cpu_flush:req .type __\name\()_proc_info,#object diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S index 1b4a3838393fb..1a94bbf6e53fc 100644 --- a/arch/arm/mm/proc-arm740.S +++ b/arch/arm/mm/proc-arm740.S @@ -128,7 +128,7 @@ __arm740_setup: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm740_proc_info,#object __arm740_proc_info: .long 0x41807400 diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S index 17a4687065c7f..52b66cf0259e3 100644 --- a/arch/arm/mm/proc-arm7tdmi.S +++ b/arch/arm/mm/proc-arm7tdmi.S @@ -72,7 +72,7 @@ __arm7tdmi_setup: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro arm7tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, \ extra_hwcaps=0 diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 298c76b47749f..31ac8acc34dc5 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -434,7 +434,7 @@ arm920_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm920_proc_info,#object __arm920_proc_info: diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index 824be3a0bc238..ca2c7ca8af214 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -412,7 +412,7 @@ arm922_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm922_proc_info,#object __arm922_proc_info: diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index d40cff8f102c2..a381a0c9f1092 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -477,7 +477,7 @@ arm925_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro arm925_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache .type __\name\()_proc_info,#object diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index f3cd08f353f00..1ba253c2bce19 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -131,7 +131,7 @@ __flush_whole_cache: #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache #else -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test,clean,invalidate bne 1b #endif tst r2, #VM_EXEC @@ -358,7 +358,7 @@ ENTRY(cpu_arm926_switch_mm) mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache #else @ && 'Clean & Invalidate whole DCache' -1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test,clean,invalidate bne 1b #endif mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache @@ -460,7 +460,7 @@ arm926_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm926_proc_info,#object __arm926_proc_info: diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S index 1c26d991386d7..4b8a00220cc97 100644 --- a/arch/arm/mm/proc-arm940.S +++ b/arch/arm/mm/proc-arm940.S @@ -340,7 +340,7 @@ __arm940_setup: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm940_proc_info,#object __arm940_proc_info: diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S index 2dc1c75a4fd4a..555becf9c758d 100644 --- a/arch/arm/mm/proc-arm946.S +++ b/arch/arm/mm/proc-arm946.S @@ -395,7 +395,7 @@ __arm946_setup: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __arm946_proc_info,#object __arm946_proc_info: .long 0x41009460 diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S index 913c06e590af5..ef517530130b0 100644 --- a/arch/arm/mm/proc-arm9tdmi.S +++ b/arch/arm/mm/proc-arm9tdmi.S @@ -66,7 +66,7 @@ __arm9tdmi_setup: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro arm9tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req .type __\name\()_proc_info, #object diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S index 8120b6f4dbb83..dddf833fe0007 100644 --- a/arch/arm/mm/proc-fa526.S +++ b/arch/arm/mm/proc-fa526.S @@ -185,7 +185,7 @@ fa526_cr1_set: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __fa526_proc_info,#object __fa526_proc_info: diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index bb6dc34d42a37..b12b76bc8d30c 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -571,7 +571,7 @@ feroceon_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro feroceon_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache:req .type __\name\()_proc_info,#object diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 5461d589a1e25..86e54447dc916 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -5,6 +5,7 @@ * VMA_VM_FLAGS * VM_EXEC */ +#include #include #include @@ -30,7 +31,7 @@ * act_mm - get current->active_mm */ .macro act_mm, rd - bic \rd, sp, #8128 + bic \rd, sp, #(THREAD_SIZE - 1) & ~63 bic \rd, \rd, #63 ldr \rd, [\rd, #TI_TASK] .if (TSK_ACTIVE_MM > IMM12_MASK) @@ -341,6 +342,7 @@ ENTRY(\name\()_cache_fns) .macro define_tlb_functions name:req, flags_up:req, flags_smp .type \name\()_tlb_fns, #object + .align 2 ENTRY(\name\()_tlb_fns) .long \name\()_flush_user_tlb_range .long \name\()_flush_kern_tlb_range diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S index f083085788857..d47d6c5cee63a 100644 --- a/arch/arm/mm/proc-mohawk.S +++ b/arch/arm/mm/proc-mohawk.S @@ -416,7 +416,7 @@ mohawk_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __88sv331x_proc_info,#object __88sv331x_proc_info: diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S index d5bc5d7025639..baba503ba8166 100644 --- a/arch/arm/mm/proc-sa110.S +++ b/arch/arm/mm/proc-sa110.S @@ -196,7 +196,7 @@ sa110_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .type __sa110_proc_info,#object __sa110_proc_info: diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index be7b611c76c76..75ebacc8e4e5c 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -239,7 +239,7 @@ sa1100_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro sa1100_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req .type __\name\()_proc_info,#object diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index c1c85eb3484f3..1dd0d5ca27da8 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -261,7 +261,7 @@ v6_crval: string cpu_elf_name, "v6" .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" /* * Match any ARMv6 processor core. diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 9a07916af8dd2..e10faa6b3c6a8 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -1,14 +1,40 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include #include #include #include #include +#include #include +#ifdef CONFIG_ARM_PSCI +static int __maybe_unused spectre_v2_get_cpu_fw_mitigation_state(void) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + + switch ((int)res.a0) { + case SMCCC_RET_SUCCESS: + return SPECTRE_MITIGATED; + + case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED: + return SPECTRE_UNAFFECTED; + + default: + return SPECTRE_VULNERABLE; + } +} +#else +static int __maybe_unused spectre_v2_get_cpu_fw_mitigation_state(void) +{ + return SPECTRE_VULNERABLE; +} +#endif + #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR DEFINE_PER_CPU(harden_branch_predictor_fn_t, harden_branch_predictor_fn); @@ -37,13 +63,60 @@ static void __maybe_unused call_hvc_arch_workaround_1(void) arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static void cpu_v7_spectre_init(void) +static unsigned int spectre_v2_install_workaround(unsigned int method) { const char *spectre_v2_method = NULL; int cpu = smp_processor_id(); if (per_cpu(harden_branch_predictor_fn, cpu)) - return; + return SPECTRE_MITIGATED; + + switch (method) { + case SPECTRE_V2_METHOD_BPIALL: + per_cpu(harden_branch_predictor_fn, cpu) = + harden_branch_predictor_bpiall; + spectre_v2_method = "BPIALL"; + break; + + case SPECTRE_V2_METHOD_ICIALLU: + per_cpu(harden_branch_predictor_fn, cpu) = + harden_branch_predictor_iciallu; + spectre_v2_method = "ICIALLU"; + break; + + case SPECTRE_V2_METHOD_HVC: + per_cpu(harden_branch_predictor_fn, cpu) = + call_hvc_arch_workaround_1; + cpu_do_switch_mm = cpu_v7_hvc_switch_mm; + spectre_v2_method = "hypervisor"; + break; + + case SPECTRE_V2_METHOD_SMC: + per_cpu(harden_branch_predictor_fn, cpu) = + call_smc_arch_workaround_1; + cpu_do_switch_mm = cpu_v7_smc_switch_mm; + spectre_v2_method = "firmware"; + break; + } + + if (spectre_v2_method) + pr_info("CPU%u: Spectre v2: using %s workaround\n", + smp_processor_id(), spectre_v2_method); + + return SPECTRE_MITIGATED; +} +#else +static unsigned int spectre_v2_install_workaround(unsigned int method) +{ + pr_info_once("Spectre V2: workarounds disabled by configuration\n"); + + return SPECTRE_VULNERABLE; +} +#endif + +static void cpu_v7_spectre_v2_init(void) +{ + unsigned int state, method = 0; switch (read_cpuid_part()) { case ARM_CPU_PART_CORTEX_A8: @@ -52,72 +125,139 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A17: case ARM_CPU_PART_CORTEX_A73: case ARM_CPU_PART_CORTEX_A75: - per_cpu(harden_branch_predictor_fn, cpu) = - harden_branch_predictor_bpiall; - spectre_v2_method = "BPIALL"; + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_BPIALL; break; case ARM_CPU_PART_CORTEX_A15: case ARM_CPU_PART_BRAHMA_B15: - per_cpu(harden_branch_predictor_fn, cpu) = - harden_branch_predictor_iciallu; - spectre_v2_method = "ICIALLU"; + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_ICIALLU; + break; + + case ARM_CPU_PART_BRAHMA_B53: + /* Requires no workaround */ + state = SPECTRE_UNAFFECTED; break; -#ifdef CONFIG_ARM_PSCI default: /* Other ARM CPUs require no workaround */ - if (read_cpuid_implementor() == ARM_CPU_IMP_ARM) + if (read_cpuid_implementor() == ARM_CPU_IMP_ARM) { + state = SPECTRE_UNAFFECTED; break; + } /* fallthrough */ - /* Cortex A57/A72 require firmware workaround */ + /* Cortex A57/A72 require firmware workaround */ case ARM_CPU_PART_CORTEX_A57: case ARM_CPU_PART_CORTEX_A72: { struct arm_smccc_res res; - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + if ((int)res.a0 != 0) + return; + + state = spectre_v2_get_cpu_fw_mitigation_state(); + if (state != SPECTRE_MITIGATED) break; - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 != 0) - break; - per_cpu(harden_branch_predictor_fn, cpu) = - call_hvc_arch_workaround_1; - cpu_do_switch_mm = cpu_v7_hvc_switch_mm; - spectre_v2_method = "hypervisor"; + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: + method = SPECTRE_V2_METHOD_HVC; break; - case PSCI_CONDUIT_SMC: - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 != 0) - break; - per_cpu(harden_branch_predictor_fn, cpu) = - call_smc_arch_workaround_1; - cpu_do_switch_mm = cpu_v7_smc_switch_mm; - spectre_v2_method = "firmware"; + case SMCCC_CONDUIT_SMC: + method = SPECTRE_V2_METHOD_SMC; break; default: + state = SPECTRE_VULNERABLE; break; } } -#endif } - if (spectre_v2_method) - pr_info("CPU%u: Spectre v2: using %s workaround\n", - smp_processor_id(), spectre_v2_method); + if (state == SPECTRE_MITIGATED) + state = spectre_v2_install_workaround(method); + + spectre_v2_update_state(state, method); +} + +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +static int spectre_bhb_method; + +static const char *spectre_bhb_method_name(int method) +{ + switch (method) { + case SPECTRE_V2_METHOD_LOOP8: + return "loop"; + + case SPECTRE_V2_METHOD_BPIALL: + return "BPIALL"; + + default: + return "unknown"; + } +} + +static int spectre_bhb_install_workaround(int method) +{ + if (spectre_bhb_method != method) { + if (spectre_bhb_method) { + pr_err("CPU%u: Spectre BHB: method disagreement, system vulnerable\n", + smp_processor_id()); + + return SPECTRE_VULNERABLE; + } + + if (spectre_bhb_update_vectors(method) == SPECTRE_VULNERABLE) + return SPECTRE_VULNERABLE; + + spectre_bhb_method = method; + + pr_info("CPU%u: Spectre BHB: enabling %s workaround for all CPUs\n", + smp_processor_id(), spectre_bhb_method_name(method)); + } + + return SPECTRE_MITIGATED; } #else -static void cpu_v7_spectre_init(void) +static int spectre_bhb_install_workaround(int method) { + return SPECTRE_VULNERABLE; } #endif +static void cpu_v7_spectre_bhb_init(void) +{ + unsigned int state, method = 0; + + switch (read_cpuid_part()) { + case ARM_CPU_PART_CORTEX_A15: + case ARM_CPU_PART_BRAHMA_B15: + case ARM_CPU_PART_CORTEX_A57: + case ARM_CPU_PART_CORTEX_A72: + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_LOOP8; + break; + + case ARM_CPU_PART_CORTEX_A73: + case ARM_CPU_PART_CORTEX_A75: + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_BPIALL; + break; + + default: + state = SPECTRE_UNAFFECTED; + break; + } + + if (state == SPECTRE_MITIGATED) + state = spectre_bhb_install_workaround(method); + + spectre_v2_update_state(state, method); +} + static __maybe_unused bool cpu_v7_check_auxcr_set(bool *warned, u32 mask, const char *msg) { @@ -146,16 +286,18 @@ static bool check_spectre_auxcr(bool *warned, u32 bit) void cpu_v7_ca8_ibe(void) { if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(6))) - cpu_v7_spectre_init(); + cpu_v7_spectre_v2_init(); } void cpu_v7_ca15_ibe(void) { if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0))) - cpu_v7_spectre_init(); + cpu_v7_spectre_v2_init(); + cpu_v7_spectre_bhb_init(); } void cpu_v7_bugs_init(void) { - cpu_v7_spectre_init(); + cpu_v7_spectre_v2_init(); + cpu_v7_spectre_bhb_init(); } diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index c4e8006a1a8cd..48e0ef6f0dccf 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -644,7 +644,7 @@ __v7_setup_stack: string cpu_elf_name, "v7" .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" /* * Standard v7 proc info content diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index 1a49d503eafc8..84459c1d31b87 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -93,7 +93,7 @@ ENTRY(cpu_cm7_proc_fin) ret lr ENDPROC(cpu_cm7_proc_fin) - .section ".init.text", #alloc, #execinstr + .section ".init.text", "ax" __v7m_cm7_setup: mov r8, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC| V7M_SCB_CCR_BP) @@ -177,7 +177,7 @@ ENDPROC(__v7m_setup) string cpu_elf_name "v7m" string cpu_v7m_name "ARMv7-M" - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro __v7m_proc name, initfunc, cache_fns = nop_cache_fns, hwcaps = 0, proc_fns = v7m_processor_functions .long 0 /* proc_info_list.__cpu_mm_mmu_flags */ diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index 1ac0fbbe9f127..42eaecc43cfef 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -496,7 +496,7 @@ xsc3_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req .type __\name\()_proc_info,#object diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index bdb2b7749b039..18ac5a1f89225 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -610,7 +610,7 @@ xscale_crval: .align - .section ".proc.info.init", #alloc + .section ".proc.info.init", "a" .macro xscale_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache .type __\name\()_proc_info,#object diff --git a/arch/arm/mm/pv-fixup-asm.S b/arch/arm/mm/pv-fixup-asm.S index 769778928356e..6d081d1cdc691 100644 --- a/arch/arm/mm/pv-fixup-asm.S +++ b/arch/arm/mm/pv-fixup-asm.S @@ -39,8 +39,8 @@ ENTRY(lpae_pgtables_remap_asm) /* Update level 2 entries for the boot data */ add r7, r2, #0x1000 - add r7, r7, r3, lsr #SECTION_SHIFT - L2_ORDER - bic r7, r7, #(1 << L2_ORDER) - 1 + movw r3, #FDT_FIXED_BASE >> (SECTION_SHIFT - L2_ORDER) + add r7, r7, r3 ldrd r4, r5, [r7] adds r4, r4, r0 adc r5, r5, r1 diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 97dc386e3cb8e..1c6e57f1dbc48 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -36,6 +36,10 @@ * +-----+ * |RSVD | JIT scratchpad * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE) + * | ... | caller-saved registers + * +-----+ + * | ... | arguments passed on stack + * ARM_SP during call => +-----| * | | * | ... | Function call stack * | | @@ -63,6 +67,12 @@ * * When popping registers off the stack at the end of a BPF function, we * reference them via the current ARM_FP register. + * + * Some eBPF operations are implemented via a call to a helper function. + * Such calls are "invisible" in the eBPF code, so it is up to the calling + * program to preserve any caller-saved ARM registers during the call. The + * JIT emits code to push and pop those registers onto the stack, immediately + * above the callee stack frame. */ #define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \ 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R9 | \ @@ -70,6 +80,8 @@ #define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) #define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) +#define CALLER_MASK (1 << ARM_R0 | 1 << ARM_R1 | 1 << ARM_R2 | 1 << ARM_R3) + enum { /* Stack layout - these are offsets from (top of stack - 4) */ BPF_R2_HI, @@ -464,6 +476,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) { + const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1); const s8 *tmp = bpf2a32[TMP_REG_1]; #if __LINUX_ARM_ARCH__ == 7 @@ -495,11 +508,17 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) emit(ARM_MOV_R(ARM_R0, rm), ctx); } + /* Push caller-saved registers on stack */ + emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx); + /* Call appropriate function */ emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_udiv32 : (u32)jit_mod32, ctx); emit_blx_r(ARM_IP, ctx); + /* Restore caller-saved registers from stack */ + emit(ARM_POP(CALLER_MASK & ~exclude_mask), ctx); + /* Save return value */ if (rd != ARM_R0) emit(ARM_MOV_R(rd, ARM_R0), ctx); @@ -929,7 +948,11 @@ static inline void emit_a32_rsh_i64(const s8 dst[], rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do LSR operation */ - if (val < 32) { + if (val == 0) { + /* An immediate value of 0 encodes a shift amount of 32 + * for LSR. To shift by 0, don't do anything. + */ + } else if (val < 32) { emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_LSR, val), ctx); @@ -955,7 +978,11 @@ static inline void emit_a32_arsh_i64(const s8 dst[], rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do ARSH operation */ - if (val < 32) { + if (val == 0) { + /* An immediate value of 0 encodes a shift amount of 32 + * for ASR. To shift by 0, don't do anything. + */ + } else if (val < 32) { emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, val), ctx); @@ -992,21 +1019,35 @@ static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], arm_bpf_put_reg32(dst_hi, rd[0], ctx); } +static bool is_ldst_imm(s16 off, const u8 size) +{ + s16 off_max = 0; + + switch (size) { + case BPF_B: + case BPF_W: + off_max = 0xfff; + break; + case BPF_H: + off_max = 0xff; + break; + case BPF_DW: + /* Need to make sure off+4 does not overflow. */ + off_max = 0xfff - 4; + break; + } + return -off_max <= off && off <= off_max; +} + /* *(size *)(dst + off) = src */ static inline void emit_str_r(const s8 dst, const s8 src[], - s32 off, struct jit_ctx *ctx, const u8 sz){ + s16 off, struct jit_ctx *ctx, const u8 sz){ const s8 *tmp = bpf2a32[TMP_REG_1]; - s32 off_max; s8 rd; rd = arm_bpf_get_reg32(dst, tmp[1], ctx); - if (sz == BPF_H) - off_max = 0xff; - else - off_max = 0xfff; - - if (off < 0 || off > off_max) { + if (!is_ldst_imm(off, sz)) { emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx); rd = tmp[0]; @@ -1035,18 +1076,12 @@ static inline void emit_str_r(const s8 dst, const s8 src[], /* dst = *(size*)(src + off) */ static inline void emit_ldx_r(const s8 dst[], const s8 src, - s32 off, struct jit_ctx *ctx, const u8 sz){ + s16 off, struct jit_ctx *ctx, const u8 sz){ const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *rd = is_stacked(dst_lo) ? tmp : dst; s8 rm = src; - s32 off_max; - - if (sz == BPF_H) - off_max = 0xff; - else - off_max = 0xfff; - if (off < 0 || off > off_max) { + if (!is_ldst_imm(off, sz)) { emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); rm = tmp[0]; @@ -1586,6 +1621,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: diff --git a/arch/arm/plat-samsung/Kconfig b/arch/arm/plat-samsung/Kconfig index 301e572651c0f..790c87ee72716 100644 --- a/arch/arm/plat-samsung/Kconfig +++ b/arch/arm/plat-samsung/Kconfig @@ -241,6 +241,7 @@ config SAMSUNG_PM_DEBUG depends on PM && DEBUG_KERNEL depends on PLAT_S3C24XX || ARCH_S3C64XX || ARCH_S5PV210 depends on DEBUG_EXYNOS_UART || DEBUG_S3C24XX_UART || DEBUG_S3C2410_UART + depends on DEBUG_LL && MMU help Say Y here if you want verbose debugging from the PM Suspend and Resume code. See diff --git a/arch/arm/probes/decode.h b/arch/arm/probes/decode.h index 9731735989921..facc889d05eee 100644 --- a/arch/arm/probes/decode.h +++ b/arch/arm/probes/decode.h @@ -14,6 +14,7 @@ #include #include #include +#include #include void __init arm_probes_decode_init(void); @@ -35,31 +36,6 @@ void __init find_str_pc_offset(void); #endif -/* - * Update ITSTATE after normal execution of an IT block instruction. - * - * The 8 IT state bits are split into two parts in CPSR: - * ITSTATE<1:0> are in CPSR<26:25> - * ITSTATE<7:2> are in CPSR<15:10> - */ -static inline unsigned long it_advance(unsigned long cpsr) - { - if ((cpsr & 0x06000400) == 0) { - /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */ - cpsr &= ~PSR_IT_MASK; - } else { - /* We need to shift left ITSTATE<4:0> */ - const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */ - unsigned long it = cpsr & mask; - it <<= 1; - it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */ - it &= mask; - cpsr &= ~mask; - cpsr |= it; - } - return cpsr; -} - static inline void __kprobes bx_write_pc(long pcv, struct pt_regs *regs) { long cpsr = regs->ARM_cpsr; diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index 90b5bc723c83f..0a783bd4641c5 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -534,7 +534,7 @@ static struct undef_hook kprobes_arm_break_hook = { #endif /* !CONFIG_THUMB2_KERNEL */ -int __init arch_init_kprobes() +int __init arch_init_kprobes(void) { arm_probes_decode_init(); #ifdef CONFIG_THUMB2_KERNEL diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index 7a449df0b3591..c78180172120f 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -85,21 +85,21 @@ asm ( "optprobe_template_end:\n"); #define TMPL_VAL_IDX \ - ((unsigned long *)&optprobe_template_val - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_val - (unsigned long *)optprobe_template_entry) #define TMPL_CALL_IDX \ - ((unsigned long *)&optprobe_template_call - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_call - (unsigned long *)optprobe_template_entry) #define TMPL_END_IDX \ - ((unsigned long *)&optprobe_template_end - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_end - (unsigned long *)optprobe_template_entry) #define TMPL_ADD_SP \ - ((unsigned long *)&optprobe_template_add_sp - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_add_sp - (unsigned long *)optprobe_template_entry) #define TMPL_SUB_SP \ - ((unsigned long *)&optprobe_template_sub_sp - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_sub_sp - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_BEGIN \ - ((unsigned long *)&optprobe_template_restore_begin - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_begin - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_ORIGN_INSN \ - ((unsigned long *)&optprobe_template_restore_orig_insn - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_orig_insn - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_END \ - ((unsigned long *)&optprobe_template_restore_end - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_end - (unsigned long *)optprobe_template_entry) /* * ARM can always optimize an instruction when using ARM ISA, except @@ -234,7 +234,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *or } /* Copy arch-dep-instance from template. */ - memcpy(code, (unsigned long *)&optprobe_template_entry, + memcpy(code, (unsigned long *)optprobe_template_entry, TMPL_END_IDX * sizeof(kprobe_opcode_t)); /* Adjust buffer according to instruction. */ diff --git a/arch/arm/probes/kprobes/test-thumb.c b/arch/arm/probes/kprobes/test-thumb.c index 456c181a7bfe8..4e11f0b760f89 100644 --- a/arch/arm/probes/kprobes/test-thumb.c +++ b/arch/arm/probes/kprobes/test-thumb.c @@ -441,21 +441,21 @@ void kprobe_thumb32_test_cases(void) "3: mvn r0, r0 \n\t" "2: nop \n\t") - TEST_RX("tbh [pc, r",7, (9f-(1f+4))>>1,"]", + TEST_RX("tbh [pc, r",7, (9f-(1f+4))>>1,", lsl #1]", "9: \n\t" ".short (2f-1b-4)>>1 \n\t" ".short (3f-1b-4)>>1 \n\t" "3: mvn r0, r0 \n\t" "2: nop \n\t") - TEST_RX("tbh [pc, r",12, ((9f-(1f+4))>>1)+1,"]", + TEST_RX("tbh [pc, r",12, ((9f-(1f+4))>>1)+1,", lsl #1]", "9: \n\t" ".short (2f-1b-4)>>1 \n\t" ".short (3f-1b-4)>>1 \n\t" "3: mvn r0, r0 \n\t" "2: nop \n\t") - TEST_RRX("tbh [r",1,9f, ", r",14,1,"]", + TEST_RRX("tbh [r",1,9f, ", r",14,1,", lsl #1]", "9: \n\t" ".short (2f-1b-4)>>1 \n\t" ".short (3f-1b-4)>>1 \n\t" @@ -468,10 +468,10 @@ void kprobe_thumb32_test_cases(void) TEST_UNSUPPORTED("strexb r0, r1, [r2]") TEST_UNSUPPORTED("strexh r0, r1, [r2]") - TEST_UNSUPPORTED("strexd r0, r1, [r2]") + TEST_UNSUPPORTED("strexd r0, r1, r2, [r2]") TEST_UNSUPPORTED("ldrexb r0, [r1]") TEST_UNSUPPORTED("ldrexh r0, [r1]") - TEST_UNSUPPORTED("ldrexd r0, [r1]") + TEST_UNSUPPORTED("ldrexd r0, r1, [r1]") TEST_GROUP("Data-processing (shifted register) and (modified immediate)") diff --git a/arch/arm/probes/uprobes/core.c b/arch/arm/probes/uprobes/core.c index c4b49b322e8a8..f5f790c6e5f89 100644 --- a/arch/arm/probes/uprobes/core.c +++ b/arch/arm/probes/uprobes/core.c @@ -204,7 +204,7 @@ unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) static struct undef_hook uprobes_arm_break_hook = { .instr_mask = 0x0fffffff, .instr_val = (UPROBE_SWBP_ARM_INSN & 0x0fffffff), - .cpsr_mask = MODE_MASK, + .cpsr_mask = (PSR_T_BIT | MODE_MASK), .cpsr_val = USR_MODE, .fn = uprobe_trap_handler, }; @@ -212,7 +212,7 @@ static struct undef_hook uprobes_arm_break_hook = { static struct undef_hook uprobes_arm_ss_hook = { .instr_mask = 0x0fffffff, .instr_val = (UPROBE_SS_ARM_INSN & 0x0fffffff), - .cpsr_mask = MODE_MASK, + .cpsr_mask = (PSR_T_BIT | MODE_MASK), .cpsr_val = USR_MODE, .fn = uprobe_trap_handler, }; diff --git a/arch/arm/vfp/Makefile b/arch/arm/vfp/Makefile index 9975b63ac3b0d..749901a72d6dc 100644 --- a/arch/arm/vfp/Makefile +++ b/arch/arm/vfp/Makefile @@ -8,6 +8,4 @@ # ccflags-y := -DDEBUG # asflags-y := -DDEBUG -KBUILD_AFLAGS :=$(KBUILD_AFLAGS:-msoft-float=-Wa,-mfpu=softvfp+vfp -mfloat-abi=soft) - obj-y += vfpmodule.o entry.o vfphw.o vfpsingle.o vfpdouble.o diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S index 0186cf9da890b..27b0a1f27fbdf 100644 --- a/arch/arm/vfp/entry.S +++ b/arch/arm/vfp/entry.S @@ -37,20 +37,3 @@ ENDPROC(vfp_null_entry) .align 2 .LCvfp: .word vfp_vector - -@ This code is called if the VFP does not exist. It needs to flag the -@ failure to the VFP initialisation code. - - __INIT -ENTRY(vfp_testing_entry) - dec_preempt_count_ti r10, r4 - ldr r0, VFP_arch_address - str r0, [r0] @ set to non-zero value - ret r9 @ we have handled the fault -ENDPROC(vfp_testing_entry) - - .align 2 -VFP_arch_address: - .word VFP_arch - - __FINIT diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S index b2e560290860e..772c6a3b1f724 100644 --- a/arch/arm/vfp/vfphw.S +++ b/arch/arm/vfp/vfphw.S @@ -78,11 +78,6 @@ ENTRY(vfp_support_entry) DBGSTR3 "instr %08x pc %08x state %p", r0, r2, r10 - ldr r3, [sp, #S_PSR] @ Neither lazy restore nor FP exceptions - and r3, r3, #MODE_MASK @ are supported in kernel mode - teq r3, #USR_MODE - bne vfp_kmode_exception @ Returns through lr - VFPFMRX r1, FPEXC @ Is the VFP enabled? DBGSTR1 "fpexc %08x", r1 tst r1, #FPEXC_EN @@ -258,11 +253,14 @@ vfp_current_hw_state_address: ENTRY(vfp_get_float) tbl_branch r0, r3, #3 + .fpu vfpv2 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mrc p10, 0, r0, c\dr, c0, 0 @ fmrs r0, s0 +1: vmov r0, s\dr ret lr .org 1b + 8 -1: mrc p10, 0, r0, c\dr, c0, 4 @ fmrs r0, s1 + .endr + .irp dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 +1: vmov r0, s\dr ret lr .org 1b + 8 .endr @@ -270,11 +268,14 @@ ENDPROC(vfp_get_float) ENTRY(vfp_put_float) tbl_branch r1, r3, #3 + .fpu vfpv2 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mcr p10, 0, r0, c\dr, c0, 0 @ fmsr r0, s0 +1: vmov s\dr, r0 ret lr .org 1b + 8 -1: mcr p10, 0, r0, c\dr, c0, 4 @ fmsr r0, s1 + .endr + .irp dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 +1: vmov s\dr, r0 ret lr .org 1b + 8 .endr @@ -282,15 +283,17 @@ ENDPROC(vfp_put_float) ENTRY(vfp_get_double) tbl_branch r0, r3, #3 + .fpu vfpv2 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: fmrrd r0, r1, d\dr +1: vmov r0, r1, d\dr ret lr .org 1b + 8 .endr #ifdef CONFIG_VFPv3 @ d16 - d31 registers - .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mrrc p11, 3, r0, r1, c\dr @ fmrrd r0, r1, d\dr + .fpu vfpv3 + .irp dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 +1: vmov r0, r1, d\dr ret lr .org 1b + 8 .endr @@ -304,15 +307,17 @@ ENDPROC(vfp_get_double) ENTRY(vfp_put_double) tbl_branch r2, r3, #3 + .fpu vfpv2 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: fmdrr d\dr, r0, r1 +1: vmov d\dr, r0, r1 ret lr .org 1b + 8 .endr #ifdef CONFIG_VFPv3 + .fpu vfpv3 @ d16 - d31 registers - .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -1: mcrr p11, 3, r0, r1, c\dr @ fmdrr r0, r1, d\dr + .irp dr,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 +1: vmov d\dr, r0, r1 ret lr .org 1b + 8 .endr diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 8c9e7f9f0277d..2cb355c1b5b71 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "vfpinstr.h" @@ -31,7 +32,6 @@ /* * Our undef handlers (in entry.S) */ -asmlinkage void vfp_testing_entry(void); asmlinkage void vfp_support_entry(void); asmlinkage void vfp_null_entry(void); @@ -42,7 +42,7 @@ asmlinkage void (*vfp_vector)(void) = vfp_null_entry; * Used in startup: set to non-zero if VFP checks fail * After startup, holds VFP architecture */ -unsigned int VFP_arch; +static unsigned int __initdata VFP_arch; /* * The pointer to the vfpstate structure of the thread which currently @@ -436,7 +436,7 @@ static void vfp_enable(void *unused) * present on all CPUs within a SMP complex. Needs to be called prior to * vfp_init(). */ -void vfp_disable(void) +void __init vfp_disable(void) { if (VFP_arch) { pr_debug("%s: should be called prior to vfp_init\n", __func__); @@ -642,7 +642,9 @@ static int vfp_starting_cpu(unsigned int unused) return 0; } -void vfp_kmode_exception(void) +#ifdef CONFIG_KERNEL_MODE_NEON + +static int vfp_kmode_exception(struct pt_regs *regs, unsigned int instr) { /* * If we reach this point, a floating point exception has been raised @@ -660,9 +662,51 @@ void vfp_kmode_exception(void) pr_crit("BUG: unsupported FP instruction in kernel mode\n"); else pr_crit("BUG: FP instruction issued in kernel mode with FP unit disabled\n"); + pr_crit("FPEXC == 0x%08x\n", fmrx(FPEXC)); + return 1; } -#ifdef CONFIG_KERNEL_MODE_NEON +static struct undef_hook vfp_kmode_exception_hook[] = {{ + .instr_mask = 0xfe000000, + .instr_val = 0xf2000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE, + .fn = vfp_kmode_exception, +}, { + .instr_mask = 0xff100000, + .instr_val = 0xf4000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE, + .fn = vfp_kmode_exception, +}, { + .instr_mask = 0xef000000, + .instr_val = 0xef000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE | PSR_T_BIT, + .fn = vfp_kmode_exception, +}, { + .instr_mask = 0xff100000, + .instr_val = 0xf9000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE | PSR_T_BIT, + .fn = vfp_kmode_exception, +}, { + .instr_mask = 0x0c000e00, + .instr_val = 0x0c000a00, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, + .fn = vfp_kmode_exception, +}}; + +static int __init vfp_kmode_exception_hook_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vfp_kmode_exception_hook); i++) + register_undef_hook(&vfp_kmode_exception_hook[i]); + return 0; +} +subsys_initcall(vfp_kmode_exception_hook_init); /* * Kernel-side NEON support functions @@ -708,6 +752,21 @@ EXPORT_SYMBOL(kernel_neon_end); #endif /* CONFIG_KERNEL_MODE_NEON */ +static int __init vfp_detect(struct pt_regs *regs, unsigned int instr) +{ + VFP_arch = UINT_MAX; /* mark as not present */ + regs->ARM_pc += 4; + return 0; +} + +static struct undef_hook vfp_detect_hook __initdata = { + .instr_mask = 0x0c000e00, + .instr_val = 0x0c000a00, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, + .fn = vfp_detect, +}; + /* * VFP support code initialisation. */ @@ -728,10 +787,11 @@ static int __init vfp_init(void) * The handler is already setup to just log calls, so * we just need to read the VFPSID register. */ - vfp_vector = vfp_testing_entry; + register_undef_hook(&vfp_detect_hook); barrier(); vfpsid = fmrx(FPSID); barrier(); + unregister_undef_hook(&vfp_detect_hook); vfp_vector = vfp_null_entry; pr_info("VFP support v0.3: "); diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index dd6804a64f1a0..57dfc13b27529 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -370,8 +370,6 @@ static int __init xen_guest_init(void) return -ENOMEM; } gnttab_init(); - if (!xen_initial_domain()) - xenbus_probe(NULL); /* * Making sure board specific code will not set up ops for diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index e52950a43f2ed..4a1991a103ea0 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -62,11 +62,12 @@ static int xen_add_phys_to_mach_entry(struct xen_p2m_entry *new) unsigned long __pfn_to_mfn(unsigned long pfn) { - struct rb_node *n = phys_to_mach.rb_node; + struct rb_node *n; struct xen_p2m_entry *entry; unsigned long irqflags; read_lock_irqsave(&p2m_lock, irqflags); + n = phys_to_mach.rb_node; while (n) { entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys); if (entry->pfn <= pfn && @@ -93,10 +94,39 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, int i; for (i = 0; i < count; i++) { + struct gnttab_unmap_grant_ref unmap; + int rc; + if (map_ops[i].status) continue; - set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT, - map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT); + if (likely(set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT, + map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT))) + continue; + + /* + * Signal an error for this slot. This in turn requires + * immediate unmapping. + */ + map_ops[i].status = GNTST_general_error; + unmap.host_addr = map_ops[i].host_addr, + unmap.handle = map_ops[i].handle; + map_ops[i].handle = ~0; + if (map_ops[i].flags & GNTMAP_device_map) + unmap.dev_bus_addr = map_ops[i].dev_bus_addr; + else + unmap.dev_bus_addr = 0; + + /* + * Pre-populate the status field, to be recognizable in + * the log message below. + */ + unmap.status = 1; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, + &unmap, 1); + if (rc || unmap.status != GNTST_okay) + pr_err_once("gnttab unmap failed: rc=%d st=%d\n", + rc, unmap.status); } return 0; @@ -124,10 +154,11 @@ bool __set_phys_to_machine_multi(unsigned long pfn, int rc; unsigned long irqflags; struct xen_p2m_entry *p2m_entry; - struct rb_node *n = phys_to_mach.rb_node; + struct rb_node *n; if (mfn == INVALID_P2M_ENTRY) { write_lock_irqsave(&p2m_lock, irqflags); + n = phys_to_mach.rb_node; while (n) { p2m_entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys); if (p2m_entry->pfn <= pfn && diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3f047afb982c8..56f1e146613d4 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -139,6 +139,7 @@ config ARM64 select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL select HAVE_CONTEXT_TRACKING + select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS @@ -180,7 +181,6 @@ config ARM64 select PCI_SYSCALL if PCI select POWER_RESET select POWER_SUPPLY - select REFCOUNT_FULL select SPARSE_IRQ select SWIOTLB select SYSCTL_EXCEPTION_TRACE @@ -488,7 +488,7 @@ config ARM64_ERRATUM_1024718 help This option adds a workaround for ARM Cortex-A55 Erratum 1024718. - Affected Cortex-A55 cores (r0p0, r0p1, r1p0) could cause incorrect + Affected Cortex-A55 cores (all revisions) could cause incorrect update of the hardware dirty bit when the DBM/AP bits are updated without a break-before-make. The workaround is to disable the usage of hardware DBM locally on the affected cores. CPUs not affected by @@ -558,6 +558,22 @@ config ARM64_ERRATUM_1463225 If unsure, say Y. +config ARM64_ERRATUM_1542419 + bool "Neoverse-N1: workaround mis-ordering of instruction fetches" + default y + help + This option adds a workaround for ARM Neoverse-N1 erratum + 1542419. + + Affected Neoverse-N1 cores could execute a stale instruction when + modified by another CPU. The workaround depends on a firmware + counterpart. + + Workaround the issue by hiding the DIC feature from EL0. This + forces user-space to perform cache maintenance. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y @@ -1048,6 +1064,7 @@ config XEN config FORCE_MAX_ZONEORDER int default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE) + default "13" if (ARCH_THUNDER && ARM64_4K_PAGES) default "12" if (ARM64_16K_PAGES && TRANSPARENT_HUGEPAGE) default "11" help @@ -1122,6 +1139,15 @@ config ARM64_SSBD If unsure, say Y. +config MITIGATE_SPECTRE_BRANCH_HISTORY + bool "Mitigate Spectre style attacks against branch history" if EXPERT + default y + help + Speculation attacks against some high-performance processors can + make use of branch history to influence future speculation. + When taking an exception from user-space, a sequence of branches + or a firmware call overwrites the branch history. + config RODATA_FULL_DEFAULT_ENABLED bool "Apply r/o permissions of VM areas also to their linear aliases" default y diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 16d761475a860..90202e5608d1e 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -54,6 +54,7 @@ config ARCH_BCM_IPROC config ARCH_BERLIN bool "Marvell Berlin SoC Family" select DW_APB_ICTL + select DW_APB_TIMER_OF select GPIOLIB select PINCTRL help @@ -224,6 +225,7 @@ config ARCH_STRATIX10 config ARCH_SYNQUACER bool "Socionext SynQuacer SoC Family" + select IRQ_FASTEOI_HIERARCHY_HANDLERS config ARCH_TEGRA bool "NVIDIA Tegra SoC Family" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 2c0238ce05515..d227cf87c48f3 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -18,7 +18,7 @@ ifeq ($(CONFIG_RELOCATABLE), y) # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour # for relative relocs, since this leads to better Image compression # with the relocation offsets always being zero. -LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro \ +LDFLAGS_vmlinux += -shared -Bsymbolic -z notext \ $(call ld-option, --no-apply-dynamic-relocs) endif @@ -72,23 +72,31 @@ stack_protector_prepare: prepare0 include/generated/asm-offsets.h)) endif +# Ensure that if the compiler supports branch protection we default it +# off. +KBUILD_CFLAGS += $(call cc-option,-mbranch-protection=none) + ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian CHECKFLAGS += -D__AARCH64EB__ AS += -EB # Prefer the baremetal ELF build target, but not all toolchains include # it so fall back to the standard linux version if needed. -KBUILD_LDFLAGS += -EB $(call ld-option, -maarch64elfb, -maarch64linuxb) +KBUILD_LDFLAGS += -EB $(call ld-option, -maarch64elfb, -maarch64linuxb -z norelro) UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian CHECKFLAGS += -D__AARCH64EL__ AS += -EL # Same as above, prefer ELF but fall back to linux target if needed. -KBUILD_LDFLAGS += -EL $(call ld-option, -maarch64elf, -maarch64linux) +KBUILD_LDFLAGS += -EL $(call ld-option, -maarch64elf, -maarch64linux -z norelro) UTS_MACHINE := aarch64 endif +ifeq ($(CONFIG_LD_IS_LLD), y) +KBUILD_LDFLAGS += -z norelro +endif + CHECKFLAGS += -D__aarch64__ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) @@ -142,6 +150,8 @@ zinstall install: PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@ + $(if $(CONFIG_COMPAT_VDSO), \ + $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 $@) # We use MRPROPER_FILES and CLEAN_FILES now archclean: diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 1f012c5064343..cd3414898d10f 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -16,7 +16,7 @@ OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S -targets := Image Image.gz +targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) diff --git a/arch/arm64/boot/dts/actions/s700.dtsi b/arch/arm64/boot/dts/actions/s700.dtsi index 2006ad5424fa6..f8eb72bb41254 100644 --- a/arch/arm64/boot/dts/actions/s700.dtsi +++ b/arch/arm64/boot/dts/actions/s700.dtsi @@ -231,7 +231,7 @@ pinctrl: pinctrl@e01b0000 { compatible = "actions,s700-pinctrl"; - reg = <0x0 0xe01b0000 0x0 0x1000>; + reg = <0x0 0xe01b0000 0x0 0x100>; clocks = <&cmu CLK_GPIO>; gpio-controller; gpio-ranges = <&pinctrl 0 0 136>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts index 208373efee494..7d1e89e5b1ae4 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts @@ -127,7 +127,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_dc1sw>; status = "okay"; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino-emmc.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino-emmc.dts index 96ab0227e82d1..121e6cc4849b4 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino-emmc.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino-emmc.dts @@ -15,7 +15,7 @@ pinctrl-names = "default"; pinctrl-0 = <&mmc2_pins>; vmmc-supply = <®_dcdc1>; - vqmmc-supply = <®_dcdc1>; + vqmmc-supply = <®_eldo1>; bus-width = <8>; non-removable; cap-mmc-hw-reset; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts index 01a9a52edae4a..393c1948a4959 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts @@ -140,7 +140,7 @@ &mmc1 { pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins>; - vmmc-supply = <®_aldo2>; + vmmc-supply = <®_dcdc1>; vqmmc-supply = <®_dldo4>; mmc-pwrseq = <&wifi_pwrseq>; bus-width = <4>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts index 04446e4716c44..963a7c505e30f 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts @@ -78,7 +78,7 @@ leds { compatible = "gpio-leds"; - status { + led-0 { label = "orangepi:green:status"; gpios = <&pio 7 11 GPIO_ACTIVE_HIGH>; /* PH11 */ }; @@ -129,7 +129,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_gmac_3v3>; status = "okay"; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts index 72d6961dc3128..7eb252adf9f03 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-lts.dts @@ -11,3 +11,7 @@ compatible = "pine64,pine64-lts", "allwinner,sun50i-r18", "allwinner,sun50i-a64"; }; + +&mmc0 { + broken-cd; /* card detect is broken on *some* boards */ +}; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts index d5b6e8159a335..5d0905f0f1c1d 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts @@ -52,7 +52,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-txid"; phy-handle = <&ext_rgmii_phy>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts index 78c82a665c84a..bb1de8217b86d 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts @@ -103,8 +103,6 @@ }; &ehci0 { - phys = <&usbphy 0>; - phy-names = "usb"; status = "okay"; }; @@ -142,6 +140,7 @@ pinctrl-0 = <&mmc2_pins>, <&mmc2_ds_pin>; vmmc-supply = <®_dcdc1>; vqmmc-supply = <®_eldo1>; + max-frequency = <200000000>; bus-width = <8>; non-removable; cap-mmc-hw-reset; @@ -150,8 +149,6 @@ }; &ohci0 { - phys = <&usbphy 0>; - phy-names = "usb"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi index 9d20e13f0c02b..19e5b7e298fdf 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi @@ -55,10 +55,9 @@ pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins>; vmmc-supply = <®_dcdc1>; - non-removable; disable-wp; bus-width = <4>; - cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>; /* PF6 */ + cd-gpios = <&pio 5 6 GPIO_ACTIVE_HIGH>; /* PF6 push-pull switch */ status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi index 70f4cce6be43e..cf9e3234afaf8 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi @@ -142,6 +142,15 @@ clock-output-names = "ext-osc32k"; }; + pmu { + compatible = "arm,cortex-a53-pmu"; + interrupts = , + , + , + ; + interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>; + }; + psci { compatible = "arm,psci-0.2"; method = "smc"; @@ -218,7 +227,7 @@ display_clocks: clock@0 { compatible = "allwinner,sun50i-a64-de2-clk"; - reg = <0x0 0x100000>; + reg = <0x0 0x10000>; clocks = <&ccu CLK_BUS_DE>, <&ccu CLK_DE>; clock-names = "bus", @@ -467,7 +476,7 @@ resets = <&ccu RST_BUS_MMC2>; reset-names = "ahb"; interrupts = ; - max-frequency = <200000000>; + max-frequency = <150000000>; status = "disabled"; #address-cells = <1>; #size-cells = <0>; @@ -521,6 +530,8 @@ <&ccu CLK_USB_OHCI0>; resets = <&ccu RST_BUS_OHCI0>, <&ccu RST_BUS_EHCI0>; + phys = <&usbphy 0>; + phy-names = "usb"; status = "disabled"; }; @@ -531,6 +542,8 @@ clocks = <&ccu CLK_BUS_OHCI0>, <&ccu CLK_USB_OHCI0>; resets = <&ccu RST_BUS_OHCI0>; + phys = <&usbphy 0>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts index 57a6f45036c1f..d7177465b0968 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts @@ -114,7 +114,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts index e126c1c9f05ce..4d357b81b0c00 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts @@ -157,7 +157,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts index d9b3ed257088a..f10340339007f 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts @@ -164,7 +164,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts index db6ea7b58999b..19f930f43936e 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts @@ -72,7 +72,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi index f002a496d7cbb..8466d44ee0b15 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi @@ -54,21 +54,21 @@ enable-method = "psci"; }; - cpu@1 { + cpu1: cpu@1 { compatible = "arm,cortex-a53"; device_type = "cpu"; reg = <1>; enable-method = "psci"; }; - cpu@2 { + cpu2: cpu@2 { compatible = "arm,cortex-a53"; device_type = "cpu"; reg = <2>; enable-method = "psci"; }; - cpu@3 { + cpu3: cpu@3 { compatible = "arm,cortex-a53"; device_type = "cpu"; reg = <3>; @@ -76,6 +76,15 @@ }; }; + pmu { + compatible = "arm,cortex-a53-pmu"; + interrupts = , + , + , + ; + interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>; + }; + psci { compatible = "arm,psci-0.2"; method = "smc"; @@ -146,8 +155,7 @@ , , , - , - ; + ; interrupt-names = "gp", "gpmmu", "pp", @@ -158,8 +166,7 @@ "pp2", "ppmmu2", "pp3", - "ppmmu3", - "pmu"; + "ppmmu3"; clocks = <&ccu CLK_BUS_GPU>, <&ccu CLK_GPU>; clock-names = "bus", "core"; resets = <&ccu RST_BUS_GPU>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts index 1d05d570142fa..0a04730960fcb 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts @@ -83,7 +83,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&ext_rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_aldo2>; status = "okay"; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts index 30102daf83cc6..3f2882b36616d 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts @@ -66,7 +66,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&ext_rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_aldo2>; allwinner,rx-delay-ps = <200>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi index 0d5ea19336a19..1583cd5915214 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi @@ -70,6 +70,15 @@ clock-output-names = "ext_osc32k"; }; + pmu { + compatible = "arm,cortex-a53-pmu"; + interrupts = , + , + , + ; + interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>; + }; + psci { compatible = "arm,psci-0.2"; method = "smc"; @@ -323,6 +332,7 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins>; + max-frequency = <150000000>; status = "disabled"; #address-cells = <1>; #size-cells = <0>; @@ -339,6 +349,7 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins>; + max-frequency = <150000000>; status = "disabled"; #address-cells = <1>; #size-cells = <0>; @@ -355,6 +366,7 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&mmc2_pins>; + max-frequency = <150000000>; status = "disabled"; #address-cells = <1>; #size-cells = <0>; @@ -524,6 +536,8 @@ <&ccu CLK_USB_OHCI0>; resets = <&ccu RST_BUS_OHCI0>, <&ccu RST_BUS_EHCI0>; + phys = <&usb2phy 0>; + phy-names = "usb"; status = "disabled"; }; @@ -534,6 +548,8 @@ clocks = <&ccu CLK_BUS_OHCI0>, <&ccu CLK_USB_OHCI0>; resets = <&ccu RST_BUS_OHCI0>; + phys = <&usb2phy 0>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 144a2c19ac026..9498d1de730ce 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -61,10 +61,10 @@ pmu { compatible = "arm,armv8-pmuv3"; - interrupts = <0 120 8>, - <0 121 8>, - <0 122 8>, - <0 123 8>; + interrupts = <0 170 4>, + <0 171 4>, + <0 172 4>, + <0 173 4>; interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, @@ -77,7 +77,7 @@ method = "smc"; }; - intc: intc@fffc1000 { + intc: interrupt-controller@fffc1000 { compatible = "arm,gic-400", "arm,cortex-a15-gic"; #interrupt-cells = <3>; interrupt-controller; @@ -302,7 +302,7 @@ status = "disabled"; }; - nand: nand@ffb90000 { + nand: nand-controller@ffb90000 { #address-cells = <1>; #size-cells = <0>; compatible = "altr,socfpga-denali-nand"; @@ -445,7 +445,7 @@ clock-names = "timer"; }; - uart0: serial0@ffc02000 { + uart0: serial@ffc02000 { compatible = "snps,dw-apb-uart"; reg = <0xffc02000 0x100>; interrupts = <0 108 4>; @@ -456,7 +456,7 @@ status = "disabled"; }; - uart1: serial1@ffc02100 { + uart1: serial@ffc02100 { compatible = "snps,dw-apb-uart"; reg = <0xffc02100 0x100>; interrupts = <0 109 4>; diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts index 66e4ffb4e929d..2c8c2b322c727 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts @@ -155,6 +155,7 @@ }; &qspi { + status = "okay"; flash@0 { #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi index 82919b106010a..502c4ac45c29e 100644 --- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi @@ -1162,7 +1162,7 @@ toddr_a: audio-controller@100 { compatible = "amlogic,axg-toddr"; - reg = <0x0 0x100 0x0 0x1c>; + reg = <0x0 0x100 0x0 0x2c>; #sound-dai-cells = <0>; sound-name-prefix = "TODDR_A"; interrupts = ; @@ -1173,7 +1173,7 @@ toddr_b: audio-controller@140 { compatible = "amlogic,axg-toddr"; - reg = <0x0 0x140 0x0 0x1c>; + reg = <0x0 0x140 0x0 0x2c>; #sound-dai-cells = <0>; sound-name-prefix = "TODDR_B"; interrupts = ; @@ -1184,7 +1184,7 @@ toddr_c: audio-controller@180 { compatible = "amlogic,axg-toddr"; - reg = <0x0 0x180 0x0 0x1c>; + reg = <0x0 0x180 0x0 0x2c>; #sound-dai-cells = <0>; sound-name-prefix = "TODDR_C"; interrupts = ; @@ -1195,7 +1195,7 @@ frddr_a: audio-controller@1c0 { compatible = "amlogic,axg-frddr"; - reg = <0x0 0x1c0 0x0 0x1c>; + reg = <0x0 0x1c0 0x0 0x2c>; #sound-dai-cells = <0>; sound-name-prefix = "FRDDR_A"; interrupts = ; @@ -1206,7 +1206,7 @@ frddr_b: audio-controller@200 { compatible = "amlogic,axg-frddr"; - reg = <0x0 0x200 0x0 0x1c>; + reg = <0x0 0x200 0x0 0x2c>; #sound-dai-cells = <0>; sound-name-prefix = "FRDDR_B"; interrupts = ; @@ -1217,7 +1217,7 @@ frddr_c: audio-controller@240 { compatible = "amlogic,axg-frddr"; - reg = <0x0 0x240 0x0 0x1c>; + reg = <0x0 0x240 0x0 0x2c>; #sound-dai-cells = <0>; sound-name-prefix = "FRDDR_C"; interrupts = ; @@ -1728,18 +1728,18 @@ }; sram: sram@fffc0000 { - compatible = "amlogic,meson-axg-sram", "mmio-sram"; + compatible = "mmio-sram"; reg = <0x0 0xfffc0000 0x0 0x20000>; #address-cells = <1>; #size-cells = <1>; ranges = <0 0x0 0xfffc0000 0x20000>; - cpu_scp_lpri: scp-shmem@13000 { + cpu_scp_lpri: scp-sram@13000 { compatible = "amlogic,meson-axg-scp-shmem"; reg = <0x13000 0x400>; }; - cpu_scp_hpri: scp-shmem@13400 { + cpu_scp_hpri: scp-sram@13400 { compatible = "amlogic,meson-axg-scp-shmem"; reg = <0x13400 0x400>; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi index 3f39e020f74e6..d2d255a988a81 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi @@ -76,6 +76,12 @@ no-map; }; + /* 32 MiB reserved for ARM Trusted Firmware (BL32) */ + secmon_reserved_bl32: secmon@5300000 { + reg = <0x0 0x05300000 0x0 0x2000000>; + no-map; + }; + linux,cma { compatible = "shared-dma-pool"; reusable; @@ -167,6 +173,8 @@ hwrng: rng@218 { compatible = "amlogic,meson-rng"; reg = <0x0 0x218 0x0 0x4>; + clocks = <&clkc CLKID_RNG0>; + clock-names = "core"; }; }; @@ -1509,7 +1517,7 @@ toddr_a: audio-controller@100 { compatible = "amlogic,g12a-toddr", "amlogic,axg-toddr"; - reg = <0x0 0x100 0x0 0x1c>; + reg = <0x0 0x100 0x0 0x2c>; #sound-dai-cells = <0>; sound-name-prefix = "TODDR_A"; interrupts = ; @@ -1521,7 +1529,7 @@ toddr_b: audio-controller@140 { compatible = "amlogic,g12a-toddr", "amlogic,axg-toddr"; - reg = <0x0 0x140 0x0 0x1c>; + reg = <0x0 0x140 0x0 0x2c>; #sound-dai-cells = <0>; sound-name-prefix = "TODDR_B"; interrupts = ; @@ -1533,7 +1541,7 @@ toddr_c: audio-controller@180 { compatible = "amlogic,g12a-toddr", "amlogic,axg-toddr"; - reg = <0x0 0x180 0x0 0x1c>; + reg = <0x0 0x180 0x0 0x2c>; #sound-dai-cells = <0>; sound-name-prefix = "TODDR_C"; interrupts = ; @@ -1545,7 +1553,7 @@ frddr_a: audio-controller@1c0 { compatible = "amlogic,g12a-frddr", "amlogic,axg-frddr"; - reg = <0x0 0x1c0 0x0 0x1c>; + reg = <0x0 0x1c0 0x0 0x2c>; #sound-dai-cells = <0>; sound-name-prefix = "FRDDR_A"; interrupts = ; @@ -1557,7 +1565,7 @@ frddr_b: audio-controller@200 { compatible = "amlogic,g12a-frddr", "amlogic,axg-frddr"; - reg = <0x0 0x200 0x0 0x1c>; + reg = <0x0 0x200 0x0 0x2c>; #sound-dai-cells = <0>; sound-name-prefix = "FRDDR_B"; interrupts = ; @@ -1569,7 +1577,7 @@ frddr_c: audio-controller@240 { compatible = "amlogic,g12a-frddr", "amlogic,axg-frddr"; - reg = <0x0 0x240 0x0 0x1c>; + reg = <0x0 0x240 0x0 0x2c>; #sound-dai-cells = <0>; sound-name-prefix = "FRDDR_C"; interrupts = ; @@ -2365,7 +2373,7 @@ reg = <0x0 0xff400000 0x0 0x40000>; interrupts = ; clocks = <&clkc CLKID_USB1_DDR_BRIDGE>; - clock-names = "ddr"; + clock-names = "otg"; phys = <&usb2_phy1>; phy-names = "usb2-phy"; dr_mode = "peripheral"; @@ -2380,7 +2388,8 @@ interrupts = ; dr_mode = "host"; snps,dis_u2_susphy_quirk; - snps,quirk-frame-length-adjustment; + snps,quirk-frame-length-adjustment = <0x20>; + snps,parkmode-disable-ss-quirk; }; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts index c9fa23a565624..c76bf498ee388 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts +++ b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts @@ -139,7 +139,7 @@ regulator-min-microvolt = <721000>; regulator-max-microvolt = <1022000>; - vin-supply = <&dc_in>; + pwm-supply = <&dc_in>; pwms = <&pwm_AO_cd 1 1250 0>; pwm-dutycycle-range = <100 0>; @@ -157,14 +157,6 @@ regulator-always-on; }; - reserved-memory { - /* TEE Reserved Memory */ - bl32_reserved: bl32@5000000 { - reg = <0x0 0x05300000 0x0 0x2000000>; - no-map; - }; - }; - sdio_pwrseq: sdio-pwrseq { compatible = "mmc-pwrseq-simple"; reset-gpios = <&gpio GPIOX_6 GPIO_ACTIVE_LOW>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts index 2a324f0136e3f..02ec6eda03b1c 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts +++ b/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts @@ -139,7 +139,7 @@ regulator-min-microvolt = <721000>; regulator-max-microvolt = <1022000>; - vin-supply = <&main_12v>; + pwm-supply = <&main_12v>; pwms = <&pwm_AO_cd 1 1250 0>; pwm-dutycycle-range = <100 0>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts index 17155fb73fce9..5209c44fda01a 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts +++ b/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts @@ -139,7 +139,7 @@ regulator-min-microvolt = <721000>; regulator-max-microvolt = <1022000>; - vin-supply = <&dc_in>; + pwm-supply = <&dc_in>; pwms = <&pwm_AO_cd 1 1250 0>; pwm-dutycycle-range = <100 0>; @@ -340,7 +340,7 @@ eee-broken-1000t; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_15 (GPIO_ACTIVE_LOW | GPIO_OPEN_DRAIN)>; interrupt-parent = <&gpio_intc>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi index d61f43052a344..8e9ad1e51d665 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-a311d.dtsi @@ -11,26 +11,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <731000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <731000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <731000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <667000000>; - opp-microvolt = <731000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <761000>; @@ -71,26 +51,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <731000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <731000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <731000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <667000000>; - opp-microvolt = <731000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <731000>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-khadas-vim3.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-khadas-vim3.dtsi index 554863429aa6d..e2094575f5282 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-khadas-vim3.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-khadas-vim3.dtsi @@ -152,6 +152,10 @@ clock-latency = <50000>; }; +&frddr_a { + status = "okay"; +}; + &frddr_b { status = "okay"; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi index 046cc332d07f9..f2543da63c39d 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-s922x.dtsi @@ -11,26 +11,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <731000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <731000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <731000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <667000000>; - opp-microvolt = <731000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <731000>; @@ -71,26 +51,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <751000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <751000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <751000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <667000000>; - opp-microvolt = <751000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <771000>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 6733050d735fe..ad7bc0eec6682 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -41,6 +41,12 @@ no-map; }; + /* 32 MiB reserved for ARM Trusted Firmware (BL32) */ + secmon_reserved_bl32: secmon@5300000 { + reg = <0x0 0x05300000 0x0 0x2000000>; + no-map; + }; + linux,cma { compatible = "shared-dma-pool"; reusable; @@ -345,20 +351,20 @@ }; sram: sram@c8000000 { - compatible = "amlogic,meson-gx-sram", "amlogic,meson-gxbb-sram", "mmio-sram"; + compatible = "mmio-sram"; reg = <0x0 0xc8000000 0x0 0x14000>; #address-cells = <1>; #size-cells = <1>; ranges = <0 0x0 0xc8000000 0x14000>; - cpu_scp_lpri: scp-shmem@0 { - compatible = "amlogic,meson-gx-scp-shmem", "amlogic,meson-gxbb-scp-shmem"; + cpu_scp_lpri: scp-sram@0 { + compatible = "amlogic,meson-gxbb-scp-shmem"; reg = <0x13000 0x400>; }; - cpu_scp_hpri: scp-shmem@200 { - compatible = "amlogic,meson-gx-scp-shmem", "amlogic,meson-gxbb-scp-shmem"; + cpu_scp_hpri: scp-sram@200 { + compatible = "amlogic,meson-gxbb-scp-shmem"; reg = <0x13400 0x400>; }; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts index 233eb1cd79671..d94b695916a35 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts @@ -165,7 +165,7 @@ reg = <0>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_14 GPIO_ACTIVE_LOW>; interrupt-parent = <&gpio_intc>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts index 6039adda12eec..8828acb3fd4c5 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts @@ -138,7 +138,7 @@ reg = <0>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_14 GPIO_ACTIVE_LOW>; interrupt-parent = <&gpio_intc>; @@ -296,7 +296,7 @@ }; &usb0_phy { - status = "okay"; + status = "disabled"; phy-supply = <&usb_otg_pwr>; }; @@ -306,7 +306,7 @@ }; &usb0 { - status = "okay"; + status = "disabled"; }; &usb1 { diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi index 43b11e3dfe119..29976215e1446 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-vega-s95.dtsi @@ -126,7 +126,7 @@ reg = <0>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_14 GPIO_ACTIVE_LOW>; interrupt-parent = <&gpio_intc>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi index 4c539881fbb73..e94f09c2d4e32 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi @@ -6,6 +6,7 @@ */ #include "meson-gxbb.dtsi" +#include / { aliases { @@ -64,6 +65,7 @@ regulator-name = "VDDIO_AO18"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + regulator-always-on; }; vcc_3v3: regulator-vcc_3v3 { @@ -147,7 +149,7 @@ reg = <0>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_14 GPIO_ACTIVE_LOW>; }; }; @@ -157,6 +159,7 @@ status = "okay"; pinctrl-0 = <&hdmi_hpd_pins>, <&hdmi_i2c_pins>; pinctrl-names = "default"; + hdmi-supply = <&vddio_ao18>; }; &hdmi_tx_tmds_port { diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts index 82b1c48511478..e034bbff8e66e 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-libretech-ac.dts @@ -9,7 +9,7 @@ #include -#include "meson-gxl-s905x.dtsi" +#include "meson-gxl-s805x.dtsi" / { compatible = "libretech,aml-s805x-ac", "amlogic,s805x", diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts index 3a1484e5b8e1d..fbc687c9ff839 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x-p241.dts @@ -9,7 +9,7 @@ #include -#include "meson-gxl-s905x.dtsi" +#include "meson-gxl-s805x.dtsi" / { compatible = "amlogic,p241", "amlogic,s805x", "amlogic,meson-gxl"; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s805x.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x.dtsi new file mode 100644 index 0000000000000..f9d705648426e --- /dev/null +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s805x.dtsi @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +/* + * Copyright (c) 2020 BayLibre SAS + * Author: Neil Armstrong + */ + +#include "meson-gxl-s905x.dtsi" + +/ { + compatible = "amlogic,s805x", "amlogic,meson-gxl"; +}; + +/* The S805X Package doesn't seem to handle the 744MHz OPP correctly */ +&mali { + assigned-clocks = <&clkc CLKID_MALI_0_SEL>, + <&clkc CLKID_MALI_0>, + <&clkc CLKID_MALI>; /* Glitch free mux */ + assigned-clock-parents = <&clkc CLKID_FCLK_DIV3>, + <0>, /* Do Nothing */ + <&clkc CLKID_MALI_0>; + assigned-clock-rates = <0>, /* Do Nothing */ + <666666666>, + <0>; /* Do Nothing */ +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905d-p230.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905d-p230.dts index b08c4537f260d..b2ab05c220903 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905d-p230.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905d-p230.dts @@ -82,7 +82,7 @@ /* External PHY reset is shared with internal PHY Led signal */ reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_14 GPIO_ACTIVE_LOW>; interrupt-parent = <&gpio_intc>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts index 2a5cd303123de..440bc23c73426 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts @@ -33,11 +33,9 @@ gpio-keys-polled { compatible = "gpio-keys-polled"; - #address-cells = <1>; - #size-cells = <0>; poll-interval = <100>; - button@0 { + power-button { label = "power"; linux,code = ; gpios = <&gpio_ao GPIOAO_2 GPIO_ACTIVE_LOW>; @@ -192,6 +190,9 @@ bluetooth { compatible = "brcm,bcm43438-bt"; shutdown-gpios = <&gpio GPIOX_17 GPIO_ACTIVE_HIGH>; + max-speed = <2000000>; + clocks = <&wifi32k>; + clock-names = "lpo"; }; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 49ff0a7d0210f..e3cfa24dca5ab 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -288,6 +288,11 @@ }; }; +&hwrng { + clocks = <&clkc CLKID_RNG0>; + clock-names = "core"; +}; + &i2c_A { clocks = <&clkc CLKID_I2C>; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts index f25ddd18a607e..c8a4205117f15 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts @@ -251,7 +251,7 @@ reg = <0>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_14 GPIO_ACTIVE_LOW>; interrupt-parent = <&gpio_intc>; @@ -327,7 +327,7 @@ #size-cells = <0>; bus-width = <4>; - max-frequency = <50000000>; + max-frequency = <60000000>; non-removable; disable-wp; @@ -395,7 +395,7 @@ #size-cells = <1>; compatible = "winbond,w25q16", "jedec,spi-nor"; reg = <0>; - spi-max-frequency = <3000000>; + spi-max-frequency = <104000000>; }; }; @@ -409,6 +409,9 @@ bluetooth { compatible = "brcm,bcm43438-bt"; shutdown-gpios = <&gpio GPIOX_17 GPIO_ACTIVE_HIGH>; + max-speed = <2000000>; + clocks = <&wifi32k>; + clock-names = "lpo"; }; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts index c2bd4dbbf38c5..8dccf91d68da7 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-nexbox-a1.dts @@ -112,7 +112,7 @@ max-speed = <1000>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_14 GPIO_ACTIVE_LOW>; }; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-q200.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-q200.dts index ea45ae0c71b7f..8edbfe040805c 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-q200.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-q200.dts @@ -64,7 +64,7 @@ /* External PHY reset is shared with internal PHY Led signal */ reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_14 GPIO_ACTIVE_LOW>; interrupt-parent = <&gpio_intc>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts index 5cd4d35006d09..f72d29e33a9e4 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-rbox-pro.dts @@ -114,7 +114,7 @@ max-speed = <1000>; reset-assert-us = <10000>; - reset-deassert-us = <30000>; + reset-deassert-us = <80000>; reset-gpios = <&gpio GPIOZ_14 GPIO_ACTIVE_LOW>; }; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-khadas-vim3.dtsi b/arch/arm64/boot/dts/amlogic/meson-khadas-vim3.dtsi index 8647da7d6609b..f6694aad84db3 100644 --- a/arch/arm64/boot/dts/amlogic/meson-khadas-vim3.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-khadas-vim3.dtsi @@ -43,13 +43,13 @@ white { label = "vim3:white:sys"; - gpios = <&gpio_ao GPIOAO_4 GPIO_ACTIVE_LOW>; + gpios = <&gpio_ao GPIOAO_4 GPIO_ACTIVE_HIGH>; linux,default-trigger = "heartbeat"; }; red { label = "vim3:red"; - gpios = <&gpio_expander 5 GPIO_ACTIVE_LOW>; + gpios = <&gpio_expander 5 GPIO_ACTIVE_HIGH>; }; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts index 3435aaa4e8db5..85fb59060cdff 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts +++ b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts @@ -164,14 +164,6 @@ regulator-always-on; }; - reserved-memory { - /* TEE Reserved Memory */ - bl32_reserved: bl32@5000000 { - reg = <0x0 0x05300000 0x0 0x2000000>; - no-map; - }; - }; - sdio_pwrseq: sdio-pwrseq { compatible = "mmc-pwrseq-simple"; reset-gpios = <&gpio GPIOX_6 GPIO_ACTIVE_LOW>; @@ -361,6 +353,9 @@ bluetooth { compatible = "brcm,bcm43438-bt"; + interrupt-parent = <&gpio_intc>; + interrupts = <95 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "host-wakeup"; shutdown-gpios = <&gpio GPIOX_17 GPIO_ACTIVE_HIGH>; max-speed = <2000000>; clocks = <&wifi32k>; diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi b/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi index 521573f3a5bab..dd62a608c805a 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-sm1.dtsi @@ -55,26 +55,6 @@ compatible = "operating-points-v2"; opp-shared; - opp-100000000 { - opp-hz = /bits/ 64 <100000000>; - opp-microvolt = <730000>; - }; - - opp-250000000 { - opp-hz = /bits/ 64 <250000000>; - opp-microvolt = <730000>; - }; - - opp-500000000 { - opp-hz = /bits/ 64 <500000000>; - opp-microvolt = <730000>; - }; - - opp-667000000 { - opp-hz = /bits/ 64 <666666666>; - opp-microvolt = <750000>; - }; - opp-1000000000 { opp-hz = /bits/ 64 <1000000000>; opp-microvolt = <770000>; @@ -90,7 +70,7 @@ opp-microvolt = <790000>; }; - opp-1512000000 { + opp-1500000000 { opp-hz = /bits/ 64 <1500000000>; opp-microvolt = <800000>; }; diff --git a/arch/arm64/boot/dts/arm/foundation-v8-gicv2.dtsi b/arch/arm64/boot/dts/arm/foundation-v8-gicv2.dtsi index 15fe81738e94d..dfb23dfc0b0fb 100644 --- a/arch/arm64/boot/dts/arm/foundation-v8-gicv2.dtsi +++ b/arch/arm64/boot/dts/arm/foundation-v8-gicv2.dtsi @@ -8,7 +8,7 @@ gic: interrupt-controller@2c001000 { compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; #interrupt-cells = <3>; - #address-cells = <2>; + #address-cells = <1>; interrupt-controller; reg = <0x0 0x2c001000 0 0x1000>, <0x0 0x2c002000 0 0x2000>, diff --git a/arch/arm64/boot/dts/arm/foundation-v8-gicv3.dtsi b/arch/arm64/boot/dts/arm/foundation-v8-gicv3.dtsi index f2c75c756039c..906f51935b362 100644 --- a/arch/arm64/boot/dts/arm/foundation-v8-gicv3.dtsi +++ b/arch/arm64/boot/dts/arm/foundation-v8-gicv3.dtsi @@ -8,9 +8,9 @@ gic: interrupt-controller@2f000000 { compatible = "arm,gic-v3"; #interrupt-cells = <3>; - #address-cells = <2>; - #size-cells = <2>; - ranges; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x0 0x2f000000 0x100000>; interrupt-controller; reg = <0x0 0x2f000000 0x0 0x10000>, <0x0 0x2f100000 0x0 0x200000>, @@ -22,7 +22,7 @@ its: its@2f020000 { compatible = "arm,gic-v3-its"; msi-controller; - reg = <0x0 0x2f020000 0x0 0x20000>; + reg = <0x20000 0x20000>; }; }; }; diff --git a/arch/arm64/boot/dts/arm/foundation-v8.dtsi b/arch/arm64/boot/dts/arm/foundation-v8.dtsi index 3f78373f708a2..05d1657170b49 100644 --- a/arch/arm64/boot/dts/arm/foundation-v8.dtsi +++ b/arch/arm64/boot/dts/arm/foundation-v8.dtsi @@ -107,51 +107,51 @@ #interrupt-cells = <1>; interrupt-map-mask = <0 0 63>; - interrupt-map = <0 0 0 &gic 0 0 GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, - <0 0 1 &gic 0 0 GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, - <0 0 2 &gic 0 0 GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>, - <0 0 3 &gic 0 0 GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>, - <0 0 4 &gic 0 0 GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, - <0 0 5 &gic 0 0 GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>, - <0 0 6 &gic 0 0 GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, - <0 0 7 &gic 0 0 GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, - <0 0 8 &gic 0 0 GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, - <0 0 9 &gic 0 0 GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, - <0 0 10 &gic 0 0 GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, - <0 0 11 &gic 0 0 GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>, - <0 0 12 &gic 0 0 GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>, - <0 0 13 &gic 0 0 GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>, - <0 0 14 &gic 0 0 GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>, - <0 0 15 &gic 0 0 GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>, - <0 0 16 &gic 0 0 GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>, - <0 0 17 &gic 0 0 GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>, - <0 0 18 &gic 0 0 GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>, - <0 0 19 &gic 0 0 GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>, - <0 0 20 &gic 0 0 GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>, - <0 0 21 &gic 0 0 GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>, - <0 0 22 &gic 0 0 GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>, - <0 0 23 &gic 0 0 GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>, - <0 0 24 &gic 0 0 GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>, - <0 0 25 &gic 0 0 GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>, - <0 0 26 &gic 0 0 GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>, - <0 0 27 &gic 0 0 GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>, - <0 0 28 &gic 0 0 GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>, - <0 0 29 &gic 0 0 GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>, - <0 0 30 &gic 0 0 GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>, - <0 0 31 &gic 0 0 GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>, - <0 0 32 &gic 0 0 GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>, - <0 0 33 &gic 0 0 GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>, - <0 0 34 &gic 0 0 GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>, - <0 0 35 &gic 0 0 GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>, - <0 0 36 &gic 0 0 GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>, - <0 0 37 &gic 0 0 GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>, - <0 0 38 &gic 0 0 GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>, - <0 0 39 &gic 0 0 GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>, - <0 0 40 &gic 0 0 GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>, - <0 0 41 &gic 0 0 GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>, - <0 0 42 &gic 0 0 GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>; - - ethernet@2,02000000 { + interrupt-map = <0 0 0 &gic 0 GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, + <0 0 1 &gic 0 GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, + <0 0 2 &gic 0 GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>, + <0 0 3 &gic 0 GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>, + <0 0 4 &gic 0 GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, + <0 0 5 &gic 0 GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>, + <0 0 6 &gic 0 GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, + <0 0 7 &gic 0 GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, + <0 0 8 &gic 0 GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, + <0 0 9 &gic 0 GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, + <0 0 10 &gic 0 GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, + <0 0 11 &gic 0 GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>, + <0 0 12 &gic 0 GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>, + <0 0 13 &gic 0 GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>, + <0 0 14 &gic 0 GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>, + <0 0 15 &gic 0 GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>, + <0 0 16 &gic 0 GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>, + <0 0 17 &gic 0 GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>, + <0 0 18 &gic 0 GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>, + <0 0 19 &gic 0 GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>, + <0 0 20 &gic 0 GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>, + <0 0 21 &gic 0 GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>, + <0 0 22 &gic 0 GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>, + <0 0 23 &gic 0 GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>, + <0 0 24 &gic 0 GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>, + <0 0 25 &gic 0 GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>, + <0 0 26 &gic 0 GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>, + <0 0 27 &gic 0 GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>, + <0 0 28 &gic 0 GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>, + <0 0 29 &gic 0 GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>, + <0 0 30 &gic 0 GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>, + <0 0 31 &gic 0 GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>, + <0 0 32 &gic 0 GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>, + <0 0 33 &gic 0 GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>, + <0 0 34 &gic 0 GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>, + <0 0 35 &gic 0 GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>, + <0 0 36 &gic 0 GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>, + <0 0 37 &gic 0 GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>, + <0 0 38 &gic 0 GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>, + <0 0 39 &gic 0 GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>, + <0 0 40 &gic 0 GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>, + <0 0 41 &gic 0 GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>, + <0 0 42 &gic 0 GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>; + + ethernet@202000000 { compatible = "smsc,lan91c111"; reg = <2 0x02000000 0x10000>; interrupts = <15>; @@ -178,7 +178,7 @@ clock-output-names = "v2m:refclk32khz"; }; - iofpga@3,00000000 { + iofpga@300000000 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/arm/fvp-base-revc.dts b/arch/arm64/boot/dts/arm/fvp-base-revc.dts index 62ab0d54ff71e..335fff7624516 100644 --- a/arch/arm64/boot/dts/arm/fvp-base-revc.dts +++ b/arch/arm64/boot/dts/arm/fvp-base-revc.dts @@ -161,10 +161,10 @@ bus-range = <0x0 0x1>; reg = <0x0 0x40000000 0x0 0x10000000>; ranges = <0x2000000 0x0 0x50000000 0x0 0x50000000 0x0 0x10000000>; - interrupt-map = <0 0 0 1 &gic GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 2 &gic GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 3 &gic GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 4 &gic GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 1 &gic 0 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 2 &gic 0 0 GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 3 &gic 0 0 GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 4 &gic 0 0 GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>; interrupt-map-mask = <0x0 0x0 0x0 0x7>; msi-map = <0x0 &its 0x0 0x10000>; iommu-map = <0x0 &smmu 0x0 0x10000>; diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi index 26a039a028b8e..65bcdd0fe78ad 100644 --- a/arch/arm64/boot/dts/arm/juno-base.dtsi +++ b/arch/arm64/boot/dts/arm/juno-base.dtsi @@ -6,7 +6,6 @@ /* * Devices shared by all Juno boards */ - dma-ranges = <0 0 0 0 0x100 0>; memtimer: timer@2a810000 { compatible = "arm,armv7-timer-mem"; @@ -63,35 +62,35 @@ <0x0 0x2c02f000 0 0x2000>, <0x0 0x2c04f000 0 0x2000>, <0x0 0x2c06f000 0 0x2000>; - #address-cells = <2>; + #address-cells = <1>; #interrupt-cells = <3>; - #size-cells = <2>; + #size-cells = <1>; interrupt-controller; interrupts = ; - ranges = <0 0 0 0x2c1c0000 0 0x40000>; + ranges = <0 0 0x2c1c0000 0x40000>; v2m_0: v2m@0 { compatible = "arm,gic-v2m-frame"; msi-controller; - reg = <0 0 0 0x10000>; + reg = <0 0x10000>; }; v2m@10000 { compatible = "arm,gic-v2m-frame"; msi-controller; - reg = <0 0x10000 0 0x10000>; + reg = <0x10000 0x10000>; }; v2m@20000 { compatible = "arm,gic-v2m-frame"; msi-controller; - reg = <0 0x20000 0 0x10000>; + reg = <0x20000 0x10000>; }; v2m@30000 { compatible = "arm,gic-v2m-frame"; msi-controller; - reg = <0 0x30000 0 0x10000>; + reg = <0x30000 0x10000>; }; }; @@ -520,10 +519,10 @@ <0x42000000 0x40 0x00000000 0x40 0x00000000 0x1 0x00000000>; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 7>; - interrupt-map = <0 0 0 1 &gic 0 0 GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 2 &gic 0 0 GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 3 &gic 0 0 GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 4 &gic 0 0 GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 1 &gic 0 GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 2 &gic 0 GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 3 &gic 0 GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 4 &gic 0 GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>; msi-parent = <&v2m_0>; status = "disabled"; iommu-map-mask = <0x0>; /* RC has no means to output PCI RID */ @@ -538,13 +537,13 @@ clocks { compatible = "arm,scpi-clocks"; - scpi_dvfs: scpi-dvfs { + scpi_dvfs: clocks-0 { compatible = "arm,scpi-dvfs-clocks"; #clock-cells = <1>; clock-indices = <0>, <1>, <2>; clock-output-names = "atlclk", "aplclk","gpuclk"; }; - scpi_clk: scpi-clk { + scpi_clk: clocks-1 { compatible = "arm,scpi-variable-clocks"; #clock-cells = <1>; clock-indices = <3>; @@ -552,7 +551,7 @@ }; }; - scpi_devpd: scpi-power-domains { + scpi_devpd: power-controller { compatible = "arm,scpi-power-domains"; num-domains = <2>; #power-domain-cells = <1>; @@ -787,19 +786,19 @@ #interrupt-cells = <1>; interrupt-map-mask = <0 0 15>; - interrupt-map = <0 0 0 &gic 0 0 GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>, - <0 0 1 &gic 0 0 GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>, - <0 0 2 &gic 0 0 GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>, - <0 0 3 &gic 0 0 GIC_SPI 160 IRQ_TYPE_LEVEL_HIGH>, - <0 0 4 &gic 0 0 GIC_SPI 161 IRQ_TYPE_LEVEL_HIGH>, - <0 0 5 &gic 0 0 GIC_SPI 162 IRQ_TYPE_LEVEL_HIGH>, - <0 0 6 &gic 0 0 GIC_SPI 163 IRQ_TYPE_LEVEL_HIGH>, - <0 0 7 &gic 0 0 GIC_SPI 164 IRQ_TYPE_LEVEL_HIGH>, - <0 0 8 &gic 0 0 GIC_SPI 165 IRQ_TYPE_LEVEL_HIGH>, - <0 0 9 &gic 0 0 GIC_SPI 166 IRQ_TYPE_LEVEL_HIGH>, - <0 0 10 &gic 0 0 GIC_SPI 167 IRQ_TYPE_LEVEL_HIGH>, - <0 0 11 &gic 0 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>, - <0 0 12 &gic 0 0 GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 &gic 0 GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>, + <0 0 1 &gic 0 GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>, + <0 0 2 &gic 0 GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>, + <0 0 3 &gic 0 GIC_SPI 160 IRQ_TYPE_LEVEL_HIGH>, + <0 0 4 &gic 0 GIC_SPI 161 IRQ_TYPE_LEVEL_HIGH>, + <0 0 5 &gic 0 GIC_SPI 162 IRQ_TYPE_LEVEL_HIGH>, + <0 0 6 &gic 0 GIC_SPI 163 IRQ_TYPE_LEVEL_HIGH>, + <0 0 7 &gic 0 GIC_SPI 164 IRQ_TYPE_LEVEL_HIGH>, + <0 0 8 &gic 0 GIC_SPI 165 IRQ_TYPE_LEVEL_HIGH>, + <0 0 9 &gic 0 GIC_SPI 166 IRQ_TYPE_LEVEL_HIGH>, + <0 0 10 &gic 0 GIC_SPI 167 IRQ_TYPE_LEVEL_HIGH>, + <0 0 11 &gic 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>, + <0 0 12 &gic 0 GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>; }; site2: tlx@60000000 { @@ -809,6 +808,6 @@ ranges = <0 0 0x60000000 0x10000000>; #interrupt-cells = <1>; interrupt-map-mask = <0 0>; - interrupt-map = <0 0 &gic 0 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 &gic 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>; }; }; diff --git a/arch/arm64/boot/dts/arm/juno-clocks.dtsi b/arch/arm64/boot/dts/arm/juno-clocks.dtsi index e5e265dfa9025..2870b5eeb1984 100644 --- a/arch/arm64/boot/dts/arm/juno-clocks.dtsi +++ b/arch/arm64/boot/dts/arm/juno-clocks.dtsi @@ -8,10 +8,10 @@ */ / { /* SoC fixed clocks */ - soc_uartclk: refclk7273800hz { + soc_uartclk: refclk7372800hz { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <7273800>; + clock-frequency = <7372800>; clock-output-names = "juno:uartclk"; }; diff --git a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi index 9f60dacb4f80f..1234a8cfc0a92 100644 --- a/arch/arm64/boot/dts/arm/juno-motherboard.dtsi +++ b/arch/arm64/boot/dts/arm/juno-motherboard.dtsi @@ -103,7 +103,7 @@ }; }; - flash@0,00000000 { + flash@0 { /* 2 * 32MiB NOR Flash memory mounted on CS0 */ compatible = "arm,vexpress-flash", "cfi-flash"; reg = <0 0x00000000 0x04000000>; @@ -120,7 +120,7 @@ }; }; - ethernet@2,00000000 { + ethernet@200000000 { compatible = "smsc,lan9118", "smsc,lan9115"; reg = <2 0x00000000 0x10000>; interrupts = <3>; @@ -133,7 +133,7 @@ vddvario-supply = <&mb_fixed_3v3>; }; - iofpga@3,00000000 { + iofpga@300000000 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/arm/rtsm_ve-motherboard-rs2.dtsi b/arch/arm64/boot/dts/arm/rtsm_ve-motherboard-rs2.dtsi index 57b0b9d7f3fae..29e6962c70bd9 100644 --- a/arch/arm64/boot/dts/arm/rtsm_ve-motherboard-rs2.dtsi +++ b/arch/arm64/boot/dts/arm/rtsm_ve-motherboard-rs2.dtsi @@ -9,7 +9,7 @@ motherboard { arm,v2m-memory-map = "rs2"; - iofpga@3,00000000 { + iofpga@300000000 { virtio-p9@140000 { compatible = "virtio,mmio"; reg = <0x140000 0x200>; diff --git a/arch/arm64/boot/dts/arm/rtsm_ve-motherboard.dtsi b/arch/arm64/boot/dts/arm/rtsm_ve-motherboard.dtsi index 03a7bf079c8ff..ad20076357f50 100644 --- a/arch/arm64/boot/dts/arm/rtsm_ve-motherboard.dtsi +++ b/arch/arm64/boot/dts/arm/rtsm_ve-motherboard.dtsi @@ -17,14 +17,14 @@ #interrupt-cells = <1>; ranges; - flash@0,00000000 { + flash@0 { compatible = "arm,vexpress-flash", "cfi-flash"; reg = <0 0x00000000 0x04000000>, <4 0x00000000 0x04000000>; bank-width = <4>; }; - ethernet@2,02000000 { + ethernet@202000000 { compatible = "smsc,lan91c111"; reg = <2 0x02000000 0x10000>; interrupts = <15>; @@ -51,7 +51,7 @@ clock-output-names = "v2m:refclk32khz"; }; - iofpga@3,00000000 { + iofpga@300000000 { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts b/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts index ec19fbf928a14..12a4b1c03390c 100644 --- a/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts +++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2-svk.dts @@ -111,8 +111,8 @@ compatible = "silabs,si3226x"; reg = <0>; spi-max-frequency = <5000000>; - spi-cpha = <1>; - spi-cpol = <1>; + spi-cpha; + spi-cpol; pl022,hierarchy = <0>; pl022,interface = <0>; pl022,slave-tx-disable = <0>; @@ -135,8 +135,8 @@ at25,byte-len = <0x8000>; at25,addr-mode = <2>; at25,page-size = <64>; - spi-cpha = <1>; - spi-cpol = <1>; + spi-cpha; + spi-cpol; pl022,hierarchy = <0>; pl022,interface = <0>; pl022,slave-tx-disable = <0>; diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi index 15f7b0ed38369..edc1a8a4c4bc0 100644 --- a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi @@ -687,7 +687,7 @@ }; }; - sata: ahci@663f2000 { + sata: sata@663f2000 { compatible = "brcm,iproc-ahci", "generic-ahci"; reg = <0x663f2000 0x1000>; dma-coherent; @@ -745,7 +745,7 @@ }; qspi: spi@66470200 { - compatible = "brcm,spi-bcm-qspi", "brcm,spi-ns2-qspi"; + compatible = "brcm,spi-ns2-qspi", "brcm,spi-bcm-qspi"; reg = <0x66470200 0x184>, <0x66470000 0x124>, <0x67017408 0x004>, diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi index 55259f973b5a9..5401a646c8406 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi @@ -4,21 +4,26 @@ */ usb { compatible = "simple-bus"; - dma-ranges; - #address-cells = <1>; - #size-cells = <1>; - ranges = <0x0 0x0 0x68500000 0x00400000>; + #address-cells = <2>; + #size-cells = <2>; + ranges = <0x0 0x0 0x0 0x68500000 0x0 0x00400000>; + + /* + * Internally, USB bus to the interconnect can only address up + * to 40-bit + */ + dma-ranges = <0 0 0 0 0x100 0x0>; usbphy0: usb-phy@0 { compatible = "brcm,sr-usb-combo-phy"; - reg = <0x00000000 0x100>; + reg = <0x0 0x00000000 0x0 0x100>; #phy-cells = <1>; status = "disabled"; }; xhci0: usb@1000 { compatible = "generic-xhci"; - reg = <0x00001000 0x1000>; + reg = <0x0 0x00001000 0x0 0x1000>; interrupts = ; phys = <&usbphy0 1>, <&usbphy0 0>; phy-names = "phy0", "phy1"; @@ -28,7 +33,7 @@ bdc0: usb@2000 { compatible = "brcm,bdc-v0.16"; - reg = <0x00002000 0x1000>; + reg = <0x0 0x00002000 0x0 0x1000>; interrupts = ; phys = <&usbphy0 0>, <&usbphy0 1>; phy-names = "phy0", "phy1"; @@ -38,21 +43,21 @@ usbphy1: usb-phy@10000 { compatible = "brcm,sr-usb-combo-phy"; - reg = <0x00010000 0x100>; + reg = <0x0 0x00010000 0x0 0x100>; #phy-cells = <1>; status = "disabled"; }; usbphy2: usb-phy@20000 { compatible = "brcm,sr-usb-hs-phy"; - reg = <0x00020000 0x100>; + reg = <0x0 0x00020000 0x0 0x100>; #phy-cells = <0>; status = "disabled"; }; xhci1: usb@11000 { compatible = "generic-xhci"; - reg = <0x00011000 0x1000>; + reg = <0x0 0x00011000 0x0 0x1000>; interrupts = ; phys = <&usbphy1 1>, <&usbphy2>, <&usbphy1 0>; phy-names = "phy0", "phy1", "phy2"; @@ -62,7 +67,7 @@ bdc1: usb@21000 { compatible = "brcm,bdc-v0.16"; - reg = <0x00021000 0x1000>; + reg = <0x0 0x00021000 0x0 0x1000>; interrupts = ; phys = <&usbphy2>; phy-names = "phy0"; diff --git a/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi b/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi index 6f90b0e62cba6..148bdca8d9c96 100644 --- a/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi @@ -389,7 +389,7 @@ s2mps13-pmic@66 { compatible = "samsung,s2mps13-pmic"; interrupt-parent = <&gpa0>; - interrupts = <7 IRQ_TYPE_NONE>; + interrupts = <7 IRQ_TYPE_LEVEL_LOW>; reg = <0x66>; samsung,s2mps11-wrstbi-ground; diff --git a/arch/arm64/boot/dts/exynos/exynos5433.dtsi b/arch/arm64/boot/dts/exynos/exynos5433.dtsi index a76f620f7f355..a5f8752f607b3 100644 --- a/arch/arm64/boot/dts/exynos/exynos5433.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos5433.dtsi @@ -18,8 +18,8 @@ / { compatible = "samsung,exynos5433"; - #address-cells = <1>; - #size-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; interrupt-parent = <&gic>; @@ -311,7 +311,7 @@ compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; - ranges; + ranges = <0x0 0x0 0x0 0x18000000>; chipid@10000000 { compatible = "samsung,exynos4210-chipid"; diff --git a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts index 080e0f56e108f..09aead2be000c 100644 --- a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts +++ b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts @@ -90,7 +90,7 @@ s2mps15_pmic@66 { compatible = "samsung,s2mps15-pmic"; reg = <0x66>; - interrupts = <2 IRQ_TYPE_NONE>; + interrupts = <2 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&gpa0>; pinctrl-names = "default"; pinctrl-0 = <&pmic_irq>; @@ -157,6 +157,7 @@ regulator-min-microvolt = <700000>; regulator-max-microvolt = <1150000>; regulator-enable-ramp-delay = <125>; + regulator-always-on; }; ldo8_reg: LDO8 { diff --git a/arch/arm64/boot/dts/exynos/exynos7.dtsi b/arch/arm64/boot/dts/exynos/exynos7.dtsi index bcb9d8cee2677..84f92b44c3235 100644 --- a/arch/arm64/boot/dts/exynos/exynos7.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos7.dtsi @@ -12,8 +12,8 @@ / { compatible = "samsung,exynos7"; interrupt-parent = <&gic>; - #address-cells = <1>; - #size-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; aliases { pinctrl0 = &pinctrl_alive; @@ -90,15 +90,17 @@ }; psci { - compatible = "arm,psci-0.2"; + compatible = "arm,psci"; method = "smc"; + cpu_off = <0x84000002>; + cpu_on = <0xC4000003>; }; soc: soc { compatible = "simple-bus"; #address-cells = <1>; #size-cells = <1>; - ranges; + ranges = <0 0 0 0x18000000>; chipid@10000000 { compatible = "samsung,exynos4210-chipid"; @@ -111,7 +113,7 @@ #address-cells = <0>; interrupt-controller; reg = <0x11001000 0x1000>, - <0x11002000 0x1000>, + <0x11002000 0x2000>, <0x11004000 0x2000>, <0x11006000 0x2000>; }; @@ -494,13 +496,6 @@ pmu_system_controller: system-controller@105c0000 { compatible = "samsung,exynos7-pmu", "syscon"; reg = <0x105c0000 0x5000>; - - reboot: syscon-reboot { - compatible = "syscon-reboot"; - regmap = <&pmu_system_controller>; - offset = <0x0400>; - mask = <0x1>; - }; }; rtc: rtc@10590000 { @@ -650,3 +645,4 @@ }; #include "exynos7-pinctrl.dtsi" +#include "arm/exynos-syscon-restart.dtsi" diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi index 337919366dc85..ec141c9852893 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi @@ -177,6 +177,7 @@ ranges = <0x0 0x00 0x1700000 0x100000>; reg = <0x00 0x1700000 0x0 0x100000>; interrupts = ; + dma-coherent; sec_jr0: jr@10000 { compatible = "fsl,sec-v5.4-job-ring", diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts index 078a5010228cd..0b3a93c4155d2 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts @@ -161,11 +161,6 @@ vcc-supply = <&sb_3v3>; }; - rtc@51 { - compatible = "nxp,pcf2129"; - reg = <0x51>; - }; - eeprom@56 { compatible = "atmel,24c512"; reg = <0x56>; @@ -209,6 +204,15 @@ }; +&i2c1 { + status = "okay"; + + rtc@51 { + compatible = "nxp,pcf2129"; + reg = <0x51>; + }; +}; + &enetc_port1 { phy-handle = <&qds_phy1>; phy-connection-type = "rgmii-id"; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index 72b9a75976a1f..02ae6bfff5658 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -65,7 +65,7 @@ }; }; - sysclk: clock-sysclk { + sysclk: sysclk { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <100000000>; @@ -102,8 +102,8 @@ reboot { compatible ="syscon-reboot"; - regmap = <&dcfg>; - offset = <0xb0>; + regmap = <&rst>; + offset = <0>; mask = <0x02>; }; @@ -151,14 +151,20 @@ ddr: memory-controller@1080000 { compatible = "fsl,qoriq-memory-controller"; reg = <0x0 0x1080000 0x0 0x1000>; - interrupts = ; - big-endian; + interrupts = ; + little-endian; }; dcfg: syscon@1e00000 { compatible = "fsl,ls1028a-dcfg", "syscon"; reg = <0x0 0x1e00000 0x0 0x10000>; - big-endian; + little-endian; + }; + + rst: syscon@1e60000 { + compatible = "syscon"; + reg = <0x0 0x1e60000 0x0 0x10000>; + little-endian; }; scfg: syscon@1fc0000 { @@ -281,6 +287,24 @@ status = "disabled"; }; + can0: can@2180000 { + compatible = "fsl,ls1028ar1-flexcan", "fsl,lx2160ar1-flexcan"; + reg = <0x0 0x2180000 0x0 0x10000>; + interrupts = ; + clocks = <&sysclk>, <&clockgen 4 1>; + clock-names = "ipg", "per"; + status = "disabled"; + }; + + can1: can@2190000 { + compatible = "fsl,ls1028ar1-flexcan", "fsl,lx2160ar1-flexcan"; + reg = <0x0 0x2190000 0x0 0x10000>; + interrupts = ; + clocks = <&sysclk>, <&clockgen 4 1>; + clock-names = "ipg", "per"; + status = "disabled"; + }; + duart0: serial@21c0500 { compatible = "fsl,ns16550", "ns16550a"; reg = <0x00 0x21c0500 0x0 0x100>; @@ -490,14 +514,14 @@ compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc000000 0x0 0x1000>; clocks = <&clockgen 4 15>, <&clockgen 4 15>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster1_core1_watchdog: watchdog@c010000 { compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc010000 0x0 0x1000>; clocks = <&clockgen 4 15>, <&clockgen 4 15>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; sai1: audio-controller@f100000 { @@ -567,7 +591,7 @@ 0x00010004 0x0000003d 0x00010005 0x00000045 0x00010006 0x0000004d - 0x00010007 0x00000045 + 0x00010007 0x00000055 0x00010008 0x0000005e 0x00010009 0x00000066 0x0001000a 0x0000006e @@ -667,7 +691,7 @@ ethernet@0,4 { compatible = "fsl,enetc-ptp"; reg = <0x000400 0 0 0 0>; - clocks = <&clockgen 4 0>; + clocks = <&clockgen 2 3>; little-endian; }; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi index 6082ae0221364..d237162a87446 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi @@ -20,6 +20,8 @@ }; &fman0 { + fsl,erratum-a050385; + /* these aliases provide the FMan ports mapping */ enet0: ethernet@e0000 { }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts index 4223a2352d45a..dde50c88f5e35 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts @@ -119,12 +119,12 @@ ethernet@e4000 { phy-handle = <&rgmii_phy1>; - phy-connection-type = "rgmii-txid"; + phy-connection-type = "rgmii-id"; }; ethernet@e6000 { phy-handle = <&rgmii_phy2>; - phy-connection-type = "rgmii-txid"; + phy-connection-type = "rgmii-id"; }; ethernet@e8000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index c084c7a4b6a6f..b611d835dc25a 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -241,6 +241,7 @@ ranges = <0x0 0x00 0x1700000 0x100000>; reg = <0x00 0x1700000 0x0 0x100000>; interrupts = <0 75 0x4>; + dma-coherent; sec_jr0: jr@10000 { compatible = "fsl,sec-v5.4-job-ring", diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a-frwy.dts b/arch/arm64/boot/dts/freescale/fsl-ls1046a-frwy.dts index 3595be0f25277..2d6c73d7d397c 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a-frwy.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a-frwy.dts @@ -83,15 +83,9 @@ }; eeprom@52 { - compatible = "atmel,24c512"; + compatible = "onnn,cat24c04", "atmel,24c04"; reg = <0x52>; }; - - eeprom@53 { - compatible = "atmel,24c512"; - reg = <0x53>; - }; - }; }; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts index 6a6514d0e5a98..8858c1e92f23c 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts @@ -58,14 +58,9 @@ }; eeprom@52 { - compatible = "atmel,24c512"; + compatible = "onnn,cat24c05", "atmel,24c04"; reg = <0x52>; }; - - eeprom@53 { - compatible = "atmel,24c512"; - reg = <0x53>; - }; }; &i2c3 { @@ -127,12 +122,12 @@ &fman0 { ethernet@e4000 { phy-handle = <&rgmii_phy1>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; ethernet@e6000 { phy-handle = <&rgmii_phy2>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; ethernet@e8000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi index d4c1da3d4bde2..ca087918c250a 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi @@ -244,6 +244,7 @@ ranges = <0x0 0x00 0x1700000 0x100000>; reg = <0x00 0x1700000 0x0 0x100000>; interrupts = ; + dma-coherent; sec_jr0: jr@10000 { compatible = "fsl,sec-v5.4-job-ring", @@ -304,7 +305,7 @@ dcfg: dcfg@1ee0000 { compatible = "fsl,ls1046a-dcfg", "syscon"; - reg = <0x0 0x1ee0000 0x0 0x10000>; + reg = <0x0 0x1ee0000 0x0 0x1000>; big-endian; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi index c676d0771762f..6b1b728de9e9c 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi @@ -637,59 +637,59 @@ }; cluster1_core0_watchdog: wdt@c000000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc000000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster1_core1_watchdog: wdt@c010000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc010000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster1_core2_watchdog: wdt@c020000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc020000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster1_core3_watchdog: wdt@c030000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc030000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster2_core0_watchdog: wdt@c100000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc100000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster2_core1_watchdog: wdt@c110000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc110000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster2_core2_watchdog: wdt@c120000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc120000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster2_core3_watchdog: wdt@c130000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc130000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; fsl_mc: fsl-mc@80c000000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi index 7a0be8eaa84a2..4bf4a22faa61a 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi @@ -227,59 +227,59 @@ }; cluster1_core0_watchdog: wdt@c000000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc000000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster1_core1_watchdog: wdt@c010000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc010000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster2_core0_watchdog: wdt@c100000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc100000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster2_core1_watchdog: wdt@c110000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc110000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster3_core0_watchdog: wdt@c200000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc200000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster3_core1_watchdog: wdt@c210000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc210000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster4_core0_watchdog: wdt@c300000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc300000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; cluster4_core1_watchdog: wdt@c310000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc310000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; - clock-names = "apb_pclk", "wdog_clk"; + clock-names = "wdog_clk", "apb_pclk"; }; crypto: crypto@8000000 { @@ -501,7 +501,6 @@ clocks = <&clockgen 4 3>; clock-names = "dspi"; spi-num-chipselects = <5>; - bus-num = <0>; }; esdhc: esdhc@2140000 { diff --git a/arch/arm64/boot/dts/freescale/imx8mm-evk.dts b/arch/arm64/boot/dts/freescale/imx8mm-evk.dts index f7a15f3904c2c..b9f8b7aac8ff1 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-evk.dts @@ -62,6 +62,8 @@ cpudai: simple-audio-card,cpu { sound-dai = <&sai3>; + dai-tdm-slot-num = <2>; + dai-tdm-slot-width = <32>; }; simple-audio-card,codec { @@ -229,7 +231,7 @@ ldo1_reg: LDO1 { regulator-name = "LDO1"; - regulator-min-microvolt = <3000000>; + regulator-min-microvolt = <1600000>; regulator-max-microvolt = <3300000>; regulator-boot-on; regulator-always-on; @@ -237,7 +239,7 @@ ldo2_reg: LDO2 { regulator-name = "LDO2"; - regulator-min-microvolt = <900000>; + regulator-min-microvolt = <800000>; regulator-max-microvolt = <900000>; regulator-boot-on; regulator-always-on; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-pinfunc.h b/arch/arm64/boot/dts/freescale/imx8mm-pinfunc.h index cffa8991880d1..93b44efdbc527 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-pinfunc.h +++ b/arch/arm64/boot/dts/freescale/imx8mm-pinfunc.h @@ -124,7 +124,7 @@ #define MX8MM_IOMUXC_SD1_CMD_USDHC1_CMD 0x0A4 0x30C 0x000 0x0 0x0 #define MX8MM_IOMUXC_SD1_CMD_GPIO2_IO1 0x0A4 0x30C 0x000 0x5 0x0 #define MX8MM_IOMUXC_SD1_DATA0_USDHC1_DATA0 0x0A8 0x310 0x000 0x0 0x0 -#define MX8MM_IOMUXC_SD1_DATA0_GPIO2_IO2 0x0A8 0x31 0x000 0x5 0x0 +#define MX8MM_IOMUXC_SD1_DATA0_GPIO2_IO2 0x0A8 0x310 0x000 0x5 0x0 #define MX8MM_IOMUXC_SD1_DATA1_USDHC1_DATA1 0x0AC 0x314 0x000 0x0 0x0 #define MX8MM_IOMUXC_SD1_DATA1_GPIO2_IO3 0x0AC 0x314 0x000 0x5 0x0 #define MX8MM_IOMUXC_SD1_DATA2_USDHC1_DATA2 0x0B0 0x318 0x000 0x0 0x0 diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index 23c8fad7932b1..7b178a77cc712 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -126,7 +126,7 @@ opp-1600000000 { opp-hz = /bits/ 64 <1600000000>; - opp-microvolt = <900000>; + opp-microvolt = <950000>; opp-supported-hw = <0xc>, <0x7>; clock-latency-ns = <150000>; opp-suspend; @@ -479,14 +479,18 @@ <&clk IMX8MM_CLK_AUDIO_AHB>, <&clk IMX8MM_CLK_IPG_AUDIO_ROOT>, <&clk IMX8MM_SYS_PLL3>, - <&clk IMX8MM_VIDEO_PLL1>; + <&clk IMX8MM_VIDEO_PLL1>, + <&clk IMX8MM_AUDIO_PLL1>, + <&clk IMX8MM_AUDIO_PLL2>; assigned-clock-parents = <&clk IMX8MM_SYS_PLL3_OUT>, <&clk IMX8MM_SYS_PLL1_800M>; assigned-clock-rates = <0>, <400000000>, <400000000>, <750000000>, - <594000000>; + <594000000>, + <393216000>, + <361267200>; }; src: reset-controller@30390000 { @@ -741,7 +745,7 @@ reg = <0x30bd0000 0x10000>; interrupts = ; clocks = <&clk IMX8MM_CLK_SDMA1_ROOT>, - <&clk IMX8MM_CLK_SDMA1_ROOT>; + <&clk IMX8MM_CLK_AHB>; clock-names = "ipg", "ahb"; #dma-cells = <3>; fsl,sdma-ram-script-name = "imx/sdma/sdma-imx7d.bin"; diff --git a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts index 11c705d225d02..4fa39654b695d 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts @@ -213,6 +213,10 @@ interrupts = <3 GPIO_ACTIVE_LOW>; rohm,reset-snvs-powered; + #clock-cells = <0>; + clocks = <&osc_32k 0>; + clock-output-names = "clk-32k-out"; + regulators { buck1_reg: BUCK1 { regulator-name = "BUCK1"; @@ -268,7 +272,7 @@ ldo1_reg: LDO1 { regulator-name = "LDO1"; - regulator-min-microvolt = <3000000>; + regulator-min-microvolt = <1600000>; regulator-max-microvolt = <3300000>; regulator-boot-on; regulator-always-on; @@ -276,7 +280,7 @@ ldo2_reg: LDO2 { regulator-name = "LDO2"; - regulator-min-microvolt = <900000>; + regulator-min-microvolt = <800000>; regulator-max-microvolt = <900000>; regulator-boot-on; regulator-always-on; diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index 43c4db3121468..546511b373d43 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -616,7 +616,7 @@ reg = <0x30bd0000 0x10000>; interrupts = ; clocks = <&clk IMX8MN_CLK_SDMA1_ROOT>, - <&clk IMX8MN_CLK_SDMA1_ROOT>; + <&clk IMX8MN_CLK_AHB>; clock-names = "ipg", "ahb"; #dma-cells = <3>; fsl,sdma-ram-script-name = "imx/sdma/sdma-imx7d.bin"; @@ -677,28 +677,6 @@ #index-cells = <1>; reg = <0x32e40200 0x200>; }; - - usbotg2: usb@32e50000 { - compatible = "fsl,imx8mn-usb", "fsl,imx7d-usb"; - reg = <0x32e50000 0x200>; - interrupts = ; - clocks = <&clk IMX8MN_CLK_USB1_CTRL_ROOT>; - clock-names = "usb1_ctrl_root_clk"; - assigned-clocks = <&clk IMX8MN_CLK_USB_BUS>, - <&clk IMX8MN_CLK_USB_CORE_REF>; - assigned-clock-parents = <&clk IMX8MN_SYS_PLL2_500M>, - <&clk IMX8MN_SYS_PLL1_100M>; - fsl,usbphy = <&usbphynop2>; - fsl,usbmisc = <&usbmisc2 0>; - status = "disabled"; - }; - - usbmisc2: usbmisc@32e50200 { - compatible = "fsl,imx8mn-usbmisc", "fsl,imx7d-usbmisc"; - #index-cells = <1>; - reg = <0x32e50200 0x200>; - }; - }; dma_apbh: dma-controller@33000000 { @@ -747,12 +725,4 @@ assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_100M>; clock-names = "main_clk"; }; - - usbphynop2: usbphynop2 { - compatible = "usb-nop-xceiv"; - clocks = <&clk IMX8MN_CLK_USB_PHY_REF>; - assigned-clocks = <&clk IMX8MN_CLK_USB_PHY_REF>; - assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_100M>; - clock-names = "main_clk"; - }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq-librem5-devkit.dts b/arch/arm64/boot/dts/freescale/imx8mq-librem5-devkit.dts index 683a110356431..19b427f68dad2 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-librem5-devkit.dts +++ b/arch/arm64/boot/dts/freescale/imx8mq-librem5-devkit.dts @@ -421,7 +421,7 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_imu>; interrupt-parent = <&gpio3>; - interrupts = <19 IRQ_TYPE_LEVEL_LOW>; + interrupts = <19 IRQ_TYPE_LEVEL_HIGH>; vdd-supply = <®_3v3_p>; vddio-supply = <®_3v3_p>; }; @@ -743,6 +743,7 @@ }; &usb3_phy0 { + vbus-supply = <®_5v_p>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq-pinfunc.h b/arch/arm64/boot/dts/freescale/imx8mq-pinfunc.h index b94b02080a344..68e8fa1729741 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-pinfunc.h +++ b/arch/arm64/boot/dts/freescale/imx8mq-pinfunc.h @@ -130,7 +130,7 @@ #define MX8MQ_IOMUXC_SD1_CMD_USDHC1_CMD 0x0A4 0x30C 0x000 0x0 0x0 #define MX8MQ_IOMUXC_SD1_CMD_GPIO2_IO1 0x0A4 0x30C 0x000 0x5 0x0 #define MX8MQ_IOMUXC_SD1_DATA0_USDHC1_DATA0 0x0A8 0x310 0x000 0x0 0x0 -#define MX8MQ_IOMUXC_SD1_DATA0_GPIO2_IO2 0x0A8 0x31 0x000 0x5 0x0 +#define MX8MQ_IOMUXC_SD1_DATA0_GPIO2_IO2 0x0A8 0x310 0x000 0x5 0x0 #define MX8MQ_IOMUXC_SD1_DATA1_USDHC1_DATA1 0x0AC 0x314 0x000 0x0 0x0 #define MX8MQ_IOMUXC_SD1_DATA1_GPIO2_IO3 0x0AC 0x314 0x000 0x5 0x0 #define MX8MQ_IOMUXC_SD1_DATA2_USDHC1_DATA2 0x0B0 0x318 0x000 0x0 0x0 diff --git a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi index 32ce14936b013..f385b143b3086 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi @@ -45,8 +45,8 @@ reg_12p0_main: regulator-12p0-main { compatible = "regulator-fixed"; regulator-name = "12V_MAIN"; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; + regulator-min-microvolt = <12000000>; + regulator-max-microvolt = <12000000>; regulator-always-on; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 55a3d1c4bdf04..3dae8d7c76198 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -349,7 +349,7 @@ tmu: tmu@30260000 { compatible = "fsl,imx8mq-tmu"; reg = <0x30260000 0x10000>; - interrupt = ; + interrupts = ; clocks = <&clk IMX8MQ_CLK_TMU_ROOT>; little-endian; fsl,tmu-range = <0xb0000 0xa0026 0x80048 0x70061>; @@ -516,6 +516,7 @@ gpc: gpc@303a0000 { compatible = "fsl,imx8mq-gpc"; reg = <0x303a0000 0x10000>; + interrupts = ; interrupt-parent = <&gic>; interrupt-controller; #interrupt-cells = <3>; @@ -1055,6 +1056,14 @@ <&src IMX8MQ_RESET_PCIE_CTRL_APPS_EN>, <&src IMX8MQ_RESET_PCIE_CTRL_APPS_TURNOFF>; reset-names = "pciephy", "apps", "turnoff"; + assigned-clocks = <&clk IMX8MQ_CLK_PCIE1_CTRL>, + <&clk IMX8MQ_CLK_PCIE1_PHY>, + <&clk IMX8MQ_CLK_PCIE1_AUX>; + assigned-clock-parents = <&clk IMX8MQ_SYS2_PLL_250M>, + <&clk IMX8MQ_SYS2_PLL_100M>, + <&clk IMX8MQ_SYS1_PLL_80M>; + assigned-clock-rates = <250000000>, <100000000>, + <10000000>; status = "disabled"; }; @@ -1084,6 +1093,14 @@ <&src IMX8MQ_RESET_PCIE2_CTRL_APPS_EN>, <&src IMX8MQ_RESET_PCIE2_CTRL_APPS_TURNOFF>; reset-names = "pciephy", "apps", "turnoff"; + assigned-clocks = <&clk IMX8MQ_CLK_PCIE2_CTRL>, + <&clk IMX8MQ_CLK_PCIE2_PHY>, + <&clk IMX8MQ_CLK_PCIE2_AUX>; + assigned-clock-parents = <&clk IMX8MQ_SYS2_PLL_250M>, + <&clk IMX8MQ_SYS2_PLL_100M>, + <&clk IMX8MQ_SYS1_PLL_80M>; + assigned-clock-rates = <250000000>, <100000000>, + <10000000>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts index 19468058e6ae4..8148196902dd5 100644 --- a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts +++ b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts @@ -52,11 +52,6 @@ compatible = "ethernet-phy-ieee802.3-c22"; reg = <0>; }; - - ethphy1: ethernet-phy@1 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <1>; - }; }; }; diff --git a/arch/arm64/boot/dts/hisilicon/hi3660-hikey960.dts b/arch/arm64/boot/dts/hisilicon/hi3660-hikey960.dts index e035cf195b19a..8c4bfbaf3a806 100644 --- a/arch/arm64/boot/dts/hisilicon/hi3660-hikey960.dts +++ b/arch/arm64/boot/dts/hisilicon/hi3660-hikey960.dts @@ -530,6 +530,17 @@ status = "ok"; compatible = "adi,adv7533"; reg = <0x39>; + adi,dsi-lanes = <4>; + ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { + reg = <1>; + }; + }; }; }; diff --git a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi index 253cc345f143a..0c88b72094774 100644 --- a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi @@ -1086,7 +1086,7 @@ }; watchdog0: watchdog@e8a06000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xe8a06000 0x0 0x1000>; interrupts = ; clocks = <&crg_ctrl HI3660_OSC32K>; @@ -1094,7 +1094,7 @@ }; watchdog1: watchdog@e8a07000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xe8a07000 0x0 0x1000>; interrupts = ; clocks = <&crg_ctrl HI3660_OSC32K>; diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index c14205cd6bf5c..3e47150c05ec2 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -516,7 +516,7 @@ reg = <0x39>; interrupt-parent = <&gpio1>; interrupts = <1 2>; - pd-gpio = <&gpio0 4 0>; + pd-gpios = <&gpio0 4 0>; adi,dsi-lanes = <4>; #sound-dai-cells = <0>; diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi index 108e2a4227f66..568faaba7ace9 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi @@ -839,7 +839,7 @@ }; watchdog0: watchdog@f8005000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xf8005000 0x0 0x1000>; interrupts = ; clocks = <&ao_ctrl HI6220_WDT0_PCLK>; diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi index 36abc25320a81..19f17bb29e4bd 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi +++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi @@ -47,10 +47,10 @@ pmu { compatible = "arm,armv8-pmuv3"; - interrupts = <0 120 8>, - <0 121 8>, - <0 122 8>, - <0 123 8>; + interrupts = <0 170 4>, + <0 171 4>, + <0 172 4>, + <0 173 4>; interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, @@ -82,7 +82,7 @@ ranges = <0 0 0 0xffffffff>; gmac0: ethernet@ff800000 { - compatible = "altr,socfpga-stmmac", "snps,dwmac-3.74a", "snps,dwmac"; + compatible = "altr,socfpga-stmmac-a10-s10", "snps,dwmac-3.74a", "snps,dwmac"; reg = <0xff800000 0x2000>; interrupts = <0 90 4>; interrupt-names = "macirq"; @@ -97,7 +97,7 @@ }; gmac1: ethernet@ff802000 { - compatible = "altr,socfpga-stmmac", "snps,dwmac-3.74a", "snps,dwmac"; + compatible = "altr,socfpga-stmmac-a10-s10", "snps,dwmac-3.74a", "snps,dwmac"; reg = <0xff802000 0x2000>; interrupts = <0 91 4>; interrupt-names = "macirq"; @@ -112,7 +112,7 @@ }; gmac2: ethernet@ff804000 { - compatible = "altr,socfpga-stmmac", "snps,dwmac-3.74a", "snps,dwmac"; + compatible = "altr,socfpga-stmmac-a10-s10", "snps,dwmac-3.74a", "snps,dwmac"; reg = <0xff804000 0x2000>; interrupts = <0 92 4>; interrupt-names = "macirq"; @@ -369,7 +369,7 @@ }; usb0: usb@ffb00000 { - compatible = "snps,dwc2"; + compatible = "intel,socfpga-agilex-hsotg", "snps,dwc2"; reg = <0xffb00000 0x40000>; interrupts = <0 93 4>; phys = <&usbphy0>; @@ -381,7 +381,7 @@ }; usb1: usb@ffb40000 { - compatible = "snps,dwc2"; + compatible = "intel,socfpga-agilex-hsotg", "snps,dwc2"; reg = <0xffb40000 0x40000>; interrupts = <0 94 4>; phys = <&usbphy0>; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-db.dts b/arch/arm64/boot/dts/marvell/armada-3720-db.dts index f2cc00594d64a..3e5789f372069 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-db.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-db.dts @@ -128,6 +128,9 @@ /* CON15(V2.0)/CON17(V1.4) : PCIe / CON15(V2.0)/CON12(V1.4) :mini-PCIe */ &pcie0 { + pinctrl-names = "default"; + pinctrl-0 = <&pcie_reset_pins &pcie_clkreq_pins>; + reset-gpios = <&gpiosb 3 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts index fbcf03f86c967..a75bb2ea3506d 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dts @@ -19,6 +19,16 @@ model = "Globalscale Marvell ESPRESSOBin Board"; compatible = "globalscale,espressobin", "marvell,armada3720", "marvell,armada3710"; + aliases { + ethernet0 = ð0; + /* for dsa slave device */ + ethernet1 = &switch0port1; + ethernet2 = &switch0port2; + ethernet3 = &switch0port3; + serial0 = &uart0; + serial1 = &uart1; + }; + chosen { stdout-path = "serial0:115200n8"; }; @@ -49,6 +59,7 @@ phys = <&comphy1 0>; pinctrl-names = "default"; pinctrl-0 = <&pcie_reset_pins &pcie_clkreq_pins>; + reset-gpios = <&gpiosb 3 GPIO_ACTIVE_LOW>; }; /* J6 */ @@ -141,7 +152,7 @@ #address-cells = <1>; #size-cells = <0>; - port@0 { + switch0port0: port@0 { reg = <0>; label = "cpu"; ethernet = <ð0>; @@ -152,19 +163,19 @@ }; }; - port@1 { + switch0port1: port@1 { reg = <1>; label = "wan"; phy-handle = <&switch0phy0>; }; - port@2 { + switch0port2: port@2 { reg = <2>; label = "lan0"; phy-handle = <&switch0phy1>; }; - port@3 { + switch0port3: port@3 { reg = <3>; label = "lan1"; phy-handle = <&switch0phy2>; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts index 5f350cc71a2fd..2e8239d489f82 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts @@ -18,6 +18,7 @@ aliases { spi0 = &spi0; + ethernet0 = ð0; ethernet1 = ð1; }; @@ -95,7 +96,7 @@ }; sfp: sfp { - compatible = "sff,sfp+"; + compatible = "sff,sfp"; i2c-bus = <&i2c0>; los-gpio = <&moxtet_sfp 0 GPIO_ACTIVE_HIGH>; tx-fault-gpio = <&moxtet_sfp 1 GPIO_ACTIVE_HIGH>; @@ -106,12 +107,19 @@ /* enabled by U-Boot if SFP module is present */ status = "disabled"; }; + + firmware { + armada-3700-rwtm { + compatible = "marvell,armada-3700-rwtm-firmware", "cznic,turris-mox-rwtm"; + }; + }; }; &i2c0 { pinctrl-names = "default"; pinctrl-0 = <&i2c1_pins>; clock-frequency = <100000>; + /delete-property/ mrvl,i2c-fast-mode; status = "okay"; rtc@6f { @@ -120,10 +128,6 @@ }; }; -&pcie_reset_pins { - function = "gpio"; -}; - &pcie0 { pinctrl-names = "default"; pinctrl-0 = <&pcie_reset_pins &pcie_clkreq_pins>; @@ -131,6 +135,28 @@ max-link-speed = <2>; reset-gpios = <&gpiosb 3 GPIO_ACTIVE_LOW>; phys = <&comphy1 0>; + /* + * U-Boot port for Turris Mox has a bug which always expects that "ranges" DT property + * contains exactly 2 ranges with 3 (child) address cells, 2 (parent) address cells and + * 2 size cells and also expects that the second range starts at 16 MB offset. Also it + * expects that first range uses same address for PCI (child) and CPU (parent) cells (so + * no remapping) and that this address is the lowest from all specified ranges. If these + * conditions are not met then U-Boot crashes during loading kernel DTB file. PCIe address + * space is 128 MB long, so the best split between MEM and IO is to use fixed 16 MB window + * for IO and the rest 112 MB (64+32+16) for MEM, despite that maximal IO size is just 64 kB. + * This bug is not present in U-Boot ports for other Armada 3700 devices and is fixed in + * U-Boot version 2021.07. See relevant U-Boot commits (the last one contains fix): + * https://source.denx.de/u-boot/u-boot/-/commit/cb2ddb291ee6fcbddd6d8f4ff49089dfe580f5d7 + * https://source.denx.de/u-boot/u-boot/-/commit/c64ac3b3185aeb3846297ad7391fc6df8ecd73bf + * https://source.denx.de/u-boot/u-boot/-/commit/4a82fca8e330157081fc132a591ebd99ba02ee33 + * Bug related to requirement of same child and parent addresses for first range is fixed + * in U-Boot version 2022.04 by following commit: + * https://source.denx.de/u-boot/u-boot/-/commit/1fd54253bca7d43d046bba4853fe5fafd034bc17 + */ + #address-cells = <3>; + #size-cells = <2>; + ranges = <0x81000000 0 0xe8000000 0 0xe8000000 0 0x01000000 /* Port 0 IO */ + 0x82000000 0 0xe9000000 0 0xe9000000 0 0x07000000>; /* Port 0 MEM */ /* enabled by U-Boot if PCIe module is present */ status = "disabled"; @@ -144,7 +170,7 @@ pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; phy-mode = "rgmii-id"; - phy = <&phy1>; + phy-handle = <&phy1>; status = "okay"; }; @@ -171,6 +197,8 @@ marvell,pad-type = "sd"; vqmmc-supply = <&vsdio_reg>; mmc-pwrseq = <&sdhci1_pwrseq>; + /* forbid SDR104 for FCC purposes */ + sdhci-caps-mask = <0x2 0x0>; status = "okay"; }; @@ -200,7 +228,7 @@ }; partition@20000 { - label = "u-boot"; + label = "a53-firmware"; reg = <0x20000 0x160000>; }; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts index bd4aab6092e0f..e31813a4f9722 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts @@ -143,6 +143,7 @@ phy-mode = "sgmii"; status = "okay"; managed = "in-band-status"; + phys = <&comphy1 0>; sfp = <&sfp_eth0>; }; @@ -150,11 +151,14 @@ phy-mode = "sgmii"; status = "okay"; managed = "in-band-status"; + phys = <&comphy0 1>; sfp = <&sfp_eth1>; }; &usb3 { status = "okay"; + phys = <&usb2_utmi_otg_phy>; + phy-names = "usb2-utmi-otg-phy"; }; &uart0 { diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi index 000c135e39b73..9405d9c619ca6 100644 --- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi @@ -134,7 +134,7 @@ uart0: serial@12000 { compatible = "marvell,armada-3700-uart"; - reg = <0x12000 0x200>; + reg = <0x12000 0x18>; clocks = <&xtalclk>; interrupts = , @@ -156,7 +156,8 @@ }; nb_periph_clk: nb-periph-clk@13000 { - compatible = "marvell,armada-3700-periph-clock-nb"; + compatible = "marvell,armada-3700-periph-clock-nb", + "syscon"; reg = <0x13000 0x100>; clocks = <&tbg 0>, <&tbg 1>, <&tbg 2>, <&tbg 3>, <&xtalclk>; @@ -317,7 +318,7 @@ pcie_reset_pins: pcie-reset-pins { groups = "pcie1"; - function = "pcie"; + function = "gpio"; }; pcie_clkreq_pins: pcie-clkreq-pins { @@ -486,8 +487,15 @@ #interrupt-cells = <1>; msi-parent = <&pcie0>; msi-controller; - ranges = <0x82000000 0 0xe8000000 0 0xe8000000 0 0x1000000 /* Port 0 MEM */ - 0x81000000 0 0xe9000000 0 0xe9000000 0 0x10000>; /* Port 0 IO*/ + /* + * The 128 MiB address range [0xe8000000-0xf0000000] is + * dedicated for PCIe and can be assigned to 8 windows + * with size a power of two. Use one 64 KiB window for + * IO at the end and the remaining seven windows + * (totaling 127 MiB) for MEM. + */ + ranges = <0x82000000 0 0xe8000000 0 0xe8000000 0 0x07f00000 /* Port 0 MEM */ + 0x81000000 0 0x00000000 0 0xefff0000 0 0x00010000>; /* Port 0 IO */ interrupt-map-mask = <0 0 0 7>; interrupt-map = <0 0 0 1 &pcie_intc 0>, <0 0 0 2 &pcie_intc 1>, @@ -499,4 +507,12 @@ }; }; }; + + firmware { + armada-3700-rwtm { + compatible = "marvell,armada-3700-rwtm-firmware"; + mboxes = <&rwtm 0>; + status = "okay"; + }; + }; }; diff --git a/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts b/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts index bd881497b8729..b90d78a5724b2 100644 --- a/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts +++ b/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts @@ -367,6 +367,7 @@ pinctrl-0 = <&cp0_copper_eth_phy_reset>; reset-gpios = <&cp0_gpio2 11 GPIO_ACTIVE_LOW>; reset-assert-us = <10000>; + reset-deassert-us = <10000>; }; switch0: switch0@4 { @@ -408,6 +409,8 @@ reg = <5>; label = "cpu"; ethernet = <&cp1_eth2>; + phy-mode = "2500base-x"; + managed = "in-band-status"; }; }; diff --git a/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dtsi b/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dtsi index d250f4b2bfedb..bf443ca1fcf11 100644 --- a/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-8040-mcbin.dtsi @@ -71,6 +71,7 @@ tx-fault-gpio = <&cp1_gpio1 26 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&cp1_sfpp0_pins>; + maximum-power-milliwatt = <2000>; }; sfp_eth1: sfp-eth1 { @@ -83,6 +84,7 @@ tx-fault-gpio = <&cp0_gpio2 30 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&cp1_sfpp1_pins &cp0_sfpp1_pins>; + maximum-power-milliwatt = <2000>; }; sfp_eth3: sfp-eth3 { @@ -95,6 +97,7 @@ tx-fault-gpio = <&cp0_gpio2 19 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&cp0_sfp_1g_pins &cp1_sfp_1g_pins>; + maximum-power-milliwatt = <2000>; }; }; diff --git a/arch/arm64/boot/dts/marvell/armada-ap806-dual.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806-dual.dtsi index 9024a2d9db070..62ae016ee6aa1 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap806-dual.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap806-dual.dtsi @@ -21,6 +21,7 @@ reg = <0x000>; enable-method = "psci"; #cooling-cells = <2>; + clocks = <&cpu_clk 0>; }; cpu1: cpu@1 { device_type = "cpu"; @@ -28,6 +29,7 @@ reg = <0x001>; enable-method = "psci"; #cooling-cells = <2>; + clocks = <&cpu_clk 0>; }; }; }; diff --git a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi index d819449026502..8259fc8f86f23 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi @@ -438,10 +438,10 @@ CP110_LABEL(nand_controller): nand@720000 { /* - * Due to the limitation of the pins available - * this controller is only usable on the CPM - * for A7K and on the CPS for A8K. - */ + * Due to the limitation of the pins available + * this controller is only usable on the CPM + * for A7K and on the CPS for A8K. + */ compatible = "marvell,armada-8k-nand-controller", "marvell,armada370-nand-controller"; reg = <0x720000 0x54>; diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts index 83e10591e0e5d..81215cc3759a8 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts @@ -49,7 +49,7 @@ wps { label = "wps"; linux,code = ; - gpios = <&pio 102 GPIO_ACTIVE_HIGH>; + gpios = <&pio 102 GPIO_ACTIVE_LOW>; }; }; diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index dac51e98204c0..e7e002d8b1089 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -686,6 +686,8 @@ clocks = <&pericfg CLK_PERI_MSDC30_0_PD>, <&topckgen CLK_TOP_MSDC50_0_SEL>; clock-names = "source", "hclk"; + resets = <&pericfg MT7622_PERI_MSDC0_SW_RST>; + reset-names = "hrst"; status = "disabled"; }; @@ -696,6 +698,8 @@ clocks = <&pericfg CLK_PERI_MSDC30_1_PD>, <&topckgen CLK_TOP_AXI_SEL>; clock-names = "source", "hclk"; + resets = <&pericfg MT7622_PERI_MSDC1_SW_RST>; + reset-names = "hrst"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi index 15f1842f6df3e..c4f742e6d1604 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi @@ -238,21 +238,21 @@ cpu_on = <0x84000003>; }; - clk26m: oscillator@0 { + clk26m: oscillator0 { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <26000000>; clock-output-names = "clk26m"; }; - clk32k: oscillator@1 { + clk32k: oscillator1 { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <32000>; clock-output-names = "clk32k"; }; - cpum_ck: oscillator@2 { + cpum_ck: oscillator2 { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <0>; @@ -268,19 +268,19 @@ sustainable-power = <1500>; /* milliwatts */ trips { - threshold: trip-point@0 { + threshold: trip-point0 { temperature = <68000>; hysteresis = <2000>; type = "passive"; }; - target: trip-point@1 { + target: trip-point1 { temperature = <85000>; hysteresis = <2000>; type = "passive"; }; - cpu_crit: cpu_crit@0 { + cpu_crit: cpu_crit0 { temperature = <115000>; hysteresis = <2000>; type = "critical"; @@ -288,13 +288,13 @@ }; cooling-maps { - map@0 { + map0 { trip = <&target>; cooling-device = <&cpu0 0 0>, <&cpu1 0 0>; contribution = <3072>; }; - map@1 { + map1 { trip = <&target>; cooling-device = <&cpu2 0 0>, <&cpu3 0 0>; @@ -308,7 +308,7 @@ #address-cells = <2>; #size-cells = <2>; ranges; - vpu_dma_reserved: vpu_dma_mem_region { + vpu_dma_reserved: vpu_dma_mem_region@b7000000 { compatible = "shared-dma-pool"; reg = <0 0xb7000000 0 0x500000>; alignment = <0x1000>; @@ -360,7 +360,7 @@ reg = <0 0x10005000 0 0x1000>; }; - pio: pinctrl@10005000 { + pio: pinctrl@1000b000 { compatible = "mediatek,mt8173-pinctrl"; reg = <0 0x1000b000 0 0x1000>; mediatek,pctl-regmap = <&syscfg_pctl_a>; @@ -567,7 +567,7 @@ status = "disabled"; }; - gic: interrupt-controller@10220000 { + gic: interrupt-controller@10221000 { compatible = "arm,gic-400"; #interrupt-cells = <3>; interrupt-parent = <&gic>; @@ -1137,7 +1137,7 @@ <&mmsys CLK_MM_DSI1_DIGITAL>, <&mipi_tx1>; clock-names = "engine", "digital", "hs"; - phy = <&mipi_tx1>; + phys = <&mipi_tx1>; phy-names = "dphy"; status = "disabled"; }; @@ -1397,8 +1397,8 @@ "venc_lt_sel"; assigned-clocks = <&topckgen CLK_TOP_VENC_SEL>, <&topckgen CLK_TOP_VENC_LT_SEL>; - assigned-clock-parents = <&topckgen CLK_TOP_VENCPLL_D2>, - <&topckgen CLK_TOP_UNIVPLL1_D2>; + assigned-clock-parents = <&topckgen CLK_TOP_VCODECPLL>, + <&topckgen CLK_TOP_VCODECPLL_370P5>; }; vencltsys: clock-controller@19000000 { diff --git a/arch/arm64/boot/dts/nvidia/tegra132.dtsi b/arch/arm64/boot/dts/nvidia/tegra132.dtsi index 631a7f77c3869..0b3eb8c0b8df0 100644 --- a/arch/arm64/boot/dts/nvidia/tegra132.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra132.dtsi @@ -1082,13 +1082,13 @@ cpu@0 { device_type = "cpu"; - compatible = "nvidia,denver"; + compatible = "nvidia,tegra132-denver"; reg = <0>; }; cpu@1 { device_type = "cpu"; - compatible = "nvidia,denver"; + compatible = "nvidia,tegra132-denver"; reg = <1>; }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts index bdace01561bab..9df4782c90f35 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts +++ b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts @@ -10,18 +10,6 @@ model = "NVIDIA Jetson TX2 Developer Kit"; compatible = "nvidia,p2771-0000", "nvidia,tegra186"; - aconnect { - status = "okay"; - - dma-controller@2930000 { - status = "okay"; - }; - - interrupt-controller@2a40000 { - status = "okay"; - }; - }; - i2c@3160000 { power-monitor@42 { compatible = "ti,ina3221"; diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi index 47cd831fcf445..4457262750734 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi @@ -309,8 +309,9 @@ compatible = "nvidia,tegra186-sdhci"; reg = <0x0 0x03400000 0x0 0x10000>; interrupts = ; - clocks = <&bpmp TEGRA186_CLK_SDMMC1>; - clock-names = "sdhci"; + clocks = <&bpmp TEGRA186_CLK_SDMMC1>, + <&bpmp TEGRA186_CLK_SDMMC_LEGACY_TM>; + clock-names = "sdhci", "tmclk"; resets = <&bpmp TEGRA186_RESET_SDMMC1>; reset-names = "sdhci"; iommus = <&smmu TEGRA186_SID_SDMMC1>; @@ -335,8 +336,9 @@ compatible = "nvidia,tegra186-sdhci"; reg = <0x0 0x03420000 0x0 0x10000>; interrupts = ; - clocks = <&bpmp TEGRA186_CLK_SDMMC2>; - clock-names = "sdhci"; + clocks = <&bpmp TEGRA186_CLK_SDMMC2>, + <&bpmp TEGRA186_CLK_SDMMC_LEGACY_TM>; + clock-names = "sdhci", "tmclk"; resets = <&bpmp TEGRA186_RESET_SDMMC2>; reset-names = "sdhci"; iommus = <&smmu TEGRA186_SID_SDMMC2>; @@ -356,8 +358,9 @@ compatible = "nvidia,tegra186-sdhci"; reg = <0x0 0x03440000 0x0 0x10000>; interrupts = ; - clocks = <&bpmp TEGRA186_CLK_SDMMC3>; - clock-names = "sdhci"; + clocks = <&bpmp TEGRA186_CLK_SDMMC3>, + <&bpmp TEGRA186_CLK_SDMMC_LEGACY_TM>; + clock-names = "sdhci", "tmclk"; resets = <&bpmp TEGRA186_RESET_SDMMC3>; reset-names = "sdhci"; iommus = <&smmu TEGRA186_SID_SDMMC3>; @@ -379,8 +382,9 @@ compatible = "nvidia,tegra186-sdhci"; reg = <0x0 0x03460000 0x0 0x10000>; interrupts = ; - clocks = <&bpmp TEGRA186_CLK_SDMMC4>; - clock-names = "sdhci"; + clocks = <&bpmp TEGRA186_CLK_SDMMC4>, + <&bpmp TEGRA186_CLK_SDMMC_LEGACY_TM>; + clock-names = "sdhci", "tmclk"; assigned-clocks = <&bpmp TEGRA186_CLK_SDMMC4>, <&bpmp TEGRA186_CLK_PLLC4_VCO>; assigned-clock-parents = <&bpmp TEGRA186_CLK_PLLC4_VCO>; @@ -705,7 +709,7 @@ ccplex@e000000 { compatible = "nvidia,tegra186-ccplex-cluster"; - reg = <0x0 0x0e000000 0x0 0x3fffff>; + reg = <0x0 0x0e000000 0x0 0x400000>; nvidia,bpmp = <&bpmp>; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi index 4c38426a69693..7899759a12f80 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi @@ -32,7 +32,7 @@ phy-reset-gpios = <&gpio TEGRA194_MAIN_GPIO(G, 5) GPIO_ACTIVE_LOW>; phy-handle = <&phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; mdio { #address-cells = <1>; @@ -309,9 +309,8 @@ regulator-name = "VDD_12V"; regulator-min-microvolt = <1200000>; regulator-max-microvolt = <1200000>; - gpio = <&gpio TEGRA194_MAIN_GPIO(A, 1) GPIO_ACTIVE_LOW>; + gpio = <&gpio TEGRA194_MAIN_GPIO(A, 1) GPIO_ACTIVE_HIGH>; regulator-boot-on; - enable-active-low; }; }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index 3c0cf54f0aab3..90adff8aa9baf 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -144,7 +144,7 @@ nvidia,schmitt = ; nvidia,lpdr = ; nvidia,enable-input = ; - nvidia,io-high-voltage = ; + nvidia,io-hv = ; nvidia,tristate = ; nvidia,pull = ; }; @@ -156,7 +156,7 @@ nvidia,schmitt = ; nvidia,lpdr = ; nvidia,enable-input = ; - nvidia,io-high-voltage = ; + nvidia,io-hv = ; nvidia,tristate = ; nvidia,pull = ; }; @@ -403,8 +403,9 @@ compatible = "nvidia,tegra194-sdhci", "nvidia,tegra186-sdhci"; reg = <0x03400000 0x10000>; interrupts = ; - clocks = <&bpmp TEGRA194_CLK_SDMMC1>; - clock-names = "sdhci"; + clocks = <&bpmp TEGRA194_CLK_SDMMC1>, + <&bpmp TEGRA194_CLK_SDMMC_LEGACY_TM>; + clock-names = "sdhci", "tmclk"; resets = <&bpmp TEGRA194_RESET_SDMMC1>; reset-names = "sdhci"; nvidia,pad-autocal-pull-up-offset-3v3-timeout = @@ -425,8 +426,9 @@ compatible = "nvidia,tegra194-sdhci", "nvidia,tegra186-sdhci"; reg = <0x03440000 0x10000>; interrupts = ; - clocks = <&bpmp TEGRA194_CLK_SDMMC3>; - clock-names = "sdhci"; + clocks = <&bpmp TEGRA194_CLK_SDMMC3>, + <&bpmp TEGRA194_CLK_SDMMC_LEGACY_TM>; + clock-names = "sdhci", "tmclk"; resets = <&bpmp TEGRA194_RESET_SDMMC3>; reset-names = "sdhci"; nvidia,pad-autocal-pull-up-offset-1v8 = <0x00>; @@ -448,8 +450,9 @@ compatible = "nvidia,tegra194-sdhci", "nvidia,tegra186-sdhci"; reg = <0x03460000 0x10000>; interrupts = ; - clocks = <&bpmp TEGRA194_CLK_SDMMC4>; - clock-names = "sdhci"; + clocks = <&bpmp TEGRA194_CLK_SDMMC4>, + <&bpmp TEGRA194_CLK_SDMMC_LEGACY_TM>; + clock-names = "sdhci", "tmclk"; assigned-clocks = <&bpmp TEGRA194_CLK_SDMMC4>, <&bpmp TEGRA194_CLK_PLLC4>; assigned-clock-parents = @@ -689,7 +692,7 @@ hsp_aon: hsp@c150000 { compatible = "nvidia,tegra194-hsp", "nvidia,tegra186-hsp"; - reg = <0x0c150000 0xa0000>; + reg = <0x0c150000 0x90000>; interrupts = , , , @@ -1151,7 +1154,7 @@ }; pcie@14100000 { - compatible = "nvidia,tegra194-pcie", "snps,dw-pcie"; + compatible = "nvidia,tegra194-pcie"; power-domains = <&bpmp TEGRA194_POWER_DOMAIN_PCIEX1A>; reg = <0x00 0x14100000 0x0 0x00020000 /* appl registers (128K) */ 0x00 0x30000000 0x0 0x00040000 /* configuration space (256K) */ @@ -1192,12 +1195,12 @@ bus-range = <0x0 0xff>; ranges = <0x81000000 0x0 0x30100000 0x0 0x30100000 0x0 0x00100000 /* downstream I/O (1MB) */ - 0xc2000000 0x12 0x00000000 0x12 0x00000000 0x0 0x30000000 /* prefetchable memory (768MB) */ + 0xc3000000 0x12 0x00000000 0x12 0x00000000 0x0 0x30000000 /* prefetchable memory (768MB) */ 0x82000000 0x0 0x40000000 0x12 0x30000000 0x0 0x10000000>; /* non-prefetchable memory (256MB) */ }; pcie@14120000 { - compatible = "nvidia,tegra194-pcie", "snps,dw-pcie"; + compatible = "nvidia,tegra194-pcie"; power-domains = <&bpmp TEGRA194_POWER_DOMAIN_PCIEX1A>; reg = <0x00 0x14120000 0x0 0x00020000 /* appl registers (128K) */ 0x00 0x32000000 0x0 0x00040000 /* configuration space (256K) */ @@ -1238,12 +1241,12 @@ bus-range = <0x0 0xff>; ranges = <0x81000000 0x0 0x32100000 0x0 0x32100000 0x0 0x00100000 /* downstream I/O (1MB) */ - 0xc2000000 0x12 0x40000000 0x12 0x40000000 0x0 0x30000000 /* prefetchable memory (768MB) */ + 0xc3000000 0x12 0x40000000 0x12 0x40000000 0x0 0x30000000 /* prefetchable memory (768MB) */ 0x82000000 0x0 0x40000000 0x12 0x70000000 0x0 0x10000000>; /* non-prefetchable memory (256MB) */ }; pcie@14140000 { - compatible = "nvidia,tegra194-pcie", "snps,dw-pcie"; + compatible = "nvidia,tegra194-pcie"; power-domains = <&bpmp TEGRA194_POWER_DOMAIN_PCIEX1A>; reg = <0x00 0x14140000 0x0 0x00020000 /* appl registers (128K) */ 0x00 0x34000000 0x0 0x00040000 /* configuration space (256K) */ @@ -1284,12 +1287,12 @@ bus-range = <0x0 0xff>; ranges = <0x81000000 0x0 0x34100000 0x0 0x34100000 0x0 0x00100000 /* downstream I/O (1MB) */ - 0xc2000000 0x12 0x80000000 0x12 0x80000000 0x0 0x30000000 /* prefetchable memory (768MB) */ + 0xc3000000 0x12 0x80000000 0x12 0x80000000 0x0 0x30000000 /* prefetchable memory (768MB) */ 0x82000000 0x0 0x40000000 0x12 0xb0000000 0x0 0x10000000>; /* non-prefetchable memory (256MB) */ }; pcie@14160000 { - compatible = "nvidia,tegra194-pcie", "snps,dw-pcie"; + compatible = "nvidia,tegra194-pcie"; power-domains = <&bpmp TEGRA194_POWER_DOMAIN_PCIEX4A>; reg = <0x00 0x14160000 0x0 0x00020000 /* appl registers (128K) */ 0x00 0x36000000 0x0 0x00040000 /* configuration space (256K) */ @@ -1330,12 +1333,12 @@ bus-range = <0x0 0xff>; ranges = <0x81000000 0x0 0x36100000 0x0 0x36100000 0x0 0x00100000 /* downstream I/O (1MB) */ - 0xc2000000 0x14 0x00000000 0x14 0x00000000 0x3 0x40000000 /* prefetchable memory (13GB) */ + 0xc3000000 0x14 0x00000000 0x14 0x00000000 0x3 0x40000000 /* prefetchable memory (13GB) */ 0x82000000 0x0 0x40000000 0x17 0x40000000 0x0 0xc0000000>; /* non-prefetchable memory (3GB) */ }; pcie@14180000 { - compatible = "nvidia,tegra194-pcie", "snps,dw-pcie"; + compatible = "nvidia,tegra194-pcie"; power-domains = <&bpmp TEGRA194_POWER_DOMAIN_PCIEX8B>; reg = <0x00 0x14180000 0x0 0x00020000 /* appl registers (128K) */ 0x00 0x38000000 0x0 0x00040000 /* configuration space (256K) */ @@ -1376,12 +1379,12 @@ bus-range = <0x0 0xff>; ranges = <0x81000000 0x0 0x38100000 0x0 0x38100000 0x0 0x00100000 /* downstream I/O (1MB) */ - 0xc2000000 0x18 0x00000000 0x18 0x00000000 0x3 0x40000000 /* prefetchable memory (13GB) */ + 0xc3000000 0x18 0x00000000 0x18 0x00000000 0x3 0x40000000 /* prefetchable memory (13GB) */ 0x82000000 0x0 0x40000000 0x1b 0x40000000 0x0 0xc0000000>; /* non-prefetchable memory (3GB) */ }; pcie@141a0000 { - compatible = "nvidia,tegra194-pcie", "snps,dw-pcie"; + compatible = "nvidia,tegra194-pcie"; power-domains = <&bpmp TEGRA194_POWER_DOMAIN_PCIEX8A>; reg = <0x00 0x141a0000 0x0 0x00020000 /* appl registers (128K) */ 0x00 0x3a000000 0x0 0x00040000 /* configuration space (256K) */ @@ -1426,10 +1429,109 @@ bus-range = <0x0 0xff>; ranges = <0x81000000 0x0 0x3a100000 0x0 0x3a100000 0x0 0x00100000 /* downstream I/O (1MB) */ - 0xc2000000 0x1c 0x00000000 0x1c 0x00000000 0x3 0x40000000 /* prefetchable memory (13GB) */ + 0xc3000000 0x1c 0x00000000 0x1c 0x00000000 0x3 0x40000000 /* prefetchable memory (13GB) */ 0x82000000 0x0 0x40000000 0x1f 0x40000000 0x0 0xc0000000>; /* non-prefetchable memory (3GB) */ }; + pcie_ep@14160000 { + compatible = "nvidia,tegra194-pcie-ep"; + power-domains = <&bpmp TEGRA194_POWER_DOMAIN_PCIEX4A>; + reg = <0x00 0x14160000 0x0 0x00020000 /* appl registers (128K) */ + 0x00 0x36040000 0x0 0x00040000 /* iATU_DMA reg space (256K) */ + 0x00 0x36080000 0x0 0x00040000 /* DBI reg space (256K) */ + 0x14 0x00000000 0x4 0x00000000>; /* Address Space (16G) */ + reg-names = "appl", "atu_dma", "dbi", "addr_space"; + + status = "disabled"; + + num-lanes = <4>; + num-ib-windows = <2>; + num-ob-windows = <8>; + + clocks = <&bpmp TEGRA194_CLK_PEX0_CORE_4>; + clock-names = "core"; + + resets = <&bpmp TEGRA194_RESET_PEX0_CORE_4_APB>, + <&bpmp TEGRA194_RESET_PEX0_CORE_4>; + reset-names = "apb", "core"; + + interrupts = ; /* controller interrupt */ + interrupt-names = "intr"; + + nvidia,bpmp = <&bpmp 4>; + + nvidia,aspm-cmrt-us = <60>; + nvidia,aspm-pwr-on-t-us = <20>; + nvidia,aspm-l0s-entrance-latency-us = <3>; + }; + + pcie_ep@14180000 { + compatible = "nvidia,tegra194-pcie-ep"; + power-domains = <&bpmp TEGRA194_POWER_DOMAIN_PCIEX8B>; + reg = <0x00 0x14180000 0x0 0x00020000 /* appl registers (128K) */ + 0x00 0x38040000 0x0 0x00040000 /* iATU_DMA reg space (256K) */ + 0x00 0x38080000 0x0 0x00040000 /* DBI reg space (256K) */ + 0x18 0x00000000 0x4 0x00000000>; /* Address Space (16G) */ + reg-names = "appl", "atu_dma", "dbi", "addr_space"; + + status = "disabled"; + + num-lanes = <8>; + num-ib-windows = <2>; + num-ob-windows = <8>; + + clocks = <&bpmp TEGRA194_CLK_PEX0_CORE_0>; + clock-names = "core"; + + resets = <&bpmp TEGRA194_RESET_PEX0_CORE_0_APB>, + <&bpmp TEGRA194_RESET_PEX0_CORE_0>; + reset-names = "apb", "core"; + + interrupts = ; /* controller interrupt */ + interrupt-names = "intr"; + + nvidia,bpmp = <&bpmp 0>; + + nvidia,aspm-cmrt-us = <60>; + nvidia,aspm-pwr-on-t-us = <20>; + nvidia,aspm-l0s-entrance-latency-us = <3>; + }; + + pcie_ep@141a0000 { + compatible = "nvidia,tegra194-pcie-ep"; + power-domains = <&bpmp TEGRA194_POWER_DOMAIN_PCIEX8A>; + reg = <0x00 0x141a0000 0x0 0x00020000 /* appl registers (128K) */ + 0x00 0x3a040000 0x0 0x00040000 /* iATU_DMA reg space (256K) */ + 0x00 0x3a080000 0x0 0x00040000 /* DBI reg space (256K) */ + 0x1c 0x00000000 0x4 0x00000000>; /* Address Space (16G) */ + reg-names = "appl", "atu_dma", "dbi", "addr_space"; + + status = "disabled"; + + num-lanes = <8>; + num-ib-windows = <2>; + num-ob-windows = <8>; + + pinctrl-names = "default"; + pinctrl-0 = <&clkreq_c5_bi_dir_state>; + + clocks = <&bpmp TEGRA194_CLK_PEX1_CORE_5>; + clock-names = "core"; + + resets = <&bpmp TEGRA194_RESET_PEX1_CORE_5_APB>, + <&bpmp TEGRA194_RESET_PEX1_CORE_5>; + reset-names = "apb", "core"; + + interrupts = ; /* controller interrupt */ + interrupt-names = "intr"; + + nvidia,bpmp = <&bpmp 5>; + + nvidia,aspm-cmrt-us = <60>; + nvidia,aspm-pwr-on-t-us = <20>; + nvidia,aspm-l0s-entrance-latency-us = <3>; + }; + sysram@40000000 { compatible = "nvidia,tegra194-sysram", "mmio-sram"; reg = <0x0 0x40000000 0x0 0x50000>; diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi index a7dc319214a4d..b0095072bc28e 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi @@ -1612,7 +1612,7 @@ regulator-name = "VDD_HDMI_5V0"; regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; - gpio = <&exp1 12 GPIO_ACTIVE_LOW>; + gpio = <&exp1 12 GPIO_ACTIVE_HIGH>; enable-active-high; vin-supply = <&vdd_5v0_sys>; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra210.dtsi b/arch/arm64/boot/dts/nvidia/tegra210.dtsi index 659753118e96f..8a02b26d07cd4 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210.dtsi @@ -917,6 +917,7 @@ <&tegra_car 128>, /* hda2hdmi */ <&tegra_car 111>; /* hda2codec_2x */ reset-names = "hda", "hda2hdmi", "hda2codec_2x"; + power-domains = <&pd_sor>; status = "disabled"; }; @@ -1116,8 +1117,9 @@ compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci"; reg = <0x0 0x700b0000 0x0 0x200>; interrupts = ; - clocks = <&tegra_car TEGRA210_CLK_SDMMC1>; - clock-names = "sdhci"; + clocks = <&tegra_car TEGRA210_CLK_SDMMC1>, + <&tegra_car TEGRA210_CLK_SDMMC_LEGACY>; + clock-names = "sdhci", "tmclk"; resets = <&tegra_car 14>; reset-names = "sdhci"; pinctrl-names = "sdmmc-3v3", "sdmmc-1v8", @@ -1144,8 +1146,9 @@ compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci"; reg = <0x0 0x700b0200 0x0 0x200>; interrupts = ; - clocks = <&tegra_car TEGRA210_CLK_SDMMC2>; - clock-names = "sdhci"; + clocks = <&tegra_car TEGRA210_CLK_SDMMC2>, + <&tegra_car TEGRA210_CLK_SDMMC_LEGACY>; + clock-names = "sdhci", "tmclk"; resets = <&tegra_car 9>; reset-names = "sdhci"; pinctrl-names = "sdmmc-1v8-drv"; @@ -1161,8 +1164,9 @@ compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci"; reg = <0x0 0x700b0400 0x0 0x200>; interrupts = ; - clocks = <&tegra_car TEGRA210_CLK_SDMMC3>; - clock-names = "sdhci"; + clocks = <&tegra_car TEGRA210_CLK_SDMMC3>, + <&tegra_car TEGRA210_CLK_SDMMC_LEGACY>; + clock-names = "sdhci", "tmclk"; resets = <&tegra_car 69>; reset-names = "sdhci"; pinctrl-names = "sdmmc-3v3", "sdmmc-1v8", @@ -1184,8 +1188,9 @@ compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci"; reg = <0x0 0x700b0600 0x0 0x200>; interrupts = ; - clocks = <&tegra_car TEGRA210_CLK_SDMMC4>; - clock-names = "sdhci"; + clocks = <&tegra_car TEGRA210_CLK_SDMMC4>, + <&tegra_car TEGRA210_CLK_SDMMC_LEGACY>; + clock-names = "sdhci", "tmclk"; resets = <&tegra_car 15>; reset-names = "sdhci"; pinctrl-names = "sdmmc-3v3-drv", "sdmmc-1v8-drv"; diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi index 04ad2fb22b9af..dba3488492f1b 100644 --- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi +++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi @@ -623,6 +623,8 @@ l21 { regulator-min-microvolt = <2950000>; regulator-max-microvolt = <2950000>; + regulator-allow-set-load; + regulator-system-load = <200000>; }; l22 { regulator-min-microvolt = <3300000>; diff --git a/arch/arm64/boot/dts/qcom/ipq8074-hk01.dts b/arch/arm64/boot/dts/qcom/ipq8074-hk01.dts index 70be3f95209bc..830d9f2c1e5f2 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074-hk01.dts +++ b/arch/arm64/boot/dts/qcom/ipq8074-hk01.dts @@ -20,7 +20,7 @@ stdout-path = "serial0"; }; - memory { + memory@40000000 { device_type = "memory"; reg = <0x0 0x40000000 0x0 0x20000000>; }; diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index 67ee5f5601046..1e9fa049c5502 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -253,7 +253,7 @@ status = "disabled"; }; - qpic_nand: nand@79b0000 { + qpic_nand: nand-controller@79b0000 { compatible = "qcom,ipq8074-nand"; reg = <0x79b0000 0x10000>; #address-cells = <1>; @@ -482,7 +482,7 @@ clocks { sleep_clk: sleep_clk { compatible = "fixed-clock"; - clock-frequency = <32000>; + clock-frequency = <32768>; #clock-cells = <0>; }; diff --git a/arch/arm64/boot/dts/qcom/msm8916-pins.dtsi b/arch/arm64/boot/dts/qcom/msm8916-pins.dtsi index 242aaea688040..38c0d74767e3f 100644 --- a/arch/arm64/boot/dts/qcom/msm8916-pins.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916-pins.dtsi @@ -508,7 +508,7 @@ pins = "gpio63", "gpio64", "gpio65", "gpio66", "gpio67", "gpio68"; drive-strength = <8>; - bias-pull-none; + bias-disable; }; }; cdc_pdm_lines_sus: pdm_lines_off { @@ -521,7 +521,7 @@ pins = "gpio63", "gpio64", "gpio65", "gpio66", "gpio67", "gpio68"; drive-strength = <2>; - bias-disable; + bias-pull-down; }; }; }; @@ -537,7 +537,7 @@ pins = "gpio113", "gpio114", "gpio115", "gpio116"; drive-strength = <8>; - bias-pull-none; + bias-disable; }; }; @@ -565,7 +565,7 @@ pinconf { pins = "gpio110"; drive-strength = <8>; - bias-pull-none; + bias-disable; }; }; @@ -591,7 +591,7 @@ pinconf { pins = "gpio116"; drive-strength = <8>; - bias-pull-none; + bias-disable; }; }; ext_mclk_tlmm_lines_sus: mclk_lines_off { @@ -619,7 +619,7 @@ pins = "gpio112", "gpio117", "gpio118", "gpio119"; drive-strength = <8>; - bias-pull-none; + bias-disable; }; }; ext_sec_tlmm_lines_sus: tlmm_lines_off { diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index 5ea9fb8f2f87d..301c1c467c0b7 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -16,8 +16,8 @@ #size-cells = <2>; aliases { - sdhc1 = &sdhc_1; /* SDC1 eMMC slot */ - sdhc2 = &sdhc_2; /* SDC2 SD card slot */ + mmc0 = &sdhc_1; /* SDC1 eMMC slot */ + mmc1 = &sdhc_2; /* SDC2 SD card slot */ }; chosen { }; @@ -53,7 +53,7 @@ no-map; }; - reserved@8668000 { + reserved@86680000 { reg = <0x0 0x86680000 0x0 0x80000>; no-map; }; @@ -66,7 +66,7 @@ qcom,client-id = <1>; }; - rfsa@867e00000 { + rfsa@867e0000 { reg = <0x0 0x867e0000 0x0 0x20000>; no-map; }; @@ -175,14 +175,14 @@ }; thermal-zones { - cpu0_1-thermal { + cpu0-1-thermal { polling-delay-passive = <250>; polling-delay = <1000>; thermal-sensors = <&tsens 4>; trips { - cpu0_1_alert0: trip-point@0 { + cpu0_1_alert0: trip-point0 { temperature = <75000>; hysteresis = <2000>; type = "passive"; @@ -205,14 +205,14 @@ }; }; - cpu2_3-thermal { + cpu2-3-thermal { polling-delay-passive = <250>; polling-delay = <1000>; thermal-sensors = <&tsens 3>; trips { - cpu2_3_alert0: trip-point@0 { + cpu2_3_alert0: trip-point0 { temperature = <75000>; hysteresis = <2000>; type = "passive"; @@ -242,7 +242,7 @@ thermal-sensors = <&tsens 2>; trips { - gpu_alert0: trip-point@0 { + gpu_alert0: trip-point0 { temperature = <75000>; hysteresis = <2000>; type = "passive"; @@ -262,7 +262,7 @@ thermal-sensors = <&tsens 1>; trips { - cam_alert0: trip-point@0 { + cam_alert0: trip-point0 { temperature = <75000>; hysteresis = <2000>; type = "hot"; @@ -277,7 +277,7 @@ thermal-sensors = <&tsens 0>; trips { - modem_alert0: trip-point@0 { + modem_alert0: trip-point0 { temperature = <85000>; hysteresis = <2000>; type = "hot"; @@ -934,7 +934,7 @@ reg-names = "mdp_phys"; interrupt-parent = <&mdss>; - interrupts = <0 0>; + interrupts = <0>; clocks = <&gcc GCC_MDSS_AHB_CLK>, <&gcc GCC_MDSS_AXI_CLK>, @@ -966,7 +966,7 @@ reg-names = "dsi_ctrl"; interrupt-parent = <&mdss>; - interrupts = <4 0>; + interrupts = <4>; assigned-clocks = <&gcc BYTE0_CLK_SRC>, <&gcc PCLK0_CLK_SRC>; diff --git a/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts b/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts index a5f9a6ab512c4..9b989cc30edc8 100644 --- a/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts +++ b/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts @@ -30,3 +30,7 @@ }; }; }; + +&msmgpio { + gpio-reserved-ranges = <85 4>; +}; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 87f4d9c1b0d4c..f1d3c51ea8d0d 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -1598,6 +1598,8 @@ interrupts = <0 138 IRQ_TYPE_LEVEL_HIGH>; phys = <&hsusb_phy2>; phy-names = "usb2-phy"; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; }; }; @@ -1628,6 +1630,8 @@ interrupts = <0 131 IRQ_TYPE_LEVEL_HIGH>; phys = <&hsusb_phy1>, <&ssusb_phy_0>; phy-names = "usb2-phy", "usb3-phy"; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; }; }; @@ -1677,16 +1681,16 @@ "csi_clk_mux", "vfe0", "vfe1"; - interrupts = , - , - , - , - , - , - , - , - , - ; + interrupts = , + , + , + , + , + , + , + , + , + ; interrupt-names = "csiphy0", "csiphy1", "csiphy2", @@ -2094,9 +2098,6 @@ nvmem-cells = <&gpu_speed_bin>; nvmem-cell-names = "speed_bin"; - qcom,gpu-quirk-two-pass-use-wfi; - qcom,gpu-quirk-fault-detect-mask; - operating-points-v2 = <&gpu_opp_table>; gpu_opp_table: opp-table { diff --git a/arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi b/arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi index 9682d4dd7496e..1bae907057465 100644 --- a/arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998-clamshell.dtsi @@ -23,6 +23,43 @@ }; }; +/* + * The laptop FW does not appear to support the retention state as it is + * not advertised as enabled in ACPI, and enabling it in DT can cause boot + * hangs. + */ +&CPU0 { + cpu-idle-states = <&LITTLE_CPU_SLEEP_1>; +}; + +&CPU1 { + cpu-idle-states = <&LITTLE_CPU_SLEEP_1>; +}; + +&CPU2 { + cpu-idle-states = <&LITTLE_CPU_SLEEP_1>; +}; + +&CPU3 { + cpu-idle-states = <&LITTLE_CPU_SLEEP_1>; +}; + +&CPU4 { + cpu-idle-states = <&BIG_CPU_SLEEP_1>; +}; + +&CPU5 { + cpu-idle-states = <&BIG_CPU_SLEEP_1>; +}; + +&CPU6 { + cpu-idle-states = <&BIG_CPU_SLEEP_1>; +}; + +&CPU7 { + cpu-idle-states = <&BIG_CPU_SLEEP_1>; +}; + &qusb2phy { status = "okay"; diff --git a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi index 108667ce4f319..8d15572d18e64 100644 --- a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi @@ -27,6 +27,66 @@ status = "okay"; }; +&etf { + status = "okay"; +}; + +&etm1 { + status = "okay"; +}; + +&etm2 { + status = "okay"; +}; + +&etm3 { + status = "okay"; +}; + +&etm4 { + status = "okay"; +}; + +&etm5 { + status = "okay"; +}; + +&etm6 { + status = "okay"; +}; + +&etm7 { + status = "okay"; +}; + +&etm8 { + status = "okay"; +}; + +&etr { + status = "okay"; +}; + +&funnel1 { + status = "okay"; +}; + +&funnel2 { + status = "okay"; +}; + +&funnel3 { + status = "okay"; +}; + +&funnel4 { + status = "okay"; +}; + +&funnel5 { + status = "okay"; +}; + &pm8005_lsid1 { pm8005-regulators { compatible = "qcom,pm8005-regulators"; @@ -51,6 +111,10 @@ vdda-phy-dpdm-supply = <&vreg_l24a_3p075>; }; +&replicator1 { + status = "okay"; +}; + &rpm_requests { pm8998-regulators { compatible = "qcom,rpm-pm8998-regulators"; @@ -249,6 +313,10 @@ pinctrl-1 = <&sdc2_clk_off &sdc2_cmd_off &sdc2_data_off &sdc2_cd_off>; }; +&stm { + status = "okay"; +}; + &ufshc { vcc-supply = <&vreg_l20a_2p95>; vccq-supply = <&vreg_l26a_1p2>; diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi index c6f81431983ee..dcb79003ca0e6 100644 --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi @@ -246,38 +246,42 @@ LITTLE_CPU_SLEEP_0: cpu-sleep-0-0 { compatible = "arm,idle-state"; idle-state-name = "little-retention"; + /* CPU Retention (C2D), L2 Active */ arm,psci-suspend-param = <0x00000002>; entry-latency-us = <81>; exit-latency-us = <86>; - min-residency-us = <200>; + min-residency-us = <504>; }; LITTLE_CPU_SLEEP_1: cpu-sleep-0-1 { compatible = "arm,idle-state"; idle-state-name = "little-power-collapse"; + /* CPU + L2 Power Collapse (C3, D4) */ arm,psci-suspend-param = <0x40000003>; - entry-latency-us = <273>; - exit-latency-us = <612>; - min-residency-us = <1000>; + entry-latency-us = <814>; + exit-latency-us = <4562>; + min-residency-us = <9183>; local-timer-stop; }; BIG_CPU_SLEEP_0: cpu-sleep-1-0 { compatible = "arm,idle-state"; idle-state-name = "big-retention"; + /* CPU Retention (C2D), L2 Active */ arm,psci-suspend-param = <0x00000002>; entry-latency-us = <79>; exit-latency-us = <82>; - min-residency-us = <200>; + min-residency-us = <1302>; }; BIG_CPU_SLEEP_1: cpu-sleep-1-1 { compatible = "arm,idle-state"; idle-state-name = "big-power-collapse"; + /* CPU + L2 Power Collapse (C3, D4) */ arm,psci-suspend-param = <0x40000003>; - entry-latency-us = <336>; - exit-latency-us = <525>; - min-residency-us = <1000>; + entry-latency-us = <724>; + exit-latency-us = <2027>; + min-residency-us = <9419>; local-timer-stop; }; }; @@ -985,7 +989,7 @@ tcsr_mutex_regs: syscon@1f40000 { compatible = "syscon"; - reg = <0x01f40000 0x20000>; + reg = <0x01f40000 0x40000>; }; tlmm: pinctrl@3400000 { @@ -998,11 +1002,12 @@ #interrupt-cells = <0x2>; }; - stm@6002000 { + stm: stm@6002000 { compatible = "arm,coresight-stm", "arm,primecell"; reg = <0x06002000 0x1000>, <0x16280000 0x180000>; reg-names = "stm-base", "stm-data-base"; + status = "disabled"; clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>; clock-names = "apb_pclk", "atclk"; @@ -1016,9 +1021,10 @@ }; }; - funnel@6041000 { + funnel1: funnel@6041000 { compatible = "arm,coresight-dynamic-funnel", "arm,primecell"; reg = <0x06041000 0x1000>; + status = "disabled"; clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>; clock-names = "apb_pclk", "atclk"; @@ -1045,9 +1051,10 @@ }; }; - funnel@6042000 { + funnel2: funnel@6042000 { compatible = "arm,coresight-dynamic-funnel", "arm,primecell"; reg = <0x06042000 0x1000>; + status = "disabled"; clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>; clock-names = "apb_pclk", "atclk"; @@ -1075,9 +1082,10 @@ }; }; - funnel@6045000 { + funnel3: funnel@6045000 { compatible = "arm,coresight-dynamic-funnel", "arm,primecell"; reg = <0x06045000 0x1000>; + status = "disabled"; clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>; clock-names = "apb_pclk", "atclk"; @@ -1113,9 +1121,10 @@ }; }; - replicator@6046000 { + replicator1: replicator@6046000 { compatible = "arm,coresight-dynamic-replicator", "arm,primecell"; reg = <0x06046000 0x1000>; + status = "disabled"; clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>; clock-names = "apb_pclk", "atclk"; @@ -1137,9 +1146,10 @@ }; }; - etf@6047000 { + etf: etf@6047000 { compatible = "arm,coresight-tmc", "arm,primecell"; reg = <0x06047000 0x1000>; + status = "disabled"; clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>; clock-names = "apb_pclk", "atclk"; @@ -1163,9 +1173,10 @@ }; }; - etr@6048000 { + etr: etr@6048000 { compatible = "arm,coresight-tmc", "arm,primecell"; reg = <0x06048000 0x1000>; + status = "disabled"; clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>; clock-names = "apb_pclk", "atclk"; @@ -1181,9 +1192,10 @@ }; }; - etm@7840000 { + etm1: etm@7840000 { compatible = "arm,coresight-etm4x", "arm,primecell"; reg = <0x07840000 0x1000>; + status = "disabled"; clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>; clock-names = "apb_pclk", "atclk"; @@ -1200,9 +1212,10 @@ }; }; - etm@7940000 { + etm2: etm@7940000 { compatible = "arm,coresight-etm4x", "arm,primecell"; reg = <0x07940000 0x1000>; + status = "disabled"; clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>; clock-names = "apb_pclk", "atclk"; @@ -1219,9 +1232,10 @@ }; }; - etm@7a40000 { + etm3: etm@7a40000 { compatible = "arm,coresight-etm4x", "arm,primecell"; reg = <0x07a40000 0x1000>; + status = "disabled"; clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>; clock-names = "apb_pclk", "atclk"; @@ -1238,9 +1252,10 @@ }; }; - etm@7b40000 { + etm4: etm@7b40000 { compatible = "arm,coresight-etm4x", "arm,primecell"; reg = <0x07b40000 0x1000>; + status = "disabled"; clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>; clock-names = "apb_pclk", "atclk"; @@ -1257,9 +1272,10 @@ }; }; - funnel@7b60000 { /* APSS Funnel */ + funnel4: funnel@7b60000 { /* APSS Funnel */ compatible = "arm,coresight-etm4x", "arm,primecell"; reg = <0x07b60000 0x1000>; + status = "disabled"; clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>; clock-names = "apb_pclk", "atclk"; @@ -1343,9 +1359,10 @@ }; }; - funnel@7b70000 { + funnel5: funnel@7b70000 { compatible = "arm,coresight-dynamic-funnel", "arm,primecell"; reg = <0x07b70000 0x1000>; + status = "disabled"; clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>; clock-names = "apb_pclk", "atclk"; @@ -1369,9 +1386,10 @@ }; }; - etm@7c40000 { + etm5: etm@7c40000 { compatible = "arm,coresight-etm4x", "arm,primecell"; reg = <0x07c40000 0x1000>; + status = "disabled"; clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>; clock-names = "apb_pclk", "atclk"; @@ -1385,9 +1403,10 @@ }; }; - etm@7d40000 { + etm6: etm@7d40000 { compatible = "arm,coresight-etm4x", "arm,primecell"; reg = <0x07d40000 0x1000>; + status = "disabled"; clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>; clock-names = "apb_pclk", "atclk"; @@ -1401,9 +1420,10 @@ }; }; - etm@7e40000 { + etm7: etm@7e40000 { compatible = "arm,coresight-etm4x", "arm,primecell"; reg = <0x07e40000 0x1000>; + status = "disabled"; clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>; clock-names = "apb_pclk", "atclk"; @@ -1417,9 +1437,10 @@ }; }; - etm@7f40000 { + etm8: etm@7f40000 { compatible = "arm,coresight-etm4x", "arm,primecell"; reg = <0x07f40000 0x1000>; + status = "disabled"; clocks = <&rpmcc RPM_SMD_QDSS_CLK>, <&rpmcc RPM_SMD_QDSS_A_CLK>; clock-names = "apb_pclk", "atclk"; diff --git a/arch/arm64/boot/dts/qcom/pm8150.dtsi b/arch/arm64/boot/dts/qcom/pm8150.dtsi index b6e304748a576..6f7dfcb8c0421 100644 --- a/arch/arm64/boot/dts/qcom/pm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/pm8150.dtsi @@ -17,7 +17,7 @@ #size-cells = <0>; pon: power-on@800 { - compatible = "qcom,pm8916-pon"; + compatible = "qcom,pm8998-pon"; reg = <0x0800>; pwrkey { compatible = "qcom,pm8941-pwrkey"; @@ -73,18 +73,8 @@ reg = <0xc000>; gpio-controller; #gpio-cells = <2>; - interrupts = <0x0 0xc0 0x0 IRQ_TYPE_NONE>, - <0x0 0xc1 0x0 IRQ_TYPE_NONE>, - <0x0 0xc2 0x0 IRQ_TYPE_NONE>, - <0x0 0xc3 0x0 IRQ_TYPE_NONE>, - <0x0 0xc4 0x0 IRQ_TYPE_NONE>, - <0x0 0xc5 0x0 IRQ_TYPE_NONE>, - <0x0 0xc6 0x0 IRQ_TYPE_NONE>, - <0x0 0xc7 0x0 IRQ_TYPE_NONE>, - <0x0 0xc8 0x0 IRQ_TYPE_NONE>, - <0x0 0xc9 0x0 IRQ_TYPE_NONE>, - <0x0 0xca 0x0 IRQ_TYPE_NONE>, - <0x0 0xcb 0x0 IRQ_TYPE_NONE>; + interrupt-controller; + #interrupt-cells = <2>; }; }; diff --git a/arch/arm64/boot/dts/qcom/pm8150b.dtsi b/arch/arm64/boot/dts/qcom/pm8150b.dtsi index 322379d5c31f9..40b5d75a4a1dc 100644 --- a/arch/arm64/boot/dts/qcom/pm8150b.dtsi +++ b/arch/arm64/boot/dts/qcom/pm8150b.dtsi @@ -62,18 +62,8 @@ reg = <0xc000>; gpio-controller; #gpio-cells = <2>; - interrupts = <0x2 0xc0 0x0 IRQ_TYPE_NONE>, - <0x2 0xc1 0x0 IRQ_TYPE_NONE>, - <0x2 0xc2 0x0 IRQ_TYPE_NONE>, - <0x2 0xc3 0x0 IRQ_TYPE_NONE>, - <0x2 0xc4 0x0 IRQ_TYPE_NONE>, - <0x2 0xc5 0x0 IRQ_TYPE_NONE>, - <0x2 0xc6 0x0 IRQ_TYPE_NONE>, - <0x2 0xc7 0x0 IRQ_TYPE_NONE>, - <0x2 0xc8 0x0 IRQ_TYPE_NONE>, - <0x2 0xc9 0x0 IRQ_TYPE_NONE>, - <0x2 0xca 0x0 IRQ_TYPE_NONE>, - <0x2 0xcb 0x0 IRQ_TYPE_NONE>; + interrupt-controller; + #interrupt-cells = <2>; }; }; diff --git a/arch/arm64/boot/dts/qcom/pm8150l.dtsi b/arch/arm64/boot/dts/qcom/pm8150l.dtsi index eb0e9a090e420..cf05e0685d101 100644 --- a/arch/arm64/boot/dts/qcom/pm8150l.dtsi +++ b/arch/arm64/boot/dts/qcom/pm8150l.dtsi @@ -56,18 +56,8 @@ reg = <0xc000>; gpio-controller; #gpio-cells = <2>; - interrupts = <0x4 0xc0 0x0 IRQ_TYPE_NONE>, - <0x4 0xc1 0x0 IRQ_TYPE_NONE>, - <0x4 0xc2 0x0 IRQ_TYPE_NONE>, - <0x4 0xc3 0x0 IRQ_TYPE_NONE>, - <0x4 0xc4 0x0 IRQ_TYPE_NONE>, - <0x4 0xc5 0x0 IRQ_TYPE_NONE>, - <0x4 0xc6 0x0 IRQ_TYPE_NONE>, - <0x4 0xc7 0x0 IRQ_TYPE_NONE>, - <0x4 0xc8 0x0 IRQ_TYPE_NONE>, - <0x4 0xc9 0x0 IRQ_TYPE_NONE>, - <0x4 0xca 0x0 IRQ_TYPE_NONE>, - <0x4 0xcb 0x0 IRQ_TYPE_NONE>; + interrupt-controller; + #interrupt-cells = <2>; }; }; diff --git a/arch/arm64/boot/dts/qcom/pm8916.dtsi b/arch/arm64/boot/dts/qcom/pm8916.dtsi index 9dd2df1cbf47d..40df4d95a47ac 100644 --- a/arch/arm64/boot/dts/qcom/pm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/pm8916.dtsi @@ -113,7 +113,7 @@ wcd_codec: codec@f000 { compatible = "qcom,pm8916-wcd-analog-codec"; - reg = <0xf000 0x200>; + reg = <0xf000>; reg-names = "pmic-codec-core"; clocks = <&gcc GCC_CODEC_DIGCODEC_CLK>; clock-names = "mclk"; diff --git a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi index 501a7330dbc88..522d3ef72df5e 100644 --- a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi @@ -73,6 +73,7 @@ regulator-always-on; regulator-boot-on; regulator-name = "vdd_apc"; + regulator-initial-mode = <1>; regulator-min-microvolt = <1048000>; regulator-max-microvolt = <1384000>; }; diff --git a/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi b/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi index 34881c0113cb6..99a28d64ee627 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845-cheza.dtsi @@ -165,6 +165,8 @@ /delete-node/ &venus_mem; /delete-node/ &cdsp_mem; /delete-node/ &cdsp_pas; +/delete-node/ &zap_shader; +/delete-node/ &gpu_mem; /* Increase the size from 120 MB to 128 MB */ &mpss_region { diff --git a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts index f5a85caff1a39..bf4fde88011c8 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts @@ -337,7 +337,9 @@ &gcc { protected-clocks = , , - ; + , + , + ; }; &pm8998_gpio { @@ -517,6 +519,8 @@ vdd-1.8-xo-supply = <&vreg_l7a_1p8>; vdd-1.3-rfa-supply = <&vreg_l17a_1p3>; vdd-3.3-ch0-supply = <&vreg_l25a_3p3>; + + qcom,snoc-host-cap-8bit-quirk; }; /* PINCTRL - additions to nodes defined in sdm845.dtsi */ diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index f406a4340b05e..2287354fef863 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -2824,7 +2824,7 @@ qcom,gmu = <&gmu>; - zap-shader { + zap_shader: zap-shader { memory-region = <&gpu_mem>; }; diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts index ded120d3aef58..840d6b9bbb598 100644 --- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts +++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts @@ -232,7 +232,9 @@ &gcc { protected-clocks = , , - ; + , + , + ; }; &i2c1 { @@ -243,24 +245,23 @@ &i2c3 { status = "okay"; clock-frequency = <400000>; + /* Overwrite pinctrl-0 from sdm845.dtsi */ + pinctrl-0 = <&qup_i2c3_default &i2c3_hid_active>; - hid@15 { + tsel: hid@15 { compatible = "hid-over-i2c"; reg = <0x15>; hid-descr-addr = <0x1>; - interrupts-extended = <&tlmm 37 IRQ_TYPE_EDGE_RISING>; + interrupts-extended = <&tlmm 37 IRQ_TYPE_LEVEL_HIGH>; }; - hid@2c { + tsc2: hid@2c { compatible = "hid-over-i2c"; reg = <0x2c>; hid-descr-addr = <0x20>; - interrupts-extended = <&tlmm 37 IRQ_TYPE_EDGE_RISING>; - - pinctrl-names = "default"; - pinctrl-0 = <&i2c2_hid_active>; + interrupts-extended = <&tlmm 37 IRQ_TYPE_LEVEL_HIGH>; }; }; @@ -268,15 +269,15 @@ status = "okay"; clock-frequency = <400000>; - hid@10 { + tsc1: hid@10 { compatible = "hid-over-i2c"; reg = <0x10>; hid-descr-addr = <0x1>; - interrupts-extended = <&tlmm 125 IRQ_TYPE_EDGE_FALLING>; + interrupts-extended = <&tlmm 125 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; - pinctrl-0 = <&i2c6_hid_active>; + pinctrl-0 = <&i2c5_hid_active>; }; }; @@ -284,7 +285,7 @@ status = "okay"; clock-frequency = <400000>; - hid@5c { + ecsh: hid@5c { compatible = "hid-over-i2c"; reg = <0x5c>; hid-descr-addr = <0x1>; @@ -292,7 +293,7 @@ interrupts-extended = <&tlmm 92 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; - pinctrl-0 = <&i2c12_hid_active>; + pinctrl-0 = <&i2c11_hid_active>; }; }; @@ -335,7 +336,7 @@ &tlmm { gpio-reserved-ranges = <0 4>, <81 4>; - i2c2_hid_active: i2c2-hid-active { + i2c3_hid_active: i2c2-hid-active { pins = <37>; function = "gpio"; @@ -344,7 +345,7 @@ drive-strength = <2>; }; - i2c6_hid_active: i2c6-hid-active { + i2c5_hid_active: i2c5-hid-active { pins = <125>; function = "gpio"; @@ -353,7 +354,7 @@ drive-strength = <2>; }; - i2c12_hid_active: i2c12-hid-active { + i2c11_hid_active: i2c11-hid-active { pins = <92>; function = "gpio"; diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index 8f23fcadecb89..1954cef8c6f0b 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -336,7 +336,7 @@ <0x0 0x03D00000 0x0 0x300000>; reg-names = "west", "east", "north", "south"; interrupts = ; - gpio-ranges = <&tlmm 0 0 175>; + gpio-ranges = <&tlmm 0 0 176>; gpio-controller; #gpio-cells = <2>; interrupt-controller; @@ -459,9 +459,9 @@ qcom,tcs-offset = <0xd00>; qcom,drv-id = <2>; qcom,tcs-config = , - , - , - ; + , + , + ; rpmhcc: clock-controller { compatible = "qcom,sm8150-rpmh-clk"; diff --git a/arch/arm64/boot/dts/renesas/cat875.dtsi b/arch/arm64/boot/dts/renesas/cat875.dtsi index aaefc3ae56d50..dbdb8b093e733 100644 --- a/arch/arm64/boot/dts/renesas/cat875.dtsi +++ b/arch/arm64/boot/dts/renesas/cat875.dtsi @@ -22,7 +22,6 @@ status = "okay"; phy0: ethernet-phy@0 { - rxc-skew-ps = <1500>; reg = <0>; interrupt-parent = <&gpio2>; interrupts = <21 IRQ_TYPE_LEVEL_LOW>; diff --git a/arch/arm64/boot/dts/renesas/hihope-common.dtsi b/arch/arm64/boot/dts/renesas/hihope-common.dtsi index 3e376d29a7300..69585d6e36538 100644 --- a/arch/arm64/boot/dts/renesas/hihope-common.dtsi +++ b/arch/arm64/boot/dts/renesas/hihope-common.dtsi @@ -86,7 +86,7 @@ label = "rcar-sound"; - dais = <&rsnd_port0>; + dais = <&rsnd_port>; }; vbus0_usb2: regulator-vbus0-usb2 { @@ -191,7 +191,7 @@ port@2 { reg = <2>; dw_hdmi0_snd_in: endpoint { - remote-endpoint = <&rsnd_endpoint0>; + remote-endpoint = <&rsnd_endpoint>; }; }; }; @@ -327,17 +327,15 @@ /* Single DAI */ #sound-dai-cells = <0>; - ports { - rsnd_port0: port@0 { - rsnd_endpoint0: endpoint { - remote-endpoint = <&dw_hdmi0_snd_in>; + rsnd_port: port { + rsnd_endpoint: endpoint { + remote-endpoint = <&dw_hdmi0_snd_in>; - dai-format = "i2s"; - bitclock-master = <&rsnd_endpoint0>; - frame-master = <&rsnd_endpoint0>; + dai-format = "i2s"; + bitclock-master = <&rsnd_endpoint>; + frame-master = <&rsnd_endpoint>; - playback = <&ssi2>; - }; + playback = <&ssi2>; }; }; }; diff --git a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex.dtsi b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex.dtsi index 4280b190dc682..6a001cdfd38e2 100644 --- a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex.dtsi +++ b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex.dtsi @@ -23,7 +23,6 @@ status = "okay"; phy0: ethernet-phy@0 { - rxc-skew-ps = <1500>; reg = <0>; interrupt-parent = <&gpio2>; interrupts = <11 IRQ_TYPE_LEVEL_LOW>; diff --git a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi index 06c7c849c8ab9..c2a7ec3fc209a 100644 --- a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi @@ -1726,17 +1726,6 @@ "ssi.1", "ssi.0"; status = "disabled"; - ports { - #address-cells = <1>; - #size-cells = <0>; - port@0 { - reg = <0>; - }; - port@1 { - reg = <1>; - }; - }; - rcar_sound,ctu { ctu00: ctu-0 { }; ctu01: ctu-1 { }; diff --git a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi index a1c2de90e4706..73ded80a79ba0 100644 --- a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi @@ -1212,9 +1212,8 @@ reg = <0 0xe6ea0000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 210>; - dmas = <&dmac1 0x43>, <&dmac1 0x42>, - <&dmac2 0x43>, <&dmac2 0x42>; - dma-names = "tx", "rx", "tx", "rx"; + dmas = <&dmac0 0x43>, <&dmac0 0x42>; + dma-names = "tx", "rx"; power-domains = <&sysc R8A774C0_PD_ALWAYS_ON>; resets = <&cpg 210>; #address-cells = <1>; diff --git a/arch/arm64/boot/dts/renesas/r8a77970-v3msk.dts b/arch/arm64/boot/dts/renesas/r8a77970-v3msk.dts index d7c7b9156e082..5c391248ddb31 100644 --- a/arch/arm64/boot/dts/renesas/r8a77970-v3msk.dts +++ b/arch/arm64/boot/dts/renesas/r8a77970-v3msk.dts @@ -59,7 +59,7 @@ memory@48000000 { device_type = "memory"; /* first 128MB is reserved for secure area. */ - reg = <0x0 0x48000000 0x0 0x38000000>; + reg = <0x0 0x48000000 0x0 0x78000000>; }; osc5_clk: osc5-clock { diff --git a/arch/arm64/boot/dts/renesas/r8a77970.dtsi b/arch/arm64/boot/dts/renesas/r8a77970.dtsi index 0cd3b376635d8..4952981bb6bae 100644 --- a/arch/arm64/boot/dts/renesas/r8a77970.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77970.dtsi @@ -652,7 +652,7 @@ }; pwm3: pwm@e6e33000 { - compatible = "renesas,pwm-r8a7790", "renesas,pwm-rcar"; + compatible = "renesas,pwm-r8a77970", "renesas,pwm-rcar"; reg = <0 0xe6e33000 0 8>; #pwm-cells = <2>; clocks = <&cpg CPG_MOD 523>; diff --git a/arch/arm64/boot/dts/renesas/r8a77980.dtsi b/arch/arm64/boot/dts/renesas/r8a77980.dtsi index 461a47ea656da..4b9d4f1bbe010 100644 --- a/arch/arm64/boot/dts/renesas/r8a77980.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77980.dtsi @@ -990,8 +990,8 @@ reg = <1>; - vin4csi41: endpoint@2 { - reg = <2>; + vin4csi41: endpoint@3 { + reg = <3>; remote-endpoint = <&csi41vin4>; }; }; @@ -1018,8 +1018,8 @@ reg = <1>; - vin5csi41: endpoint@2 { - reg = <2>; + vin5csi41: endpoint@3 { + reg = <3>; remote-endpoint = <&csi41vin5>; }; }; @@ -1046,8 +1046,8 @@ reg = <1>; - vin6csi41: endpoint@2 { - reg = <2>; + vin6csi41: endpoint@3 { + reg = <3>; remote-endpoint = <&csi41vin6>; }; }; @@ -1074,8 +1074,8 @@ reg = <1>; - vin7csi41: endpoint@2 { - reg = <2>; + vin7csi41: endpoint@3 { + reg = <3>; remote-endpoint = <&csi41vin7>; }; }; @@ -1318,6 +1318,7 @@ ipmmu_vip0: mmu@e7b00000 { compatible = "renesas,ipmmu-r8a77980"; reg = <0 0xe7b00000 0 0x1000>; + renesas,ipmmu-main = <&ipmmu_mm 4>; power-domains = <&sysc R8A77980_PD_ALWAYS_ON>; #iommu-cells = <1>; }; @@ -1325,6 +1326,7 @@ ipmmu_vip1: mmu@e7960000 { compatible = "renesas,ipmmu-r8a77980"; reg = <0 0xe7960000 0 0x1000>; + renesas,ipmmu-main = <&ipmmu_mm 11>; power-domains = <&sysc R8A77980_PD_ALWAYS_ON>; #iommu-cells = <1>; }; diff --git a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts index b38f9d442fc08..e6d700f8c1948 100644 --- a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts +++ b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts @@ -636,7 +636,6 @@ /* audio_clkout0/1/2/3 */ #clock-cells = <1>; clock-frequency = <12288000 11289600>; - clkout-lr-synchronous; status = "okay"; diff --git a/arch/arm64/boot/dts/renesas/r8a77990.dtsi b/arch/arm64/boot/dts/renesas/r8a77990.dtsi index 455954c3d98ea..dabee157119f9 100644 --- a/arch/arm64/boot/dts/renesas/r8a77990.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77990.dtsi @@ -1168,9 +1168,8 @@ reg = <0 0xe6ea0000 0 0x0064>; interrupts = ; clocks = <&cpg CPG_MOD 210>; - dmas = <&dmac1 0x43>, <&dmac1 0x42>, - <&dmac2 0x43>, <&dmac2 0x42>; - dma-names = "tx", "rx", "tx", "rx"; + dmas = <&dmac0 0x43>, <&dmac0 0x42>; + dma-names = "tx", "rx"; power-domains = <&sysc R8A77990_PD_ALWAYS_ON>; resets = <&cpg 210>; #address-cells = <1>; diff --git a/arch/arm64/boot/dts/renesas/r8a77995-draak.dts b/arch/arm64/boot/dts/renesas/r8a77995-draak.dts index 67634cb01d6b6..cbdd46ed3ca63 100644 --- a/arch/arm64/boot/dts/renesas/r8a77995-draak.dts +++ b/arch/arm64/boot/dts/renesas/r8a77995-draak.dts @@ -277,10 +277,6 @@ interrupt-parent = <&gpio1>; interrupts = <28 IRQ_TYPE_LEVEL_LOW>; - /* Depends on LVDS */ - max-clock = <135000000>; - min-vrefresh = <50>; - adi,input-depth = <8>; adi,input-colorspace = "rgb"; adi,input-clock = "1x"; diff --git a/arch/arm64/boot/dts/renesas/ulcb.dtsi b/arch/arm64/boot/dts/renesas/ulcb.dtsi index 3ef89171538ff..d8fccf3d4987a 100644 --- a/arch/arm64/boot/dts/renesas/ulcb.dtsi +++ b/arch/arm64/boot/dts/renesas/ulcb.dtsi @@ -470,6 +470,7 @@ mmc-hs200-1_8v; mmc-hs400-1_8v; non-removable; + full-pwr-cycle-in-suspend; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi index eb992d60e6baf..f297601c9f715 100644 --- a/arch/arm64/boot/dts/rockchip/px30.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30.dtsi @@ -213,20 +213,20 @@ #size-cells = <0>; /* These power domains are grouped by VD_LOGIC */ - pd_usb@PX30_PD_USB { + power-domain@PX30_PD_USB { reg = ; clocks = <&cru HCLK_HOST>, <&cru HCLK_OTG>, <&cru SCLK_OTG_ADP>; pm_qos = <&qos_usb_host>, <&qos_usb_otg>; }; - pd_sdcard@PX30_PD_SDCARD { + power-domain@PX30_PD_SDCARD { reg = ; clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>; pm_qos = <&qos_sdmmc>; }; - pd_gmac@PX30_PD_GMAC { + power-domain@PX30_PD_GMAC { reg = ; clocks = <&cru ACLK_GMAC>, <&cru PCLK_GMAC>, @@ -234,7 +234,7 @@ <&cru SCLK_GMAC_RX_TX>; pm_qos = <&qos_gmac>; }; - pd_mmc_nand@PX30_PD_MMC_NAND { + power-domain@PX30_PD_MMC_NAND { reg = ; clocks = <&cru HCLK_NANDC>, <&cru HCLK_EMMC>, @@ -247,14 +247,14 @@ pm_qos = <&qos_emmc>, <&qos_nand>, <&qos_sdio>, <&qos_sfc>; }; - pd_vpu@PX30_PD_VPU { + power-domain@PX30_PD_VPU { reg = ; clocks = <&cru ACLK_VPU>, <&cru HCLK_VPU>, <&cru SCLK_CORE_VPU>; pm_qos = <&qos_vpu>, <&qos_vpu_r128>; }; - pd_vo@PX30_PD_VO { + power-domain@PX30_PD_VO { reg = ; clocks = <&cru ACLK_RGA>, <&cru ACLK_VOPB>, @@ -270,7 +270,7 @@ pm_qos = <&qos_rga_rd>, <&qos_rga_wr>, <&qos_vop_m0>, <&qos_vop_m1>; }; - pd_vi@PX30_PD_VI { + power-domain@PX30_PD_VI { reg = ; clocks = <&cru ACLK_CIF>, <&cru ACLK_ISP>, @@ -281,7 +281,7 @@ <&qos_isp_wr>, <&qos_isp_m1>, <&qos_vip>; }; - pd_gpu@PX30_PD_GPU { + power-domain@PX30_PD_GPU { reg = ; clocks = <&cru SCLK_GPU>; pm_qos = <&qos_gpu>; @@ -768,7 +768,7 @@ interrupts = ; clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>, <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu-drv", "ciu-sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; max-frequency = <150000000>; pinctrl-names = "default"; @@ -783,7 +783,7 @@ interrupts = ; clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>, <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>; - clock-names = "biu", "ciu", "ciu-drv", "ciu-sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; max-frequency = <150000000>; pinctrl-names = "default"; @@ -798,7 +798,7 @@ interrupts = ; clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>, <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu-drv", "ciu-sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; max-frequency = <150000000>; power-domains = <&power PX30_PD_MMC_NAND>; @@ -860,7 +860,7 @@ vopl_mmu: iommu@ff470f00 { compatible = "rockchip,iommu"; reg = <0x0 0xff470f00 0x0 0x100>; - interrupts = ; + interrupts = ; interrupt-names = "vopl_mmu"; clocks = <&cru ACLK_VOPL>, <&cru HCLK_VOPL>; clock-names = "aclk", "hclk"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-evb.dts b/arch/arm64/boot/dts/rockchip/rk3328-evb.dts index 49c4b96da3d40..05265b38cc028 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-evb.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-evb.dts @@ -86,13 +86,13 @@ assigned-clock-rate = <50000000>; assigned-clocks = <&cru SCLK_MAC2PHY>; assigned-clock-parents = <&cru SCLK_MAC2PHY_SRC>; - + status = "okay"; }; &i2c1 { status = "okay"; - rk805: rk805@18 { + rk805: pmic@18 { compatible = "rockchip,rk805"; reg = <0x18>; interrupt-parent = <&gpio2>; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts index bb40c163b05dc..6c3368f795ca3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-roc-cc.dts @@ -333,6 +333,7 @@ }; &usb20_otg { + dr_mode = "host"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index 62936b432f9a5..304fad1a0b573 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -169,7 +169,7 @@ &i2c1 { status = "okay"; - rk805: rk805@18 { + rk805: pmic@18 { compatible = "rockchip,rk805"; reg = <0x18>; interrupt-parent = <&gpio2>; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 31cc1541f1f59..6ddb6b8c1fad5 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -270,13 +270,13 @@ #address-cells = <1>; #size-cells = <0>; - pd_hevc@RK3328_PD_HEVC { + power-domain@RK3328_PD_HEVC { reg = ; }; - pd_video@RK3328_PD_VIDEO { + power-domain@RK3328_PD_VIDEO { reg = ; }; - pd_vpu@RK3328_PD_VPU { + power-domain@RK3328_PD_VPU { reg = ; clocks = <&cru ACLK_VPU>, <&cru HCLK_VPU>; }; @@ -555,7 +555,7 @@ gpu: gpu@ff300000 { compatible = "rockchip,rk3328-mali", "arm,mali-450"; - reg = <0x0 0xff300000 0x0 0x40000>; + reg = <0x0 0xff300000 0x0 0x30000>; interrupts = , , , @@ -1190,8 +1190,8 @@ uart0 { uart0_xfer: uart0-xfer { - rockchip,pins = <1 RK_PB1 1 &pcfg_pull_up>, - <1 RK_PB0 1 &pcfg_pull_none>; + rockchip,pins = <1 RK_PB1 1 &pcfg_pull_none>, + <1 RK_PB0 1 &pcfg_pull_up>; }; uart0_cts: uart0-cts { @@ -1209,8 +1209,8 @@ uart1 { uart1_xfer: uart1-xfer { - rockchip,pins = <3 RK_PA4 4 &pcfg_pull_up>, - <3 RK_PA6 4 &pcfg_pull_none>; + rockchip,pins = <3 RK_PA4 4 &pcfg_pull_none>, + <3 RK_PA6 4 &pcfg_pull_up>; }; uart1_cts: uart1-cts { @@ -1228,15 +1228,15 @@ uart2-0 { uart2m0_xfer: uart2m0-xfer { - rockchip,pins = <1 RK_PA0 2 &pcfg_pull_up>, - <1 RK_PA1 2 &pcfg_pull_none>; + rockchip,pins = <1 RK_PA0 2 &pcfg_pull_none>, + <1 RK_PA1 2 &pcfg_pull_up>; }; }; uart2-1 { uart2m1_xfer: uart2m1-xfer { - rockchip,pins = <2 RK_PA0 1 &pcfg_pull_up>, - <2 RK_PA1 1 &pcfg_pull_none>; + rockchip,pins = <2 RK_PA0 1 &pcfg_pull_none>, + <2 RK_PA1 1 &pcfg_pull_up>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi b/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi index e17311e090826..216aafd90e7f1 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3368-lion.dtsi @@ -156,7 +156,7 @@ pinctrl-0 = <&rgmii_pins>; snps,reset-active-low; snps,reset-delays-us = <0 10000 50000>; - snps,reset-gpio = <&gpio3 RK_PB3 GPIO_ACTIVE_HIGH>; + snps,reset-gpio = <&gpio3 RK_PB3 GPIO_ACTIVE_LOW>; tx_delay = <0x10>; rx_delay = <0x10>; status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts b/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts index c706db0ee9ec6..5b7e8fbf1ffec 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts @@ -666,12 +666,15 @@ sd-uhs-sdr104; /* Power supply */ - vqmmc-supply = &vcc1v8_s3; /* IO line */ - vmmc-supply = &vcc_sdio; /* card's power */ + vqmmc-supply = <&vcc1v8_s3>; /* IO line */ + vmmc-supply = <&vcc_sdio>; /* card's power */ + #address-cells = <1>; + #size-cells = <0>; status = "okay"; brcmf: wifi@1 { + reg = <1>; compatible = "brcm,bcm4329-fmac"; interrupt-parent = <&gpio0>; interrupts = ; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi index dd5624975c9b4..b7e7bb3517c03 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi @@ -281,7 +281,7 @@ sound: sound { compatible = "rockchip,rk3399-gru-sound"; - rockchip,cpu = <&i2s0 &i2s2>; + rockchip,cpu = <&i2s0 &spdif>; }; }; @@ -432,10 +432,6 @@ ap_i2c_audio: &i2c8 { status = "okay"; }; -&i2s2 { - status = "okay"; -}; - &io_domains { status = "okay"; @@ -532,6 +528,17 @@ ap_i2c_audio: &i2c8 { vqmmc-supply = <&ppvar_sd_card_io>; }; +&spdif { + status = "okay"; + + /* + * SPDIF is routed internally to DP; we either don't use these pins, or + * mux them to something else. + */ + /delete-property/ pinctrl-0; + /delete-property/ pinctrl-names; +}; + &spi1 { status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi index 4944d78a0a1cb..292ca70c512b5 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi @@ -654,9 +654,12 @@ sd-uhs-sdr104; vqmmc-supply = <&vcc1v8_s3>; vmmc-supply = <&vccio_sd>; + #address-cells = <1>; + #size-cells = <0>; status = "okay"; brcmf: wifi@1 { + reg = <1>; compatible = "brcm,bcm4329-fmac"; interrupt-parent = <&gpio0>; interrupts = ; @@ -682,7 +685,6 @@ &sdhci { bus-width = <8>; mmc-hs400-1_8v; - mmc-hs400-enhanced-strobe; non-removable; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts b/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts index 73be38a537960..a72e77c261ef3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts @@ -49,7 +49,7 @@ regulator-boot-on; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; - vim-supply = <&vcc3v3_sys>; + vin-supply = <&vcc3v3_sys>; }; vcc3v3_sys: vcc3v3-sys { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts b/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts index 2a127985ab171..d3ed8e5e770f1 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts @@ -94,33 +94,6 @@ }; }; -&gpu_thermal { - trips { - gpu_warm: gpu_warm { - temperature = <55000>; - hysteresis = <2000>; - type = "active"; - }; - - gpu_hot: gpu_hot { - temperature = <65000>; - hysteresis = <2000>; - type = "active"; - }; - }; - cooling-maps { - map1 { - trip = <&gpu_warm>; - cooling-device = <&fan THERMAL_NO_LIMIT 1>; - }; - - map2 { - trip = <&gpu_hot>; - cooling-device = <&fan 2 THERMAL_NO_LIMIT>; - }; - }; -}; - &pinctrl { ir { ir_rx: ir-rx { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts b/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts index 0541dfce924d6..9c659f3115c88 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts @@ -648,9 +648,12 @@ pinctrl-names = "default"; pinctrl-0 = <&sdio0_bus4 &sdio0_cmd &sdio0_clk>; sd-uhs-sdr104; + #address-cells = <1>; + #size-cells = <0>; status = "okay"; brcmf: wifi@1 { + reg = <1>; compatible = "brcm,bcm4329-fmac"; interrupt-parent = <&gpio0>; interrupts = ; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index 62ea288a1a70b..390b86ec65389 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -101,7 +101,7 @@ vcc5v0_host: vcc5v0-host-regulator { compatible = "regulator-fixed"; - gpio = <&gpio4 RK_PA3 GPIO_ACTIVE_HIGH>; + gpio = <&gpio4 RK_PA3 GPIO_ACTIVE_LOW>; enable-active-low; pinctrl-names = "default"; pinctrl-0 = <&vcc5v0_host_en>; @@ -157,7 +157,7 @@ phy-mode = "rgmii"; pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - snps,reset-gpio = <&gpio3 RK_PC0 GPIO_ACTIVE_HIGH>; + snps,reset-gpio = <&gpio3 RK_PC0 GPIO_ACTIVE_LOW>; snps,reset-active-low; snps,reset-delays-us = <0 10000 50000>; tx_delay = <0x10>; @@ -467,6 +467,12 @@ }; &sdhci { + /* + * Signal integrity isn't great at 200MHz but 100MHz has proven stable + * enough. + */ + max-frequency = <100000000>; + bus-width = <8>; mmc-hs400-1_8v; mmc-hs400-enhanced-strobe; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts index 1ae1ebd4efdd0..da3b031d4befa 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dts @@ -452,7 +452,7 @@ status = "okay"; bt656-supply = <&vcc_3v0>; - audio-supply = <&vcc_3v0>; + audio-supply = <&vcc1v8_codec>; sdmmc-supply = <&vcc_sdio>; gpio1830-supply = <&vcc_3v0>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index cede1ad81be23..4496f7e1c68f8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -29,6 +29,9 @@ i2c6 = &i2c6; i2c7 = &i2c7; i2c8 = &i2c8; + mmc0 = &sdio0; + mmc1 = &sdmmc; + mmc2 = &sdhci; serial0 = &uart0; serial1 = &uart1; serial2 = &uart2; @@ -229,6 +232,7 @@ reg = <0x0 0xf8000000 0x0 0x2000000>, <0x0 0xfd000000 0x0 0x1000000>; reg-names = "axi-base", "apb-base"; + device_type = "pci"; #address-cells = <3>; #size-cells = <2>; #interrupt-cells = <1>; @@ -247,7 +251,6 @@ <0 0 0 2 &pcie0_intc 1>, <0 0 0 3 &pcie0_intc 2>, <0 0 0 4 &pcie0_intc 3>; - linux,pci-domain = <0>; max-link-speed = <1>; msi-map = <0x0 &its 0x0 0x1000>; phys = <&pcie_phy 0>, <&pcie_phy 1>, @@ -410,7 +413,7 @@ reset-names = "usb3-otg"; status = "disabled"; - usbdrd_dwc3_0: dwc3 { + usbdrd_dwc3_0: usb@fe800000 { compatible = "snps,dwc3"; reg = <0x0 0xfe800000 0x0 0x100000>; interrupts = ; @@ -446,7 +449,7 @@ reset-names = "usb3-otg"; status = "disabled"; - usbdrd_dwc3_1: dwc3 { + usbdrd_dwc3_1: usb@fe900000 { compatible = "snps,dwc3"; reg = <0x0 0xfe900000 0x0 0x100000>; interrupts = ; @@ -1444,6 +1447,7 @@ reg = <0xf780 0x24>; clocks = <&sdhci>; clock-names = "emmcclk"; + drive-impedance-ohm = <50>; #phy-cells = <0>; status = "disabled"; }; @@ -1454,7 +1458,6 @@ clock-names = "refclk"; #phy-cells = <1>; resets = <&cru SRST_PCIEPHY>; - drive-impedance-ohm = <50>; reset-names = "phy"; status = "disabled"; }; @@ -1743,10 +1746,10 @@ interrupts = ; clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_SFR>, - <&cru PLL_VPLL>, + <&cru SCLK_HDMI_CEC>, <&cru PCLK_VIO_GRF>, - <&cru SCLK_HDMI_CEC>; - clock-names = "iahb", "isfr", "vpll", "grf", "cec"; + <&cru PLL_VPLL>; + clock-names = "iahb", "isfr", "cec", "grf", "vpll"; power-domains = <&power RK3399_PD_HDCP>; reg-io-width = <4>; rockchip,grf = <&grf>; @@ -1881,10 +1884,10 @@ gpu: gpu@ff9a0000 { compatible = "rockchip,rk3399-mali", "arm,mali-t860"; reg = <0x0 0xff9a0000 0x0 0x10000>; - interrupts = , - , - ; - interrupt-names = "gpu", "job", "mmu"; + interrupts = , + , + ; + interrupt-names = "job", "mmu", "gpu"; clocks = <&cru ACLK_GPU>; power-domains = <&power RK3399_PD_GPU>; status = "disabled"; @@ -2314,7 +2317,7 @@ }; }; - sleep { + suspend { ap_pwroff: ap-pwroff { rockchip,pins = <1 RK_PA5 1 &pcfg_pull_none>; }; diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi index b658f2b641e29..3348a32f7d0b8 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi @@ -718,7 +718,7 @@ clocks = <&sys_clk 6>; reset-names = "ether"; resets = <&sys_rst 6>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; local-mac-address = [00 00 00 00 00 00]; socionext,syscon-phy-mode = <&soc_glue 0>; diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi index d6f6cee4d5491..b793a43e06124 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi @@ -509,7 +509,7 @@ clocks = <&sys_clk 6>; reset-names = "ether"; resets = <&sys_rst 6>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; local-mac-address = [00 00 00 00 00 00]; socionext,syscon-phy-mode = <&soc_glue 0>; @@ -530,7 +530,7 @@ clocks = <&sys_clk 7>; reset-names = "ether"; resets = <&sys_rst 7>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; local-mac-address = [00 00 00 00 00 00]; socionext,syscon-phy-mode = <&soc_glue 1>; @@ -544,8 +544,8 @@ compatible = "socionext,uniphier-dwc3", "snps,dwc3"; status = "disabled"; reg = <0x65a00000 0xcd00>; - interrupt-names = "host", "peripheral"; - interrupts = <0 134 4>, <0 135 4>; + interrupt-names = "dwc_usb3"; + interrupts = <0 134 4>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb0>, <&pinctrl_usb2>; clock-names = "ref", "bus_early", "suspend"; @@ -646,8 +646,8 @@ compatible = "socionext,uniphier-dwc3", "snps,dwc3"; status = "disabled"; reg = <0x65c00000 0xcd00>; - interrupt-names = "host", "peripheral"; - interrupts = <0 137 4>, <0 138 4>; + interrupt-names = "dwc_usb3"; + interrupts = <0 137 4>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb1>, <&pinctrl_usb3>; clock-names = "ref", "bus_early", "suspend"; diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi index 799c75fa7981e..34a30842c47ad 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi @@ -307,6 +307,7 @@ interrupts = ; dma-coherent; power-domains = <&k3_pds 151 TI_SCI_PD_EXCLUSIVE>; + clocks = <&k3_clks 151 2>, <&k3_clks 151 7>; assigned-clocks = <&k3_clks 151 2>, <&k3_clks 151 7>; assigned-clock-parents = <&k3_clks 151 4>, /* set REF_CLK to 20MHz i.e. PER0_PLL/48 */ <&k3_clks 151 9>; /* set PIPE3_TXB_CLK to CLK_12M_RC/256 (for HS only) */ @@ -346,6 +347,7 @@ interrupts = ; dma-coherent; power-domains = <&k3_pds 152 TI_SCI_PD_EXCLUSIVE>; + clocks = <&k3_clks 152 2>; assigned-clocks = <&k3_clks 152 2>; assigned-clock-parents = <&k3_clks 152 4>; /* set REF_CLK to 20MHz i.e. PER0_PLL/48 */ diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi index 698ef9a1d5b75..96445111e3985 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi @@ -43,6 +43,7 @@ smmu0: smmu@36600000 { compatible = "arm,smmu-v3"; reg = <0x0 0x36600000 0x0 0x100000>; + power-domains = <&k3_pds 229 TI_SCI_PD_EXCLUSIVE>; interrupt-parent = <&gic500>; interrupts = , ; diff --git a/arch/arm64/boot/dts/ti/k3-j721e.dtsi b/arch/arm64/boot/dts/ti/k3-j721e.dtsi index 43ea1ba979220..5a6e74636d6fc 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e.dtsi @@ -60,7 +60,7 @@ i-cache-sets = <256>; d-cache-size = <0x8000>; d-cache-line-size = <64>; - d-cache-sets = <128>; + d-cache-sets = <256>; next-level-cache = <&L2_0>; }; @@ -74,7 +74,7 @@ i-cache-sets = <256>; d-cache-size = <0x8000>; d-cache-line-size = <64>; - d-cache-sets = <128>; + d-cache-sets = <256>; next-level-cache = <&L2_0>; }; }; @@ -84,7 +84,7 @@ cache-level = <2>; cache-size = <0x100000>; cache-line-size = <64>; - cache-sets = <2048>; + cache-sets = <1024>; next-level-cache = <&msmc_l3>; }; diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts b/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts index 2421ec71a201c..41a66787247b6 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts +++ b/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts @@ -131,7 +131,7 @@ reg = <0>; partition@0 { - label = "data"; + label = "spi0-data"; reg = <0x0 0x100000>; }; }; @@ -149,7 +149,7 @@ reg = <0>; partition@0 { - label = "data"; + label = "spi1-data"; reg = <0x0 0x84000>; }; }; diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi index 9aa67340a4d8c..b92549fb32400 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi +++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi @@ -419,7 +419,7 @@ }; i2c0: i2c@ff020000 { - compatible = "cdns,i2c-r1p14", "cdns,i2c-r1p10"; + compatible = "cdns,i2c-r1p14"; status = "disabled"; interrupt-parent = <&gic>; interrupts = <0 17 4>; @@ -429,7 +429,7 @@ }; i2c1: i2c@ff030000 { - compatible = "cdns,i2c-r1p14", "cdns,i2c-r1p10"; + compatible = "cdns,i2c-r1p14"; status = "disabled"; interrupt-parent = <&gic>; interrupts = <0 18 4>; @@ -582,7 +582,7 @@ }; uart0: serial@ff000000 { - compatible = "cdns,uart-r1p12", "xlnx,xuartps"; + compatible = "xlnx,zynqmp-uart", "cdns,uart-r1p12"; status = "disabled"; interrupt-parent = <&gic>; interrupts = <0 21 4>; @@ -591,7 +591,7 @@ }; uart1: serial@ff010000 { - compatible = "cdns,uart-r1p12", "xlnx,xuartps"; + compatible = "xlnx,zynqmp-uart", "cdns,uart-r1p12"; status = "disabled"; interrupt-parent = <&gic>; interrupts = <0 22 4>; diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 4922c4451e7c3..99cddf1145c2f 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -59,6 +59,7 @@ config CRYPTO_GHASH_ARM64_CE select CRYPTO_HASH select CRYPTO_GF128MUL select CRYPTO_LIB_AES + select CRYPTO_AEAD config CRYPTO_CRCT10DIF_ARM64_CE tristate "CRCT10DIF digest algorithm using PMULL instructions" diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index aa57dc639f77f..aa13344a3a5e8 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -55,7 +55,7 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); #define aes_mac_update neon_aes_mac_update MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON"); #endif -#if defined(USE_V8_CRYPTO_EXTENSIONS) || !defined(CONFIG_CRYPTO_AES_ARM64_BS) +#if defined(USE_V8_CRYPTO_EXTENSIONS) || !IS_ENABLED(CONFIG_CRYPTO_AES_ARM64_BS) MODULE_ALIAS_CRYPTO("ecb(aes)"); MODULE_ALIAS_CRYPTO("cbc(aes)"); MODULE_ALIAS_CRYPTO("ctr(aes)"); @@ -668,7 +668,7 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req) } static struct skcipher_alg aes_algs[] = { { -#if defined(USE_V8_CRYPTO_EXTENSIONS) || !defined(CONFIG_CRYPTO_AES_ARM64_BS) +#if defined(USE_V8_CRYPTO_EXTENSIONS) || !IS_ENABLED(CONFIG_CRYPTO_AES_ARM64_BS) .base = { .cra_name = "__ecb(aes)", .cra_driver_name = "__ecb-aes-" MODE, diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index ea873b8904c49..e3e27349a9fe5 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -384,7 +384,7 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt, goto xts_tail; kernel_neon_end(); - skcipher_walk_done(&walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } if (err || likely(!tail)) diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 70b1469783f9b..24bc0a3f26e2d 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -261,7 +261,7 @@ static int ghash_setkey(struct crypto_shash *tfm, static struct shash_alg ghash_alg[] = {{ .base.cra_name = "ghash", .base.cra_driver_name = "ghash-neon", - .base.cra_priority = 100, + .base.cra_priority = 150, .base.cra_blocksize = GHASH_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct ghash_key), .base.cra_module = THIS_MODULE, diff --git a/arch/arm64/crypto/nhpoly1305-neon-glue.c b/arch/arm64/crypto/nhpoly1305-neon-glue.c index 895d3727c1fbc..c5405e6a6db76 100644 --- a/arch/arm64/crypto/nhpoly1305-neon-glue.c +++ b/arch/arm64/crypto/nhpoly1305-neon-glue.c @@ -30,7 +30,7 @@ static int nhpoly1305_neon_update(struct shash_desc *desc, return crypto_nhpoly1305_update(desc, src, srclen); do { - unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); + unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_neon_begin(); crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon); diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index bdc1b6d7aff79..05cdad31b0225 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -19,6 +19,7 @@ MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("sha1"); struct sha1_ce_state { struct sha1_state sst; diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index 604a01a4ede6f..1de80293ac312 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -19,6 +19,8 @@ MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("sha224"); +MODULE_ALIAS_CRYPTO("sha256"); struct sha256_ce_state { struct sha256_state sst; diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c index 9a4bbfc45f407..ddf7aca9ff459 100644 --- a/arch/arm64/crypto/sha3-ce-glue.c +++ b/arch/arm64/crypto/sha3-ce-glue.c @@ -23,6 +23,10 @@ MODULE_DESCRIPTION("SHA3 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("sha3-224"); +MODULE_ALIAS_CRYPTO("sha3-256"); +MODULE_ALIAS_CRYPTO("sha3-384"); +MODULE_ALIAS_CRYPTO("sha3-512"); asmlinkage void sha3_ce_transform(u64 *st, const u8 *data, int blocks, int md_len); diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c index 2369540040aa9..6dfcb4f3e7768 100644 --- a/arch/arm64/crypto/sha512-ce-glue.c +++ b/arch/arm64/crypto/sha512-ce-glue.c @@ -23,6 +23,8 @@ MODULE_DESCRIPTION("SHA-384/SHA-512 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("sha384"); +MODULE_ALIAS_CRYPTO("sha512"); asmlinkage void sha512_ce_transform(struct sha512_state *sst, u8 const *src, int blocks); diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index b263e239cb599..a45366c3909be 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -31,14 +32,14 @@ * is therefore used to delimit the MADT GICC structure minimum length * appropriately. */ -#define ACPI_MADT_GICC_MIN_LENGTH ACPI_OFFSET( \ +#define ACPI_MADT_GICC_MIN_LENGTH offsetof( \ struct acpi_madt_generic_interrupt, efficiency_class) #define BAD_MADT_GICC_ENTRY(entry, end) \ (!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \ (unsigned long)(entry) + (entry)->header.length > (end)) -#define ACPI_MADT_GICC_SPE (ACPI_OFFSET(struct acpi_madt_generic_interrupt, \ +#define ACPI_MADT_GICC_SPE (offsetof(struct acpi_madt_generic_interrupt, \ spe_interrupt) + sizeof(u16)) /* Basic configuration for ACPI */ diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index b9f8d787eea9f..3cb3c4ab3ea56 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -35,13 +35,16 @@ void apply_alternatives_module(void *start, size_t length); static inline void apply_alternatives_module(void *start, size_t length) { } #endif -#define ALTINSTR_ENTRY(feature,cb) \ +#define ALTINSTR_ENTRY(feature) \ " .word 661b - .\n" /* label */ \ - " .if " __stringify(cb) " == 0\n" \ " .word 663f - .\n" /* new instruction */ \ - " .else\n" \ + " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .byte 662b-661b\n" /* source len */ \ + " .byte 664f-663f\n" /* replacement len */ + +#define ALTINSTR_ENTRY_CB(feature, cb) \ + " .word 661b - .\n" /* label */ \ " .word " __stringify(cb) "- .\n" /* callback */ \ - " .endif\n" \ " .hword " __stringify(feature) "\n" /* feature bit */ \ " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ @@ -62,33 +65,40 @@ static inline void apply_alternatives_module(void *start, size_t length) { } * * Alternatives with callbacks do not generate replacement instructions. */ -#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \ +#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature,cb) \ + ALTINSTR_ENTRY(feature) \ ".popsection\n" \ - " .if " __stringify(cb) " == 0\n" \ - ".pushsection .altinstr_replacement, \"a\"\n" \ + ".subsection 1\n" \ "663:\n\t" \ newinstr "\n" \ "664:\n\t" \ - ".popsection\n\t" \ ".org . - (664b-663b) + (662b-661b)\n\t" \ - ".org . - (662b-661b) + (664b-663b)\n" \ - ".else\n\t" \ + ".org . - (662b-661b) + (664b-663b)\n\t" \ + ".previous\n" \ + ".endif\n" + +#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \ + ".if "__stringify(cfg_enabled)" == 1\n" \ + "661:\n\t" \ + oldinstr "\n" \ + "662:\n" \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY_CB(feature, cb) \ + ".popsection\n" \ "663:\n\t" \ "664:\n\t" \ - ".endif\n" \ ".endif\n" #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ - __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0) + __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) #define ALTERNATIVE_CB(oldinstr, cb) \ - __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb) + __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb) #else #include @@ -107,11 +117,11 @@ static inline void apply_alternatives_module(void *start, size_t length) { } 662: .pushsection .altinstructions, "a" altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f .popsection - .pushsection .altinstr_replacement, "ax" + .subsection 1 663: \insn2 -664: .popsection - .org . - (664b-663b) + (662b-661b) +664: .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) + .previous .endif .endm @@ -150,7 +160,7 @@ static inline void apply_alternatives_module(void *start, size_t length) { } .pushsection .altinstructions, "a" altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f .popsection - .pushsection .altinstr_replacement, "ax" + .subsection 1 .align 2 /* So GAS knows label 661 is suitably aligned */ 661: .endm @@ -169,9 +179,9 @@ static inline void apply_alternatives_module(void *start, size_t length) { } .macro alternative_else 662: .if .Lasm_alt_mode==0 - .pushsection .altinstr_replacement, "ax" + .subsection 1 .else - .popsection + .previous .endif 663: .endm @@ -181,11 +191,11 @@ static inline void apply_alternatives_module(void *start, size_t length) { } */ .macro alternative_endif 664: - .if .Lasm_alt_mode==0 - .popsection - .endif .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) + .if .Lasm_alt_mode==0 + .previous + .endif .endm /* @@ -211,7 +221,7 @@ alternative_endif .macro user_alt, label, oldinstr, newinstr, cond 9999: alternative_insn "\oldinstr", "\newinstr", \cond - _ASM_EXTABLE 9999b, \label + _asm_extable 9999b, \label .endm /* diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 89e4c8b793490..ee9bdaa40532d 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -109,7 +109,7 @@ static inline u32 gic_read_pmr(void) return read_sysreg_s(SYS_ICC_PMR_EL1); } -static inline void gic_write_pmr(u32 val) +static __always_inline void gic_write_pmr(u32 val) { write_sysreg_s(val, SYS_ICC_PMR_EL1); } diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 7ae54d7d333a5..88d20f04c64a5 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -58,6 +58,7 @@ struct arch_timer_erratum_workaround { u64 (*read_cntvct_el0)(void); int (*set_next_event_phys)(unsigned long, struct clock_event_device *); int (*set_next_event_virt)(unsigned long, struct clock_event_device *); + bool disable_compat_vdso; }; DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *, @@ -164,25 +165,6 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl) isb(); } -/* - * Ensure that reads of the counter are treated the same as memory reads - * for the purposes of ordering by subsequent memory barriers. - * - * This insanity brought to you by speculative system register reads, - * out-of-order memory accesses, sequence locks and Thomas Gleixner. - * - * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html - */ -#define arch_counter_enforce_ordering(val) do { \ - u64 tmp, _val = (val); \ - \ - asm volatile( \ - " eor %0, %1, %1\n" \ - " add %0, sp, %0\n" \ - " ldr xzr, [%0]" \ - : "=r" (tmp) : "r" (_val)); \ -} while (0) - static __always_inline u64 __arch_counter_get_cntpct_stable(void) { u64 cnt; @@ -223,8 +205,6 @@ static __always_inline u64 __arch_counter_get_cntvct(void) return cnt; } -#undef arch_counter_enforce_ordering - static inline int arch_timer_arch_init(void) { return 0; diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index c764cc8fb3b6a..9bf56e30f4a8d 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -15,10 +15,10 @@ .macro __uaccess_ttbr0_disable, tmp1 mrs \tmp1, ttbr1_el1 // swapper_pg_dir bic \tmp1, \tmp1, #TTBR_ASID_MASK - sub \tmp1, \tmp1, #RESERVED_TTBR0_SIZE // reserved_ttbr0 just before swapper_pg_dir + sub \tmp1, \tmp1, #PAGE_SIZE // reserved_pg_dir just before swapper_pg_dir msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 isb - add \tmp1, \tmp1, #RESERVED_TTBR0_SIZE + add \tmp1, \tmp1, #PAGE_SIZE msr ttbr1_el1, \tmp1 // set reserved ASID isb .endm diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index b8cf7c85ffa2a..01112f9767bc3 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -110,6 +110,13 @@ hint #20 .endm +/* + * Clear Branch History instruction + */ + .macro clearbhb + hint #22 + .endm + /* * Speculation barrier */ @@ -462,6 +469,7 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .endm /* + * Deprecated! Use SYM_FUNC_{START,START_WEAK,END}_PI instead. * Annotate a function as position independent, i.e., safe to be called before * the kernel virtual mapping is activated. */ @@ -756,4 +764,30 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .Lyield_out_\@ : .endm + .macro __mitigate_spectre_bhb_loop tmp +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY +alternative_cb spectre_bhb_patch_loop_iter + mov \tmp, #32 // Patched to correct the immediate +alternative_cb_end +.Lspectre_bhb_loop\@: + b . + 4 + subs \tmp, \tmp, #1 + b.ne .Lspectre_bhb_loop\@ + sb +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm + + /* Save/restores x0-x3 to the stack */ + .macro __mitigate_spectre_bhb_fw +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + stp x0, x1, [sp, #-16]! + stp x2, x3, [sp, #-16]! + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_3 +alternative_cb arm64_update_smccc_conduit + nop // Patched to SMC/HVC #0 +alternative_cb_end + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 9543b5e0534d2..6e0f48ddfc656 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -17,7 +17,7 @@ #include #define ATOMIC_OP(op) \ -static inline void arch_##op(int i, atomic_t *v) \ +static __always_inline void arch_##op(int i, atomic_t *v) \ { \ __lse_ll_sc_body(op, i, v); \ } @@ -32,7 +32,7 @@ ATOMIC_OP(atomic_sub) #undef ATOMIC_OP #define ATOMIC_FETCH_OP(name, op) \ -static inline int arch_##op##name(int i, atomic_t *v) \ +static __always_inline int arch_##op##name(int i, atomic_t *v) \ { \ return __lse_ll_sc_body(op##name, i, v); \ } @@ -56,7 +56,7 @@ ATOMIC_FETCH_OPS(atomic_sub_return) #undef ATOMIC_FETCH_OPS #define ATOMIC64_OP(op) \ -static inline void arch_##op(long i, atomic64_t *v) \ +static __always_inline void arch_##op(long i, atomic64_t *v) \ { \ __lse_ll_sc_body(op, i, v); \ } @@ -71,7 +71,7 @@ ATOMIC64_OP(atomic64_sub) #undef ATOMIC64_OP #define ATOMIC64_FETCH_OP(name, op) \ -static inline long arch_##op##name(long i, atomic64_t *v) \ +static __always_inline long arch_##op##name(long i, atomic64_t *v) \ { \ return __lse_ll_sc_body(op##name, i, v); \ } @@ -94,7 +94,7 @@ ATOMIC64_FETCH_OPS(atomic64_sub_return) #undef ATOMIC64_FETCH_OP #undef ATOMIC64_FETCH_OPS -static inline long arch_atomic64_dec_if_positive(atomic64_t *v) +static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v) { return __lse_ll_sc_body(atomic64_dec_if_positive, v); } diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 574808b9df4c8..da3280f639cd7 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -14,6 +14,7 @@ static inline void __lse_atomic_##op(int i, atomic_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " " #asm_op " %w[i], %[v]\n" \ : [i] "+r" (i), [v] "+Q" (v->counter) \ : "r" (v)); \ @@ -30,6 +31,7 @@ ATOMIC_OP(add, stadd) static inline int __lse_atomic_fetch_##op##name(int i, atomic_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " " #asm_op #mb " %w[i], %w[i], %[v]" \ : [i] "+r" (i), [v] "+Q" (v->counter) \ : "r" (v) \ @@ -58,6 +60,7 @@ static inline int __lse_atomic_add_return##name(int i, atomic_t *v) \ u32 tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \ " add %w[i], %w[i], %w[tmp]" \ : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \ @@ -77,6 +80,7 @@ ATOMIC_OP_ADD_RETURN( , al, "memory") static inline void __lse_atomic_and(int i, atomic_t *v) { asm volatile( + __LSE_PREAMBLE " mvn %w[i], %w[i]\n" " stclr %w[i], %[v]" : [i] "+&r" (i), [v] "+Q" (v->counter) @@ -87,6 +91,7 @@ static inline void __lse_atomic_and(int i, atomic_t *v) static inline int __lse_atomic_fetch_and##name(int i, atomic_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " mvn %w[i], %w[i]\n" \ " ldclr" #mb " %w[i], %w[i], %[v]" \ : [i] "+&r" (i), [v] "+Q" (v->counter) \ @@ -106,6 +111,7 @@ ATOMIC_FETCH_OP_AND( , al, "memory") static inline void __lse_atomic_sub(int i, atomic_t *v) { asm volatile( + __LSE_PREAMBLE " neg %w[i], %w[i]\n" " stadd %w[i], %[v]" : [i] "+&r" (i), [v] "+Q" (v->counter) @@ -118,6 +124,7 @@ static inline int __lse_atomic_sub_return##name(int i, atomic_t *v) \ u32 tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \ " add %w[i], %w[i], %w[tmp]" \ @@ -139,6 +146,7 @@ ATOMIC_OP_SUB_RETURN( , al, "memory") static inline int __lse_atomic_fetch_sub##name(int i, atomic_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], %w[i], %[v]" \ : [i] "+&r" (i), [v] "+Q" (v->counter) \ @@ -159,6 +167,7 @@ ATOMIC_FETCH_OP_SUB( , al, "memory") static inline void __lse_atomic64_##op(s64 i, atomic64_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " " #asm_op " %[i], %[v]\n" \ : [i] "+r" (i), [v] "+Q" (v->counter) \ : "r" (v)); \ @@ -175,6 +184,7 @@ ATOMIC64_OP(add, stadd) static inline long __lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v)\ { \ asm volatile( \ + __LSE_PREAMBLE \ " " #asm_op #mb " %[i], %[i], %[v]" \ : [i] "+r" (i), [v] "+Q" (v->counter) \ : "r" (v) \ @@ -203,6 +213,7 @@ static inline long __lse_atomic64_add_return##name(s64 i, atomic64_t *v)\ unsigned long tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " ldadd" #mb " %[i], %x[tmp], %[v]\n" \ " add %[i], %[i], %x[tmp]" \ : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \ @@ -222,6 +233,7 @@ ATOMIC64_OP_ADD_RETURN( , al, "memory") static inline void __lse_atomic64_and(s64 i, atomic64_t *v) { asm volatile( + __LSE_PREAMBLE " mvn %[i], %[i]\n" " stclr %[i], %[v]" : [i] "+&r" (i), [v] "+Q" (v->counter) @@ -232,6 +244,7 @@ static inline void __lse_atomic64_and(s64 i, atomic64_t *v) static inline long __lse_atomic64_fetch_and##name(s64 i, atomic64_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " mvn %[i], %[i]\n" \ " ldclr" #mb " %[i], %[i], %[v]" \ : [i] "+&r" (i), [v] "+Q" (v->counter) \ @@ -251,6 +264,7 @@ ATOMIC64_FETCH_OP_AND( , al, "memory") static inline void __lse_atomic64_sub(s64 i, atomic64_t *v) { asm volatile( + __LSE_PREAMBLE " neg %[i], %[i]\n" " stadd %[i], %[v]" : [i] "+&r" (i), [v] "+Q" (v->counter) @@ -263,6 +277,7 @@ static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v) \ unsigned long tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], %x[tmp], %[v]\n" \ " add %[i], %[i], %x[tmp]" \ @@ -284,6 +299,7 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory") static inline long __lse_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], %[i], %[v]" \ : [i] "+&r" (i), [v] "+Q" (v->counter) \ @@ -305,6 +321,7 @@ static inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v) unsigned long tmp; asm volatile( + __LSE_PREAMBLE "1: ldr %x[tmp], %[v]\n" " subs %[ret], %x[tmp], #1\n" " b.lt 2f\n" @@ -332,6 +349,7 @@ __lse__cmpxchg_case_##name##sz(volatile void *ptr, \ unsigned long tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " mov %" #w "[tmp], %" #w "[old]\n" \ " cas" #mb #sfx "\t%" #w "[tmp], %" #w "[new], %[v]\n" \ " mov %" #w "[ret], %" #w "[tmp]" \ @@ -379,6 +397,7 @@ __lse__cmpxchg_double##name(unsigned long old1, \ register unsigned long x4 asm ("x4") = (unsigned long)ptr; \ \ asm volatile( \ + __LSE_PREAMBLE \ " casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\ " eor %[old1], %[old1], %[oldval1]\n" \ " eor %[old2], %[old2], %[oldval2]\n" \ diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index e0e2b1946f42b..0fcd854fc95f3 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -57,6 +57,25 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx, return mask; } +/* + * Ensure that reads of the counter are treated the same as memory reads + * for the purposes of ordering by subsequent memory barriers. + * + * This insanity brought to you by speculative system register reads, + * out-of-order memory accesses, sequence locks and Thomas Gleixner. + * + * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html + */ +#define arch_counter_enforce_ordering(val) do { \ + u64 tmp, _val = (val); \ + \ + asm volatile( \ + " eor %0, %1, %1\n" \ + " add %0, sp, %0\n" \ + " ldr xzr, [%0]" \ + : "=r" (tmp) : "r" (_val)); \ +} while (0) + #define __smp_mb() dmb(ish) #define __smp_rmb() dmb(ishld) #define __smp_wmb() dmb(ishst) diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h index e3d47b52161db..ec7720dbe2c80 100644 --- a/arch/arm64/include/asm/brk-imm.h +++ b/arch/arm64/include/asm/brk-imm.h @@ -10,6 +10,7 @@ * #imm16 values used for BRK instruction generation * 0x004: for installing kprobes * 0x005: for installing uprobes + * 0x006: for kprobe software single-step * Allowed values for kgdb are 0x400 - 0x7ff * 0x100: for triggering a fault on purpose (reserved) * 0x400: for dynamic BRK instruction @@ -19,6 +20,7 @@ */ #define KPROBES_BRK_IMM 0x004 #define UPROBES_BRK_IMM 0x005 +#define KPROBES_BRK_SS_IMM 0x006 #define FAULT_BRK_IMM 0x100 #define KGDB_DYN_DBG_BRK_IMM 0x400 #define KGDB_COMPILED_DBG_BRK_IMM 0x401 diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 43da6dd295920..806e9dc2a852a 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -11,6 +11,7 @@ #define CTR_L1IP_MASK 3 #define CTR_DMINLINE_SHIFT 16 #define CTR_IMINLINE_SHIFT 0 +#define CTR_IMINLINE_MASK 0xf #define CTR_ERG_SHIFT 20 #define CTR_CWG_SHIFT 24 #define CTR_CWG_MASK 15 @@ -18,7 +19,7 @@ #define CTR_DIC_SHIFT 29 #define CTR_CACHE_MINLINE_MASK \ - (0xf << CTR_DMINLINE_SHIFT | 0xf << CTR_IMINLINE_SHIFT) + (0xf << CTR_DMINLINE_SHIFT | CTR_IMINLINE_MASK << CTR_IMINLINE_SHIFT) #define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 665c78e0665a6..3e7dda6f1ab18 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -79,7 +79,7 @@ static inline void flush_icache_range(unsigned long start, unsigned long end) * IPI all online CPUs so that they undergo a context synchronization * event and are forced to refetch the new instructions. */ -#ifdef CONFIG_KGDB + /* * KGDB performs cache maintenance with interrupts disabled, so we * will deadlock trying to IPI the secondary CPUs. In theory, we can @@ -89,9 +89,9 @@ static inline void flush_icache_range(unsigned long start, unsigned long end) * the patching operation, so we don't need extra IPIs here anyway. * In which case, add a KGDB-specific bodge and return early. */ - if (kgdb_connected && irqs_disabled()) + if (in_dbg_master()) return; -#endif + kick_all_cpus_sync(); } diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h index d064a50deb5fb..5665a3fc14be0 100644 --- a/arch/arm64/include/asm/checksum.h +++ b/arch/arm64/include/asm/checksum.h @@ -19,16 +19,17 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { __uint128_t tmp; u64 sum; + int n = ihl; /* we want it signed */ tmp = *(const __uint128_t *)iph; iph += 16; - ihl -= 4; + n -= 4; tmp += ((tmp >> 64) | (tmp << 64)); sum = tmp >> 64; do { sum += *(const u32 *)iph; iph += 4; - } while (--ihl); + } while (--n > 0); sum += ((sum >> 32) | (sum << 32)); return csum_fold((__force u32)(sum >> 32)); diff --git a/arch/arm64/include/asm/clocksource.h b/arch/arm64/include/asm/clocksource.h index 0ece64a26c8c9..0c79104472350 100644 --- a/arch/arm64/include/asm/clocksource.h +++ b/arch/arm64/include/asm/clocksource.h @@ -2,8 +2,11 @@ #ifndef _ASM_CLOCKSOURCE_H #define _ASM_CLOCKSOURCE_H +#include + struct arch_clocksource_data { - bool vdso_direct; /* Usable for direct VDSO access? */ + /* Usable for direct VDSO access? */ + enum vdso_arch_clockmode clock_mode; }; #endif diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index b0d53a265f1d5..7b4172ce497c2 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -4,6 +4,9 @@ */ #ifndef __ASM_COMPAT_H #define __ASM_COMPAT_H + +#include + #ifdef CONFIG_COMPAT /* @@ -13,8 +16,6 @@ #include #include -#include - #define COMPAT_USER_HZ 100 #ifdef __AARCH64EB__ #define COMPAT_UTS_MACHINE "armv8b\0\0" diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index d72d995b7e258..85cc06380e93e 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -25,6 +25,7 @@ struct cpuinfo_arm64 { u64 reg_id_aa64dfr1; u64 reg_id_aa64isar0; u64 reg_id_aa64isar1; + u64 reg_id_aa64isar2; u64 reg_id_aa64mmfr0; u64 reg_id_aa64mmfr1; u64 reg_id_aa64mmfr2; diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index ac1dbca3d0cd3..4ffa86149d28d 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -54,7 +54,9 @@ #define ARM64_WORKAROUND_1463225 44 #define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45 #define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46 +#define ARM64_WORKAROUND_1542419 47 +#define ARM64_SPECTRE_BHB 48 -#define ARM64_NCAPS 47 +#define ARM64_NCAPS 49 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 9cde5d2e768f4..f63438474dd54 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -262,6 +262,8 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; /* * CPU feature detected at boot time based on feature of one or more CPUs. * All possible conflicts for a late CPU are ignored. + * NOTE: this means that a late CPU with the feature will *not* cause the + * capability to be advertised by cpus_have_*cap()! */ #define ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE \ (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ @@ -506,6 +508,34 @@ static inline bool cpu_supports_mixed_endian_el0(void) return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); } +static inline bool supports_csv2p3(int scope) +{ + u64 pfr0; + u8 csv2_val; + + if (scope == SCOPE_LOCAL_CPU) + pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1); + else + pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + + csv2_val = cpuid_feature_extract_unsigned_field(pfr0, + ID_AA64PFR0_CSV2_SHIFT); + return csv2_val == 3; +} + +static inline bool supports_clearbhb(int scope) +{ + u64 isar2; + + if (scope == SCOPE_LOCAL_CPU) + isar2 = read_sysreg_s(SYS_ID_AA64ISAR2_EL1); + else + isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); + + return cpuid_feature_extract_unsigned_field(isar2, + ID_AA64ISAR2_CLEARBHB_SHIFT); +} + static inline bool system_supports_32bit_el0(void) { return cpus_have_const_cap(ARM64_HAS_32BIT_EL0); @@ -601,7 +631,7 @@ static inline bool system_supports_generic_auth(void) cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF)); } -static inline bool system_uses_irq_prio_masking(void) +static __always_inline bool system_uses_irq_prio_masking(void) { return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING); @@ -637,6 +667,18 @@ static inline int arm64_get_ssbd_state(void) void arm64_set_ssbd_mitigation(bool state); +/* Watch out, ordering is important here. */ +enum mitigation_state { + SPECTRE_UNAFFECTED, + SPECTRE_MITIGATED, + SPECTRE_VULNERABLE, +}; + +enum mitigation_state arm64_get_spectre_bhb_state(void); +bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope); +u8 spectre_bhb_loop_affected(int scope); +void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); + extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index aca07c2f6e6e3..f0165df489a38 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -71,6 +71,14 @@ #define ARM_CPU_PART_CORTEX_A55 0xD05 #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C +#define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_NEOVERSE_V1 0xD40 +#define ARM_CPU_PART_CORTEX_A78 0xD41 +#define ARM_CPU_PART_CORTEX_X1 0xD44 +#define ARM_CPU_PART_CORTEX_A710 0xD47 +#define ARM_CPU_PART_CORTEX_X2 0xD48 +#define ARM_CPU_PART_NEOVERSE_N2 0xD49 +#define ARM_CPU_PART_CORTEX_A78C 0xD4B #define APM_CPU_PART_POTENZA 0x000 @@ -102,6 +110,14 @@ #define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) +#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) +#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) +#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) +#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) +#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) +#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) +#define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 063c964af705f..48bfbf70dbb07 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -36,7 +36,7 @@ static inline void local_daif_mask(void) trace_hardirqs_off(); } -static inline unsigned long local_daif_save(void) +static inline unsigned long local_daif_save_flags(void) { unsigned long flags; @@ -48,6 +48,15 @@ static inline unsigned long local_daif_save(void) flags |= PSR_I_BIT; } + return flags; +} + +static inline unsigned long local_daif_save(void) +{ + unsigned long flags; + + flags = local_daif_save_flags(); + local_daif_mask(); return flags; diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 7619f473155f2..0253691c4b3a1 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -53,6 +53,7 @@ /* kprobes BRK opcodes with ESR encoding */ #define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (KPROBES_BRK_IMM << 5)) +#define BRK64_OPCODE_KPROBES_SS (AARCH64_BREAK_MON | (KPROBES_BRK_SS_IMM << 5)) /* uprobes BRK opcodes with ESR encoding */ #define BRK64_OPCODE_UPROBES (AARCH64_BREAK_MON | (UPROBES_BRK_IMM << 5)) @@ -109,6 +110,8 @@ void disable_debug_monitors(enum dbg_active_el el); void user_rewind_single_step(struct task_struct *task); void user_fastforward_single_step(struct task_struct *task); +void user_regs_reset_single_step(struct user_pt_regs *regs, + struct task_struct *task); void kernel_enable_single_step(struct pt_regs *regs); void kernel_disable_single_step(void); diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index f987b8a8f325e..928a96b9b1617 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -63,9 +63,11 @@ enum fixed_addresses { #endif /* CONFIG_ACPI_APEI_GHES */ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + FIX_ENTRY_TRAMP_TEXT3, + FIX_ENTRY_TRAMP_TEXT2, + FIX_ENTRY_TRAMP_TEXT1, FIX_ENTRY_TRAMP_DATA, - FIX_ENTRY_TRAMP_TEXT, -#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT)) +#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT1)) #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ __end_of_permanent_fixed_addresses, diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 323cb306bd288..8ac55ff3094ae 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -204,4 +204,8 @@ extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); extern int devmem_is_allowed(unsigned long pfn); +extern bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, + unsigned long flags); +#define arch_memremap_can_ram_remap arch_memremap_can_ram_remap + #endif /* __ASM_IO_H */ diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index a6e5da7553597..9679b74a20817 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -65,8 +65,8 @@ #define EARLY_KASLR (0) #endif -#define EARLY_ENTRIES(vstart, vend, shift) (((vend) >> (shift)) \ - - ((vstart) >> (shift)) + 1 + EARLY_KASLR) +#define EARLY_ENTRIES(vstart, vend, shift) \ + ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1 + EARLY_KASLR) #define EARLY_PGDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT)) @@ -89,12 +89,6 @@ #define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end)) #define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) -#ifdef CONFIG_ARM64_SW_TTBR0_PAN -#define RESERVED_TTBR0_SIZE (PAGE_SIZE) -#else -#define RESERVED_TTBR0_SIZE (0) -#endif - /* Initial memory map size */ #if ARM64_SWAPPER_USES_SECTION_MAPS #define SWAPPER_BLOCK_SHIFT SECTION_SHIFT diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index 97e511d645a21..8699ce30f587e 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -16,7 +16,7 @@ #include #define __ARCH_WANT_KPROBES_INSN_SLOT -#define MAX_INSN_SIZE 1 +#define MAX_INSN_SIZE 2 #define flush_insn_slot(p) do { } while (0) #define kretprobe_blacklist_size 0 diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index ddf9d762ac622..9be64c0ad31f6 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -72,11 +72,12 @@ * IMO: Override CPSR.I and enable signaling with VI * FMO: Override CPSR.F and enable signaling with VF * SWIO: Turn set/way invalidates into set/way clean+invalidate + * PTW: Take a stage2 fault if a stage1 walk steps in device memory */ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \ - HCR_FMO | HCR_IMO) + HCR_FMO | HCR_IMO | HCR_PTW ) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) @@ -275,6 +276,7 @@ #define CPTR_EL2_DEFAULT CPTR_EL2_RES1 /* Hyp Debug Configuration Register bits */ +#define MDCR_EL2_TTRF (1 << 19) #define MDCR_EL2_TPMS (1 << 14) #define MDCR_EL2_E2PB_MASK (UL(0x3)) #define MDCR_EL2_E2PB_SHIFT (UL(12)) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 44a243754c1b8..c54e759896c1a 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -60,7 +60,7 @@ extern char __kvm_hyp_vector[]; extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern void __kvm_tlb_flush_vmid(struct kvm *kvm); -extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); +extern void __kvm_flush_cpu_context(struct kvm_vcpu *vcpu); extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high); @@ -88,6 +88,34 @@ extern u32 __kvm_get_mdcr_el2(void); *__hyp_this_cpu_ptr(sym); \ }) +#define __KVM_EXTABLE(from, to) \ + " .pushsection __kvm_ex_table, \"a\"\n" \ + " .align 3\n" \ + " .long (" #from " - .), (" #to " - .)\n" \ + " .popsection\n" + + +#define __kvm_at(at_op, addr) \ +( { \ + int __kvm_at_err = 0; \ + u64 spsr, elr; \ + asm volatile( \ + " mrs %1, spsr_el2\n" \ + " mrs %2, elr_el2\n" \ + "1: at "at_op", %3\n" \ + " isb\n" \ + " b 9f\n" \ + "2: msr spsr_el2, %1\n" \ + " msr elr_el2, %2\n" \ + " mov %w0, %4\n" \ + "9:\n" \ + __KVM_EXTABLE(1b, 2b) \ + : "+r" (__kvm_at_err), "=&r" (spsr), "=&r" (elr) \ + : "r" (addr), "i" (-EFAULT)); \ + __kvm_at_err; \ +} ) + + #else /* __ASSEMBLY__ */ .macro hyp_adr_this_cpu reg, sym, tmp @@ -113,6 +141,21 @@ extern u32 __kvm_get_mdcr_el2(void); kern_hyp_va \vcpu .endm +/* + * KVM extable for unexpected exceptions. + * In the same format _asm_extable, but output to a different section so that + * it can be mapped to EL2. The KVM version is not sorted. The caller must + * ensure: + * x18 has the hypervisor value to allow any Shadow-Call-Stack instrumented + * code to write to it, and that SPSR_EL2 and ELR_EL2 are restored by the fixup. + */ +.macro _kvm_extable, from, to + .pushsection __kvm_ex_table, "a" + .align 3 + .long (\from - .), (\to - .) + .popsection +.endm + #endif #endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index d69c1efc63e71..f65ff6b90f4a9 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -97,12 +97,6 @@ static inline void vcpu_ptrauth_disable(struct kvm_vcpu *vcpu) vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK); } -static inline void vcpu_ptrauth_setup_lazy(struct kvm_vcpu *vcpu) -{ - if (vcpu_has_ptrauth(vcpu)) - vcpu_ptrauth_disable(vcpu); -} - static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu) { return vcpu->arch.vsesr_el2; @@ -204,6 +198,38 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v; } +/* + * The layout of SPSR for an AArch32 state is different when observed from an + * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32 + * view given an AArch64 view. + * + * In ARM DDI 0487E.a see: + * + * - The AArch64 view (SPSR_EL2) in section C5.2.18, page C5-426 + * - The AArch32 view (SPSR_abt) in section G8.2.126, page G8-6256 + * - The AArch32 view (SPSR_und) in section G8.2.132, page G8-6280 + * + * Which show the following differences: + * + * | Bit | AA64 | AA32 | Notes | + * +-----+------+------+-----------------------------| + * | 24 | DIT | J | J is RES0 in ARMv8 | + * | 21 | SS | DIT | SS doesn't exist in AArch32 | + * + * ... and all other bits are (currently) common. + */ +static inline unsigned long host_spsr_to_spsr32(unsigned long spsr) +{ + const unsigned long overlap = BIT(24) | BIT(21); + unsigned long dit = !!(spsr & PSR_AA32_DIT_BIT); + + spsr &= ~overlap; + + spsr |= dit << 21; + + return spsr; +} + static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) { u32 mode; @@ -263,12 +289,17 @@ static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); } +static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF); +} + static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; } -static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) +static __always_inline bool kvm_vcpu_abt_iss1tw(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW); } @@ -276,7 +307,7 @@ static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR) || - kvm_vcpu_dabt_iss1tw(vcpu); /* AF/DBM update */ + kvm_vcpu_abt_iss1tw(vcpu); /* AF/DBM update */ } static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu) @@ -305,6 +336,11 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) return kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_IABT_LOW; } +static inline bool kvm_vcpu_trap_is_exec_fault(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_trap_is_iabt(vcpu) && !kvm_vcpu_abt_iss1tw(vcpu); +} + static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC; @@ -342,6 +378,9 @@ static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) { + if (kvm_vcpu_abt_iss1tw(vcpu)) + return true; + if (kvm_vcpu_trap_is_iabt(vcpu)) return false; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f656169db8c33..fd2b72e37b40f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -44,6 +44,7 @@ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) #define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) +#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); @@ -182,6 +183,7 @@ enum vcpu_sysreg { #define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ #define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ #define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */ +#define c2_TTBCR2 (c2_TTBCR + 1) /* Translation Table Base Control R. 2 */ #define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ #define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ #define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ @@ -209,6 +211,7 @@ enum vcpu_sysreg { #define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) #define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) #define cp14_DBGDCCINT (MDCCINT_EL1 * 2) +#define cp14_DBGVCR (DBGVCR32_EL2 * 2) #define NR_COPRO_REGS (NR_SYS_REGS * 2) @@ -338,6 +341,13 @@ struct kvm_vcpu_arch { /* True when deferrable sysregs are loaded on the physical CPU, * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */ bool sysregs_loaded_on_cpu; + + /* Guest PV state */ + struct { + u64 steal; + u64 last_steal; + gpa_t base; + } steal; }; /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ @@ -392,8 +402,10 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); * CP14 and CP15 live in the same array, as they are backed by the * same system registers. */ -#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)]) -#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)]) +#define CPx_BIAS IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) + +#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) +#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) struct kvm_vm_stat { ulong remote_tlb_flush; @@ -425,7 +437,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva_range(struct kvm *kvm, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, unsigned flags); int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); @@ -478,6 +490,27 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, int kvm_perf_init(void); int kvm_perf_teardown(void); +long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu); +gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu); +void kvm_update_stolen_time(struct kvm_vcpu *vcpu); + +int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr); +int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr); +int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr); + +static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) +{ + vcpu_arch->steal.base = GPA_INVALID; +} + +static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch) +{ + return (vcpu_arch->steal.base != GPA_INVALID); +} + void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); @@ -548,6 +581,7 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} void kvm_arm_init_debug(void); +void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu); void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); @@ -677,4 +711,6 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_arm_vcpu_sve_finalized(vcpu) \ ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED) +#define kvm_arm_vcpu_loaded(vcpu) ((vcpu)->arch.sysregs_loaded_on_cpu) + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 97f21cc666579..7f7fdb16bb968 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -71,6 +71,9 @@ void __sysreg32_restore_state(struct kvm_vcpu *vcpu); void __debug_switch_to_guest(struct kvm_vcpu *vcpu); void __debug_switch_to_host(struct kvm_vcpu *vcpu); +void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu); +void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu); + void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h index 02b5c48fd4678..b204501a0c391 100644 --- a/arch/arm64/include/asm/kvm_mmio.h +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -10,13 +10,11 @@ #include #include -/* - * This is annoying. The mmio code requires this, even if we don't - * need any decoding. To be fixed. - */ struct kvm_decode { unsigned long rt; bool sign_extend; + /* Witdth of the register accessed by the faulting instruction is 64-bits */ + bool sixty_four; }; void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index befe37d4bc0e5..ffe0aad96b17b 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -478,7 +478,9 @@ static inline void *kvm_get_hyp_vector(void) void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); int slot = -1; - if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) && data->fn) { + if ((cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) || + cpus_have_const_cap(ARM64_SPECTRE_BHB)) && + data && data->template_start) { vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs_start)); slot = data->hyp_vectors_slot; } @@ -507,7 +509,8 @@ static inline int kvm_map_vectors(void) * !HBP + HEL2 -> allocate one vector slot and use exec mapping * HBP + HEL2 -> use hardened vertors and use exec mapping */ - if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) { + if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) || + cpus_have_const_cap(ARM64_SPECTRE_BHB)) { __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs_start); __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base); } diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index 1b266292f0bee..ebee3113a62ff 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -4,4 +4,20 @@ #define __ALIGN .align 2 #define __ALIGN_STR ".align 2" +/* + * Annotate a function as position independent, i.e., safe to be called before + * the kernel virtual mapping is activated. + */ +#define SYM_FUNC_START_PI(x) \ + SYM_FUNC_START_ALIAS(__pi_##x); \ + SYM_FUNC_START(x) + +#define SYM_FUNC_START_WEAK_PI(x) \ + SYM_FUNC_START_ALIAS(__pi_##x); \ + SYM_FUNC_START_WEAK(x) + +#define SYM_FUNC_END_PI(x) \ + SYM_FUNC_END(x); \ + SYM_FUNC_END_ALIAS(__pi_##x) + #endif diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index 80b3882781496..5de132100b6d0 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -6,6 +6,8 @@ #if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS) +#define __LSE_PREAMBLE ".arch_extension lse\n" + #include #include #include @@ -14,8 +16,6 @@ #include #include -__asm__(".arch_extension lse"); - extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; extern struct static_key_false arm64_const_caps_ready; @@ -34,7 +34,7 @@ static inline bool system_uses_lse_atomics(void) /* In-line patching at runtime */ #define ARM64_LSE_ATOMIC_INSN(llsc, lse) \ - ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS) + ALTERNATIVE(llsc, __LSE_PREAMBLE lse, ARM64_HAS_LSE_ATOMICS) #else /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index c23c473606647..67b6b90f37eed 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -178,7 +178,6 @@ extern u64 vabits_actual; #include #include -extern s64 physvirt_offset; extern s64 memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) @@ -219,7 +218,7 @@ static inline unsigned long kaslr_offset(void) ((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55)) #define untagged_addr(addr) ({ \ - u64 __addr = (__force u64)addr; \ + u64 __addr = (__force u64)(addr); \ __addr &= __untagged_addr(__addr); \ (__force __typeof__(addr))__addr; \ }) @@ -248,13 +247,13 @@ static inline const void *__tag_set(const void *addr, u8 tag) /* - * The linear kernel range starts at the bottom of the virtual address - * space. Testing the top bit for the start of the region is a - * sufficient check and avoids having to worry about the tag. + * Check whether an arbitrary address is within the linear map, which + * lives in the [PAGE_OFFSET, PAGE_END) interval at the bottom of the + * kernel's TTBR1 address range. */ -#define __is_lm_address(addr) (!(((u64)addr) & BIT(vabits_actual - 1))) +#define __is_lm_address(addr) (((u64)(addr) ^ PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET)) -#define __lm_to_phys(addr) (((addr) + physvirt_offset)) +#define __lm_to_phys(addr) (((addr) & ~PAGE_OFFSET) + PHYS_OFFSET) #define __kimg_to_phys(addr) ((addr) - kimage_voffset) #define __virt_to_phys_nodebug(x) ({ \ @@ -272,7 +271,7 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x); #define __phys_addr_symbol(x) __pa_symbol_nodebug(x) #endif /* CONFIG_DEBUG_VIRTUAL */ -#define __phys_to_virt(x) ((unsigned long)((x) - physvirt_offset)) +#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET) #define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset)) /* @@ -316,6 +315,11 @@ static inline void *phys_to_virt(phys_addr_t x) #define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET) #if !defined(CONFIG_SPARSEMEM_VMEMMAP) || defined(CONFIG_DEBUG_VIRTUAL) +#define page_to_virt(x) ({ \ + __typeof__(x) __page = x; \ + void *__addr = __va(page_to_phys(__page)); \ + (void *)__tag_set((const void *)__addr, page_kasan_tag(__page));\ +}) #define virt_to_page(x) pfn_to_page(virt_to_pfn(x)) #else #define page_to_virt(x) ({ \ @@ -333,7 +337,7 @@ static inline void *phys_to_virt(phys_addr_t x) #endif /* !CONFIG_SPARSEMEM_VMEMMAP || CONFIG_DEBUG_VIRTUAL */ #define virt_addr_valid(addr) ({ \ - __typeof__(addr) __addr = addr; \ + __typeof__(addr) __addr = __tag_reset(addr); \ __is_lm_address(__addr) && pfn_valid(virt_to_pfn(__addr)); \ }) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index f217e32929193..1b9e49fb0e1b7 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -29,7 +29,7 @@ typedef struct { */ #define ASID(mm) ((mm)->context.id.counter & 0xffff) -static inline bool arm64_kernel_unmapped_at_el0(void) +static __always_inline bool arm64_kernel_unmapped_at_el0(void) { return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0); @@ -82,6 +82,12 @@ typedef void (*bp_hardening_cb_t)(void); struct bp_hardening_data { int hyp_vectors_slot; bp_hardening_cb_t fn; + + /* + * template_start is only used by the BHB mitigation to identify the + * hyp_vectors_slot sequence. + */ + const char *template_start; }; #if (defined(CONFIG_HARDEN_BRANCH_PREDICTOR) || \ diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 3827ff4040a3f..fb564de90aa7e 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -36,11 +36,11 @@ static inline void contextidr_thread_switch(struct task_struct *next) } /* - * Set TTBR0 to empty_zero_page. No translations will be possible via TTBR0. + * Set TTBR0 to reserved_pg_dir. No translations will be possible via TTBR0. */ static inline void cpu_set_reserved_ttbr0(void) { - unsigned long ttbr = phys_to_ttbr(__pa_symbol(empty_zero_page)); + unsigned long ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir)); write_sysreg(ttbr, ttbr0_el1); isb(); @@ -63,10 +63,7 @@ extern u64 idmap_ptrs_per_pgd; static inline bool __cpu_uses_extended_idmap(void) { - if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52)) - return false; - - return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)); + return unlikely(idmap_t0sz != TCR_T0SZ(vabits_actual)); } /* @@ -187,9 +184,9 @@ static inline void update_saved_ttbr0(struct task_struct *tsk, return; if (mm == &init_mm) - ttbr = __pa_symbol(empty_zero_page); + ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir)); else - ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48; + ttbr = phys_to_ttbr(virt_to_phys(mm->pgd)) | ASID(mm) << 48; WRITE_ONCE(task_thread_info(tsk)->ttbr0, ttbr); } diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h index 626ad01e83bf0..dd870390d639f 100644 --- a/arch/arm64/include/asm/numa.h +++ b/arch/arm64/include/asm/numa.h @@ -25,6 +25,9 @@ const struct cpumask *cpumask_of_node(int node); /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ static inline const struct cpumask *cpumask_of_node(int node) { + if (node == NUMA_NO_NODE) + return cpu_all_mask; + return node_to_cpumask_map[node]; } #endif diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h index 799d9dd6f7ccb..cf3a0fd7c1a7e 100644 --- a/arch/arm64/include/asm/paravirt.h +++ b/arch/arm64/include/asm/paravirt.h @@ -21,6 +21,13 @@ static inline u64 paravirt_steal_clock(int cpu) { return pv_ops.time.steal_clock(cpu); } -#endif + +int __init pv_time_init(void); + +#else + +#define pv_time_init() do {} while (0) + +#endif // CONFIG_PARAVIRT #endif diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h index 70b323cf83000..b33ca260e3c9d 100644 --- a/arch/arm64/include/asm/pci.h +++ b/arch/arm64/include/asm/pci.h @@ -17,6 +17,7 @@ #define pcibios_assign_all_busses() \ (pci_has_flag(PCI_REASSIGN_ALL_BUS)) +#define arch_can_pci_mmap_wc() 1 #define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 extern int isa_dma_bridge_buggy; diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 3df60f97da1f2..a0f789fa25f39 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -215,6 +215,7 @@ #define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x)) #define TCR_TxSZ_WIDTH 6 #define TCR_T0SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T0SZ_OFFSET) +#define TCR_T1SZ_MASK (((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T1SZ_OFFSET) #define TCR_EPD0_SHIFT 7 #define TCR_EPD0_MASK (UL(1) << TCR_EPD0_SHIFT) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 8dc6c5cdabe62..99b0a32e25c1f 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -54,7 +54,7 @@ #define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) #define PAGE_HYP_EXEC __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) #define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) -#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) +#define PAGE_HYP_DEVICE __pgprot(_PROT_DEFAULT | PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_HYP | PTE_HYP_XN) #define PAGE_S2_MEMATTR(attr) \ ({ \ @@ -85,13 +85,12 @@ #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) -#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY #define __P010 PAGE_READONLY #define __P011 PAGE_READONLY -#define __P100 PAGE_EXECONLY +#define __P100 PAGE_READONLY_EXEC #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_READONLY_EXEC #define __P111 PAGE_READONLY_EXEC @@ -100,7 +99,7 @@ #define __S001 PAGE_READONLY #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED -#define __S100 PAGE_EXECONLY +#define __S100 PAGE_READONLY_EXEC #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 565aa45ef1344..3a057d4279007 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -23,6 +23,8 @@ #define VMALLOC_START (MODULES_END) #define VMALLOC_END (- PUD_SIZE - VMEMMAP_SIZE - SZ_64K) +#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)) + #define FIRST_USER_ADDRESS 0UL #ifndef __ASSEMBLY__ @@ -33,8 +35,6 @@ #include #include -extern struct page *vmemmap; - extern void __pte_error(const char *file, int line, unsigned long val); extern void __pmd_error(const char *file, int line, unsigned long val); extern void __pud_error(const char *file, int line, unsigned long val); @@ -54,9 +54,15 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; * page table entry, taking care of 52-bit addresses. */ #ifdef CONFIG_ARM64_PA_BITS_52 -#define __pte_to_phys(pte) \ - ((pte_val(pte) & PTE_ADDR_LOW) | ((pte_val(pte) & PTE_ADDR_HIGH) << 36)) -#define __phys_to_pte_val(phys) (((phys) | ((phys) >> 36)) & PTE_ADDR_MASK) +static inline phys_addr_t __pte_to_phys(pte_t pte) +{ + return (pte_val(pte) & PTE_ADDR_LOW) | + ((pte_val(pte) & PTE_ADDR_HIGH) << 36); +} +static inline pteval_t __phys_to_pte_val(phys_addr_t phys) +{ + return (phys | (phys >> 36)) & PTE_ADDR_MASK; +} #else #define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK) #define __phys_to_pte_val(phys) (phys) @@ -96,14 +102,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) -/* - * Execute-only user mappings do not have the PTE_USER bit set. All valid - * kernel mappings have the PTE_UXN bit set. - */ #define pte_valid_not_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN)) -#define pte_valid_young(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) + ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) #define pte_valid_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) @@ -111,14 +111,17 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; * Could the pte be present in the TLB? We must check mm_tlb_flush_pending * so that we don't erroneously return false for pages that have been * remapped as PROT_NONE but are yet to be flushed from the TLB. + * Note that we can't make any assumptions based on the state of the access + * flag, since ptep_clear_flush_young() elides a DSB when invalidating the + * TLB. */ #define pte_accessible(mm, pte) \ - (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte)) + (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte)) /* * p??_access_permitted() is true for valid user mappings (subject to the - * write permission check) other than user execute-only which do not have the - * PTE_USER bit set. PROT_NONE mappings do not have the PTE_VALID bit set. + * write permission check). PROT_NONE mappings do not have the PTE_VALID bit + * set. */ #define pte_access_permitted(pte, write) \ (pte_valid_user(pte) && (!(write) || pte_write(pte))) @@ -139,13 +142,6 @@ static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot) return pte; } -static inline pte_t pte_wrprotect(pte_t pte) -{ - pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); - pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); - return pte; -} - static inline pte_t pte_mkwrite(pte_t pte) { pte = set_pte_bit(pte, __pgprot(PTE_WRITE)); @@ -171,6 +167,20 @@ static inline pte_t pte_mkdirty(pte_t pte) return pte; } +static inline pte_t pte_wrprotect(pte_t pte) +{ + /* + * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY + * clear), set the PTE_DIRTY bit. + */ + if (pte_hw_dirty(pte)) + pte = pte_mkdirty(pte); + + pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); + pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); + return pte; +} + static inline pte_t pte_mkold(pte_t pte) { return clear_pte_bit(pte, __pgprot(PTE_AF)); @@ -460,7 +470,9 @@ extern pgd_t init_pg_dir[PTRS_PER_PGD]; extern pgd_t init_pg_end[]; extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; +extern pgd_t idmap_pg_end[]; extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; +extern pgd_t reserved_pg_dir[PTRS_PER_PGD]; extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd); @@ -785,12 +797,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres pte = READ_ONCE(*ptep); do { old_pte = pte; - /* - * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY - * clear), set the PTE_DIRTY bit. - */ - if (pte_hw_dirty(pte)) - pte = pte_mkdirty(pte); pte = pte_wrprotect(pte); pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte), pte_val(pte)); diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h index 7a24bad1a58b8..076a4157a74f8 100644 --- a/arch/arm64/include/asm/pointer_auth.h +++ b/arch/arm64/include/asm/pointer_auth.h @@ -3,7 +3,6 @@ #define __ASM_POINTER_AUTH_H #include -#include #include #include @@ -30,6 +29,13 @@ struct ptrauth_keys { struct ptrauth_key apga; }; +/* + * Only include random.h once ptrauth_keys_* structures are defined + * to avoid yet another circular include hell (random.h * ends up + * including asm/smp.h, which requires ptrauth_keys_kernel). + */ +#include + static inline void ptrauth_keys_init(struct ptrauth_keys *keys) { if (system_supports_address_auth()) { diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 5623685c7d138..65834b84f0e16 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -184,8 +184,9 @@ void tls_preserve_current_state(void); static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) { + s32 previous_syscall = regs->syscallno; memset(regs, 0, sizeof(*regs)); - forget_syscall(regs); + regs->syscallno = previous_syscall; regs->pc = pc; if (system_uses_irq_prio_masking()) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index fbebb411ae202..92b2575b01918 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -62,6 +62,7 @@ #define PSR_AA32_I_BIT 0x00000080 #define PSR_AA32_A_BIT 0x00000100 #define PSR_AA32_E_BIT 0x00000200 +#define PSR_AA32_PAN_BIT 0x00400000 #define PSR_AA32_SSBS_BIT 0x00800000 #define PSR_AA32_DIT_BIT 0x01000000 #define PSR_AA32_Q_BIT 0x08000000 @@ -298,7 +299,17 @@ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) static inline unsigned long regs_return_value(struct pt_regs *regs) { - return regs->regs[0]; + unsigned long val = regs->regs[0]; + + /* + * Audit currently uses regs_return_value() instead of + * syscall_get_return_value(). Apply the same sign-extension here until + * audit is updated to use syscall_get_return_value(). + */ + if (compat_user_mode(regs)) + val = sign_extend64(val, 31); + + return val; } static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) diff --git a/arch/arm64/include/asm/pvclock-abi.h b/arch/arm64/include/asm/pvclock-abi.h new file mode 100644 index 0000000000000..c4f1c0a0789cd --- /dev/null +++ b/arch/arm64/include/asm/pvclock-abi.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 Arm Ltd. */ + +#ifndef __ASM_PVCLOCK_ABI_H +#define __ASM_PVCLOCK_ABI_H + +/* The below structure is defined in ARM DEN0057A */ + +struct pvclock_vcpu_stolen_time { + __le32 revision; + __le32 attributes; + __le64 stolen_time; + /* Structure must be 64 byte aligned, pad to that size */ + u8 padding[48]; +} __packed; + +#endif diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index 788ae971f11c1..a75f2882cc7cb 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -15,8 +15,14 @@ extern char __hyp_text_start[], __hyp_text_end[]; extern char __idmap_text_start[], __idmap_text_end[]; extern char __initdata_begin[], __initdata_end[]; extern char __inittext_begin[], __inittext_end[]; +extern char __exittext_begin[], __exittext_end[]; extern char __irqentry_text_start[], __irqentry_text_end[]; extern char __mmuoff_data_start[], __mmuoff_data_end[]; extern char __entry_tramp_text_start[], __entry_tramp_text_end[]; +static inline size_t entry_tramp_text_size(void) +{ + return __entry_tramp_text_end - __entry_tramp_text_start; +} + #endif /* __ASM_SECTIONS_H */ diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index a0c8a0b652593..0eadbf933e359 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -46,7 +46,12 @@ DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); * Logical CPU mapping. */ extern u64 __cpu_logical_map[NR_CPUS]; -#define cpu_logical_map(cpu) __cpu_logical_map[cpu] +extern u64 cpu_logical_map(int cpu); + +static inline void set_cpu_logical_map(int cpu, u64 hwid) +{ + __cpu_logical_map[cpu] = hwid; +} struct seq_file; diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 65299a2dcf9cd..03e20895453a7 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -29,25 +29,36 @@ static inline void syscall_rollback(struct task_struct *task, regs->regs[0] = regs->orig_x0; } +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + unsigned long val = regs->regs[0]; + + if (is_compat_thread(task_thread_info(task))) + val = sign_extend64(val, 31); + + return val; +} static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - unsigned long error = regs->regs[0]; - return IS_ERR_VALUE(error) ? error : 0; -} + unsigned long error = syscall_get_return_value(task, regs); -static inline long syscall_get_return_value(struct task_struct *task, - struct pt_regs *regs) -{ - return regs->regs[0]; + return IS_ERR_VALUE(error) ? error : 0; } static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - regs->regs[0] = (long) error ? error : val; + if (error) + val = error; + + if (is_compat_thread(task_thread_info(task))) + val = lower_32_bits(val); + + regs->regs[0] = val; } #define SYSCALL_MAX_ARGS 6 diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 6e919fafb43dd..5b3bdad66b27e 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -49,7 +49,9 @@ #ifndef CONFIG_BROKEN_GAS_INST #ifdef __ASSEMBLY__ -#define __emit_inst(x) .inst (x) +// The space separator is omitted so that __emit_inst(x) can be parsed as +// either an assembler directive or an assembler macro argument. +#define __emit_inst(x) .inst(x) #else #define __emit_inst(x) ".inst " __stringify((x)) "\n\t" #endif @@ -163,6 +165,7 @@ #define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0) #define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1) +#define SYS_ID_AA64ISAR2_EL1 sys_reg(3, 0, 0, 6, 2) #define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) #define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) @@ -573,6 +576,21 @@ #define ID_AA64ISAR1_GPI_NI 0x0 #define ID_AA64ISAR1_GPI_IMP_DEF 0x1 +/* id_aa64isar2 */ +#define ID_AA64ISAR2_CLEARBHB_SHIFT 28 +#define ID_AA64ISAR2_RPRES_SHIFT 4 +#define ID_AA64ISAR2_WFXT_SHIFT 0 + +#define ID_AA64ISAR2_RPRES_8BIT 0x0 +#define ID_AA64ISAR2_RPRES_12BIT 0x1 +/* + * Value 0x1 has been removed from the architecture, and is + * reserved, but has not yet been removed from the ARM ARM + * as of ARM DDI 0487G.b. + */ +#define ID_AA64ISAR2_WFXT_NI 0x0 +#define ID_AA64ISAR2_WFXT_SUPPORTED 0x2 + /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 #define ID_AA64PFR0_CSV2_SHIFT 56 @@ -644,6 +662,7 @@ #endif /* id_aa64mmfr1 */ +#define ID_AA64MMFR1_ECBHB_SHIFT 60 #define ID_AA64MMFR1_PAN_SHIFT 20 #define ID_AA64MMFR1_LOR_SHIFT 16 #define ID_AA64MMFR1_HPD_SHIFT 12 diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index f0cec4160136e..4e3ed702bec7e 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -91,6 +91,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_FSCHECK (1 << TIF_FSCHECK) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_SVE (1 << TIF_SVE) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 127712b0b9705..b9a37a415bf9c 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -62,8 +62,13 @@ static inline unsigned long __range_ok(const void __user *addr, unsigned long si { unsigned long ret, limit = current_thread_info()->addr_limit; + /* + * Asynchronous I/O running in a kernel thread does not have the + * TIF_TAGGED_ADDR flag of the process owning the mm, so always untag + * the user address before checking. + */ if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI) && - test_thread_flag(TIF_TAGGED_ADDR)) + (current->flags & PF_KTHREAD || test_thread_flag(TIF_TAGGED_ADDR))) addr = untagged_addr(addr); __chk_user_ptr(addr); @@ -107,8 +112,8 @@ static inline void __uaccess_ttbr0_disable(void) local_irq_save(flags); ttbr = read_sysreg(ttbr1_el1); ttbr &= ~TTBR_ASID_MASK; - /* reserved_ttbr0 placed before swapper_pg_dir */ - write_sysreg(ttbr - RESERVED_TTBR0_SIZE, ttbr0_el1); + /* reserved_pg_dir placed before swapper_pg_dir */ + write_sysreg(ttbr - PAGE_SIZE, ttbr0_el1); isb(); /* Set reserved ASID */ write_sysreg(ttbr, ttbr1_el1); diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 2629a68b87244..8c1b73dc8f55d 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -25,8 +25,8 @@ #define __NR_compat_gettimeofday 78 #define __NR_compat_sigreturn 119 #define __NR_compat_rt_sigreturn 173 -#define __NR_compat_clock_getres 247 #define __NR_compat_clock_gettime 263 +#define __NR_compat_clock_getres 264 #define __NR_compat_clock_gettime64 403 #define __NR_compat_clock_getres_time64 406 @@ -42,7 +42,6 @@ #endif #define __ARCH_WANT_SYS_CLONE -#define __ARCH_WANT_SYS_CLONE3 #ifndef __COMPAT_SYSCALL_NR #include diff --git a/arch/arm64/include/asm/vdso/clocksource.h b/arch/arm64/include/asm/vdso/clocksource.h new file mode 100644 index 0000000000000..8019f616e1f74 --- /dev/null +++ b/arch/arm64/include/asm/vdso/clocksource.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSOCLOCKSOURCE_H +#define __ASM_VDSOCLOCKSOURCE_H + +enum vdso_arch_clockmode { + /* vdso clocksource not usable */ + VDSO_CLOCKMODE_NONE, + /* vdso clocksource for both 32 and 64bit tasks */ + VDSO_CLOCKMODE_ARCHTIMER, + /* vdso clocksource for 64bit tasks only */ + VDSO_CLOCKMODE_ARCHTIMER_NOCOMPAT, +}; + +#endif diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h index c50ee1b7d5cd6..413d42e197c74 100644 --- a/arch/arm64/include/asm/vdso/compat_gettimeofday.h +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -10,6 +10,7 @@ #include #include +#include #include #define __VDSO_USE_SYSCALL ULLONG_MAX @@ -117,10 +118,10 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) u64 res; /* - * clock_mode == 0 implies that vDSO are enabled otherwise + * clock_mode == ARCHTIMER implies that vDSO are enabled otherwise * fallback on syscall. */ - if (clock_mode) + if (clock_mode != VDSO_CLOCKMODE_ARCHTIMER) return __VDSO_USE_SYSCALL; /* diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index b08f476b72b4d..d7361faa42dea 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -10,6 +10,8 @@ #include #include +#include + #define __VDSO_USE_SYSCALL ULLONG_MAX #define VDSO_HAS_CLOCK_GETRES 1 @@ -71,10 +73,10 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) u64 res; /* - * clock_mode == 0 implies that vDSO are enabled otherwise + * clock_mode != NONE implies that vDSO are enabled otherwise * fallback on syscall. */ - if (clock_mode) + if (clock_mode == VDSO_CLOCKMODE_NONE) return __VDSO_USE_SYSCALL; /* @@ -83,11 +85,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) */ isb(); asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory"); - /* - * This isb() is required to prevent that the seq lock is - * speculated.# - */ - isb(); + arch_counter_enforce_ordering(res); return res; } diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h index 0c20a7c1bee50..e50f26741946b 100644 --- a/arch/arm64/include/asm/vdso/vsyscall.h +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -24,9 +24,7 @@ struct vdso_data *__arm64_get_k_vdso_data(void) static __always_inline int __arm64_get_clock_mode(struct timekeeper *tk) { - u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct; - - return use_syscall; + return tk->tkr_mono.clock->archdata.clock_mode; } #define __arch_get_clock_mode __arm64_get_clock_mode diff --git a/arch/arm64/include/asm/vectors.h b/arch/arm64/include/asm/vectors.h new file mode 100644 index 0000000000000..bc9a2145f4194 --- /dev/null +++ b/arch/arm64/include/asm/vectors.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 ARM Ltd. + */ +#ifndef __ASM_VECTORS_H +#define __ASM_VECTORS_H + +#include +#include + +#include + +extern char vectors[]; +extern char tramp_vectors[]; +extern char __bp_harden_el1_vectors[]; + +/* + * Note: the order of this enum corresponds to two arrays in entry.S: + * tramp_vecs and __bp_harden_el1_vectors. By default the canonical + * 'full fat' vectors are used directly. + */ +enum arm64_bp_harden_el1_vectors { +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + /* + * Perform the BHB loop mitigation, before branching to the canonical + * vectors. + */ + EL1_VECTOR_BHB_LOOP, + + /* + * Make the SMC call for firmware mitigation, before branching to the + * canonical vectors. + */ + EL1_VECTOR_BHB_FW, + + /* + * Use the ClearBHB instruction, before branching to the canonical + * vectors. + */ + EL1_VECTOR_BHB_CLEAR_INSN, +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + + /* + * Remap the kernel before branching to the canonical vectors. + */ + EL1_VECTOR_KPTI, +}; + +#ifndef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY +#define EL1_VECTOR_BHB_LOOP -1 +#define EL1_VECTOR_BHB_FW -1 +#define EL1_VECTOR_BHB_CLEAR_INSN -1 +#endif /* !CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + +/* The vectors to use on return from EL0. e.g. to remap the kernel */ +DECLARE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector); + +#ifndef CONFIG_UNMAP_KERNEL_AT_EL0 +#define TRAMP_VALIAS 0ul +#endif + +static inline const char * +arm64_get_bp_hardening_vector(enum arm64_bp_harden_el1_vectors slot) +{ + if (arm64_kernel_unmapped_at_el0()) + return (char *)(TRAMP_VALIAS + SZ_2K * slot); + + WARN_ON_ONCE(slot == EL1_VECTOR_KPTI); + + return __bp_harden_el1_vectors + SZ_2K * slot; +} + +#endif /* __ASM_VECTORS_H */ diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index 3333950b59093..ea487218db790 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -53,7 +53,7 @@ static inline unsigned long find_zero(unsigned long mask) */ static inline unsigned long load_unaligned_zeropad(const void *addr) { - unsigned long ret, offset; + unsigned long ret, tmp; /* Load word from unaligned pointer addr */ asm( @@ -61,9 +61,9 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) "2:\n" " .pushsection .fixup,\"ax\"\n" " .align 2\n" - "3: and %1, %2, #0x7\n" - " bic %2, %2, #0x7\n" - " ldr %0, [%2]\n" + "3: bic %1, %2, #0x7\n" + " ldr %0, [%1]\n" + " and %1, %2, #0x7\n" " lsl %1, %1, #0x3\n" #ifndef __AARCH64EB__ " lsr %0, %0, %1\n" @@ -73,7 +73,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) " b 2b\n" " .popsection\n" _ASM_EXTABLE(1b, 3b) - : "=&r" (ret), "=&r" (offset) + : "=&r" (ret), "=&r" (tmp) : "r" (addr), "Q" (*(unsigned long *)addr)); return ret; diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 67c21f9bdbad2..f3b34e2aaacb7 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -240,6 +240,11 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 KVM_REG_ARM_FW_REG(3) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED 2 + /* SVE registers */ #define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT) @@ -323,6 +328,8 @@ struct kvm_vcpu_events { #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 +#define KVM_ARM_VCPU_PVTIME_CTRL 2 +#define KVM_ARM_VCPU_PVTIME_IPA 0 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_VCPU2_SHIFT 28 diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 7ed9294e20045..d1bb5b69f1ce4 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -49,6 +49,7 @@ #define PSR_SSBS_BIT 0x00001000 #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 +#define PSR_DIT_BIT 0x01000000 #define PSR_V_BIT 0x10000000 #define PSR_C_BIT 0x20000000 #define PSR_Z_BIT 0x40000000 diff --git a/arch/arm64/include/uapi/asm/unistd.h b/arch/arm64/include/uapi/asm/unistd.h index 4703d218663a2..f83a70e07df85 100644 --- a/arch/arm64/include/uapi/asm/unistd.h +++ b/arch/arm64/include/uapi/asm/unistd.h @@ -19,5 +19,6 @@ #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_TIME32_SYSCALLS +#define __ARCH_WANT_SYS_CLONE3 #include diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 3a58e9db5cfe7..46ec402e97edc 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -269,12 +270,19 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr) int apei_claim_sea(struct pt_regs *regs) { int err = -ENOENT; + bool return_to_irqs_enabled; unsigned long current_flags; if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES)) return err; - current_flags = arch_local_save_flags(); + current_flags = local_daif_save_flags(); + + /* current_flags isn't useful here as daif doesn't tell us about pNMI */ + return_to_irqs_enabled = !irqs_disabled_flags(arch_local_save_flags()); + + if (regs) + return_to_irqs_enabled = interrupts_enabled(regs); /* * SEA can interrupt SError, mask it and describe this as an NMI so @@ -284,6 +292,23 @@ int apei_claim_sea(struct pt_regs *regs) nmi_enter(); err = ghes_notify_sea(); nmi_exit(); + + /* + * APEI NMI-like notifications are deferred to irq_work. Unless + * we interrupted irqs-masked code, we can do that now. + */ + if (!err) { + if (return_to_irqs_enabled) { + local_daif_restore(DAIF_PROCCTX_NOIRQ); + __irq_enter(); + irq_work_run(); + __irq_exit(); + } else { + pr_warn_ratelimited("APEI work queued but not completed"); + err = -EINPROGRESS; + } + } + local_daif_restore(current_flags); return err; diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index d1757ef1b1e74..5f8e4c2df53cc 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -41,27 +41,15 @@ bool alternative_is_applied(u16 cpufeature) /* * Check if the target PC is within an alternative block. */ -static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) +static __always_inline bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) { - unsigned long replptr; - - if (kernel_text_address(pc)) - return true; - - replptr = (unsigned long)ALT_REPL_PTR(alt); - if (pc >= replptr && pc <= (replptr + alt->alt_len)) - return false; - - /* - * Branching into *another* alternate sequence is doomed, and - * we're not even trying to fix it up. - */ - BUG(); + unsigned long replptr = (unsigned long)ALT_REPL_PTR(alt); + return !(pc >= replptr && pc <= (replptr + alt->alt_len)); } #define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1)) -static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr) +static __always_inline u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr) { u32 insn; @@ -106,7 +94,7 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp return insn; } -static void patch_alternative(struct alt_instr *alt, +static noinstr void patch_alternative(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst) { __le32 *replptr; diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index ca158be21f833..fbf66e0973aab 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -59,6 +59,7 @@ struct insn_emulation { static LIST_HEAD(insn_emulation); static int nr_insn_emulated __initdata; static DEFINE_RAW_SPINLOCK(insn_emulation_lock); +static DEFINE_MUTEX(insn_emulation_mutex); static void register_emulation_hooks(struct insn_emulation_ops *ops) { @@ -207,10 +208,10 @@ static int emulation_proc_handler(struct ctl_table *table, int write, loff_t *ppos) { int ret = 0; - struct insn_emulation *insn = (struct insn_emulation *) table->data; + struct insn_emulation *insn = container_of(table->data, struct insn_emulation, current_mode); enum insn_emulation_mode prev_mode = insn->current_mode; - table->data = &insn->current_mode; + mutex_lock(&insn_emulation_mutex); ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write || prev_mode == insn->current_mode) @@ -223,7 +224,7 @@ static int emulation_proc_handler(struct ctl_table *table, int write, update_insn_emulation_mode(insn, INSN_UNDEF); } ret: - table->data = insn; + mutex_unlock(&insn_emulation_mutex); return ret; } @@ -247,7 +248,7 @@ static void __init register_insn_emulation_sysctl(void) sysctl->maxlen = sizeof(int); sysctl->procname = insn->ops->name; - sysctl->data = insn; + sysctl->data = &insn->current_mode; sysctl->extra1 = &insn->min; sysctl->extra2 = &insn->max; sysctl->proc_handler = emulation_proc_handler; @@ -601,7 +602,7 @@ static struct undef_hook setend_hooks[] = { }, { /* Thumb mode */ - .instr_mask = 0x0000fff7, + .instr_mask = 0xfffffff7, .instr_val = 0x0000b650, .pstate_mask = (PSR_AA32_T_BIT | PSR_AA32_MODE_MASK), .pstate_val = (PSR_AA32_T_BIT | PSR_AA32_MODE_USR), diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 7fa6828bb488a..587543c6c51cb 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -43,7 +43,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, this_leaf->type = type; } -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { unsigned int ctype, level, leaves, fw_level; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -78,7 +78,7 @@ static int __init_cache_level(unsigned int cpu) return 0; } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { unsigned int level, idx; enum cache_type type; @@ -97,6 +97,3 @@ static int __populate_cache_leaves(unsigned int cpu) } return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 93f34b4eca254..7998033306fb2 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -13,6 +13,7 @@ #include #include #include +#include static bool __maybe_unused is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) @@ -88,13 +89,21 @@ has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, } static void -cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) +cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap) { u64 mask = arm64_ftr_reg_ctrel0.strict_mask; + bool enable_uct_trap = false; /* Trap CTR_EL0 access on this CPU, only if it has a mismatch */ if ((read_cpuid_cachetype() & mask) != (arm64_ftr_reg_ctrel0.sys_val & mask)) + enable_uct_trap = true; + + /* ... or if the system is affected by an erratum */ + if (cap->capability == ARM64_WORKAROUND_1542419) + enable_uct_trap = true; + + if (enable_uct_trap) sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } @@ -108,6 +117,16 @@ DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); #ifdef CONFIG_KVM_INDIRECT_VECTORS extern char __smccc_workaround_1_smc_start[]; extern char __smccc_workaround_1_smc_end[]; +extern char __smccc_workaround_3_smc_start[]; +extern char __smccc_workaround_3_smc_end[]; +extern char __spectre_bhb_loop_k8_start[]; +extern char __spectre_bhb_loop_k8_end[]; +extern char __spectre_bhb_loop_k24_start[]; +extern char __spectre_bhb_loop_k24_end[]; +extern char __spectre_bhb_loop_k32_start[]; +extern char __spectre_bhb_loop_k32_end[]; +extern char __spectre_bhb_clearbhb_start[]; +extern char __spectre_bhb_clearbhb_end[]; static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, const char *hyp_vecs_end) @@ -121,11 +140,11 @@ static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); } +static DEFINE_RAW_SPINLOCK(bp_lock); static void install_bp_hardening_cb(bp_hardening_cb_t fn, const char *hyp_vecs_start, const char *hyp_vecs_end) { - static DEFINE_RAW_SPINLOCK(bp_lock); int cpu, slot = -1; /* @@ -153,6 +172,7 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn, __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); __this_cpu_write(bp_hardening_data.fn, fn); + __this_cpu_write(bp_hardening_data.template_start, hyp_vecs_start); raw_spin_unlock(&bp_lock); } #else @@ -167,9 +187,7 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn, } #endif /* CONFIG_KVM_INDIRECT_VECTORS */ -#include #include -#include static void call_smc_arch_workaround_1(void) { @@ -213,43 +231,31 @@ static int detect_harden_bp_fw(void) struct arm_smccc_res res; u32 midr = read_cpuid_id(); - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + + switch ((int)res.a0) { + case 1: + /* Firmware says we're just fine */ + return 0; + case 0: + break; + default: return -1; + } - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - switch ((int)res.a0) { - case 1: - /* Firmware says we're just fine */ - return 0; - case 0: - cb = call_hvc_arch_workaround_1; - /* This is a guest, no need to patch KVM vectors */ - smccc_start = NULL; - smccc_end = NULL; - break; - default: - return -1; - } + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: + cb = call_hvc_arch_workaround_1; + /* This is a guest, no need to patch KVM vectors */ + smccc_start = NULL; + smccc_end = NULL; break; - case PSCI_CONDUIT_SMC: - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - switch ((int)res.a0) { - case 1: - /* Firmware says we're just fine */ - return 0; - case 0: - cb = call_smc_arch_workaround_1; - smccc_start = __smccc_workaround_1_smc_start; - smccc_end = __smccc_workaround_1_smc_end; - break; - default: - return -1; - } + case SMCCC_CONDUIT_SMC: + cb = call_smc_arch_workaround_1; + smccc_start = __smccc_workaround_1_smc_start; + smccc_end = __smccc_workaround_1_smc_end; break; default: @@ -309,11 +315,11 @@ void __init arm64_update_smccc_conduit(struct alt_instr *alt, BUG_ON(nr_inst != 1); - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: insn = aarch64_insn_get_hvc_value(); break; - case PSCI_CONDUIT_SMC: + case SMCCC_CONDUIT_SMC: insn = aarch64_insn_get_smc_value(); break; default: @@ -339,6 +345,8 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt, void arm64_set_ssbd_mitigation(bool state) { + int conduit; + if (!IS_ENABLED(CONFIG_ARM64_SSBD)) { pr_info_once("SSBD disabled by kernel configuration\n"); return; @@ -352,19 +360,10 @@ void arm64_set_ssbd_mitigation(bool state) return; } - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); - break; - - case PSCI_CONDUIT_SMC: - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); - break; + conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, state, + NULL); - default: - WARN_ON_ONCE(1); - break; - } + WARN_ON_ONCE(conduit == SMCCC_CONDUIT_NONE); } static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, @@ -374,6 +373,7 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, bool required = true; s32 val; bool this_cpu_safe = false; + int conduit; WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); @@ -391,25 +391,10 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, goto out_printmsg; } - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { - ssbd_state = ARM64_SSBD_UNKNOWN; - if (!this_cpu_safe) - __ssb_safe = false; - return false; - } + conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_2, &res); - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_2, &res); - break; - - case PSCI_CONDUIT_SMC: - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_2, &res); - break; - - default: + if (conduit == SMCCC_CONDUIT_NONE) { ssbd_state = ARM64_SSBD_UNKNOWN; if (!this_cpu_safe) __ssb_safe = false; @@ -484,6 +469,12 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, return required; } +static void cpu_enable_ssbd_mitigation(const struct arm64_cpu_capabilities *cap) +{ + if (ssbd_state != ARM64_SSBD_FORCE_DISABLE) + cap->matches(cap, SCOPE_LOCAL_CPU); +} + /* known invulnerable cores */ static const struct midr_range arm64_ssb_cpus[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), @@ -575,6 +566,7 @@ static const struct midr_range spectre_v2_safe_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), + MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), { /* sentinel */ } }; @@ -626,6 +618,12 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) return (need_wa > 0); } +static void +cpu_enable_branch_predictor_hardening(const struct arm64_cpu_capabilities *cap) +{ + cap->matches(cap, SCOPE_LOCAL_CPU); +} + static const __maybe_unused struct midr_range tx2_family_cpus[] = { MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), @@ -650,6 +648,18 @@ needs_tx2_tvm_workaround(const struct arm64_cpu_capabilities *entry, return false; } +static bool __maybe_unused +has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry, + int scope) +{ + u32 midr = read_cpuid_id(); + bool has_dic = read_cpuid_cachetype() & BIT(CTR_DIC_SHIFT); + const struct midr_range range = MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1); + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + return is_midr_in_range(midr, &range) && has_dic; +} + #ifdef CONFIG_HARDEN_EL2_VECTORS static const struct midr_range arm64_harden_el2_vectors[] = { @@ -873,9 +883,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #endif { + .desc = "Branch predictor hardening", .capability = ARM64_HARDEN_BRANCH_PREDICTOR, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = check_branch_predictor, + .cpu_enable = cpu_enable_branch_predictor_hardening, }, #ifdef CONFIG_HARDEN_EL2_VECTORS { @@ -889,13 +901,27 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_SSBD, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = has_ssbd_mitigation, + .cpu_enable = cpu_enable_ssbd_mitigation, .midr_range_list = arm64_ssb_cpus, }, + { + .desc = "Spectre-BHB", + .capability = ARM64_SPECTRE_BHB, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = is_spectre_bhb_affected, + .cpu_enable = spectre_bhb_enable_mitigation, + }, #ifdef CONFIG_ARM64_ERRATUM_1418040 { .desc = "ARM erratum 1418040", .capability = ARM64_WORKAROUND_1418040, ERRATA_MIDR_RANGE_LIST(erratum_1418040_list), + /* + * We need to allow affected CPUs to come in late, but + * also need the non-affected CPUs to be able to come + * in at any point in time. Wonderful. + */ + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, }, #endif #ifdef CONFIG_ARM64_ERRATUM_1165522 @@ -926,6 +952,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM, ERRATA_MIDR_RANGE_LIST(tx2_family_cpus), }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_1542419 + { + /* we depend on the firmware portion for correctness */ + .desc = "ARM erratum 1542419 (kernel portion)", + .capability = ARM64_WORKAROUND_1542419, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = has_neoverse_n1_erratum_1542419, + .cpu_enable = cpu_enable_trap_ctr_access, + }, #endif { } @@ -937,15 +973,41 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, return sprintf(buf, "Mitigation: __user pointer sanitization\n"); } +static const char *get_bhb_affected_string(enum mitigation_state bhb_state) +{ + switch (bhb_state) { + case SPECTRE_UNAFFECTED: + return ""; + default: + case SPECTRE_VULNERABLE: + return ", but not BHB"; + case SPECTRE_MITIGATED: + return ", BHB"; + } +} + ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { + enum mitigation_state bhb_state = arm64_get_spectre_bhb_state(); + const char *bhb_str = get_bhb_affected_string(bhb_state); + const char *v2_str = "Branch predictor hardening"; + switch (get_spectre_v2_workaround_state()) { case ARM64_BP_HARDEN_NOT_REQUIRED: - return sprintf(buf, "Not affected\n"); - case ARM64_BP_HARDEN_WA_NEEDED: - return sprintf(buf, "Mitigation: Branch predictor hardening\n"); - case ARM64_BP_HARDEN_UNKNOWN: + if (bhb_state == SPECTRE_UNAFFECTED) + return sprintf(buf, "Not affected\n"); + + /* + * Platforms affected by Spectre-BHB can't report + * "Not affected" for Spectre-v2. + */ + v2_str = "CSV2"; + fallthrough; + case ARM64_BP_HARDEN_WA_NEEDED: + return sprintf(buf, "Mitigation: %s%s\n", v2_str, bhb_str); + case ARM64_BP_HARDEN_UNKNOWN: + fallthrough; default: return sprintf(buf, "Vulnerable\n"); } @@ -967,3 +1029,333 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, return sprintf(buf, "Vulnerable\n"); } + +/* + * We try to ensure that the mitigation state can never change as the result of + * onlining a late CPU. + */ +static void update_mitigation_state(enum mitigation_state *oldp, + enum mitigation_state new) +{ + enum mitigation_state state; + + do { + state = READ_ONCE(*oldp); + if (new <= state) + break; + } while (cmpxchg_relaxed(oldp, state, new) != state); +} + +/* + * Spectre BHB. + * + * A CPU is either: + * - Mitigated by a branchy loop a CPU specific number of times, and listed + * in our "loop mitigated list". + * - Mitigated in software by the firmware Spectre v2 call. + * - Has the ClearBHB instruction to perform the mitigation. + * - Has the 'Exception Clears Branch History Buffer' (ECBHB) feature, so no + * software mitigation in the vectors is needed. + * - Has CSV2.3, so is unaffected. + */ +static enum mitigation_state spectre_bhb_state; + +enum mitigation_state arm64_get_spectre_bhb_state(void) +{ + return spectre_bhb_state; +} + +/* + * This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any + * SCOPE_SYSTEM call will give the right answer. + */ +u8 spectre_bhb_loop_affected(int scope) +{ + u8 k = 0; + static u8 max_bhb_k; + + if (scope == SCOPE_LOCAL_CPU) { + static const struct midr_range spectre_bhb_k32_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + {}, + }; + static const struct midr_range spectre_bhb_k24_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), + {}, + }; + static const struct midr_range spectre_bhb_k8_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + {}, + }; + + if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list)) + k = 32; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list)) + k = 24; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list)) + k = 8; + + max_bhb_k = max(max_bhb_k, k); + } else { + k = max_bhb_k; + } + + return k; +} + +static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void) +{ + int ret; + struct arm_smccc_res res; + + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) + return SPECTRE_VULNERABLE; + + switch (psci_ops.conduit) { + case SMCCC_CONDUIT_HVC: + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_3, &res); + break; + + case SMCCC_CONDUIT_SMC: + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_3, &res); + break; + + default: + return SPECTRE_VULNERABLE; + } + + ret = res.a0; + switch (ret) { + case SMCCC_RET_SUCCESS: + return SPECTRE_MITIGATED; + case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED: + return SPECTRE_UNAFFECTED; + default: + fallthrough; + case SMCCC_RET_NOT_SUPPORTED: + return SPECTRE_VULNERABLE; + } +} + +static bool is_spectre_bhb_fw_affected(int scope) +{ + static bool system_affected; + enum mitigation_state fw_state; + bool has_smccc = (psci_ops.smccc_version >= SMCCC_VERSION_1_1); + static const struct midr_range spectre_bhb_firmware_mitigated_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), + {}, + }; + bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(), + spectre_bhb_firmware_mitigated_list); + + if (scope != SCOPE_LOCAL_CPU) + return system_affected; + + fw_state = spectre_bhb_get_cpu_fw_mitigation_state(); + if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) { + system_affected = true; + return true; + } + + return false; +} + +static bool supports_ecbhb(int scope) +{ + u64 mmfr1; + + if (scope == SCOPE_LOCAL_CPU) + mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1); + else + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + + return cpuid_feature_extract_unsigned_field(mmfr1, + ID_AA64MMFR1_ECBHB_SHIFT); +} + +bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, + int scope) +{ + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + if (supports_csv2p3(scope)) + return false; + + if (supports_clearbhb(scope)) + return true; + + if (spectre_bhb_loop_affected(scope)) + return true; + + if (is_spectre_bhb_fw_affected(scope)) + return true; + + return false; +} + +static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) +{ + const char *v = arm64_get_bp_hardening_vector(slot); + + if (slot < 0) + return; + + __this_cpu_write(this_cpu_vector, v); + + /* + * When KPTI is in use, the vectors are switched when exiting to + * user-space. + */ + if (arm64_kernel_unmapped_at_el0()) + return; + + write_sysreg(v, vbar_el1); + isb(); +} + +#ifdef CONFIG_KVM_INDIRECT_VECTORS +static const char *kvm_bhb_get_vecs_end(const char *start) +{ + if (start == __smccc_workaround_3_smc_start) + return __smccc_workaround_3_smc_end; + else if (start == __spectre_bhb_loop_k8_start) + return __spectre_bhb_loop_k8_end; + else if (start == __spectre_bhb_loop_k24_start) + return __spectre_bhb_loop_k24_end; + else if (start == __spectre_bhb_loop_k32_start) + return __spectre_bhb_loop_k32_end; + else if (start == __spectre_bhb_clearbhb_start) + return __spectre_bhb_clearbhb_end; + + return NULL; +} + +static void kvm_setup_bhb_slot(const char *hyp_vecs_start) +{ + int cpu, slot = -1; + const char *hyp_vecs_end; + + if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available()) + return; + + hyp_vecs_end = kvm_bhb_get_vecs_end(hyp_vecs_start); + if (WARN_ON_ONCE(!hyp_vecs_start || !hyp_vecs_end)) + return; + + raw_spin_lock(&bp_lock); + for_each_possible_cpu(cpu) { + if (per_cpu(bp_hardening_data.template_start, cpu) == hyp_vecs_start) { + slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu); + break; + } + } + + if (slot == -1) { + slot = atomic_inc_return(&arm64_el2_vector_last_slot); + BUG_ON(slot >= BP_HARDEN_EL2_SLOTS); + __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); + } + + __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); + __this_cpu_write(bp_hardening_data.template_start, hyp_vecs_start); + raw_spin_unlock(&bp_lock); +} +#else +#define __smccc_workaround_3_smc_start NULL +#define __spectre_bhb_loop_k8_start NULL +#define __spectre_bhb_loop_k24_start NULL +#define __spectre_bhb_loop_k32_start NULL +#define __spectre_bhb_clearbhb_start NULL + +static void kvm_setup_bhb_slot(const char *hyp_vecs_start) { } +#endif + +void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) +{ + enum mitigation_state fw_state, state = SPECTRE_VULNERABLE; + + if (!is_spectre_bhb_affected(entry, SCOPE_LOCAL_CPU)) + return; + + if (get_spectre_v2_workaround_state() == ARM64_BP_HARDEN_UNKNOWN) { + /* No point mitigating Spectre-BHB alone. */ + } else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) { + pr_info_once("spectre-bhb mitigation disabled by compile time option\n"); + } else if (cpu_mitigations_off()) { + pr_info_once("spectre-bhb mitigation disabled by command line option\n"); + } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) { + state = SPECTRE_MITIGATED; + } else if (supports_clearbhb(SCOPE_LOCAL_CPU)) { + kvm_setup_bhb_slot(__spectre_bhb_clearbhb_start); + this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN); + + state = SPECTRE_MITIGATED; + } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) { + switch (spectre_bhb_loop_affected(SCOPE_SYSTEM)) { + case 8: + kvm_setup_bhb_slot(__spectre_bhb_loop_k8_start); + break; + case 24: + kvm_setup_bhb_slot(__spectre_bhb_loop_k24_start); + break; + case 32: + kvm_setup_bhb_slot(__spectre_bhb_loop_k32_start); + break; + default: + WARN_ON_ONCE(1); + } + this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP); + + state = SPECTRE_MITIGATED; + } else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) { + fw_state = spectre_bhb_get_cpu_fw_mitigation_state(); + if (fw_state == SPECTRE_MITIGATED) { + kvm_setup_bhb_slot(__smccc_workaround_3_smc_start); + this_cpu_set_vectors(EL1_VECTOR_BHB_FW); + + /* + * With WA3 in the vectors, the WA1 calls can be + * removed. + */ + __this_cpu_write(bp_hardening_data.fn, NULL); + + state = SPECTRE_MITIGATED; + } + } + + update_mitigation_state(&spectre_bhb_state, state); +} + +/* Patched to correct the immediate */ +void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + u8 rd; + u32 insn; + u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM); + + BUG_ON(nr_inst != 1); /* MOV -> MOV */ + + if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) + return; + + insn = le32_to_cpu(*origptr); + rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn); + insn = aarch64_insn_gen_movewide(rd, loop_count, 0, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_ZERO); + *updptr++ = cpu_to_le32(insn); +} diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 80f459ad0190d..d07dadd6b8ff7 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -10,11 +10,13 @@ #include #include #include +#include #include #include #include #include #include + #include #include #include @@ -23,6 +25,7 @@ #include #include #include +#include #include /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ @@ -32,9 +35,7 @@ static unsigned long elf_hwcap __read_mostly; #define COMPAT_ELF_HWCAP_DEFAULT \ (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ - COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ - COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ - COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\ + COMPAT_HWCAP_TLS|COMPAT_HWCAP_IDIV|\ COMPAT_HWCAP_LPAE) unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; @@ -47,6 +48,8 @@ static struct arm64_cpu_capabilities const __ro_after_init *cpu_hwcaps_ptrs[ARM6 /* Need also bit for ARM64_CB_PATCH */ DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE); +DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors; + /* * Flag to indicate if we have computed the system wide * capabilities based on the boot time active CPUs. This @@ -152,6 +155,11 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_CLEARBHB_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0), @@ -162,11 +170,10 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), - /* Linux doesn't care about the EL3 */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL3_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY), ARM64_FTR_END, }; @@ -280,7 +287,6 @@ static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { * of support. */ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6), ARM64_FTR_END, }; @@ -322,7 +328,7 @@ static const struct arm64_ftr_bits ftr_id_pfr0[] = { }; static const struct arm64_ftr_bits ftr_id_dfr0[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), + /* [31:28] TraceFilt */ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), @@ -414,6 +420,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 6 */ ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1), + ARM64_FTR_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2), /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), @@ -585,6 +592,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1); init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0); init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1); + init_cpu_ftr_reg(SYS_ID_AA64ISAR2_EL1, info->reg_id_aa64isar2); init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0); init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); @@ -708,6 +716,8 @@ void update_cpu_features(int cpu, info->reg_id_aa64isar0, boot->reg_id_aa64isar0); taint |= check_update_ftr_reg(SYS_ID_AA64ISAR1_EL1, cpu, info->reg_id_aa64isar1, boot->reg_id_aa64isar1); + taint |= check_update_ftr_reg(SYS_ID_AA64ISAR2_EL1, cpu, + info->reg_id_aa64isar2, boot->reg_id_aa64isar2); /* * Differing PARange support is fine as long as all peripherals and @@ -721,9 +731,6 @@ void update_cpu_features(int cpu, taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu, info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2); - /* - * EL3 is not our concern. - */ taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu, info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu, @@ -845,6 +852,7 @@ static u64 __read_sysreg_by_encoding(u32 sys_id) read_sysreg_case(SYS_ID_AA64MMFR2_EL1); read_sysreg_case(SYS_ID_AA64ISAR0_EL1); read_sysreg_case(SYS_ID_AA64ISAR1_EL1); + read_sysreg_case(SYS_ID_AA64ISAR2_EL1); read_sysreg_case(SYS_CNTFRQ_EL0); read_sysreg_case(SYS_CTR_EL0); @@ -1045,6 +1053,12 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) static bool kpti_applied = false; int cpu = smp_processor_id(); + if (__this_cpu_read(this_cpu_vector) == vectors) { + const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI); + + __this_cpu_write(this_cpu_vector, v); + } + /* * We don't need to rewrite the page-tables if either we've done * it already or we have KASLR enabled and therefore have not @@ -1098,7 +1112,7 @@ static bool cpu_has_broken_dbm(void) /* List of CPUs which have broken DBM support. */ static const struct midr_range cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_1024718 - MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 1, 0), // A55 r0p0 -r1p0 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), #endif {}, }; @@ -1367,7 +1381,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { /* FP/SIMD is not implemented */ .capability = ARM64_HAS_NO_FPSIMD, - .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE, .min_field_value = 0, .matches = has_no_fpsimd, }, @@ -1595,6 +1609,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .match_list = list, \ } +#define HWCAP_CAP_MATCH(match, cap_type, cap) \ + { \ + __HWCAP_CAP(#cap, cap_type, cap) \ + .matches = match, \ + } + #ifdef CONFIG_ARM64_PTR_AUTH static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { { @@ -1668,8 +1688,35 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { {}, }; +#ifdef CONFIG_COMPAT +static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope) +{ + /* + * Check that all of MVFR1_EL1.{SIMDSP, SIMDInt, SIMDLS} are available, + * in line with that of arm32 as in vfp_init(). We make sure that the + * check is future proof, by making sure value is non-zero. + */ + u32 mvfr1; + + WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); + if (scope == SCOPE_SYSTEM) + mvfr1 = read_sanitised_ftr_reg(SYS_MVFR1_EL1); + else + mvfr1 = read_sysreg_s(SYS_MVFR1_EL1); + + return cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDSP_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDINT_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDLS_SHIFT); +} +#endif + static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { #ifdef CONFIG_COMPAT + HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON), + HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), + /* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */ + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index e4d6af2fdec71..5397f0d9e5bbb 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -53,6 +53,9 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu) struct acpi_lpi_state *lpi; struct acpi_processor *pr = per_cpu(processors, cpu); + if (unlikely(!pr || !pr->flags.has_lpi)) + return -EINVAL; + /* * If the PSCI cpu_suspend function hook has not been initialized * idle states must not be enabled, so bail out @@ -60,9 +63,6 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu) if (!psci_ops.cpu_suspend) return -EOPNOTSUPP; - if (unlikely(!pr || !pr->flags.has_lpi)) - return -EINVAL; - count = pr->power.count - 1; if (count <= 0) return -ENODEV; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 05933c065732b..90b35011a22f8 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -344,6 +344,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1); info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1); info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1); + info->reg_id_aa64isar2 = read_cpuid(ID_AA64ISAR2_EL1); info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1); diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c index ca4c3e12d8c59..ec8095bfe23eb 100644 --- a/arch/arm64/kernel/crash_core.c +++ b/arch/arm64/kernel/crash_core.c @@ -6,6 +6,14 @@ #include #include +#include + +static inline u64 get_tcr_el1_t1sz(void); + +static inline u64 get_tcr_el1_t1sz(void) +{ + return (read_sysreg(tcr_el1) & TCR_T1SZ_MASK) >> TCR_T1SZ_OFFSET; +} void arch_crash_save_vmcoreinfo(void) { @@ -15,5 +23,7 @@ void arch_crash_save_vmcoreinfo(void) kimage_voffset); vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n", PHYS_OFFSET); + vmcoreinfo_append_str("NUMBER(TCR_EL1_T1SZ)=0x%llx\n", + get_tcr_el1_t1sz()); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); } diff --git a/arch/arm64/kernel/crash_dump.c b/arch/arm64/kernel/crash_dump.c index e6e284265f19d..58303a9ec32c4 100644 --- a/arch/arm64/kernel/crash_dump.c +++ b/arch/arm64/kernel/crash_dump.c @@ -64,5 +64,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) { memcpy(buf, phys_to_virt((phys_addr_t)*ppos), count); + *ppos += count; + return count; } diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 48222a4760c2e..d64a3c1e1b6ba 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -141,17 +141,20 @@ postcore_initcall(debug_monitors_init); /* * Single step API and exception handling. */ -static void set_regs_spsr_ss(struct pt_regs *regs) +static void set_user_regs_spsr_ss(struct user_pt_regs *regs) { regs->pstate |= DBG_SPSR_SS; } -NOKPROBE_SYMBOL(set_regs_spsr_ss); +NOKPROBE_SYMBOL(set_user_regs_spsr_ss); -static void clear_regs_spsr_ss(struct pt_regs *regs) +static void clear_user_regs_spsr_ss(struct user_pt_regs *regs) { regs->pstate &= ~DBG_SPSR_SS; } -NOKPROBE_SYMBOL(clear_regs_spsr_ss); +NOKPROBE_SYMBOL(clear_user_regs_spsr_ss); + +#define set_regs_spsr_ss(r) set_user_regs_spsr_ss(&(r)->user_regs) +#define clear_regs_spsr_ss(r) clear_user_regs_spsr_ss(&(r)->user_regs) static DEFINE_SPINLOCK(debug_hook_lock); static LIST_HEAD(user_step_hook); @@ -393,17 +396,26 @@ void user_rewind_single_step(struct task_struct *task) * If single step is active for this thread, then set SPSR.SS * to 1 to avoid returning to the active-pending state. */ - if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP)) + if (test_tsk_thread_flag(task, TIF_SINGLESTEP)) set_regs_spsr_ss(task_pt_regs(task)); } NOKPROBE_SYMBOL(user_rewind_single_step); void user_fastforward_single_step(struct task_struct *task) { - if (test_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP)) + if (test_tsk_thread_flag(task, TIF_SINGLESTEP)) clear_regs_spsr_ss(task_pt_regs(task)); } +void user_regs_reset_single_step(struct user_pt_regs *regs, + struct task_struct *task) +{ + if (test_tsk_thread_flag(task, TIF_SINGLESTEP)) + set_user_regs_spsr_ss(regs); + else + clear_user_regs_spsr_ss(regs); +} + /* Kernel API */ void kernel_enable_single_step(struct pt_regs *regs) { diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index cf3bd2976e574..8a4c108a0c0b6 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -59,18 +59,21 @@ .macro kernel_ventry, el, label, regsize = 64 .align 7 -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -alternative_if ARM64_UNMAP_KERNEL_AT_EL0 +.Lventry_start\@: .if \el == 0 + /* + * This must be the first instruction of the EL0 vector entries. It is + * skipped by the trampoline vectors, to trigger the cleanup. + */ + b .Lskip_tramp_vectors_cleanup\@ .if \regsize == 64 mrs x30, tpidrro_el0 msr tpidrro_el0, xzr .else mov x30, xzr .endif +.Lskip_tramp_vectors_cleanup\@: .endif -alternative_else_nop_endif -#endif sub sp, sp, #S_FRAME_SIZE #ifdef CONFIG_VMAP_STACK @@ -116,11 +119,15 @@ alternative_else_nop_endif mrs x0, tpidrro_el0 #endif b el\()\el\()_\label +.org .Lventry_start\@ + 128 // Did we overflow the ventry slot? .endm - .macro tramp_alias, dst, sym + .macro tramp_alias, dst, sym, tmp mov_q \dst, TRAMP_VALIAS - add \dst, \dst, #(\sym - .entry.tramp.text) + adr_l \tmp, \sym + add \dst, \dst, \tmp + adr_l \tmp, .entry.tramp.text + sub \dst, \dst, \tmp .endm // This macro corrupts x0-x3. It is the caller's duty @@ -361,21 +368,25 @@ alternative_else_nop_endif ldp x24, x25, [sp, #16 * 12] ldp x26, x27, [sp, #16 * 13] ldp x28, x29, [sp, #16 * 14] - ldr lr, [sp, #S_LR] - add sp, sp, #S_FRAME_SIZE // restore sp .if \el == 0 -alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 +alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 + ldr lr, [sp, #S_LR] + add sp, sp, #S_FRAME_SIZE // restore sp + eret +alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 bne 5f - msr far_el1, x30 - tramp_alias x30, tramp_exit_native + msr far_el1, x29 + tramp_alias x30, tramp_exit_native, x29 br x30 5: - tramp_alias x30, tramp_exit_compat + tramp_alias x30, tramp_exit_compat, x29 br x30 #endif .else + ldr lr, [sp, #S_LR] + add sp, sp, #S_FRAME_SIZE // restore sp eret .endif sb @@ -1012,15 +1023,10 @@ ENDPROC(el0_svc) .popsection // .entry.text -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -/* - * Exception vectors trampoline. - */ - .pushsection ".entry.tramp.text", "ax" - + // Move from tramp_pg_dir to swapper_pg_dir .macro tramp_map_kernel, tmp mrs \tmp, ttbr1_el1 - add \tmp, \tmp, #(PAGE_SIZE + RESERVED_TTBR0_SIZE) + add \tmp, \tmp, #(2 * PAGE_SIZE) bic \tmp, \tmp, #USER_ASID_FLAG msr ttbr1_el1, \tmp #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 @@ -1037,9 +1043,10 @@ alternative_else_nop_endif #endif /* CONFIG_QCOM_FALKOR_ERRATUM_1003 */ .endm + // Move from swapper_pg_dir to tramp_pg_dir .macro tramp_unmap_kernel, tmp mrs \tmp, ttbr1_el1 - sub \tmp, \tmp, #(PAGE_SIZE + RESERVED_TTBR0_SIZE) + sub \tmp, \tmp, #(2 * PAGE_SIZE) orr \tmp, \tmp, #USER_ASID_FLAG msr ttbr1_el1, \tmp /* @@ -1049,12 +1056,47 @@ alternative_else_nop_endif */ .endm - .macro tramp_ventry, regsize = 64 + .macro tramp_data_page dst + adr_l \dst, .entry.tramp.text + sub \dst, \dst, PAGE_SIZE + .endm + + .macro tramp_data_read_var dst, var +#ifdef CONFIG_RANDOMIZE_BASE + tramp_data_page \dst + add \dst, \dst, #:lo12:__entry_tramp_data_\var + ldr \dst, [\dst] +#else + ldr \dst, =\var +#endif + .endm + +#define BHB_MITIGATION_NONE 0 +#define BHB_MITIGATION_LOOP 1 +#define BHB_MITIGATION_FW 2 +#define BHB_MITIGATION_INSN 3 + + .macro tramp_ventry, vector_start, regsize, kpti, bhb .align 7 1: .if \regsize == 64 msr tpidrro_el0, x30 // Restored in kernel_ventry .endif + + .if \bhb == BHB_MITIGATION_LOOP + /* + * This sequence must appear before the first indirect branch. i.e. the + * ret out of tramp_ventry. It appears here because x30 is free. + */ + __mitigate_spectre_bhb_loop x30 + .endif // \bhb == BHB_MITIGATION_LOOP + + .if \bhb == BHB_MITIGATION_INSN + clearbhb + isb + .endif // \bhb == BHB_MITIGATION_INSN + + .if \kpti == 1 /* * Defend against branch aliasing attacks by pushing a dummy * entry onto the return stack and using a RET instruction to @@ -1064,46 +1106,79 @@ alternative_else_nop_endif b . 2: tramp_map_kernel x30 -#ifdef CONFIG_RANDOMIZE_BASE - adr x30, tramp_vectors + PAGE_SIZE alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 - ldr x30, [x30] -#else - ldr x30, =vectors -#endif + tramp_data_read_var x30, vectors alternative_if_not ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM - prfm plil1strm, [x30, #(1b - tramp_vectors)] + prfm plil1strm, [x30, #(1b - \vector_start)] alternative_else_nop_endif + msr vbar_el1, x30 - add x30, x30, #(1b - tramp_vectors) isb + .else + ldr x30, =vectors + .endif // \kpti == 1 + + .if \bhb == BHB_MITIGATION_FW + /* + * The firmware sequence must appear before the first indirect branch. + * i.e. the ret out of tramp_ventry. But it also needs the stack to be + * mapped to save/restore the registers the SMC clobbers. + */ + __mitigate_spectre_bhb_fw + .endif // \bhb == BHB_MITIGATION_FW + + add x30, x30, #(1b - \vector_start + 4) ret +.org 1b + 128 // Did we overflow the ventry slot? .endm .macro tramp_exit, regsize = 64 - adr x30, tramp_vectors + tramp_data_read_var x30, this_cpu_vector +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN + mrs x29, tpidr_el1 +alternative_else + mrs x29, tpidr_el2 +alternative_endif + ldr x30, [x30, x29] + msr vbar_el1, x30 - tramp_unmap_kernel x30 + ldr lr, [sp, #S_LR] + tramp_unmap_kernel x29 .if \regsize == 64 - mrs x30, far_el1 + mrs x29, far_el1 .endif + add sp, sp, #S_FRAME_SIZE // restore sp eret sb .endm - .align 11 -ENTRY(tramp_vectors) + .macro generate_tramp_vector, kpti, bhb +.Lvector_start\@: .space 0x400 - tramp_ventry - tramp_ventry - tramp_ventry - tramp_ventry + .rept 4 + tramp_ventry .Lvector_start\@, 64, \kpti, \bhb + .endr + .rept 4 + tramp_ventry .Lvector_start\@, 32, \kpti, \bhb + .endr + .endm - tramp_ventry 32 - tramp_ventry 32 - tramp_ventry 32 - tramp_ventry 32 +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +/* + * Exception vectors trampoline. + * The order must match __bp_harden_el1_vectors and the + * arm64_bp_harden_el1_vectors enum. + */ + .pushsection ".entry.tramp.text", "ax" + .align 11 +ENTRY(tramp_vectors) +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_LOOP + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_FW + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_INSN +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_NONE END(tramp_vectors) ENTRY(tramp_exit_native) @@ -1121,11 +1196,55 @@ END(tramp_exit_compat) .align PAGE_SHIFT .globl __entry_tramp_data_start __entry_tramp_data_start: +__entry_tramp_data_vectors: .quad vectors +#ifdef CONFIG_ARM_SDE_INTERFACE +__entry_tramp_data___sdei_asm_handler: + .quad __sdei_asm_handler +#endif /* CONFIG_ARM_SDE_INTERFACE */ +__entry_tramp_data_this_cpu_vector: + .quad this_cpu_vector .popsection // .rodata #endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ +/* + * Exception vectors for spectre mitigations on entry from EL1 when + * kpti is not in use. + */ + .macro generate_el1_vector, bhb +.Lvector_start\@: + kernel_ventry 1, sync_invalid // Synchronous EL1t + kernel_ventry 1, irq_invalid // IRQ EL1t + kernel_ventry 1, fiq_invalid // FIQ EL1t + kernel_ventry 1, error_invalid // Error EL1t + + kernel_ventry 1, sync // Synchronous EL1h + kernel_ventry 1, irq // IRQ EL1h + kernel_ventry 1, fiq_invalid // FIQ EL1h + kernel_ventry 1, error // Error EL1h + + .rept 4 + tramp_ventry .Lvector_start\@, 64, 0, \bhb + .endr + .rept 4 + tramp_ventry .Lvector_start\@, 32, 0, \bhb + .endr + .endm + +/* The order must match tramp_vecs and the arm64_bp_harden_el1_vectors enum. */ + .pushsection ".entry.text", "ax" + .align 11 +SYM_CODE_START(__bp_harden_el1_vectors) +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + generate_el1_vector bhb=BHB_MITIGATION_LOOP + generate_el1_vector bhb=BHB_MITIGATION_FW + generate_el1_vector bhb=BHB_MITIGATION_INSN +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ +SYM_CODE_END(__bp_harden_el1_vectors) + .popsection + + /* * Register switch for AArch64. The callee-saved registers need to be saved * and restored. On entry: @@ -1212,13 +1331,7 @@ ENTRY(__sdei_asm_entry_trampoline) */ 1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)] -#ifdef CONFIG_RANDOMIZE_BASE - adr x4, tramp_vectors + PAGE_SIZE - add x4, x4, #:lo12:__sdei_asm_trampoline_next_handler - ldr x4, [x4] -#else - ldr x4, =__sdei_asm_handler -#endif + tramp_data_read_var x4, __sdei_asm_handler br x4 ENDPROC(__sdei_asm_entry_trampoline) NOKPROBE(__sdei_asm_entry_trampoline) @@ -1241,12 +1354,6 @@ ENDPROC(__sdei_asm_exit_trampoline) NOKPROBE(__sdei_asm_exit_trampoline) .ltorg .popsection // .entry.tramp.text -#ifdef CONFIG_RANDOMIZE_BASE -.pushsection ".rodata", "a" -__sdei_asm_trampoline_next_handler: - .quad __sdei_asm_handler -.popsection // .rodata -#endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ /* @@ -1342,7 +1449,7 @@ alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline + tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline, tmp=x3 br x5 #endif ENDPROC(__sdei_asm_handler) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 37d3912cfe06f..e62c9cbf99f46 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -269,6 +269,7 @@ static void sve_free(struct task_struct *task) */ static void task_fpsimd_load(void) { + WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); if (system_supports_sve() && test_thread_flag(TIF_SVE)) @@ -289,6 +290,7 @@ static void fpsimd_save(void) this_cpu_ptr(&fpsimd_last_state); /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ + WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { @@ -336,7 +338,7 @@ static unsigned int find_supported_vector_length(unsigned int vl) return sve_vl_from_vq(__bit_to_vq(bit)); } -#ifdef CONFIG_SYSCTL +#if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL) static int sve_proc_do_default_vl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, @@ -382,9 +384,9 @@ static int __init sve_sysctl_init(void) return 0; } -#else /* ! CONFIG_SYSCTL */ +#else /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */ static int __init sve_sysctl_init(void) { return 0; } -#endif /* ! CONFIG_SYSCTL */ +#endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */ #define ZREG(sve_state, vq, n) ((char *)(sve_state) + \ (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET)) @@ -496,7 +498,7 @@ size_t sve_state_size(struct task_struct const *task) void sve_alloc(struct task_struct *task) { if (task->thread.sve_state) { - memset(task->thread.sve_state, 0, sve_state_size(current)); + memset(task->thread.sve_state, 0, sve_state_size(task)); return; } @@ -1092,6 +1094,7 @@ void fpsimd_bind_task_to_cpu(void) struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); + WARN_ON(!system_supports_fpsimd()); last->st = ¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; last->sve_vl = current->thread.sve_vl; @@ -1114,6 +1117,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); + WARN_ON(!system_supports_fpsimd()); WARN_ON(!in_softirq() && !irqs_disabled()); last->st = st; @@ -1128,8 +1132,19 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, */ void fpsimd_restore_current_state(void) { - if (!system_supports_fpsimd()) + /* + * For the tasks that were created before we detected the absence of + * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(), + * e.g, init. This could be then inherited by the children processes. + * If we later detect that the system doesn't support FP/SIMD, + * we must clear the flag for all the tasks to indicate that the + * FPSTATE is clean (as we can't have one) to avoid looping for ever in + * do_notify_resume(). + */ + if (!system_supports_fpsimd()) { + clear_thread_flag(TIF_FOREIGN_FPSTATE); return; + } get_cpu_fpsimd_context(); @@ -1148,7 +1163,7 @@ void fpsimd_restore_current_state(void) */ void fpsimd_update_current_state(struct user_fpsimd_state const *state) { - if (!system_supports_fpsimd()) + if (WARN_ON(!system_supports_fpsimd())) return; get_cpu_fpsimd_context(); @@ -1179,7 +1194,13 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) void fpsimd_flush_task_state(struct task_struct *t) { t->thread.fpsimd_cpu = NR_CPUS; - + /* + * If we don't support fpsimd, bail out after we have + * reset the fpsimd_cpu for this task and clear the + * FPSTATE. + */ + if (!system_supports_fpsimd()) + return; barrier(); set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE); @@ -1193,6 +1214,7 @@ void fpsimd_flush_task_state(struct task_struct *t) */ static void fpsimd_flush_cpu_state(void) { + WARN_ON(!system_supports_fpsimd()); __this_cpu_write(fpsimd_last_state.st, NULL); set_thread_flag(TIF_FOREIGN_FPSTATE); } @@ -1203,6 +1225,8 @@ static void fpsimd_flush_cpu_state(void) */ void fpsimd_save_and_flush_cpu_state(void) { + if (!system_supports_fpsimd()) + return; WARN_ON(preemptible()); __get_cpu_fpsimd_context(); fpsimd_save(); diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 06e56b4703153..d4a4be02d309c 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -69,13 +69,25 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long pc = rec->ip; u32 old, new; - long offset = (long)pc - (long)addr; + long offset = (long)addr - (long)pc; if (offset < -SZ_128M || offset >= SZ_128M) { #ifdef CONFIG_ARM64_MODULE_PLTS - struct plt_entry trampoline, *dst; struct module *mod; + /* + * There is only one ftrace trampoline per module. For now, + * this is not a problem since on arm64, all dynamic ftrace + * invocations are routed via ftrace_caller(). This will need + * to be revisited if support for multiple ftrace entry points + * is added in the future, but for now, the pr_err() below + * deals with a theoretical issue only. + */ + if (addr != FTRACE_ADDR) { + pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); + return -EINVAL; + } + /* * On kernels that support module PLTs, the offset between the * branch instruction and its target may legally exceed the @@ -93,46 +105,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (WARN_ON(!mod)) return -EINVAL; - /* - * There is only one ftrace trampoline per module. For now, - * this is not a problem since on arm64, all dynamic ftrace - * invocations are routed via ftrace_caller(). This will need - * to be revisited if support for multiple ftrace entry points - * is added in the future, but for now, the pr_err() below - * deals with a theoretical issue only. - * - * Note that PLTs are place relative, and plt_entries_equal() - * checks whether they point to the same target. Here, we need - * to check if the actual opcodes are in fact identical, - * regardless of the offset in memory so use memcmp() instead. - */ - dst = mod->arch.ftrace_trampoline; - trampoline = get_plt_entry(addr, dst); - if (memcmp(dst, &trampoline, sizeof(trampoline))) { - if (plt_entry_is_initialized(dst)) { - pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); - return -EINVAL; - } - - /* point the trampoline to our ftrace entry point */ - module_disable_ro(mod); - *dst = trampoline; - module_enable_ro(mod, true); - - /* - * Ensure updated trampoline is visible to instruction - * fetch before we patch in the branch. Although the - * architecture doesn't require an IPI in this case, - * Neoverse-N1 erratum #1542419 does require one - * if the TLB maintenance in module_enable_ro() is - * skipped due to rodata_enabled. It doesn't seem worth - * it to make it conditional given that this is - * certainly not a fast-path. - */ - flush_icache_range((unsigned long)&dst[0], - (unsigned long)&dst[1]); - } - addr = (unsigned long)dst; + addr = (unsigned long)mod->arch.ftrace_trampoline; #else /* CONFIG_ARM64_MODULE_PLTS */ return -EINVAL; #endif /* CONFIG_ARM64_MODULE_PLTS */ @@ -153,7 +126,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long pc = rec->ip; bool validate = true; u32 old = 0, new; - long offset = (long)pc - (long)addr; + long offset = (long)addr - (long)pc; if (offset < -SZ_128M || offset >= SZ_128M) { #ifdef CONFIG_ARM64_MODULE_PLTS diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 989b1944cb719..2f784d3b4b390 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -194,7 +194,7 @@ ENDPROC(preserve_boot_args) * to be composed of multiple pages. (This effectively scales the end index). * * vstart: virtual address of start of range - * vend: virtual address of end of range + * vend: virtual address of end of range - we map [vstart, vend] * shift: shift used to transform virtual address into index * ptrs: number of entries in page table * istart: index in table corresponding to vstart @@ -231,17 +231,18 @@ ENDPROC(preserve_boot_args) * * tbl: location of page table * rtbl: address to be used for first level page table entry (typically tbl + PAGE_SIZE) - * vstart: start address to map - * vend: end address to map - we map [vstart, vend] + * vstart: virtual address of start of range + * vend: virtual address of end of range - we map [vstart, vend - 1] * flags: flags to use to map last level entries * phys: physical address corresponding to vstart - physical memory is contiguous * pgds: the number of pgd entries * * Temporaries: istart, iend, tmp, count, sv - these need to be different registers - * Preserves: vstart, vend, flags - * Corrupts: tbl, rtbl, istart, iend, tmp, count, sv + * Preserves: vstart, flags + * Corrupts: tbl, rtbl, vend, istart, iend, tmp, count, sv */ .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv + sub \vend, \vend, #1 add \rtbl, \tbl, #PAGE_SIZE mov \sv, \rtbl mov \count, #0 @@ -337,7 +338,7 @@ __create_page_tables: */ adrp x5, __idmap_text_end clz x5, x5 - cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough? + cmp x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough? b.ge 1f // .. then skip VA range extension adr_l x6, idmap_t0sz @@ -393,13 +394,19 @@ __create_page_tables: /* * Since the page tables have been populated with non-cacheable - * accesses (MMU disabled), invalidate the idmap and swapper page - * tables again to remove any speculatively loaded cache lines. + * accesses (MMU disabled), invalidate those tables again to + * remove any speculatively loaded cache lines. */ + dmb sy + adrp x0, idmap_pg_dir + adrp x1, idmap_pg_end + sub x1, x1, x0 + bl __inval_dcache_area + + adrp x0, init_pg_dir adrp x1, init_pg_end sub x1, x1, x0 - dmb sy bl __inval_dcache_area ret x28 @@ -964,6 +971,7 @@ __primary_switch: tlbi vmalle1 // Remove any stale TLB entries dsb nsh + isb msr sctlr_el1, x19 // re-enable the MMU isb diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 38ee1514cd9cd..b4a1607958246 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -730,6 +730,27 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val, return 0; } +static int watchpoint_report(struct perf_event *wp, unsigned long addr, + struct pt_regs *regs) +{ + int step = is_default_overflow_handler(wp); + struct arch_hw_breakpoint *info = counter_arch_bp(wp); + + info->trigger = addr; + + /* + * If we triggered a user watchpoint from a uaccess routine, then + * handle the stepping ourselves since userspace really can't help + * us with this. + */ + if (!user_mode(regs) && info->ctrl.privilege == AARCH64_BREAKPOINT_EL0) + step = 1; + else + perf_bp_event(wp, regs); + + return step; +} + static int watchpoint_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -739,7 +760,6 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, u64 val; struct perf_event *wp, **slots; struct debug_info *debug_info; - struct arch_hw_breakpoint *info; struct arch_hw_breakpoint_ctrl ctrl; slots = this_cpu_ptr(wp_on_reg); @@ -777,25 +797,13 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, if (dist != 0) continue; - info = counter_arch_bp(wp); - info->trigger = addr; - perf_bp_event(wp, regs); - - /* Do we need to handle the stepping? */ - if (is_default_overflow_handler(wp)) - step = 1; + step = watchpoint_report(wp, addr, regs); } - if (min_dist > 0 && min_dist != -1) { - /* No exact match found. */ - wp = slots[closest_match]; - info = counter_arch_bp(wp); - info->trigger = addr; - perf_bp_event(wp, regs); - /* Do we need to handle the stepping? */ - if (is_default_overflow_handler(wp)) - step = 1; - } + /* No exact match found? */ + if (min_dist > 0 && min_dist != -1) + step = watchpoint_report(slots[closest_match], addr, regs); + rcu_read_unlock(); if (!step) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index d801a7094076e..a02c294a47530 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -21,6 +21,7 @@ #include #include #include +#include #define AARCH64_INSN_SF_BIT BIT(31) #define AARCH64_INSN_N_BIT BIT(22) @@ -78,16 +79,29 @@ bool aarch64_insn_is_branch_imm(u32 insn) static DEFINE_RAW_SPINLOCK(patch_lock); +static bool is_exit_text(unsigned long addr) +{ + /* discarded with init text/data */ + return system_state < SYSTEM_RUNNING && + addr >= (unsigned long)__exittext_begin && + addr < (unsigned long)__exittext_end; +} + +static bool is_image_text(unsigned long addr) +{ + return core_kernel_text(addr) || is_exit_text(addr); +} + static void __kprobes *patch_map(void *addr, int fixmap) { unsigned long uintaddr = (uintptr_t) addr; - bool module = !core_kernel_text(uintaddr); + bool image = is_image_text(uintaddr); struct page *page; - if (module && IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) - page = vmalloc_to_page(addr); - else if (!module) + if (image) page = phys_to_page(__pa_symbol(addr)); + else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) + page = vmalloc_to_page(addr); else return addr; @@ -193,8 +207,8 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg) int i, ret = 0; struct aarch64_insn_patch *pp = arg; - /* The first CPU becomes master */ - if (atomic_inc_return(&pp->cpu_count) == 1) { + /* The last CPU becomes master */ + if (atomic_inc_return(&pp->cpu_count) == num_online_cpus()) { for (i = 0; ret == 0 && i < pp->insn_cnt; i++) ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], pp->new_insns[i]); @@ -1508,16 +1522,10 @@ static u32 aarch64_encode_immediate(u64 imm, u32 insn) { unsigned int immr, imms, n, ones, ror, esz, tmp; - u64 mask = ~0UL; - - /* Can't encode full zeroes or full ones */ - if (!imm || !~imm) - return AARCH64_BREAK_FAULT; + u64 mask; switch (variant) { case AARCH64_INSN_VARIANT_32BIT: - if (upper_32_bits(imm)) - return AARCH64_BREAK_FAULT; esz = 32; break; case AARCH64_INSN_VARIANT_64BIT: @@ -1529,6 +1537,12 @@ static u32 aarch64_encode_immediate(u64 imm, return AARCH64_BREAK_FAULT; } + mask = GENMASK(esz - 1, 0); + + /* Can't encode full zeroes, full ones, or value wider than the mask */ + if (!imm || imm == mask || imm & ~mask) + return AARCH64_BREAK_FAULT; + /* * Inverse of Replicate(). Try to spot a repeating pattern * with a pow2 stride. diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 43119922341f8..1a157ca33262d 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -252,7 +252,7 @@ static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) if (!kgdb_single_step) return DBG_HOOK_ERROR; - kgdb_handle_exception(1, SIGTRAP, 0, regs); + kgdb_handle_exception(0, SIGTRAP, 0, regs); return DBG_HOOK_HANDLED; } NOKPROBE_SYMBOL(kgdb_step_brk_fn); diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 0df8493624e00..cc049ff5c6a53 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -189,6 +189,7 @@ void machine_kexec(struct kimage *kimage) * the offline CPUs. Therefore, we must use the __* variant here. */ __flush_icache_range((uintptr_t)reboot_code_buffer, + (uintptr_t)reboot_code_buffer + arm64_relocate_new_kernel_size); /* Flush the kimage list and its buffers. */ diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 7b08bf9499b6b..d2a62dd17d79d 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -150,8 +150,10 @@ static int create_dtb(struct kimage *image, /* duplicate a device tree blob */ ret = fdt_open_into(initial_boot_params, buf, buf_size); - if (ret) + if (ret) { + vfree(buf); return -EINVAL; + } ret = setup_dtb(image, initrd_load_addr, initrd_len, cmdline, buf); diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index b182442b87a32..426018ebb7007 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -270,8 +270,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.core.plt_shndx = i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt")) mod->arch.init.plt_shndx = i; - else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && - !strcmp(secstrings + sechdrs[i].sh_name, + else if (!strcmp(secstrings + sechdrs[i].sh_name, ".text.ftrace_trampoline")) tramp = sechdrs + i; else if (sechdrs[i].sh_type == SHT_SYMTAB) diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 03ff15bffbb6d..d0692ecb99bb0 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -470,22 +471,48 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, return -ENOEXEC; } -int module_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) +static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *name) { const Elf_Shdr *s, *se; const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { - if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) - apply_alternatives_module((void *)s->sh_addr, s->sh_size); -#ifdef CONFIG_ARM64_MODULE_PLTS - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && - !strcmp(".text.ftrace_trampoline", secstrs + s->sh_name)) - me->arch.ftrace_trampoline = (void *)s->sh_addr; -#endif + if (strcmp(name, secstrs + s->sh_name) == 0) + return s; } + return NULL; +} + +static int module_init_ftrace_plt(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *mod) +{ +#if defined(CONFIG_ARM64_MODULE_PLTS) && defined(CONFIG_DYNAMIC_FTRACE) + const Elf_Shdr *s; + struct plt_entry *plt; + + s = find_section(hdr, sechdrs, ".text.ftrace_trampoline"); + if (!s) + return -ENOEXEC; + + plt = (void *)s->sh_addr; + *plt = get_plt_entry(FTRACE_ADDR, plt); + mod->arch.ftrace_trampoline = plt; +#endif return 0; } + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *me) +{ + const Elf_Shdr *s; + s = find_section(hdr, sechdrs, ".altinstructions"); + if (s) + apply_alternatives_module((void *)s->sh_addr, s->sh_size); + + return module_init_ftrace_plt(hdr, sechdrs, me); +} diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds index 22e36a21c1134..9371abe2f4c2d 100644 --- a/arch/arm64/kernel/module.lds +++ b/arch/arm64/kernel/module.lds @@ -1,5 +1,5 @@ SECTIONS { - .plt (NOLOAD) : { BYTE(0) } - .init.plt (NOLOAD) : { BYTE(0) } - .text.ftrace_trampoline (NOLOAD) : { BYTE(0) } + .plt 0 : { BYTE(0) } + .init.plt 0 : { BYTE(0) } + .text.ftrace_trampoline 0 : { BYTE(0) } } diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index 4cfed91fe256e..1ef702b0be2dc 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -6,13 +6,153 @@ * Author: Stefano Stabellini */ +#define pr_fmt(fmt) "arm-pv: " fmt + +#include +#include #include +#include #include +#include +#include +#include +#include #include + #include +#include +#include struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; struct paravirt_patch_template pv_ops; EXPORT_SYMBOL_GPL(pv_ops); + +struct pv_time_stolen_time_region { + struct pvclock_vcpu_stolen_time *kaddr; +}; + +static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region); + +static bool steal_acc = true; +static int __init parse_no_stealacc(char *arg) +{ + steal_acc = false; + return 0; +} + +early_param("no-steal-acc", parse_no_stealacc); + +/* return stolen time in ns by asking the hypervisor */ +static u64 pv_steal_clock(int cpu) +{ + struct pv_time_stolen_time_region *reg; + + reg = per_cpu_ptr(&stolen_time_region, cpu); + if (!reg->kaddr) { + pr_warn_once("stolen time enabled but not configured for cpu %d\n", + cpu); + return 0; + } + + return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time)); +} + +static int stolen_time_dying_cpu(unsigned int cpu) +{ + struct pv_time_stolen_time_region *reg; + + reg = this_cpu_ptr(&stolen_time_region); + if (!reg->kaddr) + return 0; + + memunmap(reg->kaddr); + memset(reg, 0, sizeof(*reg)); + + return 0; +} + +static int init_stolen_time_cpu(unsigned int cpu) +{ + struct pv_time_stolen_time_region *reg; + struct arm_smccc_res res; + + reg = this_cpu_ptr(&stolen_time_region); + + arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_ST, &res); + + if (res.a0 == SMCCC_RET_NOT_SUPPORTED) + return -EINVAL; + + reg->kaddr = memremap(res.a0, + sizeof(struct pvclock_vcpu_stolen_time), + MEMREMAP_WB); + + if (!reg->kaddr) { + pr_warn("Failed to map stolen time data structure\n"); + return -ENOMEM; + } + + if (le32_to_cpu(reg->kaddr->revision) != 0 || + le32_to_cpu(reg->kaddr->attributes) != 0) { + pr_warn_once("Unexpected revision or attributes in stolen time data\n"); + return -ENXIO; + } + + return 0; +} + +static int pv_time_init_stolen_time(void) +{ + int ret; + + ret = cpuhp_setup_state(CPUHP_AP_ARM_KVMPV_STARTING, + "hypervisor/arm/pvtime:starting", + init_stolen_time_cpu, stolen_time_dying_cpu); + if (ret < 0) + return ret; + return 0; +} + +static bool has_pv_steal_clock(void) +{ + struct arm_smccc_res res; + + /* To detect the presence of PV time support we require SMCCC 1.1+ */ + if (psci_ops.smccc_version < SMCCC_VERSION_1_1) + return false; + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_HV_PV_TIME_FEATURES, &res); + + if (res.a0 != SMCCC_RET_SUCCESS) + return false; + + arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_FEATURES, + ARM_SMCCC_HV_PV_TIME_ST, &res); + + return (res.a0 == SMCCC_RET_SUCCESS); +} + +int __init pv_time_init(void) +{ + int ret; + + if (!has_pv_steal_clock()) + return 0; + + ret = pv_time_init_stolen_time(); + if (ret) + return ret; + + pv_ops.time.steal_clock = pv_steal_clock; + + static_key_slow_inc(¶virt_steal_enabled); + if (steal_acc) + static_key_slow_inc(¶virt_steal_rq_enabled); + + pr_info("using stolen time PV\n"); + + return 0; +} diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index b0e03e052dd1d..b84ec4ce7d8dc 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -102,7 +102,9 @@ compat_user_backtrace(struct compat_frame_tail __user *tail, void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + + if (guest_cbs && guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } @@ -147,9 +149,10 @@ static int callchain_trace(struct stackframe *frame, void *data) void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct stackframe frame; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } @@ -160,18 +163,21 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, unsigned long perf_instruction_pointer(struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + + if (guest_cbs && guest_cbs->is_in_guest()) + return guest_cbs->get_guest_ip(); return instruction_pointer(regs); } unsigned long perf_misc_flags(struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); int misc = 0; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) + if (guest_cbs && guest_cbs->is_in_guest()) { + if (guest_cbs->is_user_mode()) misc |= PERF_RECORD_MISC_GUEST_USER; else misc |= PERF_RECORD_MISC_GUEST_KERNEL; diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index a0b4f1bca4917..19128d994ee97 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -155,7 +155,7 @@ armv8pmu_events_sysfs_show(struct device *dev, pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); - return sprintf(page, "event=0x%03llx\n", pmu_attr->id); + return sprintf(page, "event=0x%04llx\n", pmu_attr->id); } #define ARMV8_EVENT_ATTR(name, config) \ @@ -303,10 +303,13 @@ armv8pmu_event_attr_is_visible(struct kobject *kobj, test_bit(pmu_attr->id, cpu_pmu->pmceid_bitmap)) return attr->mode; - pmu_attr->id -= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE; - if (pmu_attr->id < ARMV8_PMUV3_MAX_COMMON_EVENTS && - test_bit(pmu_attr->id, cpu_pmu->pmceid_ext_bitmap)) - return attr->mode; + if (pmu_attr->id >= ARMV8_PMUV3_EXT_COMMON_EVENT_BASE) { + u64 id = pmu_attr->id - ARMV8_PMUV3_EXT_COMMON_EVENT_BASE; + + if (id < ARMV8_PMUV3_MAX_COMMON_EVENTS && + test_bit(id, cpu_pmu->pmceid_ext_bitmap)) + return attr->mode; + } return 0; } diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 0bbac612146ea..666b225aeb3ad 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -15,15 +15,34 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) return 0; /* - * Compat (i.e. 32 bit) mode: - * - PC has been set in the pt_regs struct in kernel_entry, - * - Handle SP and LR here. + * Our handling of compat tasks (PERF_SAMPLE_REGS_ABI_32) is weird, but + * we're stuck with it for ABI compatability reasons. + * + * For a 32-bit consumer inspecting a 32-bit task, then it will look at + * the first 16 registers (see arch/arm/include/uapi/asm/perf_regs.h). + * These correspond directly to a prefix of the registers saved in our + * 'struct pt_regs', with the exception of the PC, so we copy that down + * (x15 corresponds to SP_hyp in the architecture). + * + * So far, so good. + * + * The oddity arises when a 64-bit consumer looks at a 32-bit task and + * asks for registers beyond PERF_REG_ARM_MAX. In this case, we return + * SP_usr, LR_usr and PC in the positions where the AArch64 SP, LR and + * PC registers would normally live. The initial idea was to allow a + * 64-bit unwinder to unwind a 32-bit task and, although it's not clear + * how well that works in practice, somebody might be relying on it. + * + * At the time we make a sample, we don't know whether the consumer is + * 32-bit or 64-bit, so we have to cater for both possibilities. */ if (compat_user_mode(regs)) { if ((u32)idx == PERF_REG_ARM64_SP) return regs->compat_sp; if ((u32)idx == PERF_REG_ARM64_LR) return regs->compat_lr; + if (idx == 15) + return regs->pc; } if ((u32)idx == PERF_REG_ARM64_SP) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index c4452827419b0..b41ed2f907b0e 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -36,25 +36,16 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); static void __kprobes post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); -static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) -{ - void *addrs[1]; - u32 insns[1]; - - addrs[0] = addr; - insns[0] = opcode; - - return aarch64_insn_patch_text(addrs, insns, 1); -} - static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { + kprobe_opcode_t *addr = p->ainsn.api.insn; + void *addrs[] = {addr, addr + 1}; + u32 insns[] = {p->opcode, BRK64_OPCODE_KPROBES_SS}; + /* prepare insn slot */ - patch_text(p->ainsn.api.insn, p->opcode); + aarch64_insn_patch_text(addrs, insns, 2); - flush_icache_range((uintptr_t) (p->ainsn.api.insn), - (uintptr_t) (p->ainsn.api.insn) + - MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + flush_icache_range((uintptr_t)addr, (uintptr_t)(addr + MAX_INSN_SIZE)); /* * Needs restoring of return address after stepping xol. @@ -134,13 +125,18 @@ void *alloc_insn_page(void) /* arm kprobe: install breakpoint in text */ void __kprobes arch_arm_kprobe(struct kprobe *p) { - patch_text(p->addr, BRK64_OPCODE_KPROBES); + void *addr = p->addr; + u32 insn = BRK64_OPCODE_KPROBES; + + aarch64_insn_patch_text(&addr, &insn, 1); } /* disarm kprobe: remove breakpoint from text */ void __kprobes arch_disarm_kprobe(struct kprobe *p) { - patch_text(p->addr, p->opcode); + void *addr = p->addr; + + aarch64_insn_patch_text(&addr, &p->opcode, 1); } void __kprobes arch_remove_kprobe(struct kprobe *p) @@ -169,20 +165,15 @@ static void __kprobes set_current_kprobe(struct kprobe *p) } /* - * Interrupts need to be disabled before single-step mode is set, and not - * reenabled until after single-step mode ends. - * Without disabling interrupt on local CPU, there is a chance of - * interrupt occurrence in the period of exception return and start of - * out-of-line single-step, that result in wrongly single stepping - * into the interrupt handler. + * Mask all of DAIF while executing the instruction out-of-line, to keep things + * simple and avoid nesting exceptions. Interrupts do have to be disabled since + * the kprobe state is per-CPU and doesn't get migrated. */ static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb, struct pt_regs *regs) { kcb->saved_irqflag = regs->pstate & DAIF_MASK; - regs->pstate |= PSR_I_BIT; - /* Unmask PSTATE.D for enabling software step exceptions. */ - regs->pstate &= ~PSR_D_BIT; + regs->pstate |= DAIF_MASK; } static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb, @@ -225,10 +216,7 @@ static void __kprobes setup_singlestep(struct kprobe *p, slot = (unsigned long)p->ainsn.api.insn; set_ss_context(kcb, slot); /* mark pending ss */ - - /* IRQs and single stepping do not mix well. */ kprobes_save_local_irqflag(kcb, regs); - kernel_enable_single_step(regs); instruction_pointer_set(regs, slot); } else { /* insn simulation */ @@ -279,12 +267,8 @@ post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs) } /* call post handler */ kcb->kprobe_status = KPROBE_HIT_SSDONE; - if (cur->post_handler) { - /* post_handler can hit breakpoint and single step - * again, so we enable D-flag for recursive exception. - */ + if (cur->post_handler) cur->post_handler(cur, regs, 0); - } reset_current_kprobe(); } @@ -308,8 +292,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) if (!instruction_pointer(regs)) BUG(); - kernel_disable_single_step(); - if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); else @@ -371,10 +353,6 @@ static void __kprobes kprobe_handler(struct pt_regs *regs) * pre-handler and it returned non-zero, it will * modify the execution path and no need to single * stepping. Let's just reset current kprobe and exit. - * - * pre_handler can hit a breakpoint and can step thru - * before return, keep PSTATE D-flag enabled until - * pre_handler return back. */ if (!p->pre_handler || !p->pre_handler(p, regs)) { setup_singlestep(p, regs, kcb, 0); @@ -405,7 +383,7 @@ kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr) } static int __kprobes -kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) +kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned int esr) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); int retval; @@ -415,16 +393,15 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) if (retval == DBG_HOOK_HANDLED) { kprobes_restore_local_irqflag(kcb, regs); - kernel_disable_single_step(); - post_kprobe_handler(kcb, regs); } return retval; } -static struct step_hook kprobes_step_hook = { - .fn = kprobe_single_step_handler, +static struct break_hook kprobes_break_ss_hook = { + .imm = KPROBES_BRK_SS_IMM, + .fn = kprobe_breakpoint_ss_handler, }; static int __kprobes @@ -568,7 +545,7 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p) int __init arch_init_kprobes(void) { register_kernel_break_hook(&kprobes_break_hook); - register_kernel_step_hook(&kprobes_step_hook); + register_kernel_break_hook(&kprobes_break_ss_hook); return 0; } diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index a412d8edbcd24..2c247634552b1 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -38,7 +38,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, /* TODO: Currently we do not support AARCH32 instruction probing */ if (mm->context.flags & MMCF_AARCH32) - return -ENOTSUPP; + return -EOPNOTSUPP; else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE)) return -EINVAL; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 71f788cd2b187..d07fbc21f14ce 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -56,7 +56,7 @@ #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) #include -unsigned long __stack_chk_guard __read_mostly; +unsigned long __stack_chk_guard __ro_after_init; EXPORT_SYMBOL(__stack_chk_guard); #endif @@ -360,8 +360,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) asmlinkage void ret_from_fork(void) asm("ret_from_fork"); -int copy_thread(unsigned long clone_flags, unsigned long stack_start, - unsigned long stk_sz, struct task_struct *p) +int copy_thread_tls(unsigned long clone_flags, unsigned long stack_start, + unsigned long stk_sz, struct task_struct *p, unsigned long tls) { struct pt_regs *childregs = task_pt_regs(p); @@ -394,11 +394,11 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, } /* - * If a TLS pointer was passed to clone (4th argument), use it - * for the new thread. + * If a TLS pointer was passed to clone, use it for the new + * thread. */ if (clone_flags & CLONE_SETTLS) - p->thread.uw.tp_value = childregs->regs[3]; + p->thread.uw.tp_value = tls; } else { memset(childregs, 0, sizeof(struct pt_regs)); childregs->pstate = PSR_MODE_EL1h; @@ -466,6 +466,13 @@ static void ssbs_thread_switch(struct task_struct *next) if (unlikely(next->flags & PF_KTHREAD)) return; + /* + * If all CPUs implement the SSBS extension, then we just need to + * context-switch the PSTATE field. + */ + if (cpu_have_feature(cpu_feature(SSBS))) + return; + /* If the mitigation is enabled, then we leave SSBS clear. */ if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) || test_tsk_thread_flag(next, TIF_SSBD)) @@ -491,6 +498,30 @@ static void entry_task_switch(struct task_struct *next) __this_cpu_write(__entry_task, next); } +/* + * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT. + * Ensure access is disabled when switching to a 32bit task, ensure + * access is enabled when switching to a 64bit task. + */ +static void erratum_1418040_thread_switch(struct task_struct *next) +{ + if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) || + !this_cpu_has_cap(ARM64_WORKAROUND_1418040)) + return; + + if (is_compat_thread(task_thread_info(next))) + sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0); + else + sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN); +} + +static void erratum_1418040_new_exec(void) +{ + preempt_disable(); + erratum_1418040_thread_switch(current); + preempt_enable(); +} + /* * Thread switching. */ @@ -507,6 +538,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, uao_thread_switch(next); ptrauth_thread_switch(next); ssbs_thread_switch(next); + erratum_1418040_thread_switch(next); /* * Complete any pending TLB or cache maintenance on this CPU in case @@ -565,6 +597,7 @@ void arch_setup_new_exec(void) current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0; ptrauth_thread_init_user(current); + erratum_1418040_new_exec(); } #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index c9f72b2665f1c..62d2bda7adb80 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -66,7 +66,6 @@ static int cpu_psci_cpu_disable(unsigned int cpu) static void cpu_psci_cpu_die(unsigned int cpu) { - int ret; /* * There are no known implementations of PSCI actually using the * power state field, pass a sensible default for now. @@ -74,14 +73,13 @@ static void cpu_psci_cpu_die(unsigned int cpu) u32 state = PSCI_POWER_STATE_TYPE_POWER_DOWN << PSCI_0_2_POWER_STATE_TYPE_SHIFT; - ret = psci_ops.cpu_off(state); - - pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); + psci_ops.cpu_off(state); } static int cpu_psci_cpu_kill(unsigned int cpu) { - int err, i; + int err; + unsigned long start, end; if (!psci_ops.affinity_info) return 0; @@ -91,16 +89,18 @@ static int cpu_psci_cpu_kill(unsigned int cpu) * while it is dying. So, try again a few times. */ - for (i = 0; i < 10; i++) { + start = jiffies; + end = start + msecs_to_jiffies(100); + do { err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { - pr_info("CPU%d killed.\n", cpu); + pr_info("CPU%d killed (polled %d ms)\n", cpu, + jiffies_to_msecs(jiffies - start)); return 0; } - msleep(10); - pr_info("Retrying again to check for CPU kill\n"); - } + usleep_range(100, 1000); + } while (time_before(jiffies, end)); pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", cpu, err); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 21176d02e21a1..8a95a013dfd3c 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -615,6 +615,13 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, return 0; } +static int fpr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!system_supports_fpsimd()) + return -ENODEV; + return regset->n; +} + /* * TODO: update fp accessors for lazy context switching (sync/flush hwstate) */ @@ -637,6 +644,9 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { + if (!system_supports_fpsimd()) + return -EINVAL; + if (target == current) fpsimd_preserve_current_state(); @@ -676,6 +686,9 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, { int ret; + if (!system_supports_fpsimd()) + return -EINVAL; + ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, 0); if (ret) return ret; @@ -1134,6 +1147,7 @@ static const struct user_regset aarch64_regsets[] = { */ .size = sizeof(u32), .align = sizeof(u32), + .active = fpr_active, .get = fpr_get, .set = fpr_set }, @@ -1348,6 +1362,9 @@ static int compat_vfp_get(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; + if (!system_supports_fpsimd()) + return -EINVAL; + uregs = &target->thread.uw.fpsimd_state; if (target == current) @@ -1381,6 +1398,9 @@ static int compat_vfp_set(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; + if (!system_supports_fpsimd()) + return -EINVAL; + uregs = &target->thread.uw.fpsimd_state; vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t); @@ -1438,6 +1458,7 @@ static const struct user_regset aarch32_regsets[] = { .n = VFP_STATE_SIZE / sizeof(compat_ulong_t), .size = sizeof(compat_ulong_t), .align = sizeof(compat_ulong_t), + .active = fpr_active, .get = compat_vfp_get, .set = compat_vfp_set }, @@ -1798,20 +1819,32 @@ static void tracehook_report_syscall(struct pt_regs *regs, saved_reg = regs->regs[regno]; regs->regs[regno] = dir; - if (dir == PTRACE_SYSCALL_EXIT) + if (dir == PTRACE_SYSCALL_ENTER) { + if (tracehook_report_syscall_entry(regs)) + forget_syscall(regs); + regs->regs[regno] = saved_reg; + } else if (!test_thread_flag(TIF_SINGLESTEP)) { tracehook_report_syscall_exit(regs, 0); - else if (tracehook_report_syscall_entry(regs)) - forget_syscall(regs); + regs->regs[regno] = saved_reg; + } else { + regs->regs[regno] = saved_reg; - regs->regs[regno] = saved_reg; + /* + * Signal a pseudo-step exception since we are stepping but + * tracer modifications to the registers may have rewound the + * state machine. + */ + tracehook_report_syscall_exit(regs, 1); + } } int syscall_trace_enter(struct pt_regs *regs) { - if (test_thread_flag(TIF_SYSCALL_TRACE) || - test_thread_flag(TIF_SYSCALL_EMU)) { + unsigned long flags = READ_ONCE(current_thread_info()->flags); + + if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) { tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); - if (!in_syscall(regs) || test_thread_flag(TIF_SYSCALL_EMU)) + if (flags & _TIF_SYSCALL_EMU) return -1; } @@ -1830,12 +1863,14 @@ int syscall_trace_enter(struct pt_regs *regs) void syscall_trace_exit(struct pt_regs *regs) { + unsigned long flags = READ_ONCE(current_thread_info()->flags); + audit_syscall_exit(regs); - if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) - trace_sys_exit(regs, regs_return_value(regs)); + if (flags & _TIF_SYSCALL_TRACEPOINT) + trace_sys_exit(regs, syscall_get_return_value(current, regs)); - if (test_thread_flag(TIF_SYSCALL_TRACE)) + if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP)) tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); rseq_syscall(regs); @@ -1913,8 +1948,8 @@ static int valid_native_regs(struct user_pt_regs *regs) */ int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task) { - if (!test_tsk_thread_flag(task, TIF_SINGLESTEP)) - regs->pstate &= ~DBG_SPSR_SS; + /* https://lore.kernel.org/lkml/20191118131525.GA4180@willie-the-truck */ + user_regs_reset_single_step(regs, task); if (is_compat_thread(task_thread_info(task))) return valid_compat_regs(regs); diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index ea94cf8f9dc6d..d6259dac62b62 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -2,6 +2,7 @@ // Copyright (C) 2017 Arm Ltd. #define pr_fmt(fmt) "sdei: " fmt +#include #include #include #include @@ -161,7 +162,7 @@ unsigned long sdei_arch_get_entry_point(int conduit) return 0; } - sdei_exit_mode = (conduit == CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC; + sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC; #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 if (arm64_kernel_unmapped_at_el0()) { diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 56f6645617548..f55f4a15a905e 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -85,7 +85,7 @@ u64 __cacheline_aligned boot_args[4]; void __init smp_setup_processor_id(void) { u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; - cpu_logical_map(0) = mpidr; + set_cpu_logical_map(0, mpidr); /* * clear __my_cpu_offset on boot CPU to avoid hang caused by @@ -276,6 +276,12 @@ arch_initcall(reserve_memblock_reserved_regions); u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; +u64 cpu_logical_map(int cpu) +{ + return __cpu_logical_map[cpu]; +} +EXPORT_SYMBOL_GPL(cpu_logical_map); + void __init setup_arch(char **cmdline_p) { init_mm.start_code = (unsigned long) _text; @@ -350,7 +356,7 @@ void __init setup_arch(char **cmdline_p) * faults in case uaccess_enable() is inadvertently called by the init * thread. */ - init_task.thread_info.ttbr0 = __pa_symbol(empty_zero_page); + init_task.thread_info.ttbr0 = phys_to_ttbr(__pa_symbol(reserved_pg_dir)); #endif #ifdef CONFIG_VT diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index dd2cdc0d5be20..f6d3278c1a4e0 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -782,7 +783,6 @@ static void setup_restart_syscall(struct pt_regs *regs) */ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) { - struct task_struct *tsk = current; sigset_t *oldset = sigmask_to_save(); int usig = ksig->sig; int ret; @@ -806,14 +806,8 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) */ ret |= !valid_user_regs(®s->user_regs, current); - /* - * Fast forward the stepping logic so we step into the signal - * handler. - */ - if (!ret) - user_fastforward_single_step(tsk); - - signal_setup_done(ret, ksig, 0); + /* Step into the signal handler if we are stepping */ + signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLESTEP)); } /* @@ -875,7 +869,7 @@ static void do_signal(struct pt_regs *regs) retval == -ERESTART_RESTARTBLOCK || (retval == -ERESTARTSYS && !(ksig.ka.sa.sa_flags & SA_RESTART)))) { - regs->regs[0] = -EINTR; + syscall_set_return_value(current, regs, -EINTR, 0); regs->pc = continue_addr; } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index dc9fe879c2793..987220ac4cfef 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -215,6 +215,7 @@ asmlinkage notrace void secondary_start_kernel(void) if (system_uses_irq_prio_masking()) init_gic_priority_masking(); + rcu_cpu_starting(cpu); preempt_disable(); trace_hardirqs_off(); @@ -387,6 +388,7 @@ void cpu_die_early(void) /* Mark this CPU absent */ set_cpu_present(cpu, 0); + rcu_report_dead(cpu); #ifdef CONFIG_HOTPLUG_CPU update_cpu_boot_status(CPU_KILL_ME); @@ -549,7 +551,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) return; /* map the logical cpu id to cpu MPIDR */ - cpu_logical_map(cpu_count) = hwid; + set_cpu_logical_map(cpu_count, hwid); cpu_madt_gicc[cpu_count] = *processor; @@ -663,7 +665,7 @@ static void __init of_parse_and_init_cpus(void) goto next; pr_debug("cpu logical map 0x%llx\n", hwid); - cpu_logical_map(cpu_count) = hwid; + set_cpu_logical_map(cpu_count, hwid); early_map_cpu_to_node(cpu_count, of_node_to_nid(dn)); next: @@ -704,7 +706,7 @@ void __init smp_init_cpus(void) for (i = 1; i < nr_cpu_ids; i++) { if (cpu_logical_map(i) != INVALID_HWID) { if (smp_cpu_setup(i)) - cpu_logical_map(i) = INVALID_HWID; + set_cpu_logical_map(i, INVALID_HWID); } } } @@ -955,11 +957,22 @@ void tick_broadcast(const struct cpumask *mask) } #endif +/* + * The number of CPUs online, not counting this CPU (which may not be + * fully online and so not counted in num_online_cpus()). + */ +static inline unsigned int num_other_online_cpus(void) +{ + unsigned int this_cpu_online = cpu_online(smp_processor_id()); + + return num_online_cpus() - this_cpu_online; +} + void smp_send_stop(void) { unsigned long timeout; - if (num_online_cpus() > 1) { + if (num_other_online_cpus()) { cpumask_t mask; cpumask_copy(&mask, cpu_online_mask); @@ -972,10 +985,10 @@ void smp_send_stop(void) /* Wait up to one second for other CPUs to stop */ timeout = USEC_PER_SEC; - while (num_online_cpus() > 1 && timeout--) + while (num_other_online_cpus() && timeout--) udelay(1); - if (num_online_cpus() > 1) + if (num_other_online_cpus()) pr_warning("SMP: failed to stop secondary CPUs %*pbl\n", cpumask_pr_args(cpu_online_mask)); @@ -998,7 +1011,11 @@ void crash_smp_send_stop(void) cpus_stopped = 1; - if (num_online_cpus() == 1) { + /* + * If this cpu is the only one alive at this point in time, online or + * not, there are no stop messages to be sent around, so just back out. + */ + if (num_other_online_cpus() == 0) { sdei_mask_local_cpu(); return; } @@ -1006,7 +1023,7 @@ void crash_smp_send_stop(void) cpumask_copy(&mask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &mask); - atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + atomic_set(&waiting_for_crash_ipi, num_other_online_cpus()); pr_crit("SMP: stopping secondary CPUs\n"); smp_cross_call(&mask, IPI_CPU_CRASH_STOP); diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index f1cb649594271..51274bab25653 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -17,6 +18,7 @@ #include #include +#include #include static long @@ -30,6 +32,15 @@ __do_compat_cache_op(unsigned long start, unsigned long end) if (fatal_signal_pending(current)) return 0; + if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) { + /* + * The workaround requires an inner-shareable tlbi. + * We pick the reserved-ASID to minimise the impact. + */ + __tlbi(aside1is, __TLBI_VADDR(0, 0)); + dsb(ish); + } + ret = __flush_cache_user_range(start, start + chunk); if (ret) return ret; @@ -104,6 +115,6 @@ long compat_arm_syscall(struct pt_regs *regs, int scno) (compat_thumb_mode(regs) ? 2 : 4); arm64_notify_die("Oops - bad compat syscall(2)", regs, - SIGILL, ILL_ILLTRP, addr, scno); + SIGILL, ILL_ILLTRP, addr, 0); return 0; } diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 871c739f060ac..091c11521108a 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -50,7 +50,7 @@ static void invoke_syscall(struct pt_regs *regs, unsigned int scno, ret = do_ni_syscall(regs, scno); } - regs->regs[0] = ret; + syscall_set_return_value(current, regs, 0, ret); } static inline bool has_syscall_work(unsigned long flags) @@ -99,13 +99,13 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, regs->syscallno = scno; cortex_a76_erratum_1463225_svc_handler(); + user_exit_irqoff(); local_daif_restore(DAIF_PROCCTX); - user_exit(); if (has_syscall_work(flags)) { /* set default errno for user-issued syscall(-1) */ if (scno == NO_SYSCALL) - regs->regs[0] = -ENOSYS; + syscall_set_return_value(current, regs, -ENOSYS, 0); scno = syscall_trace_enter(regs); if (scno == NO_SYSCALL) goto trace_exit; @@ -121,7 +121,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) { local_daif_mask(); flags = current_thread_info()->flags; - if (!has_syscall_work(flags)) { + if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) { /* * We're off to userspace, where interrupts are * always enabled after we restore the flags from diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 0b2946414dc9c..73f06d4b3aae5 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -30,6 +30,7 @@ #include #include +#include unsigned long profile_pc(struct pt_regs *regs) { @@ -65,4 +66,6 @@ void __init time_init(void) /* Calibrate the delay loop directly */ lpj_fine = arch_timer_rate / HZ; + + pv_time_init(); } diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index fa9528dfd0ce3..113903db666c0 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -35,21 +35,23 @@ void store_cpu_topology(unsigned int cpuid) if (mpidr & MPIDR_UP_BITMASK) return; - /* Create cpu topology mapping based on MPIDR. */ - if (mpidr & MPIDR_MT_BITMASK) { - /* Multiprocessor system : Multi-threads per core */ - cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); - cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) | - MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8; - } else { - /* Multiprocessor system : Single-thread per core */ - cpuid_topo->thread_id = -1; - cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) | - MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8 | - MPIDR_AFFINITY_LEVEL(mpidr, 3) << 16; - } + /* + * This would be the place to create cpu topology based on MPIDR. + * + * However, it cannot be trusted to depict the actual topology; some + * pieces of the architecture enforce an artificial cap on Aff0 values + * (e.g. GICv3's ICC_SGI1R_EL1 limits it to 15), leading to an + * artificial cycling of Aff1, Aff2 and Aff3 values. IOW, these end up + * having absolutely no relationship to the actual underlying system + * topology, and cannot be reasonably used as core / package ID. + * + * If the MT bit is set, Aff0 *could* be used to define a thread ID, but + * we still wouldn't be able to obtain a sane core ID. This means we + * need to entirely ignore MPIDR for any topology deduction. + */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = cpuid; + cpuid_topo->package_id = cpu_to_node(cpuid); pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n", cpuid, cpuid_topo->package_id, cpuid_topo->core_id, diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 34739e80211bc..4e3e9d9c81517 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -470,6 +470,15 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) int rt = ESR_ELx_SYS64_ISS_RT(esr); unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0); + if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) { + /* Hide DIC so that we can trap the unnecessary maintenance...*/ + val &= ~BIT(CTR_DIC_SHIFT); + + /* ... and fake IminLine to reduce the number of traps. */ + val &= ~CTR_IMINLINE_MASK; + val |= (PAGE_SHIFT - 2) & CTR_IMINLINE_MASK; + } + pt_regs_write_reg(regs, rt, val); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 354b11e27c07a..033a48f30dbb8 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -260,18 +260,7 @@ static int __aarch32_alloc_vdso_pages(void) if (ret) return ret; - ret = aarch32_alloc_kuser_vdso_page(); - if (ret) { - unsigned long c_vvar = - (unsigned long)page_to_virt(aarch32_vdso_pages[C_VVAR]); - unsigned long c_vdso = - (unsigned long)page_to_virt(aarch32_vdso_pages[C_VDSO]); - - free_page(c_vvar); - free_page(c_vdso); - } - - return ret; + return aarch32_alloc_kuser_vdso_page(); } #else static int __aarch32_alloc_vdso_pages(void) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index dd2514bb1511f..3862cad2410cf 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -32,7 +32,7 @@ UBSAN_SANITIZE := n OBJECT_FILES_NON_STANDARD := y KCOV_INSTRUMENT := n -CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index 7ad2d3a0cd489..815df253f96e0 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -28,6 +28,13 @@ SECTIONS .gnu.version_d : { *(.gnu.version_d) } .gnu.version_r : { *(.gnu.version_r) } + /* + * Discard .note.gnu.property sections which are unused and have + * different alignment requirement from vDSO note sections. + */ + /DISCARD/ : { + *(.note.GNU-stack .note.gnu.property) + } .note : { *(.note.*) } :text :note . = ALIGN(16); @@ -48,7 +55,6 @@ SECTIONS PROVIDE(end = .); /DISCARD/ : { - *(.note.GNU-stack) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) } diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 76b327f88fbb1..894791f03596d 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -32,7 +32,8 @@ cc32-as-instr = $(call try-run,\ # As a result we set our own flags here. # KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile -VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include) +VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc +VDSO_CPPFLAGS += -isystem $(shell $(CC_COMPAT) -print-file-name=include 2>/dev/null) VDSO_CPPFLAGS += $(LINUXINCLUDE) # Common C and assembly flags @@ -190,7 +191,7 @@ quiet_cmd_vdsosym = VDSOSYM $@ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ # Install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ +quiet_cmd_vdso_install = INSTALL32 $@ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/vdso32.so vdso.so: $(obj)/vdso.so.dbg diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index aa76f72596685..fbab044b3a39a 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -24,6 +24,13 @@ ENTRY(_text) jiffies = jiffies_64; + +#define HYPERVISOR_EXTABLE \ + . = ALIGN(SZ_8); \ + __start___kvm_ex_table = .; \ + *(__kvm_ex_table) \ + __stop___kvm_ex_table = .; + #define HYPERVISOR_TEXT \ /* \ * Align to 4 KB so that \ @@ -39,6 +46,7 @@ jiffies = jiffies_64; __hyp_idmap_text_end = .; \ __hyp_text_start = .; \ *(.hyp.text) \ + HYPERVISOR_EXTABLE \ __hyp_text_end = .; #define IDMAP_TEXT \ @@ -142,28 +150,30 @@ SECTIONS . = ALIGN(PAGE_SIZE); idmap_pg_dir = .; . += IDMAP_DIR_SIZE; + idmap_pg_end = .; #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 tramp_pg_dir = .; . += PAGE_SIZE; #endif -#ifdef CONFIG_ARM64_SW_TTBR0_PAN - reserved_ttbr0 = .; - . += RESERVED_TTBR0_SIZE; -#endif + reserved_pg_dir = .; + . += PAGE_SIZE; + swapper_pg_dir = .; . += PAGE_SIZE; - swapper_pg_end = .; . = ALIGN(SEGMENT_ALIGN); __init_begin = .; __inittext_begin = .; INIT_TEXT_SECTION(8) + + __exittext_begin = .; .exit.text : { ARM_EXIT_KEEP(EXIT_TEXT) } + __exittext_end = .; . = ALIGN(4); .altinstructions : { @@ -171,9 +181,6 @@ SECTIONS *(.altinstructions) __alt_instructions_end = .; } - .altinstr_replacement : { - *(.altinstr_replacement) - } . = ALIGN(PAGE_SIZE); __inittext_end = .; @@ -269,7 +276,7 @@ ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) <= SZ_4K, "Hibernate exit text too big or misaligned") #endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, +ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) <= 3*PAGE_SIZE, "Entry trampoline text too big") #endif /* diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index a67121d419a2f..d8b88e40d223d 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -39,6 +39,7 @@ config KVM select IRQ_BYPASS_MANAGER select HAVE_KVM_IRQ_BYPASS select HAVE_KVM_VCPU_RUN_PID_CHANGE + select SCHEDSTATS ---help--- Support hosting virtualized guest machines. We don't support KVM with 16K page tables yet, due to the multiple diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 3ac1a64d2fb9d..5ffbdc39e780e 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -13,6 +13,8 @@ obj-$(CONFIG_KVM_ARM_HOST) += hyp/ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hypercalls.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/pvtime.o kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 43487f0353858..2484b2cca74bc 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -68,6 +68,64 @@ void kvm_arm_init_debug(void) __this_cpu_write(mdcr_el2, kvm_call_hyp_ret(__kvm_get_mdcr_el2)); } +/** + * kvm_arm_setup_mdcr_el2 - configure vcpu mdcr_el2 value + * + * @vcpu: the vcpu pointer + * + * This ensures we will trap access to: + * - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR) + * - Debug ROM Address (MDCR_EL2_TDRA) + * - OS related registers (MDCR_EL2_TDOSA) + * - Statistical profiler (MDCR_EL2_TPMS/MDCR_EL2_E2PB) + * - Self-hosted Trace Filter controls (MDCR_EL2_TTRF) + */ +static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu) +{ + /* + * This also clears MDCR_EL2_E2PB_MASK to disable guest access + * to the profiling buffer. + */ + vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK; + vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM | + MDCR_EL2_TPMS | + MDCR_EL2_TTRF | + MDCR_EL2_TPMCR | + MDCR_EL2_TDRA | + MDCR_EL2_TDOSA); + + /* Is the VM being debugged by userspace? */ + if (vcpu->guest_debug) + /* Route all software debug exceptions to EL2 */ + vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE; + + /* + * Trap debug register access when one of the following is true: + * - Userspace is using the hardware to debug the guest + * (KVM_GUESTDBG_USE_HW is set). + * - The guest is not using debug (KVM_ARM64_DEBUG_DIRTY is clear). + */ + if ((vcpu->guest_debug & KVM_GUESTDBG_USE_HW) || + !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) + vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; + + trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2); +} + +/** + * kvm_arm_vcpu_init_debug - setup vcpu debug traps + * + * @vcpu: the vcpu pointer + * + * Set vcpu initial mdcr_el2 value. + */ +void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + kvm_arm_setup_mdcr_el2(vcpu); + preempt_enable(); +} + /** * kvm_arm_reset_debug_ptr - reset the debug ptr to point to the vcpu state */ @@ -83,12 +141,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) * @vcpu: the vcpu pointer * * This is called before each entry into the hypervisor to setup any - * debug related registers. Currently this just ensures we will trap - * access to: - * - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR) - * - Debug ROM Address (MDCR_EL2_TDRA) - * - OS related registers (MDCR_EL2_TDOSA) - * - Statistical profiler (MDCR_EL2_TPMS/MDCR_EL2_E2PB) + * debug related registers. * * Additionally, KVM only traps guest accesses to the debug registers if * the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY @@ -100,27 +153,14 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) { - bool trap_debug = !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY); - unsigned long mdscr; + unsigned long mdscr, orig_mdcr_el2 = vcpu->arch.mdcr_el2; trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug); - /* - * This also clears MDCR_EL2_E2PB_MASK to disable guest access - * to the profiling buffer. - */ - vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK; - vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM | - MDCR_EL2_TPMS | - MDCR_EL2_TPMCR | - MDCR_EL2_TDRA | - MDCR_EL2_TDOSA); + kvm_arm_setup_mdcr_el2(vcpu); /* Is Guest debugging in effect? */ if (vcpu->guest_debug) { - /* Route all software debug exceptions to EL2 */ - vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE; - /* Save guest debug state */ save_guest_debug_regs(vcpu); @@ -174,7 +214,6 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state; vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; - trap_debug = true; trace_kvm_arm_set_regset("BKPTS", get_num_brps(), &vcpu->arch.debug_ptr->dbg_bcr[0], @@ -189,15 +228,14 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) BUG_ON(!vcpu->guest_debug && vcpu->arch.debug_ptr != &vcpu->arch.vcpu_debug_state); - /* Trap debug register access */ - if (trap_debug) - vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA; - /* If KDE or MDE are set, perform a full save/restore cycle. */ if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE)) vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; - trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2); + /* Write mdcr_el2 changes since vcpu_load on VHE systems */ + if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2) + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); + trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1)); } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index dfd626447482e..47313a7446adc 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -202,6 +202,13 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id)); + + if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) { + int i; + + for (i = 0; i < 16; i++) + *vcpu_reg32(vcpu, i) = (u32)*vcpu_reg32(vcpu, i); + } out: return err; } @@ -858,6 +865,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_TIMER_CTRL: ret = kvm_arm_timer_set_attr(vcpu, attr); break; + case KVM_ARM_VCPU_PVTIME_CTRL: + ret = kvm_arm_pvtime_set_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -878,6 +888,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_TIMER_CTRL: ret = kvm_arm_timer_get_attr(vcpu, attr); break; + case KVM_ARM_VCPU_PVTIME_CTRL: + ret = kvm_arm_pvtime_get_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -898,6 +911,9 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_TIMER_CTRL: ret = kvm_arm_timer_has_attr(vcpu, attr); break; + case KVM_ARM_VCPU_PVTIME_CTRL: + ret = kvm_arm_pvtime_has_attr(vcpu, attr); + break; default: ret = -ENXIO; break; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 706cca23f0d2e..e0a4bcdb94516 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -11,8 +11,6 @@ #include #include -#include - #include #include #include @@ -22,6 +20,8 @@ #include #include +#include + #define CREATE_TRACE_POINTS #include "trace.h" @@ -162,31 +162,16 @@ static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } -#define __ptrauth_save_key(regs, key) \ -({ \ - regs[key ## KEYLO_EL1] = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ - regs[key ## KEYHI_EL1] = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ -}) - /* * Handle the guest trying to use a ptrauth instruction, or trying to access a * ptrauth register. */ void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu) { - struct kvm_cpu_context *ctxt; - - if (vcpu_has_ptrauth(vcpu)) { + if (vcpu_has_ptrauth(vcpu)) vcpu_ptrauth_enable(vcpu); - ctxt = vcpu->arch.host_cpu_context; - __ptrauth_save_key(ctxt->sys_regs, APIA); - __ptrauth_save_key(ctxt->sys_regs, APIB); - __ptrauth_save_key(ctxt->sys_regs, APDA); - __ptrauth_save_key(ctxt->sys_regs, APDB); - __ptrauth_save_key(ctxt->sys_regs, APGA); - } else { + else kvm_inject_undefined(vcpu); - } } /* diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 160be2b4696d3..dc41b505507d7 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -136,11 +136,15 @@ ENTRY(__kvm_handle_stub_hvc) 1: cmp x0, #HVC_RESET_VECTORS b.ne 1f -reset: + /* - * Reset kvm back to the hyp stub. Do not clobber x0-x4 in - * case we coming via HVC_SOFT_RESTART. + * Set the HVC_RESET_VECTORS return code before entering the common + * path so that we do not clobber x0-x2 in case we are coming via + * HVC_SOFT_RESTART. */ + mov x0, xzr +reset: + /* Reset kvm back to the hyp stub. */ mrs x5, sctlr_el2 ldr x6, =SCTLR_ELx_FLAGS bic x5, x5, x6 // Clear SCTL_M and etc @@ -151,7 +155,6 @@ reset: /* Install stub vectors */ adr_l x5, __hyp_stub_vectors msr vbar_el2, x5 - mov x0, xzr eret 1: /* Bad stub call */ diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index 0fc9872a14671..aead8a5fbe919 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -168,6 +168,21 @@ static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); } +void __hyp_text __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) +{ + /* + * Non-VHE: Disable and flush SPE data generation + * VHE: The vcpu can run, but it can't hide. + */ + __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1); + +} + +void __hyp_text __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu) +{ + __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1); +} + void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; @@ -175,13 +190,6 @@ void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) struct kvm_guest_debug_arch *host_dbg; struct kvm_guest_debug_arch *guest_dbg; - /* - * Non-VHE: Disable and flush SPE data generation - * VHE: The vcpu can run, but it can't hide. - */ - if (!has_vhe()) - __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1); - if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) return; @@ -201,8 +209,6 @@ void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu) struct kvm_guest_debug_arch *host_dbg; struct kvm_guest_debug_arch *guest_dbg; - if (!has_vhe()) - __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1); if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) return; diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index e5cc8d66bf537..dc3d7bc2292fd 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -173,20 +173,23 @@ alternative_endif // This is our single instruction exception window. A pending // SError is guaranteed to occur at the earliest when we unmask // it, and at the latest just after the ISB. - .global abort_guest_exit_start abort_guest_exit_start: isb - .global abort_guest_exit_end abort_guest_exit_end: msr daifset, #4 // Mask aborts + ret + + _kvm_extable abort_guest_exit_start, 9997f + _kvm_extable abort_guest_exit_end, 9997f +9997: + msr daifset, #4 // Mask aborts + mov x0, #(1 << ARM_EXIT_WITH_SERROR_BIT) - // If the exception took place, restore the EL1 exception - // context so that we can report some information. - // Merge the exception code with the SError pending bit. - tbz x0, #ARM_EXIT_WITH_SERROR_BIT, 1f + // restore the EL1 exception context so that we can report some + // information. Merge the exception code with the SError pending bit. msr elr_el2, x2 msr esr_el2, x3 msr spsr_el2, x4 diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index ffa68d5713f1d..4287ebae8115d 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -15,6 +15,30 @@ #include #include +.macro save_caller_saved_regs_vect + /* x0 and x1 were saved in the vector entry */ + stp x2, x3, [sp, #-16]! + stp x4, x5, [sp, #-16]! + stp x6, x7, [sp, #-16]! + stp x8, x9, [sp, #-16]! + stp x10, x11, [sp, #-16]! + stp x12, x13, [sp, #-16]! + stp x14, x15, [sp, #-16]! + stp x16, x17, [sp, #-16]! +.endm + +.macro restore_caller_saved_regs_vect + ldp x16, x17, [sp], #16 + ldp x14, x15, [sp], #16 + ldp x12, x13, [sp], #16 + ldp x10, x11, [sp], #16 + ldp x8, x9, [sp], #16 + ldp x6, x7, [sp], #16 + ldp x4, x5, [sp], #16 + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 +.endm + .text .pushsection .hyp.text, "ax" @@ -89,6 +113,10 @@ el1_hvc_guest: /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */ eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \ ARM_SMCCC_ARCH_WORKAROUND_2) + cbz w1, wa_epilogue + + eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_2 ^ \ + ARM_SMCCC_ARCH_WORKAROUND_3) cbnz w1, el1_trap #ifdef CONFIG_ARM64_SSBD @@ -142,13 +170,19 @@ el1_error: b __guest_exit el2_sync: - /* Check for illegal exception return, otherwise panic */ + /* Check for illegal exception return */ mrs x0, spsr_el2 + tbnz x0, #20, 1f - /* if this was something else, then panic! */ - tst x0, #PSR_IL_BIT - b.eq __hyp_panic + save_caller_saved_regs_vect + stp x29, x30, [sp, #-16]! + bl kvm_unexpected_el2_exception + ldp x29, x30, [sp], #16 + restore_caller_saved_regs_vect + + eret +1: /* Let's attempt a recovery from the illegal exception return */ get_vcpu_ptr x1, x0 mov x0, #ARM_EXCEPTION_IL @@ -156,27 +190,14 @@ el2_sync: el2_error: - ldp x0, x1, [sp], #16 + save_caller_saved_regs_vect + stp x29, x30, [sp, #-16]! + + bl kvm_unexpected_el2_exception + + ldp x29, x30, [sp], #16 + restore_caller_saved_regs_vect - /* - * Only two possibilities: - * 1) Either we come from the exit path, having just unmasked - * PSTATE.A: change the return code to an EL2 fault, and - * carry on, as we're already in a sane state to handle it. - * 2) Or we come from anywhere else, and that's a bug: we panic. - * - * For (1), x0 contains the original return code and x1 doesn't - * contain anything meaningful at that stage. We can reuse them - * as temp registers. - * For (2), who cares? - */ - mrs x0, elr_el2 - adr x1, abort_guest_exit_start - cmp x0, x1 - adr x1, abort_guest_exit_end - ccmp x0, x1, #4, ne - b.ne __hyp_panic - mov x0, #(1 << ARM_EXIT_WITH_SERROR_BIT) eret sb @@ -330,4 +351,64 @@ ENTRY(__smccc_workaround_1_smc_start) ldp x0, x1, [sp, #(8 * 2)] add sp, sp, #(8 * 4) ENTRY(__smccc_workaround_1_smc_end) + +ENTRY(__smccc_workaround_3_smc_start) + esb + sub sp, sp, #(8 * 4) + stp x2, x3, [sp, #(8 * 0)] + stp x0, x1, [sp, #(8 * 2)] + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_3 + smc #0 + ldp x2, x3, [sp, #(8 * 0)] + ldp x0, x1, [sp, #(8 * 2)] + add sp, sp, #(8 * 4) +ENTRY(__smccc_workaround_3_smc_end) + +ENTRY(__spectre_bhb_loop_k8_start) + esb + sub sp, sp, #(8 * 2) + stp x0, x1, [sp, #(8 * 0)] + mov x0, #8 +2: b . + 4 + subs x0, x0, #1 + b.ne 2b + dsb nsh + isb + ldp x0, x1, [sp, #(8 * 0)] + add sp, sp, #(8 * 2) +ENTRY(__spectre_bhb_loop_k8_end) + +ENTRY(__spectre_bhb_loop_k24_start) + esb + sub sp, sp, #(8 * 2) + stp x0, x1, [sp, #(8 * 0)] + mov x0, #8 +2: b . + 4 + subs x0, x0, #1 + b.ne 2b + dsb nsh + isb + ldp x0, x1, [sp, #(8 * 0)] + add sp, sp, #(8 * 2) +ENTRY(__spectre_bhb_loop_k24_end) + +ENTRY(__spectre_bhb_loop_k32_start) + esb + sub sp, sp, #(8 * 2) + stp x0, x1, [sp, #(8 * 0)] + mov x0, #8 +2: b . + 4 + subs x0, x0, #1 + b.ne 2b + dsb nsh + isb + ldp x0, x1, [sp, #(8 * 0)] + add sp, sp, #(8 * 2) +ENTRY(__spectre_bhb_loop_k32_end) + +ENTRY(__spectre_bhb_clearbhb_start) + esb + clearbhb + isb +ENTRY(__spectre_bhb_clearbhb_end) #endif diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 799e84a403351..768983bd23261 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -24,11 +25,23 @@ #include #include #include +#include + +extern struct exception_table_entry __start___kvm_ex_table; +extern struct exception_table_entry __stop___kvm_ex_table; /* Check whether the FP regs were dirtied while in the host-side run loop: */ static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu) { - if (vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) + /* + * When the system doesn't support FP/SIMD, we cannot rely on + * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an + * abort on the very first access to FP and thus we should never + * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always + * trap the accesses. + */ + if (!system_supports_fpsimd() || + vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_FP_HOST); @@ -140,7 +153,7 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) static void deactivate_traps_vhe(void) { - extern char vectors[]; /* kernel exception vectors */ + const char *host_vectors = vectors; write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); /* @@ -151,7 +164,10 @@ static void deactivate_traps_vhe(void) asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_1165522)); write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); - write_sysreg(vectors, vbar_el1); + + if (!arm64_kernel_unmapped_at_el0()) + host_vectors = __this_cpu_read(this_cpu_vector); + write_sysreg(host_vectors, vbar_el1); } NOKPROBE_SYMBOL(deactivate_traps_vhe); @@ -249,10 +265,10 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) * saved the guest context yet, and we may return early... */ par = read_sysreg(par_el1); - asm volatile("at s1e1r, %0" : : "r" (far)); - isb(); - - tmp = read_sysreg(par_el1); + if (!__kvm_at("s1e1r", far)) + tmp = read_sysreg(par_el1); + else + tmp = SYS_PAR_EL1_F; /* back to the guest */ write_sysreg(par, par_el1); if (unlikely(tmp & SYS_PAR_EL1_F)) @@ -484,7 +500,7 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && kvm_vcpu_dabt_isvalid(vcpu) && !kvm_vcpu_dabt_isextabt(vcpu) && - !kvm_vcpu_dabt_iss1tw(vcpu); + !kvm_vcpu_abt_iss1tw(vcpu); if (valid) { int ret = __vgic_v2_perform_cpuif_access(vcpu); @@ -670,6 +686,15 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) __sysreg_save_state_nvhe(host_ctxt); + /* + * We must flush and disable the SPE buffer for nVHE, as + * the translation regime(EL1&0) is going to be loaded with + * that of the guest. And we must do this before we change the + * translation regime to EL2 (via MDCR_EL2_EPB == 0) and + * before we load guest Stage1. + */ + __debug_save_host_buffers_nvhe(vcpu); + __activate_vm(kern_hyp_va(vcpu->kvm)); __activate_traps(vcpu); @@ -708,11 +733,13 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) __fpsimd_save_fpexc32(vcpu); + __debug_switch_to_host(vcpu); + /* * This must come after restoring the host sysregs, since a non-VHE * system may enable SPE here and make use of the TTBRs. */ - __debug_switch_to_host(vcpu); + __debug_restore_host_buffers_nvhe(vcpu); if (pmu_switch_needed) __pmu_switch_to_host(host_ctxt); @@ -746,7 +773,7 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par, * making sure it is a kernel address and not a PC-relative * reference. */ - asm volatile("ldr %0, =__hyp_panic_string" : "=r" (str_va)); + asm volatile("ldr %0, =%1" : "=r" (str_va) : "S" (__hyp_panic_string)); __hyp_do_panic(str_va, spsr, elr, @@ -783,3 +810,30 @@ void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) unreachable(); } + +asmlinkage void __hyp_text kvm_unexpected_el2_exception(void) +{ + unsigned long addr, fixup; + struct kvm_cpu_context *host_ctxt; + struct exception_table_entry *entry, *end; + unsigned long elr_el2 = read_sysreg(elr_el2); + + entry = hyp_symbol_addr(__start___kvm_ex_table); + end = hyp_symbol_addr(__stop___kvm_ex_table); + host_ctxt = &__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt; + + while (entry < end) { + addr = (unsigned long)&entry->insn + entry->insn; + fixup = (unsigned long)&entry->fixup + entry->fixup; + + if (addr != elr_el2) { + entry++; + continue; + } + + write_sysreg(fixup, elr_el2); + return; + } + + hyp_panic(host_ctxt); +} diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index eb0efc5557f30..7b7213fc17d95 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -182,7 +182,7 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) __tlb_switch_to_host(kvm, &cxt); } -void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) +void __hyp_text __kvm_flush_cpu_context(struct kvm_vcpu *vcpu) { struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); struct tlb_inv_context cxt; @@ -191,6 +191,7 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) __tlb_switch_to_guest(kvm, &cxt); __tlbi(vmalle1); + asm volatile("ic iallu"); dsb(nsh); isb(); diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index a9d25a305af59..a364a4ad54790 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -14,9 +14,6 @@ #include #include -#define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \ - PSR_I_BIT | PSR_D_BIT) - #define CURRENT_EL_SP_EL0_VECTOR 0x0 #define CURRENT_EL_SP_ELx_VECTOR 0x200 #define LOWER_EL_AArch64_VECTOR 0x400 @@ -50,6 +47,69 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; } +/* + * When an exception is taken, most PSTATE fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all + * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx + * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0. + * + * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429. + * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426. + * + * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from + * MSB to LSB. + */ +static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu) +{ + unsigned long sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); + unsigned long old, new; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_N_BIT); + new |= (old & PSR_Z_BIT); + new |= (old & PSR_C_BIT); + new |= (old & PSR_V_BIT); + + // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) + + new |= (old & PSR_DIT_BIT); + + // PSTATE.UAO is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D5-2579. + + // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0 + // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page D5-2578. + new |= (old & PSR_PAN_BIT); + if (!(sctlr & SCTLR_EL1_SPAN)) + new |= PSR_PAN_BIT; + + // PSTATE.SS is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D2-2452. + + // PSTATE.IL is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D1-2306. + + // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64 + // See ARM DDI 0487E.a, page D13-3258 + if (sctlr & SCTLR_ELx_DSSBS) + new |= PSR_SSBS_BIT; + + // PSTATE.BTYPE is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, pages D1-2293 to D1-2294. + + new |= PSR_D_BIT; + new |= PSR_A_BIT; + new |= PSR_I_BIT; + new |= PSR_F_BIT; + + new |= PSR_MODE_EL1h; + + return new; +} + static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); @@ -59,7 +119,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); vcpu_write_spsr(vcpu, cpsr); vcpu_write_sys_reg(vcpu, addr, FAR_EL1); @@ -94,7 +154,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); vcpu_write_spsr(vcpu, cpsr); /* diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f4a8ae9188275..a3105ae464be1 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -258,7 +258,7 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { const struct kvm_regs *cpu_reset; - int ret = -EINVAL; + int ret; bool loaded; /* Reset PMU outside of the non-preemptible section */ @@ -281,15 +281,19 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) { - if (kvm_vcpu_enable_ptrauth(vcpu)) + if (kvm_vcpu_enable_ptrauth(vcpu)) { + ret = -EINVAL; goto out; + } } switch (vcpu->arch.target) { default: if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { - if (!cpu_has_32bit_el1()) + if (!cpu_has_32bit_el1()) { + ret = -EINVAL; goto out; + } cpu_reset = &default_regs_reset32; } else { cpu_reset = &default_regs_reset; @@ -374,10 +378,10 @@ void kvm_set_ipa_limit(void) pr_info("kvm: Limiting the IPA size due to kernel %s Address limit\n", (va_max < pa_max) ? "Virtual" : "Physical"); - WARN(ipa_max < KVM_PHYS_SHIFT, - "KVM IPA limit (%d bit) is smaller than default size\n", ipa_max); kvm_ipa_limit = ipa_max; - kvm_info("IPA Size Limit: %dbits\n", kvm_ipa_limit); + kvm_info("IPA Size Limit: %d bits%s\n", kvm_ipa_limit, + ((kvm_ipa_limit < KVM_PHYS_SHIFT) ? + " (Reduced IPA size, limited VM/VMM compatibility)" : "")); } /* @@ -404,6 +408,11 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) return -EINVAL; } else { phys_shift = KVM_PHYS_SHIFT; + if (phys_shift > kvm_ipa_limit) { + pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n", + current->comm); + return -EINVAL; + } } parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 46822afc57e00..a25f737dfa0bb 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -432,14 +432,14 @@ static bool trap_bvr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *rd) { - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm]; if (p->is_write) reg_to_dbg(vcpu, p, dbg_reg); else dbg_to_reg(vcpu, p, dbg_reg); - trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); + trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg); return true; } @@ -447,7 +447,7 @@ static bool trap_bvr(struct kvm_vcpu *vcpu, static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm]; if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -457,7 +457,7 @@ static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm]; if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -467,21 +467,21 @@ static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static void reset_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val; + vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = rd->val; } static bool trap_bcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *rd) { - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm]; if (p->is_write) reg_to_dbg(vcpu, p, dbg_reg); else dbg_to_reg(vcpu, p, dbg_reg); - trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); + trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg); return true; } @@ -489,7 +489,7 @@ static bool trap_bcr(struct kvm_vcpu *vcpu, static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm]; if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -500,7 +500,7 @@ static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm]; if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -510,22 +510,22 @@ static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static void reset_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val; + vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = rd->val; } static bool trap_wvr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *rd) { - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]; if (p->is_write) reg_to_dbg(vcpu, p, dbg_reg); else dbg_to_reg(vcpu, p, dbg_reg); - trace_trap_reg(__func__, rd->reg, p->is_write, - vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]); + trace_trap_reg(__func__, rd->CRm, p->is_write, + vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]); return true; } @@ -533,7 +533,7 @@ static bool trap_wvr(struct kvm_vcpu *vcpu, static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]; if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -543,7 +543,7 @@ static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]; if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -553,21 +553,21 @@ static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static void reset_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val; + vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = rd->val; } static bool trap_wcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *rd) { - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; + u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm]; if (p->is_write) reg_to_dbg(vcpu, p, dbg_reg); else dbg_to_reg(vcpu, p, dbg_reg); - trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); + trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg); return true; } @@ -575,7 +575,7 @@ static bool trap_wcr(struct kvm_vcpu *vcpu, static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm]; if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -585,7 +585,7 @@ static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; + __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm]; if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; @@ -595,7 +595,7 @@ static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static void reset_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val; + vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = rd->val; } static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) @@ -625,6 +625,10 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { u64 pmcr, val; + /* No PMU available, PMCR_EL0 may UNDEF... */ + if (!kvm_arm_support_pmu_v3()) + return; + pmcr = read_sysreg(pmcr_el0); /* * Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN @@ -1132,16 +1136,6 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu, return REG_HIDDEN_USER | REG_HIDDEN_GUEST; } -/* Visibility overrides for SVE-specific ID registers */ -static unsigned int sve_id_visibility(const struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) -{ - if (vcpu_has_sve(vcpu)) - return 0; - - return REG_HIDDEN_USER; -} - /* Generate the emulated ID_AA64ZFR0_EL1 value exposed to the guest */ static u64 guest_id_aa64zfr0_el1(const struct kvm_vcpu *vcpu) { @@ -1168,9 +1162,6 @@ static int get_id_aa64zfr0_el1(struct kvm_vcpu *vcpu, { u64 val; - if (WARN_ON(!vcpu_has_sve(vcpu))) - return -ENOENT; - val = guest_id_aa64zfr0_el1(vcpu); return reg_to_user(uaddr, &val, reg->id); } @@ -1183,9 +1174,6 @@ static int set_id_aa64zfr0_el1(struct kvm_vcpu *vcpu, int err; u64 val; - if (WARN_ON(!vcpu_has_sve(vcpu))) - return -ENOENT; - err = reg_from_user(&val, uaddr, id); if (err) return err; @@ -1280,10 +1268,16 @@ static bool access_clidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { + int reg = r->reg; + + /* See the 32bit mapping in kvm_host.h */ + if (p->is_aarch32) + reg = r->reg / 2; + if (p->is_write) - vcpu_write_sys_reg(vcpu, p->regval, r->reg); + vcpu_write_sys_reg(vcpu, p->regval, reg); else - p->regval = vcpu_read_sys_reg(vcpu, r->reg); + p->regval = vcpu_read_sys_reg(vcpu, reg); return true; } @@ -1442,7 +1436,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_SANITISED(ID_AA64PFR1_EL1), ID_UNALLOCATED(4,2), ID_UNALLOCATED(4,3), - { SYS_DESC(SYS_ID_AA64ZFR0_EL1), access_id_aa64zfr0_el1, .get_user = get_id_aa64zfr0_el1, .set_user = set_id_aa64zfr0_el1, .visibility = sve_id_visibility }, + { SYS_DESC(SYS_ID_AA64ZFR0_EL1), access_id_aa64zfr0_el1, .get_user = get_id_aa64zfr0_el1, .set_user = set_id_aa64zfr0_el1, }, ID_UNALLOCATED(4,5), ID_UNALLOCATED(4,6), ID_UNALLOCATED(4,7), @@ -1460,7 +1454,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* CRm=6 */ ID_SANITISED(ID_AA64ISAR0_EL1), ID_SANITISED(ID_AA64ISAR1_EL1), - ID_UNALLOCATED(6,2), + ID_SANITISED(ID_AA64ISAR2_EL1), ID_UNALLOCATED(6,3), ID_UNALLOCATED(6,4), ID_UNALLOCATED(6,5), @@ -1740,9 +1734,9 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(1), /* DBGDCCINT */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32, NULL, cp14_DBGDCCINT }, /* DBGDSCRext */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32, NULL, cp14_DBGDSCRext }, DBG_BCR_BVR_WCR_WVR(2), /* DBGDTR[RT]Xint */ { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi }, @@ -1757,7 +1751,7 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(6), /* DBGVCR */ - { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32, NULL, cp14_DBGVCR }, DBG_BCR_BVR_WCR_WVR(7), DBG_BCR_BVR_WCR_WVR(8), DBG_BCR_BVR_WCR_WVR(9), @@ -1847,6 +1841,7 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 3), access_vm_reg, NULL, c2_TTBCR2 }, { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR }, { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR }, { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR }, @@ -2360,8 +2355,11 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG) return NULL; + if (!index_to_params(id, ¶ms)) + return NULL; + table = get_target_table(vcpu->arch.target, true, &num); - r = find_reg_by_id(id, ¶ms, table, num); + r = find_reg(¶ms, table, num); if (!r) r = find_reg(¶ms, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S index 78a9ef66288ae..073acbf02a7c8 100644 --- a/arch/arm64/lib/clear_page.S +++ b/arch/arm64/lib/clear_page.S @@ -14,7 +14,7 @@ * Parameters: * x0 - dest */ -ENTRY(clear_page) +SYM_FUNC_START(clear_page) mrs x1, dczid_el0 and w1, w1, #0xf mov x2, #4 @@ -25,5 +25,5 @@ ENTRY(clear_page) tst x0, #(PAGE_SIZE - 1) b.ne 1b ret -ENDPROC(clear_page) +SYM_FUNC_END(clear_page) EXPORT_SYMBOL(clear_page) diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index aeafc03e961a8..48a3a26eff663 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -19,7 +19,7 @@ * * Alignment fixed up by hardware. */ -ENTRY(__arch_clear_user) +SYM_FUNC_START(__arch_clear_user) mov x2, x1 // save the size for fixup return subs x1, x1, #8 b.mi 2f @@ -40,7 +40,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 5: mov x0, #0 ret -ENDPROC(__arch_clear_user) +SYM_FUNC_END(__arch_clear_user) EXPORT_SYMBOL(__arch_clear_user) .section .fixup,"ax" diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index ebb3c06cbb5d8..8e25e89ad01fd 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -53,12 +53,12 @@ .endm end .req x5 -ENTRY(__arch_copy_from_user) +SYM_FUNC_START(__arch_copy_from_user) add end, x0, x2 #include "copy_template.S" mov x0, #0 // Nothing to copy ret -ENDPROC(__arch_copy_from_user) +SYM_FUNC_END(__arch_copy_from_user) EXPORT_SYMBOL(__arch_copy_from_user) .section .fixup,"ax" diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 3d8153a1ebce9..667139013ed17 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -55,12 +55,12 @@ end .req x5 -ENTRY(__arch_copy_in_user) +SYM_FUNC_START(__arch_copy_in_user) add end, x0, x2 #include "copy_template.S" mov x0, #0 ret -ENDPROC(__arch_copy_in_user) +SYM_FUNC_END(__arch_copy_in_user) EXPORT_SYMBOL(__arch_copy_in_user) .section .fixup,"ax" diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index bbb8562396afe..e125a84eb4000 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -17,7 +17,7 @@ * x0 - dest * x1 - src */ -ENTRY(copy_page) +SYM_FUNC_START(copy_page) alternative_if ARM64_HAS_NO_HW_PREFETCH // Prefetch three cache lines ahead. prfm pldl1strm, [x1, #128] @@ -75,5 +75,5 @@ alternative_else_nop_endif stnp x16, x17, [x0, #112] ret -ENDPROC(copy_page) +SYM_FUNC_END(copy_page) EXPORT_SYMBOL(copy_page) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 357eae2c18ebb..1a104d0089f3a 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -52,12 +52,12 @@ .endm end .req x5 -ENTRY(__arch_copy_to_user) +SYM_FUNC_START(__arch_copy_to_user) add end, x0, x2 #include "copy_template.S" mov x0, #0 ret -ENDPROC(__arch_copy_to_user) +SYM_FUNC_END(__arch_copy_to_user) EXPORT_SYMBOL(__arch_copy_to_user) .section .fixup,"ax" diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S index e6135f16649b1..243e107e98963 100644 --- a/arch/arm64/lib/crc32.S +++ b/arch/arm64/lib/crc32.S @@ -85,17 +85,17 @@ CPU_BE( rev16 w3, w3 ) .endm .align 5 -ENTRY(crc32_le) +SYM_FUNC_START(crc32_le) alternative_if_not ARM64_HAS_CRC32 b crc32_le_base alternative_else_nop_endif __crc32 -ENDPROC(crc32_le) +SYM_FUNC_END(crc32_le) .align 5 -ENTRY(__crc32c_le) +SYM_FUNC_START(__crc32c_le) alternative_if_not ARM64_HAS_CRC32 b __crc32c_le_base alternative_else_nop_endif __crc32 c -ENDPROC(__crc32c_le) +SYM_FUNC_END(__crc32c_le) diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S index 48a3ab636e4fb..edf6b970a2774 100644 --- a/arch/arm64/lib/memchr.S +++ b/arch/arm64/lib/memchr.S @@ -19,7 +19,7 @@ * Returns: * x0 - address of first occurrence of 'c' or 0 */ -WEAK(memchr) +SYM_FUNC_START_WEAK_PI(memchr) and w1, w1, #0xff 1: subs x2, x2, #1 b.mi 2f @@ -30,5 +30,5 @@ WEAK(memchr) ret 2: mov x0, #0 ret -ENDPIPROC(memchr) +SYM_FUNC_END_PI(memchr) EXPORT_SYMBOL_NOKASAN(memchr) diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S index b297bdaaf5498..c0671e793ea91 100644 --- a/arch/arm64/lib/memcmp.S +++ b/arch/arm64/lib/memcmp.S @@ -46,7 +46,7 @@ pos .req x11 limit_wd .req x12 mask .req x13 -WEAK(memcmp) +SYM_FUNC_START_WEAK_PI(memcmp) cbz limit, .Lret0 eor tmp1, src1, src2 tst tmp1, #7 @@ -243,5 +243,5 @@ CPU_LE( rev data2, data2 ) .Lret0: mov result, #0 ret -ENDPIPROC(memcmp) +SYM_FUNC_END_PI(memcmp) EXPORT_SYMBOL_NOKASAN(memcmp) diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index d79f48994dbb2..b03cbb3455d4d 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -56,12 +56,11 @@ stp \ptr, \regB, [\regC], \val .endm - .weak memcpy -ENTRY(__memcpy) -ENTRY(memcpy) +SYM_FUNC_START_ALIAS(__memcpy) +SYM_FUNC_START_WEAK_PI(memcpy) #include "copy_template.S" ret -ENDPIPROC(memcpy) +SYM_FUNC_END_PI(memcpy) EXPORT_SYMBOL(memcpy) -ENDPROC(__memcpy) +SYM_FUNC_END_ALIAS(__memcpy) EXPORT_SYMBOL(__memcpy) diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S index 7847751364806..1035dce4bdaf4 100644 --- a/arch/arm64/lib/memmove.S +++ b/arch/arm64/lib/memmove.S @@ -45,9 +45,8 @@ C_h .req x12 D_l .req x13 D_h .req x14 - .weak memmove -ENTRY(__memmove) -ENTRY(memmove) +SYM_FUNC_START_ALIAS(__memmove) +SYM_FUNC_START_WEAK_PI(memmove) cmp dstin, src b.lo __memcpy add tmp1, src, count @@ -184,7 +183,7 @@ ENTRY(memmove) tst count, #0x3f b.ne .Ltail63 ret -ENDPIPROC(memmove) +SYM_FUNC_END_PI(memmove) EXPORT_SYMBOL(memmove) -ENDPROC(__memmove) +SYM_FUNC_END_ALIAS(__memmove) EXPORT_SYMBOL(__memmove) diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index 9fb97e6bc5602..a9c1c9a01ea90 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -42,9 +42,8 @@ dst .req x8 tmp3w .req w9 tmp3 .req x9 - .weak memset -ENTRY(__memset) -ENTRY(memset) +SYM_FUNC_START_ALIAS(__memset) +SYM_FUNC_START_WEAK_PI(memset) mov dst, dstin /* Preserve return value. */ and A_lw, val, #255 orr A_lw, A_lw, A_lw, lsl #8 @@ -203,7 +202,7 @@ ENTRY(memset) ands count, count, zva_bits_x b.ne .Ltail_maybe_long ret -ENDPIPROC(memset) +SYM_FUNC_END_PI(memset) EXPORT_SYMBOL(memset) -ENDPROC(__memset) +SYM_FUNC_END_ALIAS(__memset) EXPORT_SYMBOL(__memset) diff --git a/arch/arm64/lib/strchr.S b/arch/arm64/lib/strchr.S index ca3ec18171a43..1f47eae3b0d6d 100644 --- a/arch/arm64/lib/strchr.S +++ b/arch/arm64/lib/strchr.S @@ -18,7 +18,7 @@ * Returns: * x0 - address of first occurrence of 'c' or 0 */ -WEAK(strchr) +SYM_FUNC_START_WEAK(strchr) and w1, w1, #0xff 1: ldrb w2, [x0], #1 cmp w2, w1 @@ -28,5 +28,5 @@ WEAK(strchr) cmp w2, w1 csel x0, x0, xzr, eq ret -ENDPROC(strchr) +SYM_FUNC_END(strchr) EXPORT_SYMBOL_NOKASAN(strchr) diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S index e9aefbe0b7401..4767540d1b94e 100644 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@ -48,7 +48,7 @@ tmp3 .req x9 zeroones .req x10 pos .req x11 -WEAK(strcmp) +SYM_FUNC_START_WEAK_PI(strcmp) eor tmp1, src1, src2 mov zeroones, #REP8_01 tst tmp1, #7 @@ -219,5 +219,5 @@ CPU_BE( orr syndrome, diff, has_nul ) lsr data1, data1, #56 sub result, data1, data2, lsr #56 ret -ENDPIPROC(strcmp) +SYM_FUNC_END_PI(strcmp) EXPORT_SYMBOL_NOKASAN(strcmp) diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S index 87b0cb066915f..ee3ed882dd79f 100644 --- a/arch/arm64/lib/strlen.S +++ b/arch/arm64/lib/strlen.S @@ -44,7 +44,7 @@ pos .req x12 #define REP8_7f 0x7f7f7f7f7f7f7f7f #define REP8_80 0x8080808080808080 -WEAK(strlen) +SYM_FUNC_START_WEAK_PI(strlen) mov zeroones, #REP8_01 bic src, srcin, #15 ands tmp1, srcin, #15 @@ -111,5 +111,5 @@ CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */ csinv data1, data1, xzr, le csel data2, data2, data2a, le b .Lrealigned -ENDPIPROC(strlen) +SYM_FUNC_END_PI(strlen) EXPORT_SYMBOL_NOKASAN(strlen) diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S index f571581888fa4..2a7ee949ed471 100644 --- a/arch/arm64/lib/strncmp.S +++ b/arch/arm64/lib/strncmp.S @@ -52,7 +52,7 @@ limit_wd .req x13 mask .req x14 endloop .req x15 -WEAK(strncmp) +SYM_FUNC_START_WEAK_PI(strncmp) cbz limit, .Lret0 eor tmp1, src1, src2 mov zeroones, #REP8_01 @@ -295,5 +295,5 @@ CPU_BE( orr syndrome, diff, has_nul ) .Lret0: mov result, #0 ret -ENDPIPROC(strncmp) +SYM_FUNC_END_PI(strncmp) EXPORT_SYMBOL_NOKASAN(strncmp) diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S index c0bac9493c683..b72913a990389 100644 --- a/arch/arm64/lib/strnlen.S +++ b/arch/arm64/lib/strnlen.S @@ -47,7 +47,7 @@ limit_wd .req x14 #define REP8_7f 0x7f7f7f7f7f7f7f7f #define REP8_80 0x8080808080808080 -WEAK(strnlen) +SYM_FUNC_START_WEAK_PI(strnlen) cbz limit, .Lhit_limit mov zeroones, #REP8_01 bic src, srcin, #15 @@ -156,5 +156,5 @@ CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */ .Lhit_limit: mov len, limit ret -ENDPIPROC(strnlen) +SYM_FUNC_END_PI(strnlen) EXPORT_SYMBOL_NOKASAN(strnlen) diff --git a/arch/arm64/lib/strrchr.S b/arch/arm64/lib/strrchr.S index 794ac49ea4333..13132d1ed6d12 100644 --- a/arch/arm64/lib/strrchr.S +++ b/arch/arm64/lib/strrchr.S @@ -18,7 +18,7 @@ * Returns: * x0 - address of last occurrence of 'c' or 0 */ -WEAK(strrchr) +SYM_FUNC_START_WEAK_PI(strrchr) mov x3, #0 and w1, w1, #0xff 1: ldrb w2, [x0], #1 @@ -29,5 +29,5 @@ WEAK(strrchr) b 1b 2: mov x0, x3 ret -ENDPIPROC(strrchr) +SYM_FUNC_END_PI(strrchr) EXPORT_SYMBOL_NOKASAN(strrchr) diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S index 047622536535d..a88613834fb07 100644 --- a/arch/arm64/lib/tishift.S +++ b/arch/arm64/lib/tishift.S @@ -7,7 +7,7 @@ #include -ENTRY(__ashlti3) +SYM_FUNC_START(__ashlti3) cbz x2, 1f mov x3, #64 sub x3, x3, x2 @@ -26,10 +26,10 @@ ENTRY(__ashlti3) lsl x1, x0, x1 mov x0, x2 ret -ENDPROC(__ashlti3) +SYM_FUNC_END(__ashlti3) EXPORT_SYMBOL(__ashlti3) -ENTRY(__ashrti3) +SYM_FUNC_START(__ashrti3) cbz x2, 1f mov x3, #64 sub x3, x3, x2 @@ -48,10 +48,10 @@ ENTRY(__ashrti3) asr x0, x1, x0 mov x1, x2 ret -ENDPROC(__ashrti3) +SYM_FUNC_END(__ashrti3) EXPORT_SYMBOL(__ashrti3) -ENTRY(__lshrti3) +SYM_FUNC_START(__lshrti3) cbz x2, 1f mov x3, #64 sub x3, x3, x2 @@ -70,5 +70,5 @@ ENTRY(__lshrti3) lsr x0, x1, x0 mov x1, x2 ret -ENDPROC(__lshrti3) +SYM_FUNC_END(__lshrti3) EXPORT_SYMBOL(__lshrti3) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index db767b072601e..7b054c67acd81 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -228,8 +228,6 @@ ENDPIPROC(__dma_flush_area) * - dir - DMA direction */ ENTRY(__dma_map_area) - cmp w2, #DMA_FROM_DEVICE - b.eq __dma_inv_area b __dma_clean_area ENDPIPROC(__dma_map_area) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 9fc6db0bcbad0..2a7339aeb1ad4 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -454,7 +454,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, const struct fault_info *inf; struct mm_struct *mm = current->mm; vm_fault_t fault, major = 0; - unsigned long vm_flags = VM_READ | VM_WRITE; + unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (kprobe_page_fault(regs, esr)) @@ -654,11 +654,13 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) inf = esr_to_fault_info(esr); - /* - * Return value ignored as we rely on signal merging. - * Future patches will make this more robust. - */ - apei_claim_sea(regs); + if (user_mode(regs) && apei_claim_sea(regs) == 0) { + /* + * APEI claimed this as a firmware-first notification. + * Some processing deferred to task_work before ret_to_user(). + */ + return 0; + } if (esr & ESR_ELx_FnV) siaddr = NULL; diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index bbeb6a5a6ba6d..0be3355e34997 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -230,6 +230,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, ptep = (pte_t *)pudp; } else if (sz == (CONT_PTE_SIZE)) { pmdp = pmd_alloc(mm, pudp, addr); + if (!pmdp) + return NULL; WARN_ON(addr & (sz - 1)); /* diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 45c00a54909c9..cbcac03c0e0da 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -50,12 +50,6 @@ s64 memstart_addr __ro_after_init = -1; EXPORT_SYMBOL(memstart_addr); -s64 physvirt_offset __ro_after_init; -EXPORT_SYMBOL(physvirt_offset); - -struct page *vmemmap __ro_after_init; -EXPORT_SYMBOL(vmemmap); - phys_addr_t arm64_dma_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE @@ -251,6 +245,18 @@ int pfn_valid(unsigned long pfn) if (!valid_section(__nr_to_section(pfn_to_section_nr(pfn)))) return 0; + + /* + * ZONE_DEVICE memory does not have the memblock entries. + * memblock_is_map_memory() check for ZONE_DEVICE based + * addresses will always fail. Even the normal hotplugged + * memory will never have MEMBLOCK_NOMAP flag set in their + * memblock entries. Skip memblock search for all non early + * memory sections covering all of hotplug memory including + * both normal and ZONE_DEVICE based. + */ + if (!early_section(__pfn_to_section(pfn))) + return pfn_section_valid(__pfn_to_section(pfn), pfn); #endif return memblock_is_map_memory(addr); } @@ -321,20 +327,6 @@ void __init arm64_memblock_init(void) memstart_addr = round_down(memblock_start_of_DRAM(), ARM64_MEMSTART_ALIGN); - physvirt_offset = PHYS_OFFSET - PAGE_OFFSET; - - vmemmap = ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)); - - /* - * If we are running with a 52-bit kernel VA config on a system that - * does not support it, we have to offset our vmemmap and physvirt_offset - * s.t. we avoid the 52-bit portion of the direct linear map - */ - if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52) && (vabits_actual != 52)) { - vmemmap += (_PAGE_OFFSET(48) - _PAGE_OFFSET(52)) >> PAGE_SHIFT; - physvirt_offset = PHYS_OFFSET - _PAGE_OFFSET(48); - } - /* * Remove the memory that we will not be able to cover with the * linear mapping. Take care not to clip the kernel which may be @@ -349,6 +341,16 @@ void __init arm64_memblock_init(void) memblock_remove(0, memstart_addr); } + /* + * If we are running with a 52-bit kernel VA config on a system that + * does not support it, we have to place the available physical + * memory in the 48-bit addressable part of the linear region, i.e., + * we have to move it upward. Since memstart_addr represents the + * physical address of PAGE_OFFSET, we have to *subtract* from it. + */ + if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52) && (vabits_actual != 52)) + memstart_addr -= _PAGE_OFFSET(48) - _PAGE_OFFSET(52); + /* * Apply the memory limit if it was set. Since the kernel may be loaded * high up in memory, add back the kernel region that must be accessible diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 9be71bee902ca..8dac7fcfb4bdc 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -100,3 +101,11 @@ void __init early_ioremap_init(void) { early_ioremap_setup(); } + +bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, + unsigned long flags) +{ + unsigned long pfn = PHYS_PFN(offset); + + return memblock_is_map_memory(pfn); +} diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 60c929f3683b9..5cf575f23af28 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -38,7 +38,7 @@ #define NO_BLOCK_MAPPINGS BIT(0) #define NO_CONT_MAPPINGS BIT(1) -u64 idmap_t0sz = TCR_T0SZ(VA_BITS); +u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN); u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; u64 __section(".mmuoff.data.write") vabits_actual; @@ -583,6 +583,8 @@ early_param("rodata", parse_rodata); #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static int __init map_entry_trampoline(void) { + int i; + pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); @@ -591,11 +593,15 @@ static int __init map_entry_trampoline(void) /* Map only the text into the trampoline page table */ memset(tramp_pg_dir, 0, PGD_SIZE); - __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE, - prot, __pgd_pgtable_alloc, 0); + __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, + entry_tramp_text_size(), prot, + __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS); /* Map both the text and data into the kernel page table */ - __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot); + for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++) + __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, + pa_start + i * PAGE_SIZE, prot); + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { extern char __entry_tramp_data_start[]; @@ -1069,7 +1075,6 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; /* * FIXME: Cleanup page tables (also in arch_add_memory() in case @@ -1078,7 +1083,6 @@ void arch_remove_memory(int nid, u64 start, u64 size, * unplug. ARCH_ENABLE_MEMORY_HOTREMOVE must not be * unlocked yet. */ - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 4decf16597008..53ebb4babf3a7 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -46,7 +46,11 @@ EXPORT_SYMBOL(node_to_cpumask_map); */ const struct cpumask *cpumask_of_node(int node) { - if (WARN_ON(node >= nr_node_ids)) + + if (node == NUMA_NO_NODE) + return cpu_all_mask; + + if (WARN_ON(node < 0 || node >= nr_node_ids)) return cpu_none_mask; if (WARN_ON(node_to_cpumask_map[node] == NULL)) diff --git a/arch/arm64/mm/physaddr.c b/arch/arm64/mm/physaddr.c index 67a9ba9eaa96b..cde44c13dda1b 100644 --- a/arch/arm64/mm/physaddr.c +++ b/arch/arm64/mm/physaddr.c @@ -9,7 +9,7 @@ phys_addr_t __virt_to_phys(unsigned long x) { - WARN(!__is_lm_address(x), + WARN(!__is_lm_address(__tag_reset(x)), "virt_to_phys used for non-linear address: %pK (%pS)\n", (void *)x, (void *)x); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index a1e0592d1fbcd..13e78a5d86905 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -166,7 +166,7 @@ ENDPROC(cpu_do_switch_mm) .pushsection ".idmap.text", "awx" .macro __idmap_cpu_set_reserved_ttbr1, tmp1, tmp2 - adrp \tmp1, empty_zero_page + adrp \tmp1, reserved_pg_dir phys_to_ttbr \tmp2, \tmp1 offset_ttbr1 \tmp2, \tmp1 msr ttbr1_el1, \tmp2 diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index cdc79de0c794a..9f71ca4414825 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -141,14 +141,17 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val, } } -static inline int bpf2a64_offset(int bpf_to, int bpf_from, +static inline int bpf2a64_offset(int bpf_insn, int off, const struct jit_ctx *ctx) { - int to = ctx->offset[bpf_to]; - /* -1 to account for the Branch instruction */ - int from = ctx->offset[bpf_from] - 1; - - return to - from; + /* BPF JMP offset is relative to the next instruction */ + bpf_insn++; + /* + * Whereas arm64 branch instructions encode the offset + * from the branch itself, so we must subtract 1 from the + * instruction offset. + */ + return ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1); } static void jit_fill_hole(void *area, unsigned int size) @@ -532,7 +535,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, /* JUMP off */ case BPF_JMP | BPF_JA: - jmp_offset = bpf2a64_offset(i + off, i, ctx); + jmp_offset = bpf2a64_offset(i, off, ctx); check_imm26(jmp_offset); emit(A64_B(jmp_offset), ctx); break; @@ -559,7 +562,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, case BPF_JMP32 | BPF_JSLE | BPF_X: emit(A64_CMP(is64, dst, src), ctx); emit_cond_jmp: - jmp_offset = bpf2a64_offset(i + off, i, ctx); + jmp_offset = bpf2a64_offset(i, off, ctx); check_imm19(jmp_offset); switch (BPF_OP(code)) { case BPF_JEQ: @@ -698,6 +701,19 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, } break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + /* + * Nothing required here. + * + * In case of arm64, we rely on the firmware mitigation of + * Speculative Store Bypass as controlled via the ssbd kernel + * parameter. Whenever the mitigation is enabled, it works + * for all of the kernel code with no need to provide any + * additional instructions. + */ + break; + /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: @@ -780,10 +796,21 @@ static int build_body(struct jit_ctx *ctx, bool extra_pass) const struct bpf_prog *prog = ctx->prog; int i; + /* + * - offset[0] offset of the end of prologue, + * start of the 1st instruction. + * - offset[1] - offset of the end of 1st instruction, + * start of the 2nd instruction + * [....] + * - offset[3] - offset of the end of 3rd instruction, + * start of 4th instruction + */ for (i = 0; i < prog->len; i++) { const struct bpf_insn *insn = &prog->insnsi[i]; int ret; + if (ctx->image == NULL) + ctx->offset[i] = ctx->idx; ret = build_insn(insn, ctx, extra_pass); if (ret > 0) { i++; @@ -791,11 +818,16 @@ static int build_body(struct jit_ctx *ctx, bool extra_pass) ctx->offset[i] = ctx->idx; continue; } - if (ctx->image == NULL) - ctx->offset[i] = ctx->idx; if (ret) return ret; } + /* + * offset is allocated with prog->len + 1 so fill in + * the last element with the offset after the last + * instruction (end of program) + */ + if (ctx->image == NULL) + ctx->offset[i] = ctx->idx; return 0; } @@ -871,21 +903,24 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; - ctx.offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); + ctx.offset = kcalloc(prog->len + 1, sizeof(int), GFP_KERNEL); if (ctx.offset == NULL) { prog = orig_prog; goto out_off; } - /* 1. Initial fake pass to compute ctx->idx. */ - - /* Fake pass to fill in ctx->offset. */ - if (build_body(&ctx, extra_pass)) { + /* + * 1. Initial fake pass to compute ctx->idx and ctx->offset. + * + * BPF line info needs ctx->offset[i] to be the offset of + * instruction[i] in jited image, so build prologue first. + */ + if (build_prologue(&ctx, was_classic)) { prog = orig_prog; goto out_off; } - if (build_prologue(&ctx, was_classic)) { + if (build_body(&ctx, extra_pass)) { prog = orig_prog; goto out_off; } @@ -938,6 +973,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) bpf_jit_binary_free(header); prog->bpf_func = NULL; prog->jited = 0; + prog->jited_len = 0; goto out_off; } bpf_jit_binary_lock_ro(header); @@ -951,7 +987,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog->jited_len = image_size; if (!prog->is_func || extra_pass) { - bpf_prog_fill_jited_linfo(prog, ctx.offset); + int i; + + /* offset[prog->len] is the size of program */ + for (i = 0; i <= prog->len; i++) + ctx.offset[i] *= AARCH64_INSN_SIZE; + bpf_prog_fill_jited_linfo(prog, ctx.offset + 1); out_off: kfree(ctx.offset); kfree(jit_data); @@ -964,6 +1005,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) return prog; } +u64 bpf_jit_alloc_exec_limit(void) +{ + return BPF_JIT_REGION_SIZE; +} + void *bpf_jit_alloc_exec(unsigned long size) { return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 3973847b5f42e..4f48a2f0513b7 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -36,6 +36,7 @@ config CSKY select GX6605S_TIMER if CPU_CK610 select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_AUDITSYSCALL + select HAVE_COPY_THREAD_TLS select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER @@ -74,7 +75,7 @@ config CPU_HAS_TLBI config CPU_HAS_LDSTEX bool help - For SMP, CPU needs "ldex&stex" instrcutions to atomic operations. + For SMP, CPU needs "ldex&stex" instructions for atomic operations. config CPU_NEED_TLBSYNC bool @@ -219,7 +220,7 @@ config FORCE_MAX_ZONEORDER int "Maximum zone order" default "11" -config RAM_BASE +config DRAM_BASE hex "DRAM start addr (the same with memory-section in dts)" default 0x0 diff --git a/arch/csky/abiv1/inc/abi/entry.h b/arch/csky/abiv1/inc/abi/entry.h index 7ab78bd0f3b13..61d94ec7dd160 100644 --- a/arch/csky/abiv1/inc/abi/entry.h +++ b/arch/csky/abiv1/inc/abi/entry.h @@ -16,14 +16,16 @@ #define LSAVE_A4 40 #define LSAVE_A5 44 +#define usp ss1 + .macro USPTOKSP - mtcr sp, ss1 + mtcr sp, usp mfcr sp, ss0 .endm .macro KSPTOUSP mtcr sp, ss0 - mfcr sp, ss1 + mfcr sp, usp .endm .macro SAVE_ALL epc_inc @@ -45,7 +47,13 @@ add lr, r13 stw lr, (sp, 8) + mov lr, sp + addi lr, 32 + addi lr, 32 + addi lr, 16 + bt 2f mfcr lr, ss1 +2: stw lr, (sp, 16) stw a0, (sp, 20) @@ -79,9 +87,10 @@ ldw a0, (sp, 12) mtcr a0, epsr btsti a0, 31 + bt 1f ldw a0, (sp, 16) mtcr a0, ss1 - +1: ldw a0, (sp, 24) ldw a1, (sp, 28) ldw a2, (sp, 32) @@ -102,9 +111,9 @@ addi sp, 32 addi sp, 8 - bt 1f + bt 2f KSPTOUSP -1: +2: rte .endm @@ -158,15 +167,12 @@ * BA Reserved C D V */ cprcr r6, cpcr30 - lsri r6, 28 - lsli r6, 28 + lsri r6, 29 + lsli r6, 29 addi r6, 0xe cpwcr r6, cpcr30 - lsri r6, 28 - addi r6, 2 - lsli r6, 28 - addi r6, 0xe + movi r6, 0 cpwcr r6, cpcr31 .endm diff --git a/arch/csky/abiv2/fpu.c b/arch/csky/abiv2/fpu.c index 86d187d4e5af1..5acc5c2e544e1 100644 --- a/arch/csky/abiv2/fpu.c +++ b/arch/csky/abiv2/fpu.c @@ -10,11 +10,6 @@ #define MTCR_DIST 0xC0006420 #define MFCR_DIST 0xC0006020 -void __init init_fpu(void) -{ - mtcr("cr<1, 2>", 0); -} - /* * fpu_libc_helper() is to help libc to excute: * - mfcr %a, cr<1, 2> diff --git a/arch/csky/abiv2/inc/abi/entry.h b/arch/csky/abiv2/inc/abi/entry.h index 9897a16b45e5d..ac8f65a3e75a2 100644 --- a/arch/csky/abiv2/inc/abi/entry.h +++ b/arch/csky/abiv2/inc/abi/entry.h @@ -13,6 +13,8 @@ #define LSAVE_A1 28 #define LSAVE_A2 32 #define LSAVE_A3 36 +#define LSAVE_A4 40 +#define LSAVE_A5 44 #define KSPTOUSP #define USPTOKSP @@ -31,7 +33,13 @@ mfcr lr, epsr stw lr, (sp, 12) + btsti lr, 31 + bf 1f + addi lr, sp, 152 + br 2f +1: mfcr lr, usp +2: stw lr, (sp, 16) stw a0, (sp, 20) @@ -64,8 +72,10 @@ mtcr a0, epc ldw a0, (sp, 12) mtcr a0, epsr + btsti a0, 31 ldw a0, (sp, 16) mtcr a0, usp + mtcr a0, ss0 #ifdef CONFIG_CPU_HAS_HILO ldw a0, (sp, 140) @@ -86,6 +96,9 @@ addi sp, 40 ldm r16-r30, (sp) addi sp, 72 + bf 1f + mfcr sp, ss0 +1: rte .endm @@ -214,16 +227,13 @@ */ mfcr r6, cr<30, 15> /* Get MSA0 */ 2: - lsri r6, 28 - lsli r6, 28 + lsri r6, 29 + lsli r6, 29 addi r6, 0x1ce mtcr r6, cr<30, 15> /* Set MSA0 */ - lsri r6, 28 - addi r6, 2 - lsli r6, 28 - addi r6, 0x1ce - mtcr r6, cr<31, 15> /* Set MSA1 */ + movi r6, 0 + mtcr r6, cr<31, 15> /* Clr MSA1 */ /* enable MMU */ mfcr r6, cr18 diff --git a/arch/csky/abiv2/inc/abi/fpu.h b/arch/csky/abiv2/inc/abi/fpu.h index 22ca3cf2794a1..09e2700a36936 100644 --- a/arch/csky/abiv2/inc/abi/fpu.h +++ b/arch/csky/abiv2/inc/abi/fpu.h @@ -9,7 +9,8 @@ int fpu_libc_helper(struct pt_regs *regs); void fpu_fpe(struct pt_regs *regs); -void __init init_fpu(void); + +static inline void init_fpu(void) { mtcr("cr<1, 2>", 0); } void save_to_user_fp(struct user_fp *user_fp); void restore_from_user_fp(struct user_fp *user_fp); diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h index 9738eacefdc7e..62bb307459caf 100644 --- a/arch/csky/include/asm/page.h +++ b/arch/csky/include/asm/page.h @@ -28,7 +28,7 @@ #define SSEG_SIZE 0x20000000 #define LOWMEM_LIMIT (SSEG_SIZE * 2) -#define PHYS_OFFSET_OFFSET (CONFIG_RAM_BASE & (SSEG_SIZE - 1)) +#define PHYS_OFFSET_OFFSET (CONFIG_DRAM_BASE & (SSEG_SIZE - 1)) #ifndef __ASSEMBLY__ diff --git a/arch/csky/include/asm/processor.h b/arch/csky/include/asm/processor.h index 21e0bd5293dde..c6bcd7f7c720b 100644 --- a/arch/csky/include/asm/processor.h +++ b/arch/csky/include/asm/processor.h @@ -43,6 +43,7 @@ extern struct cpuinfo_csky cpu_data[]; struct thread_struct { unsigned long ksp; /* kernel stack pointer */ unsigned long sr; /* saved status register */ + unsigned long trap_no; /* saved status register */ /* FPU regs */ struct user_fp __aligned(16) user_fp; diff --git a/arch/csky/include/asm/uaccess.h b/arch/csky/include/asm/uaccess.h index eaa1c3403a424..60f8a41125886 100644 --- a/arch/csky/include/asm/uaccess.h +++ b/arch/csky/include/asm/uaccess.h @@ -254,7 +254,7 @@ do { \ extern int __get_user_bad(void); -#define __copy_user(to, from, n) \ +#define ___copy_to_user(to, from, n) \ do { \ int w0, w1, w2, w3; \ asm volatile( \ @@ -289,31 +289,34 @@ do { \ " subi %0, 4 \n" \ " br 3b \n" \ "5: cmpnei %0, 0 \n" /* 1B */ \ - " bf 8f \n" \ + " bf 13f \n" \ " ldb %3, (%2, 0) \n" \ "6: stb %3, (%1, 0) \n" \ " addi %2, 1 \n" \ " addi %1, 1 \n" \ " subi %0, 1 \n" \ " br 5b \n" \ - "7: br 8f \n" \ + "7: subi %0, 4 \n" \ + "8: subi %0, 4 \n" \ + "12: subi %0, 4 \n" \ + " br 13f \n" \ ".section __ex_table, \"a\" \n" \ ".align 2 \n" \ - ".long 2b, 7b \n" \ - ".long 9b, 7b \n" \ - ".long 10b, 7b \n" \ + ".long 2b, 13f \n" \ + ".long 4b, 13f \n" \ + ".long 6b, 13f \n" \ + ".long 9b, 12b \n" \ + ".long 10b, 8b \n" \ ".long 11b, 7b \n" \ - ".long 4b, 7b \n" \ - ".long 6b, 7b \n" \ ".previous \n" \ - "8: \n" \ + "13: \n" \ : "=r"(n), "=r"(to), "=r"(from), "=r"(w0), \ "=r"(w1), "=r"(w2), "=r"(w3) \ : "0"(n), "1"(to), "2"(from) \ : "memory"); \ } while (0) -#define __copy_user_zeroing(to, from, n) \ +#define ___copy_from_user(to, from, n) \ do { \ int tmp; \ int nsave; \ @@ -356,22 +359,22 @@ do { \ " addi %1, 1 \n" \ " subi %0, 1 \n" \ " br 5b \n" \ - "8: mov %3, %0 \n" \ - " movi %4, 0 \n" \ - "9: stb %4, (%1, 0) \n" \ - " addi %1, 1 \n" \ - " subi %3, 1 \n" \ - " cmpnei %3, 0 \n" \ - " bt 9b \n" \ - " br 7f \n" \ + "8: stw %3, (%1, 0) \n" \ + " subi %0, 4 \n" \ + " bf 7f \n" \ + "9: subi %0, 8 \n" \ + " bf 7f \n" \ + "13: stw %3, (%1, 8) \n" \ + " subi %0, 12 \n" \ + " bf 7f \n" \ ".section __ex_table, \"a\" \n" \ ".align 2 \n" \ - ".long 2b, 8b \n" \ + ".long 2b, 7f \n" \ + ".long 4b, 7f \n" \ + ".long 6b, 7f \n" \ ".long 10b, 8b \n" \ - ".long 11b, 8b \n" \ - ".long 12b, 8b \n" \ - ".long 4b, 8b \n" \ - ".long 6b, 8b \n" \ + ".long 11b, 9b \n" \ + ".long 12b,13b \n" \ ".previous \n" \ "7: \n" \ : "=r"(n), "=r"(to), "=r"(from), "=r"(nsave), \ diff --git a/arch/csky/include/uapi/asm/unistd.h b/arch/csky/include/uapi/asm/unistd.h index 211c983c7282d..ba40189297338 100644 --- a/arch/csky/include/uapi/asm/unistd.h +++ b/arch/csky/include/uapi/asm/unistd.h @@ -1,7 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. +#define __ARCH_WANT_STAT64 +#define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_TIME32_SYSCALLS #include diff --git a/arch/csky/kernel/atomic.S b/arch/csky/kernel/atomic.S index 5b84f11485aeb..3821ef9b75672 100644 --- a/arch/csky/kernel/atomic.S +++ b/arch/csky/kernel/atomic.S @@ -17,10 +17,12 @@ ENTRY(csky_cmpxchg) mfcr a3, epc addi a3, TRAP0_SIZE - subi sp, 8 + subi sp, 16 stw a3, (sp, 0) mfcr a3, epsr stw a3, (sp, 4) + mfcr a3, usp + stw a3, (sp, 8) psrset ee #ifdef CONFIG_CPU_HAS_LDSTEX @@ -47,7 +49,9 @@ ENTRY(csky_cmpxchg) mtcr a3, epc ldw a3, (sp, 4) mtcr a3, epsr - addi sp, 8 + ldw a3, (sp, 8) + mtcr a3, usp + addi sp, 16 KSPTOUSP rte END(csky_cmpxchg) diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S index a7a5b67df8989..4349528fbf38a 100644 --- a/arch/csky/kernel/entry.S +++ b/arch/csky/kernel/entry.S @@ -170,8 +170,10 @@ csky_syscall_trace: ldw a3, (sp, LSAVE_A3) #if defined(__CSKYABIV2__) subi sp, 8 - stw r5, (sp, 0x4) - stw r4, (sp, 0x0) + ldw r9, (sp, LSAVE_A4) + stw r9, (sp, 0x0) + ldw r9, (sp, LSAVE_A5) + stw r9, (sp, 0x4) #else ldw r6, (sp, LSAVE_A4) ldw r7, (sp, LSAVE_A5) @@ -318,8 +320,6 @@ ENTRY(__switch_to) mfcr a2, psr /* Save PSR value */ stw a2, (a3, THREAD_SR) /* Save PSR in task struct */ - bclri a2, 6 /* Disable interrupts */ - mtcr a2, psr SAVE_SWITCH_STACK diff --git a/arch/csky/kernel/head.S b/arch/csky/kernel/head.S index 61989f9241c02..17ed9d2504807 100644 --- a/arch/csky/kernel/head.S +++ b/arch/csky/kernel/head.S @@ -21,6 +21,11 @@ END(_start) ENTRY(_start_smp_secondary) SETUP_MMU + /* copy msa1 from CPU0 */ + lrw r6, secondary_msa1 + ld.w r6, (r6, 0) + mtcr r6, cr<31, 15> + /* set stack point */ lrw r6, secondary_stack ld.w r6, (r6, 0) diff --git a/arch/csky/kernel/perf_callchain.c b/arch/csky/kernel/perf_callchain.c index e68ff375c8f88..35318a635a5fa 100644 --- a/arch/csky/kernel/perf_callchain.c +++ b/arch/csky/kernel/perf_callchain.c @@ -12,12 +12,17 @@ struct stackframe { static int unwind_frame_kernel(struct stackframe *frame) { - if (kstack_end((void *)frame->fp)) + unsigned long low = (unsigned long)task_stack_page(current); + unsigned long high = low + THREAD_SIZE; + + if (unlikely(frame->fp < low || frame->fp > high)) return -EPERM; - if (frame->fp & 0x3 || frame->fp < TASK_SIZE) + + if (kstack_end((void *)frame->fp) || frame->fp & 0x3) return -EPERM; *frame = *(struct stackframe *)frame->fp; + if (__kernel_text_address(frame->lr)) { int graph = 0; @@ -81,10 +86,11 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); unsigned long fp = 0; /* C-SKY does not support virtualization. */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) + if (guest_cbs && guest_cbs->is_in_guest()) return; fp = regs->regs[4]; @@ -105,10 +111,11 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct stackframe fr; /* C-SKY does not support virtualization. */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { pr_warn("C-SKY does not support perf in guest mode!"); return; } diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c index f320d9248a225..397962e11bd19 100644 --- a/arch/csky/kernel/process.c +++ b/arch/csky/kernel/process.c @@ -34,10 +34,11 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return sw->r15; } -int copy_thread(unsigned long clone_flags, +int copy_thread_tls(unsigned long clone_flags, unsigned long usp, unsigned long kthread_arg, - struct task_struct *p) + struct task_struct *p, + unsigned long tls) { struct switch_stack *childstack; struct pt_regs *childregs = task_pt_regs(p); @@ -64,7 +65,7 @@ int copy_thread(unsigned long clone_flags, childregs->usp = usp; if (clone_flags & CLONE_SETTLS) task_thread_info(p)->tp_value = childregs->tls - = childregs->regs[0]; + = tls; childregs->a0 = 0; childstack->r15 = (unsigned long) ret_from_fork; diff --git a/arch/csky/kernel/ptrace.c b/arch/csky/kernel/ptrace.c index 313623a19ecbf..885194720fe0b 100644 --- a/arch/csky/kernel/ptrace.c +++ b/arch/csky/kernel/ptrace.c @@ -96,7 +96,8 @@ static int gpr_set(struct task_struct *target, if (ret) return ret; - regs.sr = task_pt_regs(target)->sr; + /* BIT(0) of regs.sr is Condition Code/Carry bit */ + regs.sr = (regs.sr & BIT(0)) | (task_pt_regs(target)->sr & ~BIT(0)); #ifdef CONFIG_CPU_HAS_HILO regs.dcsr = task_pt_regs(target)->dcsr; #endif diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c index 23ee604aafdb6..2c1e253abb74c 100644 --- a/arch/csky/kernel/setup.c +++ b/arch/csky/kernel/setup.c @@ -24,26 +24,9 @@ struct screen_info screen_info = { }; #endif -phys_addr_t __init_memblock memblock_end_of_REG0(void) -{ - return (memblock.memory.regions[0].base + - memblock.memory.regions[0].size); -} - -phys_addr_t __init_memblock memblock_start_of_REG1(void) -{ - return memblock.memory.regions[1].base; -} - -size_t __init_memblock memblock_size_of_REG1(void) -{ - return memblock.memory.regions[1].size; -} - static void __init csky_memblock_init(void) { unsigned long zone_size[MAX_NR_ZONES]; - unsigned long zhole_size[MAX_NR_ZONES]; signed long size; memblock_reserve(__pa(_stext), _end - _stext); @@ -57,54 +40,36 @@ static void __init csky_memblock_init(void) memblock_dump_all(); memset(zone_size, 0, sizeof(zone_size)); - memset(zhole_size, 0, sizeof(zhole_size)); min_low_pfn = PFN_UP(memblock_start_of_DRAM()); - max_pfn = PFN_DOWN(memblock_end_of_DRAM()); - - max_low_pfn = PFN_UP(memblock_end_of_REG0()); - if (max_low_pfn == 0) - max_low_pfn = max_pfn; + max_low_pfn = max_pfn = PFN_DOWN(memblock_end_of_DRAM()); size = max_pfn - min_low_pfn; - if (memblock.memory.cnt > 1) { - zone_size[ZONE_NORMAL] = - PFN_DOWN(memblock_start_of_REG1()) - min_low_pfn; - zhole_size[ZONE_NORMAL] = - PFN_DOWN(memblock_start_of_REG1()) - max_low_pfn; + if (size <= PFN_DOWN(SSEG_SIZE - PHYS_OFFSET_OFFSET)) + zone_size[ZONE_NORMAL] = size; + else if (size < PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET)) { + zone_size[ZONE_NORMAL] = + PFN_DOWN(SSEG_SIZE - PHYS_OFFSET_OFFSET); + max_low_pfn = min_low_pfn + zone_size[ZONE_NORMAL]; } else { - if (size <= PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET)) - zone_size[ZONE_NORMAL] = max_pfn - min_low_pfn; - else { - zone_size[ZONE_NORMAL] = + zone_size[ZONE_NORMAL] = PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET); - max_low_pfn = min_low_pfn + zone_size[ZONE_NORMAL]; - } + max_low_pfn = min_low_pfn + zone_size[ZONE_NORMAL]; + write_mmu_msa1(read_mmu_msa0() + SSEG_SIZE); } #ifdef CONFIG_HIGHMEM - size = 0; - if (memblock.memory.cnt > 1) { - size = PFN_DOWN(memblock_size_of_REG1()); - highstart_pfn = PFN_DOWN(memblock_start_of_REG1()); - } else { - size = max_pfn - min_low_pfn - - PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET); - highstart_pfn = min_low_pfn + - PFN_DOWN(LOWMEM_LIMIT - PHYS_OFFSET_OFFSET); - } - - if (size > 0) - zone_size[ZONE_HIGHMEM] = size; + zone_size[ZONE_HIGHMEM] = max_pfn - max_low_pfn; - highend_pfn = max_pfn; + highstart_pfn = max_low_pfn; + highend_pfn = max_pfn; #endif memblock_set_current_limit(PFN_PHYS(max_low_pfn)); dma_contiguous_reserve(0); - free_area_init_node(0, zone_size, min_low_pfn, zhole_size); + free_area_init_node(0, zone_size, min_low_pfn, NULL); } void __init setup_arch(char **cmdline_p) diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index 9b1b7c039ddf9..63c2bbe39d03b 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -52,10 +52,14 @@ static long restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) { int err = 0; + unsigned long sr = regs->sr; /* sc_pt_regs is structured the same as the start of pt_regs */ err |= __copy_from_user(regs, &sc->sc_pt_regs, sizeof(struct pt_regs)); + /* BIT(0) of regs->sr is Condition Code/Carry bit */ + regs->sr = (sr & ~1) | (regs->sr & 1); + /* Restore the floating-point state. */ err |= restore_fpu_state(sc); diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c index b753d382e4cef..b5c5bc3afeb5c 100644 --- a/arch/csky/kernel/smp.c +++ b/arch/csky/kernel/smp.c @@ -22,6 +22,9 @@ #include #include #include +#ifdef CONFIG_CPU_HAS_FPU +#include +#endif struct ipi_data_struct { unsigned long bits ____cacheline_aligned; @@ -120,7 +123,7 @@ void __init setup_smp_ipi(void) int rc; if (ipi_irq == 0) - panic("%s IRQ mapping failed\n", __func__); + return; rc = request_percpu_irq(ipi_irq, handle_ipi, "IPI Interrupt", &ipi_dummy_dev); @@ -156,6 +159,8 @@ volatile unsigned int secondary_hint; volatile unsigned int secondary_ccr; volatile unsigned int secondary_stack; +unsigned long secondary_msa1; + int __cpu_up(unsigned int cpu, struct task_struct *tidle) { unsigned long mask = 1 << cpu; @@ -164,6 +169,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) (unsigned int) task_stack_page(tidle) + THREAD_SIZE - 8; secondary_hint = mfcr("cr31"); secondary_ccr = mfcr("cr18"); + secondary_msa1 = read_mmu_msa1(); /* * Because other CPUs are in reset status, we must flush data diff --git a/arch/csky/kernel/traps.c b/arch/csky/kernel/traps.c index b057480e7463c..63715cb90ee99 100644 --- a/arch/csky/kernel/traps.c +++ b/arch/csky/kernel/traps.c @@ -115,8 +115,9 @@ asmlinkage void trap_c(struct pt_regs *regs) int sig; unsigned long vector; siginfo_t info; + struct task_struct *tsk = current; - vector = (mfcr("psr") >> 16) & 0xff; + vector = (regs->sr >> 16) & 0xff; switch (vector) { case VEC_ZERODIV: @@ -129,6 +130,7 @@ asmlinkage void trap_c(struct pt_regs *regs) sig = SIGTRAP; break; case VEC_ILLEGAL: + tsk->thread.trap_no = vector; die_if_kernel("Kernel mode ILLEGAL", regs, vector); #ifndef CONFIG_CPU_NO_USER_BKPT if (*(uint16_t *)instruction_pointer(regs) != USR_BKPT) @@ -146,16 +148,20 @@ asmlinkage void trap_c(struct pt_regs *regs) sig = SIGTRAP; break; case VEC_ACCESS: + tsk->thread.trap_no = vector; return buserr(regs); #ifdef CONFIG_CPU_NEED_SOFTALIGN case VEC_ALIGN: + tsk->thread.trap_no = vector; return csky_alignment(regs); #endif #ifdef CONFIG_CPU_HAS_FPU case VEC_FPE: + tsk->thread.trap_no = vector; die_if_kernel("Kernel mode FPE", regs, vector); return fpu_fpe(regs); case VEC_PRIV: + tsk->thread.trap_no = vector; die_if_kernel("Kernel mode PRIV", regs, vector); if (fpu_libc_helper(regs)) return; @@ -164,5 +170,8 @@ asmlinkage void trap_c(struct pt_regs *regs) sig = SIGSEGV; break; } + + tsk->thread.trap_no = vector; + send_sig(sig, current, 0); } diff --git a/arch/csky/lib/usercopy.c b/arch/csky/lib/usercopy.c index 647a23986fb50..3c9bd645e6431 100644 --- a/arch/csky/lib/usercopy.c +++ b/arch/csky/lib/usercopy.c @@ -7,10 +7,7 @@ unsigned long raw_copy_from_user(void *to, const void *from, unsigned long n) { - if (access_ok(from, n)) - __copy_user_zeroing(to, from, n); - else - memset(to, 0, n); + ___copy_from_user(to, from, n); return n; } EXPORT_SYMBOL(raw_copy_from_user); @@ -18,8 +15,7 @@ EXPORT_SYMBOL(raw_copy_from_user); unsigned long raw_copy_to_user(void *to, const void *from, unsigned long n) { - if (access_ok(to, n)) - __copy_user(to, from, n); + ___copy_to_user(to, from, n); return n; } EXPORT_SYMBOL(raw_copy_to_user); diff --git a/arch/csky/mm/Makefile b/arch/csky/mm/Makefile index c94ef64810986..efb7ebab342b3 100644 --- a/arch/csky/mm/Makefile +++ b/arch/csky/mm/Makefile @@ -1,8 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only ifeq ($(CONFIG_CPU_HAS_CACHEV2),y) obj-y += cachev2.o +CFLAGS_REMOVE_cachev2.o = $(CC_FLAGS_FTRACE) else obj-y += cachev1.o +CFLAGS_REMOVE_cachev1.o = $(CC_FLAGS_FTRACE) endif obj-y += dma-mapping.o diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c index f76618b630f91..562c7f7087490 100644 --- a/arch/csky/mm/fault.c +++ b/arch/csky/mm/fault.c @@ -179,11 +179,14 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, bad_area_nosemaphore: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { + tsk->thread.trap_no = (regs->sr >> 16) & 0xff; force_sig_fault(SIGSEGV, si_code, (void __user *)address); return; } no_context: + tsk->thread.trap_no = (regs->sr >> 16) & 0xff; + /* Are we prepared to handle this kernel fault? */ if (fixup_exception(regs)) return; @@ -198,6 +201,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, die_if_kernel("Oops", regs, write); out_of_memory: + tsk->thread.trap_no = (regs->sr >> 16) & 0xff; + /* * We ran out of memory, call the OOM killer, and return the userspace * (which will retry the fault, or kill us if we got oom-killed). @@ -206,6 +211,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, return; do_sigbus: + tsk->thread.trap_no = (regs->sr >> 16) & 0xff; + up_read(&mm->mmap_sem); /* Kernel mode? Handle exceptions or die */ diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index d4c2292ea46bc..00e96278b3776 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -31,6 +31,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss; +EXPORT_SYMBOL(invalid_pte_table); unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); diff --git a/arch/h8300/kernel/asm-offsets.c b/arch/h8300/kernel/asm-offsets.c index 85e60509f0a83..d4b53af657c84 100644 --- a/arch/h8300/kernel/asm-offsets.c +++ b/arch/h8300/kernel/asm-offsets.c @@ -63,6 +63,9 @@ int main(void) OFFSET(TI_FLAGS, thread_info, flags); OFFSET(TI_CPU, thread_info, cpu); OFFSET(TI_PRE, thread_info, preempt_count); +#ifdef CONFIG_PREEMPTION + DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); +#endif return 0; } diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index 12cd9231c4b8f..0231d69c8bf2b 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -91,7 +91,7 @@ static inline void atomic_##op(int i, atomic_t *v) \ "1: %0 = memw_locked(%1);\n" \ " %0 = "#op "(%0,%2);\n" \ " memw_locked(%1,P3)=%0;\n" \ - " if !P3 jump 1b;\n" \ + " if (!P3) jump 1b;\n" \ : "=&r" (output) \ : "r" (&v->counter), "r" (i) \ : "memory", "p3" \ @@ -107,7 +107,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ "1: %0 = memw_locked(%1);\n" \ " %0 = "#op "(%0,%2);\n" \ " memw_locked(%1,P3)=%0;\n" \ - " if !P3 jump 1b;\n" \ + " if (!P3) jump 1b;\n" \ : "=&r" (output) \ : "r" (&v->counter), "r" (i) \ : "memory", "p3" \ @@ -124,7 +124,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ "1: %0 = memw_locked(%2);\n" \ " %1 = "#op "(%0,%3);\n" \ " memw_locked(%2,P3)=%1;\n" \ - " if !P3 jump 1b;\n" \ + " if (!P3) jump 1b;\n" \ : "=&r" (output), "=&r" (val) \ : "r" (&v->counter), "r" (i) \ : "memory", "p3" \ @@ -173,7 +173,7 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) " }" " memw_locked(%2, p3) = %1;" " {" - " if !p3 jump 1b;" + " if (!p3) jump 1b;" " }" "2:" : "=&r" (__oldval), "=&r" (tmp) diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h index 47384b094b944..71429f756af0f 100644 --- a/arch/hexagon/include/asm/bitops.h +++ b/arch/hexagon/include/asm/bitops.h @@ -38,7 +38,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr) "1: R12 = memw_locked(R10);\n" " { P0 = tstbit(R12,R11); R12 = clrbit(R12,R11); }\n" " memw_locked(R10,P1) = R12;\n" - " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n" + " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n" : "=&r" (oldval) : "r" (addr), "r" (nr) : "r10", "r11", "r12", "p0", "p1", "memory" @@ -62,7 +62,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr) "1: R12 = memw_locked(R10);\n" " { P0 = tstbit(R12,R11); R12 = setbit(R12,R11); }\n" " memw_locked(R10,P1) = R12;\n" - " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n" + " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n" : "=&r" (oldval) : "r" (addr), "r" (nr) : "r10", "r11", "r12", "p0", "p1", "memory" @@ -88,7 +88,7 @@ static inline int test_and_change_bit(int nr, volatile void *addr) "1: R12 = memw_locked(R10);\n" " { P0 = tstbit(R12,R11); R12 = togglebit(R12,R11); }\n" " memw_locked(R10,P1) = R12;\n" - " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n" + " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n" : "=&r" (oldval) : "r" (addr), "r" (nr) : "r10", "r11", "r12", "p0", "p1", "memory" @@ -223,7 +223,7 @@ static inline int ffs(int x) int r; asm("{ P0 = cmp.eq(%1,#0); %0 = ct0(%1);}\n" - "{ if P0 %0 = #0; if !P0 %0 = add(%0,#1);}\n" + "{ if (P0) %0 = #0; if (!P0) %0 = add(%0,#1);}\n" : "=&r" (r) : "r" (x) : "p0"); diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h index 6091322c3af96..92b8a02e588ac 100644 --- a/arch/hexagon/include/asm/cmpxchg.h +++ b/arch/hexagon/include/asm/cmpxchg.h @@ -30,7 +30,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, __asm__ __volatile__ ( "1: %0 = memw_locked(%1);\n" /* load into retval */ " memw_locked(%1,P0) = %2;\n" /* store into memory */ - " if !P0 jump 1b;\n" + " if (!P0) jump 1b;\n" : "=&r" (retval) : "r" (ptr), "r" (x) : "memory", "p0" diff --git a/arch/hexagon/include/asm/futex.h b/arch/hexagon/include/asm/futex.h index cb635216a732c..0191f7c7193e6 100644 --- a/arch/hexagon/include/asm/futex.h +++ b/arch/hexagon/include/asm/futex.h @@ -16,7 +16,7 @@ /* For example: %1 = %4 */ \ insn \ "2: memw_locked(%3,p2) = %1;\n" \ - " if !p2 jump 1b;\n" \ + " if (!p2) jump 1b;\n" \ " %1 = #0;\n" \ "3:\n" \ ".section .fixup,\"ax\"\n" \ @@ -84,10 +84,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, "1: %1 = memw_locked(%3)\n" " {\n" " p2 = cmp.eq(%1,%4)\n" - " if !p2.new jump:NT 3f\n" + " if (!p2.new) jump:NT 3f\n" " }\n" "2: memw_locked(%3,p2) = %5\n" - " if !p2 jump 1b\n" + " if (!p2) jump 1b\n" "3:\n" ".section .fixup,\"ax\"\n" "4: %0 = #%6\n" diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h index ba1a444d55b30..68a68147504d0 100644 --- a/arch/hexagon/include/asm/io.h +++ b/arch/hexagon/include/asm/io.h @@ -171,16 +171,10 @@ static inline void writel(u32 data, volatile void __iomem *addr) #define writew_relaxed __raw_writew #define writel_relaxed __raw_writel -/* - * Need an mtype somewhere in here, for cache type deals? - * This is probably too long for an inline. - */ -void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size); +void __iomem *ioremap(unsigned long phys_addr, unsigned long size); +#define ioremap_nocache ioremap +#define ioremap_uc(X, Y) ioremap((X), (Y)) -static inline void __iomem *ioremap(unsigned long phys_addr, unsigned long size) -{ - return ioremap_nocache(phys_addr, size); -} static inline void iounmap(volatile void __iomem *addr) { diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h index bfe07d842ff35..ef103b73bec83 100644 --- a/arch/hexagon/include/asm/spinlock.h +++ b/arch/hexagon/include/asm/spinlock.h @@ -30,9 +30,9 @@ static inline void arch_read_lock(arch_rwlock_t *lock) __asm__ __volatile__( "1: R6 = memw_locked(%0);\n" " { P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" " memw_locked(%0,P3) = R6;\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -46,7 +46,7 @@ static inline void arch_read_unlock(arch_rwlock_t *lock) "1: R6 = memw_locked(%0);\n" " R6 = add(R6,#-1);\n" " memw_locked(%0,P3) = R6\n" - " if !P3 jump 1b;\n" + " if (!P3) jump 1b;\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -61,7 +61,7 @@ static inline int arch_read_trylock(arch_rwlock_t *lock) __asm__ __volatile__( " R6 = memw_locked(%1);\n" " { %0 = #0; P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n" - " { if !P3 jump 1f; }\n" + " { if (!P3) jump 1f; }\n" " memw_locked(%1,P3) = R6;\n" " { %0 = P3 }\n" "1:\n" @@ -78,9 +78,9 @@ static inline void arch_write_lock(arch_rwlock_t *lock) __asm__ __volatile__( "1: R6 = memw_locked(%0)\n" " { P3 = cmp.eq(R6,#0); R6 = #-1;}\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" " memw_locked(%0,P3) = R6;\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -94,7 +94,7 @@ static inline int arch_write_trylock(arch_rwlock_t *lock) __asm__ __volatile__( " R6 = memw_locked(%1)\n" " { %0 = #0; P3 = cmp.eq(R6,#0); R6 = #-1;}\n" - " { if !P3 jump 1f; }\n" + " { if (!P3) jump 1f; }\n" " memw_locked(%1,P3) = R6;\n" " %0 = P3;\n" "1:\n" @@ -117,9 +117,9 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) __asm__ __volatile__( "1: R6 = memw_locked(%0);\n" " P3 = cmp.eq(R6,#0);\n" - " { if !P3 jump 1b; R6 = #1; }\n" + " { if (!P3) jump 1b; R6 = #1; }\n" " memw_locked(%0,P3) = R6;\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -139,7 +139,7 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) __asm__ __volatile__( " R6 = memw_locked(%1);\n" " P3 = cmp.eq(R6,#0);\n" - " { if !P3 jump 1f; R6 = #1; %0 = #0; }\n" + " { if (!P3) jump 1f; R6 = #1; %0 = #0; }\n" " memw_locked(%1,P3) = R6;\n" " %0 = P3;\n" "1:\n" diff --git a/arch/hexagon/kernel/hexagon_ksyms.c b/arch/hexagon/kernel/hexagon_ksyms.c index cf8974beb5006..b3dbb472572ee 100644 --- a/arch/hexagon/kernel/hexagon_ksyms.c +++ b/arch/hexagon/kernel/hexagon_ksyms.c @@ -20,7 +20,7 @@ EXPORT_SYMBOL(__vmgetie); EXPORT_SYMBOL(__vmsetie); EXPORT_SYMBOL(__vmyield); EXPORT_SYMBOL(empty_zero_page); -EXPORT_SYMBOL(ioremap_nocache); +EXPORT_SYMBOL(ioremap); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); diff --git a/arch/hexagon/kernel/stacktrace.c b/arch/hexagon/kernel/stacktrace.c index 35f29423fda80..5ed02f699479a 100644 --- a/arch/hexagon/kernel/stacktrace.c +++ b/arch/hexagon/kernel/stacktrace.c @@ -11,8 +11,6 @@ #include #include -register unsigned long current_frame_pointer asm("r30"); - struct stackframe { unsigned long fp; unsigned long rets; @@ -30,7 +28,7 @@ void save_stack_trace(struct stack_trace *trace) low = (unsigned long)task_stack_page(current); high = low + THREAD_SIZE; - fp = current_frame_pointer; + fp = (unsigned long)__builtin_frame_address(0); while (fp >= low && fp <= (high - sizeof(*frame))) { frame = (struct stackframe *)fp; diff --git a/arch/hexagon/kernel/vm_entry.S b/arch/hexagon/kernel/vm_entry.S index 12242c27e2df5..4023fdbea4902 100644 --- a/arch/hexagon/kernel/vm_entry.S +++ b/arch/hexagon/kernel/vm_entry.S @@ -369,7 +369,7 @@ ret_from_fork: R26.L = #LO(do_work_pending); R0 = #VM_INT_DISABLE; } - if P0 jump check_work_pending + if (P0) jump check_work_pending { R0 = R25; callr R24 diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S index 78f2418e97c84..3c6b7abf0080e 100644 --- a/arch/hexagon/kernel/vmlinux.lds.S +++ b/arch/hexagon/kernel/vmlinux.lds.S @@ -60,13 +60,8 @@ SECTIONS _end = .; - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - EXIT_CALL - } - STABS_DEBUG DWARF_DEBUG + DISCARDS } diff --git a/arch/hexagon/lib/io.c b/arch/hexagon/lib/io.c index d35d69d6588c4..55f75392857b0 100644 --- a/arch/hexagon/lib/io.c +++ b/arch/hexagon/lib/io.c @@ -27,6 +27,7 @@ void __raw_readsw(const void __iomem *addr, void *data, int len) *dst++ = *src; } +EXPORT_SYMBOL(__raw_readsw); /* * __raw_writesw - read words a short at a time @@ -47,6 +48,7 @@ void __raw_writesw(void __iomem *addr, const void *data, int len) } +EXPORT_SYMBOL(__raw_writesw); /* Pretty sure len is pre-adjusted for the length of the access already */ void __raw_readsl(const void __iomem *addr, void *data, int len) @@ -62,6 +64,7 @@ void __raw_readsl(const void __iomem *addr, void *data, int len) } +EXPORT_SYMBOL(__raw_readsl); void __raw_writesl(void __iomem *addr, const void *data, int len) { @@ -76,3 +79,4 @@ void __raw_writesl(void __iomem *addr, const void *data, int len) } +EXPORT_SYMBOL(__raw_writesl); diff --git a/arch/hexagon/mm/ioremap.c b/arch/hexagon/mm/ioremap.c index 77d8e1e69e9b9..b103d83b5fbb2 100644 --- a/arch/hexagon/mm/ioremap.c +++ b/arch/hexagon/mm/ioremap.c @@ -9,7 +9,7 @@ #include #include -void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size) +void __iomem *ioremap(unsigned long phys_addr, unsigned long size) { unsigned long last_addr, addr; unsigned long offset = phys_addr & ~PAGE_MASK; diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug index 40ca23bd228d6..2ce008e2d1644 100644 --- a/arch/ia64/Kconfig.debug +++ b/arch/ia64/Kconfig.debug @@ -39,7 +39,7 @@ config DISABLE_VHPT config IA64_DEBUG_CMPXCHG bool "Turn on compare-and-exchange bug checking (slow!)" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && PRINTK help Selecting this option turns on bug checking for the IA-64 compare-and-exchange instructions. This is slow! Itaniums diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 32240000dc0c8..2876a7df1b0a1 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -40,7 +40,7 @@ $(error Sorry, you need a newer version of the assember, one that is built from endif quiet_cmd_gzip = GZIP $@ -cmd_gzip = cat $(real-prereqs) | gzip -n -f -9 > $@ +cmd_gzip = cat $(real-prereqs) | $(KGZIP) -n -f -9 > $@ quiet_cmd_objcopy = OBJCOPY $@ cmd_objcopy = $(OBJCOPY) $(OBJCOPYFLAGS) $(OBJCOPYFLAGS_$(@F)) $< $@ diff --git a/arch/ia64/configs/zx1_defconfig b/arch/ia64/configs/zx1_defconfig index 8c92e095f8bb9..d42f79a33e912 100644 --- a/arch/ia64/configs/zx1_defconfig +++ b/arch/ia64/configs/zx1_defconfig @@ -35,7 +35,6 @@ CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_CHR_DEV_OSST=y CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y diff --git a/arch/ia64/include/asm/module.h b/arch/ia64/include/asm/module.h index f319144260ce1..9fbf32e6e8813 100644 --- a/arch/ia64/include/asm/module.h +++ b/arch/ia64/include/asm/module.h @@ -14,16 +14,20 @@ struct elf64_shdr; /* forward declration */ struct mod_arch_specific { + /* Used only at module load time. */ struct elf64_shdr *core_plt; /* core PLT section */ struct elf64_shdr *init_plt; /* init PLT section */ struct elf64_shdr *got; /* global offset table */ struct elf64_shdr *opd; /* official procedure descriptors */ struct elf64_shdr *unwind; /* unwind-table section */ unsigned long gp; /* global-pointer for module */ + unsigned int next_got_entry; /* index of next available got entry */ + /* Used at module run and cleanup time. */ void *core_unw_table; /* core unwind-table cookie returned by unwinder */ void *init_unw_table; /* init unwind-table cookie returned by unwinder */ - unsigned int next_got_entry; /* index of next available got entry */ + void *opd_addr; /* symbolize uses .opd to get to actual function */ + unsigned long opd_size; }; #define MODULE_PROC_FAMILY "ia64" diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index 95a2ec37400fb..0025f890b5a38 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -552,7 +552,7 @@ ia64_get_irr(unsigned int vector) { unsigned int reg = vector / 64; unsigned int bit = vector % 64; - u64 irr; + unsigned long irr; switch (reg) { case 0: irr = ia64_getreg(_IA64_REG_CR_IRR0); break; diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h index 7ff574d56429c..f31e07fc936d9 100644 --- a/arch/ia64/include/asm/ptrace.h +++ b/arch/ia64/include/asm/ptrace.h @@ -54,8 +54,7 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) { - /* FIXME: should this be bspstore + nr_dirty regs? */ - return regs->ar_bspstore; + return regs->r12; } static inline int is_syscall_success(struct pt_regs *regs) @@ -79,11 +78,6 @@ static inline long regs_return_value(struct pt_regs *regs) unsigned long __ip = instruction_pointer(regs); \ (__ip & ~3UL) + ((__ip & 3UL) << 2); \ }) -/* - * Why not default? Because user_stack_pointer() on ia64 gives register - * stack backing store instead... - */ -#define current_user_stack_pointer() (current_pt_regs()->r12) /* given a pointer to a task_struct, return the user's pt_regs */ # define task_pt_regs(t) (((struct pt_regs *) ((char *) (t) + IA64_STK_OFFSET)) - 1) diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h index 6c6f16e409a87..0d23c00493018 100644 --- a/arch/ia64/include/asm/syscall.h +++ b/arch/ia64/include/asm/syscall.h @@ -32,7 +32,7 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return regs->r10 == -1 ? regs->r8:0; + return regs->r10 == -1 ? -regs->r8:0; } static inline long syscall_get_return_value(struct task_struct *task, diff --git a/arch/ia64/include/asm/timex.h b/arch/ia64/include/asm/timex.h index 869a3ac6bf23a..7ccc077a60bed 100644 --- a/arch/ia64/include/asm/timex.h +++ b/arch/ia64/include/asm/timex.h @@ -39,6 +39,7 @@ get_cycles (void) ret = ia64_getreg(_IA64_REG_AR_ITC); return ret; } +#define get_cycles get_cycles extern void ia64_cpu_local_tick (void); extern unsigned long long ia64_native_sched_clock (void); diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 1a8df6669eee6..18d6008b151fd 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -41,7 +41,7 @@ obj-y += esi_stub.o # must be in kernel proper endif obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o -obj-$(CONFIG_BINFMT_ELF) += elfcore.o +obj-$(CONFIG_ELF_CORE) += elfcore.o # fp_emulate() expects f2-f5,f16-f31 to contain the user-level state. CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31 diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 70d1587ddcd46..361b109933725 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -448,7 +448,8 @@ void __init acpi_numa_fixup(void) if (srat_num_cpus == 0) { node_set_online(0); node_cpuid[0].phys_id = hard_smp_processor_id(); - return; + slit_distance(0, 0) = LOCAL_DISTANCE; + goto out; } /* @@ -491,7 +492,7 @@ void __init acpi_numa_fixup(void) for (j = 0; j < MAX_NUMNODES; j++) slit_distance(i, j) = i == j ? LOCAL_DISTANCE : REMOTE_DISTANCE; - return; + goto out; } memset(numa_slit, -1, sizeof(numa_slit)); @@ -516,6 +517,8 @@ void __init acpi_numa_fixup(void) printk("\n"); } #endif +out: + node_possible_map = node_online_map; } #endif /* CONFIG_ACPI_NUMA */ diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c index 8b5b8e6bc9d9a..dd5bfed52031d 100644 --- a/arch/ia64/kernel/err_inject.c +++ b/arch/ia64/kernel/err_inject.c @@ -59,7 +59,7 @@ show_##name(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ u32 cpu=dev->id; \ - return sprintf(buf, "%lx\n", name[cpu]); \ + return sprintf(buf, "%llx\n", name[cpu]); \ } #define store(name) \ @@ -86,9 +86,9 @@ store_call_start(struct device *dev, struct device_attribute *attr, #ifdef ERR_INJ_DEBUG printk(KERN_DEBUG "pal_mc_err_inject for cpu%d:\n", cpu); - printk(KERN_DEBUG "err_type_info=%lx,\n", err_type_info[cpu]); - printk(KERN_DEBUG "err_struct_info=%lx,\n", err_struct_info[cpu]); - printk(KERN_DEBUG "err_data_buffer=%lx, %lx, %lx.\n", + printk(KERN_DEBUG "err_type_info=%llx,\n", err_type_info[cpu]); + printk(KERN_DEBUG "err_struct_info=%llx,\n", err_struct_info[cpu]); + printk(KERN_DEBUG "err_data_buffer=%llx, %llx, %llx.\n", err_data_buffer[cpu].data1, err_data_buffer[cpu].data2, err_data_buffer[cpu].data3); @@ -117,8 +117,8 @@ store_call_start(struct device *dev, struct device_attribute *attr, #ifdef ERR_INJ_DEBUG printk(KERN_DEBUG "Returns: status=%d,\n", (int)status[cpu]); - printk(KERN_DEBUG "capabilities=%lx,\n", capabilities[cpu]); - printk(KERN_DEBUG "resources=%lx\n", resources[cpu]); + printk(KERN_DEBUG "capabilities=%llx,\n", capabilities[cpu]); + printk(KERN_DEBUG "resources=%llx\n", resources[cpu]); #endif return size; } @@ -131,7 +131,7 @@ show_virtual_to_phys(struct device *dev, struct device_attribute *attr, char *buf) { unsigned int cpu=dev->id; - return sprintf(buf, "%lx\n", phys_addr[cpu]); + return sprintf(buf, "%llx\n", phys_addr[cpu]); } static ssize_t @@ -145,7 +145,7 @@ store_virtual_to_phys(struct device *dev, struct device_attribute *attr, ret = get_user_pages_fast(virt_addr, 1, FOLL_WRITE, NULL); if (ret<=0) { #ifdef ERR_INJ_DEBUG - printk("Virtual address %lx is not existing.\n",virt_addr); + printk("Virtual address %llx is not existing.\n", virt_addr); #endif return -EINVAL; } @@ -163,7 +163,7 @@ show_err_data_buffer(struct device *dev, { unsigned int cpu=dev->id; - return sprintf(buf, "%lx, %lx, %lx\n", + return sprintf(buf, "%llx, %llx, %llx\n", err_data_buffer[cpu].data1, err_data_buffer[cpu].data2, err_data_buffer[cpu].data3); @@ -178,13 +178,13 @@ store_err_data_buffer(struct device *dev, int ret; #ifdef ERR_INJ_DEBUG - printk("write err_data_buffer=[%lx,%lx,%lx] on cpu%d\n", + printk("write err_data_buffer=[%llx,%llx,%llx] on cpu%d\n", err_data_buffer[cpu].data1, err_data_buffer[cpu].data2, err_data_buffer[cpu].data3, cpu); #endif - ret=sscanf(buf, "%lx, %lx, %lx", + ret = sscanf(buf, "%llx, %llx, %llx", &err_data_buffer[cpu].data1, &err_data_buffer[cpu].data2, &err_data_buffer[cpu].data3); diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index b8356edbde659..fa10d51f62177 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -411,7 +411,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = - ((struct fnptr *)kretprobe_trampoline)->ip; + (unsigned long)dereference_function_descriptor(kretprobe_trampoline); INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); @@ -487,7 +487,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, ri->ret_addr = (kprobe_opcode_t *)regs->b0; /* Replace the return addr with trampoline addr */ - regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip; + regs->b0 = (unsigned long)dereference_function_descriptor(kretprobe_trampoline); } /* Check the instruction in the slot is break */ @@ -991,14 +991,14 @@ static struct kprobe trampoline_p = { int __init arch_init_kprobes(void) { trampoline_p.addr = - (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip; + dereference_function_descriptor(kretprobe_trampoline); return register_kprobe(&trampoline_p); } int __kprobes arch_trampoline_kprobe(struct kprobe *p) { if (p->addr == - (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip) + dereference_function_descriptor(kretprobe_trampoline)) return 1; return 0; diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index bf2cb9294795c..d96c68122eae0 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1851,7 +1851,7 @@ ia64_mca_cpu_init(void *cpu_data) data = mca_bootmem(); first_time = 0; } else - data = (void *)__get_free_pages(GFP_KERNEL, + data = (void *)__get_free_pages(GFP_ATOMIC, get_order(sz)); if (!data) panic("Could not allocate MCA memory for cpu %d\n", diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index 4d0ab323dee8c..2a40268c3d494 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -343,7 +343,7 @@ init_record_index_pools(void) /* - 2 - */ sect_min_size = sal_log_sect_min_sizes[0]; - for (i = 1; i < sizeof sal_log_sect_min_sizes/sizeof(size_t); i++) + for (i = 1; i < ARRAY_SIZE(sal_log_sect_min_sizes); i++) if (sect_min_size > sal_log_sect_min_sizes[i]) sect_min_size = sal_log_sect_min_sizes[i]; diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index 1a42ba885188a..ee693c8cec498 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -905,9 +905,31 @@ register_unwind_table (struct module *mod) int module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { + struct mod_arch_specific *mas = &mod->arch; + DEBUGP("%s: init: entry=%p\n", __func__, mod->init); - if (mod->arch.unwind) + if (mas->unwind) register_unwind_table(mod); + + /* + * ".opd" was already relocated to the final destination. Store + * it's address for use in symbolizer. + */ + mas->opd_addr = (void *)mas->opd->sh_addr; + mas->opd_size = mas->opd->sh_size; + + /* + * Module relocation was already done at this point. Section + * headers are about to be deleted. Wipe out load-time context. + */ + mas->core_plt = NULL; + mas->init_plt = NULL; + mas->got = NULL; + mas->opd = NULL; + mas->unwind = NULL; + mas->gp = 0; + mas->next_got_entry = 0; + return 0; } @@ -926,10 +948,9 @@ module_arch_cleanup (struct module *mod) void *dereference_module_function_descriptor(struct module *mod, void *ptr) { - Elf64_Shdr *opd = mod->arch.opd; + struct mod_arch_specific *mas = &mod->arch; - if (ptr < (void *)opd->sh_addr || - ptr >= (void *)(opd->sh_addr + opd->sh_size)) + if (ptr < mas->opd_addr || ptr >= mas->opd_addr + mas->opd_size) return ptr; return dereference_function_descriptor(ptr); diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index bf9c24d9ce84e..54e12b0ecebdf 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -2147,27 +2147,39 @@ static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data) { struct syscall_get_set_args *args = data; struct pt_regs *pt = args->regs; - unsigned long *krbs, cfm, ndirty; + unsigned long *krbs, cfm, ndirty, nlocals, nouts; int i, count; if (unw_unwind_to_user(info) < 0) return; + /* + * We get here via a few paths: + * - break instruction: cfm is shared with caller. + * syscall args are in out= regs, locals are non-empty. + * - epsinstruction: cfm is set by br.call + * locals don't exist. + * + * For both cases argguments are reachable in cfm.sof - cfm.sol. + * CFM: [ ... | sor: 17..14 | sol : 13..7 | sof : 6..0 ] + */ cfm = pt->cr_ifs; + nlocals = (cfm >> 7) & 0x7f; /* aka sol */ + nouts = (cfm & 0x7f) - nlocals; /* aka sof - sol */ krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8; ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19)); count = 0; if (in_syscall(pt)) - count = min_t(int, args->n, cfm & 0x7f); + count = min_t(int, args->n, nouts); + /* Iterate over outs. */ for (i = 0; i < count; i++) { + int j = ndirty + nlocals + i + args->i; if (args->rw) - *ia64_rse_skip_regs(krbs, ndirty + i + args->i) = - args->args[i]; + *ia64_rse_skip_regs(krbs, j) = args->args[i]; else - args->args[i] = *ia64_rse_skip_regs(krbs, - ndirty + i + args->i); + args->args[i] = *ia64_rse_skip_regs(krbs, j); } if (!args->rw) { diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 4f33f6e7e2065..41d243c0c6267 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -95,7 +95,7 @@ static int __init build_node_maps(unsigned long start, unsigned long len, * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been * called yet. Note that node 0 will also count all non-existent cpus. */ -static int __meminit early_nr_cpus_node(int node) +static int early_nr_cpus_node(int node) { int cpu, n = 0; @@ -110,7 +110,7 @@ static int __meminit early_nr_cpus_node(int node) * compute_pernodesize - compute size of pernode data * @node: the node id. */ -static unsigned long __meminit compute_pernodesize(int node) +static unsigned long compute_pernodesize(int node) { unsigned long pernodesize = 0, cpus; @@ -367,7 +367,7 @@ static void __init reserve_pernode_space(void) } } -static void __meminit scatter_node_data(void) +static void scatter_node_data(void) { pg_data_t **dst; int node; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index bf9df2625bc83..ee50506d86f42 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -518,7 +518,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg) if (map_start < map_end) memmap_init_zone((unsigned long)(map_end - map_start), args->nid, args->zone, page_to_pfn(map_start), - MEMMAP_EARLY, NULL); + MEMINIT_EARLY, NULL); return 0; } @@ -527,8 +527,8 @@ memmap_init (unsigned long size, int nid, unsigned long zone, unsigned long start_pfn) { if (!vmem_map) { - memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY, - NULL); + memmap_init_zone(size, nid, zone, start_pfn, + MEMINIT_EARLY, NULL); } else { struct page *start; struct memmap_init_callback_data args; @@ -689,9 +689,7 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif diff --git a/arch/ia64/scripts/unwcheck.py b/arch/ia64/scripts/unwcheck.py index c55276e31b6b6..bfd1b671e35fc 100644 --- a/arch/ia64/scripts/unwcheck.py +++ b/arch/ia64/scripts/unwcheck.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python # SPDX-License-Identifier: GPL-2.0 # # Usage: unwcheck.py FILE diff --git a/arch/m68k/Kconfig.bus b/arch/m68k/Kconfig.bus index 9d0a3a23d50e5..355c51309ed85 100644 --- a/arch/m68k/Kconfig.bus +++ b/arch/m68k/Kconfig.bus @@ -63,7 +63,7 @@ source "drivers/zorro/Kconfig" endif -if !MMU +if COLDFIRE config ISA_DMA_API def_bool !M5272 diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index 60ac1cd8b96fb..6bc7fc14163f8 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -309,7 +309,7 @@ comment "Processor Specific Options" config M68KFPU_EMU bool "Math emulation support" - depends on MMU + depends on M68KCLASSIC && FPU help At some point in the future, this will cause floating-point math instructions to be emulated by the kernel on machines that lack a diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine index c01e103492fdc..f0527b155c057 100644 --- a/arch/m68k/Kconfig.machine +++ b/arch/m68k/Kconfig.machine @@ -23,6 +23,9 @@ config ATARI this kernel on an Atari, say Y here and browse the material available in ; otherwise say N. +config ATARI_KBD_CORE + bool + config MAC bool "Macintosh support" depends on MMU @@ -187,6 +190,7 @@ config INIT_LCD config MEMORY_RESERVE int "Memory reservation (MiB)" depends on (UCSIMM || UCDIMM) + default 0 help Reserve certain memory regions on 68x328 based boards. @@ -316,6 +320,7 @@ comment "Machine Options" config UBOOT bool "Support for U-Boot command line parameters" + depends on COLDFIRE help If you say Y here kernel will try to collect command line parameters from the initial u-boot stack. diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile index 5d92883840969..0415d28dbe4fc 100644 --- a/arch/m68k/Makefile +++ b/arch/m68k/Makefile @@ -135,10 +135,10 @@ vmlinux.gz: vmlinux ifndef CONFIG_KGDB cp vmlinux vmlinux.tmp $(STRIP) vmlinux.tmp - gzip -9c vmlinux.tmp >vmlinux.gz + $(KGZIP) -9c vmlinux.tmp >vmlinux.gz rm vmlinux.tmp else - gzip -9c vmlinux >vmlinux.gz + $(KGZIP) -9c vmlinux >vmlinux.gz endif bzImage: vmlinux.bz2 @@ -148,10 +148,10 @@ vmlinux.bz2: vmlinux ifndef CONFIG_KGDB cp vmlinux vmlinux.tmp $(STRIP) vmlinux.tmp - bzip2 -1c vmlinux.tmp >vmlinux.bz2 + $(KBZIP2) -1c vmlinux.tmp >vmlinux.bz2 rm vmlinux.tmp else - bzip2 -1c vmlinux >vmlinux.bz2 + $(KBZIP2) -1c vmlinux >vmlinux.bz2 endif archclean: diff --git a/arch/m68k/coldfire/pci.c b/arch/m68k/coldfire/pci.c index 62b0eb6cf69a3..84eab0f5e00af 100644 --- a/arch/m68k/coldfire/pci.c +++ b/arch/m68k/coldfire/pci.c @@ -216,8 +216,10 @@ static int __init mcf_pci_init(void) /* Keep a virtual mapping to IO/config space active */ iospace = (unsigned long) ioremap(PCI_IO_PA, PCI_IO_SIZE); - if (iospace == 0) + if (iospace == 0) { + pci_free_host_bridge(bridge); return -ENODEV; + } pr_info("Coldfire: PCI IO/config window mapped to 0x%x\n", (u32) iospace); diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 9a33c1c006a1f..cf8103fa2f347 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -334,7 +334,6 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SAS_ATTRS=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 7fdbc797a05d4..5636288a4b457 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -319,7 +319,6 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SAS_ATTRS=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index f1763405a5396..015a7f401ffd5 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -334,7 +334,6 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SAS_ATTRS=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 91154d6acb313..1209430e61e13 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -316,7 +316,6 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SAS_ATTRS=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index c398c4a94d95c..a41b16067f5c3 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -318,7 +318,6 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SAS_ATTRS=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 350d004559be8..8af104a8c000a 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -325,7 +325,6 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SAS_ATTRS=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index b838dd820348e..354ff30e22c9c 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -358,7 +358,6 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SAS_ATTRS=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 3f8dd61559cf0..eac7685cea427 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -315,7 +315,6 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SAS_ATTRS=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index ae3b2d4f636c7..0f38c4a3c87ae 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -316,7 +316,6 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SAS_ATTRS=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index cd61ef14b5828..6ede6869db1cd 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -324,7 +324,6 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SAS_ATTRS=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 151f5371cd3d4..8644c47899382 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -313,7 +313,6 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SAS_ATTRS=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 1dcb0ee1fe989..f2fd0da2346e5 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -313,7 +313,6 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SAS_ATTRS=m diff --git a/arch/m68k/emu/nfeth.c b/arch/m68k/emu/nfeth.c index a4ebd2445edae..e5831cd293d05 100644 --- a/arch/m68k/emu/nfeth.c +++ b/arch/m68k/emu/nfeth.c @@ -254,8 +254,8 @@ static void __exit nfeth_cleanup(void) for (i = 0; i < MAX_UNIT; i++) { if (nfeth_dev[i]) { - unregister_netdev(nfeth_dev[0]); - free_netdev(nfeth_dev[0]); + unregister_netdev(nfeth_dev[i]); + free_netdev(nfeth_dev[i]); } } free_irq(nfEtherIRQ, nfeth_interrupt); diff --git a/arch/m68k/include/asm/m53xxacr.h b/arch/m68k/include/asm/m53xxacr.h index 9138a624c5c81..692f90e7fecc1 100644 --- a/arch/m68k/include/asm/m53xxacr.h +++ b/arch/m68k/include/asm/m53xxacr.h @@ -89,9 +89,9 @@ * coherency though in all cases. And for copyback caches we will need * to push cached data as well. */ -#define CACHE_INIT CACR_CINVA -#define CACHE_INVALIDATE CACR_CINVA -#define CACHE_INVALIDATED CACR_CINVA +#define CACHE_INIT (CACHE_MODE + CACR_CINVA - CACR_EC) +#define CACHE_INVALIDATE (CACHE_MODE + CACR_CINVA) +#define CACHE_INVALIDATED (CACHE_MODE + CACR_CINVA) #define ACR0_MODE ((CONFIG_RAMBASE & 0xff000000) + \ (0x000f0000) + \ diff --git a/arch/m68k/include/asm/mac_via.h b/arch/m68k/include/asm/mac_via.h index de1470c4d829b..1149251ea58d2 100644 --- a/arch/m68k/include/asm/mac_via.h +++ b/arch/m68k/include/asm/mac_via.h @@ -257,6 +257,7 @@ extern int rbv_present,via_alt_mapping; struct irq_desc; +extern void via_l2_flush(int writeback); extern void via_register_interrupts(void); extern void via_irq_enable(int); extern void via_irq_disable(int); diff --git a/arch/m68k/include/asm/mvme147hw.h b/arch/m68k/include/asm/mvme147hw.h index 257b29184af91..e28eb1c0e0bfb 100644 --- a/arch/m68k/include/asm/mvme147hw.h +++ b/arch/m68k/include/asm/mvme147hw.h @@ -66,6 +66,9 @@ struct pcc_regs { #define PCC_INT_ENAB 0x08 #define PCC_TIMER_INT_CLR 0x80 + +#define PCC_TIMER_TIC_EN 0x01 +#define PCC_TIMER_COC_EN 0x02 #define PCC_TIMER_CLR_OVF 0x04 #define PCC_LEVEL_ABORT 0x07 diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h index c18165b0d9043..6b02484665691 100644 --- a/arch/m68k/include/asm/pgtable_no.h +++ b/arch/m68k/include/asm/pgtable_no.h @@ -42,7 +42,8 @@ extern void paging_init(void); * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ -#define ZERO_PAGE(vaddr) (virt_to_page(0)) +extern void *empty_zero_page; +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) /* * All 32bit addresses are effectively valid for vmalloc... diff --git a/arch/m68k/include/asm/raw_io.h b/arch/m68k/include/asm/raw_io.h index 8a6dc6e5a279c..8ab3c350bd530 100644 --- a/arch/m68k/include/asm/raw_io.h +++ b/arch/m68k/include/asm/raw_io.h @@ -17,21 +17,21 @@ * two accesses to memory, which may be undesirable for some devices. */ #define in_8(addr) \ - ({ u8 __v = (*(__force volatile u8 *) (addr)); __v; }) + ({ u8 __v = (*(__force volatile u8 *) (unsigned long)(addr)); __v; }) #define in_be16(addr) \ - ({ u16 __v = (*(__force volatile u16 *) (addr)); __v; }) + ({ u16 __v = (*(__force volatile u16 *) (unsigned long)(addr)); __v; }) #define in_be32(addr) \ - ({ u32 __v = (*(__force volatile u32 *) (addr)); __v; }) + ({ u32 __v = (*(__force volatile u32 *) (unsigned long)(addr)); __v; }) #define in_le16(addr) \ - ({ u16 __v = le16_to_cpu(*(__force volatile __le16 *) (addr)); __v; }) + ({ u16 __v = le16_to_cpu(*(__force volatile __le16 *) (unsigned long)(addr)); __v; }) #define in_le32(addr) \ - ({ u32 __v = le32_to_cpu(*(__force volatile __le32 *) (addr)); __v; }) + ({ u32 __v = le32_to_cpu(*(__force volatile __le32 *) (unsigned long)(addr)); __v; }) -#define out_8(addr,b) (void)((*(__force volatile u8 *) (addr)) = (b)) -#define out_be16(addr,w) (void)((*(__force volatile u16 *) (addr)) = (w)) -#define out_be32(addr,l) (void)((*(__force volatile u32 *) (addr)) = (l)) -#define out_le16(addr,w) (void)((*(__force volatile __le16 *) (addr)) = cpu_to_le16(w)) -#define out_le32(addr,l) (void)((*(__force volatile __le32 *) (addr)) = cpu_to_le32(l)) +#define out_8(addr,b) (void)((*(__force volatile u8 *) (unsigned long)(addr)) = (b)) +#define out_be16(addr,w) (void)((*(__force volatile u16 *) (unsigned long)(addr)) = (w)) +#define out_be32(addr,l) (void)((*(__force volatile u32 *) (unsigned long)(addr)) = (l)) +#define out_le16(addr,w) (void)((*(__force volatile __le16 *) (unsigned long)(addr)) = cpu_to_le16(w)) +#define out_le32(addr,l) (void)((*(__force volatile __le32 *) (unsigned long)(addr)) = cpu_to_le32(l)) #define raw_inb in_8 #define raw_inw in_be16 diff --git a/arch/m68k/include/asm/timex.h b/arch/m68k/include/asm/timex.h index 6a21d93582805..f4a7a340f4cae 100644 --- a/arch/m68k/include/asm/timex.h +++ b/arch/m68k/include/asm/timex.h @@ -35,7 +35,7 @@ static inline unsigned long random_get_entropy(void) { if (mach_random_get_entropy) return mach_random_get_entropy(); - return 0; + return random_get_entropy_fallback(); } #define random_get_entropy random_get_entropy diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c index 3c5def10d486e..caa260f877f24 100644 --- a/arch/m68k/kernel/setup_no.c +++ b/arch/m68k/kernel/setup_no.c @@ -139,7 +139,8 @@ void __init setup_arch(char **cmdline_p) pr_debug("MEMORY -> ROMFS=0x%p-0x%06lx MEM=0x%06lx-0x%06lx\n ", __bss_stop, memory_start, memory_start, memory_end); - memblock_add(memory_start, memory_end - memory_start); + memblock_add(_rambase, memory_end - _rambase); + memblock_reserve(_rambase, memory_start - _rambase); /* Keep a copy of command line */ *cmdline_p = &command_line[0]; diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index 05610e6924c16..f7121b775e5f0 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -448,7 +448,7 @@ static inline void save_fpu_state(struct sigcontext *sc, struct pt_regs *regs) if (CPU_IS_060 ? sc->sc_fpstate[2] : sc->sc_fpstate[0]) { fpu_version = sc->sc_fpstate[0]; - if (CPU_IS_020_OR_030 && + if (CPU_IS_020_OR_030 && !regs->stkadj && regs->vector >= (VEC_FPBRUC * 4) && regs->vector <= (VEC_FPNAN * 4)) { /* Clear pending exception in 68882 idle frame */ @@ -511,7 +511,7 @@ static inline int rt_save_fpu_state(struct ucontext __user *uc, struct pt_regs * if (!(CPU_IS_060 || CPU_IS_COLDFIRE)) context_size = fpstate[1]; fpu_version = fpstate[0]; - if (CPU_IS_020_OR_030 && + if (CPU_IS_020_OR_030 && !regs->stkadj && regs->vector >= (VEC_FPBRUC * 4) && regs->vector <= (VEC_FPNAN * 4)) { /* Clear pending exception in 68882 idle frame */ @@ -829,18 +829,24 @@ asmlinkage int do_rt_sigreturn(struct pt_regs *regs, struct switch_stack *sw) return 0; } +static inline struct pt_regs *rte_regs(struct pt_regs *regs) +{ + return (void *)regs + regs->stkadj; +} + static void setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs, unsigned long mask) { + struct pt_regs *tregs = rte_regs(regs); sc->sc_mask = mask; sc->sc_usp = rdusp(); sc->sc_d0 = regs->d0; sc->sc_d1 = regs->d1; sc->sc_a0 = regs->a0; sc->sc_a1 = regs->a1; - sc->sc_sr = regs->sr; - sc->sc_pc = regs->pc; - sc->sc_formatvec = regs->format << 12 | regs->vector; + sc->sc_sr = tregs->sr; + sc->sc_pc = tregs->pc; + sc->sc_formatvec = tregs->format << 12 | tregs->vector; save_a5_state(sc, regs); save_fpu_state(sc, regs); } @@ -848,6 +854,7 @@ static void setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs, static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs *regs) { struct switch_stack *sw = (struct switch_stack *)regs - 1; + struct pt_regs *tregs = rte_regs(regs); greg_t __user *gregs = uc->uc_mcontext.gregs; int err = 0; @@ -868,9 +875,9 @@ static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs * err |= __put_user(sw->a5, &gregs[13]); err |= __put_user(sw->a6, &gregs[14]); err |= __put_user(rdusp(), &gregs[15]); - err |= __put_user(regs->pc, &gregs[16]); - err |= __put_user(regs->sr, &gregs[17]); - err |= __put_user((regs->format << 12) | regs->vector, &uc->uc_formatvec); + err |= __put_user(tregs->pc, &gregs[16]); + err |= __put_user(tregs->sr, &gregs[17]); + err |= __put_user((tregs->format << 12) | tregs->vector, &uc->uc_formatvec); err |= rt_save_fpu_state(uc, regs); return err; } @@ -887,13 +894,14 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { struct sigframe __user *frame; - int fsize = frame_extra_sizes(regs->format); + struct pt_regs *tregs = rte_regs(regs); + int fsize = frame_extra_sizes(tregs->format); struct sigcontext context; int err = 0, sig = ksig->sig; if (fsize < 0) { pr_debug("setup_frame: Unknown frame format %#x\n", - regs->format); + tregs->format); return -EFAULT; } @@ -904,7 +912,7 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, err |= __put_user(sig, &frame->sig); - err |= __put_user(regs->vector, &frame->code); + err |= __put_user(tregs->vector, &frame->code); err |= __put_user(&frame->sc, &frame->psc); if (_NSIG_WORDS > 1) @@ -929,34 +937,28 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, push_cache ((unsigned long) &frame->retcode); - /* - * Set up registers for signal handler. All the state we are about - * to destroy is successfully copied to sigframe. - */ - wrusp ((unsigned long) frame); - regs->pc = (unsigned long) ksig->ka.sa.sa_handler; - adjustformat(regs); - /* * This is subtle; if we build more than one sigframe, all but the * first one will see frame format 0 and have fsize == 0, so we won't * screw stkadj. */ - if (fsize) + if (fsize) { regs->stkadj = fsize; - - /* Prepare to skip over the extra stuff in the exception frame. */ - if (regs->stkadj) { - struct pt_regs *tregs = - (struct pt_regs *)((ulong)regs + regs->stkadj); + tregs = rte_regs(regs); pr_debug("Performing stackadjust=%04lx\n", regs->stkadj); - /* This must be copied with decreasing addresses to - handle overlaps. */ tregs->vector = 0; tregs->format = 0; - tregs->pc = regs->pc; tregs->sr = regs->sr; } + + /* + * Set up registers for signal handler. All the state we are about + * to destroy is successfully copied to sigframe. + */ + wrusp ((unsigned long) frame); + tregs->pc = (unsigned long) ksig->ka.sa.sa_handler; + adjustformat(regs); + return 0; } @@ -964,7 +966,8 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; - int fsize = frame_extra_sizes(regs->format); + struct pt_regs *tregs = rte_regs(regs); + int fsize = frame_extra_sizes(tregs->format); int err = 0, sig = ksig->sig; if (fsize < 0) { @@ -1013,34 +1016,27 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, push_cache ((unsigned long) &frame->retcode); - /* - * Set up registers for signal handler. All the state we are about - * to destroy is successfully copied to sigframe. - */ - wrusp ((unsigned long) frame); - regs->pc = (unsigned long) ksig->ka.sa.sa_handler; - adjustformat(regs); - /* * This is subtle; if we build more than one sigframe, all but the * first one will see frame format 0 and have fsize == 0, so we won't * screw stkadj. */ - if (fsize) + if (fsize) { regs->stkadj = fsize; - - /* Prepare to skip over the extra stuff in the exception frame. */ - if (regs->stkadj) { - struct pt_regs *tregs = - (struct pt_regs *)((ulong)regs + regs->stkadj); + tregs = rte_regs(regs); pr_debug("Performing stackadjust=%04lx\n", regs->stkadj); - /* This must be copied with decreasing addresses to - handle overlaps. */ tregs->vector = 0; tregs->format = 0; - tregs->pc = regs->pc; tregs->sr = regs->sr; } + + /* + * Set up registers for signal handler. All the state we are about + * to destroy is successfully copied to sigframe. + */ + wrusp ((unsigned long) frame); + tregs->pc = (unsigned long) ksig->ka.sa.sa_handler; + adjustformat(regs); return 0; } diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 611f73bfc87c9..d0126ab01360b 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -59,7 +59,6 @@ extern void iop_preinit(void); extern void iop_init(void); extern void via_init(void); extern void via_init_clock(irq_handler_t func); -extern void via_flush_cache(void); extern void oss_init(void); extern void psc_init(void); extern void baboon_init(void); @@ -130,21 +129,6 @@ int __init mac_parse_bootinfo(const struct bi_record *record) return unknown; } -/* - * Flip into 24bit mode for an instant - flushes the L2 cache card. We - * have to disable interrupts for this. Our IRQ handlers will crap - * themselves if they take an IRQ in 24bit mode! - */ - -static void mac_cache_card_flush(int writeback) -{ - unsigned long flags; - - local_irq_save(flags); - via_flush_cache(); - local_irq_restore(flags); -} - void __init config_mac(void) { if (!MACH_IS_MAC) @@ -175,9 +159,8 @@ void __init config_mac(void) * not. */ - if (macintosh_config->ident == MAC_MODEL_IICI - || macintosh_config->ident == MAC_MODEL_IIFX) - mach_l2_flush = mac_cache_card_flush; + if (macintosh_config->ident == MAC_MODEL_IICI) + mach_l2_flush = via_l2_flush; } diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c index 9bfa170157688..c432bfafe63e2 100644 --- a/arch/m68k/mac/iop.c +++ b/arch/m68k/mac/iop.c @@ -183,7 +183,7 @@ static __inline__ void iop_writeb(volatile struct mac_iop *iop, __u16 addr, __u8 static __inline__ void iop_stop(volatile struct mac_iop *iop) { - iop->status_ctrl &= ~IOP_RUN; + iop->status_ctrl = IOP_AUTOINC; } static __inline__ void iop_start(volatile struct mac_iop *iop) @@ -191,14 +191,9 @@ static __inline__ void iop_start(volatile struct mac_iop *iop) iop->status_ctrl = IOP_RUN | IOP_AUTOINC; } -static __inline__ void iop_bypass(volatile struct mac_iop *iop) -{ - iop->status_ctrl |= IOP_BYPASS; -} - static __inline__ void iop_interrupt(volatile struct mac_iop *iop) { - iop->status_ctrl |= IOP_IRQ; + iop->status_ctrl = IOP_IRQ | IOP_RUN | IOP_AUTOINC; } static int iop_alive(volatile struct mac_iop *iop) @@ -244,7 +239,6 @@ void __init iop_preinit(void) } else { iop_base[IOP_NUM_SCC] = (struct mac_iop *) SCC_IOP_BASE_QUADRA; } - iop_base[IOP_NUM_SCC]->status_ctrl = 0x87; iop_scc_present = 1; } else { iop_base[IOP_NUM_SCC] = NULL; @@ -256,7 +250,7 @@ void __init iop_preinit(void) } else { iop_base[IOP_NUM_ISM] = (struct mac_iop *) ISM_IOP_BASE_QUADRA; } - iop_base[IOP_NUM_ISM]->status_ctrl = 0; + iop_stop(iop_base[IOP_NUM_ISM]); iop_ism_present = 1; } else { iop_base[IOP_NUM_ISM] = NULL; @@ -416,7 +410,8 @@ static void iop_handle_send(uint iop_num, uint chan) msg->status = IOP_MSGSTATUS_UNUSED; msg = msg->next; iop_send_queue[iop_num][chan] = msg; - if (msg) iop_do_send(msg); + if (msg && iop_readb(iop, IOP_ADDR_SEND_STATE + chan) == IOP_MSG_IDLE) + iop_do_send(msg); } /* @@ -490,16 +485,12 @@ int iop_send_message(uint iop_num, uint chan, void *privdata, if (!(q = iop_send_queue[iop_num][chan])) { iop_send_queue[iop_num][chan] = msg; + iop_do_send(msg); } else { while (q->next) q = q->next; q->next = msg; } - if (iop_readb(iop_base[iop_num], - IOP_ADDR_SEND_STATE + chan) == IOP_MSG_IDLE) { - iop_do_send(msg); - } - return 0; } diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index 3c2cfcb749825..1f0fad2a98a07 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -294,10 +294,14 @@ void via_debug_dump(void) * the system into 24-bit mode for an instant. */ -void via_flush_cache(void) +void via_l2_flush(int writeback) { + unsigned long flags; + + local_irq_save(flags); via2[gBufB] &= ~VIA2B_vMode32; via2[gBufB] |= VIA2B_vMode32; + local_irq_restore(flags); } /* diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c index 6cb1e41d58d00..70a5f55ea6647 100644 --- a/arch/m68k/mm/mcfmmu.c +++ b/arch/m68k/mm/mcfmmu.c @@ -164,7 +164,7 @@ void __init cf_bootmem_alloc(void) m68k_memory[0].addr = _rambase; m68k_memory[0].size = _ramend - _rambase; - memblock_add(m68k_memory[0].addr, m68k_memory[0].size); + memblock_add_node(m68k_memory[0].addr, m68k_memory[0].size, 0); /* compute total pages in system */ num_pages = PFN_DOWN(_ramend - _rambase); diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index 545a1fe0e1194..245376630c3de 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -117,8 +117,10 @@ static irqreturn_t mvme147_timer_int (int irq, void *dev_id) unsigned long flags; local_irq_save(flags); - m147_pcc->t1_int_cntrl = PCC_TIMER_INT_CLR; - m147_pcc->t1_cntrl = PCC_TIMER_CLR_OVF; + m147_pcc->t1_cntrl = PCC_TIMER_CLR_OVF | PCC_TIMER_COC_EN | + PCC_TIMER_TIC_EN; + m147_pcc->t1_int_cntrl = PCC_INT_ENAB | PCC_TIMER_INT_CLR | + PCC_LEVEL_TIMER1; clk_total += PCC_TIMER_CYCLES; timer_routine(0, NULL); local_irq_restore(flags); @@ -136,10 +138,10 @@ void mvme147_sched_init (irq_handler_t timer_routine) /* Init the clock with a value */ /* The clock counter increments until 0xFFFF then reloads */ m147_pcc->t1_preload = PCC_TIMER_PRELOAD; - m147_pcc->t1_cntrl = 0x0; /* clear timer */ - m147_pcc->t1_cntrl = 0x3; /* start timer */ - m147_pcc->t1_int_cntrl = PCC_TIMER_INT_CLR; /* clear pending ints */ - m147_pcc->t1_int_cntrl = PCC_INT_ENAB|PCC_LEVEL_TIMER1; + m147_pcc->t1_cntrl = PCC_TIMER_CLR_OVF | PCC_TIMER_COC_EN | + PCC_TIMER_TIC_EN; + m147_pcc->t1_int_cntrl = PCC_INT_ENAB | PCC_TIMER_INT_CLR | + PCC_LEVEL_TIMER1; clocksource_register_hz(&mvme147_clk, PCC_TIMER_CLOCK_FREQ); } diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index 9bc2da69f80cb..2f2c2d172b24e 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -368,6 +368,7 @@ static u32 clk_total; #define PCCTOVR1_COC_EN 0x02 #define PCCTOVR1_OVR_CLR 0x04 +#define PCCTIC1_INT_LEVEL 6 #define PCCTIC1_INT_CLR 0x08 #define PCCTIC1_INT_EN 0x10 @@ -377,8 +378,8 @@ static irqreturn_t mvme16x_timer_int (int irq, void *dev_id) unsigned long flags; local_irq_save(flags); - out_8(PCCTIC1, in_8(PCCTIC1) | PCCTIC1_INT_CLR); - out_8(PCCTOVR1, PCCTOVR1_OVR_CLR); + out_8(PCCTOVR1, PCCTOVR1_OVR_CLR | PCCTOVR1_TIC_EN | PCCTOVR1_COC_EN); + out_8(PCCTIC1, PCCTIC1_INT_EN | PCCTIC1_INT_CLR | PCCTIC1_INT_LEVEL); clk_total += PCC_TIMER_CYCLES; timer_routine(0, NULL); local_irq_restore(flags); @@ -392,14 +393,15 @@ void mvme16x_sched_init (irq_handler_t timer_routine) int irq; /* Using PCCchip2 or MC2 chip tick timer 1 */ - out_be32(PCCTCNT1, 0); - out_be32(PCCTCMP1, PCC_TIMER_CYCLES); - out_8(PCCTOVR1, in_8(PCCTOVR1) | PCCTOVR1_TIC_EN | PCCTOVR1_COC_EN); - out_8(PCCTIC1, PCCTIC1_INT_EN | 6); if (request_irq(MVME16x_IRQ_TIMER, mvme16x_timer_int, IRQF_TIMER, "timer", timer_routine)) panic ("Couldn't register timer int"); + out_be32(PCCTCNT1, 0); + out_be32(PCCTCMP1, PCC_TIMER_CYCLES); + out_8(PCCTOVR1, PCCTOVR1_OVR_CLR | PCCTOVR1_TIC_EN | PCCTOVR1_COC_EN); + out_8(PCCTIC1, PCCTIC1_INT_EN | PCCTIC1_INT_CLR | PCCTIC1_INT_LEVEL); + clocksource_register_hz(&mvme16x_clk, PCC_TIMER_CLOCK_FREQ); if (brdno == 0x0162 || brdno == 0x172) diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c index e63eb5f069995..f31890078197e 100644 --- a/arch/m68k/q40/config.c +++ b/arch/m68k/q40/config.c @@ -264,6 +264,7 @@ static int q40_get_rtc_pll(struct rtc_pll_info *pll) { int tmp = Q40_RTC_CTRL; + pll->pll_ctrl = 0; pll->pll_value = tmp & Q40_RTC_PLL_MASK; if (tmp & Q40_RTC_PLL_SIGN) pll->pll_value = -pll->pll_value; diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index a1f206b90753a..c8cd66c8d2577 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -171,27 +171,27 @@ extern long __user_bad(void); #define __get_user(x, ptr) \ ({ \ - unsigned long __gu_val = 0; \ long __gu_err; \ switch (sizeof(*(ptr))) { \ case 1: \ - __get_user_asm("lbu", (ptr), __gu_val, __gu_err); \ + __get_user_asm("lbu", (ptr), x, __gu_err); \ break; \ case 2: \ - __get_user_asm("lhu", (ptr), __gu_val, __gu_err); \ + __get_user_asm("lhu", (ptr), x, __gu_err); \ break; \ case 4: \ - __get_user_asm("lw", (ptr), __gu_val, __gu_err); \ + __get_user_asm("lw", (ptr), x, __gu_err); \ break; \ - case 8: \ - __gu_err = __copy_from_user(&__gu_val, ptr, 8); \ - if (__gu_err) \ - __gu_err = -EFAULT; \ + case 8: { \ + __u64 __x = 0; \ + __gu_err = raw_copy_from_user(&__x, ptr, 8) ? \ + -EFAULT : 0; \ + (x) = (typeof(x))(typeof((x) - (x)))__x; \ break; \ + } \ default: \ /* __gu_val = 0; __gu_err = -EINVAL;*/ __gu_err = __user_bad();\ } \ - x = (__force __typeof__(*(ptr))) __gu_val; \ __gu_err; \ }) diff --git a/arch/microblaze/kernel/cpu/cache.c b/arch/microblaze/kernel/cpu/cache.c index 0bde47e4fa694..dcba53803fa5f 100644 --- a/arch/microblaze/kernel/cpu/cache.c +++ b/arch/microblaze/kernel/cpu/cache.c @@ -92,7 +92,8 @@ static inline void __disable_dcache_nomsr(void) #define CACHE_LOOP_LIMITS(start, end, cache_line_length, cache_size) \ do { \ int align = ~(cache_line_length - 1); \ - end = min(start + cache_size, end); \ + if (start < UINT_MAX - cache_size) \ + end = min(start + cache_size, end); \ start &= align; \ } while (0) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index a0bd9bdb5f830..2811ecc1f3c71 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -46,7 +46,8 @@ config MIPS select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES select HAVE_ASM_MODVERSIONS - select HAVE_EBPF_JIT if (!CPU_MICROMIPS) + select HAVE_CBPF_JIT if !64BIT && !CPU_MICROMIPS + select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2 select HAVE_CONTEXT_TRACKING select HAVE_COPY_THREAD_TLS select HAVE_C_RECORDMCOUNT @@ -293,6 +294,9 @@ config BCM63XX select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN select SYS_HAS_EARLY_PRINTK + select SYS_HAS_CPU_BMIPS32_3300 + select SYS_HAS_CPU_BMIPS4350 + select SYS_HAS_CPU_BMIPS4380 select SWAP_IO_SPACE select GPIOLIB select HAVE_CLK @@ -862,6 +866,7 @@ config SNI_RM select I8253 select I8259 select ISA + select MIPS_L1_CACHE_SHIFT_6 select SWAP_IO_SPACE if CPU_BIG_ENDIAN select SYS_HAS_CPU_R4X00 select SYS_HAS_CPU_R5000 @@ -1391,6 +1396,7 @@ config CPU_LOONGSON3 select WEAK_REORDERING_BEYOND_LLSC select MIPS_PGD_C0_CONTEXT select MIPS_L1_CACHE_SHIFT_6 + select MIPS_FP_SUPPORT select GPIOLIB select SWIOTLB help @@ -3053,7 +3059,7 @@ config STACKTRACE_SUPPORT config PGTABLE_LEVELS int default 4 if PAGE_SIZE_4KB && MIPS_VA_BITS_48 - default 3 if 64BIT && !PAGE_SIZE_64KB + default 3 if 64BIT && (!PAGE_SIZE_64KB || MIPS_VA_BITS_48) default 2 config MIPS_AUTO_PFN_OFFSET diff --git a/arch/mips/Makefile b/arch/mips/Makefile index cdc09b71febe3..9ff2c70763a0c 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -285,12 +285,23 @@ ifdef CONFIG_64BIT endif endif +# When linking a 32-bit executable the LLVM linker cannot cope with a +# 32-bit load address that has been sign-extended to 64 bits. Simply +# remove the upper 32 bits then, as it is safe to do so with other +# linkers. +ifdef CONFIG_64BIT + load-ld = $(load-y) +else + load-ld = $(subst 0xffffffff,0x,$(load-y)) +endif + KBUILD_AFLAGS += $(cflags-y) KBUILD_CFLAGS += $(cflags-y) -KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y) +KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y) -DLINKER_LOAD_ADDRESS=$(load-ld) KBUILD_CPPFLAGS += -DDATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0) bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) \ + LINKER_LOAD_ADDRESS=$(load-ld) \ VMLINUX_ENTRY_ADDRESS=$(entry-y) \ PLATFORM="$(platform-y)" \ ITS_INPUTS="$(its-y)" @@ -309,7 +320,7 @@ KBUILD_LDFLAGS += -m $(ld-emul) ifdef CONFIG_MIPS CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \ - egrep -vw '__GNUC_(|MINOR_|PATCHLEVEL_)_' | \ + egrep -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \ sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g') endif diff --git a/arch/mips/Makefile.postlink b/arch/mips/Makefile.postlink index 4eea4188cb204..13e0beb9eee33 100644 --- a/arch/mips/Makefile.postlink +++ b/arch/mips/Makefile.postlink @@ -12,7 +12,7 @@ __archpost: include scripts/Kbuild.include CMD_RELOCS = arch/mips/boot/tools/relocs -quiet_cmd_relocs = RELOCS $@ +quiet_cmd_relocs = RELOCS $@ cmd_relocs = $(CMD_RELOCS) $@ # `@true` prevents complaint when there is nothing to be done diff --git a/arch/mips/alchemy/board-xxs1500.c b/arch/mips/alchemy/board-xxs1500.c index c67dfe1f49971..ec35aedc7727d 100644 --- a/arch/mips/alchemy/board-xxs1500.c +++ b/arch/mips/alchemy/board-xxs1500.c @@ -18,6 +18,7 @@ #include #include #include +#include #include const char *get_system_type(void) diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c index a95a894aceaf1..f0c8303371047 100644 --- a/arch/mips/alchemy/common/clock.c +++ b/arch/mips/alchemy/common/clock.c @@ -152,6 +152,7 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name, { struct clk_init_data id; struct clk_hw *h; + struct clk *clk; h = kzalloc(sizeof(*h), GFP_KERNEL); if (!h) @@ -164,7 +165,13 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name, id.ops = &alchemy_clkops_cpu; h->init = &id; - return clk_register(NULL, h); + clk = clk_register(NULL, h); + if (IS_ERR(clk)) { + pr_err("failed to register clock\n"); + kfree(h); + } + + return clk; } /* AUXPLLs ************************************************************/ diff --git a/arch/mips/bcm47xx/Kconfig b/arch/mips/bcm47xx/Kconfig index 6889f74e06f54..490bb6da74b7e 100644 --- a/arch/mips/bcm47xx/Kconfig +++ b/arch/mips/bcm47xx/Kconfig @@ -27,6 +27,7 @@ config BCM47XX_BCMA select BCMA select BCMA_HOST_SOC select BCMA_DRIVER_MIPS + select BCMA_DRIVER_PCI if PCI select BCMA_DRIVER_PCI_HOSTMODE if PCI select BCMA_DRIVER_GPIO default y diff --git a/arch/mips/bcm63xx/clk.c b/arch/mips/bcm63xx/clk.c index 164115944a7fd..dcfa0ea912fe1 100644 --- a/arch/mips/bcm63xx/clk.c +++ b/arch/mips/bcm63xx/clk.c @@ -381,6 +381,18 @@ void clk_disable(struct clk *clk) EXPORT_SYMBOL(clk_disable); +struct clk *clk_get_parent(struct clk *clk) +{ + return NULL; +} +EXPORT_SYMBOL(clk_get_parent); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + return 0; +} +EXPORT_SYMBOL(clk_set_parent); + unsigned long clk_get_rate(struct clk *clk) { if (!clk) diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c index 2f81a94c71a60..7aee9ff19c1a6 100644 --- a/arch/mips/bmips/setup.c +++ b/arch/mips/bmips/setup.c @@ -167,7 +167,7 @@ void __init plat_mem_setup(void) dtb = phys_to_virt(fw_arg2); else if (fw_passed_dtb) /* UHI interface or appended dtb */ dtb = (void *)fw_passed_dtb; - else if (__dtb_start != __dtb_end) + else if (&__dtb_start != &__dtb_end) dtb = (void *)__dtb_start; else panic("no dtb found"); diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile index 528bd73d530a3..4ed45ade32a11 100644 --- a/arch/mips/boot/Makefile +++ b/arch/mips/boot/Makefile @@ -123,7 +123,7 @@ $(obj)/vmlinux.its.S: $(addprefix $(srctree)/arch/mips/$(PLATFORM)/,$(ITS_INPUTS targets += vmlinux.its targets += vmlinux.gz.its targets += vmlinux.bz2.its -targets += vmlinux.lzmo.its +targets += vmlinux.lzma.its targets += vmlinux.lzo.its quiet_cmd_cpp_its_S = ITS $@ diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 172801ed35b89..1d6ebbc2a5d02 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -29,8 +29,11 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \ -DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \ -DKERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS) +# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. +KCOV_INSTRUMENT := n + # decompressor objects (linked with vmlinuz) -vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o +vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o $(obj)/bswapsi.o ifdef CONFIG_DEBUG_ZBOOT vmlinuzobjs-$(CONFIG_DEBUG_ZBOOT) += $(obj)/dbg.o @@ -44,7 +47,7 @@ extra-y += uart-ath79.c $(obj)/uart-ath79.c: $(srctree)/arch/mips/ath79/early_printk.c $(call cmd,shipped) -vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o $(obj)/bswapsi.o +vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o extra-y += ashldi3.c $(obj)/ashldi3.c: $(obj)/%.c: $(srctree)/lib/%.c FORCE @@ -87,7 +90,7 @@ ifneq ($(zload-y),) VMLINUZ_LOAD_ADDRESS := $(zload-y) else VMLINUZ_LOAD_ADDRESS = $(shell $(obj)/calc_vmlinuz_load_addr \ - $(obj)/vmlinux.bin $(VMLINUX_LOAD_ADDRESS)) + $(obj)/vmlinux.bin $(LINKER_LOAD_ADDRESS)) endif UIMAGE_LOADADDR = $(VMLINUZ_LOAD_ADDRESS) diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c index 88f5d637b1c49..281e922e5c652 100644 --- a/arch/mips/boot/compressed/decompress.c +++ b/arch/mips/boot/compressed/decompress.c @@ -7,12 +7,15 @@ * Author: Wu Zhangjin */ +#define DISABLE_BRANCH_PROFILING + #include #include #include #include #include +#include /* * These two variables specify the free mem region @@ -113,7 +116,7 @@ void decompress_kernel(unsigned long boot_heap_start) dtb_size = fdt_totalsize((void *)&__appended_dtb); /* last four bytes is always image size in little endian */ - image_size = le32_to_cpup((void *)&__image_end - 4); + image_size = get_unaligned_le32((void *)&__image_end - 4); /* copy dtb to where the booted kernel will expect it */ memcpy((void *)VMLINUX_LOAD_ADDRESS_ULL + image_size, diff --git a/arch/mips/boot/compressed/string.c b/arch/mips/boot/compressed/string.c index 43beecc3587cd..0b593b7092286 100644 --- a/arch/mips/boot/compressed/string.c +++ b/arch/mips/boot/compressed/string.c @@ -5,6 +5,7 @@ * Very small subset of simple string routines */ +#include #include void *memcpy(void *dest, const void *src, size_t n) @@ -27,3 +28,19 @@ void *memset(void *s, int c, size_t n) ss[i] = c; return s; } + +void * __weak memmove(void *dest, const void *src, size_t n) +{ + unsigned int i; + const char *s = src; + char *d = dest; + + if ((uintptr_t)dest < (uintptr_t)src) { + for (i = 0; i < n; i++) + d[i] = s[i]; + } else { + for (i = n; i > 0; i--) + d[i - 1] = s[i - 1]; + } + return dest; +} diff --git a/arch/mips/boot/dts/brcm/bcm3368.dtsi b/arch/mips/boot/dts/brcm/bcm3368.dtsi index 69cbef4723775..d4b2b430dad01 100644 --- a/arch/mips/boot/dts/brcm/bcm3368.dtsi +++ b/arch/mips/boot/dts/brcm/bcm3368.dtsi @@ -59,7 +59,7 @@ periph_cntl: syscon@fff8c008 { compatible = "syscon"; - reg = <0xfff8c000 0x4>; + reg = <0xfff8c008 0x4>; native-endian; }; diff --git a/arch/mips/boot/dts/brcm/bcm63268.dtsi b/arch/mips/boot/dts/brcm/bcm63268.dtsi index beec24145af7e..30fdd38aef6da 100644 --- a/arch/mips/boot/dts/brcm/bcm63268.dtsi +++ b/arch/mips/boot/dts/brcm/bcm63268.dtsi @@ -59,7 +59,7 @@ periph_cntl: syscon@10000008 { compatible = "syscon"; - reg = <0x10000000 0xc>; + reg = <0x10000008 0x4>; native-endian; }; diff --git a/arch/mips/boot/dts/brcm/bcm6358.dtsi b/arch/mips/boot/dts/brcm/bcm6358.dtsi index f21176cac0381..89a3107cad28e 100644 --- a/arch/mips/boot/dts/brcm/bcm6358.dtsi +++ b/arch/mips/boot/dts/brcm/bcm6358.dtsi @@ -59,7 +59,7 @@ periph_cntl: syscon@fffe0008 { compatible = "syscon"; - reg = <0xfffe0000 0x4>; + reg = <0xfffe0008 0x4>; native-endian; }; diff --git a/arch/mips/boot/dts/brcm/bcm6362.dtsi b/arch/mips/boot/dts/brcm/bcm6362.dtsi index 8ae6981735b82..e48946a51242b 100644 --- a/arch/mips/boot/dts/brcm/bcm6362.dtsi +++ b/arch/mips/boot/dts/brcm/bcm6362.dtsi @@ -59,7 +59,7 @@ periph_cntl: syscon@10000008 { compatible = "syscon"; - reg = <0x10000000 0xc>; + reg = <0x10000008 0x4>; native-endian; }; diff --git a/arch/mips/boot/dts/brcm/bcm6368.dtsi b/arch/mips/boot/dts/brcm/bcm6368.dtsi index 449c167dd8921..b84a3bfe8c51e 100644 --- a/arch/mips/boot/dts/brcm/bcm6368.dtsi +++ b/arch/mips/boot/dts/brcm/bcm6368.dtsi @@ -59,7 +59,7 @@ periph_cntl: syscon@100000008 { compatible = "syscon"; - reg = <0x10000000 0xc>; + reg = <0x10000008 0x4>; native-endian; }; diff --git a/arch/mips/boot/dts/ingenic/qi_lb60.dts b/arch/mips/boot/dts/ingenic/qi_lb60.dts index 7a371d9c5a33f..eda37fb516f0e 100644 --- a/arch/mips/boot/dts/ingenic/qi_lb60.dts +++ b/arch/mips/boot/dts/ingenic/qi_lb60.dts @@ -69,7 +69,7 @@ "Speaker", "OUTL", "Speaker", "OUTR", "INL", "LOUT", - "INL", "ROUT"; + "INR", "ROUT"; simple-audio-card,aux-devs = <&>; diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index f97be32bf699c..3ad1f76c063a9 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -2199,6 +2199,9 @@ static int octeon_irq_cib_map(struct irq_domain *d, } cd = kzalloc(sizeof(*cd), GFP_KERNEL); + if (!cd) + return -ENOMEM; + cd->host_data = host_data; cd->bit = hw; diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c index 51685f893eab0..04bc347147270 100644 --- a/arch/mips/cavium-octeon/octeon-platform.c +++ b/arch/mips/cavium-octeon/octeon-platform.c @@ -86,11 +86,12 @@ static void octeon2_usb_clocks_start(struct device *dev) "refclk-frequency", &clock_rate); if (i) { dev_err(dev, "No UCTL \"refclk-frequency\"\n"); + of_node_put(uctl_node); goto exit; } i = of_property_read_string(uctl_node, "refclk-type", &clock_type); - + of_node_put(uctl_node); if (!i && strcmp("crystal", clock_type) == 0) is_crystal_clock = true; } @@ -328,6 +329,7 @@ static int __init octeon_ehci_device_init(void) pd->dev.platform_data = &octeon_ehci_pdata; octeon_ehci_hw_start(&pd->dev); + put_device(&pd->dev); return ret; } @@ -391,6 +393,7 @@ static int __init octeon_ohci_device_init(void) pd->dev.platform_data = &octeon_ohci_pdata; octeon_ohci_hw_start(&pd->dev); + put_device(&pd->dev); return ret; } diff --git a/arch/mips/cavium-octeon/octeon-usb.c b/arch/mips/cavium-octeon/octeon-usb.c index cc88a08bc1f73..e092d86e63581 100644 --- a/arch/mips/cavium-octeon/octeon-usb.c +++ b/arch/mips/cavium-octeon/octeon-usb.c @@ -518,6 +518,7 @@ static int __init dwc3_octeon_device_init(void) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (res == NULL) { + put_device(&pdev->dev); dev_err(&pdev->dev, "No memory resources\n"); return -ENXIO; } @@ -529,8 +530,10 @@ static int __init dwc3_octeon_device_init(void) * know the difference. */ base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(base)) + if (IS_ERR(base)) { + put_device(&pdev->dev); return PTR_ERR(base); + } mutex_lock(&dwc3_octeon_clocks_mutex); dwc3_octeon_clocks_start(&pdev->dev, (u64)base); @@ -541,6 +544,7 @@ static int __init dwc3_octeon_device_init(void) devm_iounmap(&pdev->dev, base); devm_release_mem_region(&pdev->dev, res->start, resource_size(res)); + put_device(&pdev->dev); } } while (node != NULL); diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig index f14ad0538f4e3..eea9b613bb740 100644 --- a/arch/mips/configs/bigsur_defconfig +++ b/arch/mips/configs/bigsur_defconfig @@ -112,7 +112,6 @@ CONFIG_BLK_DEV_TC86C001=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_CHR_DEV_SCH=m CONFIG_ATA=y diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig index 7a7af706e8981..c5f66b7f2b227 100644 --- a/arch/mips/configs/fuloong2e_defconfig +++ b/arch/mips/configs/fuloong2e_defconfig @@ -99,7 +99,6 @@ CONFIG_CDROM_PKTCDVD=m CONFIG_ATA_OVER_ETH=m CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y # CONFIG_SCSI_LOWLEVEL is not set diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index 82d942a6026e5..638d7cf5ef01d 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -99,7 +99,6 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_CHR_DEV_SCH=m CONFIG_SCSI_CONSTANTS=y diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig index 370884018aad1..7b1fab5183170 100644 --- a/arch/mips/configs/ip32_defconfig +++ b/arch/mips/configs/ip32_defconfig @@ -50,7 +50,6 @@ CONFIG_RAID_ATTRS=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig index 328d4dfeb4cbe..982b990469afd 100644 --- a/arch/mips/configs/jazz_defconfig +++ b/arch/mips/configs/jazz_defconfig @@ -191,7 +191,6 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SCAN_ASYNC=y CONFIG_SCSI_FC_ATTRS=y diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index 90ee0084d7862..e41f4841cb4d5 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -231,7 +231,7 @@ CONFIG_MEDIA_CAMERA_SUPPORT=y CONFIG_MEDIA_USB_SUPPORT=y CONFIG_USB_VIDEO_CLASS=m CONFIG_DRM=y -CONFIG_DRM_RADEON=y +CONFIG_DRM_RADEON=m CONFIG_FB_RADEON=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_LCD_PLATFORM=m diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 59eedf55419da..211bd3d6e6cb3 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -239,7 +239,6 @@ CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_CHR_DEV_OSST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig index 8ef612552a196..62b1969b4f55b 100644 --- a/arch/mips/configs/malta_kvm_defconfig +++ b/arch/mips/configs/malta_kvm_defconfig @@ -247,7 +247,6 @@ CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_CHR_DEV_OSST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y diff --git a/arch/mips/configs/malta_kvm_guest_defconfig b/arch/mips/configs/malta_kvm_guest_defconfig index d2a008c9907c6..9185e0a0aa455 100644 --- a/arch/mips/configs/malta_kvm_guest_defconfig +++ b/arch/mips/configs/malta_kvm_guest_defconfig @@ -245,7 +245,6 @@ CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_CHR_DEV_OSST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig index 970df6d427283..636311d67a533 100644 --- a/arch/mips/configs/maltaup_xpa_defconfig +++ b/arch/mips/configs/maltaup_xpa_defconfig @@ -245,7 +245,6 @@ CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_CHR_DEV_OSST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index 2c7adea7638f5..30d7c3db884e4 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -203,7 +203,6 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SCAN_ASYNC=y CONFIG_SCSI_FC_ATTRS=y diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index a25ef822e7250..5ed38e6180190 100644 --- a/arch/mips/dec/int-handler.S +++ b/arch/mips/dec/int-handler.S @@ -131,7 +131,7 @@ */ mfc0 t0,CP0_CAUSE # get pending interrupts mfc0 t1,CP0_STATUS -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT) lw t2,cpu_fpu_mask #endif andi t0,ST0_IM # CAUSE.CE may be non-zero! @@ -139,7 +139,7 @@ beqz t0,spurious -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT) and t2,t0 bnez t2,fpu # handle FPU immediately #endif @@ -280,7 +280,7 @@ handle_it: j dec_irq_dispatch nop -#ifdef CONFIG_32BIT +#if defined(CONFIG_32BIT) && defined(CONFIG_MIPS_FP_SUPPORT) fpu: lw t0,fpu_kstat_irq nop diff --git a/arch/mips/dec/prom/Makefile b/arch/mips/dec/prom/Makefile index d95016016b42b..2bad87551203b 100644 --- a/arch/mips/dec/prom/Makefile +++ b/arch/mips/dec/prom/Makefile @@ -6,4 +6,4 @@ lib-y += init.o memory.o cmdline.o identify.o console.o -lib-$(CONFIG_32BIT) += locore.o +lib-$(CONFIG_CPU_R3000) += locore.o diff --git a/arch/mips/dec/setup.c b/arch/mips/dec/setup.c index 61a0bf13e3083..649b50ae5b1e3 100644 --- a/arch/mips/dec/setup.c +++ b/arch/mips/dec/setup.c @@ -6,7 +6,7 @@ * for more details. * * Copyright (C) 1998 Harald Koerfgen - * Copyright (C) 2000, 2001, 2002, 2003, 2005 Maciej W. Rozycki + * Copyright (C) 2000, 2001, 2002, 2003, 2005, 2020 Maciej W. Rozycki */ #include #include @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,7 @@ #include #include +#include #include #include #include @@ -29,7 +31,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -166,6 +170,9 @@ void __init plat_mem_setup(void) ioport_resource.start = ~0UL; ioport_resource.end = 0UL; + + /* Stay away from the firmware working memory area for now. */ + memblock_reserve(PHYS_OFFSET, __pa_symbol(&_text) - PHYS_OFFSET); } /* @@ -759,7 +766,8 @@ void __init arch_init_irq(void) dec_interrupt[DEC_IRQ_HALT] = -1; /* Register board interrupts: FPU and cascade. */ - if (dec_interrupt[DEC_IRQ_FPU] >= 0 && cpu_has_fpu) { + if (IS_ENABLED(CONFIG_MIPS_FP_SUPPORT) && + dec_interrupt[DEC_IRQ_FPU] >= 0 && cpu_has_fpu) { struct irq_desc *desc_fpu; int irq_fpu; diff --git a/arch/mips/generic/board-boston.its.S b/arch/mips/generic/board-boston.its.S index a7f51f97b9102..c45ad27594218 100644 --- a/arch/mips/generic/board-boston.its.S +++ b/arch/mips/generic/board-boston.its.S @@ -1,22 +1,22 @@ / { images { - fdt@boston { + fdt-boston { description = "img,boston Device Tree"; data = /incbin/("boot/dts/img/boston.dtb"); type = "flat_dt"; arch = "mips"; compression = "none"; - hash@0 { + hash { algo = "sha1"; }; }; }; configurations { - conf@boston { + conf-boston { description = "Boston Linux kernel"; - kernel = "kernel@0"; - fdt = "fdt@boston"; + kernel = "kernel"; + fdt = "fdt-boston"; }; }; }; diff --git a/arch/mips/generic/board-ni169445.its.S b/arch/mips/generic/board-ni169445.its.S index e4cb4f95a8cc1..0a2e8f7a8526f 100644 --- a/arch/mips/generic/board-ni169445.its.S +++ b/arch/mips/generic/board-ni169445.its.S @@ -1,22 +1,22 @@ / { images { - fdt@ni169445 { + fdt-ni169445 { description = "NI 169445 device tree"; data = /incbin/("boot/dts/ni/169445.dtb"); type = "flat_dt"; arch = "mips"; compression = "none"; - hash@0 { + hash { algo = "sha1"; }; }; }; configurations { - conf@ni169445 { + conf-ni169445 { description = "NI 169445 Linux Kernel"; - kernel = "kernel@0"; - fdt = "fdt@ni169445"; + kernel = "kernel"; + fdt = "fdt-ni169445"; }; }; }; diff --git a/arch/mips/generic/board-ocelot.its.S b/arch/mips/generic/board-ocelot.its.S index 3da23988149a6..8c7e3a1b68d3d 100644 --- a/arch/mips/generic/board-ocelot.its.S +++ b/arch/mips/generic/board-ocelot.its.S @@ -1,40 +1,40 @@ /* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ / { images { - fdt@ocelot_pcb123 { + fdt-ocelot_pcb123 { description = "MSCC Ocelot PCB123 Device Tree"; data = /incbin/("boot/dts/mscc/ocelot_pcb123.dtb"); type = "flat_dt"; arch = "mips"; compression = "none"; - hash@0 { + hash { algo = "sha1"; }; }; - fdt@ocelot_pcb120 { + fdt-ocelot_pcb120 { description = "MSCC Ocelot PCB120 Device Tree"; data = /incbin/("boot/dts/mscc/ocelot_pcb120.dtb"); type = "flat_dt"; arch = "mips"; compression = "none"; - hash@0 { + hash { algo = "sha1"; }; }; }; configurations { - conf@ocelot_pcb123 { + conf-ocelot_pcb123 { description = "Ocelot Linux kernel"; - kernel = "kernel@0"; - fdt = "fdt@ocelot_pcb123"; + kernel = "kernel"; + fdt = "fdt-ocelot_pcb123"; }; - conf@ocelot_pcb120 { + conf-ocelot_pcb120 { description = "Ocelot Linux kernel"; - kernel = "kernel@0"; - fdt = "fdt@ocelot_pcb120"; + kernel = "kernel"; + fdt = "fdt-ocelot_pcb120"; }; }; }; diff --git a/arch/mips/generic/board-xilfpga.its.S b/arch/mips/generic/board-xilfpga.its.S index a2e773d3f14f4..08c1e900eb4ed 100644 --- a/arch/mips/generic/board-xilfpga.its.S +++ b/arch/mips/generic/board-xilfpga.its.S @@ -1,22 +1,22 @@ / { images { - fdt@xilfpga { + fdt-xilfpga { description = "MIPSfpga (xilfpga) Device Tree"; data = /incbin/("boot/dts/xilfpga/nexys4ddr.dtb"); type = "flat_dt"; arch = "mips"; compression = "none"; - hash@0 { + hash { algo = "sha1"; }; }; }; configurations { - conf@xilfpga { + conf-xilfpga { description = "MIPSfpga Linux kernel"; - kernel = "kernel@0"; - fdt = "fdt@xilfpga"; + kernel = "kernel"; + fdt = "fdt-xilfpga"; }; }; }; diff --git a/arch/mips/generic/vmlinux.its.S b/arch/mips/generic/vmlinux.its.S index 1a08438fd8930..3e254676540f4 100644 --- a/arch/mips/generic/vmlinux.its.S +++ b/arch/mips/generic/vmlinux.its.S @@ -6,7 +6,7 @@ #address-cells = ; images { - kernel@0 { + kernel { description = KERNEL_NAME; data = /incbin/(VMLINUX_BINARY); type = "kernel"; @@ -15,18 +15,18 @@ compression = VMLINUX_COMPRESSION; load = /bits/ ADDR_BITS ; entry = /bits/ ADDR_BITS ; - hash@0 { + hash { algo = "sha1"; }; }; }; configurations { - default = "conf@default"; + default = "conf-default"; - conf@default { + conf-default { description = "Generic Linux kernel"; - kernel = "kernel@0"; + kernel = "kernel"; }; }; }; diff --git a/arch/mips/generic/yamon-dt.c b/arch/mips/generic/yamon-dt.c index a3aa22c77cadc..a07a5edbcda78 100644 --- a/arch/mips/generic/yamon-dt.c +++ b/arch/mips/generic/yamon-dt.c @@ -75,7 +75,7 @@ static unsigned int __init gen_fdt_mem_array( __init int yamon_dt_append_memory(void *fdt, const struct yamon_mem_region *regions) { - unsigned long phys_memsize, memsize; + unsigned long phys_memsize = 0, memsize; __be32 mem_array[2 * MAX_MEM_ARRAY_ENTRIES]; unsigned int mem_entries; int i, err, mem_off; diff --git a/arch/mips/include/asm/asm.h b/arch/mips/include/asm/asm.h index c23527ba65d09..64bffc1f75e0c 100644 --- a/arch/mips/include/asm/asm.h +++ b/arch/mips/include/asm/asm.h @@ -20,10 +20,27 @@ #include #include +#ifndef __VDSO__ +/* + * Emit CFI data in .debug_frame sections, not .eh_frame sections. + * We don't do DWARF unwinding at runtime, so only the offline DWARF + * information is useful to anyone. Note we should change this if we + * ever decide to enable DWARF unwinding at runtime. + */ +#define CFI_SECTIONS .cfi_sections .debug_frame +#else + /* + * For the vDSO, emit both runtime unwind information and debug + * symbols for the .dbg file. + */ +#define CFI_SECTIONS +#endif + /* * LEAF - declare leaf routine */ #define LEAF(symbol) \ + CFI_SECTIONS; \ .globl symbol; \ .align 2; \ .type symbol, @function; \ @@ -36,6 +53,7 @@ symbol: .frame sp, 0, ra; \ * NESTED - declare nested routine entry point */ #define NESTED(symbol, framesize, rpc) \ + CFI_SECTIONS; \ .globl symbol; \ .align 2; \ .type symbol, @function; \ diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index f6136871561dc..9182ce828f540 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -239,6 +239,7 @@ static inline unsigned long __cmpxchg64(volatile void *ptr, " .set " MIPS_ISA_ARCH_LEVEL " \n" /* Load 64 bits from ptr */ "1: lld %L0, %3 # __cmpxchg64 \n" + " .set pop \n" /* * Split the 64 bit value we loaded into the 2 registers that hold the * ret variable. @@ -266,6 +267,8 @@ static inline unsigned long __cmpxchg64(volatile void *ptr, " or %L1, %L1, $at \n" " .set at \n" # endif + " .set push \n" + " .set " MIPS_ISA_ARCH_LEVEL " \n" /* Attempt to store new at ptr */ " scd %L1, %2 \n" /* If we failed, loop! */ diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index 983a6a7f43a11..3e26b0c7391b8 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -288,10 +288,12 @@ # define cpu_has_mips32r6 __isa_ge_or_flag(6, MIPS_CPU_ISA_M32R6) #endif #ifndef cpu_has_mips64r1 -# define cpu_has_mips64r1 __isa_range_or_flag(1, 6, MIPS_CPU_ISA_M64R1) +# define cpu_has_mips64r1 (cpu_has_64bits && \ + __isa_range_or_flag(1, 6, MIPS_CPU_ISA_M64R1)) #endif #ifndef cpu_has_mips64r2 -# define cpu_has_mips64r2 __isa_range_or_flag(2, 6, MIPS_CPU_ISA_M64R2) +# define cpu_has_mips64r2 (cpu_has_64bits && \ + __isa_range_or_flag(2, 6, MIPS_CPU_ISA_M64R2)) #endif #ifndef cpu_has_mips64r6 # define cpu_has_mips64r6 __isa_ge_and_flag(6, MIPS_CPU_ISA_M64R6) diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h index 7bbb66760a07c..1809c408736b0 100644 --- a/arch/mips/include/asm/cpu-type.h +++ b/arch/mips/include/asm/cpu-type.h @@ -47,6 +47,7 @@ static inline int __pure __get_cpu_type(const int cpu_type) case CPU_34K: case CPU_1004K: case CPU_74K: + case CPU_1074K: case CPU_M14KC: case CPU_M14KEC: case CPU_INTERAPTIV: diff --git a/arch/mips/include/asm/dec/prom.h b/arch/mips/include/asm/dec/prom.h index 62c7dfb90e06c..1e1247add1cf8 100644 --- a/arch/mips/include/asm/dec/prom.h +++ b/arch/mips/include/asm/dec/prom.h @@ -43,16 +43,11 @@ */ #define REX_PROM_MAGIC 0x30464354 -#ifdef CONFIG_64BIT - -#define prom_is_rex(magic) 1 /* KN04 and KN05 are REX PROMs. */ - -#else /* !CONFIG_64BIT */ - -#define prom_is_rex(magic) ((magic) == REX_PROM_MAGIC) - -#endif /* !CONFIG_64BIT */ - +/* KN04 and KN05 are REX PROMs, so only do the check for R3k systems. */ +static inline bool prom_is_rex(u32 magic) +{ + return !IS_ENABLED(CONFIG_CPU_R3000) || magic == REX_PROM_MAGIC; +} /* * 3MIN/MAXINE PROM entry points for DS5000/1xx's, DS5000/xx's and diff --git a/arch/mips/include/asm/div64.h b/arch/mips/include/asm/div64.h index dc5ea57364408..ceece76fc971a 100644 --- a/arch/mips/include/asm/div64.h +++ b/arch/mips/include/asm/div64.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2000, 2004 Maciej W. Rozycki + * Copyright (C) 2000, 2004, 2021 Maciej W. Rozycki * Copyright (C) 2003, 07 Ralf Baechle (ralf@linux-mips.org) * * This file is subject to the terms and conditions of the GNU General Public @@ -9,25 +9,18 @@ #ifndef __ASM_DIV64_H #define __ASM_DIV64_H -#include - -#if BITS_PER_LONG == 64 +#include -#include +#if BITS_PER_LONG == 32 /* * No traps on overflows for any of these... */ -#define __div64_32(n, base) \ -({ \ +#define do_div64_32(res, high, low, base) ({ \ unsigned long __cf, __tmp, __tmp2, __i; \ unsigned long __quot32, __mod32; \ - unsigned long __high, __low; \ - unsigned long long __n; \ \ - __high = *__n >> 32; \ - __low = __n; \ __asm__( \ " .set push \n" \ " .set noat \n" \ @@ -51,18 +44,48 @@ " subu %0, %0, %z6 \n" \ " addiu %2, %2, 1 \n" \ "3: \n" \ - " bnez %4, 0b\n\t" \ - " srl %5, %1, 0x1f\n\t" \ + " bnez %4, 0b \n" \ + " srl %5, %1, 0x1f \n" \ " .set pop" \ : "=&r" (__mod32), "=&r" (__tmp), \ "=&r" (__quot32), "=&r" (__cf), \ "=&r" (__i), "=&r" (__tmp2) \ - : "Jr" (base), "0" (__high), "1" (__low)); \ + : "Jr" (base), "0" (high), "1" (low)); \ \ - (__n) = __quot32; \ + (res) = __quot32; \ __mod32; \ }) -#endif /* BITS_PER_LONG == 64 */ +#define __div64_32(n, base) ({ \ + unsigned long __upper, __low, __high, __radix; \ + unsigned long long __quot; \ + unsigned long long __div; \ + unsigned long __mod; \ + \ + __div = (*n); \ + __radix = (base); \ + \ + __high = __div >> 32; \ + __low = __div; \ + \ + if (__high < __radix) { \ + __upper = __high; \ + __high = 0; \ + } else { \ + __upper = __high % __radix; \ + __high /= __radix; \ + } \ + \ + __mod = do_div64_32(__low, __upper, __low, __radix); \ + \ + __quot = __high; \ + __quot = __quot << 32 | __low; \ + (*n) = __quot; \ + __mod; \ +}) + +#endif /* BITS_PER_LONG == 32 */ + +#include #endif /* __ASM_DIV64_H */ diff --git a/arch/mips/include/asm/highmem.h b/arch/mips/include/asm/highmem.h index 9d84aafc33d05..9a6bf4f011b5f 100644 --- a/arch/mips/include/asm/highmem.h +++ b/arch/mips/include/asm/highmem.h @@ -36,7 +36,7 @@ extern pte_t *pkmap_page_table; * easily, subsequent pte tables have to be allocated in one physical * chunk of RAM. */ -#ifdef CONFIG_PHYS_ADDR_T_64BIT +#if defined(CONFIG_PHYS_ADDR_T_64BIT) || defined(CONFIG_MIPS_HUGE_TLB_SUPPORT) #define LAST_PKMAP 512 #else #define LAST_PKMAP 1024 diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h index 425bb6fc3bdaa..bf1bf8c7c332b 100644 --- a/arch/mips/include/asm/hugetlb.h +++ b/arch/mips/include/asm/hugetlb.h @@ -53,7 +53,13 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { - flush_tlb_page(vma, addr & huge_page_mask(hstate_vma(vma))); + /* + * clear the huge pte entry firstly, so that the other smp threads will + * not get old pte entry after finishing flush_tlb_page and before + * setting new huge pte entry + */ + huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + flush_tlb_page(vma, addr); } #define __HAVE_ARCH_HUGE_PTE_NONE diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 41204a49cf95e..356c61074d136 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -274,8 +274,12 @@ enum emulation_result { #define MIPS3_PG_SHIFT 6 #define MIPS3_PG_FRAME 0x3fffffc0 +#if defined(CONFIG_64BIT) +#define VPN2_MASK GENMASK(cpu_vmbits - 1, 13) +#else #define VPN2_MASK 0xffffe000 -#define KVM_ENTRYHI_ASID MIPS_ENTRYHI_ASID +#endif +#define KVM_ENTRYHI_ASID cpu_asid_mask(&boot_cpu_data) #define TLB_IS_GLOBAL(x) ((x).tlb_lo[0] & (x).tlb_lo[1] & ENTRYLO_G) #define TLB_VPN2(x) ((x).tlb_hi & VPN2_MASK) #define TLB_ASID(x) ((x).tlb_hi & KVM_ENTRYHI_ASID) @@ -935,7 +939,7 @@ enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu, #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva_range(struct kvm *kvm, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, unsigned flags); int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); diff --git a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h index 136d6d464e320..93c69fc7bbd8c 100644 --- a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h @@ -28,7 +28,6 @@ #define cpu_has_6k_cache 0 #define cpu_has_8k_cache 0 #define cpu_has_tx39_cache 0 -#define cpu_has_fpu 1 #define cpu_has_nofpuex 0 #define cpu_has_32fpr 1 #define cpu_has_counter 1 diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h index aeae2effa123d..23c67c0871b17 100644 --- a/arch/mips/include/asm/mips-cm.h +++ b/arch/mips/include/asm/mips-cm.h @@ -11,6 +11,7 @@ #ifndef __MIPS_ASM_MIPS_CM_H__ #define __MIPS_ASM_MIPS_CM_H__ +#include #include #include @@ -153,8 +154,8 @@ GCR_ACCESSOR_RO(32, 0x030, rev) #define CM_GCR_REV_MINOR GENMASK(7, 0) #define CM_ENCODE_REV(major, minor) \ - (((major) << __ffs(CM_GCR_REV_MAJOR)) | \ - ((minor) << __ffs(CM_GCR_REV_MINOR))) + (FIELD_PREP(CM_GCR_REV_MAJOR, major) | \ + FIELD_PREP(CM_GCR_REV_MINOR, minor)) #define CM_REV_CM2 CM_ENCODE_REV(6, 0) #define CM_REV_CM2_5 CM_ENCODE_REV(7, 0) @@ -362,10 +363,10 @@ static inline int mips_cm_revision(void) static inline unsigned int mips_cm_max_vp_width(void) { extern int smp_num_siblings; - uint32_t cfg; if (mips_cm_revision() >= CM_REV_CM3) - return read_gcr_sys_config2() & CM_GCR_SYS_CONFIG2_MAXVPW; + return FIELD_GET(CM_GCR_SYS_CONFIG2_MAXVPW, + read_gcr_sys_config2()); if (mips_cm_present()) { /* @@ -373,8 +374,7 @@ static inline unsigned int mips_cm_max_vp_width(void) * number of VP(E)s, and if that ever changes then this will * need revisiting. */ - cfg = read_gcr_cl_config() & CM_GCR_Cx_CONFIG_PVPE; - return (cfg >> __ffs(CM_GCR_Cx_CONFIG_PVPE)) + 1; + return FIELD_GET(CM_GCR_Cx_CONFIG_PVPE, read_gcr_cl_config()) + 1; } if (IS_ENABLED(CONFIG_SMP)) diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index bdbdc19a2b8f8..c28b892937fe1 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -750,7 +750,7 @@ /* MAAR bit definitions */ #define MIPS_MAAR_VH (_U64CAST_(1) << 63) -#define MIPS_MAAR_ADDR ((BIT_ULL(BITS_PER_LONG - 12) - 1) << 12) +#define MIPS_MAAR_ADDR GENMASK_ULL(55, 12) #define MIPS_MAAR_ADDR_SHIFT 12 #define MIPS_MAAR_S (_ULCAST_(1) << 1) #define MIPS_MAAR_VL (_ULCAST_(1) << 0) @@ -2007,7 +2007,7 @@ _ASM_MACRO_0(tlbginvf, _ASM_INSN_IF_MIPS(0x4200000c) ({ int __res; \ __asm__ __volatile__( \ ".set\tpush\n\t" \ - ".set\tmips32r2\n\t" \ + ".set\tmips32r5\n\t" \ _ASM_SET_VIRT \ "mfgc0\t%0, " #source ", %1\n\t" \ ".set\tpop" \ @@ -2020,7 +2020,7 @@ _ASM_MACRO_0(tlbginvf, _ASM_INSN_IF_MIPS(0x4200000c) ({ unsigned long long __res; \ __asm__ __volatile__( \ ".set\tpush\n\t" \ - ".set\tmips64r2\n\t" \ + ".set\tmips64r5\n\t" \ _ASM_SET_VIRT \ "dmfgc0\t%0, " #source ", %1\n\t" \ ".set\tpop" \ @@ -2033,7 +2033,7 @@ _ASM_MACRO_0(tlbginvf, _ASM_INSN_IF_MIPS(0x4200000c) do { \ __asm__ __volatile__( \ ".set\tpush\n\t" \ - ".set\tmips32r2\n\t" \ + ".set\tmips32r5\n\t" \ _ASM_SET_VIRT \ "mtgc0\t%z0, " #register ", %1\n\t" \ ".set\tpop" \ @@ -2045,7 +2045,7 @@ do { \ do { \ __asm__ __volatile__( \ ".set\tpush\n\t" \ - ".set\tmips64r2\n\t" \ + ".set\tmips64r5\n\t" \ _ASM_SET_VIRT \ "dmtgc0\t%z0, " #register ", %1\n\t" \ ".set\tpop" \ diff --git a/arch/mips/include/asm/octeon/cvmx-bootinfo.h b/arch/mips/include/asm/octeon/cvmx-bootinfo.h index 62787765575ef..ce6e5fddce0bf 100644 --- a/arch/mips/include/asm/octeon/cvmx-bootinfo.h +++ b/arch/mips/include/asm/octeon/cvmx-bootinfo.h @@ -315,7 +315,7 @@ enum cvmx_chip_types_enum { /* Functions to return string based on type */ #define ENUM_BRD_TYPE_CASE(x) \ - case x: return(#x + 16); /* Skip CVMX_BOARD_TYPE_ */ + case x: return (&#x[16]); /* Skip CVMX_BOARD_TYPE_ */ static inline const char *cvmx_board_type_to_string(enum cvmx_board_types_enum type) { @@ -404,7 +404,7 @@ static inline const char *cvmx_board_type_to_string(enum } #define ENUM_CHIP_TYPE_CASE(x) \ - case x: return(#x + 15); /* Skip CVMX_CHIP_TYPE */ + case x: return (&#x[15]); /* Skip CVMX_CHIP_TYPE */ static inline const char *cvmx_chip_type_to_string(enum cvmx_chip_types_enum type) { diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index ba967148b016b..2604fab8a92dc 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -155,6 +155,7 @@ static inline void pmd_clear(pmd_t *pmdp) #if defined(CONFIG_XPA) +#define MAX_POSSIBLE_PHYSMEM_BITS 40 #define pte_pfn(x) (((unsigned long)((x).pte_high >> _PFN_SHIFT)) | (unsigned long)((x).pte_low << _PAGE_PRESENT_SHIFT)) static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) @@ -170,6 +171,7 @@ pfn_pte(unsigned long pfn, pgprot_t prot) #elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) +#define MAX_POSSIBLE_PHYSMEM_BITS 36 #define pte_pfn(x) ((unsigned long)((x).pte_high >> 6)) static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) @@ -184,6 +186,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) #else +#define MAX_POSSIBLE_PHYSMEM_BITS 32 #ifdef CONFIG_CPU_VR41XX #define pte_pfn(x) ((unsigned long)((x).pte >> (PAGE_SHIFT + 2))) #define pfn_pte(pfn, prot) __pte(((pfn) << (PAGE_SHIFT + 2)) | pgprot_val(prot)) diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 93a9dce31f255..813dfe5f45a59 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -18,10 +18,12 @@ #include #define __ARCH_USE_5LEVEL_HACK -#if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48) +#if CONFIG_PGTABLE_LEVELS == 2 #include -#elif !(defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_MIPS_VA_BITS_48)) +#elif CONFIG_PGTABLE_LEVELS == 3 #include +#else +#include #endif /* @@ -216,6 +218,9 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) return pgd_val(pgd); } +#define pgd_phys(pgd) virt_to_phys((void *)pgd_val(pgd)) +#define pgd_page(pgd) (pfn_to_page(pgd_phys(pgd) >> PAGE_SHIFT)) + static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) { return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address); diff --git a/arch/mips/include/asm/setup.h b/arch/mips/include/asm/setup.h index bb36a400203df..8c56b862fd9c2 100644 --- a/arch/mips/include/asm/setup.h +++ b/arch/mips/include/asm/setup.h @@ -16,7 +16,7 @@ static inline void setup_8250_early_printk_port(unsigned long base, unsigned int reg_shift, unsigned int timeout) {} #endif -extern void set_handler(unsigned long offset, void *addr, unsigned long len); +void set_handler(unsigned long offset, const void *addr, unsigned long len); extern void set_uncached_handler(unsigned long offset, void *addr, unsigned long len); typedef void (*vi_handler_t)(void); diff --git a/arch/mips/include/asm/string.h b/arch/mips/include/asm/string.h index 29030cb398ee5..1de3bbce8e88a 100644 --- a/arch/mips/include/asm/string.h +++ b/arch/mips/include/asm/string.h @@ -10,127 +10,6 @@ #ifndef _ASM_STRING_H #define _ASM_STRING_H - -/* - * Most of the inline functions are rather naive implementations so I just - * didn't bother updating them for 64-bit ... - */ -#ifdef CONFIG_32BIT - -#ifndef IN_STRING_C - -#define __HAVE_ARCH_STRCPY -static __inline__ char *strcpy(char *__dest, __const__ char *__src) -{ - char *__xdest = __dest; - - __asm__ __volatile__( - ".set\tnoreorder\n\t" - ".set\tnoat\n" - "1:\tlbu\t$1,(%1)\n\t" - "addiu\t%1,1\n\t" - "sb\t$1,(%0)\n\t" - "bnez\t$1,1b\n\t" - "addiu\t%0,1\n\t" - ".set\tat\n\t" - ".set\treorder" - : "=r" (__dest), "=r" (__src) - : "0" (__dest), "1" (__src) - : "memory"); - - return __xdest; -} - -#define __HAVE_ARCH_STRNCPY -static __inline__ char *strncpy(char *__dest, __const__ char *__src, size_t __n) -{ - char *__xdest = __dest; - - if (__n == 0) - return __xdest; - - __asm__ __volatile__( - ".set\tnoreorder\n\t" - ".set\tnoat\n" - "1:\tlbu\t$1,(%1)\n\t" - "subu\t%2,1\n\t" - "sb\t$1,(%0)\n\t" - "beqz\t$1,2f\n\t" - "addiu\t%0,1\n\t" - "bnez\t%2,1b\n\t" - "addiu\t%1,1\n" - "2:\n\t" - ".set\tat\n\t" - ".set\treorder" - : "=r" (__dest), "=r" (__src), "=r" (__n) - : "0" (__dest), "1" (__src), "2" (__n) - : "memory"); - - return __xdest; -} - -#define __HAVE_ARCH_STRCMP -static __inline__ int strcmp(__const__ char *__cs, __const__ char *__ct) -{ - int __res; - - __asm__ __volatile__( - ".set\tnoreorder\n\t" - ".set\tnoat\n\t" - "lbu\t%2,(%0)\n" - "1:\tlbu\t$1,(%1)\n\t" - "addiu\t%0,1\n\t" - "bne\t$1,%2,2f\n\t" - "addiu\t%1,1\n\t" - "bnez\t%2,1b\n\t" - "lbu\t%2,(%0)\n\t" -#if defined(CONFIG_CPU_R3000) - "nop\n\t" -#endif - "move\t%2,$1\n" - "2:\tsubu\t%2,$1\n" - "3:\t.set\tat\n\t" - ".set\treorder" - : "=r" (__cs), "=r" (__ct), "=r" (__res) - : "0" (__cs), "1" (__ct)); - - return __res; -} - -#endif /* !defined(IN_STRING_C) */ - -#define __HAVE_ARCH_STRNCMP -static __inline__ int -strncmp(__const__ char *__cs, __const__ char *__ct, size_t __count) -{ - int __res; - - __asm__ __volatile__( - ".set\tnoreorder\n\t" - ".set\tnoat\n" - "1:\tlbu\t%3,(%0)\n\t" - "beqz\t%2,2f\n\t" - "lbu\t$1,(%1)\n\t" - "subu\t%2,1\n\t" - "bne\t$1,%3,3f\n\t" - "addiu\t%0,1\n\t" - "bnez\t%3,1b\n\t" - "addiu\t%1,1\n" - "2:\n\t" -#if defined(CONFIG_CPU_R3000) - "nop\n\t" -#endif - "move\t%3,$1\n" - "3:\tsubu\t%3,$1\n\t" - ".set\tat\n\t" - ".set\treorder" - : "=r" (__cs), "=r" (__ct), "=r" (__count), "=r" (__res) - : "0" (__cs), "1" (__ct), "2" (__count)); - - return __res; -} -#endif /* CONFIG_32BIT */ - #define __HAVE_ARCH_MEMSET extern void *memset(void *__s, int __c, size_t __count); diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 4993db40482c8..ee26f9a4575df 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -49,8 +49,26 @@ struct thread_info { .addr_limit = KERNEL_DS, \ } -/* How to get the thread information struct from C. */ +/* + * A pointer to the struct thread_info for the currently executing thread is + * held in register $28/$gp. + * + * We declare __current_thread_info as a global register variable rather than a + * local register variable within current_thread_info() because clang doesn't + * support explicit local register variables. + * + * When building the VDSO we take care not to declare the global register + * variable because this causes GCC to not preserve the value of $28/$gp in + * functions that change its value (which is common in the PIC VDSO when + * accessing the GOT). Since the VDSO shouldn't be accessing + * __current_thread_info anyway we declare it extern in order to cause a link + * failure if it's referenced. + */ +#ifdef __VDSO__ +extern struct thread_info *__current_thread_info; +#else register struct thread_info *__current_thread_info __asm__("$28"); +#endif static inline struct thread_info *current_thread_info(void) { diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h index b05bb70a2e46f..2e107886f97ac 100644 --- a/arch/mips/include/asm/timex.h +++ b/arch/mips/include/asm/timex.h @@ -40,9 +40,9 @@ typedef unsigned int cycles_t; /* - * On R4000/R4400 before version 5.0 an erratum exists such that if the - * cycle counter is read in the exact moment that it is matching the - * compare register, no interrupt will be generated. + * On R4000/R4400 an erratum exists such that if the cycle counter is + * read in the exact moment that it is matching the compare register, + * no interrupt will be generated. * * There is a suggested workaround and also the erratum can't strike if * the compare interrupt isn't being used as the clock source device. @@ -63,7 +63,7 @@ static inline int can_use_mips_counter(unsigned int prid) if (!__builtin_constant_p(cpu_has_counter)) asm volatile("" : "=m" (cpu_data[0].options)); if (likely(cpu_has_counter && - prid >= (PRID_IMP_R4000 | PRID_REV_ENCODE_44(5, 0)))) + prid > (PRID_IMP_R4000 | PRID_REV_ENCODE_44(15, 15)))) return 1; else return 0; @@ -76,25 +76,24 @@ static inline cycles_t get_cycles(void) else return 0; /* no usable counter */ } +#define get_cycles get_cycles /* * Like get_cycles - but where c0_count is not available we desperately * use c0_random in an attempt to get at least a little bit of entropy. - * - * R6000 and R6000A neither have a count register nor a random register. - * That leaves no entropy source in the CPU itself. */ static inline unsigned long random_get_entropy(void) { - unsigned int prid = read_c0_prid(); - unsigned int imp = prid & PRID_IMP_MASK; + unsigned int c0_random; - if (can_use_mips_counter(prid)) + if (can_use_mips_counter(read_c0_prid())) return read_c0_count(); - else if (likely(imp != PRID_IMP_R6000 && imp != PRID_IMP_R6000A)) - return read_c0_random(); + + if (cpu_has_3kex) + c0_random = (read_c0_random() >> 8) & 0x3f; else - return 0; /* no usable register */ + c0_random = read_c0_random() & 0x3f; + return (random_get_entropy_fallback() << 6) | (0x3f - c0_random); } #define random_get_entropy random_get_entropy diff --git a/arch/mips/include/asm/vdso/gettimeofday.h b/arch/mips/include/asm/vdso/gettimeofday.h index b08825531e9f9..83f1cbbbf217c 100644 --- a/arch/mips/include/asm/vdso/gettimeofday.h +++ b/arch/mips/include/asm/vdso/gettimeofday.h @@ -26,7 +26,11 @@ #define __VDSO_USE_SYSCALL ULLONG_MAX -#ifdef CONFIG_MIPS_CLOCK_VSYSCALL +#if MIPS_ISA_REV < 6 +#define VDSO_SYSCALL_CLOBBERS "hi", "lo", +#else +#define VDSO_SYSCALL_CLOBBERS +#endif static __always_inline long gettimeofday_fallback( struct __kernel_old_timeval *_tv, @@ -43,22 +47,13 @@ static __always_inline long gettimeofday_fallback( : "=r" (ret), "=r" (error) : "r" (tv), "r" (tz), "r" (nr) : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", - "$14", "$15", "$24", "$25", "hi", "lo", "memory"); + "$14", "$15", "$24", "$25", + VDSO_SYSCALL_CLOBBERS + "memory"); return error ? -ret : ret; } -#else - -static __always_inline long gettimeofday_fallback( - struct __kernel_old_timeval *_tv, - struct timezone *_tz) -{ - return -1; -} - -#endif - static __always_inline long clock_gettime_fallback( clockid_t _clkid, struct __kernel_timespec *_ts) @@ -78,7 +73,9 @@ static __always_inline long clock_gettime_fallback( : "=r" (ret), "=r" (error) : "r" (clkid), "r" (ts), "r" (nr) : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", - "$14", "$15", "$24", "$25", "hi", "lo", "memory"); + "$14", "$15", "$24", "$25", + VDSO_SYSCALL_CLOBBERS + "memory"); return error ? -ret : ret; } @@ -102,7 +99,9 @@ static __always_inline int clock_getres_fallback( : "=r" (ret), "=r" (error) : "r" (clkid), "r" (ts), "r" (nr) : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", - "$14", "$15", "$24", "$25", "hi", "lo", "memory"); + "$14", "$15", "$24", "$25", + VDSO_SYSCALL_CLOBBERS + "memory"); return error ? -ret : ret; } @@ -126,7 +125,9 @@ static __always_inline long clock_gettime32_fallback( : "=r" (ret), "=r" (error) : "r" (clkid), "r" (ts), "r" (nr) : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", - "$14", "$15", "$24", "$25", "hi", "lo", "memory"); + "$14", "$15", "$24", "$25", + VDSO_SYSCALL_CLOBBERS + "memory"); return error ? -ret : ret; } @@ -146,7 +147,9 @@ static __always_inline int clock_getres32_fallback( : "=r" (ret), "=r" (error) : "r" (clkid), "r" (ts), "r" (nr) : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", - "$14", "$15", "$24", "$25", "hi", "lo", "memory"); + "$14", "$15", "$24", "$25", + VDSO_SYSCALL_CLOBBERS + "memory"); return error ? -ret : ret; } diff --git a/arch/mips/include/asm/vdso/vdso.h b/arch/mips/include/asm/vdso/vdso.h index 737ddfc3411cb..a327ca21270ec 100644 --- a/arch/mips/include/asm/vdso/vdso.h +++ b/arch/mips/include/asm/vdso/vdso.h @@ -67,7 +67,7 @@ static inline const struct vdso_data *get_vdso_data(void) static inline void __iomem *get_gic(const struct vdso_data *data) { - return (void __iomem *)data - PAGE_SIZE; + return (void __iomem *)((unsigned long)data & PAGE_MASK) - PAGE_SIZE; } #endif /* CONFIG_CLKSRC_MIPS_GIC */ diff --git a/arch/mips/jz4740/setup.c b/arch/mips/jz4740/setup.c index dc8ee21e0948e..45e327960a465 100644 --- a/arch/mips/jz4740/setup.c +++ b/arch/mips/jz4740/setup.c @@ -61,7 +61,7 @@ void __init plat_mem_setup(void) jz4740_reset_init(); - if (__dtb_start != __dtb_end) + if (&__dtb_start != &__dtb_end) dtb = __dtb_start; else dtb = (void *)fw_passed_dtb; diff --git a/arch/mips/kernel/cacheinfo.c b/arch/mips/kernel/cacheinfo.c index f777e44653d57..529dab855aac9 100644 --- a/arch/mips/kernel/cacheinfo.c +++ b/arch/mips/kernel/cacheinfo.c @@ -17,7 +17,7 @@ do { \ leaf++; \ } while (0) -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { struct cpuinfo_mips *c = ¤t_cpu_data; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -50,21 +50,46 @@ static int __init_cache_level(unsigned int cpu) return 0; } -static int __populate_cache_leaves(unsigned int cpu) +static void fill_cpumask_siblings(int cpu, cpumask_t *cpu_map) +{ + int cpu1; + + for_each_possible_cpu(cpu1) + if (cpus_are_siblings(cpu, cpu1)) + cpumask_set_cpu(cpu1, cpu_map); +} + +static void fill_cpumask_cluster(int cpu, cpumask_t *cpu_map) +{ + int cpu1; + int cluster = cpu_cluster(&cpu_data[cpu]); + + for_each_possible_cpu(cpu1) + if (cpu_cluster(&cpu_data[cpu1]) == cluster) + cpumask_set_cpu(cpu1, cpu_map); +} + +int populate_cache_leaves(unsigned int cpu) { struct cpuinfo_mips *c = ¤t_cpu_data; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf = this_cpu_ci->info_list; if (c->icache.waysize) { + /* L1 caches are per core */ + fill_cpumask_siblings(cpu, &this_leaf->shared_cpu_map); populate_cache(dcache, this_leaf, 1, CACHE_TYPE_DATA); + fill_cpumask_siblings(cpu, &this_leaf->shared_cpu_map); populate_cache(icache, this_leaf, 1, CACHE_TYPE_INST); } else { populate_cache(dcache, this_leaf, 1, CACHE_TYPE_UNIFIED); } - if (c->scache.waysize) + if (c->scache.waysize) { + /* L2 cache is per cluster */ + fill_cpumask_cluster(cpu, &this_leaf->shared_cpu_map); populate_cache(scache, this_leaf, 2, CACHE_TYPE_UNIFIED); + } if (c->tcache.waysize) populate_cache(tcache, this_leaf, 3, CACHE_TYPE_UNIFIED); @@ -73,6 +98,3 @@ static int __populate_cache_leaves(unsigned int cpu) return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index efde27c994146..9c5f8a5d097f2 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -474,20 +474,20 @@ NESTED(nmi_handler, PT_SIZE, sp) .endm .macro __build_clear_fpe + CLI + TRACE_IRQS_OFF .set push /* gas fails to assemble cfc1 for some archs (octeon).*/ \ .set mips1 SET_HARDFLOAT cfc1 a1, fcr31 .set pop - CLI - TRACE_IRQS_OFF .endm .macro __build_clear_msa_fpe - _cfcmsa a1, MSA_CSR CLI TRACE_IRQS_OFF + _cfcmsa a1, MSA_CSR .endm .macro __build_clear_ade diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index e5ea3db23d6b4..611ef512c0b81 100644 --- a/arch/mips/kernel/mips-cm.c +++ b/arch/mips/kernel/mips-cm.c @@ -119,9 +119,9 @@ static char *cm2_causes[32] = { "COH_RD_ERR", "MMIO_WR_ERR", "MMIO_RD_ERR", "0x07", "0x08", "0x09", "0x0a", "0x0b", "0x0c", "0x0d", "0x0e", "0x0f", - "0x10", "0x11", "0x12", "0x13", - "0x14", "0x15", "0x16", "INTVN_WR_ERR", - "INTVN_RD_ERR", "0x19", "0x1a", "0x1b", + "0x10", "INTVN_WR_ERR", "INTVN_RD_ERR", "0x13", + "0x14", "0x15", "0x16", "0x17", + "0x18", "0x19", "0x1a", "0x1b", "0x1c", "0x1d", "0x1e", "0x1f" }; @@ -179,8 +179,7 @@ static void mips_cm_probe_l2sync(void) phys_addr_t addr; /* L2-only sync was introduced with CM major revision 6 */ - major_rev = (read_gcr_rev() & CM_GCR_REV_MAJOR) >> - __ffs(CM_GCR_REV_MAJOR); + major_rev = FIELD_GET(CM_GCR_REV_MAJOR, read_gcr_rev()); if (major_rev < 6) return; @@ -263,13 +262,13 @@ void mips_cm_lock_other(unsigned int cluster, unsigned int core, preempt_disable(); if (cm_rev >= CM_REV_CM3) { - val = core << __ffs(CM3_GCR_Cx_OTHER_CORE); - val |= vp << __ffs(CM3_GCR_Cx_OTHER_VP); + val = FIELD_PREP(CM3_GCR_Cx_OTHER_CORE, core) | + FIELD_PREP(CM3_GCR_Cx_OTHER_VP, vp); if (cm_rev >= CM_REV_CM3_5) { val |= CM_GCR_Cx_OTHER_CLUSTER_EN; - val |= cluster << __ffs(CM_GCR_Cx_OTHER_CLUSTER); - val |= block << __ffs(CM_GCR_Cx_OTHER_BLOCK); + val |= FIELD_PREP(CM_GCR_Cx_OTHER_CLUSTER, cluster); + val |= FIELD_PREP(CM_GCR_Cx_OTHER_BLOCK, block); } else { WARN_ON(cluster != 0); WARN_ON(block != CM_GCR_Cx_OTHER_BLOCK_LOCAL); @@ -299,7 +298,7 @@ void mips_cm_lock_other(unsigned int cluster, unsigned int core, spin_lock_irqsave(&per_cpu(cm_core_lock, curr_core), per_cpu(cm_core_lock_flags, curr_core)); - val = core << __ffs(CM_GCR_Cx_OTHER_CORENUM); + val = FIELD_PREP(CM_GCR_Cx_OTHER_CORENUM, core); } write_gcr_cl_other(val); @@ -343,8 +342,8 @@ void mips_cm_error_report(void) cm_other = read_gcr_error_mult(); if (revision < CM_REV_CM3) { /* CM2 */ - cause = cm_error >> __ffs(CM_GCR_ERROR_CAUSE_ERRTYPE); - ocause = cm_other >> __ffs(CM_GCR_ERROR_MULT_ERR2ND); + cause = FIELD_GET(CM_GCR_ERROR_CAUSE_ERRTYPE, cm_error); + ocause = FIELD_GET(CM_GCR_ERROR_MULT_ERR2ND, cm_other); if (!cause) return; @@ -386,8 +385,8 @@ void mips_cm_error_report(void) ulong core_id_bits, vp_id_bits, cmd_bits, cmd_group_bits; ulong cm3_cca_bits, mcp_bits, cm3_tr_bits, sched_bit; - cause = cm_error >> __ffs64(CM3_GCR_ERROR_CAUSE_ERRTYPE); - ocause = cm_other >> __ffs(CM_GCR_ERROR_MULT_ERR2ND); + cause = FIELD_GET(CM3_GCR_ERROR_CAUSE_ERRTYPE, cm_error); + ocause = FIELD_GET(CM_GCR_ERROR_MULT_ERR2ND, cm_other); if (!cause) return; diff --git a/arch/mips/kernel/mips-cpc.c b/arch/mips/kernel/mips-cpc.c index 69e3e0b556bf7..1b0d4bb617a9c 100644 --- a/arch/mips/kernel/mips-cpc.c +++ b/arch/mips/kernel/mips-cpc.c @@ -27,6 +27,7 @@ phys_addr_t __weak mips_cpc_default_phys_base(void) cpc_node = of_find_compatible_node(of_root, NULL, "mti,mips-cpc"); if (cpc_node) { err = of_address_to_resource(cpc_node, 0, &res); + of_node_put(cpc_node); if (!err) return res.start; } diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c index f8d36710cd581..d408b3a5bfd57 100644 --- a/arch/mips/kernel/proc.c +++ b/arch/mips/kernel/proc.c @@ -168,7 +168,7 @@ static void *c_start(struct seq_file *m, loff_t *pos) { unsigned long i = *pos; - return i < NR_CPUS ? (void *) (i + 1) : NULL; + return i < nr_cpu_ids ? (void *) (i + 1) : NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index 12e58053544fc..cbf6db98cfb38 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -29,8 +29,8 @@ #define EX2(a,b) \ 9: a,##b; \ .section __ex_table,"a"; \ - PTR 9b,bad_stack; \ - PTR 9b+4,bad_stack; \ + PTR 9b,fault; \ + PTR 9b+4,fault; \ .previous .set mips1 diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c index 3d80a51256de6..dab8febb57419 100644 --- a/arch/mips/kernel/relocate.c +++ b/arch/mips/kernel/relocate.c @@ -187,8 +187,14 @@ static int __init relocate_exception_table(long offset) static inline __init unsigned long rotate_xor(unsigned long hash, const void *area, size_t size) { - size_t i; - unsigned long *ptr = (unsigned long *)area; + const typeof(hash) *ptr = PTR_ALIGN(area, sizeof(hash)); + size_t diff, i; + + diff = (void *)ptr - area; + if (unlikely(size < diff + sizeof(hash))) + return hash; + + size = ALIGN_DOWN(size - diff, sizeof(hash)); for (i = 0; i < size / sizeof(hash); i++) { /* Rotate by odd number of bits and XOR. */ diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 5eec13b8d222d..82e44b31aad59 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -494,7 +494,7 @@ static void __init mips_parse_crashkernel(void) if (ret != 0 || crash_size <= 0) return; - if (!memblock_find_in_range(crash_base, crash_base + crash_size, crash_size, 0)) { + if (!memblock_find_in_range(crash_base, crash_base + crash_size, crash_size, 1)) { pr_warn("Invalid memory region reserved for crash kernel\n"); return; } @@ -529,8 +529,8 @@ static void __init request_crashkernel(struct resource *res) static void __init check_kernel_sections_mem(void) { - phys_addr_t start = PFN_PHYS(PFN_DOWN(__pa_symbol(&_text))); - phys_addr_t size = PFN_PHYS(PFN_UP(__pa_symbol(&_end))) - start; + phys_addr_t start = __pa_symbol(&_text); + phys_addr_t size = __pa_symbol(&_end) - start; if (!memblock_is_region_memory(start, size)) { pr_info("Kernel sections are not in the memory maps\n"); @@ -653,7 +653,17 @@ static void __init arch_mem_init(char **cmdline_p) crashk_res.end - crashk_res.start + 1); #endif device_tree_init(); + + /* + * In order to reduce the possibility of kernel panic when failed to + * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate + * low memory as small as possible before plat_swiotlb_setup(), so + * make sparse_init() using top-down allocation. + */ + memblock_set_bottom_up(false); sparse_init(); + memblock_set_bottom_up(true); + plat_swiotlb_setup(); dma_contiguous_reserve(PFN_PHYS(max_low_pfn)); diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 712c15de6ab9f..6b304acf506fe 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -241,6 +241,8 @@ static int bmips_boot_secondary(int cpu, struct task_struct *idle) */ static void bmips_init_secondary(void) { + bmips_cpu_setup(); + switch (current_cpu_type()) { case CPU_BMIPS4350: case CPU_BMIPS4380: diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index f510c00bda882..c563b03bdccc1 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -361,6 +361,9 @@ asmlinkage void start_secondary(void) cpu = smp_processor_id(); cpu_data[cpu].udelay_val = loops_per_jiffy; + set_cpu_sibling_map(cpu); + set_cpu_core_map(cpu); + cpumask_set_cpu(cpu, &cpu_coherent_mask); notify_cpu_starting(cpu); @@ -372,9 +375,6 @@ asmlinkage void start_secondary(void) /* The CPU is running and counters synchronised, now mark it online */ set_cpu_online(cpu, true); - set_cpu_sibling_map(cpu); - set_cpu_core_map(cpu); - calculate_cpu_foreign_map(); /* diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 3f16f38230310..9a28e79f2e66e 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -239,12 +239,3 @@ SYSCALL_DEFINE3(cachectl, char *, addr, int, nbytes, int, op) { return -ENOSYS; } - -/* - * If we ever come here the user sp is bad. Zap the process right away. - * Due to the bad stack signaling wouldn't work. - */ -asmlinkage void bad_stack(void) -{ - do_exit(SIGSEGV); -} diff --git a/arch/mips/kernel/syscalls/Makefile b/arch/mips/kernel/syscalls/Makefile index a3d4bec695c6e..6efb2f6889a73 100644 --- a/arch/mips/kernel/syscalls/Makefile +++ b/arch/mips/kernel/syscalls/Makefile @@ -18,7 +18,7 @@ quiet_cmd_syshdr = SYSHDR $@ '$(syshdr_pfx_$(basetarget))' \ '$(syshdr_offset_$(basetarget))' -quiet_cmd_sysnr = SYSNR $@ +quiet_cmd_sysnr = SYSNR $@ cmd_sysnr = $(CONFIG_SHELL) '$(sysnr)' '$<' '$@' \ '$(sysnr_abis_$(basetarget))' \ '$(sysnr_pfx_$(basetarget))' \ diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index 37e9413a393d3..ed339d7979f3f 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c @@ -18,12 +18,82 @@ #include #include #include +#include +#include #include #include #include #include +#ifdef CONFIG_CPU_FREQ + +static DEFINE_PER_CPU(unsigned long, pcp_lpj_ref); +static DEFINE_PER_CPU(unsigned long, pcp_lpj_ref_freq); +static unsigned long glb_lpj_ref; +static unsigned long glb_lpj_ref_freq; + +static int cpufreq_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + struct cpumask *cpus = freq->policy->cpus; + unsigned long lpj; + int cpu; + + /* + * Skip lpj numbers adjustment if the CPU-freq transition is safe for + * the loops delay. (Is this possible?) + */ + if (freq->flags & CPUFREQ_CONST_LOOPS) + return NOTIFY_OK; + + /* Save the initial values of the lpjes for future scaling. */ + if (!glb_lpj_ref) { + glb_lpj_ref = boot_cpu_data.udelay_val; + glb_lpj_ref_freq = freq->old; + + for_each_online_cpu(cpu) { + per_cpu(pcp_lpj_ref, cpu) = + cpu_data[cpu].udelay_val; + per_cpu(pcp_lpj_ref_freq, cpu) = freq->old; + } + } + + /* + * Adjust global lpj variable and per-CPU udelay_val number in + * accordance with the new CPU frequency. + */ + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { + loops_per_jiffy = cpufreq_scale(glb_lpj_ref, + glb_lpj_ref_freq, + freq->new); + + for_each_cpu(cpu, cpus) { + lpj = cpufreq_scale(per_cpu(pcp_lpj_ref, cpu), + per_cpu(pcp_lpj_ref_freq, cpu), + freq->new); + cpu_data[cpu].udelay_val = (unsigned int)lpj; + } + } + + return NOTIFY_OK; +} + +static struct notifier_block cpufreq_notifier = { + .notifier_call = cpufreq_callback, +}; + +static int __init register_cpufreq_notifier(void) +{ + return cpufreq_register_notifier(&cpufreq_notifier, + CPUFREQ_TRANSITION_NOTIFIER); +} +core_initcall(register_cpufreq_notifier); + +#endif /* CONFIG_CPU_FREQ */ + /* * forward reference */ @@ -71,15 +141,10 @@ static __init int cpu_has_mfc0_count_bug(void) case CPU_R4400MC: /* * The published errata for the R4400 up to 3.0 say the CPU - * has the mfc0 from count bug. - */ - if ((current_cpu_data.processor_id & 0xff) <= 0x30) - return 1; - - /* - * we assume newer revisions are ok + * has the mfc0 from count bug. This seems the last version + * produced. */ - return 0; + return 1; } return 0; diff --git a/arch/mips/kernel/topology.c b/arch/mips/kernel/topology.c index cd3e1f82e1a5d..08ad6371fbe08 100644 --- a/arch/mips/kernel/topology.c +++ b/arch/mips/kernel/topology.c @@ -20,7 +20,7 @@ static int __init topology_init(void) for_each_present_cpu(i) { struct cpu *c = &per_cpu(cpu_devices, i); - c->hotpluggable = 1; + c->hotpluggable = !!i; ret = register_cpu(c, i); if (ret) printk(KERN_WARNING "topology_init: register_cpu %d " diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 342e41de9d64e..749089c25d5e6 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -1240,6 +1240,18 @@ static int enable_restore_fp_context(int msa) err = own_fpu_inatomic(1); if (msa && !err) { enable_msa(); + /* + * with MSA enabled, userspace can see MSACSR + * and MSA regs, but the values in them are from + * other task before current task, restore them + * from saved fp/msa context + */ + write_msa_csr(current->thread.fpu.msacsr); + /* + * own_fpu_inatomic(1) just restore low 64bit, + * fix the high 64bit + */ + init_msa_upper(); set_thread_flag(TIF_USEDMSA); set_thread_flag(TIF_MSA_CTX_LIVE); } @@ -2008,19 +2020,19 @@ static void *set_vi_srs_handler(int n, vi_handler_t addr, int srs) * If no shadow set is selected then use the default handler * that does normal register saving and standard interrupt exit */ - extern char except_vec_vi, except_vec_vi_lui; - extern char except_vec_vi_ori, except_vec_vi_end; - extern char rollback_except_vec_vi; - char *vec_start = using_rollback_handler() ? - &rollback_except_vec_vi : &except_vec_vi; + extern const u8 except_vec_vi[], except_vec_vi_lui[]; + extern const u8 except_vec_vi_ori[], except_vec_vi_end[]; + extern const u8 rollback_except_vec_vi[]; + const u8 *vec_start = using_rollback_handler() ? + rollback_except_vec_vi : except_vec_vi; #if defined(CONFIG_CPU_MICROMIPS) || defined(CONFIG_CPU_BIG_ENDIAN) - const int lui_offset = &except_vec_vi_lui - vec_start + 2; - const int ori_offset = &except_vec_vi_ori - vec_start + 2; + const int lui_offset = except_vec_vi_lui - vec_start + 2; + const int ori_offset = except_vec_vi_ori - vec_start + 2; #else - const int lui_offset = &except_vec_vi_lui - vec_start; - const int ori_offset = &except_vec_vi_ori - vec_start; + const int lui_offset = except_vec_vi_lui - vec_start; + const int ori_offset = except_vec_vi_ori - vec_start; #endif - const int handler_len = &except_vec_vi_end - vec_start; + const int handler_len = except_vec_vi_end - vec_start; if (handler_len > VECTORSPACING) { /* @@ -2126,6 +2138,7 @@ static void configure_status(void) change_c0_status(ST0_CU|ST0_MX|ST0_RE|ST0_FR|ST0_BEV|ST0_TS|ST0_KX|ST0_SX|ST0_UX, status_set); + back_to_back_c0_hazard(); } unsigned int hwrena; @@ -2227,7 +2240,7 @@ void per_cpu_trap_init(bool is_boot_cpu) } /* Install CPU exception handler */ -void set_handler(unsigned long offset, void *addr, unsigned long size) +void set_handler(unsigned long offset, const void *addr, unsigned long size) { #ifdef CONFIG_CPU_MICROMIPS memcpy((void *)(ebase + offset), ((unsigned char *)addr - 1), size); diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 33ee0d18fb0ad..faf98f209b3f4 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -50,7 +50,7 @@ SECTIONS /* . = 0xa800000000300000; */ . = 0xffffffff80300000; #endif - . = VMLINUX_LOAD_ADDRESS; + . = LINKER_LOAD_ADDRESS; /* read-only */ _text = .; /* Text and read-only data */ .text : { @@ -93,6 +93,7 @@ SECTIONS INIT_TASK_DATA(THREAD_SIZE) NOSAVE_DATA + PAGE_ALIGNED_DATA(PAGE_SIZE) CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) DATA_DATA @@ -225,6 +226,5 @@ SECTIONS *(.options) *(.pdr) *(.reginfo) - *(.eh_frame) } } diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 6176b9acba950..d0d832ab3d3b8 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -134,7 +134,7 @@ void release_vpe(struct vpe *v) { list_del(&v->list); if (v->load_addr) - release_progmem(v); + release_progmem(v->load_addr); kfree(v); } diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 1109924560d8c..b22a3565e1330 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -131,6 +131,8 @@ int kvm_arch_check_processor_compat(void) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { switch (type) { + case KVM_VM_MIPS_AUTO: + break; #ifdef CONFIG_KVM_MIPS_VZ case KVM_VM_MIPS_VZ: #else diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 97e538a8c1be2..97f63a84aa51f 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -512,7 +512,8 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, return 1; } -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, + unsigned flags) { handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL); diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c index dd819e31fcbbf..7a623684d9b5e 100644 --- a/arch/mips/lantiq/clk.c +++ b/arch/mips/lantiq/clk.c @@ -158,6 +158,18 @@ void clk_deactivate(struct clk *clk) } EXPORT_SYMBOL(clk_deactivate); +struct clk *clk_get_parent(struct clk *clk) +{ + return NULL; +} +EXPORT_SYMBOL(clk_get_parent); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + return 0; +} +EXPORT_SYMBOL(clk_set_parent); + static inline u32 get_counter_resolution(void) { u32 res; diff --git a/arch/mips/lantiq/falcon/sysctrl.c b/arch/mips/lantiq/falcon/sysctrl.c index 037b08f3257e0..a2837a54d9726 100644 --- a/arch/mips/lantiq/falcon/sysctrl.c +++ b/arch/mips/lantiq/falcon/sysctrl.c @@ -167,6 +167,8 @@ static inline void clkdev_add_sys(const char *dev, unsigned int module, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev; clk->cl.con_id = NULL; clk->cl.clk = clk; diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index 115b417dfb8e3..9fcc118312cb9 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -302,7 +302,7 @@ static void ltq_hw_irq_handler(struct irq_desc *desc) generic_handle_irq(irq_linear_revmap(ltq_domain, hwirq)); /* if this is a EBU irq, we need to ack it or get a deadlock */ - if ((irq == LTQ_ICU_EBU_IRQ) && (module == 0) && LTQ_EBU_PCC_ISTAT) + if (irq == LTQ_ICU_EBU_IRQ && !module && LTQ_EBU_PCC_ISTAT != 0) ltq_ebu_w32(ltq_ebu_r32(LTQ_EBU_PCC_ISTAT) | 0x10, LTQ_EBU_PCC_ISTAT); } diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c index 51a218f04fe0d..3f568f5aae2d1 100644 --- a/arch/mips/lantiq/prom.c +++ b/arch/mips/lantiq/prom.c @@ -79,7 +79,7 @@ void __init plat_mem_setup(void) if (fw_passed_dtb) /* UHI interface */ dtb = (void *)fw_passed_dtb; - else if (__dtb_start != __dtb_end) + else if (&__dtb_start != &__dtb_end) dtb = (void *)__dtb_start; else panic("no dtb found"); diff --git a/arch/mips/lantiq/xway/dma.c b/arch/mips/lantiq/xway/dma.c index aeb1b989cd4ee..e45077aecf83a 100644 --- a/arch/mips/lantiq/xway/dma.c +++ b/arch/mips/lantiq/xway/dma.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -29,6 +30,7 @@ #define LTQ_DMA_PCTRL 0x44 #define LTQ_DMA_IRNEN 0xf4 +#define DMA_ID_CHNR GENMASK(26, 20) /* channel number */ #define DMA_DESCPT BIT(3) /* descriptor complete irq */ #define DMA_TX BIT(8) /* TX channel direction */ #define DMA_CHAN_ON BIT(0) /* channel on / off bit */ @@ -39,7 +41,6 @@ #define DMA_POLL BIT(31) /* turn on channel polling */ #define DMA_CLK_DIV4 BIT(6) /* polling clock divider */ #define DMA_2W_BURST BIT(1) /* 2 word burst length */ -#define DMA_MAX_CHANNEL 20 /* the soc has 20 channels */ #define DMA_ETOP_ENDIANNESS (0xf << 8) /* endianness swap etop channels */ #define DMA_WEIGHT (BIT(17) | BIT(16)) /* default channel wheight */ @@ -205,7 +206,7 @@ ltq_dma_init(struct platform_device *pdev) { struct clk *clk; struct resource *res; - unsigned id; + unsigned int id, nchannels; int i; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -221,21 +222,24 @@ ltq_dma_init(struct platform_device *pdev) clk_enable(clk); ltq_dma_w32_mask(0, DMA_RESET, LTQ_DMA_CTRL); + usleep_range(1, 10); + /* disable all interrupts */ ltq_dma_w32(0, LTQ_DMA_IRNEN); /* reset/configure each channel */ - for (i = 0; i < DMA_MAX_CHANNEL; i++) { + id = ltq_dma_r32(LTQ_DMA_ID); + nchannels = ((id & DMA_ID_CHNR) >> 20); + for (i = 0; i < nchannels; i++) { ltq_dma_w32(i, LTQ_DMA_CS); ltq_dma_w32(DMA_CHAN_RST, LTQ_DMA_CCTRL); ltq_dma_w32(DMA_POLL | DMA_CLK_DIV4, LTQ_DMA_CPOLL); ltq_dma_w32_mask(DMA_CHAN_ON, 0, LTQ_DMA_CCTRL); } - id = ltq_dma_r32(LTQ_DMA_ID); dev_info(&pdev->dev, "Init done - hw rev: %X, ports: %d, channels: %d\n", - id & 0x1f, (id >> 16) & 0xf, id >> 20); + id & 0x1f, (id >> 16) & 0xf, nchannels); return 0; } diff --git a/arch/mips/lantiq/xway/gptu.c b/arch/mips/lantiq/xway/gptu.c index 3d5683e75cf1e..200fe9ff641d6 100644 --- a/arch/mips/lantiq/xway/gptu.c +++ b/arch/mips/lantiq/xway/gptu.c @@ -122,6 +122,8 @@ static inline void clkdev_add_gptu(struct device *dev, const char *con, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev_name(dev); clk->cl.con_id = con; clk->cl.clk = clk; diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c index 156a95ac5c725..6c2d9779ac727 100644 --- a/arch/mips/lantiq/xway/sysctrl.c +++ b/arch/mips/lantiq/xway/sysctrl.c @@ -311,6 +311,8 @@ static void clkdev_add_pmu(const char *dev, const char *con, bool deactivate, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev; clk->cl.con_id = con; clk->cl.clk = clk; @@ -334,6 +336,8 @@ static void clkdev_add_cgu(const char *dev, const char *con, { struct clk *clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) + return; clk->cl.dev_id = dev; clk->cl.con_id = con; clk->cl.clk = clk; @@ -352,24 +356,28 @@ static void clkdev_add_pci(void) struct clk *clk_ext = kzalloc(sizeof(struct clk), GFP_KERNEL); /* main pci clock */ - clk->cl.dev_id = "17000000.pci"; - clk->cl.con_id = NULL; - clk->cl.clk = clk; - clk->rate = CLOCK_33M; - clk->rates = valid_pci_rates; - clk->enable = pci_enable; - clk->disable = pmu_disable; - clk->module = 0; - clk->bits = PMU_PCI; - clkdev_add(&clk->cl); + if (clk) { + clk->cl.dev_id = "17000000.pci"; + clk->cl.con_id = NULL; + clk->cl.clk = clk; + clk->rate = CLOCK_33M; + clk->rates = valid_pci_rates; + clk->enable = pci_enable; + clk->disable = pmu_disable; + clk->module = 0; + clk->bits = PMU_PCI; + clkdev_add(&clk->cl); + } /* use internal/external bus clock */ - clk_ext->cl.dev_id = "17000000.pci"; - clk_ext->cl.con_id = "external"; - clk_ext->cl.clk = clk_ext; - clk_ext->enable = pci_ext_enable; - clk_ext->disable = pci_ext_disable; - clkdev_add(&clk_ext->cl); + if (clk_ext) { + clk_ext->cl.dev_id = "17000000.pci"; + clk_ext->cl.con_id = "external"; + clk_ext->cl.clk = clk_ext; + clk_ext->enable = pci_ext_enable; + clk_ext->disable = pci_ext_disable; + clkdev_add(&clk_ext->cl); + } } /* xway socs can generate clocks on gpio pins */ @@ -389,9 +397,15 @@ static void clkdev_add_clkout(void) char *name; name = kzalloc(sizeof("clkout0"), GFP_KERNEL); + if (!name) + continue; sprintf(name, "clkout%d", i); clk = kzalloc(sizeof(struct clk), GFP_KERNEL); + if (!clk) { + kfree(name); + continue; + } clk->cl.dev_id = "1f103000.cgu"; clk->cl.con_id = name; clk->cl.clk = clk; @@ -514,8 +528,8 @@ void __init ltq_soc_init(void) clkdev_add_pmu("1e10b308.eth", NULL, 0, 0, PMU_SWITCH | PMU_PPE_DP | PMU_PPE_TC); clkdev_add_pmu("1da00000.usif", "NULL", 1, 0, PMU_USIF); - clkdev_add_pmu("1e108000.gswip", "gphy0", 0, 0, PMU_GPHY); - clkdev_add_pmu("1e108000.gswip", "gphy1", 0, 0, PMU_GPHY); + clkdev_add_pmu("1e108000.switch", "gphy0", 0, 0, PMU_GPHY); + clkdev_add_pmu("1e108000.switch", "gphy1", 0, 0, PMU_GPHY); clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); clkdev_add_pmu("1e116000.mei", "afe", 1, 2, PMU_ANALOG_DSL_AFE); clkdev_add_pmu("1e116000.mei", "dfe", 1, 0, PMU_DFE); @@ -538,8 +552,8 @@ void __init ltq_soc_init(void) PMU_SWITCH | PMU_PPE_DPLUS | PMU_PPE_DPLUM | PMU_PPE_EMA | PMU_PPE_TC | PMU_PPE_SLL01 | PMU_PPE_QSB | PMU_PPE_TOP); - clkdev_add_pmu("1e108000.gswip", "gphy0", 0, 0, PMU_GPHY); - clkdev_add_pmu("1e108000.gswip", "gphy1", 0, 0, PMU_GPHY); + clkdev_add_pmu("1e108000.switch", "gphy0", 0, 0, PMU_GPHY); + clkdev_add_pmu("1e108000.switch", "gphy1", 0, 0, PMU_GPHY); clkdev_add_pmu("1e103000.sdio", NULL, 1, 0, PMU_SDIO); clkdev_add_pmu("1e103100.deu", NULL, 1, 0, PMU_DEU); clkdev_add_pmu("1e116000.mei", "dfe", 1, 0, PMU_DFE); diff --git a/arch/mips/lib/mips-atomic.c b/arch/mips/lib/mips-atomic.c index 5530070e0d05d..57497a26e79cb 100644 --- a/arch/mips/lib/mips-atomic.c +++ b/arch/mips/lib/mips-atomic.c @@ -37,7 +37,7 @@ */ notrace void arch_local_irq_disable(void) { - preempt_disable(); + preempt_disable_notrace(); __asm__ __volatile__( " .set push \n" @@ -53,7 +53,7 @@ notrace void arch_local_irq_disable(void) : /* no inputs */ : "memory"); - preempt_enable(); + preempt_enable_notrace(); } EXPORT_SYMBOL(arch_local_irq_disable); @@ -61,7 +61,7 @@ notrace unsigned long arch_local_irq_save(void) { unsigned long flags; - preempt_disable(); + preempt_disable_notrace(); __asm__ __volatile__( " .set push \n" @@ -78,7 +78,7 @@ notrace unsigned long arch_local_irq_save(void) : /* no inputs */ : "memory"); - preempt_enable(); + preempt_enable_notrace(); return flags; } @@ -88,7 +88,7 @@ notrace void arch_local_irq_restore(unsigned long flags) { unsigned long __tmp1; - preempt_disable(); + preempt_disable_notrace(); __asm__ __volatile__( " .set push \n" @@ -106,7 +106,7 @@ notrace void arch_local_irq_restore(unsigned long flags) : "0" (flags) : "memory"); - preempt_enable(); + preempt_enable_notrace(); } EXPORT_SYMBOL(arch_local_irq_restore); diff --git a/arch/mips/lib/uncached.c b/arch/mips/lib/uncached.c index 09d5deea747f2..f80a67c092b63 100644 --- a/arch/mips/lib/uncached.c +++ b/arch/mips/lib/uncached.c @@ -37,10 +37,12 @@ */ unsigned long run_uncached(void *func) { - register long sp __asm__("$sp"); register long ret __asm__("$2"); long lfunc = (long)func, ufunc; long usp; + long sp; + + __asm__("move %0, $sp" : "=r" (sp)); if (sp >= (long)CKSEG0 && sp < (long)CKSEG2) usp = CKSEG1ADDR(sp); diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c index 8f20d2cb37672..7e7376cc94b16 100644 --- a/arch/mips/loongson64/loongson-3/numa.c +++ b/arch/mips/loongson64/loongson-3/numa.c @@ -200,6 +200,9 @@ static void __init node_mem_init(unsigned int node) if (node_end_pfn(0) >= (0xffffffff >> PAGE_SHIFT)) memblock_reserve((node_addrspace_offset | 0xfe000000), 32 << 20); + + /* Reserve pfn range 0~node[0]->node_start_pfn */ + memblock_reserve(0, PAGE_SIZE * start_pfn); } } diff --git a/arch/mips/loongson64/loongson-3/platform.c b/arch/mips/loongson64/loongson-3/platform.c index 13f3404f00300..9674ae1361a85 100644 --- a/arch/mips/loongson64/loongson-3/platform.c +++ b/arch/mips/loongson64/loongson-3/platform.c @@ -27,6 +27,9 @@ static int __init loongson3_platform_init(void) continue; pdev = kzalloc(sizeof(struct platform_device), GFP_KERNEL); + if (!pdev) + return -ENOMEM; + pdev->name = loongson_sysconf.sensors[i].name; pdev->id = loongson_sysconf.sensors[i].id; pdev->dev.platform_data = &loongson_sysconf.sensors[i]; diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 89b9c851d8227..3375bbe63284e 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -1560,7 +1560,7 @@ static int probe_scache(void) return 1; } -static void __init loongson2_sc_init(void) +static void loongson2_sc_init(void) { struct cpuinfo_mips *c = ¤t_cpu_data; @@ -1576,7 +1576,7 @@ static void __init loongson2_sc_init(void) c->options |= MIPS_CPU_INCLUSIVE_CACHES; } -static void __init loongson3_sc_init(void) +static void loongson3_sc_init(void) { struct cpuinfo_mips *c = ¤t_cpu_data; unsigned int config2, lsize; @@ -1676,7 +1676,11 @@ static void setup_scache(void) printk("MIPS secondary cache %ldkB, %s, linesize %d bytes.\n", scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); + + if (current_cpu_type() == CPU_BMIPS5000) + c->options |= MIPS_CPU_INCLUSIVE_CACHES; } + #else if (!(c->scache.flags & MIPS_CACHE_NOT_PRESENT)) panic("Dunno how to handle MIPS32 / MIPS64 second level cache"); diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c index dbdbfe5d84086..e67374268b42d 100644 --- a/arch/mips/mm/sc-mips.c +++ b/arch/mips/mm/sc-mips.c @@ -147,7 +147,7 @@ static inline int mips_sc_is_activated(struct cpuinfo_mips *c) return 1; } -static int __init mips_sc_probe_cm3(void) +static int mips_sc_probe_cm3(void) { struct cpuinfo_mips *c = ¤t_cpu_data; unsigned long cfg = read_gcr_l2_config(); @@ -181,7 +181,7 @@ static int __init mips_sc_probe_cm3(void) return 0; } -static inline int __init mips_sc_probe(void) +static inline int mips_sc_probe(void) { struct cpuinfo_mips *c = ¤t_cpu_data; unsigned int config1, config2; diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index c13e46ced4252..60046445122b3 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -437,6 +437,7 @@ int has_transparent_hugepage(void) } return mask == PM_HUGE_MASK; } +EXPORT_SYMBOL(has_transparent_hugepage); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 41bb91f056885..061dc5c97d5ad 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -629,7 +629,7 @@ static __maybe_unused void build_convert_pte_to_entrylo(u32 **p, return; } - if (cpu_has_rixi && !!_PAGE_NO_EXEC) { + if (cpu_has_rixi && _PAGE_NO_EXEC != 0) { if (fill_includes_sw_bits) { UASM_i_ROTR(p, reg, reg, ilog2(_PAGE_GLOBAL)); } else { @@ -1480,6 +1480,7 @@ static void build_r4000_tlb_refill_handler(void) static void setup_pw(void) { + unsigned int pwctl; unsigned long pgd_i, pgd_w; #ifndef __PAGETABLE_PMD_FOLDED unsigned long pmd_i, pmd_w; @@ -1506,6 +1507,7 @@ static void setup_pw(void) pte_i = ilog2(_PAGE_GLOBAL); pte_w = 0; + pwctl = 1 << 30; /* Set PWDirExt */ #ifndef __PAGETABLE_PMD_FOLDED write_c0_pwfield(pgd_i << 24 | pmd_i << 12 | pt_i << 6 | pte_i); @@ -1516,8 +1518,9 @@ static void setup_pw(void) #endif #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT - write_c0_pwctl(1 << 6 | psn); + pwctl |= (1 << 6 | psn); #endif + write_c0_pwctl(pwctl); write_c0_kpgd((long)swapper_pg_dir); kscratch_used_mask |= (1 << 7); /* KScratch6 is used for KPGD */ } @@ -2565,7 +2568,7 @@ static void check_pabits(void) unsigned long entry; unsigned pabits, fillbits; - if (!cpu_has_rixi || !_PAGE_NO_EXEC) { + if (!cpu_has_rixi || _PAGE_NO_EXEC == 0) { /* * We'll only be making use of the fact that we can rotate bits * into the fill if the CPU supports RIXI, so don't bother diff --git a/arch/mips/mti-malta/malta-dtshim.c b/arch/mips/mti-malta/malta-dtshim.c index 98a063093b69a..0be28adff5572 100644 --- a/arch/mips/mti-malta/malta-dtshim.c +++ b/arch/mips/mti-malta/malta-dtshim.c @@ -22,7 +22,7 @@ #define ROCIT_CONFIG_GEN1_MEMMAP_SHIFT 8 #define ROCIT_CONFIG_GEN1_MEMMAP_MASK (0xf << 8) -static unsigned char fdt_buf[16 << 10] __initdata; +static unsigned char fdt_buf[16 << 10] __initdata __aligned(8); /* determined physical memory size, not overridden by command line args */ extern unsigned long physical_memsize; diff --git a/arch/mips/mti-malta/malta-platform.c b/arch/mips/mti-malta/malta-platform.c index 11e9527c6e441..62ffac500eb52 100644 --- a/arch/mips/mti-malta/malta-platform.c +++ b/arch/mips/mti-malta/malta-platform.c @@ -47,7 +47,8 @@ static struct plat_serial8250_port uart8250_data[] = { .mapbase = 0x1f000900, /* The CBUS UART */ .irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_MB2, .uartclk = 3686400, /* Twice the usual clk! */ - .iotype = UPIO_MEM32, + .iotype = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) ? + UPIO_MEM32BE : UPIO_MEM32, .flags = CBUS_UART_FLAGS, .regshift = 3, }, diff --git a/arch/mips/net/Makefile b/arch/mips/net/Makefile index 2d03af7d6b19d..d55912349039c 100644 --- a/arch/mips/net/Makefile +++ b/arch/mips/net/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only # MIPS networking code +obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c new file mode 100644 index 0000000000000..29a288ff4f183 --- /dev/null +++ b/arch/mips/net/bpf_jit.c @@ -0,0 +1,1299 @@ +/* + * Just-In-Time compiler for BPF filters on MIPS + * + * Copyright (c) 2014 Imagination Technologies Ltd. + * Author: Markos Chandras + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_jit.h" + +/* ABI + * r_skb_hl SKB header length + * r_data SKB data pointer + * r_off Offset + * r_A BPF register A + * r_X BPF register X + * r_skb *skb + * r_M *scratch memory + * r_skb_len SKB length + * + * On entry (*bpf_func)(*skb, *filter) + * a0 = MIPS_R_A0 = skb; + * a1 = MIPS_R_A1 = filter; + * + * Stack + * ... + * M[15] + * M[14] + * M[13] + * ... + * M[0] <-- r_M + * saved reg k-1 + * saved reg k-2 + * ... + * saved reg 0 <-- r_sp + * + * + * Packet layout + * + * <--------------------- len ------------------------> + * <--skb-len(r_skb_hl)-->< ----- skb->data_len ------> + * ---------------------------------------------------- + * | skb->data | + * ---------------------------------------------------- + */ + +#define ptr typeof(unsigned long) + +#define SCRATCH_OFF(k) (4 * (k)) + +/* JIT flags */ +#define SEEN_CALL (1 << BPF_MEMWORDS) +#define SEEN_SREG_SFT (BPF_MEMWORDS + 1) +#define SEEN_SREG_BASE (1 << SEEN_SREG_SFT) +#define SEEN_SREG(x) (SEEN_SREG_BASE << (x)) +#define SEEN_OFF SEEN_SREG(2) +#define SEEN_A SEEN_SREG(3) +#define SEEN_X SEEN_SREG(4) +#define SEEN_SKB SEEN_SREG(5) +#define SEEN_MEM SEEN_SREG(6) +/* SEEN_SK_DATA also implies skb_hl an skb_len */ +#define SEEN_SKB_DATA (SEEN_SREG(7) | SEEN_SREG(1) | SEEN_SREG(0)) + +/* Arguments used by JIT */ +#define ARGS_USED_BY_JIT 2 /* only applicable to 64-bit */ + +#define SBIT(x) (1 << (x)) /* Signed version of BIT() */ + +/** + * struct jit_ctx - JIT context + * @skf: The sk_filter + * @prologue_bytes: Number of bytes for prologue + * @idx: Instruction index + * @flags: JIT flags + * @offsets: Instruction offsets + * @target: Memory location for the compiled filter + */ +struct jit_ctx { + const struct bpf_prog *skf; + unsigned int prologue_bytes; + u32 idx; + u32 flags; + u32 *offsets; + u32 *target; +}; + + +static inline int optimize_div(u32 *k) +{ + /* power of 2 divides can be implemented with right shift */ + if (!(*k & (*k-1))) { + *k = ilog2(*k); + return 1; + } + + return 0; +} + +static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx); + +/* Simply emit the instruction if the JIT memory space has been allocated */ +#define emit_instr(ctx, func, ...) \ +do { \ + if ((ctx)->target != NULL) { \ + u32 *p = &(ctx)->target[ctx->idx]; \ + uasm_i_##func(&p, ##__VA_ARGS__); \ + } \ + (ctx)->idx++; \ +} while (0) + +/* + * Similar to emit_instr but it must be used when we need to emit + * 32-bit or 64-bit instructions + */ +#define emit_long_instr(ctx, func, ...) \ +do { \ + if ((ctx)->target != NULL) { \ + u32 *p = &(ctx)->target[ctx->idx]; \ + UASM_i_##func(&p, ##__VA_ARGS__); \ + } \ + (ctx)->idx++; \ +} while (0) + +/* Determine if immediate is within the 16-bit signed range */ +static inline bool is_range16(s32 imm) +{ + return !(imm >= SBIT(15) || imm < -SBIT(15)); +} + +static inline void emit_addu(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, addu, dst, src1, src2); +} + +static inline void emit_nop(struct jit_ctx *ctx) +{ + emit_instr(ctx, nop); +} + +/* Load a u32 immediate to a register */ +static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx) +{ + if (ctx->target != NULL) { + /* addiu can only handle s16 */ + if (!is_range16(imm)) { + u32 *p = &ctx->target[ctx->idx]; + uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16); + p = &ctx->target[ctx->idx + 1]; + uasm_i_ori(&p, dst, r_tmp_imm, imm & 0xffff); + } else { + u32 *p = &ctx->target[ctx->idx]; + uasm_i_addiu(&p, dst, r_zero, imm); + } + } + ctx->idx++; + + if (!is_range16(imm)) + ctx->idx++; +} + +static inline void emit_or(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, or, dst, src1, src2); +} + +static inline void emit_ori(unsigned int dst, unsigned src, u32 imm, + struct jit_ctx *ctx) +{ + if (imm >= BIT(16)) { + emit_load_imm(r_tmp, imm, ctx); + emit_or(dst, src, r_tmp, ctx); + } else { + emit_instr(ctx, ori, dst, src, imm); + } +} + +static inline void emit_daddiu(unsigned int dst, unsigned int src, + int imm, struct jit_ctx *ctx) +{ + /* + * Only used for stack, so the imm is relatively small + * and it fits in 15-bits + */ + emit_instr(ctx, daddiu, dst, src, imm); +} + +static inline void emit_addiu(unsigned int dst, unsigned int src, + u32 imm, struct jit_ctx *ctx) +{ + if (!is_range16(imm)) { + emit_load_imm(r_tmp, imm, ctx); + emit_addu(dst, r_tmp, src, ctx); + } else { + emit_instr(ctx, addiu, dst, src, imm); + } +} + +static inline void emit_and(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, and, dst, src1, src2); +} + +static inline void emit_andi(unsigned int dst, unsigned int src, + u32 imm, struct jit_ctx *ctx) +{ + /* If imm does not fit in u16 then load it to register */ + if (imm >= BIT(16)) { + emit_load_imm(r_tmp, imm, ctx); + emit_and(dst, src, r_tmp, ctx); + } else { + emit_instr(ctx, andi, dst, src, imm); + } +} + +static inline void emit_xor(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, xor, dst, src1, src2); +} + +static inline void emit_xori(ptr dst, ptr src, u32 imm, struct jit_ctx *ctx) +{ + /* If imm does not fit in u16 then load it to register */ + if (imm >= BIT(16)) { + emit_load_imm(r_tmp, imm, ctx); + emit_xor(dst, src, r_tmp, ctx); + } else { + emit_instr(ctx, xori, dst, src, imm); + } +} + +static inline void emit_stack_offset(int offset, struct jit_ctx *ctx) +{ + emit_long_instr(ctx, ADDIU, r_sp, r_sp, offset); +} + +static inline void emit_subu(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, subu, dst, src1, src2); +} + +static inline void emit_neg(unsigned int reg, struct jit_ctx *ctx) +{ + emit_subu(reg, r_zero, reg, ctx); +} + +static inline void emit_sllv(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + emit_instr(ctx, sllv, dst, src, sa); +} + +static inline void emit_sll(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + /* sa is 5-bits long */ + if (sa >= BIT(5)) + /* Shifting >= 32 results in zero */ + emit_jit_reg_move(dst, r_zero, ctx); + else + emit_instr(ctx, sll, dst, src, sa); +} + +static inline void emit_srlv(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + emit_instr(ctx, srlv, dst, src, sa); +} + +static inline void emit_srl(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + /* sa is 5-bits long */ + if (sa >= BIT(5)) + /* Shifting >= 32 results in zero */ + emit_jit_reg_move(dst, r_zero, ctx); + else + emit_instr(ctx, srl, dst, src, sa); +} + +static inline void emit_slt(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, slt, dst, src1, src2); +} + +static inline void emit_sltu(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, sltu, dst, src1, src2); +} + +static inline void emit_sltiu(unsigned dst, unsigned int src, + unsigned int imm, struct jit_ctx *ctx) +{ + /* 16 bit immediate */ + if (!is_range16((s32)imm)) { + emit_load_imm(r_tmp, imm, ctx); + emit_sltu(dst, src, r_tmp, ctx); + } else { + emit_instr(ctx, sltiu, dst, src, imm); + } + +} + +/* Store register on the stack */ +static inline void emit_store_stack_reg(ptr reg, ptr base, + unsigned int offset, + struct jit_ctx *ctx) +{ + emit_long_instr(ctx, SW, reg, offset, base); +} + +static inline void emit_store(ptr reg, ptr base, unsigned int offset, + struct jit_ctx *ctx) +{ + emit_instr(ctx, sw, reg, offset, base); +} + +static inline void emit_load_stack_reg(ptr reg, ptr base, + unsigned int offset, + struct jit_ctx *ctx) +{ + emit_long_instr(ctx, LW, reg, offset, base); +} + +static inline void emit_load(unsigned int reg, unsigned int base, + unsigned int offset, struct jit_ctx *ctx) +{ + emit_instr(ctx, lw, reg, offset, base); +} + +static inline void emit_load_byte(unsigned int reg, unsigned int base, + unsigned int offset, struct jit_ctx *ctx) +{ + emit_instr(ctx, lb, reg, offset, base); +} + +static inline void emit_half_load(unsigned int reg, unsigned int base, + unsigned int offset, struct jit_ctx *ctx) +{ + emit_instr(ctx, lh, reg, offset, base); +} + +static inline void emit_half_load_unsigned(unsigned int reg, unsigned int base, + unsigned int offset, struct jit_ctx *ctx) +{ + emit_instr(ctx, lhu, reg, offset, base); +} + +static inline void emit_mul(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, mul, dst, src1, src2); +} + +static inline void emit_div(unsigned int dst, unsigned int src, + struct jit_ctx *ctx) +{ + if (ctx->target != NULL) { + u32 *p = &ctx->target[ctx->idx]; + uasm_i_divu(&p, dst, src); + p = &ctx->target[ctx->idx + 1]; + uasm_i_mflo(&p, dst); + } + ctx->idx += 2; /* 2 insts */ +} + +static inline void emit_mod(unsigned int dst, unsigned int src, + struct jit_ctx *ctx) +{ + if (ctx->target != NULL) { + u32 *p = &ctx->target[ctx->idx]; + uasm_i_divu(&p, dst, src); + p = &ctx->target[ctx->idx + 1]; + uasm_i_mfhi(&p, dst); + } + ctx->idx += 2; /* 2 insts */ +} + +static inline void emit_dsll(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + emit_instr(ctx, dsll, dst, src, sa); +} + +static inline void emit_dsrl32(unsigned int dst, unsigned int src, + unsigned int sa, struct jit_ctx *ctx) +{ + emit_instr(ctx, dsrl32, dst, src, sa); +} + +static inline void emit_wsbh(unsigned int dst, unsigned int src, + struct jit_ctx *ctx) +{ + emit_instr(ctx, wsbh, dst, src); +} + +/* load pointer to register */ +static inline void emit_load_ptr(unsigned int dst, unsigned int src, + int imm, struct jit_ctx *ctx) +{ + /* src contains the base addr of the 32/64-pointer */ + emit_long_instr(ctx, LW, dst, imm, src); +} + +/* load a function pointer to register */ +static inline void emit_load_func(unsigned int reg, ptr imm, + struct jit_ctx *ctx) +{ + if (IS_ENABLED(CONFIG_64BIT)) { + /* At this point imm is always 64-bit */ + emit_load_imm(r_tmp, (u64)imm >> 32, ctx); + emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */ + emit_ori(r_tmp, r_tmp_imm, (imm >> 16) & 0xffff, ctx); + emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */ + emit_ori(reg, r_tmp_imm, imm & 0xffff, ctx); + } else { + emit_load_imm(reg, imm, ctx); + } +} + +/* Move to real MIPS register */ +static inline void emit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx) +{ + emit_long_instr(ctx, ADDU, dst, src, r_zero); +} + +/* Move to JIT (32-bit) register */ +static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx) +{ + emit_addu(dst, src, r_zero, ctx); +} + +/* Compute the immediate value for PC-relative branches. */ +static inline u32 b_imm(unsigned int tgt, struct jit_ctx *ctx) +{ + if (ctx->target == NULL) + return 0; + + /* + * We want a pc-relative branch. We only do forward branches + * so tgt is always after pc. tgt is the instruction offset + * we want to jump to. + + * Branch on MIPS: + * I: target_offset <- sign_extend(offset) + * I+1: PC += target_offset (delay slot) + * + * ctx->idx currently points to the branch instruction + * but the offset is added to the delay slot so we need + * to subtract 4. + */ + return ctx->offsets[tgt] - + (ctx->idx * 4 - ctx->prologue_bytes) - 4; +} + +static inline void emit_bcond(int cond, unsigned int reg1, unsigned int reg2, + unsigned int imm, struct jit_ctx *ctx) +{ + if (ctx->target != NULL) { + u32 *p = &ctx->target[ctx->idx]; + + switch (cond) { + case MIPS_COND_EQ: + uasm_i_beq(&p, reg1, reg2, imm); + break; + case MIPS_COND_NE: + uasm_i_bne(&p, reg1, reg2, imm); + break; + case MIPS_COND_ALL: + uasm_i_b(&p, imm); + break; + default: + pr_warn("%s: Unhandled branch conditional: %d\n", + __func__, cond); + } + } + ctx->idx++; +} + +static inline void emit_b(unsigned int imm, struct jit_ctx *ctx) +{ + emit_bcond(MIPS_COND_ALL, r_zero, r_zero, imm, ctx); +} + +static inline void emit_jalr(unsigned int link, unsigned int reg, + struct jit_ctx *ctx) +{ + emit_instr(ctx, jalr, link, reg); +} + +static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx) +{ + emit_instr(ctx, jr, reg); +} + +static inline u16 align_sp(unsigned int num) +{ + /* Double word alignment for 32-bit, quadword for 64-bit */ + unsigned int align = IS_ENABLED(CONFIG_64BIT) ? 16 : 8; + num = (num + (align - 1)) & -align; + return num; +} + +static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset) +{ + int i = 0, real_off = 0; + u32 sflags, tmp_flags; + + /* Adjust the stack pointer */ + if (offset) + emit_stack_offset(-align_sp(offset), ctx); + + tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; + /* sflags is essentially a bitmap */ + while (tmp_flags) { + if ((sflags >> i) & 0x1) { + emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off, + ctx); + real_off += SZREG; + } + i++; + tmp_flags >>= 1; + } + + /* save return address */ + if (ctx->flags & SEEN_CALL) { + emit_store_stack_reg(r_ra, r_sp, real_off, ctx); + real_off += SZREG; + } + + /* Setup r_M leaving the alignment gap if necessary */ + if (ctx->flags & SEEN_MEM) { + if (real_off % (SZREG * 2)) + real_off += SZREG; + emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off); + } +} + +static void restore_bpf_jit_regs(struct jit_ctx *ctx, + unsigned int offset) +{ + int i, real_off = 0; + u32 sflags, tmp_flags; + + tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; + /* sflags is a bitmap */ + i = 0; + while (tmp_flags) { + if ((sflags >> i) & 0x1) { + emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off, + ctx); + real_off += SZREG; + } + i++; + tmp_flags >>= 1; + } + + /* restore return address */ + if (ctx->flags & SEEN_CALL) + emit_load_stack_reg(r_ra, r_sp, real_off, ctx); + + /* Restore the sp and discard the scrach memory */ + if (offset) + emit_stack_offset(align_sp(offset), ctx); +} + +static unsigned int get_stack_depth(struct jit_ctx *ctx) +{ + int sp_off = 0; + + + /* How may s* regs do we need to preserved? */ + sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * SZREG; + + if (ctx->flags & SEEN_MEM) + sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */ + + if (ctx->flags & SEEN_CALL) + sp_off += SZREG; /* Space for our ra register */ + + return sp_off; +} + +static void build_prologue(struct jit_ctx *ctx) +{ + int sp_off; + + /* Calculate the total offset for the stack pointer */ + sp_off = get_stack_depth(ctx); + save_bpf_jit_regs(ctx, sp_off); + + if (ctx->flags & SEEN_SKB) + emit_reg_move(r_skb, MIPS_R_A0, ctx); + + if (ctx->flags & SEEN_SKB_DATA) { + /* Load packet length */ + emit_load(r_skb_len, r_skb, offsetof(struct sk_buff, len), + ctx); + emit_load(r_tmp, r_skb, offsetof(struct sk_buff, data_len), + ctx); + /* Load the data pointer */ + emit_load_ptr(r_skb_data, r_skb, + offsetof(struct sk_buff, data), ctx); + /* Load the header length */ + emit_subu(r_skb_hl, r_skb_len, r_tmp, ctx); + } + + if (ctx->flags & SEEN_X) + emit_jit_reg_move(r_X, r_zero, ctx); + + /* + * Do not leak kernel data to userspace, we only need to clear + * r_A if it is ever used. In fact if it is never used, we + * will not save/restore it, so clearing it in this case would + * corrupt the state of the caller. + */ + if (bpf_needs_clear_a(&ctx->skf->insns[0]) && + (ctx->flags & SEEN_A)) + emit_jit_reg_move(r_A, r_zero, ctx); +} + +static void build_epilogue(struct jit_ctx *ctx) +{ + unsigned int sp_off; + + /* Calculate the total offset for the stack pointer */ + + sp_off = get_stack_depth(ctx); + restore_bpf_jit_regs(ctx, sp_off); + + /* Return */ + emit_jr(r_ra, ctx); + emit_nop(ctx); +} + +#define CHOOSE_LOAD_FUNC(K, func) \ + ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative : func) : \ + func##_positive) + +static bool is_bad_offset(int b_off) +{ + return b_off > 0x1ffff || b_off < -0x20000; +} + +static int build_body(struct jit_ctx *ctx) +{ + const struct bpf_prog *prog = ctx->skf; + const struct sock_filter *inst; + unsigned int i, off, condt; + u32 k, b_off __maybe_unused; + u8 (*sk_load_func)(unsigned long *skb, int offset); + + for (i = 0; i < prog->len; i++) { + u16 code; + + inst = &(prog->insns[i]); + pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n", + __func__, inst->code, inst->jt, inst->jf, inst->k); + k = inst->k; + code = bpf_anc_helper(inst); + + if (ctx->target == NULL) + ctx->offsets[i] = ctx->idx * 4; + + switch (code) { + case BPF_LD | BPF_IMM: + /* A <- k ==> li r_A, k */ + ctx->flags |= SEEN_A; + emit_load_imm(r_A, k, ctx); + break; + case BPF_LD | BPF_W | BPF_LEN: + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); + /* A <- len ==> lw r_A, offset(skb) */ + ctx->flags |= SEEN_SKB | SEEN_A; + off = offsetof(struct sk_buff, len); + emit_load(r_A, r_skb, off, ctx); + break; + case BPF_LD | BPF_MEM: + /* A <- M[k] ==> lw r_A, offset(M) */ + ctx->flags |= SEEN_MEM | SEEN_A; + emit_load(r_A, r_M, SCRATCH_OFF(k), ctx); + break; + case BPF_LD | BPF_W | BPF_ABS: + /* A <- P[k:4] */ + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_word); + goto load; + case BPF_LD | BPF_H | BPF_ABS: + /* A <- P[k:2] */ + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_half); + goto load; + case BPF_LD | BPF_B | BPF_ABS: + /* A <- P[k:1] */ + sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_byte); +load: + emit_load_imm(r_off, k, ctx); +load_common: + ctx->flags |= SEEN_CALL | SEEN_OFF | + SEEN_SKB | SEEN_A | SEEN_SKB_DATA; + + emit_load_func(r_s0, (ptr)sk_load_func, ctx); + emit_reg_move(MIPS_R_A0, r_skb, ctx); + emit_jalr(MIPS_R_RA, r_s0, ctx); + /* Load second argument to delay slot */ + emit_reg_move(MIPS_R_A1, r_off, ctx); + /* Check the error value */ + emit_bcond(MIPS_COND_EQ, r_ret, 0, b_imm(i + 1, ctx), + ctx); + /* Load return register on DS for failures */ + emit_reg_move(r_ret, r_zero, ctx); + /* Return with error */ + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_b(b_off, ctx); + emit_nop(ctx); + break; + case BPF_LD | BPF_W | BPF_IND: + /* A <- P[X + k:4] */ + sk_load_func = sk_load_word; + goto load_ind; + case BPF_LD | BPF_H | BPF_IND: + /* A <- P[X + k:2] */ + sk_load_func = sk_load_half; + goto load_ind; + case BPF_LD | BPF_B | BPF_IND: + /* A <- P[X + k:1] */ + sk_load_func = sk_load_byte; +load_ind: + ctx->flags |= SEEN_OFF | SEEN_X; + emit_addiu(r_off, r_X, k, ctx); + goto load_common; + case BPF_LDX | BPF_IMM: + /* X <- k */ + ctx->flags |= SEEN_X; + emit_load_imm(r_X, k, ctx); + break; + case BPF_LDX | BPF_MEM: + /* X <- M[k] */ + ctx->flags |= SEEN_X | SEEN_MEM; + emit_load(r_X, r_M, SCRATCH_OFF(k), ctx); + break; + case BPF_LDX | BPF_W | BPF_LEN: + /* X <- len */ + ctx->flags |= SEEN_X | SEEN_SKB; + off = offsetof(struct sk_buff, len); + emit_load(r_X, r_skb, off, ctx); + break; + case BPF_LDX | BPF_B | BPF_MSH: + /* X <- 4 * (P[k:1] & 0xf) */ + ctx->flags |= SEEN_X | SEEN_CALL | SEEN_SKB; + /* Load offset to a1 */ + emit_load_func(r_s0, (ptr)sk_load_byte, ctx); + /* + * This may emit two instructions so it may not fit + * in the delay slot. So use a0 in the delay slot. + */ + emit_load_imm(MIPS_R_A1, k, ctx); + emit_jalr(MIPS_R_RA, r_s0, ctx); + emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */ + /* Check the error value */ + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_bcond(MIPS_COND_NE, r_ret, 0, b_off, ctx); + emit_reg_move(r_ret, r_zero, ctx); + /* We are good */ + /* X <- P[1:K] & 0xf */ + emit_andi(r_X, r_A, 0xf, ctx); + /* X << 2 */ + emit_b(b_imm(i + 1, ctx), ctx); + emit_sll(r_X, r_X, 2, ctx); /* delay slot */ + break; + case BPF_ST: + /* M[k] <- A */ + ctx->flags |= SEEN_MEM | SEEN_A; + emit_store(r_A, r_M, SCRATCH_OFF(k), ctx); + break; + case BPF_STX: + /* M[k] <- X */ + ctx->flags |= SEEN_MEM | SEEN_X; + emit_store(r_X, r_M, SCRATCH_OFF(k), ctx); + break; + case BPF_ALU | BPF_ADD | BPF_K: + /* A += K */ + ctx->flags |= SEEN_A; + emit_addiu(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_ADD | BPF_X: + /* A += X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_addu(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_SUB | BPF_K: + /* A -= K */ + ctx->flags |= SEEN_A; + emit_addiu(r_A, r_A, -k, ctx); + break; + case BPF_ALU | BPF_SUB | BPF_X: + /* A -= X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_subu(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_MUL | BPF_K: + /* A *= K */ + /* Load K to scratch register before MUL */ + ctx->flags |= SEEN_A; + emit_load_imm(r_s0, k, ctx); + emit_mul(r_A, r_A, r_s0, ctx); + break; + case BPF_ALU | BPF_MUL | BPF_X: + /* A *= X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_mul(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_DIV | BPF_K: + /* A /= k */ + if (k == 1) + break; + if (optimize_div(&k)) { + ctx->flags |= SEEN_A; + emit_srl(r_A, r_A, k, ctx); + break; + } + ctx->flags |= SEEN_A; + emit_load_imm(r_s0, k, ctx); + emit_div(r_A, r_s0, ctx); + break; + case BPF_ALU | BPF_MOD | BPF_K: + /* A %= k */ + if (k == 1) { + ctx->flags |= SEEN_A; + emit_jit_reg_move(r_A, r_zero, ctx); + } else { + ctx->flags |= SEEN_A; + emit_load_imm(r_s0, k, ctx); + emit_mod(r_A, r_s0, ctx); + } + break; + case BPF_ALU | BPF_DIV | BPF_X: + /* A /= X */ + ctx->flags |= SEEN_X | SEEN_A; + /* Check if r_X is zero */ + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx); + emit_load_imm(r_ret, 0, ctx); /* delay slot */ + emit_div(r_A, r_X, ctx); + break; + case BPF_ALU | BPF_MOD | BPF_X: + /* A %= X */ + ctx->flags |= SEEN_X | SEEN_A; + /* Check if r_X is zero */ + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx); + emit_load_imm(r_ret, 0, ctx); /* delay slot */ + emit_mod(r_A, r_X, ctx); + break; + case BPF_ALU | BPF_OR | BPF_K: + /* A |= K */ + ctx->flags |= SEEN_A; + emit_ori(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_OR | BPF_X: + /* A |= X */ + ctx->flags |= SEEN_A; + emit_ori(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_XOR | BPF_K: + /* A ^= k */ + ctx->flags |= SEEN_A; + emit_xori(r_A, r_A, k, ctx); + break; + case BPF_ANC | SKF_AD_ALU_XOR_X: + case BPF_ALU | BPF_XOR | BPF_X: + /* A ^= X */ + ctx->flags |= SEEN_A; + emit_xor(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_AND | BPF_K: + /* A &= K */ + ctx->flags |= SEEN_A; + emit_andi(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_AND | BPF_X: + /* A &= X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_and(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_LSH | BPF_K: + /* A <<= K */ + ctx->flags |= SEEN_A; + emit_sll(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_LSH | BPF_X: + /* A <<= X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_sllv(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_RSH | BPF_K: + /* A >>= K */ + ctx->flags |= SEEN_A; + emit_srl(r_A, r_A, k, ctx); + break; + case BPF_ALU | BPF_RSH | BPF_X: + ctx->flags |= SEEN_A | SEEN_X; + emit_srlv(r_A, r_A, r_X, ctx); + break; + case BPF_ALU | BPF_NEG: + /* A = -A */ + ctx->flags |= SEEN_A; + emit_neg(r_A, ctx); + break; + case BPF_JMP | BPF_JA: + /* pc += K */ + b_off = b_imm(i + k + 1, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_b(b_off, ctx); + emit_nop(ctx); + break; + case BPF_JMP | BPF_JEQ | BPF_K: + /* pc += ( A == K ) ? pc->jt : pc->jf */ + condt = MIPS_COND_EQ | MIPS_COND_K; + goto jmp_cmp; + case BPF_JMP | BPF_JEQ | BPF_X: + ctx->flags |= SEEN_X; + /* pc += ( A == X ) ? pc->jt : pc->jf */ + condt = MIPS_COND_EQ | MIPS_COND_X; + goto jmp_cmp; + case BPF_JMP | BPF_JGE | BPF_K: + /* pc += ( A >= K ) ? pc->jt : pc->jf */ + condt = MIPS_COND_GE | MIPS_COND_K; + goto jmp_cmp; + case BPF_JMP | BPF_JGE | BPF_X: + ctx->flags |= SEEN_X; + /* pc += ( A >= X ) ? pc->jt : pc->jf */ + condt = MIPS_COND_GE | MIPS_COND_X; + goto jmp_cmp; + case BPF_JMP | BPF_JGT | BPF_K: + /* pc += ( A > K ) ? pc->jt : pc->jf */ + condt = MIPS_COND_GT | MIPS_COND_K; + goto jmp_cmp; + case BPF_JMP | BPF_JGT | BPF_X: + ctx->flags |= SEEN_X; + /* pc += ( A > X ) ? pc->jt : pc->jf */ + condt = MIPS_COND_GT | MIPS_COND_X; +jmp_cmp: + /* Greater or Equal */ + if ((condt & MIPS_COND_GE) || + (condt & MIPS_COND_GT)) { + if (condt & MIPS_COND_K) { /* K */ + ctx->flags |= SEEN_A; + emit_sltiu(r_s0, r_A, k, ctx); + } else { /* X */ + ctx->flags |= SEEN_A | + SEEN_X; + emit_sltu(r_s0, r_A, r_X, ctx); + } + /* A < (K|X) ? r_scrach = 1 */ + b_off = b_imm(i + inst->jf + 1, ctx); + emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, + ctx); + emit_nop(ctx); + /* A > (K|X) ? scratch = 0 */ + if (condt & MIPS_COND_GT) { + /* Checking for equality */ + ctx->flags |= SEEN_A | SEEN_X; + if (condt & MIPS_COND_K) + emit_load_imm(r_s0, k, ctx); + else + emit_jit_reg_move(r_s0, r_X, + ctx); + b_off = b_imm(i + inst->jf + 1, ctx); + emit_bcond(MIPS_COND_EQ, r_A, r_s0, + b_off, ctx); + emit_nop(ctx); + /* Finally, A > K|X */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_b(b_off, ctx); + emit_nop(ctx); + } else { + /* A >= (K|X) so jump */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_b(b_off, ctx); + emit_nop(ctx); + } + } else { + /* A == K|X */ + if (condt & MIPS_COND_K) { /* K */ + ctx->flags |= SEEN_A; + emit_load_imm(r_s0, k, ctx); + /* jump true */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_bcond(MIPS_COND_EQ, r_A, r_s0, + b_off, ctx); + emit_nop(ctx); + /* jump false */ + b_off = b_imm(i + inst->jf + 1, + ctx); + emit_bcond(MIPS_COND_NE, r_A, r_s0, + b_off, ctx); + emit_nop(ctx); + } else { /* X */ + /* jump true */ + ctx->flags |= SEEN_A | SEEN_X; + b_off = b_imm(i + inst->jt + 1, + ctx); + emit_bcond(MIPS_COND_EQ, r_A, r_X, + b_off, ctx); + emit_nop(ctx); + /* jump false */ + b_off = b_imm(i + inst->jf + 1, ctx); + emit_bcond(MIPS_COND_NE, r_A, r_X, + b_off, ctx); + emit_nop(ctx); + } + } + break; + case BPF_JMP | BPF_JSET | BPF_K: + ctx->flags |= SEEN_A; + /* pc += (A & K) ? pc -> jt : pc -> jf */ + emit_load_imm(r_s1, k, ctx); + emit_and(r_s0, r_A, r_s1, ctx); + /* jump true */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx); + emit_nop(ctx); + /* jump false */ + b_off = b_imm(i + inst->jf + 1, ctx); + emit_b(b_off, ctx); + emit_nop(ctx); + break; + case BPF_JMP | BPF_JSET | BPF_X: + ctx->flags |= SEEN_X | SEEN_A; + /* pc += (A & X) ? pc -> jt : pc -> jf */ + emit_and(r_s0, r_A, r_X, ctx); + /* jump true */ + b_off = b_imm(i + inst->jt + 1, ctx); + emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx); + emit_nop(ctx); + /* jump false */ + b_off = b_imm(i + inst->jf + 1, ctx); + emit_b(b_off, ctx); + emit_nop(ctx); + break; + case BPF_RET | BPF_A: + ctx->flags |= SEEN_A; + if (i != prog->len - 1) { + /* + * If this is not the last instruction + * then jump to the epilogue + */ + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_b(b_off, ctx); + } + emit_reg_move(r_ret, r_A, ctx); /* delay slot */ + break; + case BPF_RET | BPF_K: + /* + * It can emit two instructions so it does not fit on + * the delay slot. + */ + emit_load_imm(r_ret, k, ctx); + if (i != prog->len - 1) { + /* + * If this is not the last instruction + * then jump to the epilogue + */ + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_b(b_off, ctx); + emit_nop(ctx); + } + break; + case BPF_MISC | BPF_TAX: + /* X = A */ + ctx->flags |= SEEN_X | SEEN_A; + emit_jit_reg_move(r_X, r_A, ctx); + break; + case BPF_MISC | BPF_TXA: + /* A = X */ + ctx->flags |= SEEN_A | SEEN_X; + emit_jit_reg_move(r_A, r_X, ctx); + break; + /* AUX */ + case BPF_ANC | SKF_AD_PROTOCOL: + /* A = ntohs(skb->protocol */ + ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A; + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, + protocol) != 2); + off = offsetof(struct sk_buff, protocol); + emit_half_load(r_A, r_skb, off, ctx); +#ifdef CONFIG_CPU_LITTLE_ENDIAN + /* This needs little endian fixup */ + if (cpu_has_wsbh) { + /* R2 and later have the wsbh instruction */ + emit_wsbh(r_A, r_A, ctx); + } else { + /* Get first byte */ + emit_andi(r_tmp_imm, r_A, 0xff, ctx); + /* Shift it */ + emit_sll(r_tmp, r_tmp_imm, 8, ctx); + /* Get second byte */ + emit_srl(r_tmp_imm, r_A, 8, ctx); + emit_andi(r_tmp_imm, r_tmp_imm, 0xff, ctx); + /* Put everyting together in r_A */ + emit_or(r_A, r_tmp, r_tmp_imm, ctx); + } +#endif + break; + case BPF_ANC | SKF_AD_CPU: + ctx->flags |= SEEN_A | SEEN_OFF; + /* A = current_thread_info()->cpu */ + BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, + cpu) != 4); + off = offsetof(struct thread_info, cpu); + /* $28/gp points to the thread_info struct */ + emit_load(r_A, 28, off, ctx); + break; + case BPF_ANC | SKF_AD_IFINDEX: + /* A = skb->dev->ifindex */ + case BPF_ANC | SKF_AD_HATYPE: + /* A = skb->dev->type */ + ctx->flags |= SEEN_SKB | SEEN_A; + off = offsetof(struct sk_buff, dev); + /* Load *dev pointer */ + emit_load_ptr(r_s0, r_skb, off, ctx); + /* error (0) in the delay slot */ + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_bcond(MIPS_COND_EQ, r_s0, r_zero, b_off, ctx); + emit_reg_move(r_ret, r_zero, ctx); + if (code == (BPF_ANC | SKF_AD_IFINDEX)) { + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); + off = offsetof(struct net_device, ifindex); + emit_load(r_A, r_s0, off, ctx); + } else { /* (code == (BPF_ANC | SKF_AD_HATYPE) */ + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); + off = offsetof(struct net_device, type); + emit_half_load_unsigned(r_A, r_s0, off, ctx); + } + break; + case BPF_ANC | SKF_AD_MARK: + ctx->flags |= SEEN_SKB | SEEN_A; + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + off = offsetof(struct sk_buff, mark); + emit_load(r_A, r_skb, off, ctx); + break; + case BPF_ANC | SKF_AD_RXHASH: + ctx->flags |= SEEN_SKB | SEEN_A; + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); + off = offsetof(struct sk_buff, hash); + emit_load(r_A, r_skb, off, ctx); + break; + case BPF_ANC | SKF_AD_VLAN_TAG: + ctx->flags |= SEEN_SKB | SEEN_A; + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, + vlan_tci) != 2); + off = offsetof(struct sk_buff, vlan_tci); + emit_half_load_unsigned(r_A, r_skb, off, ctx); + break; + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: + ctx->flags |= SEEN_SKB | SEEN_A; + emit_load_byte(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET(), ctx); + if (PKT_VLAN_PRESENT_BIT) + emit_srl(r_A, r_A, PKT_VLAN_PRESENT_BIT, ctx); + if (PKT_VLAN_PRESENT_BIT < 7) + emit_andi(r_A, r_A, 1, ctx); + break; + case BPF_ANC | SKF_AD_PKTTYPE: + ctx->flags |= SEEN_SKB; + + emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx); + /* Keep only the last 3 bits */ + emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx); +#ifdef __BIG_ENDIAN_BITFIELD + /* Get the actual packet type to the lower 3 bits */ + emit_srl(r_A, r_A, 5, ctx); +#endif + break; + case BPF_ANC | SKF_AD_QUEUE: + ctx->flags |= SEEN_SKB | SEEN_A; + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, + queue_mapping) != 2); + BUILD_BUG_ON(offsetof(struct sk_buff, + queue_mapping) > 0xff); + off = offsetof(struct sk_buff, queue_mapping); + emit_half_load_unsigned(r_A, r_skb, off, ctx); + break; + default: + pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__, + inst->code); + return -1; + } + } + + /* compute offsets only during the first pass */ + if (ctx->target == NULL) + ctx->offsets[i] = ctx->idx * 4; + + return 0; +} + +void bpf_jit_compile(struct bpf_prog *fp) +{ + struct jit_ctx ctx; + unsigned int alloc_size, tmp_idx; + + if (!bpf_jit_enable) + return; + + memset(&ctx, 0, sizeof(ctx)); + + ctx.offsets = kcalloc(fp->len + 1, sizeof(*ctx.offsets), GFP_KERNEL); + if (ctx.offsets == NULL) + return; + + ctx.skf = fp; + + if (build_body(&ctx)) + goto out; + + tmp_idx = ctx.idx; + build_prologue(&ctx); + ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4; + /* just to complete the ctx.idx count */ + build_epilogue(&ctx); + + alloc_size = 4 * ctx.idx; + ctx.target = module_alloc(alloc_size); + if (ctx.target == NULL) + goto out; + + /* Clean it */ + memset(ctx.target, 0, alloc_size); + + ctx.idx = 0; + + /* Generate the actual JIT code */ + build_prologue(&ctx); + if (build_body(&ctx)) { + module_memfree(ctx.target); + goto out; + } + build_epilogue(&ctx); + + /* Update the icache */ + flush_icache_range((ptr)ctx.target, (ptr)(ctx.target + ctx.idx)); + + if (bpf_jit_enable > 1) + /* Dump JIT code */ + bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); + + fp->bpf_func = (void *)ctx.target; + fp->jited = 1; + +out: + kfree(ctx.offsets); +} + +void bpf_jit_free(struct bpf_prog *fp) +{ + if (fp->jited) + module_memfree(fp->bpf_func); + + bpf_prog_unlock_free(fp); +} diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S new file mode 100644 index 0000000000000..57154c5883b6f --- /dev/null +++ b/arch/mips/net/bpf_jit_asm.S @@ -0,0 +1,285 @@ +/* + * bpf_jib_asm.S: Packet/header access helper functions for MIPS/MIPS64 BPF + * compiler. + * + * Copyright (C) 2015 Imagination Technologies Ltd. + * Author: Markos Chandras + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + */ + +#include +#include +#include +#include "bpf_jit.h" + +/* ABI + * + * r_skb_hl skb header length + * r_skb_data skb data + * r_off(a1) offset register + * r_A BPF register A + * r_X PF register X + * r_skb(a0) *skb + * r_M *scratch memory + * r_skb_le skb length + * r_s0 Scratch register 0 + * r_s1 Scratch register 1 + * + * On entry: + * a0: *skb + * a1: offset (imm or imm + X) + * + * All non-BPF-ABI registers are free for use. On return, we only + * care about r_ret. The BPF-ABI registers are assumed to remain + * unmodified during the entire filter operation. + */ + +#define skb a0 +#define offset a1 +#define SKF_LL_OFF (-0x200000) /* Can't include linux/filter.h in assembly */ + + /* We know better :) so prevent assembler reordering etc */ + .set noreorder + +#define is_offset_negative(TYPE) \ + /* If offset is negative we have more work to do */ \ + slti t0, offset, 0; \ + bgtz t0, bpf_slow_path_##TYPE##_neg; \ + /* Be careful what follows in DS. */ + +#define is_offset_in_header(SIZE, TYPE) \ + /* Reading from header? */ \ + addiu $r_s0, $r_skb_hl, -SIZE; \ + slt t0, $r_s0, offset; \ + bgtz t0, bpf_slow_path_##TYPE; \ + +LEAF(sk_load_word) + is_offset_negative(word) +FEXPORT(sk_load_word_positive) + is_offset_in_header(4, word) + /* Offset within header boundaries */ + PTR_ADDU t1, $r_skb_data, offset + .set reorder + lw $r_A, 0(t1) + .set noreorder +#ifdef CONFIG_CPU_LITTLE_ENDIAN +# if MIPS_ISA_REV >= 2 + wsbh t0, $r_A + rotr $r_A, t0, 16 +# else + sll t0, $r_A, 24 + srl t1, $r_A, 24 + srl t2, $r_A, 8 + or t0, t0, t1 + andi t2, t2, 0xff00 + andi t1, $r_A, 0xff00 + or t0, t0, t2 + sll t1, t1, 8 + or $r_A, t0, t1 +# endif +#endif + jr $r_ra + move $r_ret, zero + END(sk_load_word) + +LEAF(sk_load_half) + is_offset_negative(half) +FEXPORT(sk_load_half_positive) + is_offset_in_header(2, half) + /* Offset within header boundaries */ + PTR_ADDU t1, $r_skb_data, offset + lhu $r_A, 0(t1) +#ifdef CONFIG_CPU_LITTLE_ENDIAN +# if MIPS_ISA_REV >= 2 + wsbh $r_A, $r_A +# else + sll t0, $r_A, 8 + srl t1, $r_A, 8 + andi t0, t0, 0xff00 + or $r_A, t0, t1 +# endif +#endif + jr $r_ra + move $r_ret, zero + END(sk_load_half) + +LEAF(sk_load_byte) + is_offset_negative(byte) +FEXPORT(sk_load_byte_positive) + is_offset_in_header(1, byte) + /* Offset within header boundaries */ + PTR_ADDU t1, $r_skb_data, offset + lbu $r_A, 0(t1) + jr $r_ra + move $r_ret, zero + END(sk_load_byte) + +/* + * call skb_copy_bits: + * (prototype in linux/skbuff.h) + * + * int skb_copy_bits(sk_buff *skb, int offset, void *to, int len) + * + * o32 mandates we leave 4 spaces for argument registers in case + * the callee needs to use them. Even though we don't care about + * the argument registers ourselves, we need to allocate that space + * to remain ABI compliant since the callee may want to use that space. + * We also allocate 2 more spaces for $r_ra and our return register (*to). + * + * n64 is a bit different. The *caller* will allocate the space to preserve + * the arguments. So in 64-bit kernels, we allocate the 4-arg space for no + * good reason but it does not matter that much really. + * + * (void *to) is returned in r_s0 + * + */ +#ifdef CONFIG_CPU_LITTLE_ENDIAN +#define DS_OFFSET(SIZE) (4 * SZREG) +#else +#define DS_OFFSET(SIZE) ((4 * SZREG) + (4 - SIZE)) +#endif +#define bpf_slow_path_common(SIZE) \ + /* Quick check. Are we within reasonable boundaries? */ \ + LONG_ADDIU $r_s1, $r_skb_len, -SIZE; \ + sltu $r_s0, offset, $r_s1; \ + beqz $r_s0, fault; \ + /* Load 4th argument in DS */ \ + LONG_ADDIU a3, zero, SIZE; \ + PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \ + PTR_LA t0, skb_copy_bits; \ + PTR_S $r_ra, (5 * SZREG)($r_sp); \ + /* Assign low slot to a2 */ \ + PTR_ADDIU a2, $r_sp, DS_OFFSET(SIZE); \ + jalr t0; \ + /* Reset our destination slot (DS but it's ok) */ \ + INT_S zero, (4 * SZREG)($r_sp); \ + /* \ + * skb_copy_bits returns 0 on success and -EFAULT \ + * on error. Our data live in a2. Do not bother with \ + * our data if an error has been returned. \ + */ \ + /* Restore our frame */ \ + PTR_L $r_ra, (5 * SZREG)($r_sp); \ + INT_L $r_s0, (4 * SZREG)($r_sp); \ + bltz v0, fault; \ + PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \ + move $r_ret, zero; \ + +NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp) + bpf_slow_path_common(4) +#ifdef CONFIG_CPU_LITTLE_ENDIAN +# if MIPS_ISA_REV >= 2 + wsbh t0, $r_s0 + jr $r_ra + rotr $r_A, t0, 16 +# else + sll t0, $r_s0, 24 + srl t1, $r_s0, 24 + srl t2, $r_s0, 8 + or t0, t0, t1 + andi t2, t2, 0xff00 + andi t1, $r_s0, 0xff00 + or t0, t0, t2 + sll t1, t1, 8 + jr $r_ra + or $r_A, t0, t1 +# endif +#else + jr $r_ra + move $r_A, $r_s0 +#endif + + END(bpf_slow_path_word) + +NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp) + bpf_slow_path_common(2) +#ifdef CONFIG_CPU_LITTLE_ENDIAN +# if MIPS_ISA_REV >= 2 + jr $r_ra + wsbh $r_A, $r_s0 +# else + sll t0, $r_s0, 8 + andi t1, $r_s0, 0xff00 + andi t0, t0, 0xff00 + srl t1, t1, 8 + jr $r_ra + or $r_A, t0, t1 +# endif +#else + jr $r_ra + move $r_A, $r_s0 +#endif + + END(bpf_slow_path_half) + +NESTED(bpf_slow_path_byte, (6 * SZREG), $r_sp) + bpf_slow_path_common(1) + jr $r_ra + move $r_A, $r_s0 + + END(bpf_slow_path_byte) + +/* + * Negative entry points + */ + .macro bpf_is_end_of_data + li t0, SKF_LL_OFF + /* Reading link layer data? */ + slt t1, offset, t0 + bgtz t1, fault + /* Be careful what follows in DS. */ + .endm +/* + * call skb_copy_bits: + * (prototype in linux/filter.h) + * + * void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, + * int k, unsigned int size) + * + * see above (bpf_slow_path_common) for ABI restrictions + */ +#define bpf_negative_common(SIZE) \ + PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \ + PTR_LA t0, bpf_internal_load_pointer_neg_helper; \ + PTR_S $r_ra, (5 * SZREG)($r_sp); \ + jalr t0; \ + li a2, SIZE; \ + PTR_L $r_ra, (5 * SZREG)($r_sp); \ + /* Check return pointer */ \ + beqz v0, fault; \ + PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \ + /* Preserve our pointer */ \ + move $r_s0, v0; \ + /* Set return value */ \ + move $r_ret, zero; \ + +bpf_slow_path_word_neg: + bpf_is_end_of_data +NESTED(sk_load_word_negative, (6 * SZREG), $r_sp) + bpf_negative_common(4) + jr $r_ra + lw $r_A, 0($r_s0) + END(sk_load_word_negative) + +bpf_slow_path_half_neg: + bpf_is_end_of_data +NESTED(sk_load_half_negative, (6 * SZREG), $r_sp) + bpf_negative_common(2) + jr $r_ra + lhu $r_A, 0($r_s0) + END(sk_load_half_negative) + +bpf_slow_path_byte_neg: + bpf_is_end_of_data +NESTED(sk_load_byte_negative, (6 * SZREG), $r_sp) + bpf_negative_common(1) + jr $r_ra + lbu $r_A, 0($r_s0) + END(sk_load_byte_negative) + +fault: + jr $r_ra + addiu $r_ret, zero, 1 diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 46b76751f3a5f..b31b91e57c341 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -604,6 +604,7 @@ static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value) static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx) { int off, b_off; + int tcc_reg; ctx->flags |= EBPF_SEEN_TC; /* @@ -616,14 +617,14 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx) b_off = b_imm(this_idx + 1, ctx); emit_instr(ctx, bne, MIPS_R_AT, MIPS_R_ZERO, b_off); /* - * if (--TCC < 0) + * if (TCC-- < 0) * goto out; */ /* Delay slot */ - emit_instr(ctx, daddiu, MIPS_R_T5, - (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4, -1); + tcc_reg = (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4; + emit_instr(ctx, daddiu, MIPS_R_T5, tcc_reg, -1); b_off = b_imm(this_idx + 1, ctx); - emit_instr(ctx, bltz, MIPS_R_T5, b_off); + emit_instr(ctx, bltz, tcc_reg, b_off); /* * prog = array->ptrs[index]; * if (prog == NULL) @@ -1354,6 +1355,9 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, } break; + case BPF_ST | BPF_NOSPEC: /* speculation barrier */ + break; + case BPF_ST | BPF_B | BPF_MEM: case BPF_ST | BPF_H | BPF_MEM: case BPF_ST | BPF_W | BPF_MEM: @@ -1803,7 +1807,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) unsigned int image_size; u8 *image_ptr; - if (!prog->jit_requested || MIPS_ISA_REV < 2) + if (!prog->jit_requested) return prog; tmp = bpf_jit_blind_constants(prog); diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c index 39052de915f34..3a909194284a6 100644 --- a/arch/mips/pci/pci-legacy.c +++ b/arch/mips/pci/pci-legacy.c @@ -166,8 +166,13 @@ void pci_load_of_ranges(struct pci_controller *hose, struct device_node *node) res = hose->mem_resource; break; } - if (res != NULL) - of_pci_range_to_resource(&range, node, res); + if (res != NULL) { + res->name = node->full_name; + res->flags = range.flags; + res->start = range.cpu_addr; + res->end = range.cpu_addr + range.size - 1; + res->parent = res->child = res->sibling = NULL; + } } } diff --git a/arch/mips/pci/pci-mt7620.c b/arch/mips/pci/pci-mt7620.c index d360616037525..e032932348d6f 100644 --- a/arch/mips/pci/pci-mt7620.c +++ b/arch/mips/pci/pci-mt7620.c @@ -30,6 +30,7 @@ #define RALINK_GPIOMODE 0x60 #define PPLL_CFG1 0x9c +#define PPLL_LD BIT(23) #define PPLL_DRV 0xa0 #define PDRV_SW_SET BIT(31) @@ -239,8 +240,8 @@ static int mt7620_pci_hw_init(struct platform_device *pdev) rt_sysc_m32(0, RALINK_PCIE0_CLK_EN, RALINK_CLKCFG1); mdelay(100); - if (!(rt_sysc_r32(PPLL_CFG1) & PDRV_SW_SET)) { - dev_err(&pdev->dev, "MT7620 PPLL unlock\n"); + if (!(rt_sysc_r32(PPLL_CFG1) & PPLL_LD)) { + dev_err(&pdev->dev, "pcie PLL not locked, aborting init\n"); reset_control_assert(rstpcie0); rt_sysc_m32(RALINK_PCIE0_CLK_EN, 0, RALINK_CLKCFG1); return -1; diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c index c9f4d4ba058aa..2ceda2a040edc 100644 --- a/arch/mips/pci/pci-rt2880.c +++ b/arch/mips/pci/pci-rt2880.c @@ -180,7 +180,6 @@ static inline void rt2880_pci_write_u32(unsigned long reg, u32 val) int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - u16 cmd; int irq = -1; if (dev->bus->number != 0) @@ -188,8 +187,6 @@ int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) switch (PCI_SLOT(dev->devfn)) { case 0x00: - rt2880_pci_write_u32(PCI_BASE_ADDRESS_0, 0x08000000); - (void) rt2880_pci_read_u32(PCI_BASE_ADDRESS_0); break; case 0x11: irq = RT288X_CPU_IRQ_PCI; @@ -201,16 +198,6 @@ int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) break; } - pci_write_config_byte((struct pci_dev *) dev, - PCI_CACHE_LINE_SIZE, 0x14); - pci_write_config_byte((struct pci_dev *) dev, PCI_LATENCY_TIMER, 0xFF); - pci_read_config_word((struct pci_dev *) dev, PCI_COMMAND, &cmd); - cmd |= PCI_COMMAND_MASTER | PCI_COMMAND_IO | PCI_COMMAND_MEMORY | - PCI_COMMAND_INVALIDATE | PCI_COMMAND_FAST_BACK | - PCI_COMMAND_SERR | PCI_COMMAND_WAIT | PCI_COMMAND_PARITY; - pci_write_config_word((struct pci_dev *) dev, PCI_COMMAND, cmd); - pci_write_config_byte((struct pci_dev *) dev, PCI_INTERRUPT_LINE, - dev->irq); return irq; } @@ -251,6 +238,30 @@ static int rt288x_pci_probe(struct platform_device *pdev) int pcibios_plat_dev_init(struct pci_dev *dev) { + static bool slot0_init; + + /* + * Nobody seems to initialize slot 0, but this platform requires it, so + * do it once when some other slot is being enabled. The PCI subsystem + * should configure other slots properly, so no need to do anything + * special for those. + */ + if (!slot0_init && dev->bus->number == 0) { + u16 cmd; + u32 bar0; + + slot0_init = true; + + pci_bus_write_config_dword(dev->bus, 0, PCI_BASE_ADDRESS_0, + 0x08000000); + pci_bus_read_config_dword(dev->bus, 0, PCI_BASE_ADDRESS_0, + &bar0); + + pci_bus_read_config_word(dev->bus, 0, PCI_COMMAND, &cmd); + cmd |= PCI_COMMAND_MASTER | PCI_COMMAND_IO | PCI_COMMAND_MEMORY; + pci_bus_write_config_word(dev->bus, 0, PCI_COMMAND, cmd); + } + return 0; } diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index 7b4d40354ee7e..adc9f83b2c448 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -279,16 +279,15 @@ static int bridge_set_affinity(struct irq_data *d, const struct cpumask *mask, struct bridge_irq_chip_data *data = d->chip_data; int bit = d->parent_data->hwirq; int pin = d->hwirq; - nasid_t nasid; int ret, cpu; ret = irq_chip_set_affinity_parent(d, mask, force); if (ret >= 0) { cpu = cpumask_first_and(mask, cpu_online_mask); - nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu)); + data->nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu)); bridge_write(data->bc, b_int_addr[pin].addr, (((data->bc->intr_addr >> 30) & 0x30000) | - bit | (nasid << 8))); + bit | (data->nasid << 8))); bridge_read(data->bc, b_wid_tflush); } return ret; @@ -445,9 +444,10 @@ static int bridge_probe(struct platform_device *pdev) return -ENOMEM; domain = irq_domain_create_hierarchy(parent, 0, 8, fn, &bridge_domain_ops, NULL); - irq_domain_free_fwnode(fn); - if (!domain) + if (!domain) { + irq_domain_free_fwnode(fn); return -ENOMEM; + } pci_set_flags(PCI_PROBE_ONLY); @@ -539,6 +539,7 @@ static int bridge_probe(struct platform_device *pdev) pci_free_resource_list(&host->windows); err_remove_domain: irq_domain_remove(domain); + irq_domain_free_fwnode(fn); return err; } @@ -546,8 +547,10 @@ static int bridge_remove(struct platform_device *pdev) { struct pci_bus *bus = platform_get_drvdata(pdev); struct bridge_controller *bc = BRIDGE_CONTROLLER(bus); + struct fwnode_handle *fn = bc->domain->fwnode; irq_domain_remove(bc->domain); + irq_domain_free_fwnode(fn); pci_lock_rescan_remove(); pci_stop_root_bus(bus); pci_remove_root_bus(bus); diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c index 50f376f058f43..f232c77ff5265 100644 --- a/arch/mips/pic32/pic32mzda/init.c +++ b/arch/mips/pic32/pic32mzda/init.c @@ -28,7 +28,7 @@ static ulong get_fdtaddr(void) if (fw_passed_dtb && !fw_arg2 && !fw_arg3) return (ulong)fw_passed_dtb; - if (__dtb_start < __dtb_end) + if (&__dtb_start < &__dtb_end) ftaddr = (ulong)__dtb_start; return ftaddr; diff --git a/arch/mips/ralink/Kconfig b/arch/mips/ralink/Kconfig index 1434fa60f3db1..94e9ce9944944 100644 --- a/arch/mips/ralink/Kconfig +++ b/arch/mips/ralink/Kconfig @@ -51,6 +51,7 @@ choice select MIPS_GIC select COMMON_CLK select CLKSRC_MIPS_GIC + select HAVE_PCI if PCI_MT7621 endchoice choice diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c index 0ddeb31afa93a..45ca2b84f0962 100644 --- a/arch/mips/ralink/ill_acc.c +++ b/arch/mips/ralink/ill_acc.c @@ -61,6 +61,7 @@ static int __init ill_acc_of_setup(void) pdev = of_find_device_by_node(np); if (!pdev) { pr_err("%pOFn: failed to lookup pdev\n", np); + of_node_put(np); return -EINVAL; } diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 59b23095bfbb4..e2c71c6c667da 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -25,6 +26,7 @@ __iomem void *rt_sysc_membase; __iomem void *rt_memc_membase; +EXPORT_SYMBOL_GPL(rt_sysc_membase); __iomem void *plat_of_remap_node(const char *node) { @@ -75,7 +77,7 @@ void __init plat_mem_setup(void) */ if (fw_passed_dtb) dtb = (void *)fw_passed_dtb; - else if (__dtb_start != __dtb_end) + else if (&__dtb_start != &__dtb_end) dtb = (void *)__dtb_start; __dt_setup_arch(dtb); diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c index c9ecf17f86605..74808619fefeb 100644 --- a/arch/mips/rb532/devices.c +++ b/arch/mips/rb532/devices.c @@ -310,11 +310,9 @@ static int __init plat_setup_devices(void) static int __init setup_kmac(char *s) { printk(KERN_INFO "korina mac = %s\n", s); - if (!mac_pton(s, korina_dev0_data.mac)) { + if (!mac_pton(s, korina_dev0_data.mac)) printk(KERN_ERR "Invalid mac\n"); - return -EINVAL; - } - return 0; + return 1; } __setup("kmac=", setup_kmac); diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index 37be04975831e..79a2f6bd2b5ab 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -73,6 +73,9 @@ static void setup_hub_mask(struct hub_irq_data *hd, const struct cpumask *mask) int cpu; cpu = cpumask_first_and(mask, cpu_online_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_any(cpu_online_mask); + nasid = COMPACT_TO_NASID_NODEID(cpu_to_node(cpu)); hd->cpu = cpu; if (!cputoslice(cpu)) { @@ -139,6 +142,7 @@ static int hub_domain_alloc(struct irq_domain *domain, unsigned int virq, /* use CPU connected to nearest hub */ hub = hub_data(NASID_TO_COMPACT_NODEID(info->nasid)); setup_hub_mask(hd, &hub->h_cpus); + info->nasid = cpu_to_node(hd->cpu); /* Make sure it's not already pending when we connect it. */ REMOTE_HUB_CLR_INTR(info->nasid, swlevel); diff --git a/arch/mips/sni/a20r.c b/arch/mips/sni/a20r.c index f9407e1704762..c6af7047eb0d2 100644 --- a/arch/mips/sni/a20r.c +++ b/arch/mips/sni/a20r.c @@ -143,7 +143,10 @@ static struct platform_device sc26xx_pdev = { }, }; -static u32 a20r_ack_hwint(void) +/* + * Trigger chipset to update CPU's CAUSE IP field + */ +static u32 a20r_update_cause_ip(void) { u32 status = read_c0_status(); @@ -205,12 +208,14 @@ static void a20r_hwint(void) int irq; clear_c0_status(IE_IRQ0); - status = a20r_ack_hwint(); + status = a20r_update_cause_ip(); cause = read_c0_cause(); irq = ffs(((cause & status) >> 8) & 0xf8); if (likely(irq > 0)) do_IRQ(SNI_A20R_IRQ_BASE + irq - 1); + + a20r_update_cause_ip(); set_c0_status(IE_IRQ0); } diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c index dbace1f3e1a97..745ceb945fc50 100644 --- a/arch/mips/sni/time.c +++ b/arch/mips/sni/time.c @@ -18,14 +18,14 @@ static int a20r_set_periodic(struct clock_event_device *evt) { *(volatile u8 *)(A20R_PT_CLOCK_BASE + 12) = 0x34; wmb(); - *(volatile u8 *)(A20R_PT_CLOCK_BASE + 0) = SNI_COUNTER0_DIV; + *(volatile u8 *)(A20R_PT_CLOCK_BASE + 0) = SNI_COUNTER0_DIV & 0xff; wmb(); *(volatile u8 *)(A20R_PT_CLOCK_BASE + 0) = SNI_COUNTER0_DIV >> 8; wmb(); *(volatile u8 *)(A20R_PT_CLOCK_BASE + 12) = 0xb4; wmb(); - *(volatile u8 *)(A20R_PT_CLOCK_BASE + 8) = SNI_COUNTER2_DIV; + *(volatile u8 *)(A20R_PT_CLOCK_BASE + 8) = SNI_COUNTER2_DIV & 0xff; wmb(); *(volatile u8 *)(A20R_PT_CLOCK_BASE + 8) = SNI_COUNTER2_DIV >> 8; wmb(); diff --git a/arch/mips/tools/elf-entry.c b/arch/mips/tools/elf-entry.c index adde79ce7fc00..dbd14ff05b4cd 100644 --- a/arch/mips/tools/elf-entry.c +++ b/arch/mips/tools/elf-entry.c @@ -51,11 +51,14 @@ int main(int argc, const char *argv[]) nread = fread(&hdr, 1, sizeof(hdr), file); if (nread != sizeof(hdr)) { perror("Unable to read input file"); + fclose(file); return EXIT_FAILURE; } - if (memcmp(hdr.ehdr32.e_ident, ELFMAG, SELFMAG)) + if (memcmp(hdr.ehdr32.e_ident, ELFMAG, SELFMAG)) { + fclose(file); die("Input is not an ELF\n"); + } switch (hdr.ehdr32.e_ident[EI_CLASS]) { case ELFCLASS32: @@ -67,6 +70,7 @@ int main(int argc, const char *argv[]) entry = be32toh(hdr.ehdr32.e_entry); break; default: + fclose(file); die("Invalid ELF encoding\n"); } @@ -83,14 +87,17 @@ int main(int argc, const char *argv[]) entry = be64toh(hdr.ehdr64.e_entry); break; default: + fclose(file); die("Invalid ELF encoding\n"); } break; default: + fclose(file); die("Invalid ELF class\n"); } printf("0x%016" PRIx64 "\n", entry); + fclose(file); return EXIT_SUCCESS; } diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 996a934ece7d6..d3cd9c4cadc28 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -16,12 +16,9 @@ ccflags-vdso := \ $(filter -march=%,$(KBUILD_CFLAGS)) \ $(filter -m%-float,$(KBUILD_CFLAGS)) \ $(filter -mno-loongson-%,$(KBUILD_CFLAGS)) \ + $(CLANG_FLAGS) \ -D__VDSO__ -ifdef CONFIG_CC_IS_CLANG -ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS)) -endif - # # The -fno-jump-tables flag only prevents the compiler from generating # jump tables but does not prevent the compiler from emitting absolute diff --git a/arch/mips/vdso/genvdso.c b/arch/mips/vdso/genvdso.c index b66b6b1c4aeb9..8f581a2c8578b 100644 --- a/arch/mips/vdso/genvdso.c +++ b/arch/mips/vdso/genvdso.c @@ -122,6 +122,7 @@ static void *map_vdso(const char *path, size_t *_size) if (fstat(fd, &stat) != 0) { fprintf(stderr, "%s: Failed to stat '%s': %s\n", program_name, path, strerror(errno)); + close(fd); return NULL; } @@ -130,6 +131,7 @@ static void *map_vdso(const char *path, size_t *_size) if (addr == MAP_FAILED) { fprintf(stderr, "%s: Failed to map '%s': %s\n", program_name, path, strerror(errno)); + close(fd); return NULL; } @@ -139,6 +141,7 @@ static void *map_vdso(const char *path, size_t *_size) if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0) { fprintf(stderr, "%s: '%s' is not an ELF file\n", program_name, path); + close(fd); return NULL; } @@ -150,6 +153,7 @@ static void *map_vdso(const char *path, size_t *_size) default: fprintf(stderr, "%s: '%s' has invalid ELF class\n", program_name, path); + close(fd); return NULL; } @@ -161,6 +165,7 @@ static void *map_vdso(const char *path, size_t *_size) default: fprintf(stderr, "%s: '%s' has invalid ELF data order\n", program_name, path); + close(fd); return NULL; } @@ -168,15 +173,18 @@ static void *map_vdso(const char *path, size_t *_size) fprintf(stderr, "%s: '%s' has invalid ELF machine (expected EM_MIPS)\n", program_name, path); + close(fd); return NULL; } else if (swap_uint16(ehdr->e_type) != ET_DYN) { fprintf(stderr, "%s: '%s' has invalid ELF type (expected ET_DYN)\n", program_name, path); + close(fd); return NULL; } *_size = stat.st_size; + close(fd); return addr; } @@ -280,10 +288,12 @@ int main(int argc, char **argv) /* Calculate and write symbol offsets to */ if (!get_symbols(dbg_vdso_path, dbg_vdso)) { unlink(out_path); + fclose(out_file); return EXIT_FAILURE; } fprintf(out_file, "};\n"); + fclose(out_file); return EXIT_SUCCESS; } diff --git a/arch/mips/vdso/vgettimeofday.c b/arch/mips/vdso/vgettimeofday.c index 6ebdc37c89fc6..6b83b6376a4b5 100644 --- a/arch/mips/vdso/vgettimeofday.c +++ b/arch/mips/vdso/vgettimeofday.c @@ -17,12 +17,22 @@ int __vdso_clock_gettime(clockid_t clock, return __cvdso_clock_gettime32(clock, ts); } +#ifdef CONFIG_MIPS_CLOCK_VSYSCALL + +/* + * This is behind the ifdef so that we don't provide the symbol when there's no + * possibility of there being a usable clocksource, because there's nothing we + * can do without it. When libc fails the symbol lookup it should fall back on + * the standard syscall path. + */ int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { return __cvdso_gettimeofday(tv, tz); } +#endif /* CONFIG_MIPS_CLOCK_VSYSCALL */ + int __vdso_clock_getres(clockid_t clock_id, struct old_timespec32 *res) { @@ -43,12 +53,22 @@ int __vdso_clock_gettime(clockid_t clock, return __cvdso_clock_gettime(clock, ts); } +#ifdef CONFIG_MIPS_CLOCK_VSYSCALL + +/* + * This is behind the ifdef so that we don't provide the symbol when there's no + * possibility of there being a usable clocksource, because there's nothing we + * can do without it. When libc fails the symbol lookup it should fall back on + * the standard syscall path. + */ int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { return __cvdso_gettimeofday(tv, tz); } +#endif /* CONFIG_MIPS_CLOCK_VSYSCALL */ + int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res) { diff --git a/arch/mips/vr41xx/common/icu.c b/arch/mips/vr41xx/common/icu.c index 7b7f25b4b057e..9240bcdbe74e4 100644 --- a/arch/mips/vr41xx/common/icu.c +++ b/arch/mips/vr41xx/common/icu.c @@ -640,8 +640,6 @@ static int icu_get_irq(unsigned int irq) printk(KERN_ERR "spurious ICU interrupt: %04x,%04x\n", pend1, pend2); - atomic_inc(&irq_err_count); - return -1; } diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h index d9ac7e6408ef3..caddded56e77f 100644 --- a/arch/nds32/include/asm/cacheflush.h +++ b/arch/nds32/include/asm/cacheflush.h @@ -9,7 +9,11 @@ #define PG_dcache_dirty PG_arch_1 void flush_icache_range(unsigned long start, unsigned long end); +#define flush_icache_range flush_icache_range + void flush_icache_page(struct vm_area_struct *vma, struct page *page); +#define flush_icache_page flush_icache_page + #ifdef CONFIG_CPU_CACHE_ALIASING void flush_cache_mm(struct mm_struct *mm); void flush_cache_dup_mm(struct mm_struct *mm); @@ -40,12 +44,11 @@ void invalidate_kernel_vmap_range(void *addr, int size); #define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&(mapping)->i_pages) #else -#include -#undef flush_icache_range -#undef flush_icache_page -#undef flush_icache_user_range void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, unsigned long addr, int len); +#define flush_icache_user_range flush_icache_user_range + +#include #endif #endif /* __NDS32_CACHEFLUSH_H__ */ diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h index 8916ad9f9f139..e205b1db8c807 100644 --- a/arch/nds32/include/asm/uaccess.h +++ b/arch/nds32/include/asm/uaccess.h @@ -71,9 +71,7 @@ static inline void set_fs(mm_segment_t fs) * versions are void (ie, don't return a value as such). */ -#define get_user __get_user \ - -#define __get_user(x, ptr) \ +#define get_user(x, ptr) \ ({ \ long __gu_err = 0; \ __get_user_check((x), (ptr), __gu_err); \ @@ -86,6 +84,14 @@ static inline void set_fs(mm_segment_t fs) (void)0; \ }) +#define __get_user(x, ptr) \ +({ \ + long __gu_err = 0; \ + const __typeof__(*(ptr)) __user *__p = (ptr); \ + __get_user_err((x), __p, (__gu_err)); \ + __gu_err; \ +}) + #define __get_user_check(x, ptr, err) \ ({ \ const __typeof__(*(ptr)) __user *__p = (ptr); \ @@ -166,12 +172,18 @@ do { \ : "r"(addr), "i"(-EFAULT) \ : "cc") -#define put_user __put_user \ +#define put_user(x, ptr) \ +({ \ + long __pu_err = 0; \ + __put_user_check((x), (ptr), __pu_err); \ + __pu_err; \ +}) #define __put_user(x, ptr) \ ({ \ long __pu_err = 0; \ - __put_user_err((x), (ptr), __pu_err); \ + __typeof__(*(ptr)) __user *__p = (ptr); \ + __put_user_err((x), __p, __pu_err); \ __pu_err; \ }) diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c index 334c2a6cec23d..8a4f9babb1646 100644 --- a/arch/nds32/kernel/perf_event_cpu.c +++ b/arch/nds32/kernel/perf_event_cpu.c @@ -1363,6 +1363,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); unsigned long fp = 0; unsigned long gp = 0; unsigned long lp = 0; @@ -1371,7 +1372,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, leaf_fp = 0; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } @@ -1479,9 +1480,10 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct stackframe fr; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } @@ -1493,20 +1495,23 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, unsigned long perf_instruction_pointer(struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + /* However, NDS32 does not support virtualization */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); + if (guest_cbs && guest_cbs->is_in_guest()) + return guest_cbs->get_guest_ip(); return instruction_pointer(regs); } unsigned long perf_misc_flags(struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); int misc = 0; /* However, NDS32 does not support virtualization */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) + if (guest_cbs && guest_cbs->is_in_guest()) { + if (guest_cbs->is_user_mode()) misc |= PERF_RECORD_MISC_GUEST_USER; else misc |= PERF_RECORD_MISC_GUEST_KERNEL; diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c index 254703653b6f5..f34dc9bc6758e 100644 --- a/arch/nds32/mm/cacheflush.c +++ b/arch/nds32/mm/cacheflush.c @@ -239,7 +239,7 @@ void flush_dcache_page(struct page *page) { struct address_space *mapping; - mapping = page_mapping(page); + mapping = page_mapping_file(page); if (mapping && !mapping_mapped(mapping)) set_bit(PG_dcache_dirty, &page->flags); else { diff --git a/arch/nds32/mm/mmap.c b/arch/nds32/mm/mmap.c index c206b31ce07ac..1bdf5e7d1b438 100644 --- a/arch/nds32/mm/mmap.c +++ b/arch/nds32/mm/mmap.c @@ -59,7 +59,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/arch/nios2/include/asm/entry.h b/arch/nios2/include/asm/entry.h index cf37f55efbc22..bafb7b2ca59fc 100644 --- a/arch/nios2/include/asm/entry.h +++ b/arch/nios2/include/asm/entry.h @@ -50,7 +50,8 @@ stw r13, PT_R13(sp) stw r14, PT_R14(sp) stw r15, PT_R15(sp) - stw r2, PT_ORIG_R2(sp) + movi r24, -1 + stw r24, PT_ORIG_R2(sp) stw r7, PT_ORIG_R7(sp) stw ra, PT_RA(sp) diff --git a/arch/nios2/include/asm/irqflags.h b/arch/nios2/include/asm/irqflags.h index b3ec3e510706d..25acf27862f91 100644 --- a/arch/nios2/include/asm/irqflags.h +++ b/arch/nios2/include/asm/irqflags.h @@ -9,7 +9,7 @@ static inline unsigned long arch_local_save_flags(void) { - return RDCTL(CTL_STATUS); + return RDCTL(CTL_FSTATUS); } /* @@ -18,7 +18,7 @@ static inline unsigned long arch_local_save_flags(void) */ static inline void arch_local_irq_restore(unsigned long flags) { - WRCTL(CTL_STATUS, flags); + WRCTL(CTL_FSTATUS, flags); } static inline void arch_local_irq_disable(void) diff --git a/arch/nios2/include/asm/ptrace.h b/arch/nios2/include/asm/ptrace.h index 6424621448728..9da34c3022a27 100644 --- a/arch/nios2/include/asm/ptrace.h +++ b/arch/nios2/include/asm/ptrace.h @@ -74,6 +74,8 @@ extern void show_regs(struct pt_regs *); ((struct pt_regs *)((unsigned long)current_thread_info() + THREAD_SIZE)\ - 1) +#define force_successful_syscall_return() (current_pt_regs()->orig_r2 = -1) + int do_syscall_trace_enter(void); void do_syscall_trace_exit(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/nios2/include/asm/registers.h b/arch/nios2/include/asm/registers.h index 183c720e454d9..95b67dd16f818 100644 --- a/arch/nios2/include/asm/registers.h +++ b/arch/nios2/include/asm/registers.h @@ -11,7 +11,7 @@ #endif /* control register numbers */ -#define CTL_STATUS 0 +#define CTL_FSTATUS 0 #define CTL_ESTATUS 1 #define CTL_BSTATUS 2 #define CTL_IENABLE 3 diff --git a/arch/nios2/include/asm/timex.h b/arch/nios2/include/asm/timex.h index a769f871b28d9..40a1adc9bd03e 100644 --- a/arch/nios2/include/asm/timex.h +++ b/arch/nios2/include/asm/timex.h @@ -8,5 +8,8 @@ typedef unsigned long cycles_t; extern cycles_t get_cycles(void); +#define get_cycles get_cycles + +#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback()) #endif diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h index e83f831a76f93..6be5e913c42e4 100644 --- a/arch/nios2/include/asm/uaccess.h +++ b/arch/nios2/include/asm/uaccess.h @@ -89,6 +89,7 @@ extern __must_check long strnlen_user(const char __user *s, long n); /* Optimized macros */ #define __get_user_asm(val, insn, addr, err) \ { \ + unsigned long __gu_val; \ __asm__ __volatile__( \ " movi %0, %3\n" \ "1: " insn " %1, 0(%2)\n" \ @@ -97,14 +98,20 @@ extern __must_check long strnlen_user(const char __user *s, long n); " .section __ex_table,\"a\"\n" \ " .word 1b, 2b\n" \ " .previous" \ - : "=&r" (err), "=r" (val) \ + : "=&r" (err), "=r" (__gu_val) \ : "r" (addr), "i" (-EFAULT)); \ + val = (__force __typeof__(*(addr)))__gu_val; \ } -#define __get_user_unknown(val, size, ptr, err) do { \ +extern void __get_user_unknown(void); + +#define __get_user_8(val, ptr, err) do { \ + u64 __val = 0; \ err = 0; \ - if (__copy_from_user(&(val), ptr, size)) { \ + if (raw_copy_from_user(&(__val), ptr, sizeof(val))) { \ err = -EFAULT; \ + } else { \ + val = (typeof(val))(typeof((val) - (val)))__val; \ } \ } while (0) @@ -120,8 +127,11 @@ do { \ case 4: \ __get_user_asm(val, "ldw", ptr, err); \ break; \ + case 8: \ + __get_user_8(val, ptr, err); \ + break; \ default: \ - __get_user_unknown(val, size, ptr, err); \ + __get_user_unknown(); \ break; \ } \ } while (0) @@ -130,9 +140,7 @@ do { \ ({ \ long __gu_err = -EFAULT; \ const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ - unsigned long __gu_val = 0; \ - __get_user_common(__gu_val, sizeof(*(ptr)), __gu_ptr, __gu_err);\ - (x) = (__force __typeof__(x))__gu_val; \ + __get_user_common(x, sizeof(*(ptr)), __gu_ptr, __gu_err); \ __gu_err; \ }) @@ -140,11 +148,9 @@ do { \ ({ \ long __gu_err = -EFAULT; \ const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ - unsigned long __gu_val = 0; \ if (access_ok( __gu_ptr, sizeof(*__gu_ptr))) \ - __get_user_common(__gu_val, sizeof(*__gu_ptr), \ + __get_user_common(x, sizeof(*__gu_ptr), \ __gu_ptr, __gu_err); \ - (x) = (__force __typeof__(x))__gu_val; \ __gu_err; \ }) diff --git a/arch/nios2/kernel/entry.S b/arch/nios2/kernel/entry.S index 1e515ccd698e3..af556588248e7 100644 --- a/arch/nios2/kernel/entry.S +++ b/arch/nios2/kernel/entry.S @@ -185,6 +185,7 @@ ENTRY(handle_system_call) ldw r5, PT_R5(sp) local_restart: + stw r2, PT_ORIG_R2(sp) /* Check that the requested system call is within limits */ movui r1, __NR_syscalls bgeu r2, r1, ret_invsyscall @@ -192,7 +193,6 @@ local_restart: movhi r11, %hiadj(sys_call_table) add r1, r1, r11 ldw r1, %lo(sys_call_table)(r1) - beq r1, r0, ret_invsyscall /* Check if we are being traced */ GET_THREAD_INFO r11 @@ -213,6 +213,9 @@ local_restart: translate_rc_and_ret: movi r1, 0 bge r2, zero, 3f + ldw r1, PT_ORIG_R2(sp) + addi r1, r1, 1 + beq r1, zero, 3f sub r2, zero, r2 movi r1, 1 3: @@ -255,9 +258,9 @@ traced_system_call: ldw r6, PT_R6(sp) ldw r7, PT_R7(sp) - /* Fetch the syscall function, we don't need to check the boundaries - * since this is already done. - */ + /* Fetch the syscall function. */ + movui r1, __NR_syscalls + bgeu r2, r1, traced_invsyscall slli r1, r2, 2 movhi r11,%hiadj(sys_call_table) add r1, r1, r11 @@ -276,6 +279,9 @@ traced_system_call: translate_rc_and_ret2: movi r1, 0 bge r2, zero, 4f + ldw r1, PT_ORIG_R2(sp) + addi r1, r1, 1 + beq r1, zero, 4f sub r2, zero, r2 movi r1, 1 4: @@ -287,6 +293,11 @@ end_translate_rc_and_ret2: RESTORE_SWITCH_STACK br ret_from_exception + /* If the syscall number was invalid return ENOSYS */ +traced_invsyscall: + movi r2, -ENOSYS + br translate_rc_and_ret2 + Luser_return: GET_THREAD_INFO r11 /* get thread_info pointer */ ldw r10, TI_FLAGS(r11) /* get thread_info->flags */ @@ -336,9 +347,6 @@ external_interrupt: /* skip if no interrupt is pending */ beq r12, r0, ret_from_interrupt - movi r24, -1 - stw r24, PT_ORIG_R2(sp) - /* * Process an external hardware interrupt. */ diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c index a42dd09c65783..bfb20821a8f49 100644 --- a/arch/nios2/kernel/signal.c +++ b/arch/nios2/kernel/signal.c @@ -240,7 +240,7 @@ static int do_signal(struct pt_regs *regs) /* * If we were from a system call, check for system call restarting... */ - if (regs->orig_r2 >= 0) { + if (regs->orig_r2 >= 0 && regs->r1) { continue_addr = regs->ea; restart_addr = continue_addr - 4; retval = regs->r2; @@ -261,6 +261,7 @@ static int do_signal(struct pt_regs *regs) regs->ea = restart_addr; break; } + regs->orig_r2 = -1; } if (get_signal(&ksig)) { diff --git a/arch/nios2/kernel/syscall_table.c b/arch/nios2/kernel/syscall_table.c index 6176d63023c1d..c2875a6dd5a4a 100644 --- a/arch/nios2/kernel/syscall_table.c +++ b/arch/nios2/kernel/syscall_table.c @@ -13,5 +13,6 @@ #define __SYSCALL(nr, call) [nr] = (call), void *sys_call_table[__NR_syscalls] = { + [0 ... __NR_syscalls-1] = sys_ni_syscall, #include }; diff --git a/arch/nios2/platform/Kconfig.platform b/arch/nios2/platform/Kconfig.platform index 9e32fb7f3d4ce..e849daff6fd16 100644 --- a/arch/nios2/platform/Kconfig.platform +++ b/arch/nios2/platform/Kconfig.platform @@ -37,6 +37,7 @@ config NIOS2_DTB_PHYS_ADDR config NIOS2_DTB_SOURCE_BOOL bool "Compile and link device tree into kernel image" + depends on !COMPILE_TEST help This allows you to specify a dts (device tree source) file which will be compiled and linked into the kernel image. diff --git a/arch/openrisc/include/asm/barrier.h b/arch/openrisc/include/asm/barrier.h new file mode 100644 index 0000000000000..7538294721bed --- /dev/null +++ b/arch/openrisc/include/asm/barrier.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_BARRIER_H +#define __ASM_BARRIER_H + +#define mb() asm volatile ("l.msync" ::: "memory") + +#include + +#endif /* __ASM_BARRIER_H */ diff --git a/arch/openrisc/include/asm/timex.h b/arch/openrisc/include/asm/timex.h index d52b4e536e3f9..5487fa93dd9be 100644 --- a/arch/openrisc/include/asm/timex.h +++ b/arch/openrisc/include/asm/timex.h @@ -23,6 +23,7 @@ static inline cycles_t get_cycles(void) { return mfspr(SPR_TTCR); } +#define get_cycles get_cycles /* This isn't really used any more */ #define CLOCK_TICK_RATE 1000 diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index 17c24f14615fb..6839f8fcf76b2 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -164,19 +164,19 @@ struct __large_struct { #define __get_user_nocheck(x, ptr, size) \ ({ \ - long __gu_err, __gu_val; \ - __get_user_size(__gu_val, (ptr), (size), __gu_err); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ + long __gu_err; \ + __get_user_size((x), (ptr), (size), __gu_err); \ __gu_err; \ }) #define __get_user_check(x, ptr, size) \ ({ \ - long __gu_err = -EFAULT, __gu_val = 0; \ - const __typeof__(*(ptr)) * __gu_addr = (ptr); \ - if (access_ok(__gu_addr, size)) \ - __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ - (x) = (__force __typeof__(*(ptr)))__gu_val; \ + long __gu_err = -EFAULT; \ + const __typeof__(*(ptr)) *__gu_addr = (ptr); \ + if (access_ok(__gu_addr, size)) \ + __get_user_size((x), __gu_addr, (size), __gu_err); \ + else \ + (x) = (__typeof__(*(ptr))) 0; \ __gu_err; \ }) @@ -190,11 +190,13 @@ do { \ case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break; \ case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break; \ case 8: __get_user_asm2(x, ptr, retval); break; \ - default: (x) = __get_user_bad(); \ + default: (x) = (__typeof__(*(ptr)))__get_user_bad(); \ } \ } while (0) #define __get_user_asm(x, addr, err, op) \ +{ \ + unsigned long __gu_tmp; \ __asm__ __volatile__( \ "1: "op" %1,0(%2)\n" \ "2:\n" \ @@ -208,10 +210,14 @@ do { \ " .align 2\n" \ " .long 1b,3b\n" \ ".previous" \ - : "=r"(err), "=r"(x) \ - : "r"(addr), "i"(-EFAULT), "0"(err)) + : "=r"(err), "=r"(__gu_tmp) \ + : "r"(addr), "i"(-EFAULT), "0"(err)); \ + (x) = (__typeof__(*(addr)))__gu_tmp; \ +} #define __get_user_asm2(x, addr, err) \ +{ \ + unsigned long long __gu_tmp; \ __asm__ __volatile__( \ "1: l.lwz %1,0(%2)\n" \ "2: l.lwz %H1,4(%2)\n" \ @@ -228,8 +234,11 @@ do { \ " .long 1b,4b\n" \ " .long 2b,4b\n" \ ".previous" \ - : "=r"(err), "=&r"(x) \ - : "r"(addr), "i"(-EFAULT), "0"(err)) + : "=r"(err), "=&r"(__gu_tmp) \ + : "r"(addr), "i"(-EFAULT), "0"(err)); \ + (x) = (__typeof__(*(addr)))( \ + (__typeof__((x)-(x)))__gu_tmp); \ +} /* more complex routines */ diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index e4a78571f8833..6b27cf4a0d786 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -547,6 +547,7 @@ EXCEPTION_ENTRY(_external_irq_handler) l.bnf 1f // ext irq enabled, all ok. l.nop +#ifdef CONFIG_PRINTK l.addi r1,r1,-0x8 l.movhi r3,hi(42f) l.ori r3,r3,lo(42f) @@ -560,6 +561,7 @@ EXCEPTION_ENTRY(_external_irq_handler) .string "\n\rESR interrupt bug: in _external_irq_handler (ESR %x)\n\r" .align 4 .previous +#endif l.ori r4,r4,SPR_SR_IEE // fix the bug // l.sw PT_SR(r1),r4 @@ -1166,13 +1168,13 @@ ENTRY(__sys_clone) l.movhi r29,hi(sys_clone) l.ori r29,r29,lo(sys_clone) l.j _fork_save_extra_regs_and_call - l.addi r7,r1,0 + l.nop ENTRY(__sys_fork) l.movhi r29,hi(sys_fork) l.ori r29,r29,lo(sys_fork) l.j _fork_save_extra_regs_and_call - l.addi r3,r1,0 + l.nop ENTRY(sys_rt_sigreturn) l.jal _sys_rt_sigreturn diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index b0dc974f9a743..ffbbf639b7f95 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -521,6 +521,15 @@ _start: l.ori r3,r0,0x1 l.mtspr r0,r3,SPR_SR + /* + * Start the TTCR as early as possible, so that the RNG can make use of + * measurements of boot time from the earliest opportunity. Especially + * important is that the TTCR does not return zero by the time we reach + * rand_initialize(). + */ + l.movhi r3,hi(SPR_TTMR_CR) + l.mtspr r0,r3,SPR_TTMR + CLEAR_GPR(r1) CLEAR_GPR(r2) CLEAR_GPR(r3) diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index d668f5be3a995..ae104eb4becc7 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -274,6 +274,8 @@ void calibrate_delay(void) pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n", loops_per_jiffy / (500000 / HZ), (loops_per_jiffy / (5000 / HZ)) % 100, loops_per_jiffy); + + of_node_put(cpu); } void __init setup_arch(char **cmdline_p) diff --git a/arch/openrisc/kernel/stacktrace.c b/arch/openrisc/kernel/stacktrace.c index 43f140a28bc72..54d38809e22cb 100644 --- a/arch/openrisc/kernel/stacktrace.c +++ b/arch/openrisc/kernel/stacktrace.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -68,12 +69,25 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { unsigned long *sp = NULL; + if (!try_get_task_stack(tsk)) + return; + if (tsk == current) sp = (unsigned long *) &sp; - else - sp = (unsigned long *) KSTK_ESP(tsk); + else { + unsigned long ksp; + + /* Locate stack from kernel context */ + ksp = task_thread_info(tsk)->ksp; + ksp += STACK_FRAME_OVERHEAD; /* redzone */ + ksp += sizeof(struct pt_regs); + + sp = (unsigned long *) ksp; + } unwind_stack(trace, sp, save_stack_address_nosched); + + put_task_stack(tsk); } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/openrisc/mm/cache.c b/arch/openrisc/mm/cache.c index 08f56af387ac4..534a52ec5e667 100644 --- a/arch/openrisc/mm/cache.c +++ b/arch/openrisc/mm/cache.c @@ -16,7 +16,7 @@ #include #include -static void cache_loop(struct page *page, const unsigned int reg) +static __always_inline void cache_loop(struct page *page, const unsigned int reg) { unsigned long paddr = page_to_pfn(page) << PAGE_SHIFT; unsigned long line = paddr & ~(L1_CACHE_BYTES - 1); diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index b16237c95ea33..c97c01c755665 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -11,6 +11,7 @@ config PARISC select ARCH_WANT_FRAME_POINTERS select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_NO_SG_CHAIN select ARCH_SUPPORTS_MEMORY_FAILURE @@ -62,6 +63,7 @@ config PARISC select HAVE_FTRACE_MCOUNT_RECORD if HAVE_DYNAMIC_FTRACE select HAVE_KPROBES_ON_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS + select HAVE_COPY_THREAD_TLS help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 36b834f1c9330..04460c2d2f8c1 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -17,7 +17,12 @@ # Mike Shaver, Helge Deller and Martin K. Petersen # +ifdef CONFIG_PARISC_SELF_EXTRACT +boot := arch/parisc/boot +KBUILD_IMAGE := $(boot)/bzImage +else KBUILD_IMAGE := vmlinuz +endif NM = sh $(srctree)/arch/parisc/nm CHECKFLAGS += -D__hppa__=1 @@ -60,7 +65,6 @@ KBUILD_CFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY=1 \ -DFTRACE_PATCHABLE_FUNCTION_SIZE=$(NOP_COUNT) CC_FLAGS_FTRACE := -fpatchable-function-entry=$(NOP_COUNT),$(shell echo $$(($(NOP_COUNT)-1))) -KBUILD_LDS_MODULE += $(srctree)/arch/parisc/kernel/module.lds endif OBJCOPY_FLAGS =-O binary -R .note -R .comment -S @@ -156,7 +160,7 @@ vmlinuz: bzImage $(OBJCOPY) $(boot)/bzImage $@ else vmlinuz: vmlinux - @gzip -cf -9 $< > $@ + @$(KGZIP) -cf -9 $< > $@ endif install: diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 118953d417634..6dd4171c95305 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -212,6 +212,8 @@ atomic64_set(atomic64_t *v, s64 i) _atomic_spin_unlock_irqrestore(v, flags); } +#define atomic64_set_release(v, i) atomic64_set((v), (i)) + static __inline__ s64 atomic64_read(const atomic64_t *v) { diff --git a/arch/parisc/include/asm/barrier.h b/arch/parisc/include/asm/barrier.h index dbaaca84f27f3..640d46edf32e7 100644 --- a/arch/parisc/include/asm/barrier.h +++ b/arch/parisc/include/asm/barrier.h @@ -26,6 +26,67 @@ #define __smp_rmb() mb() #define __smp_wmb() mb() +#define __smp_store_release(p, v) \ +do { \ + typeof(p) __p = (p); \ + union { typeof(*p) __val; char __c[1]; } __u = \ + { .__val = (__force typeof(*p)) (v) }; \ + compiletime_assert_atomic_type(*p); \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile("stb,ma %0,0(%1)" \ + : : "r"(*(__u8 *)__u.__c), "r"(__p) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("sth,ma %0,0(%1)" \ + : : "r"(*(__u16 *)__u.__c), "r"(__p) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("stw,ma %0,0(%1)" \ + : : "r"(*(__u32 *)__u.__c), "r"(__p) \ + : "memory"); \ + break; \ + case 8: \ + if (IS_ENABLED(CONFIG_64BIT)) \ + asm volatile("std,ma %0,0(%1)" \ + : : "r"(*(__u64 *)__u.__c), "r"(__p) \ + : "memory"); \ + break; \ + } \ +} while (0) + +#define __smp_load_acquire(p) \ +({ \ + union { typeof(*p) __val; char __c[1]; } __u; \ + typeof(p) __p = (p); \ + compiletime_assert_atomic_type(*p); \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile("ldb,ma 0(%1),%0" \ + : "=r"(*(__u8 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("ldh,ma 0(%1),%0" \ + : "=r"(*(__u16 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("ldw,ma 0(%1),%0" \ + : "=r"(*(__u32 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + case 8: \ + if (IS_ENABLED(CONFIG_64BIT)) \ + asm volatile("ldd,ma 0(%1),%0" \ + : "=r"(*(__u64 *)__u.__c) : "r"(__p) \ + : "memory"); \ + break; \ + } \ + __u.__val; \ +}) #include #endif /* !__ASSEMBLY__ */ diff --git a/arch/parisc/include/asm/cmpxchg.h b/arch/parisc/include/asm/cmpxchg.h index f627c37dad9c9..a736dc59bbef8 100644 --- a/arch/parisc/include/asm/cmpxchg.h +++ b/arch/parisc/include/asm/cmpxchg.h @@ -44,8 +44,14 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size) ** if (((unsigned long)p & 0xf) == 0) ** return __ldcw(p); */ -#define xchg(ptr, x) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) +#define xchg(ptr, x) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) _x_ = (x); \ + __ret = (__typeof__(*(ptr))) \ + __xchg((unsigned long)_x_, (ptr), sizeof(*(ptr))); \ + __ret; \ +}) /* bug catcher for when unsupported size is used - won't link */ extern void __cmpxchg_called_with_bad_pointer(void); @@ -54,6 +60,7 @@ extern void __cmpxchg_called_with_bad_pointer(void); extern unsigned long __cmpxchg_u32(volatile unsigned int *m, unsigned int old, unsigned int new_); extern u64 __cmpxchg_u64(volatile u64 *ptr, u64 old, u64 new_); +extern u8 __cmpxchg_u8(volatile u8 *ptr, u8 old, u8 new_); /* don't worry...optimizer will get rid of most of this */ static inline unsigned long @@ -65,6 +72,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new_, int size) #endif case 4: return __cmpxchg_u32((unsigned int *)ptr, (unsigned int)old, (unsigned int)new_); + case 1: return __cmpxchg_u8((u8 *)ptr, old & 0xff, new_ & 0xff); } __cmpxchg_called_with_bad_pointer(); return old; diff --git a/arch/parisc/include/asm/kexec.h b/arch/parisc/include/asm/kexec.h index a99ea747d7ede..87e1740069955 100644 --- a/arch/parisc/include/asm/kexec.h +++ b/arch/parisc/include/asm/kexec.h @@ -2,8 +2,6 @@ #ifndef _ASM_PARISC_KEXEC_H #define _ASM_PARISC_KEXEC_H -#ifdef CONFIG_KEXEC - /* Maximum physical address we can use pages from */ #define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) /* Maximum address we can reach in physical address mode */ @@ -32,6 +30,4 @@ static inline void crash_setup_regs(struct pt_regs *newregs, #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_KEXEC */ - #endif /* _ASM_PARISC_KEXEC_H */ diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index 93caf17ac5e2f..9ebf3b0413d5f 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -181,7 +181,7 @@ extern int npmem_ranges; #include #include -#define PAGE0 ((struct zeropage *)__PAGE_OFFSET) +#define PAGE0 ((struct zeropage *)absolute_pointer(__PAGE_OFFSET)) /* DEFINITION OF THE ZERO-PAGE (PAG0) */ /* based on work by Jason Eckhardt (jason@equator.com) */ diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index 197d2247e4db2..16aec9ba2580a 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -37,12 +37,8 @@ static inline void arch_spin_unlock(arch_spinlock_t *x) volatile unsigned int *a; a = __ldcw_align(x); -#ifdef CONFIG_SMP - (void) __ldcw(a); -#else - mb(); -#endif - *a = 1; + /* Release with ordered store. */ + __asm__ __volatile__("stw,ma %0,0(%1)" : : "r"(1), "r"(a) : "memory"); } static inline int arch_spin_trylock(arch_spinlock_t *x) diff --git a/arch/parisc/include/asm/string.h b/arch/parisc/include/asm/string.h index 4a0c9dbd62fd0..f6e1132f4e352 100644 --- a/arch/parisc/include/asm/string.h +++ b/arch/parisc/include/asm/string.h @@ -8,19 +8,4 @@ extern void * memset(void *, int, size_t); #define __HAVE_ARCH_MEMCPY void * memcpy(void * dest,const void *src,size_t count); -#define __HAVE_ARCH_STRLEN -extern size_t strlen(const char *s); - -#define __HAVE_ARCH_STRCPY -extern char *strcpy(char *dest, const char *src); - -#define __HAVE_ARCH_STRNCPY -extern char *strncpy(char *dest, const char *src, size_t count); - -#define __HAVE_ARCH_STRCAT -extern char *strcat(char *dest, const char *src); - -#define __HAVE_ARCH_MEMSET -extern void *memset(void *, int, size_t); - #endif diff --git a/arch/parisc/include/asm/timex.h b/arch/parisc/include/asm/timex.h index 45537cd4d1d39..1cd2bd3eef33b 100644 --- a/arch/parisc/include/asm/timex.h +++ b/arch/parisc/include/asm/timex.h @@ -12,9 +12,10 @@ typedef unsigned long cycles_t; -static inline cycles_t get_cycles (void) +static inline cycles_t get_cycles(void) { return mfctl(16); } +#define get_cycles get_cycles #endif diff --git a/arch/parisc/install.sh b/arch/parisc/install.sh index 6f68784fea25f..a8c49815f58c8 100644 --- a/arch/parisc/install.sh +++ b/arch/parisc/install.sh @@ -39,6 +39,7 @@ verify "$3" if [ -n "${INSTALLKERNEL}" ]; then if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi + if [ -x /usr/sbin/${INSTALLKERNEL} ]; then exec /usr/sbin/${INSTALLKERNEL} "$@"; fi fi # Default install diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index 2663c8f8be115..068d90950d937 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -37,5 +37,5 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_KEXEC) += kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += kexec.o relocate_kernel.o obj-$(CONFIG_KEXEC_FILE) += kexec_file.o diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 3b330e58a4f03..516f3891e793f 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -520,7 +520,6 @@ alloc_pa_dev(unsigned long hpa, struct hardware_path *mod_path) dev->id.hversion_rev = iodc_data[1] & 0x0f; dev->id.sversion = ((iodc_data[4] & 0x0f) << 16) | (iodc_data[5] << 8) | iodc_data[6]; - dev->hpa.name = parisc_pathname(dev); dev->hpa.start = hpa; /* This is awkward. The STI spec says that gfx devices may occupy * 32MB or 64MB. Unfortunately, we don't know how to tell whether @@ -534,10 +533,10 @@ alloc_pa_dev(unsigned long hpa, struct hardware_path *mod_path) dev->hpa.end = hpa + 0xfff; } dev->hpa.flags = IORESOURCE_MEM; - name = parisc_hardware_description(&dev->id); - if (name) { - strlcpy(dev->name, name, sizeof(dev->name)); - } + dev->hpa.name = dev->name; + name = parisc_hardware_description(&dev->id) ? : "unknown"; + snprintf(dev->name, sizeof(dev->name), "%s [%s]", + name, parisc_pathname(dev)); /* Silently fail things like mouse ports which are subsumed within * the keyboard controller @@ -810,7 +809,7 @@ EXPORT_SYMBOL(device_to_hwpath); static void walk_native_bus(unsigned long io_io_low, unsigned long io_io_high, struct device *parent); -static void walk_lower_bus(struct parisc_device *dev) +static void __init walk_lower_bus(struct parisc_device *dev) { unsigned long io_io_low, io_io_high; @@ -889,8 +888,8 @@ static void print_parisc_device(struct parisc_device *dev) static int count; print_pa_hwpath(dev, hw_path); - pr_info("%d. %s at 0x%px [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", - ++count, dev->name, (void*) dev->hpa.start, hw_path, dev->id.hw_type, + pr_info("%d. %s at %pap [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", + ++count, dev->name, &(dev->hpa.start), hw_path, dev->id.hw_type, dev->id.hversion_rev, dev->id.hversion, dev->id.sversion); if (dev->num_addrs) { diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index b96d744969779..2f64f112934b6 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -454,7 +454,6 @@ nop LDREG 0(\ptp),\pte bb,<,n \pte,_PAGE_PRESENT_BIT,3f - LDCW 0(\tmp),\tmp1 b \fault stw \spc,0(\tmp) 99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) @@ -464,23 +463,26 @@ 3: .endm - /* Release pa_tlb_lock lock without reloading lock address. */ - .macro tlb_unlock0 spc,tmp,tmp1 + /* Release pa_tlb_lock lock without reloading lock address. + Note that the values in the register spc are limited to + NR_SPACE_IDS (262144). Thus, the stw instruction always + stores a nonzero value even when register spc is 64 bits. + We use an ordered store to ensure all prior accesses are + performed prior to releasing the lock. */ + .macro tlb_unlock0 spc,tmp #ifdef CONFIG_SMP 98: or,COND(=) %r0,\spc,%r0 - LDCW 0(\tmp),\tmp1 - or,COND(=) %r0,\spc,%r0 - stw \spc,0(\tmp) + stw,ma \spc,0(\tmp) 99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) #endif .endm /* Release pa_tlb_lock lock. */ - .macro tlb_unlock1 spc,tmp,tmp1 + .macro tlb_unlock1 spc,tmp #ifdef CONFIG_SMP 98: load_pa_tlb_lock \tmp 99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) - tlb_unlock0 \spc,\tmp,\tmp1 + tlb_unlock0 \spc,\tmp #endif .endm @@ -1163,7 +1165,7 @@ dtlb_miss_20w: idtlbt pte,prot - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1189,7 +1191,7 @@ nadtlb_miss_20w: idtlbt pte,prot - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1223,7 +1225,7 @@ dtlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1256,7 +1258,7 @@ nadtlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1285,7 +1287,7 @@ dtlb_miss_20: idtlbt pte,prot - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1313,7 +1315,7 @@ nadtlb_miss_20: idtlbt pte,prot - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1420,7 +1422,7 @@ itlb_miss_20w: iitlbt pte,prot - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1444,7 +1446,7 @@ naitlb_miss_20w: iitlbt pte,prot - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1478,7 +1480,7 @@ itlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1502,7 +1504,7 @@ naitlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1532,7 +1534,7 @@ itlb_miss_20: iitlbt pte,prot - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1552,7 +1554,7 @@ naitlb_miss_20: iitlbt pte,prot - tlb_unlock1 spc,t0,t1 + tlb_unlock1 spc,t0 rfir nop @@ -1582,7 +1584,7 @@ dbit_trap_20w: idtlbt pte,prot - tlb_unlock0 spc,t0,t1 + tlb_unlock0 spc,t0 rfir nop #else @@ -1608,7 +1610,7 @@ dbit_trap_11: mtsp t1, %sr1 /* Restore sr1 */ - tlb_unlock0 spc,t0,t1 + tlb_unlock0 spc,t0 rfir nop @@ -1628,7 +1630,7 @@ dbit_trap_20: idtlbt pte,prot - tlb_unlock0 spc,t0,t1 + tlb_unlock0 spc,t0 rfir nop #endif @@ -1839,8 +1841,8 @@ syscall_restore: LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 /* Are we being ptraced? */ - ldw TASK_FLAGS(%r1),%r19 - ldi _TIF_SYSCALL_TRACE_MASK,%r2 + LDREG TI_FLAGS-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r19 + ldi _TIF_SINGLESTEP|_TIF_BLOCKSTEP,%r2 and,COND(=) %r19,%r2,%r0 b,n syscall_restore_rfi diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index e5fcfb70cc7c0..4d54aa70ea5f3 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -376,7 +376,11 @@ static inline int eirr_to_irq(unsigned long eirr) /* * IRQ STACK - used for irq handler */ +#ifdef CONFIG_64BIT +#define IRQ_STACK_SIZE (4096 << 4) /* 64k irq stack size */ +#else #define IRQ_STACK_SIZE (4096 << 3) /* 32k irq stack size */ +#endif union irq_stack_union { unsigned long stack[IRQ_STACK_SIZE/sizeof(unsigned long)]; diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index ac5f34993b53b..1c50093e2ebee 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -862,7 +863,7 @@ int module_finalize(const Elf_Ehdr *hdr, const char *strtab = NULL; const Elf_Shdr *s; char *secstrings; - int err, symindex = -1; + int symindex = -1; Elf_Sym *newptr, *oldptr; Elf_Shdr *symhdr = NULL; #ifdef DEBUG @@ -946,11 +947,13 @@ int module_finalize(const Elf_Ehdr *hdr, /* patch .altinstructions */ apply_alternatives(aseg, aseg + s->sh_size, me->name); +#ifdef CONFIG_DYNAMIC_FTRACE /* For 32 bit kernels we're compiling modules with * -ffunction-sections so we must relocate the addresses in the - *__mcount_loc section. + * ftrace callsite section. */ - if (symindex != -1 && !strcmp(secname, "__mcount_loc")) { + if (symindex != -1 && !strcmp(secname, FTRACE_CALLSITE_SECTION)) { + int err; if (s->sh_type == SHT_REL) err = apply_relocate((Elf_Shdr *)sechdrs, strtab, symindex, @@ -962,6 +965,7 @@ int module_finalize(const Elf_Ehdr *hdr, if (err) return err; } +#endif } return 0; } diff --git a/arch/parisc/kernel/module.lds b/arch/parisc/kernel/module.lds deleted file mode 100644 index 1a9a92aca5c8a..0000000000000 --- a/arch/parisc/kernel/module.lds +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -SECTIONS { - __mcount_loc : { - *(__patchable_function_entries) - } -} diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 8ed409ecec933..e8a6a751dfd8e 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -17,10 +17,6 @@ #include EXPORT_SYMBOL(memset); -EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strcpy); -EXPORT_SYMBOL(strncpy); -EXPORT_SYMBOL(strcat); #include EXPORT_SYMBOL(__xchg8); diff --git a/arch/parisc/kernel/patch.c b/arch/parisc/kernel/patch.c index 80a0ab372802d..e59574f65e641 100644 --- a/arch/parisc/kernel/patch.c +++ b/arch/parisc/kernel/patch.c @@ -40,10 +40,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags, *need_unmap = 1; set_fixmap(fixmap, page_to_phys(page)); - if (flags) - raw_spin_lock_irqsave(&patch_lock, *flags); - else - __acquire(&patch_lock); + raw_spin_lock_irqsave(&patch_lock, *flags); return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK)); } @@ -52,10 +49,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags) { clear_fixmap(fixmap); - if (flags) - raw_spin_unlock_irqrestore(&patch_lock, *flags); - else - __release(&patch_lock); + raw_spin_unlock_irqrestore(&patch_lock, *flags); } void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) @@ -67,8 +61,9 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) int mapped; /* Make sure we don't have any aliases in cache */ - flush_kernel_vmap_range(addr, len); - flush_icache_range(start, end); + flush_kernel_dcache_range_asm(start, end); + flush_kernel_icache_range_asm(start, end); + flush_tlb_kernel_range(start, end); p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, &mapped); @@ -81,8 +76,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) * We're crossing a page boundary, so * need to remap */ - flush_kernel_vmap_range((void *)fixmap, - (p-fixmap) * sizeof(*p)); + flush_kernel_dcache_range_asm((unsigned long)fixmap, + (unsigned long)p); + flush_tlb_kernel_range((unsigned long)fixmap, + (unsigned long)p); if (mapped) patch_unmap(FIX_TEXT_POKE0, &flags); p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, @@ -90,10 +87,10 @@ void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) } } - flush_kernel_vmap_range((void *)fixmap, (p-fixmap) * sizeof(*p)); + flush_kernel_dcache_range_asm((unsigned long)fixmap, (unsigned long)p); + flush_tlb_kernel_range((unsigned long)fixmap, (unsigned long)p); if (mapped) patch_unmap(FIX_TEXT_POKE0, &flags); - flush_icache_range(start, end); } void __kprobes __patch_text(void *addr, u32 insn) diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index ecc5c27712087..230a6422b99f3 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -208,8 +208,8 @@ arch_initcall(parisc_idle_init); * Copy architecture-specific thread state */ int -copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long kthread_arg, struct task_struct *p) +copy_thread_tls(unsigned long clone_flags, unsigned long usp, + unsigned long kthread_arg, struct task_struct *p, unsigned long tls) { struct pt_regs *cregs = &(p->thread.regs); void *stack = task_stack_page(p); @@ -254,9 +254,9 @@ copy_thread(unsigned long clone_flags, unsigned long usp, cregs->ksp = (unsigned long)stack + THREAD_SZ_ALGN + FRAME_SIZE; cregs->kpc = (unsigned long) &child_return; - /* Setup thread TLS area from the 4th parameter in clone */ + /* Setup thread TLS area */ if (clone_flags & CLONE_SETTLS) - cregs->cr27 = cregs->gr[23]; + cregs->cr27 = tls; } return 0; diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 13f771f74ee3b..b0045889864c2 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -419,8 +419,7 @@ show_cpuinfo (struct seq_file *m, void *v) } seq_printf(m, " (0x%02lx)\n", boot_cpu_data.pdc.capabilities); - seq_printf(m, "model\t\t: %s\n" - "model name\t: %s\n", + seq_printf(m, "model\t\t: %s - %s\n", boot_cpu_data.pdc.sys_model_name, cpuinfo->dev ? cpuinfo->dev->name : "Unknown"); diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 02895a8f2c551..92223f9ff05c7 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -238,6 +238,12 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, #endif usp = (regs->gr[30] & ~(0x01UL)); +#ifdef CONFIG_64BIT + if (is_compat_task()) { + /* The gcc alloca implementation leaves garbage in the upper 32 bits of sp */ + usp = (compat_uint_t)usp; + } +#endif /*FIXME: frame_size parameter is unused, remove it. */ frame = get_sigframe(&ksig->ka, usp, sizeof(*frame)); diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index e202c37e56af3..9997465c11820 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -71,7 +72,10 @@ enum ipi_message_type { IPI_CALL_FUNC, IPI_CPU_START, IPI_CPU_STOP, - IPI_CPU_TEST + IPI_CPU_TEST, +#ifdef CONFIG_KGDB + IPI_ENTER_KGDB, +#endif }; @@ -169,7 +173,12 @@ ipi_interrupt(int irq, void *dev_id) case IPI_CPU_TEST: smp_debug(100, KERN_DEBUG "CPU%d is alive!\n", this_cpu); break; - +#ifdef CONFIG_KGDB + case IPI_ENTER_KGDB: + smp_debug(100, KERN_DEBUG "CPU%d ENTER_KGDB\n", this_cpu); + kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); + break; +#endif default: printk(KERN_CRIT "Unknown IPI num on CPU%d: %lu\n", this_cpu, which); @@ -225,6 +234,12 @@ send_IPI_allbutself(enum ipi_message_type op) } } +#ifdef CONFIG_KGDB +void kgdb_roundup_cpus(void) +{ + send_IPI_allbutself(IPI_ENTER_KGDB); +} +#endif inline void smp_send_stop(void) { send_IPI_allbutself(IPI_CPU_STOP); } diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 97ac707c6bfff..8b616aada6492 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -478,7 +478,7 @@ lws_start: extrd,u %r1,PSW_W_BIT,1,%r1 /* sp must be aligned on 4, so deposit the W bit setting into * the bottom of sp temporarily */ - or,ev %r1,%r30,%r30 + or,od %r1,%r30,%r30 /* Clip LWS number to a 32-bit value for 32-bit processes */ depdi 0, 31, 32, %r20 @@ -640,11 +640,7 @@ cas_action: sub,<> %r28, %r25, %r0 2: stw %r24, 0(%r26) /* Free lock */ -#ifdef CONFIG_SMP -98: LDCW 0(%sr2,%r20), %r1 /* Barrier */ -99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) -#endif - stw %r20, 0(%sr2,%r20) + stw,ma %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG /* Clear thread register indicator */ stw %r0, 4(%sr2,%r20) @@ -658,11 +654,7 @@ cas_action: 3: /* Error occurred on load or store */ /* Free lock */ -#ifdef CONFIG_SMP -98: LDCW 0(%sr2,%r20), %r1 /* Barrier */ -99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) -#endif - stw %r20, 0(%sr2,%r20) + stw,ma %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG stw %r0, 4(%sr2,%r20) #endif @@ -863,11 +855,7 @@ cas2_action: cas2_end: /* Free lock */ -#ifdef CONFIG_SMP -98: LDCW 0(%sr2,%r20), %r1 /* Barrier */ -99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) -#endif - stw %r20, 0(%sr2,%r20) + stw,ma %r20, 0(%sr2,%r20) /* Enable interrupts */ ssm PSW_SM_I, %r0 /* Return to userspace, set no error */ @@ -877,11 +865,7 @@ cas2_end: 22: /* Error occurred on load or store */ /* Free lock */ -#ifdef CONFIG_SMP -98: LDCW 0(%sr2,%r20), %r1 /* Barrier */ -99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) -#endif - stw %r20, 0(%sr2,%r20) + stw,ma %r20, 0(%sr2,%r20) ssm PSW_SM_I, %r0 ldo 1(%r0),%r28 b lws_exit diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 285ff516150cf..51f15a414e29a 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -413,7 +413,7 @@ 412 32 utimensat_time64 sys_utimensat sys_utimensat 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 04508158815c1..9c1ae9d242ca9 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -245,27 +245,13 @@ void __init time_init(void) static int __init init_cr16_clocksource(void) { /* - * The cr16 interval timers are not syncronized across CPUs on - * different sockets, so mark them unstable and lower rating on - * multi-socket SMP systems. + * The cr16 interval timers are not syncronized across CPUs, even if + * they share the same socket. */ if (num_online_cpus() > 1 && !running_on_qemu) { - int cpu; - unsigned long cpu0_loc; - cpu0_loc = per_cpu(cpu_data, 0).cpu_loc; - - for_each_online_cpu(cpu) { - if (cpu == 0) - continue; - if ((cpu0_loc != 0) && - (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc)) - continue; - - clocksource_cr16.name = "cr16_unstable"; - clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; - clocksource_cr16.rating = 0; - break; - } + clocksource_cr16.name = "cr16_unstable"; + clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; + clocksource_cr16.rating = 0; } /* XXX: We may want to mark sched_clock stable here if cr16 clocks are diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 82fc011894889..2a1060d747a5d 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -783,7 +783,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) * unless pagefault_disable() was called before. */ - if (fault_space == 0 && !faulthandler_disabled()) + if (faulthandler_disabled() || fault_space == 0) { /* Clean up and return if in exception table. */ if (fixup_exception(regs)) diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 237d20dd5622d..cc6ed74960501 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -107,7 +107,7 @@ #define R1(i) (((i)>>21)&0x1f) #define R2(i) (((i)>>16)&0x1f) #define R3(i) ((i)&0x1f) -#define FR3(i) ((((i)<<1)&0x1f)|(((i)>>6)&1)) +#define FR3(i) ((((i)&0x1f)<<1)|(((i)>>6)&1)) #define IM(i,n) (((i)>>1&((1<<(n-1))-1))|((i)&1?((0-1L)<<(n-1)):0)) #define IM5_2(i) IM((i)>>16,5) #define IM5_3(i) IM((i),5) @@ -340,7 +340,7 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1", FIXUP_BRANCH_CLOBBER ); - return 0; + return ret; } static int emulate_std(struct pt_regs *regs, int frreg, int flop) { @@ -397,7 +397,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) __asm__ __volatile__ ( " mtsp %4, %%sr1\n" " zdep %2, 29, 2, %%r19\n" -" dep %%r0, 31, 2, %2\n" +" dep %%r0, 31, 2, %3\n" " mtsar %%r19\n" " zvdepi -2, 32, %%r19\n" "1: ldw 0(%%sr1,%3),%%r20\n" @@ -409,7 +409,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) " andcm %%r21, %%r19, %%r21\n" " or %1, %%r20, %1\n" " or %2, %%r21, %2\n" -"3: stw %1,0(%%sr1,%1)\n" +"3: stw %1,0(%%sr1,%3)\n" "4: stw %%r1,4(%%sr1,%3)\n" "5: stw %2,8(%%sr1,%3)\n" " copy %%r0, %0\n" @@ -596,7 +596,6 @@ void handle_unaligned(struct pt_regs *regs) ret = ERR_NOTHANDLED; /* "undefined", but lets kill them. */ break; } -#ifdef CONFIG_PA20 switch (regs->iir & OPCODE2_MASK) { case OPCODE_FLDD_L: @@ -607,22 +606,23 @@ void handle_unaligned(struct pt_regs *regs) flop=1; ret = emulate_std(regs, R2(regs->iir),1); break; +#ifdef CONFIG_PA20 case OPCODE_LDD_L: ret = emulate_ldd(regs, R2(regs->iir),0); break; case OPCODE_STD_L: ret = emulate_std(regs, R2(regs->iir),0); break; - } #endif + } switch (regs->iir & OPCODE3_MASK) { case OPCODE_FLDW_L: flop=1; - ret = emulate_ldw(regs, R2(regs->iir),0); + ret = emulate_ldw(regs, R2(regs->iir), 1); break; case OPCODE_LDW_M: - ret = emulate_ldw(regs, R2(regs->iir),1); + ret = emulate_ldw(regs, R2(regs->iir), 0); break; case OPCODE_FSTW_L: diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index 87ae476d1c4f5..86a57fb0e6fae 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -21,6 +21,8 @@ #include #include +#include +#include /* #define DEBUG 1 */ #ifdef DEBUG @@ -203,6 +205,11 @@ int __init unwind_init(void) return 0; } +static bool pc_is_kernel_fn(unsigned long pc, void *fn) +{ + return (unsigned long)dereference_kernel_function_descriptor(fn) == pc; +} + static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int frame_size) { /* @@ -221,7 +228,7 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int extern void * const _call_on_stack; #endif /* CONFIG_IRQSTACKS */ - if (pc == (unsigned long) &handle_interruption) { + if (pc_is_kernel_fn(pc, handle_interruption)) { struct pt_regs *regs = (struct pt_regs *)(info->sp - frame_size - PT_SZ_ALGN); dbg("Unwinding through handle_interruption()\n"); info->prev_sp = regs->gr[30]; @@ -229,13 +236,13 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int return 1; } - if (pc == (unsigned long) &ret_from_kernel_thread || - pc == (unsigned long) &syscall_exit) { + if (pc_is_kernel_fn(pc, ret_from_kernel_thread) || + pc_is_kernel_fn(pc, syscall_exit)) { info->prev_sp = info->prev_ip = 0; return 1; } - if (pc == (unsigned long) &intr_return) { + if (pc_is_kernel_fn(pc, intr_return)) { struct pt_regs *regs; dbg("Found intr_return()\n"); @@ -246,20 +253,20 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int return 1; } - if (pc == (unsigned long) &_switch_to_ret) { + if (pc_is_kernel_fn(pc, _switch_to) || + pc_is_kernel_fn(pc, _switch_to_ret)) { info->prev_sp = info->sp - CALLEE_SAVE_FRAME_SIZE; info->prev_ip = *(unsigned long *)(info->prev_sp - RP_OFFSET); return 1; } #ifdef CONFIG_IRQSTACKS - if (pc == (unsigned long) &_call_on_stack) { + if (pc_is_kernel_fn(pc, _call_on_stack)) { info->prev_sp = *(unsigned long *)(info->sp - FRAME_SIZE - REG_SZ); info->prev_ip = *(unsigned long *)(info->sp - FRAME_SIZE - RP_OFFSET); return 1; } #endif - return 0; } diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile index 2d7a9974dbaef..7b197667faf6c 100644 --- a/arch/parisc/lib/Makefile +++ b/arch/parisc/lib/Makefile @@ -3,7 +3,7 @@ # Makefile for parisc-specific library files # -lib-y := lusercopy.o bitops.o checksum.o io.o memcpy.o \ - ucmpdi2.o delay.o string.o +lib-y := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \ + ucmpdi2.o delay.o obj-y := iomap.o diff --git a/arch/parisc/lib/bitops.c b/arch/parisc/lib/bitops.c index 70ffbcf889b8e..2e4d1f05a9264 100644 --- a/arch/parisc/lib/bitops.c +++ b/arch/parisc/lib/bitops.c @@ -79,3 +79,15 @@ unsigned long __cmpxchg_u32(volatile unsigned int *ptr, unsigned int old, unsign _atomic_spin_unlock_irqrestore(ptr, flags); return (unsigned long)prev; } + +u8 __cmpxchg_u8(volatile u8 *ptr, u8 old, u8 new) +{ + unsigned long flags; + u8 prev; + + _atomic_spin_lock_irqsave(ptr, flags); + if ((prev = *ptr) == old) + *ptr = new; + _atomic_spin_unlock_irqrestore(ptr, flags); + return prev; +} diff --git a/arch/parisc/lib/memset.c b/arch/parisc/lib/memset.c new file mode 100644 index 0000000000000..133e4809859a3 --- /dev/null +++ b/arch/parisc/lib/memset.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#include +#include + +#define OPSIZ (BITS_PER_LONG/8) +typedef unsigned long op_t; + +void * +memset (void *dstpp, int sc, size_t len) +{ + unsigned int c = sc; + long int dstp = (long int) dstpp; + + if (len >= 8) + { + size_t xlen; + op_t cccc; + + cccc = (unsigned char) c; + cccc |= cccc << 8; + cccc |= cccc << 16; + if (OPSIZ > 4) + /* Do the shift in two steps to avoid warning if long has 32 bits. */ + cccc |= (cccc << 16) << 16; + + /* There are at least some bytes to set. + No need to test for LEN == 0 in this alignment loop. */ + while (dstp % OPSIZ != 0) + { + ((unsigned char *) dstp)[0] = c; + dstp += 1; + len -= 1; + } + + /* Write 8 `op_t' per iteration until less than 8 `op_t' remain. */ + xlen = len / (OPSIZ * 8); + while (xlen > 0) + { + ((op_t *) dstp)[0] = cccc; + ((op_t *) dstp)[1] = cccc; + ((op_t *) dstp)[2] = cccc; + ((op_t *) dstp)[3] = cccc; + ((op_t *) dstp)[4] = cccc; + ((op_t *) dstp)[5] = cccc; + ((op_t *) dstp)[6] = cccc; + ((op_t *) dstp)[7] = cccc; + dstp += 8 * OPSIZ; + xlen -= 1; + } + len %= OPSIZ * 8; + + /* Write 1 `op_t' per iteration until less than OPSIZ bytes remain. */ + xlen = len / OPSIZ; + while (xlen > 0) + { + ((op_t *) dstp)[0] = cccc; + dstp += OPSIZ; + xlen -= 1; + } + len %= OPSIZ; + } + + /* Write the last few bytes. */ + while (len > 0) + { + ((unsigned char *) dstp)[0] = c; + dstp += 1; + len -= 1; + } + + return dstpp; +} diff --git a/arch/parisc/lib/string.S b/arch/parisc/lib/string.S deleted file mode 100644 index 4a64264427a63..0000000000000 --- a/arch/parisc/lib/string.S +++ /dev/null @@ -1,136 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * PA-RISC assembly string functions - * - * Copyright (C) 2019 Helge Deller - */ - -#include -#include - - .section .text.hot - .level PA_ASM_LEVEL - - t0 = r20 - t1 = r21 - t2 = r22 - -ENTRY_CFI(strlen, frame=0,no_calls) - or,COND(<>) arg0,r0,ret0 - b,l,n .Lstrlen_null_ptr,r0 - depwi 0,31,2,ret0 - cmpb,COND(<>) arg0,ret0,.Lstrlen_not_aligned - ldw,ma 4(ret0),t0 - cmpib,tr 0,r0,.Lstrlen_loop - uxor,nbz r0,t0,r0 -.Lstrlen_not_aligned: - uaddcm arg0,ret0,t1 - shladd t1,3,r0,t1 - mtsar t1 - depwi -1,%sar,32,t0 - uxor,nbz r0,t0,r0 -.Lstrlen_loop: - b,l,n .Lstrlen_end_loop,r0 - ldw,ma 4(ret0),t0 - cmpib,tr 0,r0,.Lstrlen_loop - uxor,nbz r0,t0,r0 -.Lstrlen_end_loop: - extrw,u,<> t0,7,8,r0 - addib,tr,n -3,ret0,.Lstrlen_out - extrw,u,<> t0,15,8,r0 - addib,tr,n -2,ret0,.Lstrlen_out - extrw,u,<> t0,23,8,r0 - addi -1,ret0,ret0 -.Lstrlen_out: - bv r0(rp) - uaddcm ret0,arg0,ret0 -.Lstrlen_null_ptr: - bv,n r0(rp) -ENDPROC_CFI(strlen) - - -ENTRY_CFI(strcpy, frame=0,no_calls) - ldb 0(arg1),t0 - stb t0,0(arg0) - ldo 0(arg0),ret0 - ldo 1(arg1),t1 - cmpb,= r0,t0,2f - ldo 1(arg0),t2 -1: ldb 0(t1),arg1 - stb arg1,0(t2) - ldo 1(t1),t1 - cmpb,<> r0,arg1,1b - ldo 1(t2),t2 -2: bv,n r0(rp) -ENDPROC_CFI(strcpy) - - -ENTRY_CFI(strncpy, frame=0,no_calls) - ldb 0(arg1),t0 - stb t0,0(arg0) - ldo 1(arg1),t1 - ldo 0(arg0),ret0 - cmpb,= r0,t0,2f - ldo 1(arg0),arg1 -1: ldo -1(arg2),arg2 - cmpb,COND(=),n r0,arg2,2f - ldb 0(t1),arg0 - stb arg0,0(arg1) - ldo 1(t1),t1 - cmpb,<> r0,arg0,1b - ldo 1(arg1),arg1 -2: bv,n r0(rp) -ENDPROC_CFI(strncpy) - - -ENTRY_CFI(strcat, frame=0,no_calls) - ldb 0(arg0),t0 - cmpb,= t0,r0,2f - ldo 0(arg0),ret0 - ldo 1(arg0),arg0 -1: ldb 0(arg0),t1 - cmpb,<>,n r0,t1,1b - ldo 1(arg0),arg0 -2: ldb 0(arg1),t2 - stb t2,0(arg0) - ldo 1(arg0),arg0 - ldb 0(arg1),t0 - cmpb,<> r0,t0,2b - ldo 1(arg1),arg1 - bv,n r0(rp) -ENDPROC_CFI(strcat) - - -ENTRY_CFI(memset, frame=0,no_calls) - copy arg0,ret0 - cmpb,COND(=) r0,arg0,4f - copy arg0,t2 - cmpb,COND(=) r0,arg2,4f - ldo -1(arg2),arg3 - subi -1,arg3,t0 - subi 0,t0,t1 - cmpiclr,COND(>=) 0,t1,arg2 - ldo -1(t1),arg2 - extru arg2,31,2,arg0 -2: stb arg1,0(t2) - ldo 1(t2),t2 - addib,>= -1,arg0,2b - ldo -1(arg3),arg3 - cmpiclr,COND(<=) 4,arg2,r0 - b,l,n 4f,r0 -#ifdef CONFIG_64BIT - depd,* r0,63,2,arg2 -#else - depw r0,31,2,arg2 -#endif - ldo 1(t2),t2 -3: stb arg1,-1(t2) - stb arg1,0(t2) - stb arg1,1(t2) - stb arg1,2(t2) - addib,COND(>) -4,arg2,3b - ldo 4(t2),t2 -4: bv,n r0(rp) -ENDPROC_CFI(memset) - - .end diff --git a/arch/parisc/math-emu/fpudispatch.c b/arch/parisc/math-emu/fpudispatch.c index 7c46969ead9b1..01ed133227c25 100644 --- a/arch/parisc/math-emu/fpudispatch.c +++ b/arch/parisc/math-emu/fpudispatch.c @@ -310,12 +310,15 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1]; return(NOEXCEPTION); } + BUG(); case 3: /* FABS */ switch (fmt) { case 2: /* illegal */ @@ -325,13 +328,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ /* copy and clear sign bit */ fpregs[t] = fpregs[r1] & 0x7fffffff; return(NOEXCEPTION); } + BUG(); case 6: /* FNEG */ switch (fmt) { case 2: /* illegal */ @@ -341,13 +347,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ /* copy and invert sign bit */ fpregs[t] = fpregs[r1] ^ 0x80000000; return(NOEXCEPTION); } + BUG(); case 7: /* FNEGABS */ switch (fmt) { case 2: /* illegal */ @@ -357,13 +366,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ /* copy and set sign bit */ fpregs[t] = fpregs[r1] | 0x80000000; return(NOEXCEPTION); } + BUG(); case 4: /* FSQRT */ switch (fmt) { case 0: @@ -376,6 +388,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 5: /* FRND */ switch (fmt) { case 0: @@ -389,7 +402,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(MAJOR_0C_EXCP); } } /* end of switch (subop) */ - + BUG(); case 1: /* class 1 */ df = extru(ir,fpdfpos,2); /* get dest format */ if ((df & 2) || (fmt & 2)) { @@ -419,6 +432,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* dbl/dbl */ return(MAJOR_0C_EXCP); } + BUG(); case 1: /* FCNVXF */ switch(fmt) { case 0: /* sgl/sgl */ @@ -434,6 +448,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvxf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 2: /* FCNVFX */ switch(fmt) { case 0: /* sgl/sgl */ @@ -449,6 +464,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfx(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 3: /* FCNVFXT */ switch(fmt) { case 0: /* sgl/sgl */ @@ -464,6 +480,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfxt(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 5: /* FCNVUF (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -479,6 +496,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvuf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 6: /* FCNVFU (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -494,6 +512,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfu(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 7: /* FCNVFUT (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -509,10 +528,11 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfut(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 4: /* undefined */ return(MAJOR_0C_EXCP); } /* end of switch subop */ - + BUG(); case 2: /* class 2 */ fpu_type_flags=fpregs[FPU_TYPE_FLAG_POS]; r2 = extru(ir, fpr2pos, 5) * sizeof(double)/sizeof(u_int); @@ -590,6 +610,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 1: /* FTEST */ switch (fmt) { case 0: @@ -609,8 +630,10 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: return(MAJOR_0C_EXCP); } + BUG(); } /* end of switch subop */ } /* end of else for PA1.0 & PA1.1 */ + BUG(); case 3: /* class 3 */ r2 = extru(ir,fpr2pos,5) * sizeof(double)/sizeof(u_int); if (r2 == 0) @@ -633,6 +656,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 1: /* FSUB */ switch (fmt) { case 0: @@ -645,6 +669,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 2: /* FMPY */ switch (fmt) { case 0: @@ -657,6 +682,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 3: /* FDIV */ switch (fmt) { case 0: @@ -669,6 +695,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 4: /* FREM */ switch (fmt) { case 0: @@ -681,6 +708,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); } /* end of class 3 switch */ } /* end of switch(class) */ @@ -736,10 +764,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1]; return(NOEXCEPTION); } + BUG(); case 3: /* FABS */ switch (fmt) { case 2: @@ -747,10 +777,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1] & 0x7fffffff; return(NOEXCEPTION); } + BUG(); case 6: /* FNEG */ switch (fmt) { case 2: @@ -758,10 +790,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1] ^ 0x80000000; return(NOEXCEPTION); } + BUG(); case 7: /* FNEGABS */ switch (fmt) { case 2: @@ -769,10 +803,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1] | 0x80000000; return(NOEXCEPTION); } + BUG(); case 4: /* FSQRT */ switch (fmt) { case 0: @@ -785,6 +821,7 @@ u_int fpregs[]; case 3: return(MAJOR_0E_EXCP); } + BUG(); case 5: /* FRMD */ switch (fmt) { case 0: @@ -798,7 +835,7 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); } } /* end of switch (subop */ - + BUG(); case 1: /* class 1 */ df = extru(ir,fpdfpos,2); /* get dest format */ /* @@ -826,6 +863,7 @@ u_int fpregs[]; case 3: /* dbl/dbl */ return(MAJOR_0E_EXCP); } + BUG(); case 1: /* FCNVXF */ switch(fmt) { case 0: /* sgl/sgl */ @@ -841,6 +879,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvxf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 2: /* FCNVFX */ switch(fmt) { case 0: /* sgl/sgl */ @@ -856,6 +895,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfx(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 3: /* FCNVFXT */ switch(fmt) { case 0: /* sgl/sgl */ @@ -871,6 +911,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfxt(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 5: /* FCNVUF (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -886,6 +927,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvuf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 6: /* FCNVFU (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -901,6 +943,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfu(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 7: /* FCNVFUT (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -916,9 +959,11 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfut(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 4: /* undefined */ return(MAJOR_0C_EXCP); } /* end of switch subop */ + BUG(); case 2: /* class 2 */ /* * Be careful out there. @@ -994,6 +1039,7 @@ u_int fpregs[]; } } /* end of switch subop */ } /* end of else for PA1.0 & PA1.1 */ + BUG(); case 3: /* class 3 */ /* * Be careful out there. @@ -1026,6 +1072,7 @@ u_int fpregs[]; return(dbl_fadd(&fpregs[r1],&fpregs[r2], &fpregs[t],status)); } + BUG(); case 1: /* FSUB */ switch (fmt) { case 0: @@ -1035,6 +1082,7 @@ u_int fpregs[]; return(dbl_fsub(&fpregs[r1],&fpregs[r2], &fpregs[t],status)); } + BUG(); case 2: /* FMPY or XMPYU */ /* * check for integer multiply (x bit set) @@ -1071,6 +1119,7 @@ u_int fpregs[]; &fpregs[r2],&fpregs[t],status)); } } + BUG(); case 3: /* FDIV */ switch (fmt) { case 0: @@ -1080,6 +1129,7 @@ u_int fpregs[]; return(dbl_fdiv(&fpregs[r1],&fpregs[r2], &fpregs[t],status)); } + BUG(); case 4: /* FREM */ switch (fmt) { case 0: diff --git a/arch/parisc/mm/fixmap.c b/arch/parisc/mm/fixmap.c index 474cd241c1509..02e19a32e6c5f 100644 --- a/arch/parisc/mm/fixmap.c +++ b/arch/parisc/mm/fixmap.c @@ -18,12 +18,9 @@ void notrace set_fixmap(enum fixed_addresses idx, phys_addr_t phys) pte_t *pte; if (pmd_none(*pmd)) - pmd = pmd_alloc(NULL, pgd, vaddr); - - pte = pte_offset_kernel(pmd, vaddr); - if (pte_none(*pte)) pte = pte_alloc_kernel(pmd, vaddr); + pte = pte_offset_kernel(pmd, vaddr); set_pte_at(&init_mm, vaddr, pte, __mk_pte(phys, PAGE_KERNEL_RWX)); flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE); } diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index ddca8287d43ba..15b7dae1808db 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -347,9 +347,9 @@ static void __init setup_bootmem(void) static bool kernel_set_to_readonly; -static void __init map_pages(unsigned long start_vaddr, - unsigned long start_paddr, unsigned long size, - pgprot_t pgprot, int force) +static void __ref map_pages(unsigned long start_vaddr, + unsigned long start_paddr, unsigned long size, + pgprot_t pgprot, int force) { pgd_t *pg_dir; pmd_t *pmd; @@ -485,7 +485,7 @@ void __init set_kernel_text_rw(int enable_read_write) flush_tlb_all(); } -void __ref free_initmem(void) +void free_initmem(void) { unsigned long init_begin = (unsigned long)__init_begin; unsigned long init_end = (unsigned long)__init_end; @@ -499,7 +499,6 @@ void __ref free_initmem(void) /* The init text pages are marked R-X. We have to * flush the icache and mark them RW- * - * This is tricky, because map_pages is in the init section. * Do a dummy remap of the data section first (the data * section is already PAGE_KERNEL) to pull in the TLB entries * for map_kernel */ @@ -588,7 +587,7 @@ void __init mem_init(void) > BITS_PER_LONG); high_memory = __va((max_pfn << PAGE_SHIFT)); - set_max_mapnr(page_to_pfn(virt_to_page(high_memory - 1)) + 1); + set_max_mapnr(max_low_pfn); memblock_free_all(); #ifdef CONFIG_PA11 @@ -892,9 +891,9 @@ void flush_tlb_all(void) { int do_recycle; - __inc_irq_stat(irq_tlb_count); do_recycle = 0; spin_lock(&sid_lock); + __inc_irq_stat(irq_tlb_count); if (dirty_space_ids > RECYCLE_THRESHOLD) { BUG_ON(recycle_inuse); /* FIXME: Use a semaphore/wait queue here */ get_dirty_sids(&recycle_ndirty,recycle_dirty_array); @@ -913,8 +912,8 @@ void flush_tlb_all(void) #else void flush_tlb_all(void) { - __inc_irq_stat(irq_tlb_count); spin_lock(&sid_lock); + __inc_irq_stat(irq_tlb_count); flush_tlb_all_local(NULL); recycle_sids(); spin_unlock(&sid_lock); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3e56c9c2f16ee..20212908dd676 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -133,7 +133,7 @@ config PPC select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 - select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION) + select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_UACCESS_MCSAFE if PPC64 @@ -147,6 +147,7 @@ config PPC select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select ARCH_WANT_IPC_PARSE_VERSION + select ARCH_WANT_IRQS_OFF_ACTIVATE_MM select ARCH_WEAK_RELEASE_ACQUIRE select BINFMT_ELF select BUILDTIME_EXTABLE_SORT @@ -171,7 +172,7 @@ config PPC select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU select HAVE_ARCH_JUMP_LABEL - select HAVE_ARCH_KASAN if PPC32 + select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT @@ -213,7 +214,7 @@ config PPC select HAVE_MEMBLOCK_NODE_MAP select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) - select HAVE_HARDLOCKUP_DETECTOR_ARCH if (PPC64 && PPC_BOOK3S) + select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC64 && PPC_BOOK3S && SMP select HAVE_OPROFILE select HAVE_OPTPROBES if PPC64 select HAVE_PERF_EVENTS @@ -221,8 +222,7 @@ config PPC select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select HAVE_RCU_TABLE_FREE if SMP - select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE + select HAVE_RCU_TABLE_FREE select HAVE_MMU_GATHER_PAGE_SIZE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN @@ -237,6 +237,7 @@ config PPC select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE select NEED_SG_DMA_LENGTH select OF + select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE select OF_EARLY_FLATTREE select OLD_SIGACTION if PPC32 select OLD_SIGSUSPEND @@ -722,7 +723,7 @@ config PPC_64K_PAGES config PPC_256K_PAGES bool "256k page size" - depends on 44x && !STDBINUTILS + depends on 44x && !STDBINUTILS && !PPC_47x help Make the page size 256k. @@ -747,6 +748,7 @@ config THREAD_SHIFT range 13 15 default "15" if PPC_256K_PAGES default "14" if PPC64 + default "14" if KASAN default "13" help Used to define the stack size. The default is almost always what you @@ -934,6 +936,28 @@ config PPC_MEM_KEYS If unsure, say y. +config PPC_SECURE_BOOT + prompt "Enable secure boot support" + bool + depends on PPC_POWERNV + depends on IMA_ARCH_POLICY + help + Systems with firmware secure boot enabled need to define security + policies to extend secure boot to the OS. This config allows a user + to enable OS secure boot on systems that have firmware support for + it. If in doubt say N. + +config PPC_SECVAR_SYSFS + bool "Enable sysfs interface for POWER secure variables" + default y + depends on PPC_SECURE_BOOT + depends on SYSFS + help + POWER secure variables are managed and controlled by firmware. + These variables are exposed to userspace via sysfs to enable + read/write operations on these variables. Say Y if you have + secure boot enabled and want to expose variables to userspace. + endmenu config ISA_DMA_API @@ -1022,6 +1046,19 @@ config FSL_RIO Include support for RapidIO controller on Freescale embedded processors (MPC8548, MPC8641, etc). +config PPC_RTAS_FILTER + bool "Enable filtering of RTAS syscalls" + default y + depends on PPC_RTAS + help + The RTAS syscall API has security issues that could be used to + compromise system integrity. This option enforces restrictions on the + RTAS calls and arguments passed by userspace programs to mitigate + these issues. + + Say Y unless you know what you are doing and the filter is causing + problems for you. + endmenu config NONSTATIC_KERNEL diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index c59920920ddc4..2ca9114fcf002 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -352,6 +352,7 @@ config PPC_EARLY_DEBUG_CPM_ADDR config FAIL_IOMMU bool "Fault-injection capability for IOMMU" depends on FAULT_INJECTION + depends on PCI || IBMVIO help Provide fault-injection capability for IOMMU. Each device can be selectively enabled via the fail_iommu property. @@ -371,7 +372,7 @@ config PPC_PTDUMP config PPC_DEBUG_WX bool "Warn on W+X mappings at boot" - depends on PPC_PTDUMP + depends on PPC_PTDUMP && STRICT_KERNEL_RWX help Generate a warning if any W+X mappings are found at boot. diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 83522c9fc7b66..75e9472553c9b 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -17,23 +17,6 @@ HAS_BIARCH := $(call cc-option-yn, -m32) # Set default 32 bits cross compilers for vdso and boot wrapper CROSS32_COMPILE ?= -ifeq ($(HAS_BIARCH),y) -ifeq ($(CROSS32_COMPILE),) -ifdef CONFIG_PPC32 -# These options will be overridden by any -mcpu option that the CPU -# or platform code sets later on the command line, but they are needed -# to set a sane 32-bit cpu target for the 64-bit cross compiler which -# may default to the wrong ISA. -KBUILD_CFLAGS += -mcpu=powerpc -KBUILD_AFLAGS += -mcpu=powerpc -endif -endif -endif - -ifdef CONFIG_PPC_BOOK3S_32 -KBUILD_CFLAGS += -mcpu=powerpc -endif - # If we're on a ppc/ppc64/ppc64le machine use that defconfig, otherwise just use # ppc64_defconfig because we have nothing better to go on. uname := $(shell uname -m) @@ -91,11 +74,13 @@ MULTIPLEWORD := -mmultiple endif ifdef CONFIG_PPC64 +ifndef CONFIG_CC_IS_CLANG cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mcall-aixdesc) aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2 endif +endif ifndef CONFIG_CC_IS_CLANG cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align @@ -141,6 +126,7 @@ endif endif CFLAGS-$(CONFIG_PPC64) := $(call cc-option,-mtraceback=no) +ifndef CONFIG_CC_IS_CLANG ifdef CONFIG_CPU_LITTLE_ENDIAN CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc)) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2) @@ -149,6 +135,7 @@ CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) endif +endif CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) @@ -170,7 +157,7 @@ else CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,$(call cc-option,-mtune=power5)) CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mcpu=power5,-mcpu=power4) endif -else +else ifdef CONFIG_PPC_BOOK3E_64 CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64 endif @@ -188,6 +175,7 @@ endif endif CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU)) +AFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU)) # Altivec option not allowed with e500mc64 in GCC. ifdef CONFIG_ALTIVEC @@ -198,14 +186,6 @@ endif CFLAGS-$(CONFIG_E5500_CPU) += $(E5500_CPU) CFLAGS-$(CONFIG_E6500_CPU) += $(call cc-option,-mcpu=e6500,$(E5500_CPU)) -ifdef CONFIG_PPC32 -ifdef CONFIG_PPC_E500MC -CFLAGS-y += $(call cc-option,-mcpu=e500mc,-mcpu=powerpc) -else -CFLAGS-$(CONFIG_E500) += $(call cc-option,-mcpu=8540 -msoft-float,-mcpu=powerpc) -endif -endif - asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1) KBUILD_CPPFLAGS += -I $(srctree)/arch/$(ARCH) $(asinstr) @@ -246,7 +226,6 @@ KBUILD_CFLAGS += $(call cc-option,-mno-string) cpu-as-$(CONFIG_4xx) += -Wa,-m405 cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec) -cpu-as-$(CONFIG_E200) += -Wa,-me200 cpu-as-$(CONFIG_E500) += -Wa,-me500 # When using '-many -mpower4' gas will first try and find a matching power4 @@ -281,7 +260,7 @@ drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ all: zImage # With make 3.82 we cannot mix normal and wildcard targets -BOOT_TARGETS1 := zImage zImage.initrd uImage +BOOT_TARGETS1 := zImage zImage.initrd uImage vmlinux.strip BOOT_TARGETS2 := zImage% dtbImage% treeImage.% cuImage.% simpleImage.% uImage.% PHONY += $(BOOT_TARGETS1) $(BOOT_TARGETS2) diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink index 134f12f89b92b..2268396ff4bba 100644 --- a/arch/powerpc/Makefile.postlink +++ b/arch/powerpc/Makefile.postlink @@ -17,11 +17,11 @@ quiet_cmd_head_check = CHKHEAD $@ quiet_cmd_relocs_check = CHKREL $@ ifdef CONFIG_PPC_BOOK3S_64 cmd_relocs_check = \ - $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$@" ; \ + $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" ; \ $(BASH) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$@" else cmd_relocs_check = \ - $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$@" + $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" endif # `@true` prevents complaint when there is nothing to be done diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c index 1699e95315523..00c4d843a023d 100644 --- a/arch/powerpc/boot/4xx.c +++ b/arch/powerpc/boot/4xx.c @@ -228,7 +228,7 @@ void ibm4xx_denali_fixup_memsize(void) dpath = 8; /* 64 bits */ /* get address pins (rows) */ - val = SDRAM0_READ(DDR0_42); + val = SDRAM0_READ(DDR0_42); row = DDR_GET_VAL(val, DDR_APIN, DDR_APIN_SHIFT); if (row > max_row) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index dfbd7f22eef5e..8c69bd07ada6a 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -119,7 +119,7 @@ src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \ elf_util.c $(zlib-y) devtree.c stdlib.c \ oflib.c ofconsole.c cuboot.c -src-wlib-$(CONFIG_PPC_MPC52XX) += mpc52xx-psc.c +src-wlib-$(CONFIG_PPC_MPC52xx) += mpc52xx-psc.c src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S opal.c ifndef CONFIG_PPC64_BOOT_WRAPPER src-wlib-y += crtsavres.S diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index 92608f34d3123..1d83966f5ef64 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -44,9 +44,6 @@ p_end: .long _end p_pstack: .long _platform_stack_top #endif - .globl _zimage_start - /* Clang appears to require the .weak directive to be after the symbol - * is defined. See https://bugs.llvm.org/show_bug.cgi?id=38921 */ .weak _zimage_start _zimage_start: .globl _zimage_start_lib diff --git a/arch/powerpc/boot/devtree.c b/arch/powerpc/boot/devtree.c index 5d91036ad626d..58fbcfcc98c9e 100644 --- a/arch/powerpc/boot/devtree.c +++ b/arch/powerpc/boot/devtree.c @@ -13,6 +13,7 @@ #include "string.h" #include "stdio.h" #include "ops.h" +#include "of.h" void dt_fixup_memory(u64 start, u64 size) { @@ -23,21 +24,25 @@ void dt_fixup_memory(u64 start, u64 size) root = finddevice("/"); if (getprop(root, "#address-cells", &naddr, sizeof(naddr)) < 0) naddr = 2; + else + naddr = be32_to_cpu(naddr); if (naddr < 1 || naddr > 2) fatal("Can't cope with #address-cells == %d in /\n\r", naddr); if (getprop(root, "#size-cells", &nsize, sizeof(nsize)) < 0) nsize = 1; + else + nsize = be32_to_cpu(nsize); if (nsize < 1 || nsize > 2) fatal("Can't cope with #size-cells == %d in /\n\r", nsize); i = 0; if (naddr == 2) - memreg[i++] = start >> 32; - memreg[i++] = start & 0xffffffff; + memreg[i++] = cpu_to_be32(start >> 32); + memreg[i++] = cpu_to_be32(start & 0xffffffff); if (nsize == 2) - memreg[i++] = size >> 32; - memreg[i++] = size & 0xffffffff; + memreg[i++] = cpu_to_be32(size >> 32); + memreg[i++] = cpu_to_be32(size & 0xffffffff); memory = finddevice("/memory"); if (! memory) { @@ -45,9 +50,9 @@ void dt_fixup_memory(u64 start, u64 size) setprop_str(memory, "device_type", "memory"); } - printf("Memory <- <0x%x", memreg[0]); + printf("Memory <- <0x%x", be32_to_cpu(memreg[0])); for (i = 1; i < (naddr + nsize); i++) - printf(" 0x%x", memreg[i]); + printf(" 0x%x", be32_to_cpu(memreg[i])); printf("> (%ldMB)\n\r", (unsigned long)(size >> 20)); setprop(memory, "reg", memreg, (naddr + nsize)*sizeof(u32)); @@ -65,10 +70,10 @@ void dt_fixup_cpu_clocks(u32 cpu, u32 tb, u32 bus) printf("CPU bus-frequency <- 0x%x (%dMHz)\n\r", bus, MHZ(bus)); while ((devp = find_node_by_devtype(devp, "cpu"))) { - setprop_val(devp, "clock-frequency", cpu); - setprop_val(devp, "timebase-frequency", tb); + setprop_val(devp, "clock-frequency", cpu_to_be32(cpu)); + setprop_val(devp, "timebase-frequency", cpu_to_be32(tb)); if (bus > 0) - setprop_val(devp, "bus-frequency", bus); + setprop_val(devp, "bus-frequency", cpu_to_be32(bus)); } timebase_period_ns = 1000000000 / tb; @@ -80,7 +85,7 @@ void dt_fixup_clock(const char *path, u32 freq) if (devp) { printf("%s: clock-frequency <- %x (%dMHz)\n\r", path, freq, MHZ(freq)); - setprop_val(devp, "clock-frequency", freq); + setprop_val(devp, "clock-frequency", cpu_to_be32(freq)); } } @@ -133,8 +138,12 @@ void dt_get_reg_format(void *node, u32 *naddr, u32 *nsize) { if (getprop(node, "#address-cells", naddr, 4) != 4) *naddr = 2; + else + *naddr = be32_to_cpu(*naddr); if (getprop(node, "#size-cells", nsize, 4) != 4) *nsize = 1; + else + *nsize = be32_to_cpu(*nsize); } static void copy_val(u32 *dest, u32 *src, int naddr) @@ -163,9 +172,9 @@ static int add_reg(u32 *reg, u32 *add, int naddr) int i, carry = 0; for (i = MAX_ADDR_CELLS - 1; i >= MAX_ADDR_CELLS - naddr; i--) { - u64 tmp = (u64)reg[i] + add[i] + carry; + u64 tmp = (u64)be32_to_cpu(reg[i]) + be32_to_cpu(add[i]) + carry; carry = tmp >> 32; - reg[i] = (u32)tmp; + reg[i] = cpu_to_be32((u32)tmp); } return !carry; @@ -180,18 +189,18 @@ static int compare_reg(u32 *reg, u32 *range, u32 *rangesize) u32 end; for (i = 0; i < MAX_ADDR_CELLS; i++) { - if (reg[i] < range[i]) + if (be32_to_cpu(reg[i]) < be32_to_cpu(range[i])) return 0; - if (reg[i] > range[i]) + if (be32_to_cpu(reg[i]) > be32_to_cpu(range[i])) break; } for (i = 0; i < MAX_ADDR_CELLS; i++) { - end = range[i] + rangesize[i]; + end = be32_to_cpu(range[i]) + be32_to_cpu(rangesize[i]); - if (reg[i] < end) + if (be32_to_cpu(reg[i]) < end) break; - if (reg[i] > end) + if (be32_to_cpu(reg[i]) > end) return 0; } @@ -240,7 +249,6 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, return 0; dt_get_reg_format(parent, &naddr, &nsize); - if (nsize > 2) return 0; @@ -252,10 +260,10 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, copy_val(last_addr, prop_buf + offset, naddr); - ret_size = prop_buf[offset + naddr]; + ret_size = be32_to_cpu(prop_buf[offset + naddr]); if (nsize == 2) { ret_size <<= 32; - ret_size |= prop_buf[offset + naddr + 1]; + ret_size |= be32_to_cpu(prop_buf[offset + naddr + 1]); } for (;;) { @@ -278,7 +286,6 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, offset = find_range(last_addr, prop_buf, prev_naddr, naddr, prev_nsize, buflen / 4); - if (offset < 0) return 0; @@ -296,8 +303,7 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr, if (naddr > 2) return 0; - ret_addr = ((u64)last_addr[2] << 32) | last_addr[3]; - + ret_addr = ((u64)be32_to_cpu(last_addr[2]) << 32) | be32_to_cpu(last_addr[3]); if (sizeof(void *) == 4 && (ret_addr >= 0x100000000ULL || ret_size > 0x100000000ULL || ret_addr + ret_size > 0x100000000ULL)) @@ -350,11 +356,14 @@ int dt_is_compatible(void *node, const char *compat) int dt_get_virtual_reg(void *node, void **addr, int nres) { unsigned long xaddr; - int n; + int n, i; n = getprop(node, "virtual-reg", addr, nres * 4); - if (n > 0) + if (n > 0) { + for (i = 0; i < n/4; i ++) + ((u32 *)addr)[i] = be32_to_cpu(((u32 *)addr)[i]); return n / 4; + } for (n = 0; n < nres; n++) { if (!dt_xlate_reg(node, n, &xaddr, NULL)) diff --git a/arch/powerpc/boot/dts/charon.dts b/arch/powerpc/boot/dts/charon.dts index 408b486b13dff..cd589539f313f 100644 --- a/arch/powerpc/boot/dts/charon.dts +++ b/arch/powerpc/boot/dts/charon.dts @@ -35,7 +35,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts b/arch/powerpc/boot/dts/digsy_mtc.dts index 0e5e9d3acf79f..19a14e62e65f4 100644 --- a/arch/powerpc/boot/dts/digsy_mtc.dts +++ b/arch/powerpc/boot/dts/digsy_mtc.dts @@ -16,7 +16,7 @@ model = "intercontrol,digsy-mtc"; compatible = "intercontrol,digsy-mtc"; - memory { + memory@0 { reg = <0x00000000 0x02000000>; // 32MB }; diff --git a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi index 1b4aafc1f6a27..9716a0484ecf0 100644 --- a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi @@ -122,7 +122,15 @@ }; /include/ "pq3-i2c-0.dtsi" + i2c@3000 { + fsl,i2c-erratum-a004447; + }; + /include/ "pq3-i2c-1.dtsi" + i2c@3100 { + fsl,i2c-erratum-a004447; + }; + /include/ "pq3-duart-0.dtsi" /include/ "pq3-espi-0.dtsi" spi0: spi@7000 { diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi index 872e4485dc3f0..ddc018d42252f 100644 --- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi @@ -371,7 +371,23 @@ }; /include/ "qoriq-i2c-0.dtsi" + i2c@118000 { + fsl,i2c-erratum-a004447; + }; + + i2c@118100 { + fsl,i2c-erratum-a004447; + }; + /include/ "qoriq-i2c-1.dtsi" + i2c@119000 { + fsl,i2c-erratum-a004447; + }; + + i2c@119100 { + fsl,i2c-erratum-a004447; + }; + /include/ "qoriq-duart-0.dtsi" /include/ "qoriq-duart-1.dtsi" /include/ "qoriq-gpio-0.dtsi" diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi index e1a961f05dcd5..baa0c503e741b 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi @@ -63,6 +63,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy0: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi index c288f3c6c6378..93095600e8086 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi @@ -60,6 +60,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy6: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi index 94f3e71750124..ff4bd38f06459 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi @@ -63,6 +63,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy1: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi index 94a76982d214b..1fa38ed6f59e2 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi @@ -60,6 +60,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy7: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi index b5ff5f71c6b8b..a8cc9780c0c42 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy0: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi index ee44182c63485..8b8bd70c93823 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy1: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi index f05f0d775039b..619c880b54d8d 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe5000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy2: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi index a9114ec510759..d7ebb73a400d0 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe7000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy3: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi index 44dd00ac7367f..b151d696a0699 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe9000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy4: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi index 5b1b84b58602f..adc0ae0013a3c 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xeb000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy5: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi index 0e1daaef9e74b..435047e0e250e 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi @@ -60,6 +60,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy14: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi index 68c5ef779266a..c098657cca0a7 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi @@ -60,6 +60,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy15: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi index 605363cc1117f..9d06824815f34 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy8: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi index 1955dfa136348..70e947730c4ba 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy9: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi index 2c1476454ee01..ad96e65295959 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe5000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy10: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi index b8b541ff5fb03..034bc4b71f7a5 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe7000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy11: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi index 4b2cfddd1b155..93ca23d82b39b 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe9000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy12: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi index 0a52ddf7cc171..23b3117a2fd2a 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xeb000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy13: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi index c90702b04a530..48e5cd61599c6 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi @@ -79,6 +79,7 @@ fman0: fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xfc000 0x1000>; + fsl,erratum-a009885; }; xmdio0: mdio@fd000 { @@ -86,6 +87,7 @@ fman0: fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xfd000 0x1000>; + fsl,erratum-a009885; }; }; diff --git a/arch/powerpc/boot/dts/fsl/t1023rdb.dts b/arch/powerpc/boot/dts/fsl/t1023rdb.dts index 5ba6fbfca2742..f82f85c65964c 100644 --- a/arch/powerpc/boot/dts/fsl/t1023rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1023rdb.dts @@ -154,7 +154,7 @@ fm1mac3: ethernet@e4000 { phy-handle = <&sgmii_aqr_phy3>; - phy-connection-type = "sgmii-2500"; + phy-connection-type = "2500base-x"; sleep = <&rcpm 0x20000000>; }; diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi index 099a598c74c00..bfe1ed5be3374 100644 --- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi +++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi @@ -139,12 +139,12 @@ fman@400000 { ethernet@e6000 { phy-handle = <&phy_rgmii_0>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; ethernet@e8000 { phy-handle = <&phy_rgmii_1>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; mdio0: mdio@fc000 { diff --git a/arch/powerpc/boot/dts/lite5200.dts b/arch/powerpc/boot/dts/lite5200.dts index cb2782dd6132c..e7b194775d783 100644 --- a/arch/powerpc/boot/dts/lite5200.dts +++ b/arch/powerpc/boot/dts/lite5200.dts @@ -32,7 +32,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/lite5200b.dts b/arch/powerpc/boot/dts/lite5200b.dts index 2b86c81f90485..547cbe726ff23 100644 --- a/arch/powerpc/boot/dts/lite5200b.dts +++ b/arch/powerpc/boot/dts/lite5200b.dts @@ -31,7 +31,7 @@ led4 { gpios = <&gpio_simple 2 1>; }; }; - memory { + memory@0 { reg = <0x00000000 0x10000000>; // 256MB }; diff --git a/arch/powerpc/boot/dts/media5200.dts b/arch/powerpc/boot/dts/media5200.dts index 61cae9dcddef4..f3188018faceb 100644 --- a/arch/powerpc/boot/dts/media5200.dts +++ b/arch/powerpc/boot/dts/media5200.dts @@ -32,7 +32,7 @@ }; }; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB RAM }; diff --git a/arch/powerpc/boot/dts/mpc5200b.dtsi b/arch/powerpc/boot/dts/mpc5200b.dtsi index 648fe31795f49..8b796f3b11da7 100644 --- a/arch/powerpc/boot/dts/mpc5200b.dtsi +++ b/arch/powerpc/boot/dts/mpc5200b.dtsi @@ -33,7 +33,7 @@ }; }; - memory: memory { + memory: memory@0 { device_type = "memory"; reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/o2d.dts b/arch/powerpc/boot/dts/o2d.dts index 24a46f65e5299..e0a8d3034417f 100644 --- a/arch/powerpc/boot/dts/o2d.dts +++ b/arch/powerpc/boot/dts/o2d.dts @@ -12,7 +12,7 @@ model = "ifm,o2d"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/o2d.dtsi b/arch/powerpc/boot/dts/o2d.dtsi index 6661955a2be47..b55a9e5bd828c 100644 --- a/arch/powerpc/boot/dts/o2d.dtsi +++ b/arch/powerpc/boot/dts/o2d.dtsi @@ -19,7 +19,7 @@ model = "ifm,o2d"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/o2dnt2.dts b/arch/powerpc/boot/dts/o2dnt2.dts index eeba7f5507d5d..c2eedbd1f5fcb 100644 --- a/arch/powerpc/boot/dts/o2dnt2.dts +++ b/arch/powerpc/boot/dts/o2dnt2.dts @@ -12,7 +12,7 @@ model = "ifm,o2dnt2"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/o3dnt.dts b/arch/powerpc/boot/dts/o3dnt.dts index fd00396b0593e..e4c1bdd412716 100644 --- a/arch/powerpc/boot/dts/o3dnt.dts +++ b/arch/powerpc/boot/dts/o3dnt.dts @@ -12,7 +12,7 @@ model = "ifm,o3dnt"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/pcm032.dts b/arch/powerpc/boot/dts/pcm032.dts index c259c6b3ac5ab..5674f978b9830 100644 --- a/arch/powerpc/boot/dts/pcm032.dts +++ b/arch/powerpc/boot/dts/pcm032.dts @@ -22,7 +22,7 @@ model = "phytec,pcm032"; compatible = "phytec,pcm032"; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/tqm5200.dts b/arch/powerpc/boot/dts/tqm5200.dts index 9ed0bc78967e1..5bb25a9e40a01 100644 --- a/arch/powerpc/boot/dts/tqm5200.dts +++ b/arch/powerpc/boot/dts/tqm5200.dts @@ -32,7 +32,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h index 2abc8e83b95e9..9757d4f6331e7 100644 --- a/arch/powerpc/boot/libfdt_env.h +++ b/arch/powerpc/boot/libfdt_env.h @@ -6,6 +6,8 @@ #include #define INT_MAX ((int)(~0U>>1)) +#define UINT32_MAX ((u32)~0U) +#define INT32_MAX ((s32)(UINT32_MAX >> 1)) #include "of.h" diff --git a/arch/powerpc/boot/ns16550.c b/arch/powerpc/boot/ns16550.c index b0da4466d4198..f16d2be1d0f31 100644 --- a/arch/powerpc/boot/ns16550.c +++ b/arch/powerpc/boot/ns16550.c @@ -15,6 +15,7 @@ #include "stdio.h" #include "io.h" #include "ops.h" +#include "of.h" #define UART_DLL 0 /* Out: Divisor Latch Low */ #define UART_DLM 1 /* Out: Divisor Latch High */ @@ -58,16 +59,20 @@ int ns16550_console_init(void *devp, struct serial_console_data *scdp) int n; u32 reg_offset; - if (dt_get_virtual_reg(devp, (void **)®_base, 1) < 1) + if (dt_get_virtual_reg(devp, (void **)®_base, 1) < 1) { + printf("virt reg parse fail...\r\n"); return -1; + } n = getprop(devp, "reg-offset", ®_offset, sizeof(reg_offset)); if (n == sizeof(reg_offset)) - reg_base += reg_offset; + reg_base += be32_to_cpu(reg_offset); n = getprop(devp, "reg-shift", ®_shift, sizeof(reg_shift)); if (n != sizeof(reg_shift)) reg_shift = 0; + else + reg_shift = be32_to_cpu(reg_shift); scdp->open = ns16550_open; scdp->putc = ns16550_putc; diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c index 9457863147f9b..00179cd6bdd08 100644 --- a/arch/powerpc/boot/serial.c +++ b/arch/powerpc/boot/serial.c @@ -128,7 +128,7 @@ int serial_console_init(void) dt_is_compatible(devp, "fsl,cpm2-smc-uart")) rc = cpm_console_init(devp, &serial_cd); #endif -#ifdef CONFIG_PPC_MPC52XX +#ifdef CONFIG_PPC_MPC52xx else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart")) rc = mpc5200_psc_console_init(devp, &serial_cd); #endif diff --git a/arch/powerpc/configs/85xx-hw.config b/arch/powerpc/configs/85xx-hw.config index 9575a38c9155b..b507df6ac69fa 100644 --- a/arch/powerpc/configs/85xx-hw.config +++ b/arch/powerpc/configs/85xx-hw.config @@ -2,7 +2,6 @@ CONFIG_AQUANTIA_PHY=y CONFIG_AT803X_PHY=y CONFIG_ATA=y CONFIG_BLK_DEV_SD=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_BLK_DEV_SR=y CONFIG_BROADCOM_PHY=y CONFIG_C293_PCIE=y diff --git a/arch/powerpc/configs/amigaone_defconfig b/arch/powerpc/configs/amigaone_defconfig index cf94d28d0e310..340140160c7b5 100644 --- a/arch/powerpc/configs/amigaone_defconfig +++ b/arch/powerpc/configs/amigaone_defconfig @@ -47,7 +47,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SYM53C8XX_2=y diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig index 9ff493dd8439b..6c5a4414e9ee4 100644 --- a/arch/powerpc/configs/chrp32_defconfig +++ b/arch/powerpc/configs/chrp32_defconfig @@ -45,7 +45,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SYM53C8XX_2=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index fbfcc85e4dc01..a68c7f3af10ed 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -62,7 +62,6 @@ CONFIG_CDROM_PKTCDVD=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SPI_ATTRS=y diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index 2975e64629aa8..161351a18517b 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -41,7 +41,6 @@ CONFIG_BLK_DEV_RAM_SIZE=8192 # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_IPR=y CONFIG_ATA=y diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 285d506c5a769..2f5e06309f096 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -39,6 +39,7 @@ CONFIG_MTD_CFI_GEOMETRY=y # CONFIG_MTD_CFI_I2 is not set CONFIG_MTD_CFI_I4=y CONFIG_MTD_CFI_AMDSTD=y +CONFIG_MTD_PHYSMAP=y CONFIG_MTD_PHYSMAP_OF=y # CONFIG_BLK_DEV is not set CONFIG_NETDEVICES=y diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 4b6d31d4474e8..ddf5e97877e2b 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -60,7 +60,6 @@ CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_CHR_DEV_OSST=y CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_CHR_DEV_SCH=y CONFIG_SCSI_CONSTANTS=y @@ -110,7 +109,6 @@ CONFIG_FB_NVIDIA=y CONFIG_FB_NVIDIA_I2C=y CONFIG_FB_RADEON=y # CONFIG_LCD_CLASS_DEVICE is not set -CONFIG_VGACON_SOFT_SCROLLBACK=y CONFIG_LOGO=y CONFIG_SOUND=y CONFIG_SND=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 4e6e95f926460..5cad09f935621 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -119,7 +119,6 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_FC_ATTRS=y diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 6658cceb928c6..2a7c53cc2f83e 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -111,7 +111,6 @@ CONFIG_BLK_DEV_NVME=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SCAN_ASYNC=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index b250e6f5a7ca7..5569d36066dc7 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -110,7 +110,6 @@ CONFIG_VIRTIO_BLK=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_FC_ATTRS=y diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 0d746774c2bde..33a01a9e86be4 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -60,7 +60,6 @@ CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_FC_ATTRS=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 9dca4cffa623d..682d68f39c2b5 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -372,7 +372,6 @@ CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_CHR_DEV_OSST=m CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_CHR_DEV_SCH=m CONFIG_SCSI_ENCLOSURE=m @@ -778,7 +777,6 @@ CONFIG_FB_TRIDENT=m CONFIG_FB_SM501=m CONFIG_FB_IBM_GXT4500=y CONFIG_LCD_PLATFORM=m -CONFIG_VGACON_SOFT_SCROLLBACK=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y CONFIG_LOGO=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 26126b4d4de33..d58686a66388f 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -97,7 +97,6 @@ CONFIG_VIRTIO_BLK=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_FC_ATTRS=y diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 1253482a67c0d..2e25b264f70fb 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -83,7 +83,6 @@ CONFIG_EEPROM_AT24=m # CONFIG_OCXL is not set CONFIG_BLK_DEV_SD=m CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SCAN_ASYNC=y diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h index 9c63b596e6ce5..f0f16b4fc5ea1 100644 --- a/arch/powerpc/include/asm/archrandom.h +++ b/arch/powerpc/include/asm/archrandom.h @@ -6,44 +6,35 @@ #include -static inline int arch_get_random_long(unsigned long *v) +static inline bool arch_get_random_long(unsigned long *v) { - return 0; + return false; } -static inline int arch_get_random_int(unsigned int *v) +static inline bool arch_get_random_int(unsigned int *v) { - return 0; + return false; } -static inline int arch_get_random_seed_long(unsigned long *v) +static inline bool arch_get_random_seed_long(unsigned long *v) { if (ppc_md.get_random_seed) return ppc_md.get_random_seed(v); - return 0; + return false; } -static inline int arch_get_random_seed_int(unsigned int *v) + +static inline bool arch_get_random_seed_int(unsigned int *v) { unsigned long val; - int rc; + bool rc; - rc = arch_get_random_long(&val); + rc = arch_get_random_seed_long(&val); if (rc) *v = val; return rc; } - -static inline int arch_has_random(void) -{ - return 0; -} - -static inline int arch_has_random_seed(void) -{ - return !!ppc_md.get_random_seed; -} #endif /* CONFIG_ARCH_RANDOM */ #ifdef CONFIG_PPC_POWERNV diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 8561498e653cb..d84d1417ddb6e 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -152,9 +152,12 @@ void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr); /* Patch sites */ extern s32 patch__call_flush_count_cache; extern s32 patch__flush_count_cache_return; +extern s32 patch__flush_link_stack_return; +extern s32 patch__call_kvm_flush_link_stack; extern s32 patch__memset_nocache, patch__memcpy_nocache; extern long flush_count_cache; +extern long kvm_flush_link_stack; #ifdef CONFIG_PPC_TRANSACTIONAL_MEM void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv); diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h index fbe8df4330190..dc953d22e3c68 100644 --- a/arch/powerpc/include/asm/barrier.h +++ b/arch/powerpc/include/asm/barrier.h @@ -44,6 +44,8 @@ # define SMPWMB eieio #endif +/* clang defines this macro for a builtin, which will not work with runtime patching */ +#undef __lwsync #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory") #define dma_rmb() __lwsync() #define dma_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory") diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 603aed229af78..46338f2360046 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -217,15 +217,34 @@ static __inline__ void __clear_bit_unlock(int nr, volatile unsigned long *addr) */ static __inline__ int fls(unsigned int x) { - return 32 - __builtin_clz(x); + int lz; + + if (__builtin_constant_p(x)) + return x ? 32 - __builtin_clz(x) : 0; + asm("cntlzw %0,%1" : "=r" (lz) : "r" (x)); + return 32 - lz; } #include +/* + * 64-bit can do this using one cntlzd (count leading zeroes doubleword) + * instruction; for 32-bit we use the generic version, which does two + * 32-bit fls calls. + */ +#ifdef CONFIG_PPC64 static __inline__ int fls64(__u64 x) { - return 64 - __builtin_clzll(x); + int lz; + + if (__builtin_constant_p(x)) + return x ? 64 - __builtin_clzll(x) : 0; + asm("cntlzd %0,%1" : "=r" (lz) : "r" (x)); + return 64 - lz; } +#else +#include +#endif #ifdef CONFIG_PPC64 unsigned int __arch_hweight8(unsigned int w); diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index f9dc597b0b868..6eb311eb818b2 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -2,6 +2,7 @@ #ifndef _ASM_POWERPC_BOOK3S_32_KUP_H #define _ASM_POWERPC_BOOK3S_32_KUP_H +#include #include #ifdef __ASSEMBLY__ @@ -75,7 +76,7 @@ .macro kuap_check current, gpr #ifdef CONFIG_PPC_KUAP_DEBUG - lwz \gpr2, KUAP(thread) + lwz \gpr, THREAD + KUAP(\current) 999: twnei \gpr, 0 EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) #endif @@ -102,11 +103,13 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) isync(); /* Context sync required after mtsrin() */ } -static inline void allow_user_access(void __user *to, const void __user *from, u32 size) +static __always_inline void allow_user_access(void __user *to, const void __user *from, + u32 size, unsigned long dir) { u32 addr, end; - if (__builtin_constant_p(to) && to == NULL) + BUILD_BUG_ON(!__builtin_constant_p(dir)); + if (!(dir & KUAP_WRITE)) return; addr = (__force u32)to; @@ -119,11 +122,16 @@ static inline void allow_user_access(void __user *to, const void __user *from, u kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */ } -static inline void prevent_user_access(void __user *to, const void __user *from, u32 size) +static __always_inline void prevent_user_access(void __user *to, const void __user *from, + u32 size, unsigned long dir) { u32 addr = (__force u32)to; u32 end = min(addr + size, TASK_SIZE); + BUILD_BUG_ON(!__builtin_constant_p(dir)); + if (!(dir & KUAP_WRITE)) + return; + if (!addr || addr >= TASK_SIZE || !size) return; @@ -131,12 +139,17 @@ static inline void prevent_user_access(void __user *to, const void __user *from, kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */ } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { + unsigned long begin = regs->kuap & 0xf0000000; + unsigned long end = regs->kuap << 28; + if (!is_write) return false; - return WARN(!regs->kuap, "Bug: write fault blocked by segment registers !"); + return WARN(address < begin || address >= end, + "Bug: write fault blocked by segment registers !"); } #endif /* CONFIG_PPC_KUAP */ diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index 9983177026305..dc5c039eb28e6 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -49,7 +49,6 @@ static inline void pgtable_free(void *table, unsigned index_size) #define get_hugepd_cache_index(x) (x) -#ifdef CONFIG_SMP static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) { @@ -66,13 +65,6 @@ static inline void __tlb_remove_table(void *_table) pgtable_free(table, shift); } -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, - void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 0796533d37dd5..5325bd9c9b47b 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -37,8 +37,10 @@ static inline bool pte_user(pte_t pte) */ #ifdef CONFIG_PTE_64BIT #define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 36 #else #define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 32 #endif /* @@ -555,9 +557,9 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, if (pte_val(*ptep) & _PAGE_HASHPTE) flush_hash_entry(mm, ptep, addr); __asm__ __volatile__("\ - stw%U0%X0 %2,%0\n\ + stw%X0 %2,%0\n\ eieio\n\ - stw%U0%X0 %L2,%1" + stw%X1 %L2,%1" : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) : "r" (pte) : "memory"); diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 8fd8599c9395c..80c9534148821 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -13,20 +13,19 @@ */ #define MAX_EA_BITS_PER_CONTEXT 46 -#define REGION_SHIFT (MAX_EA_BITS_PER_CONTEXT - 2) /* - * Our page table limit us to 64TB. Hence for the kernel mapping, - * each MAP area is limited to 16 TB. - * The four map areas are: linear mapping, vmap, IO and vmemmap + * Our page table limit us to 64TB. For 64TB physical memory, we only need 64GB + * of vmemmap space. To better support sparse memory layout, we use 61TB + * linear map range, 1TB of vmalloc, 1TB of I/O and 1TB of vmememmap. */ +#define REGION_SHIFT (40) #define H_KERN_MAP_SIZE (ASM_CONST(1) << REGION_SHIFT) /* - * Define the address range of the kernel non-linear virtual area - * 16TB + * Define the address range of the kernel non-linear virtual area (61TB) */ -#define H_KERN_VIRT_START ASM_CONST(0xc000100000000000) +#define H_KERN_VIRT_START ASM_CONST(0xc0003d0000000000) #ifndef __ASSEMBLY__ #define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE) @@ -156,6 +155,12 @@ extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, extern int hash__has_transparent_hugepage(void); #endif +static inline pmd_t hash__pmd_mkdevmap(pmd_t pmd) +{ + BUG(); + return pmd; +} + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_4K_H */ diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index d1d9177d9ebdd..0729c034e56f0 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -246,7 +246,7 @@ static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array, */ static inline int hash__pmd_trans_huge(pmd_t pmd) { - return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) == + return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE | _PAGE_DEVMAP)) == (_PAGE_PTE | H_PAGE_THP_HUGE)); } @@ -272,6 +272,12 @@ extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp); extern int hash__has_transparent_hugepage(void); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +static inline pmd_t hash__pmd_mkdevmap(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE | _PAGE_DEVMAP)); +} + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */ diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index f254de956d6ac..a29b64129a7d4 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -11,13 +11,12 @@ #ifdef __ASSEMBLY__ -.macro kuap_restore_amr gpr #ifdef CONFIG_PPC_KUAP +.macro kuap_restore_amr gpr BEGIN_MMU_FTR_SECTION_NESTED(67) ld \gpr, STACK_REGS_KUAP(r1) mtspr SPRN_AMR, \gpr END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) -#endif .endm .macro kuap_check_amr gpr1, gpr2 @@ -31,6 +30,7 @@ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) #endif .endm +#endif .macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr #ifdef CONFIG_PPC_KUAP @@ -54,6 +54,10 @@ #else /* !__ASSEMBLY__ */ +#include + +DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); + #ifdef CONFIG_PPC_KUAP #include @@ -77,32 +81,39 @@ static inline void set_kuap(unsigned long value) isync(); } -static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size) +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ + return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && + (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), + "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); +} +#else /* CONFIG_PPC_KUAP */ +static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) { } +static inline void set_kuap(unsigned long value) { } +#endif /* !CONFIG_PPC_KUAP */ + +static __always_inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) { // This is written so we can resolve to a single case at build time - if (__builtin_constant_p(to) && to == NULL) + BUILD_BUG_ON(!__builtin_constant_p(dir)); + if (dir == KUAP_READ) set_kuap(AMR_KUAP_BLOCK_WRITE); - else if (__builtin_constant_p(from) && from == NULL) + else if (dir == KUAP_WRITE) set_kuap(AMR_KUAP_BLOCK_READ); else set_kuap(0); } static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size) + unsigned long size, unsigned long dir) { set_kuap(AMR_KUAP_BLOCKED); + if (static_branch_unlikely(&uaccess_flush_key)) + do_uaccess_flush(); } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) -{ - return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && - (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), - "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); -} -#endif /* CONFIG_PPC_KUAP */ - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */ diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 15b75005bc34e..3fa1b962dc279 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -600,8 +600,11 @@ extern void slb_set_size(u16 size); * */ #define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2) + +// The + 2 accounts for INVALID_REGION and 1 more to avoid overlap with kernel #define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \ - MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT) + MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT + 2) + /* * For platforms that support on 65bit VA we limit the context bits */ diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index bb3deb76c951b..2f4ddc802fe9d 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -225,14 +225,14 @@ static inline void early_init_mmu_secondary(void) extern void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size); -extern void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, - phys_addr_t first_memblock_size); static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) { - if (early_radix_enabled()) - return radix__setup_initial_memory_limit(first_memblock_base, - first_memblock_size); + /* + * Hash has more strict restrictions. At this point we don't + * know which translations we will pick. Hence go with hash + * restrictions. + */ return hash__setup_initial_memory_limit(first_memblock_base, first_memblock_size); } diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index d5a44912902f9..cae9e814593a8 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -19,9 +19,7 @@ extern struct vmemmap_backing *vmemmap_list; extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long); extern void pmd_fragment_free(unsigned long *); extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); -#ifdef CONFIG_SMP extern void __tlb_remove_table(void *_table); -#endif void pte_frag_destroy(void *pte_frag); static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index b01624e5c4671..e1eb8aa9cfbbb 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -998,10 +998,25 @@ extern struct page *pgd_page(pgd_t pgd); #define pud_page_vaddr(pud) __va(pud_val(pud) & ~PUD_MASKED_BITS) #define pgd_page_vaddr(pgd) __va(pgd_val(pgd) & ~PGD_MASKED_BITS) -#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1)) -#define pud_index(address) (((address) >> (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) -#define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1)) -#define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1)) +static inline unsigned long pgd_index(unsigned long address) +{ + return (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1); +} + +static inline unsigned long pud_index(unsigned long address) +{ + return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); +} + +static inline unsigned long pmd_index(unsigned long address) +{ + return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); +} + +static inline unsigned long pte_index(unsigned long address) +{ + return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); +} /* * Find an entry in a page-table-directory. We combine the address region @@ -1303,7 +1318,9 @@ extern void serialize_against_pte_lookup(struct mm_struct *mm); static inline pmd_t pmd_mkdevmap(pmd_t pmd) { - return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_DEVMAP)); + if (radix_enabled()) + return radix__pmd_mkdevmap(pmd); + return hash__pmd_mkdevmap(pmd); } static inline int pmd_devmap(pmd_t pmd) diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index d97db3ad9aae2..5131ed787409f 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -206,8 +206,10 @@ static inline void radix__set_pte_at(struct mm_struct *mm, unsigned long addr, * from ptesync, it should probably go into update_mmu_cache, rather * than set_pte_at (which is used to set ptes unrelated to faults). * - * Spurious faults to vmalloc region are not tolerated, so there is - * a ptesync in flush_cache_vmap. + * Spurious faults from the kernel memory are not tolerated, so there + * is a ptesync in flush_cache_vmap, and __map_kernel_page() follows + * the pte update sequence from ISA Book III 6.10 Translation Table + * Update Synchronization Requirements. */ } @@ -263,6 +265,11 @@ static inline int radix__has_transparent_hugepage(void) } #endif +static inline pmd_t radix__pmd_mkdevmap(pmd_t pmd) +{ + return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_DEVMAP)); +} + extern int __meminit radix__vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys); diff --git a/arch/powerpc/include/asm/bpf_perf_event.h b/arch/powerpc/include/asm/bpf_perf_event.h new file mode 100644 index 0000000000000..e8a7b4ffb58c2 --- /dev/null +++ b/arch/powerpc/include/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BPF_PERF_EVENT_H +#define _ASM_POWERPC_BPF_PERF_EVENT_H + +#include + +typedef struct user_pt_regs bpf_user_pt_regs_t; + +#endif /* _ASM_POWERPC_BPF_PERF_EVENT_H */ diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index 45e3137ccd71c..72b81015cebe9 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -55,42 +55,48 @@ struct ppc64_caches { extern struct ppc64_caches ppc64_caches; -static inline u32 l1_cache_shift(void) +static inline u32 l1_dcache_shift(void) { return ppc64_caches.l1d.log_block_size; } -static inline u32 l1_cache_bytes(void) +static inline u32 l1_dcache_bytes(void) { return ppc64_caches.l1d.block_size; } + +static inline u32 l1_icache_shift(void) +{ + return ppc64_caches.l1i.log_block_size; +} + +static inline u32 l1_icache_bytes(void) +{ + return ppc64_caches.l1i.block_size; +} #else -static inline u32 l1_cache_shift(void) +static inline u32 l1_dcache_shift(void) { return L1_CACHE_SHIFT; } -static inline u32 l1_cache_bytes(void) +static inline u32 l1_dcache_bytes(void) { return L1_CACHE_BYTES; } + +static inline u32 l1_icache_shift(void) +{ + return L1_CACHE_SHIFT; +} + +static inline u32 l1_icache_bytes(void) +{ + return L1_CACHE_BYTES; +} + #endif -#endif /* ! __ASSEMBLY__ */ - -#if defined(__ASSEMBLY__) -/* - * For a snooping icache, we still need a dummy icbi to purge all the - * prefetched instructions from the ifetch buffers. We also need a sync - * before the icbi to order the the actual stores to memory that might - * have modified instructions with the icbi. - */ -#define PURGE_PREFETCHED_INS \ - sync; \ - icbi 0,r3; \ - sync; \ - isync -#else #define __read_mostly __attribute__((__section__(".data..read_mostly"))) #ifdef CONFIG_PPC_BOOK3S_32 @@ -124,6 +130,17 @@ static inline void dcbst(void *addr) { __asm__ __volatile__ ("dcbst 0, %0" : : "r"(addr) : "memory"); } + +static inline void icbi(void *addr) +{ + asm volatile ("icbi 0, %0" : : "r"(addr) : "memory"); +} + +static inline void iccci(void *addr) +{ + asm volatile ("iccci 0, %0" : : "r"(addr) : "memory"); +} + #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_CACHE_H */ diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index eef388f2659f4..4a1c9f0200e1b 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -42,29 +42,25 @@ extern void flush_dcache_page(struct page *page); #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) -extern void flush_icache_range(unsigned long, unsigned long); +void flush_icache_range(unsigned long start, unsigned long stop); extern void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, unsigned long addr, int len); -extern void __flush_dcache_icache(void *page_va); extern void flush_dcache_icache_page(struct page *page); -#if defined(CONFIG_PPC32) && !defined(CONFIG_BOOKE) -extern void __flush_dcache_icache_phys(unsigned long physaddr); -#else -static inline void __flush_dcache_icache_phys(unsigned long physaddr) -{ - BUG(); -} -#endif - -/* - * Write any modified data cache blocks out to memory and invalidate them. - * Does not invalidate the corresponding instruction cache blocks. +void __flush_dcache_icache(void *page); + +/** + * flush_dcache_range(): Write any modified data cache blocks out to memory and + * invalidate them. Does not invalidate the corresponding instruction cache + * blocks. + * + * @start: the start address + * @stop: the stop address (exclusive) */ static inline void flush_dcache_range(unsigned long start, unsigned long stop) { - unsigned long shift = l1_cache_shift(); - unsigned long bytes = l1_cache_bytes(); + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = l1_dcache_bytes(); void *addr = (void *)(start & ~(bytes - 1)); unsigned long size = stop - (unsigned long)addr + (bytes - 1); unsigned long i; @@ -89,8 +85,8 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop) */ static inline void clean_dcache_range(unsigned long start, unsigned long stop) { - unsigned long shift = l1_cache_shift(); - unsigned long bytes = l1_cache_bytes(); + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = l1_dcache_bytes(); void *addr = (void *)(start & ~(bytes - 1)); unsigned long size = stop - (unsigned long)addr + (bytes - 1); unsigned long i; @@ -108,8 +104,8 @@ static inline void clean_dcache_range(unsigned long start, unsigned long stop) static inline void invalidate_dcache_range(unsigned long start, unsigned long stop) { - unsigned long shift = l1_cache_shift(); - unsigned long bytes = l1_cache_bytes(); + unsigned long shift = l1_dcache_shift(); + unsigned long bytes = l1_dcache_bytes(); void *addr = (void *)(start & ~(bytes - 1)); unsigned long size = stop - (unsigned long)addr + (bytes - 1); unsigned long i; diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 898b542628815..96ef54bc7d968 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -22,6 +22,7 @@ #define BRANCH_ABSOLUTE 0x2 bool is_offset_in_branch_range(long offset); +bool is_offset_in_cond_branch_range(long offset); unsigned int create_branch(const unsigned int *addr, unsigned long target, int flags); unsigned int create_cond_branch(const unsigned int *addr, @@ -72,7 +73,7 @@ void __patch_exception(int exc, unsigned long addr); #endif #define OP_RT_RA_MASK 0xffff0000UL -#define LIS_R2 0x3c020000UL +#define LIS_R2 0x3c400000UL #define ADDIS_R2_R12 0x3c4c0000UL #define ADDI_R2_R2 0x38420000UL diff --git a/arch/powerpc/include/asm/cpm1.h b/arch/powerpc/include/asm/cpm1.h index a116fe9317892..3bdd74739cb88 100644 --- a/arch/powerpc/include/asm/cpm1.h +++ b/arch/powerpc/include/asm/cpm1.h @@ -68,6 +68,7 @@ extern void cpm_reset(void); #define PROFF_SPI ((uint)0x0180) #define PROFF_SCC3 ((uint)0x0200) #define PROFF_SMC1 ((uint)0x0280) +#define PROFF_DSP1 ((uint)0x02c0) #define PROFF_SCC4 ((uint)0x0300) #define PROFF_SMC2 ((uint)0x0380) diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h index 7897d16e09904..727d4b3219379 100644 --- a/arch/powerpc/include/asm/cpu_has_feature.h +++ b/arch/powerpc/include/asm/cpu_has_feature.h @@ -7,7 +7,7 @@ #include #include -static inline bool early_cpu_has_feature(unsigned long feature) +static __always_inline bool early_cpu_has_feature(unsigned long feature) { return !!((CPU_FTRS_ALWAYS & feature) || (CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature)); @@ -46,7 +46,7 @@ static __always_inline bool cpu_has_feature(unsigned long feature) return static_branch_likely(&cpu_feature_keys[i]); } #else -static inline bool cpu_has_feature(unsigned long feature) +static __always_inline bool cpu_has_feature(unsigned long feature) { return early_cpu_has_feature(feature); } diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index cf00ff0d121de..235911fb0e24f 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -367,7 +367,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX) #define CPU_FTRS_82XX (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_NOEXECUTE) #define CPU_FTRS_G2_LE (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \ - CPU_FTR_MAYBE_CAN_NAP) + CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NOEXECUTE) #define CPU_FTRS_E300 (CPU_FTR_MAYBE_CAN_DOZE | \ CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_COMMON | CPU_FTR_NOEXECUTE) @@ -407,7 +407,6 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_ALTIVEC_COMP | \ CPU_FTR_CELL_TB_BUG | CPU_FTR_SMT) -#define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) /* 64-bit CPUs */ #define CPU_FTRS_PPC970 (CPU_FTR_LWSYNC | \ @@ -507,8 +506,6 @@ enum { CPU_FTRS_7447 | CPU_FTRS_7447A | CPU_FTRS_82XX | CPU_FTRS_G2_LE | CPU_FTRS_E300 | CPU_FTRS_E300C2 | CPU_FTRS_CLASSIC32 | -#else - CPU_FTRS_GENERIC_32 | #endif #ifdef CONFIG_PPC_8xx CPU_FTRS_8XX | @@ -585,8 +582,6 @@ enum { CPU_FTRS_7447 & CPU_FTRS_7447A & CPU_FTRS_82XX & CPU_FTRS_G2_LE & CPU_FTRS_E300 & CPU_FTRS_E300C2 & CPU_FTRS_CLASSIC32 & -#else - CPU_FTRS_GENERIC_32 & #endif #ifdef CONFIG_PPC_8xx CPU_FTRS_8XX & diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index deb99fd6e060f..1a71eacf54a71 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -3,6 +3,7 @@ #define _ASM_POWERPC_CPUTHREADS_H #ifndef __ASSEMBLY__ +#include #include #include @@ -99,6 +100,36 @@ static inline int cpu_last_thread_sibling(int cpu) return cpu | (threads_per_core - 1); } +/* + * tlb_thread_siblings are siblings which share a TLB. This is not + * architected, is not something a hypervisor could emulate and a future + * CPU may change behaviour even in compat mode, so this should only be + * used on PowerNV, and only with care. + */ +static inline int cpu_first_tlb_thread_sibling(int cpu) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8)) + return cpu & ~0x6; /* Big Core */ + else + return cpu_first_thread_sibling(cpu); +} + +static inline int cpu_last_tlb_thread_sibling(int cpu) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8)) + return cpu | 0x6; /* Big Core */ + else + return cpu_last_thread_sibling(cpu); +} + +static inline int cpu_tlb_thread_sibling_step(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8)) + return 2; /* Big Core */ + else + return 1; +} + static inline u32 get_tensr(void) { #ifdef CONFIG_BOOKE diff --git a/arch/powerpc/include/asm/dcr-native.h b/arch/powerpc/include/asm/dcr-native.h index 7141ccea8c94e..a92059964579b 100644 --- a/arch/powerpc/include/asm/dcr-native.h +++ b/arch/powerpc/include/asm/dcr-native.h @@ -53,8 +53,8 @@ static inline void mtdcrx(unsigned int reg, unsigned int val) #define mfdcr(rn) \ ({unsigned int rval; \ if (__builtin_constant_p(rn) && rn < 1024) \ - asm volatile("mfdcr %0," __stringify(rn) \ - : "=r" (rval)); \ + asm volatile("mfdcr %0, %1" : "=r" (rval) \ + : "n" (rn)); \ else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR))) \ rval = mfdcrx(rn); \ else \ @@ -64,8 +64,8 @@ static inline void mtdcrx(unsigned int reg, unsigned int val) #define mtdcr(rn, v) \ do { \ if (__builtin_constant_p(rn) && rn < 1024) \ - asm volatile("mtdcr " __stringify(rn) ",%0" \ - : : "r" (v)); \ + asm volatile("mtdcr %0, %1" \ + : : "n" (rn), "r" (v)); \ else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR))) \ mtdcrx(rn, v); \ else \ diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 3d76e1c388c29..ee61542c6c3d9 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -8,31 +8,44 @@ #ifndef _ASM_POWERPC_LMB_H #define _ASM_POWERPC_LMB_H +#include + struct drmem_lmb { u64 base_addr; u32 drc_index; u32 aa_index; u32 flags; -#ifdef CONFIG_MEMORY_HOTPLUG - int nid; -#endif }; struct drmem_lmb_info { struct drmem_lmb *lmbs; int n_lmbs; - u32 lmb_size; + u64 lmb_size; }; extern struct drmem_lmb_info *drmem_info; +static inline struct drmem_lmb *drmem_lmb_next(struct drmem_lmb *lmb, + const struct drmem_lmb *start) +{ + /* + * DLPAR code paths can take several milliseconds per element + * when interacting with firmware. Ensure that we don't + * unfairly monopolize the CPU. + */ + if (((++lmb - start) % 16) == 0) + cond_resched(); + + return lmb; +} + #define for_each_drmem_lmb_in_range(lmb, start, end) \ - for ((lmb) = (start); (lmb) <= (end); (lmb)++) + for ((lmb) = (start); (lmb) < (end); lmb = drmem_lmb_next(lmb, start)) #define for_each_drmem_lmb(lmb) \ for_each_drmem_lmb_in_range((lmb), \ &drmem_info->lmbs[0], \ - &drmem_info->lmbs[drmem_info->n_lmbs - 1]) + &drmem_info->lmbs[drmem_info->n_lmbs]) /* * The of_drconf_cell_v1 struct defines the layout of the LMB data @@ -66,7 +79,7 @@ struct of_drconf_cell_v2 { #define DRCONF_MEM_AI_INVALID 0x00000040 #define DRCONF_MEM_RESERVED 0x00000080 -static inline u32 drmem_lmb_size(void) +static inline u64 drmem_lmb_size(void) { return drmem_info->lmb_size; } @@ -103,22 +116,4 @@ static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb) lmb->aa_index = 0xffffffff; } -#ifdef CONFIG_MEMORY_HOTPLUG -static inline void lmb_set_nid(struct drmem_lmb *lmb) -{ - lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr); -} -static inline void lmb_clear_nid(struct drmem_lmb *lmb) -{ - lmb->nid = -1; -} -#else -static inline void lmb_set_nid(struct drmem_lmb *lmb) -{ -} -static inline void lmb_clear_nid(struct drmem_lmb *lmb) -{ -} -#endif - #endif /* _ASM_POWERPC_LMB_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 33f4f72eb035b..6d0795d7b89c1 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -61,11 +61,18 @@ nop; \ nop +#define ENTRY_FLUSH_SLOT \ + ENTRY_FLUSH_FIXUP_SECTION; \ + nop; \ + nop; \ + nop; + /* * r10 must be free to use, r13 must be paca */ #define INTERRUPT_TO_KERNEL \ - STF_ENTRY_BARRIER_SLOT + STF_ENTRY_BARRIER_SLOT; \ + ENTRY_FLUSH_SLOT /* * Macros for annotating the expected destination of (h)rfid @@ -127,6 +134,9 @@ hrfid; \ b hrfi_flush_fallback +#else /* __ASSEMBLY__ */ +/* Prototype for function defined in exceptions-64s.S */ +void do_uaccess_flush(void); #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_EXCEPTION_H */ diff --git a/arch/powerpc/include/asm/fadump-internal.h b/arch/powerpc/include/asm/fadump-internal.h index c814a2b553893..8d61c8f3fec47 100644 --- a/arch/powerpc/include/asm/fadump-internal.h +++ b/arch/powerpc/include/asm/fadump-internal.h @@ -64,12 +64,14 @@ struct fadump_memory_range { }; /* fadump memory ranges info */ +#define RNG_NAME_SZ 16 struct fadump_mrange_info { - char name[16]; + char name[RNG_NAME_SZ]; struct fadump_memory_range *mem_ranges; u32 mem_ranges_sz; u32 mem_range_cnt; u32 max_mem_ranges; + bool is_static; }; /* Platform specific callback functions */ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index b0af97add7517..fbd406cd6916c 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -205,6 +205,22 @@ label##3: \ FTR_ENTRY_OFFSET 955b-956b; \ .popsection; +#define UACCESS_FLUSH_FIXUP_SECTION \ +959: \ + .pushsection __uaccess_flush_fixup,"a"; \ + .align 2; \ +960: \ + FTR_ENTRY_OFFSET 959b-960b; \ + .popsection; + +#define ENTRY_FLUSH_FIXUP_SECTION \ +957: \ + .pushsection __entry_flush_fixup,"a"; \ + .align 2; \ +958: \ + FTR_ENTRY_OFFSET 957b-958b; \ + .popsection; + #define RFI_FLUSH_FIXUP_SECTION \ 951: \ .pushsection __rfi_flush_fixup,"a"; \ @@ -237,8 +253,11 @@ label##3: \ #include extern long stf_barrier_fallback; +extern long entry_flush_fallback; extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; +extern long __start___uaccess_flush_fixup, __stop___uaccess_flush_fixup; +extern long __start___entry_flush_fixup, __stop___entry_flush_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; extern long __start__btb_flush_fixup, __stop__btb_flush_fixup; diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 0cfc365d814ba..722289a1d000e 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -77,7 +77,12 @@ enum fixed_addresses { static inline void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) { - map_kernel_page(fix_to_virt(idx), phys, flags); + if (__builtin_constant_p(idx)) + BUILD_BUG_ON(idx >= __end_of_fixed_addresses); + else if (WARN_ON(idx >= __end_of_fixed_addresses)) + return; + + map_kernel_page(__fix_to_virt(idx), phys, flags); } #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index f54a08a2cd709..017336f2b0864 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -96,7 +96,7 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name #endif /* PPC64_ELF_ABI_v1 */ #endif /* CONFIG_FTRACE_SYSCALLS */ -#ifdef CONFIG_PPC64 +#if defined(CONFIG_PPC64) && defined(CONFIG_FUNCTION_TRACER) #include static inline void this_cpu_disable_ftrace(void) @@ -108,9 +108,12 @@ static inline void this_cpu_enable_ftrace(void) { get_paca()->ftrace_enabled = 1; } + +void ftrace_free_init_tramp(void); #else /* CONFIG_PPC64 */ static inline void this_cpu_disable_ftrace(void) { } static inline void this_cpu_enable_ftrace(void) { } +static inline void ftrace_free_init_tramp(void) { } #endif /* CONFIG_PPC64 */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index eea28ca679dbb..bc7d9d06a6d9d 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -35,7 +35,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, { int oldval = 0, ret; - allow_write_to_user(uaddr, sizeof(*uaddr)); + allow_read_write_user(uaddr, uaddr, sizeof(*uaddr)); pagefault_disable(); switch (op) { @@ -62,7 +62,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, *oval = oldval; - prevent_write_to_user(uaddr, sizeof(*uaddr)); + prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr)); return ret; } @@ -76,7 +76,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(uaddr, sizeof(u32))) return -EFAULT; - allow_write_to_user(uaddr, sizeof(*uaddr)); + allow_read_write_user(uaddr, uaddr, sizeof(*uaddr)); + __asm__ __volatile__ ( PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ @@ -97,7 +98,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "cc", "memory"); *uval = prev; - prevent_write_to_user(uaddr, sizeof(*uaddr)); + prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr)); + return ret; } diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index a63ec938636de..daba2d2a02a0b 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -345,25 +345,37 @@ static inline void __raw_writeq_be(unsigned long v, volatile void __iomem *addr) */ static inline void __raw_rm_writeb(u8 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stbcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stbcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writew(u16 val, volatile void __iomem *paddr) { - __asm__ __volatile__("sthcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + sthcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writel(u32 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stwcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stwcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr) { - __asm__ __volatile__("stdcix %0,0,%1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + stdcix %0,0,%1; \ + .machine pop;" : : "r" (val), "r" (paddr) : "memory"); } @@ -375,7 +387,10 @@ static inline void __raw_rm_writeq_be(u64 val, volatile void __iomem *paddr) static inline u8 __raw_rm_readb(volatile void __iomem *paddr) { u8 ret; - __asm__ __volatile__("lbzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lbzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -383,7 +398,10 @@ static inline u8 __raw_rm_readb(volatile void __iomem *paddr) static inline u16 __raw_rm_readw(volatile void __iomem *paddr) { u16 ret; - __asm__ __volatile__("lhzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lhzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -391,7 +409,10 @@ static inline u16 __raw_rm_readw(volatile void __iomem *paddr) static inline u32 __raw_rm_readl(volatile void __iomem *paddr) { u32 ret; - __asm__ __volatile__("lwzcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + lwzcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } @@ -399,7 +420,10 @@ static inline u32 __raw_rm_readl(volatile void __iomem *paddr) static inline u64 __raw_rm_readq(volatile void __iomem *paddr) { u64 ret; - __asm__ __volatile__("ldcix %0,0, %1" + __asm__ __volatile__(".machine push; \ + .machine power6; \ + ldcix %0,0, %1; \ + .machine pop;" : "=r" (ret) : "r" (paddr) : "memory"); return ret; } diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h index 296e51c2f066b..6db06f58deeda 100644 --- a/arch/powerpc/include/asm/kasan.h +++ b/arch/powerpc/include/asm/kasan.h @@ -23,9 +23,7 @@ #define KASAN_SHADOW_OFFSET ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET) -#define KASAN_SHADOW_END 0UL - -#define KASAN_SHADOW_SIZE (KASAN_SHADOW_END - KASAN_SHADOW_START) +#define KASAN_SHADOW_END (-(-KASAN_SHADOW_START >> KASAN_SHADOW_SCALE_SHIFT)) #ifdef CONFIG_KASAN void kasan_early_init(void); diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 5b5e39643a27c..ed4f5f536fc1d 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -2,7 +2,11 @@ #ifndef _ASM_POWERPC_KUP_H_ #define _ASM_POWERPC_KUP_H_ -#ifdef CONFIG_PPC64 +#define KUAP_READ 1 +#define KUAP_WRITE 2 +#define KUAP_READ_WRITE (KUAP_READ | KUAP_WRITE) + +#ifdef CONFIG_PPC_BOOK3S_64 #include #endif #ifdef CONFIG_PPC_8xx @@ -20,9 +24,15 @@ .macro kuap_restore sp, current, gpr1, gpr2, gpr3 .endm +.macro kuap_restore_amr gpr +.endm + .macro kuap_check current, gpr .endm +.macro kuap_check_amr gpr1, gpr2 +.endm + #endif #else /* !__ASSEMBLY__ */ @@ -41,33 +51,60 @@ static inline void setup_kuep(bool disabled) { } void setup_kuap(bool disabled); #else static inline void setup_kuap(bool disabled) { } + +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ + return false; +} + +static inline void kuap_check_amr(void) { } + +/* + * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush + * the L1D cache after user accesses. Only include the empty stubs for other + * platforms. + */ +#ifndef CONFIG_PPC_BOOK3S_64 static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size) { } + unsigned long size, unsigned long dir) { } static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size) { } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) { return false; } + unsigned long size, unsigned long dir) { } +#endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* CONFIG_PPC_KUAP */ static inline void allow_read_from_user(const void __user *from, unsigned long size) { - allow_user_access(NULL, from, size); + allow_user_access(NULL, from, size, KUAP_READ); } static inline void allow_write_to_user(void __user *to, unsigned long size) { - allow_user_access(to, NULL, size); + allow_user_access(to, NULL, size, KUAP_WRITE); +} + +static inline void allow_read_write_user(void __user *to, const void __user *from, + unsigned long size) +{ + allow_user_access(to, from, size, KUAP_READ_WRITE); } static inline void prevent_read_from_user(const void __user *from, unsigned long size) { - prevent_user_access(NULL, from, size); + prevent_user_access(NULL, from, size, KUAP_READ); } static inline void prevent_write_to_user(void __user *to, unsigned long size) { - prevent_user_access(to, NULL, size); + prevent_user_access(to, NULL, size, KUAP_WRITE); +} + +static inline void prevent_read_write_user(void __user *to, const void __user *from, + unsigned long size) +{ + prevent_user_access(to, from, size, KUAP_READ_WRITE); } #endif /* !__ASSEMBLY__ */ -#endif /* _ASM_POWERPC_KUP_H_ */ +#endif /* _ASM_POWERPC_KUAP_H_ */ diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 635fb154b33f9..a3633560493be 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -150,4 +150,7 @@ #define KVM_INST_FETCH_FAILED -1 +/* Extract PO and XOP opcode fields */ +#define PO_XOP_OPCODE_MASK 0xfc0007fe + #endif /* __POWERPC_KVM_ASM_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 6fe6ad64cba57..740b52ec35097 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -58,7 +58,8 @@ #define KVM_ARCH_WANT_MMU_NOTIFIER extern int kvm_unmap_hva_range(struct kvm *kvm, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, + unsigned flags); extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 7bcb64444a394..f71c361dc356f 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -59,6 +59,9 @@ struct machdep_calls { int (*pcibios_root_bridge_prepare)(struct pci_host_bridge *bridge); + /* finds all the pci_controllers present at boot */ + void (*discover_phbs)(void); + /* To setup PHBs when using automatic OF platform driver for PCI */ int (*pci_setup_phb)(struct pci_controller *host); diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 58efca9343113..f132b418a8c7a 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -216,7 +216,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, */ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) { - switch_mm(prev, next, current); + switch_mm_irqs_off(prev, next, current); } /* We don't currently use enter_lazy_tlb() for anything */ diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 1c3133b5f86aa..6fe97465e350b 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -34,18 +34,19 @@ #include static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size) + unsigned long size, unsigned long dir) { mtspr(SPRN_MD_AP, MD_APG_INIT); } static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size) + unsigned long size, unsigned long dir) { mtspr(SPRN_MD_AP, MD_APG_KUAP); } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000), "Bug: fault blocked by AP register !"); diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 552b96eef0c8e..3d32d7103ec8e 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -148,8 +148,10 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); */ #if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) #define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 36 #else #define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 32 #endif /* diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h index 332b13b4ecdb2..29c43665a7536 100644 --- a/arch/powerpc/include/asm/nohash/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/pgalloc.h @@ -46,7 +46,6 @@ static inline void pgtable_free(void *table, int shift) #define get_hugepd_cache_index(x) (x) -#ifdef CONFIG_SMP static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) { unsigned long pgf = (unsigned long)table; @@ -64,13 +63,6 @@ static inline void __tlb_remove_table(void *_table) pgtable_free(table, shift); } -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif - static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 7fed9dc0f147a..3d2a78ab051a7 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -199,9 +199,9 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, */ if (IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_PTE_64BIT) && !percpu) { __asm__ __volatile__("\ - stw%U0%X0 %2,%0\n\ + stw%X0 %2,%0\n\ eieio\n\ - stw%U0%X0 %L2,%1" + stw%X1 %L2,%1" : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) : "r" (pte) : "memory"); return; diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 378e3997845ab..c1f25a760eb13 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -211,7 +211,10 @@ #define OPAL_MPIPL_UPDATE 173 #define OPAL_MPIPL_REGISTER_TAG 174 #define OPAL_MPIPL_QUERY_TAG 175 -#define OPAL_LAST 175 +#define OPAL_SECVAR_GET 176 +#define OPAL_SECVAR_GET_NEXT 177 +#define OPAL_SECVAR_ENQUEUE_UPDATE 178 +#define OPAL_LAST 178 #define QUIESCE_HOLD 1 /* Spin all calls at entry */ #define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index a0cf8fba4d12b..9986ac34b8e22 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -298,6 +298,13 @@ int opal_sensor_group_clear(u32 group_hndl, int token); int opal_sensor_group_enable(u32 group_hndl, int token, bool enable); int opal_nx_coproc_init(uint32_t chip_id, uint32_t ct); +int opal_secvar_get(const char *key, uint64_t key_len, u8 *data, + uint64_t *data_size); +int opal_secvar_get_next(const char *key, uint64_t *key_len, + uint64_t key_buf_size); +int opal_secvar_enqueue_update(const char *key, uint64_t key_len, u8 *data, + uint64_t data_size); + s64 opal_mpipl_update(enum opal_mpipl_ops op, u64 src, u64 dest, u64 size); s64 opal_mpipl_register_tag(enum opal_mpipl_tags tag, u64 addr); s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, u64 *addr); diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index c8bb14ff47135..d92353a96f811 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -132,7 +132,11 @@ static inline bool pfn_valid(unsigned long pfn) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) -#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) +#define virt_addr_valid(vaddr) ({ \ + unsigned long _addr = (unsigned long)vaddr; \ + _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory && \ + pfn_valid(virt_to_pfn(_addr)); \ +}) /* * On Book-E parts we need __va to parse the device tree and we can't @@ -212,6 +216,9 @@ static inline bool pfn_valid(unsigned long pfn) #define __pa(x) ((unsigned long)(x) - VIRT_PHYS_OFFSET) #else #ifdef CONFIG_PPC64 + +#define VIRTUAL_WARN_ON(x) WARN_ON(IS_ENABLED(CONFIG_DEBUG_VIRTUAL) && (x)) + /* * gcc miscompiles (unsigned long)(&static_var) - PAGE_OFFSET * with -mcmodel=medium, so we use & and | instead of - and + on 64-bit. @@ -219,13 +226,13 @@ static inline bool pfn_valid(unsigned long pfn) */ #define __va(x) \ ({ \ - VIRTUAL_BUG_ON((unsigned long)(x) >= PAGE_OFFSET); \ + VIRTUAL_WARN_ON((unsigned long)(x) >= PAGE_OFFSET); \ (void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET); \ }) #define __pa(x) \ ({ \ - VIRTUAL_BUG_ON((unsigned long)(x) < PAGE_OFFSET); \ + VIRTUAL_WARN_ON((unsigned long)(x) < PAGE_OFFSET); \ (unsigned long)(x) & 0x0fffffffffffffffUL; \ }) @@ -295,8 +302,13 @@ static inline bool pfn_valid(unsigned long pfn) /* * Some number of bits at the level of the page table that points to * a hugepte are used to encode the size. This masks those bits. + * On 8xx, HW assistance requires 4k alignment for the hugepte. */ +#ifdef CONFIG_PPC_8xx +#define HUGEPD_SHIFT_MASK 0xfff +#else #define HUGEPD_SHIFT_MASK 0x3f +#endif #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/percpu.h b/arch/powerpc/include/asm/percpu.h index dce863a7635cd..8e5b7d0b851c6 100644 --- a/arch/powerpc/include/asm/percpu.h +++ b/arch/powerpc/include/asm/percpu.h @@ -10,8 +10,6 @@ #ifdef CONFIG_SMP -#include - #define __my_cpu_offset local_paca->data_offset #endif /* CONFIG_SMP */ @@ -19,4 +17,6 @@ #include +#include + #endif /* _ASM_POWERPC_PERCPU_H_ */ diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h index 7426d7a90e1e1..7aba3c7ea25cd 100644 --- a/arch/powerpc/include/asm/perf_event.h +++ b/arch/powerpc/include/asm/perf_event.h @@ -12,6 +12,8 @@ #ifdef CONFIG_PPC_PERF_CTRS #include +#else +static inline bool is_sier_available(void) { return false; } #endif #ifdef CONFIG_FSL_EMB_PERF_EVENT diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h index c6bbe9778d3cd..3c09109e708ef 100644 --- a/arch/powerpc/include/asm/pmc.h +++ b/arch/powerpc/include/asm/pmc.h @@ -34,6 +34,13 @@ static inline void ppc_set_pmu_inuse(int inuse) #endif } +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +static inline int ppc_get_pmu_inuse(void) +{ + return get_paca()->pmcregs_in_use; +} +#endif + extern void power4_enable_pmcs(void); #else /* CONFIG_PPC64 */ diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index edcb1fc50aebc..d0ee0ede57678 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -15,6 +15,7 @@ #define PCI_SLOT_ID_PREFIX (1UL << 63) #define PCI_SLOT_ID(phb_id, bdfn) \ (PCI_SLOT_ID_PREFIX | ((uint64_t)(bdfn) << 16) | (phb_id)) +#define PCI_PHB_SLOT_ID(phb_id) (phb_id) extern int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id); extern int pnv_pci_get_device_tree(uint32_t phandle, void *buf, uint64_t len); diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index c1df75edde441..a9af63bd430f2 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -204,6 +204,7 @@ #define PPC_INST_ICBT 0x7c00002c #define PPC_INST_ICSWX 0x7c00032d #define PPC_INST_ICSWEPX 0x7c00076d +#define PPC_INST_DSSALL 0x7e00066c #define PPC_INST_ISEL 0x7c00001e #define PPC_INST_ISEL_MASK 0xfc00003e #define PPC_INST_LDARX 0x7c0000a8 @@ -439,6 +440,7 @@ __PPC_RA(a) | __PPC_RB(b)) #define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \ __PPC_RA(a) | __PPC_RB(b)) +#define PPC_DSSALL stringify_in_c(.long PPC_INST_DSSALL) #define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LQARX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b) | __PPC_EH(eh)) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index a9993e7a443bc..64b998db9d3e9 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -291,7 +291,6 @@ struct thread_struct { #else #define INIT_THREAD { \ .ksp = INIT_SP, \ - .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \ .addr_limit = KERNEL_DS, \ .fpexc_mode = 0, \ .fscr = FSCR_TAR | FSCR_EBB \ diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index cb89e4bf55cef..964063765662b 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -71,6 +71,7 @@ struct ps3_dma_region_ops; * @bus_addr: The 'translated' bus address of the region. * @len: The length in bytes of the region. * @offset: The offset from the start of memory of the region. + * @dma_mask: Device dma_mask. * @ioid: The IOID of the device who owns this region * @chunk_list: Opaque variable used by the ioc page manager. * @region_ops: struct ps3_dma_region_ops - dma region operations @@ -85,6 +86,7 @@ struct ps3_dma_region { enum ps3_dma_region_type region_type; unsigned long len; unsigned long offset; + u64 dma_mask; /* driver variables (set by ps3_dma_region_create) */ unsigned long bus_addr; diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index ee3ada66deb58..5a424f867c828 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -62,6 +62,9 @@ struct pt_regs }; #endif + +#define STACK_FRAME_WITH_PT_REGS (STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)) + #ifdef __powerpc64__ /* @@ -203,7 +206,7 @@ do { \ #endif /* __powerpc64__ */ #define arch_has_single_step() (1) -#ifndef CONFIG_BOOK3S_601 +#ifndef CONFIG_PPC_BOOK3S_601 #define arch_has_block_step() (true) #else #define arch_has_block_step() (false) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index b3cbb1136bce0..34d08ff21b988 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -796,7 +796,7 @@ #define THRM1_TIN (1 << 31) #define THRM1_TIV (1 << 30) #define THRM1_THRES(x) ((x&0x7f)<<23) -#define THRM3_SITV(x) ((x&0x3fff)<<1) +#define THRM3_SITV(x) ((x & 0x1fff) << 1) #define THRM1_TID (1<<2) #define THRM1_TIE (1<<1) #define THRM1_V (1<<0) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 3c1887351c713..bd227e0eab07b 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -368,8 +368,6 @@ extern int rtas_set_indicator_fast(int indicator, int index, int new_value); extern void rtas_progress(char *s, unsigned short hex); extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); -extern int rtas_online_cpus_mask(cpumask_var_t cpus); -extern int rtas_offline_cpus_mask(cpumask_var_t cpus); extern int rtas_ibm_suspend_me(u64 handle); struct rtc_time; diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 5a9b6eb651b6c..d19871763ed4a 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -5,8 +5,22 @@ #include #include + +#define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed + #include +extern bool init_mem_is_free; + +static inline int arch_is_kernel_initmem_freed(unsigned long addr) +{ + if (!init_mem_is_free) + return 0; + + return addr >= (unsigned long)__init_begin && + addr < (unsigned long)__init_end; +} + extern char __head_end[]; #ifdef __powerpc64__ diff --git a/arch/powerpc/include/asm/secure_boot.h b/arch/powerpc/include/asm/secure_boot.h new file mode 100644 index 0000000000000..a2ff556916c66 --- /dev/null +++ b/arch/powerpc/include/asm/secure_boot.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Secure boot definitions + * + * Copyright (C) 2019 IBM Corporation + * Author: Nayna Jain + */ +#ifndef _ASM_POWER_SECURE_BOOT_H +#define _ASM_POWER_SECURE_BOOT_H + +#ifdef CONFIG_PPC_SECURE_BOOT + +bool is_ppc_secureboot_enabled(void); +bool is_ppc_trustedboot_enabled(void); + +#else + +static inline bool is_ppc_secureboot_enabled(void) +{ + return false; +} + +static inline bool is_ppc_trustedboot_enabled(void) +{ + return false; +} + +#endif +#endif diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 759597bf0fd86..316a9c8d1929e 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -9,7 +9,7 @@ #define _ASM_POWERPC_SECURITY_FEATURES_H -extern unsigned long powerpc_security_features; +extern u64 powerpc_security_features; extern bool rfi_flush; /* These are bit flags */ @@ -24,21 +24,26 @@ void setup_stf_barrier(void); void do_stf_barrier_fixups(enum stf_barrier_type types); void setup_count_cache_flush(void); -static inline void security_ftr_set(unsigned long feature) +static inline void security_ftr_set(u64 feature) { powerpc_security_features |= feature; } -static inline void security_ftr_clear(unsigned long feature) +static inline void security_ftr_clear(u64 feature) { powerpc_security_features &= ~feature; } -static inline bool security_ftr_enabled(unsigned long feature) +static inline bool security_ftr_enabled(u64 feature) { return !!(powerpc_security_features & feature); } +#ifdef CONFIG_PPC_BOOK3S_64 +enum stf_barrier_type stf_barrier_type_get(void); +#else +static inline enum stf_barrier_type stf_barrier_type_get(void) { return STF_BARRIER_NONE; } +#endif // Features indicating support for Spectre/Meltdown mitigations @@ -81,12 +86,22 @@ static inline bool security_ftr_enabled(unsigned long feature) // Software required to flush count cache on context switch #define SEC_FTR_FLUSH_COUNT_CACHE 0x0000000000000400ull +// Software required to flush link stack on context switch +#define SEC_FTR_FLUSH_LINK_STACK 0x0000000000001000ull + +// The L1-D cache should be flushed when entering the kernel +#define SEC_FTR_L1D_FLUSH_ENTRY 0x0000000000004000ull + +// The L1-D cache should be flushed after user accesses from the kernel +#define SEC_FTR_L1D_FLUSH_UACCESS 0x0000000000008000ull // Features enabled by default #define SEC_FTR_DEFAULT \ (SEC_FTR_L1D_FLUSH_HV | \ SEC_FTR_L1D_FLUSH_PR | \ SEC_FTR_BNDS_CHK_SPEC_BAR | \ + SEC_FTR_L1D_FLUSH_ENTRY | \ + SEC_FTR_L1D_FLUSH_UACCESS | \ SEC_FTR_FAVOUR_SECURITY) #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */ diff --git a/arch/powerpc/include/asm/secvar.h b/arch/powerpc/include/asm/secvar.h new file mode 100644 index 0000000000000..4cc35b58b9862 --- /dev/null +++ b/arch/powerpc/include/asm/secvar.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 IBM Corporation + * Author: Nayna Jain + * + * PowerPC secure variable operations. + */ +#ifndef SECVAR_OPS_H +#define SECVAR_OPS_H + +#include +#include + +extern const struct secvar_operations *secvar_ops; + +struct secvar_operations { + int (*get)(const char *key, uint64_t key_len, u8 *data, + uint64_t *data_size); + int (*get_next)(const char *key, uint64_t *key_len, + uint64_t keybufsize); + int (*set)(const char *key, uint64_t key_len, u8 *data, + uint64_t data_size); +}; + +#ifdef CONFIG_PPC_SECURE_BOOT + +extern void set_secvar_ops(const struct secvar_operations *ops); + +#else + +static inline void set_secvar_ops(const struct secvar_operations *ops) { } + +#endif + +#endif diff --git a/arch/powerpc/include/asm/setjmp.h b/arch/powerpc/include/asm/setjmp.h index e9f81bb3f83b0..f798e80e41061 100644 --- a/arch/powerpc/include/asm/setjmp.h +++ b/arch/powerpc/include/asm/setjmp.h @@ -7,7 +7,9 @@ #define JMP_BUF_LEN 23 -extern long setjmp(long *) __attribute__((returns_twice)); -extern void longjmp(long *, long) __attribute__((noreturn)); +typedef long jmp_buf[JMP_BUF_LEN]; + +extern int setjmp(jmp_buf env) __attribute__((returns_twice)); +extern void longjmp(jmp_buf env, int val) __attribute__((noreturn)); #endif /* _ASM_POWERPC_SETJMP_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 65676e2325b85..6f2f4497e13b3 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -52,12 +52,16 @@ enum l1d_flush_type { }; void setup_rfi_flush(enum l1d_flush_type, bool enable); +void setup_entry_flush(bool enable); +void setup_uaccess_flush(bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); #ifdef CONFIG_PPC_BARRIER_NOSPEC void setup_barrier_nospec(void); #else static inline void setup_barrier_nospec(void) { }; #endif +void do_uaccess_flush_fixups(enum l1d_flush_type types); +void do_entry_flush_fixups(enum l1d_flush_type types); void do_barrier_nospec_fixups(bool enable); extern bool barrier_nospec_enabled; diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index e9a960e28f3c8..d6102e34eb21b 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -15,6 +15,7 @@ * * (the type definitions are in asm/spinlock_types.h) */ +#include #include #ifdef CONFIG_PPC64 #include @@ -36,10 +37,12 @@ #endif #ifdef CONFIG_PPC_PSERIES +DECLARE_STATIC_KEY_FALSE(shared_processor); + #define vcpu_is_preempted vcpu_is_preempted static inline bool vcpu_is_preempted(int cpu) { - if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + if (!static_branch_unlikely(&shared_processor)) return false; return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1); } diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index d2d2c4bd84358..5405cb2fe65a2 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -17,11 +17,12 @@ typedef unsigned long cycles_t; static inline cycles_t get_cycles(void) { - if (IS_ENABLED(CONFIG_BOOK3S_601)) + if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) return 0; return mftb(); } +#define get_cycles get_cycles #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_TIMEX_H */ diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index b2c0be93929dd..02a1c18cdba3d 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -26,6 +26,17 @@ #define tlb_flush tlb_flush extern void tlb_flush(struct mmu_gather *tlb); +/* + * book3s: + * Hash does not use the linux page-tables, so we can avoid + * the TLB invalidate for page-table freeing, Radix otoh does use the + * page-tables and needs the TLBI. + * + * nohash: + * We still do TLB invalidate in the __pte_free_tlb routine before we + * add the page table pages to mmu gather table batch. + */ +#define tlb_needs_table_invalidate() radix_enabled() /* Get the generic bits... */ #include @@ -56,19 +67,6 @@ static inline int mm_is_thread_local(struct mm_struct *mm) return false; return cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)); } -static inline void mm_reset_thread_local(struct mm_struct *mm) -{ - WARN_ON(atomic_read(&mm->context.copros) > 0); - /* - * It's possible for mm_access to take a reference on mm_users to - * access the remote mm from another thread, but it's not allowed - * to set mm_cpumask, so mm_users may be > 1 here. - */ - WARN_ON(current->mm != mm); - atomic_set(&mm->context.active_cpus, 1); - cpumask_clear(mm_cpumask(mm)); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); -} #else /* CONFIG_PPC_BOOK3S_64 */ static inline int mm_is_thread_local(struct mm_struct *mm) { diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 15002b51ff18d..a14a32f6c3ccd 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -191,8 +191,11 @@ extern long __get_user_bad(void); */ #define __get_user_atomic_128_aligned(kaddr, uaddr, err) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine altivec\n" \ "1: lvx 0,0,%1 # get user\n" \ " stvx 0,0,%2 # put kernel\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ @@ -313,9 +316,9 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) unsigned long ret; barrier_nospec(); - allow_user_access(to, from, n); + allow_read_write_user(to, from, n); ret = __copy_tofrom_user(to, from, n); - prevent_user_access(to, from, n); + prevent_read_write_user(to, from, n); return ret; } #endif /* __powerpc64__ */ @@ -401,7 +404,7 @@ copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n) return n; } -extern unsigned long __clear_user(void __user *addr, unsigned long size); +unsigned long __arch_clear_user(void __user *addr, unsigned long size); static inline unsigned long clear_user(void __user *addr, unsigned long size) { @@ -409,12 +412,17 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size) might_fault(); if (likely(access_ok(addr, size))) { allow_write_to_user(addr, size); - ret = __clear_user(addr, size); + ret = __arch_clear_user(addr, size); prevent_write_to_user(addr, size); } return ret; } +static inline unsigned long __clear_user(void __user *addr, unsigned long size) +{ + return clear_user(addr, size); +} + extern long strncpy_from_user(char *dst, const char __user *src, long count); extern __must_check long strnlen_user(const char __user *str, long n); diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index c61d59ed3b45f..2ccb938d85447 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -82,6 +82,7 @@ struct vdso_data { __s32 wtom_clock_nsec; /* Wall to monotonic clock nsec */ __s64 wtom_clock_sec; /* Wall to monotonic clock sec */ struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */ + __u32 hrtimer_res; /* hrtimer resolution */ __u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */ __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ }; @@ -103,6 +104,7 @@ struct vdso_data { __s32 wtom_clock_nsec; struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */ __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */ + __u32 hrtimer_res; /* hrtimer resolution */ __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ __u32 dcache_block_size; /* L1 d-cache block size */ __u32 icache_block_size; /* L1 i-cache block size */ diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h index f2dfcd50a2d3e..33aee7490cbb1 100644 --- a/arch/powerpc/include/asm/xive-regs.h +++ b/arch/powerpc/include/asm/xive-regs.h @@ -39,6 +39,7 @@ #define XIVE_ESB_VAL_P 0x2 #define XIVE_ESB_VAL_Q 0x1 +#define XIVE_ESB_INVALID 0xFF /* * Thread Management (aka "TM") registers diff --git a/arch/powerpc/include/uapi/asm/bpf_perf_event.h b/arch/powerpc/include/uapi/asm/bpf_perf_event.h deleted file mode 100644 index 5e1e648aeec4c..0000000000000 --- a/arch/powerpc/include/uapi/asm/bpf_perf_event.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ -#define _UAPI__ASM_BPF_PERF_EVENT_H__ - -#include - -typedef struct user_pt_regs bpf_user_pt_regs_t; - -#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/arch/powerpc/include/uapi/asm/errno.h b/arch/powerpc/include/uapi/asm/errno.h index cc79856896a19..4ba87de32be00 100644 --- a/arch/powerpc/include/uapi/asm/errno.h +++ b/arch/powerpc/include/uapi/asm/errno.h @@ -2,6 +2,7 @@ #ifndef _ASM_POWERPC_ERRNO_H #define _ASM_POWERPC_ERRNO_H +#undef EDEADLOCK #include #undef EDEADLOCK diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index a7ca8fe623686..a9a7e7ab45d39 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -5,9 +5,6 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' -# Disable clang warning for using setjmp without setjmp.h header -CFLAGS_crash.o += $(call cc-disable-warning, builtin-requires-header) - ifdef CONFIG_PPC64 CFLAGS_prom_init.o += $(NO_MINIMAL_TOC) endif @@ -16,12 +13,14 @@ CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC endif +CFLAGS_early_32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector) +CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code @@ -39,7 +38,6 @@ KASAN_SANITIZE_btext.o := n ifdef CONFIG_KASAN CFLAGS_early_32.o += -DDISABLE_BRANCH_PROFILING CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING -CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING endif @@ -161,6 +159,9 @@ ifneq ($(CONFIG_PPC_POWERNV)$(CONFIG_PPC_SVM),) obj-y += ucall.o endif +obj-$(CONFIG_PPC_SECURE_BOOT) += secure_boot.o ima_arch.o secvar-ops.o +obj-$(CONFIG_PPC_SECVAR_SYSFS) += secvar-sysfs.o + # Disable GCOV, KCOV & sanitizers in odd or sensitive code GCOV_PROFILE_prom_init.o := n KCOV_INSTRUMENT_prom_init.o := n @@ -184,6 +185,9 @@ KCOV_INSTRUMENT_cputable.o := n KCOV_INSTRUMENT_setup_64.o := n KCOV_INSTRUMENT_paca.o := n +CFLAGS_setup_64.o += -fno-stack-protector +CFLAGS_paca.o += -fno-stack-protector + extra-$(CONFIG_PPC_FPU) += fpu.o extra-$(CONFIG_ALTIVEC) += vector.o extra-$(CONFIG_PPC64) += entry_64.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 484f54dab2475..af399675248ed 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -285,7 +285,7 @@ int main(void) /* Interrupt register frame */ DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); - DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); + DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_WITH_PT_REGS); STACK_PT_REGS_OFFSET(GPR0, gpr[0]); STACK_PT_REGS_OFFSET(GPR1, gpr[1]); STACK_PT_REGS_OFFSET(GPR2, gpr[2]); @@ -387,6 +387,7 @@ int main(void) OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec); OFFSET(STAMP_XTIME, vdso_data, stamp_xtime); OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction); + OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res); OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size); OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size); OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size); @@ -417,7 +418,6 @@ int main(void) DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); DEFINE(CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); - DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); #ifdef CONFIG_BUG DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 6dfceaa820e42..b0e0b3cd91eec 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -250,8 +250,10 @@ int __init btext_find_display(int allow_nonstdout) rc = btext_initialize(np); printk("result: %d\n", rc); } - if (rc == 0) + if (rc == 0) { + of_node_put(np); break; + } } return rc; } diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index a460298c7ddb4..f91ecb10d0ae7 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -184,7 +184,7 @@ __init_LPCR_ISA300: __init_FSCR: mfspr r3,SPRN_FSCR - ori r3,r3,FSCR_TAR|FSCR_DSCR|FSCR_EBB + ori r3,r3,FSCR_TAR|FSCR_EBB mtspr SPRN_FSCR,r3 blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index e745abc5457a0..245be4fafe134 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2193,11 +2193,13 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, * oprofile_cpu_type already has a value, then we are * possibly overriding a real PVR with a logical one, * and, in that case, keep the current value for - * oprofile_cpu_type. + * oprofile_cpu_type. Futhermore, let's ensure that the + * fix for the PMAO bug is enabled on compatibility mode. */ if (old.oprofile_cpu_type != NULL) { t->oprofile_cpu_type = old.oprofile_cpu_type; t->oprofile_type = old.oprofile_type; + t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG; } } diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index e486d1d78de28..f4cb2c546adbb 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -160,7 +160,8 @@ u64 dma_iommu_get_required_mask(struct device *dev) return bypass_mask; } - mask = 1ULL < (fls_long(tbl->it_offset + tbl->it_size) - 1); + mask = 1ULL << (fls_long(tbl->it_offset + tbl->it_size) + + tbl->it_page_shift - 1); mask += mask - 1; return mask; diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 180b3a5d1001a..3551f11accf0c 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -139,7 +139,6 @@ static void __init cpufeatures_setup_cpu(void) /* Initialize the base environment -- clear FSCR/HFSCR. */ hv_mode = !!(mfmsr() & MSR_HV); if (hv_mode) { - /* CPU_FTR_HVMODE is used early in PACA setup */ cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE; mtspr(SPRN_HFSCR, 0); } @@ -347,6 +346,14 @@ static int __init feat_enable_dscr(struct dt_cpu_feature *f) { u64 lpcr; + /* + * Linux relies on FSCR[DSCR] being clear, so that we can take the + * facility unavailable interrupt and track the task's usage of DSCR. + * See facility_unavailable_exception(). + * Clear the bit here so that feat_enable() doesn't set it. + */ + f->fscr_bit_nr = -1; + feat_enable(f); lpcr = mfspr(SPRN_LPCR); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index bc8a551013be9..8b0e523b2abbe 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -368,14 +368,11 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) pa = pte_pfn(*ptep); /* On radix we can do hugepage mappings for io, so handle that */ - if (hugepage_shift) { - pa <<= hugepage_shift; - pa |= token & ((1ul << hugepage_shift) - 1); - } else { - pa <<= PAGE_SHIFT; - pa |= token & (PAGE_SIZE - 1); - } + if (!hugepage_shift) + hugepage_shift = PAGE_SHIFT; + pa <<= PAGE_SHIFT; + pa |= token & ((1ul << hugepage_shift) - 1); return pa; } @@ -503,7 +500,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev) rc = 1; if (pe->state & EEH_PE_ISOLATED) { pe->check_count++; - if (pe->check_count % EEH_MAX_FAILS == 0) { + if (pe->check_count == EEH_MAX_FAILS) { dn = pci_device_to_OF_node(dev); if (dn) location = of_get_property(dn, "ibm,loc-code", diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index cf11277ebd020..000ebb5a6fb3c 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -272,8 +272,9 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v) { struct pci_io_addr_range *piar; struct rb_node *n; + unsigned long flags; - spin_lock(&pci_io_addr_cache_root.piar_lock); + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); for (n = rb_first(&pci_io_addr_cache_root.rb_root); n; n = rb_next(n)) { piar = rb_entry(n, struct pci_io_addr_range, rb_node); @@ -281,7 +282,7 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v) (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", &piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev)); } - spin_unlock(&pci_io_addr_cache_root.piar_lock); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); return 0; } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index d9279d0ee9f54..4fd7efdf2a53a 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -541,12 +541,6 @@ static void eeh_rmv_device(struct eeh_dev *edev, void *userdata) pci_iov_remove_virtfn(edev->physfn, pdn->vf_index); edev->pdev = NULL; - - /* - * We have to set the VF PE number to invalid one, which is - * required to plug the VF successfully. - */ - pdn->pe_number = IODA_INVALID_PE; #endif if (rmv_data) list_add(&edev->rmv_entry, &rmv_data->removed_vf_list); @@ -897,12 +891,12 @@ void eeh_handle_normal_event(struct eeh_pe *pe) /* Log the event */ if (pe->type & EEH_PE_PHB) { - pr_err("EEH: PHB#%x failure detected, location: %s\n", + pr_err("EEH: Recovering PHB#%x, location: %s\n", pe->phb->global_number, eeh_pe_loc_get(pe)); } else { struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb); - pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", + pr_err("EEH: Recovering PHB#%x-PE#%x\n", pe->phb->global_number, pe->addr); pr_err("EEH: PE location: %s, PHB location: %s\n", eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); @@ -1206,6 +1200,17 @@ void eeh_handle_special_event(void) eeh_pe_state_mark(pe, EEH_PE_RECOVERING); eeh_handle_normal_event(pe); } else { + eeh_for_each_pe(pe, tmp_pe) + eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev) + edev->mode &= ~EEH_DEV_NO_HANDLER; + + /* Notify all devices to be down */ + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); + eeh_set_channel_state(pe, pci_channel_io_perm_failure); + eeh_pe_report( + "error_detected(permanent failure)", pe, + eeh_report_failure, NULL); + pci_lock_rescan_remove(); list_for_each_entry(hose, &hose_list, list_node) { phb_pe = eeh_phb_pe_get(hose); @@ -1214,16 +1219,6 @@ void eeh_handle_special_event(void) (phb_pe->state & EEH_PE_RECOVERING)) continue; - eeh_for_each_pe(pe, tmp_pe) - eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev) - edev->mode &= ~EEH_DEV_NO_HANDLER; - - /* Notify all devices to be down */ - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); - eeh_set_channel_state(pe, pci_channel_io_perm_failure); - eeh_pe_report( - "error_detected(permanent failure)", pe, - eeh_report_failure, NULL); bus = eeh_pe_bus_get(phb_pe); if (!bus) { pr_err("%s: Cannot find PCI bus for " diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index d60908ea37fb9..c72894ff9d614 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -179,7 +179,7 @@ transfer_to_handler: 2: /* if from kernel, check interrupted DOZE/NAP mode and * check for stack overflow */ - kuap_save_and_lock r11, r12, r9, r2, r0 + kuap_save_and_lock r11, r12, r9, r2, r6 addi r2, r12, -THREAD lwz r9,KSP_LIMIT(r12) cmplw r1,r9 /* if r1 <= ksp_limit */ @@ -284,6 +284,7 @@ reenable_mmu: rlwinm r9,r9,0,~MSR_EE lwz r12,_LINK(r11) /* and return to address in LR */ kuap_restore r11, r2, r3, r4, r5 + lwz r2, GPR2(r11) b fast_exception_return #endif @@ -335,6 +336,9 @@ trace_syscall_entry_irq_off: .globl transfer_to_syscall transfer_to_syscall: +#ifdef CONFIG_PPC_BOOK3S_32 + kuep_lock r11, r12 +#endif #ifdef CONFIG_TRACE_IRQFLAGS andi. r12,r9,MSR_EE beq- trace_syscall_entry_irq_off @@ -704,7 +708,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) stw r10,_CCR(r1) stw r1,KSP(r3) /* Set old stack pointer */ - kuap_check r2, r4 + kuap_check r2, r0 #ifdef CONFIG_SMP /* We need a sync somewhere here to make sure that if the * previous task gets rescheduled on another CPU, it sees all @@ -777,7 +781,7 @@ fast_exception_return: 1: lis r3,exc_exit_restart_end@ha addi r3,r3,exc_exit_restart_end@l cmplw r12,r3 -#if CONFIG_PPC_BOOK3S_601 +#ifdef CONFIG_PPC_BOOK3S_601 bge 2b #else bge 3f @@ -785,7 +789,7 @@ fast_exception_return: lis r4,exc_exit_restart@ha addi r4,r4,exc_exit_restart@l cmplw r12,r4 -#if CONFIG_PPC_BOOK3S_601 +#ifdef CONFIG_PPC_BOOK3S_601 blt 2b #else blt 3f diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 6467bdab8d405..3fd3ef352e3fd 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -537,6 +537,7 @@ flush_count_cache: /* Save LR into r9 */ mflr r9 + // Flush the link stack .rept 64 bl .+4 .endr @@ -546,6 +547,11 @@ flush_count_cache: .balign 32 /* Restore LR */ 1: mtlr r9 + + // If we're just flushing the link stack, return here +3: nop + patch_site 3b patch__flush_link_stack_return + li r9,0x7fff mtctr r9 diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index d0018dd17e0a6..88bba0a931d65 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1090,17 +1090,19 @@ EXC_COMMON_BEGIN(machine_check_idle_common) bl machine_check_queue_event /* - * We have not used any non-volatile GPRs here, and as a rule - * most exception code including machine check does not. - * Therefore PACA_NAPSTATELOST does not need to be set. Idle - * wakeup will restore volatile registers. + * GPR-loss wakeups are relatively straightforward, because the + * idle sleep code has saved all non-volatile registers on its + * own stack, and r1 in PACAR1. * - * Load the original SRR1 into r3 for pnv_powersave_wakeup_mce. + * For no-loss wakeups the r1 and lr registers used by the + * early machine check handler have to be restored first. r2 is + * the kernel TOC, so no need to restore it. * * Then decrement MCE nesting after finishing with the stack. */ ld r3,_MSR(r1) ld r4,_LINK(r1) + ld r1,GPR1(r1) lhz r11,PACA_IN_MCE(r13) subi r11,r11,1 @@ -1109,7 +1111,7 @@ EXC_COMMON_BEGIN(machine_check_idle_common) mtlr r4 rlwinm r10,r3,47-31,30,31 cmpwi cr1,r10,2 - bltlr cr1 /* no state loss, return to idle caller */ + bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */ b idle_return_gpr_loss #endif @@ -1148,7 +1150,7 @@ EXC_REAL_BEGIN(data_access, 0x300, 0x80) INT_HANDLER data_access, 0x300, ool=1, dar=1, dsisr=1, kvm=1 EXC_REAL_END(data_access, 0x300, 0x80) EXC_VIRT_BEGIN(data_access, 0x4300, 0x80) - INT_HANDLER data_access, 0x300, virt=1, dar=1, dsisr=1 + INT_HANDLER data_access, 0x300, ool=1, virt=1, dar=1, dsisr=1 EXC_VIRT_END(data_access, 0x4300, 0x80) INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1 EXC_COMMON_BEGIN(data_access_common) @@ -1203,7 +1205,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) EXC_REAL_BEGIN(instruction_access, 0x400, 0x80) - INT_HANDLER instruction_access, 0x400, kvm=1 + INT_HANDLER instruction_access, 0x400, ool=1, kvm=1 EXC_REAL_END(instruction_access, 0x400, 0x80) EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80) INT_HANDLER instruction_access, 0x400, virt=1 @@ -1223,7 +1225,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80) - INT_HANDLER instruction_access_slb, 0x480, area=PACA_EXSLB, kvm=1 + INT_HANDLER instruction_access_slb, 0x480, ool=1, area=PACA_EXSLB, kvm=1 EXC_REAL_END(instruction_access_slb, 0x480, 0x80) EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) INT_HANDLER instruction_access_slb, 0x480, virt=1, area=PACA_EXSLB @@ -1363,17 +1365,17 @@ EXC_REAL_BEGIN(decrementer, 0x900, 0x80) INT_HANDLER decrementer, 0x900, ool=1, bitmask=IRQS_DISABLED, kvm=1 EXC_REAL_END(decrementer, 0x900, 0x80) EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80) - INT_HANDLER decrementer, 0x900, virt=1, bitmask=IRQS_DISABLED + INT_HANDLER decrementer, 0x900, ool=1, virt=1, bitmask=IRQS_DISABLED EXC_VIRT_END(decrementer, 0x4900, 0x80) INT_KVM_HANDLER decrementer, 0x900, EXC_STD, PACA_EXGEN, 0 EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80) - INT_HANDLER hdecrementer, 0x980, hsrr=EXC_HV, kvm=1 + INT_HANDLER hdecrementer, 0x980, ool=1, hsrr=EXC_HV, kvm=1 EXC_REAL_END(hdecrementer, 0x980, 0x80) EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80) - INT_HANDLER hdecrementer, 0x980, virt=1, hsrr=EXC_HV, kvm=1 + INT_HANDLER hdecrementer, 0x980, ool=1, virt=1, hsrr=EXC_HV, kvm=1 EXC_VIRT_END(hdecrementer, 0x4980, 0x80) INT_KVM_HANDLER hdecrementer, 0x980, EXC_HV, PACA_EXGEN, 0 EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt) @@ -2044,15 +2046,8 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback) .endr blr -TRAMP_REAL_BEGIN(rfi_flush_fallback) - SET_SCRATCH0(r13); - GET_PACA(r13); - std r1,PACA_EXRFI+EX_R12(r13) - ld r1,PACAKSAVE(r13) - std r9,PACA_EXRFI+EX_R9(r13) - std r10,PACA_EXRFI+EX_R10(r13) - std r11,PACA_EXRFI+EX_R11(r13) - mfctr r9 +/* Clobbers r10, r11, ctr */ +.macro L1D_DISPLACEMENT_FLUSH ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) ld r11,PACA_L1D_FLUSH_SIZE(r13) srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ @@ -2063,7 +2058,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback) sync /* - * The load adresses are at staggered offsets within cachelines, + * The load addresses are at staggered offsets within cachelines, * which suits some pipelines better (on others it should not * hurt). */ @@ -2078,7 +2073,30 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback) ld r11,(0x80 + 8)*7(r10) addi r10,r10,0x80*8 bdnz 1b +.endm +TRAMP_REAL_BEGIN(entry_flush_fallback) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + mfctr r9 + L1D_DISPLACEMENT_FLUSH + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + blr + +TRAMP_REAL_BEGIN(rfi_flush_fallback) + SET_SCRATCH0(r13); + GET_PACA(r13); + std r1,PACA_EXRFI+EX_R12(r13) + ld r1,PACAKSAVE(r13) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + mfctr r9 + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -2096,32 +2114,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) mfctr r9 - ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SIZE(r13) - srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ - mtctr r11 - DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ - - /* order ld/st prior to dcbt stop all streams with flushing */ - sync - - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). - */ -1: - ld r11,(0x80 + 8)*0(r10) - ld r11,(0x80 + 8)*1(r10) - ld r11,(0x80 + 8)*2(r10) - ld r11,(0x80 + 8)*3(r10) - ld r11,(0x80 + 8)*4(r10) - ld r11,(0x80 + 8)*5(r10) - ld r11,(0x80 + 8)*6(r10) - ld r11,(0x80 + 8)*7(r10) - addi r10,r10,0x80*8 - bdnz 1b - + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -2130,6 +2123,19 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) GET_SCRATCH0(r13); hrfid +USE_TEXT_SECTION() + +_GLOBAL(do_uaccess_flush) + UACCESS_FLUSH_FIXUP_SECTION + nop + nop + nop + blr + L1D_DISPLACEMENT_FLUSH + blr +_ASM_NOKPROBE_SYMBOL(do_uaccess_flush) +EXPORT_SYMBOL(do_uaccess_flush) + /* * Real mode exceptions actually use this too, but alternate * instruction code patches (which end up in the common .text area) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index ed59855430b93..69d64f406204f 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -38,8 +38,17 @@ static void __init fadump_reserve_crash_area(u64 base); #ifndef CONFIG_PRESERVE_FA_DUMP static DEFINE_MUTEX(fadump_mutex); -struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 }; -struct fadump_mrange_info reserved_mrange_info = { "reserved", NULL, 0, 0, 0 }; +struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false }; + +#define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */ +#define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \ + sizeof(struct fadump_memory_range)) +static struct fadump_memory_range rngs[RESERVED_RNGS_CNT]; +struct fadump_mrange_info reserved_mrange_info = { "reserved", rngs, + RESERVED_RNGS_SZ, 0, + RESERVED_RNGS_CNT, true }; + +static void __init early_init_dt_scan_reserved_ranges(unsigned long node); #ifdef CONFIG_CMA static struct cma *fadump_cma; @@ -108,6 +117,11 @@ static int __init fadump_cma_init(void) { return 1; } int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, int depth, void *data) { + if (depth == 0) { + early_init_dt_scan_reserved_ranges(node); + return 0; + } + if (depth != 1) return 0; @@ -265,7 +279,7 @@ static void fadump_show_config(void) * that is required for a kernel to boot successfully. * */ -static inline u64 fadump_calculate_reserve_size(void) +static __init u64 fadump_calculate_reserve_size(void) { u64 base, size, bootmem_min; int ret; @@ -429,10 +443,72 @@ static int __init fadump_get_boot_mem_regions(void) return ret; } +/* + * Returns true, if the given range overlaps with reserved memory ranges + * starting at idx. Also, updates idx to index of overlapping memory range + * with the given memory range. + * False, otherwise. + */ +static bool overlaps_reserved_ranges(u64 base, u64 end, int *idx) +{ + bool ret = false; + int i; + + for (i = *idx; i < reserved_mrange_info.mem_range_cnt; i++) { + u64 rbase = reserved_mrange_info.mem_ranges[i].base; + u64 rend = rbase + reserved_mrange_info.mem_ranges[i].size; + + if (end <= rbase) + break; + + if ((end > rbase) && (base < rend)) { + *idx = i; + ret = true; + break; + } + } + + return ret; +} + +/* + * Locate a suitable memory area to reserve memory for FADump. While at it, + * lookup reserved-ranges & avoid overlap with them, as they are used by F/W. + */ +static u64 __init fadump_locate_reserve_mem(u64 base, u64 size) +{ + struct fadump_memory_range *mrngs; + phys_addr_t mstart, mend; + int idx = 0; + u64 i, ret = 0; + + mrngs = reserved_mrange_info.mem_ranges; + for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, + &mstart, &mend, NULL) { + pr_debug("%llu) mstart: %llx, mend: %llx, base: %llx\n", + i, mstart, mend, base); + + if (mstart > base) + base = PAGE_ALIGN(mstart); + + while ((mend > base) && ((mend - base) >= size)) { + if (!overlaps_reserved_ranges(base, base+size, &idx)) { + ret = base; + goto out; + } + + base = mrngs[idx].base + mrngs[idx].size; + base = PAGE_ALIGN(base); + } + } + +out: + return ret; +} + int __init fadump_reserve_mem(void) { - u64 base, size, mem_boundary, bootmem_min, align = PAGE_SIZE; - bool is_memblock_bottom_up = memblock_bottom_up(); + u64 base, size, mem_boundary, bootmem_min; int ret = 1; if (!fw_dump.fadump_enabled) @@ -453,9 +529,9 @@ int __init fadump_reserve_mem(void) PAGE_ALIGN(fadump_calculate_reserve_size()); #ifdef CONFIG_CMA if (!fw_dump.nocma) { - align = FADUMP_CMA_ALIGNMENT; fw_dump.boot_memory_size = - ALIGN(fw_dump.boot_memory_size, align); + ALIGN(fw_dump.boot_memory_size, + FADUMP_CMA_ALIGNMENT); } #endif @@ -523,13 +599,9 @@ int __init fadump_reserve_mem(void) * Reserve memory at an offset closer to bottom of the RAM to * minimize the impact of memory hot-remove operation. */ - memblock_set_bottom_up(true); - base = memblock_find_in_range(base, mem_boundary, size, align); - - /* Restore the previous allocation mode */ - memblock_set_bottom_up(is_memblock_bottom_up); + base = fadump_locate_reserve_mem(base, size); - if (!base) { + if (!base || (base + size > mem_boundary)) { pr_err("Failed to find memory chunk for reservation!\n"); goto error_out; } @@ -726,10 +798,14 @@ void fadump_free_cpu_notes_buf(void) static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info) { + if (mrange_info->is_static) { + mrange_info->mem_range_cnt = 0; + return; + } + kfree(mrange_info->mem_ranges); - mrange_info->mem_ranges = NULL; - mrange_info->mem_ranges_sz = 0; - mrange_info->max_mem_ranges = 0; + memset((void *)((u64)mrange_info + RNG_NAME_SZ), 0, + (sizeof(struct fadump_mrange_info) - RNG_NAME_SZ)); } /* @@ -759,7 +835,6 @@ static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info) sizeof(struct fadump_memory_range)); return 0; } - static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, u64 base, u64 end) { @@ -778,7 +853,12 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, start = mem_ranges[mrange_info->mem_range_cnt - 1].base; size = mem_ranges[mrange_info->mem_range_cnt - 1].size; - if ((start + size) == base) + /* + * Boot memory area needs separate PT_LOAD segment(s) as it + * is moved to a different location at the time of crash. + * So, fold only if the region is not boot memory area. + */ + if ((start + size) == base && start >= fw_dump.boot_mem_top) is_adjacent = true; } if (!is_adjacent) { @@ -786,6 +866,12 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) { int ret; + if (mrange_info->is_static) { + pr_err("Reached array size limit for %s memory ranges\n", + mrange_info->name); + return -ENOSPC; + } + ret = fadump_alloc_mem_ranges(mrange_info); if (ret) return ret; @@ -1202,20 +1288,19 @@ static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info) * Scan reserved-ranges to consider them while reserving/releasing * memory for FADump. */ -static inline int fadump_scan_reserved_mem_ranges(void) +static void __init early_init_dt_scan_reserved_ranges(unsigned long node) { - struct device_node *root; const __be32 *prop; int len, ret = -1; unsigned long i; - root = of_find_node_by_path("/"); - if (!root) - return ret; + /* reserved-ranges already scanned */ + if (reserved_mrange_info.mem_range_cnt != 0) + return; - prop = of_get_property(root, "reserved-ranges", &len); + prop = of_get_flat_dt_prop(node, "reserved-ranges", &len); if (!prop) - return ret; + return; /* * Each reserved range is an (address,size) pair, 2 cells each, @@ -1237,7 +1322,8 @@ static inline int fadump_scan_reserved_mem_ranges(void) } } - return ret; + /* Compact reserved ranges */ + sort_and_merge_mem_ranges(&reserved_mrange_info); } /* @@ -1251,32 +1337,21 @@ static void fadump_release_memory(u64 begin, u64 end) u64 ra_start, ra_end, tstart; int i, ret; - fadump_scan_reserved_mem_ranges(); - ra_start = fw_dump.reserve_dump_area_start; ra_end = ra_start + fw_dump.reserve_dump_area_size; /* - * Add reserved dump area to reserved ranges list - * and exclude all these ranges while releasing memory. + * If reserved ranges array limit is hit, overwrite the last reserved + * memory range with reserved dump area to ensure it is excluded from + * the memory being released (reused for next FADump registration). */ - ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end); - if (ret != 0) { - /* - * Not enough memory to setup reserved ranges but the system is - * running shortage of memory. So, release all the memory except - * Reserved dump area (reused for next fadump registration). - */ - if (begin < ra_end && end > ra_start) { - if (begin < ra_start) - fadump_release_reserved_area(begin, ra_start); - if (end > ra_end) - fadump_release_reserved_area(ra_end, end); - } else - fadump_release_reserved_area(begin, end); + if (reserved_mrange_info.mem_range_cnt == + reserved_mrange_info.max_mem_ranges) + reserved_mrange_info.mem_range_cnt--; + ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end); + if (ret != 0) return; - } /* Get the reserved ranges list in order first. */ sort_and_merge_mem_ranges(&reserved_mrange_info); diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index 4a24f8f026c79..edaab1142498c 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -418,14 +418,11 @@ InstructionTLBMiss: cmplw 0,r1,r3 #endif mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP - li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#else - li r1,_PAGE_PRESENT | _PAGE_EXEC -#endif + li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER #if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) bge- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ #endif 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ @@ -484,13 +481,10 @@ DataLoadTLBMiss: lis r1,PAGE_OFFSET@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP - li r1, _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r1, _PAGE_PRESENT -#endif + li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER bge- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + li r1, _PAGE_PRESENT | _PAGE_ACCESSED addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ @@ -564,13 +558,10 @@ DataStoreTLBMiss: lis r1,PAGE_OFFSET@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT -#endif + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER bge- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ @@ -843,7 +834,7 @@ BEGIN_MMU_FTR_SECTION END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr -load_segment_registers: +_GLOBAL(load_segment_registers) li r0, NUM_USER_SEGMENTS /* load up user segment register values */ mtctr r0 /* for context 0 */ li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index ad79fddb974de..9019f1395d39a 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -420,6 +420,10 @@ generic_secondary_common_init: /* From now on, r24 is expected to be logical cpuid */ mr r24,r5 + /* Create a temp kernel stack for use before relocation is on. */ + ld r1,PACAEMERGSP(r13) + subi r1,r1,STACK_FRAME_OVERHEAD + /* See if we need to call a cpu state restore handler */ LOAD_REG_ADDR(r23, cur_cpu_spec) ld r23,0(r23) @@ -448,10 +452,6 @@ generic_secondary_common_init: sync /* order paca.run and cur_cpu_spec */ isync /* In case code patching happened */ - /* Create a temp kernel stack for use before relocation is on. */ - ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD - b __secondary_start #endif /* SMP */ @@ -945,15 +945,8 @@ start_here_multiplatform: std r0,0(r4) #endif - /* The following gets the stack set up with the regs */ - /* pointing to the real addr of the kernel stack. This is */ - /* all done to support the C function call below which sets */ - /* up the htab. This is done because we have relocated the */ - /* kernel but are still running in real mode. */ - - LOAD_REG_ADDR(r3,init_thread_union) - /* set up a stack pointer */ + LOAD_REG_ADDR(r3,init_thread_union) LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) add r1,r3,r1 li r0,0 @@ -999,7 +992,7 @@ start_here_common: bl start_kernel /* Not reached */ - trap +0: trap EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0 /* diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 19f583e18402e..6f3e417f55a35 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -191,7 +191,7 @@ SystemCall: /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. */ - EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD) + EXCEPTION(0x1000, SoftEmu, emulation_assist_interrupt, EXC_XFER_STD) /* Called from DataStoreTLBMiss when perf TLB misses events are activated */ #ifdef CONFIG_PERF_EVENTS @@ -229,9 +229,7 @@ SystemCall: InstructionTLBMiss: mtspr SPRN_SPRG_SCRATCH0, r10 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) mtspr SPRN_SPRG_SCRATCH1, r11 -#endif /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -278,27 +276,23 @@ InstructionTLBMiss: #ifdef ITLB_MISS_KERNEL mtcr r11 #endif -#ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT + rlwinm r11, r10, 32-7, _PAGE_PRESENT and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT -#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 20 and 23 must be clear. * Software indicator bits 22, 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior * of the MMU. */ - rlwimi r10, r10, 0, 0x0f00 /* Clear bits 20-23 */ + rlwinm r10, r10, 0, ~0x0f00 /* Clear bits 20-23 */ rlwimi r10, r10, 4, 0x0400 /* Copy _PAGE_EXEC into bit 21 */ ori r10, r10, RPN_PATTERN | 0x200 /* Set 22 and 24-27 */ mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ /* Restore registers */ 0: mfspr r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) mfspr r11, SPRN_SPRG_SCRATCH1 -#endif rfi patch_site 0b, patch__itlbmiss_exit_1 @@ -308,9 +302,7 @@ InstructionTLBMiss: addi r10, r10, 1 stw r10, (itlb_miss_counter - PAGE_OFFSET)@l(0) mfspr r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) mfspr r11, SPRN_SPRG_SCRATCH1 -#endif rfi #endif @@ -394,11 +386,9 @@ DataStoreTLBMiss: * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); * r10 = (r10 & ~PRESENT) | r11; */ -#ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT + rlwinm r11, r10, 32-7, _PAGE_PRESENT and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT -#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index adf0505dbe022..519d49547e2f3 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -238,6 +238,9 @@ set_ivor: bl early_init +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif #ifdef CONFIG_RELOCATABLE mr r3,r30 mr r4,r31 @@ -264,9 +267,6 @@ set_ivor: /* * Decide what sort of machine this is and initialize the MMU. */ -#ifdef CONFIG_KASAN - bl kasan_early_init -#endif mr r3,r30 mr r4,r31 bl machine_init diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index a36fd053c3dba..0615ba86baef3 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -37,7 +37,7 @@ static int __init powersave_off(char *arg) { ppc_md.power_save = NULL; cpuidle_disable = IDLE_POWERSAVE_OFF; - return 0; + return 1; } __setup("powersave=off", powersave_off); diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 0ffdd18b9f268..acb8215c5a01d 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -129,7 +129,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM) mtspr SPRN_HID0,r4 BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */ diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index d32751994a62c..c3f62c9ce7406 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -50,28 +50,32 @@ _GLOBAL(isa300_idle_stop_mayloss) std r1,PACAR1(r13) mflr r4 mfcr r5 - /* use stack red zone rather than a new frame for saving regs */ - std r2,-8*0(r1) - std r14,-8*1(r1) - std r15,-8*2(r1) - std r16,-8*3(r1) - std r17,-8*4(r1) - std r18,-8*5(r1) - std r19,-8*6(r1) - std r20,-8*7(r1) - std r21,-8*8(r1) - std r22,-8*9(r1) - std r23,-8*10(r1) - std r24,-8*11(r1) - std r25,-8*12(r1) - std r26,-8*13(r1) - std r27,-8*14(r1) - std r28,-8*15(r1) - std r29,-8*16(r1) - std r30,-8*17(r1) - std r31,-8*18(r1) - std r4,-8*19(r1) - std r5,-8*20(r1) + /* + * Use the stack red zone rather than a new frame for saving regs since + * in the case of no GPR loss the wakeup code branches directly back to + * the caller without deallocating the stack frame first. + */ + std r2,-8*1(r1) + std r14,-8*2(r1) + std r15,-8*3(r1) + std r16,-8*4(r1) + std r17,-8*5(r1) + std r18,-8*6(r1) + std r19,-8*7(r1) + std r20,-8*8(r1) + std r21,-8*9(r1) + std r22,-8*10(r1) + std r23,-8*11(r1) + std r24,-8*12(r1) + std r25,-8*13(r1) + std r26,-8*14(r1) + std r27,-8*15(r1) + std r28,-8*16(r1) + std r29,-8*17(r1) + std r30,-8*18(r1) + std r31,-8*19(r1) + std r4,-8*20(r1) + std r5,-8*21(r1) /* 168 bytes */ PPC_STOP b . /* catch bugs */ @@ -87,8 +91,8 @@ _GLOBAL(isa300_idle_stop_mayloss) */ _GLOBAL(idle_return_gpr_loss) ld r1,PACAR1(r13) - ld r4,-8*19(r1) - ld r5,-8*20(r1) + ld r4,-8*20(r1) + ld r5,-8*21(r1) mtlr r4 mtcr r5 /* @@ -96,38 +100,40 @@ _GLOBAL(idle_return_gpr_loss) * from PACATOC. This could be avoided for that less common case * if KVM saved its r2. */ - ld r2,-8*0(r1) - ld r14,-8*1(r1) - ld r15,-8*2(r1) - ld r16,-8*3(r1) - ld r17,-8*4(r1) - ld r18,-8*5(r1) - ld r19,-8*6(r1) - ld r20,-8*7(r1) - ld r21,-8*8(r1) - ld r22,-8*9(r1) - ld r23,-8*10(r1) - ld r24,-8*11(r1) - ld r25,-8*12(r1) - ld r26,-8*13(r1) - ld r27,-8*14(r1) - ld r28,-8*15(r1) - ld r29,-8*16(r1) - ld r30,-8*17(r1) - ld r31,-8*18(r1) + ld r2,-8*1(r1) + ld r14,-8*2(r1) + ld r15,-8*3(r1) + ld r16,-8*4(r1) + ld r17,-8*5(r1) + ld r18,-8*6(r1) + ld r19,-8*7(r1) + ld r20,-8*8(r1) + ld r21,-8*9(r1) + ld r22,-8*10(r1) + ld r23,-8*11(r1) + ld r24,-8*12(r1) + ld r25,-8*13(r1) + ld r26,-8*14(r1) + ld r27,-8*15(r1) + ld r28,-8*16(r1) + ld r29,-8*17(r1) + ld r30,-8*18(r1) + ld r31,-8*19(r1) blr /* * This is the sequence required to execute idle instructions, as * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0. - * - * The 0(r1) slot is used to save r2 in isa206, so use that here. + * We have to store a GPR somewhere, ptesync, then reload it, and create + * a false dependency on the result of the load. It doesn't matter which + * GPR we store, or where we store it. We have already stored r2 to the + * stack at -8(r1) in isa206_idle_insn_mayloss, so use that. */ #define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ - std r2,0(r1); \ + std r2,-8(r1); \ ptesync; \ - ld r2,0(r1); \ + ld r2,-8(r1); \ 236: cmpd cr0,r2,r2; \ bne 236b; \ IDLE_INST; \ @@ -152,28 +158,32 @@ _GLOBAL(isa206_idle_insn_mayloss) std r1,PACAR1(r13) mflr r4 mfcr r5 - /* use stack red zone rather than a new frame for saving regs */ - std r2,-8*0(r1) - std r14,-8*1(r1) - std r15,-8*2(r1) - std r16,-8*3(r1) - std r17,-8*4(r1) - std r18,-8*5(r1) - std r19,-8*6(r1) - std r20,-8*7(r1) - std r21,-8*8(r1) - std r22,-8*9(r1) - std r23,-8*10(r1) - std r24,-8*11(r1) - std r25,-8*12(r1) - std r26,-8*13(r1) - std r27,-8*14(r1) - std r28,-8*15(r1) - std r29,-8*16(r1) - std r30,-8*17(r1) - std r31,-8*18(r1) - std r4,-8*19(r1) - std r5,-8*20(r1) + /* + * Use the stack red zone rather than a new frame for saving regs since + * in the case of no GPR loss the wakeup code branches directly back to + * the caller without deallocating the stack frame first. + */ + std r2,-8*1(r1) + std r14,-8*2(r1) + std r15,-8*3(r1) + std r16,-8*4(r1) + std r17,-8*5(r1) + std r18,-8*6(r1) + std r19,-8*7(r1) + std r20,-8*8(r1) + std r21,-8*9(r1) + std r22,-8*10(r1) + std r23,-8*11(r1) + std r24,-8*12(r1) + std r25,-8*13(r1) + std r26,-8*14(r1) + std r27,-8*15(r1) + std r28,-8*16(r1) + std r29,-8*17(r1) + std r30,-8*18(r1) + std r31,-8*19(r1) + std r4,-8*20(r1) + std r5,-8*21(r1) cmpwi r3,PNV_THREAD_NAP bne 1f IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c new file mode 100644 index 0000000000000..957abd5920754 --- /dev/null +++ b/arch/powerpc/kernel/ima_arch.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 IBM Corporation + * Author: Nayna Jain + */ + +#include +#include + +bool arch_ima_get_secureboot(void) +{ + return is_ppc_secureboot_enabled(); +} + +/* + * The "secure_rules" are enabled only on "secureboot" enabled systems. + * These rules verify the file signatures against known good values. + * The "appraise_type=imasig|modsig" option allows the known good signature + * to be stored as an xattr or as an appended signature. + * + * To avoid duplicate signature verification as much as possible, the IMA + * policy rule for module appraisal is added only if CONFIG_MODULE_SIG + * is not enabled. + */ +static const char *const secure_rules[] = { + "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", +#ifndef CONFIG_MODULE_SIG + "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", +#endif + NULL +}; + +/* + * The "trusted_rules" are enabled only on "trustedboot" enabled systems. + * These rules add the kexec kernel image and kernel modules file hashes to + * the IMA measurement list. + */ +static const char *const trusted_rules[] = { + "measure func=KEXEC_KERNEL_CHECK", + "measure func=MODULE_CHECK", + NULL +}; + +/* + * The "secure_and_trusted_rules" contains rules for both the secure boot and + * trusted boot. The "template=ima-modsig" option includes the appended + * signature, when available, in the IMA measurement list. + */ +static const char *const secure_and_trusted_rules[] = { + "measure func=KEXEC_KERNEL_CHECK template=ima-modsig", + "measure func=MODULE_CHECK template=ima-modsig", + "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", +#ifndef CONFIG_MODULE_SIG + "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", +#endif + NULL +}; + +/* + * Returns the relevant IMA arch-specific policies based on the system secure + * boot state. + */ +const char *const *arch_get_ima_policy(void) +{ + if (is_ppc_secureboot_enabled()) { + if (IS_ENABLED(CONFIG_MODULE_SIG)) + set_module_sig_enforced(); + + if (is_ppc_trustedboot_enabled()) + return secure_and_trusted_rules; + else + return secure_rules; + } else if (is_ppc_trustedboot_enabled()) { + return trusted_rules; + } + + return NULL; +} diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 9704f3f76e63e..d7d42bd448c4a 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1057,7 +1057,7 @@ int iommu_take_ownership(struct iommu_table *tbl) spin_lock_irqsave(&tbl->large_pool.lock, flags); for (i = 0; i < tbl->nr_pools; i++) - spin_lock(&tbl->pools[i].lock); + spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); iommu_table_release_pages(tbl); @@ -1085,7 +1085,7 @@ void iommu_release_ownership(struct iommu_table *tbl) spin_lock_irqsave(&tbl->large_pool.lock, flags); for (i = 0; i < tbl->nr_pools; i++) - spin_lock(&tbl->pools[i].lock); + spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); memset(tbl->it_map, 0, sz); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 5645bc9cbc09a..add67498c126b 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -619,8 +619,6 @@ void __do_irq(struct pt_regs *regs) trace_irq_entry(regs); - check_stack_overflow(); - /* * Query the platform PIC for the interrupt & ack it. * @@ -652,6 +650,8 @@ void do_IRQ(struct pt_regs *regs) irqsp = hardirq_ctx[raw_smp_processor_id()]; sirqsp = softirq_ctx[raw_smp_processor_id()]; + check_stack_overflow(); + /* Already there ? */ if (unlikely(cursp == irqsp || cursp == sirqsp)) { __do_irq(regs); diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 2d27ec4feee4a..dd01a4abc2582 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -264,6 +264,10 @@ int kprobe_handler(struct pt_regs *regs) if (user_mode(regs)) return 0; + if (!IS_ENABLED(CONFIG_BOOKE) && + (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR))) + return 0; + /* * We don't want to be preempted for the entire * duration of kprobe processing diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 617eba82531cb..d89cf802d9aa7 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -669,7 +669,7 @@ static void __init kvm_use_magic_page(void) on_each_cpu(kvm_map_magic_page, &features, 1); /* Quick self-test to see if the mapping works */ - if (!fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) { + if (fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) { kvm_patching_worked = false; return; } diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S index 2020d255585fa..7684f644e93e4 100644 --- a/arch/powerpc/kernel/l2cr_6xx.S +++ b/arch/powerpc/kernel/l2cr_6xx.S @@ -96,7 +96,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L2CR) /* Stop DST streams */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) @@ -293,7 +293,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR) isync /* Stop DST streams */ - DSSALL + PPC_DSSALL sync /* Get the current enable bit of the L3CR into r4 */ @@ -402,7 +402,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR) _GLOBAL(__flush_disable_L1) /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index c4ed328a7b963..716f8bb17461c 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -114,11 +114,12 @@ void machine_kexec(struct kimage *image) void __init reserve_crashkernel(void) { - unsigned long long crash_size, crash_base; + unsigned long long crash_size, crash_base, total_mem_sz; int ret; + total_mem_sz = memory_limit ? memory_limit : memblock_phys_mem_size(); /* use common parsing */ - ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), + ret = parse_crashkernel(boot_command_line, total_mem_sz, &crash_size, &crash_base); if (ret == 0 && crash_size > 0) { crashk_res.start = crash_base; @@ -145,11 +146,18 @@ void __init reserve_crashkernel(void) if (!crashk_res.start) { #ifdef CONFIG_PPC64 /* - * On 64bit we split the RMO in half but cap it at half of - * a small SLB (128MB) since the crash kernel needs to place - * itself and some stacks to be in the first segment. + * On the LPAR platform place the crash kernel to mid of + * RMA size (512MB or more) to ensure the crash kernel + * gets enough space to place itself and some stack to be + * in the first segment. At the same time normal kernel + * also get enough space to allocate memory for essential + * system resource in the first segment. Keep the crash + * kernel starts at 128MB offset on other platforms. */ - crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); + if (firmware_has_feature(FW_FEATURE_LPAR)) + crashk_res.start = ppc64_rma_size / 2; + else + crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); #else crashk_res.start = KDUMP_KERNELBASE; #endif @@ -177,6 +185,7 @@ void __init reserve_crashkernel(void) /* Crash kernel trumps memory limit */ if (memory_limit && memory_limit <= crashk_res.end) { memory_limit = crashk_res.end + 1; + total_mem_sz = memory_limit; printk("Adjusted memory limit for crashkernel, now 0x%llx\n", memory_limit); } @@ -185,7 +194,7 @@ void __init reserve_crashkernel(void) "for crashkernel (System RAM: %ldMB)\n", (unsigned long)(crash_size >> 20), (unsigned long)(crashk_res.start >> 20), - (unsigned long)(memblock_phys_mem_size() >> 20)); + (unsigned long)(total_mem_sz >> 20)); if (!memblock_is_region_memory(crashk_res.start, crash_size) || memblock_reserve(crashk_res.start, crash_size)) { diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 82df4b09e79f4..f4e4a1926a7a5 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -316,126 +316,6 @@ _GLOBAL(flush_instruction_cache) EXPORT_SYMBOL(flush_instruction_cache) #endif /* CONFIG_PPC_8xx */ -/* - * Write any modified data cache blocks out to memory - * and invalidate the corresponding instruction cache blocks. - * This is a no-op on the 601. - * - * flush_icache_range(unsigned long start, unsigned long stop) - */ -_GLOBAL(flush_icache_range) -#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200) - PURGE_PREFETCHED_INS - blr /* for 601 and e200, do nothing */ -#else - rlwinm r3,r3,0,0,31 - L1_CACHE_SHIFT - subf r4,r3,r4 - addi r4,r4,L1_CACHE_BYTES - 1 - srwi. r4,r4,L1_CACHE_SHIFT - beqlr - mtctr r4 - mr r6,r3 -1: dcbst 0,r3 - addi r3,r3,L1_CACHE_BYTES - bdnz 1b - sync /* wait for dcbst's to get to ram */ -#ifndef CONFIG_44x - mtctr r4 -2: icbi 0,r6 - addi r6,r6,L1_CACHE_BYTES - bdnz 2b -#else - /* Flash invalidate on 44x because we are passed kmapped addresses and - this doesn't work for userspace pages due to the virtually tagged - icache. Sigh. */ - iccci 0, r0 -#endif - sync /* additional sync needed on g4 */ - isync - blr -#endif -_ASM_NOKPROBE_SYMBOL(flush_icache_range) -EXPORT_SYMBOL(flush_icache_range) - -/* - * Flush a particular page from the data cache to RAM. - * Note: this is necessary because the instruction cache does *not* - * snoop from the data cache. - * This is a no-op on the 601 and e200 which have a unified cache. - * - * void __flush_dcache_icache(void *page) - */ -_GLOBAL(__flush_dcache_icache) -#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200) - PURGE_PREFETCHED_INS - blr -#else - rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */ - li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */ - mtctr r4 - mr r6,r3 -0: dcbst 0,r3 /* Write line to ram */ - addi r3,r3,L1_CACHE_BYTES - bdnz 0b - sync -#ifdef CONFIG_44x - /* We don't flush the icache on 44x. Those have a virtual icache - * and we don't have access to the virtual address here (it's - * not the page vaddr but where it's mapped in user space). The - * flushing of the icache on these is handled elsewhere, when - * a change in the address space occurs, before returning to - * user space - */ -BEGIN_MMU_FTR_SECTION - blr -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x) -#endif /* CONFIG_44x */ - mtctr r4 -1: icbi 0,r6 - addi r6,r6,L1_CACHE_BYTES - bdnz 1b - sync - isync - blr -#endif - -#ifndef CONFIG_BOOKE -/* - * Flush a particular page from the data cache to RAM, identified - * by its physical address. We turn off the MMU so we can just use - * the physical address (this may be a highmem page without a kernel - * mapping). - * - * void __flush_dcache_icache_phys(unsigned long physaddr) - */ -_GLOBAL(__flush_dcache_icache_phys) -#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200) - PURGE_PREFETCHED_INS - blr /* for 601 and e200, do nothing */ -#else - mfmsr r10 - rlwinm r0,r10,0,28,26 /* clear DR */ - mtmsr r0 - isync - rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */ - li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */ - mtctr r4 - mr r6,r3 -0: dcbst 0,r3 /* Write line to ram */ - addi r3,r3,L1_CACHE_BYTES - bdnz 0b - sync - mtctr r4 -1: icbi 0,r6 - addi r6,r6,L1_CACHE_BYTES - bdnz 1b - sync - mtmsr r10 /* restore DR */ - isync - blr -#endif -#endif /* CONFIG_BOOKE */ - /* * Copy a whole page. We use the dcbz instruction on the destination * to reduce memory traffic (it eliminates the unnecessary reads of diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index b55a7b4cb543f..ff20c253f2737 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -49,108 +49,6 @@ _GLOBAL(call_do_irq) mtlr r0 blr - .section ".toc","aw" -PPC64_CACHES: - .tc ppc64_caches[TC],ppc64_caches - .section ".text" - -/* - * Write any modified data cache blocks out to memory - * and invalidate the corresponding instruction cache blocks. - * - * flush_icache_range(unsigned long start, unsigned long stop) - * - * flush all bytes from start through stop-1 inclusive - */ - -_GLOBAL_TOC(flush_icache_range) -BEGIN_FTR_SECTION - PURGE_PREFETCHED_INS - blr -END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) -/* - * Flush the data cache to memory - * - * Different systems have different cache line sizes - * and in some cases i-cache and d-cache line sizes differ from - * each other. - */ - ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1BLOCKSIZE(r10)/* Get cache block size */ - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of cache block size */ - srw. r8,r8,r9 /* compute line count */ - beqlr /* nothing to do? */ - mtctr r8 -1: dcbst 0,r6 - add r6,r6,r7 - bdnz 1b - sync - -/* Now invalidate the instruction cache */ - - lwz r7,ICACHEL1BLOCKSIZE(r10) /* Get Icache block size */ - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 - lwz r9,ICACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of Icache block size */ - srw. r8,r8,r9 /* compute line count */ - beqlr /* nothing to do? */ - mtctr r8 -2: icbi 0,r6 - add r6,r6,r7 - bdnz 2b - isync - blr -_ASM_NOKPROBE_SYMBOL(flush_icache_range) -EXPORT_SYMBOL(flush_icache_range) - -/* - * Flush a particular page from the data cache to RAM. - * Note: this is necessary because the instruction cache does *not* - * snoop from the data cache. - * - * void __flush_dcache_icache(void *page) - */ -_GLOBAL(__flush_dcache_icache) -/* - * Flush the data cache to memory - * - * Different systems have different cache line sizes - */ - -BEGIN_FTR_SECTION - PURGE_PREFETCHED_INS - blr -END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) - -/* Flush the dcache */ - ld r7,PPC64_CACHES@toc(r2) - clrrdi r3,r3,PAGE_SHIFT /* Page align */ - lwz r4,DCACHEL1BLOCKSPERPAGE(r7) /* Get # dcache blocks per page */ - lwz r5,DCACHEL1BLOCKSIZE(r7) /* Get dcache block size */ - mr r6,r3 - mtctr r4 -0: dcbst 0,r6 - add r6,r6,r5 - bdnz 0b - sync - -/* Now invalidate the icache */ - - lwz r4,ICACHEL1BLOCKSPERPAGE(r7) /* Get # icache blocks per page */ - lwz r5,ICACHEL1BLOCKSIZE(r7) /* Get icache block size */ - mtctr r4 -1: icbi 0,r3 - add r3,r3,r5 - bdnz 1b - isync - blr - _GLOBAL(__bswapdi2) EXPORT_SYMBOL(__bswapdi2) srdi r8,r3,32 diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 949eceb254d85..c786adfb9413f 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -86,7 +86,7 @@ static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align, * This is very early in boot, so no harm done if the kernel crashes at * this point. */ - BUG_ON(shared_lppaca_size >= shared_lppaca_total_size); + BUG_ON(shared_lppaca_size > shared_lppaca_total_size); return ptr; } @@ -214,11 +214,15 @@ void setup_paca(struct paca_struct *new_paca) /* On Book3E, initialize the TLB miss exception frames */ mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); #else - /* In HV mode, we setup both HPACA and PACA to avoid problems + /* + * In HV mode, we setup both HPACA and PACA to avoid problems * if we do a GET_PACA() before the feature fixups have been - * applied + * applied. + * + * Normally you should test against CPU_FTR_HVMODE, but CPU features + * are not yet set up when we first reach here. */ - if (early_cpu_has_feature(CPU_FTR_HVMODE)) + if (mfmsr() & MSR_HV) mtspr(SPRN_SPRG_HPACA, local_paca); #endif mtspr(SPRN_SPRG_PACA, local_paca); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 1c448cf255061..f97e1e5a9c68b 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -66,23 +66,35 @@ void set_pci_dma_ops(const struct dma_map_ops *dma_ops) pci_dma_ops = dma_ops; } -/* - * This function should run under locking protection, specifically - * hose_spinlock. - */ static int get_phb_number(struct device_node *dn) { int ret, phb_id = -1; - u32 prop_32; u64 prop; /* * Try fixed PHB numbering first, by checking archs and reading - * the respective device-tree properties. Firstly, try powernv by - * reading "ibm,opal-phbid", only present in OPAL environment. + * the respective device-tree properties. Firstly, try reading + * standard "linux,pci-domain", then try reading "ibm,opal-phbid" + * (only present in powernv OPAL environment), then try device-tree + * alias and as the last try to use lower bits of "reg" property. */ - ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); + ret = of_get_pci_domain_nr(dn); + if (ret >= 0) { + prop = ret; + ret = 0; + } + if (ret) + ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); + + if (ret) { + ret = of_alias_get_id(dn, "pci"); + if (ret >= 0) { + prop = ret; + ret = 0; + } + } if (ret) { + u32 prop_32; ret = of_property_read_u32_index(dn, "reg", 1, &prop_32); prop = prop_32; } @@ -90,18 +102,20 @@ static int get_phb_number(struct device_node *dn) if (!ret) phb_id = (int)(prop & (MAX_PHBS - 1)); + spin_lock(&hose_spinlock); + /* We need to be sure to not use the same PHB number twice. */ if ((phb_id >= 0) && !test_and_set_bit(phb_id, phb_bitmap)) - return phb_id; + goto out_unlock; - /* - * If not pseries nor powernv, or if fixed PHB numbering tried to add - * the same PHB number twice, then fallback to dynamic PHB numbering. - */ + /* If everything fails then fallback to dynamic PHB numbering. */ phb_id = find_first_zero_bit(phb_bitmap, MAX_PHBS); BUG_ON(phb_id >= MAX_PHBS); set_bit(phb_id, phb_bitmap); +out_unlock: + spin_unlock(&hose_spinlock); + return phb_id; } @@ -112,10 +126,13 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev) phb = zalloc_maybe_bootmem(sizeof(struct pci_controller), GFP_KERNEL); if (phb == NULL) return NULL; - spin_lock(&hose_spinlock); + phb->global_number = get_phb_number(dev); + + spin_lock(&hose_spinlock); list_add_tail(&phb->list_node, &hose_list); spin_unlock(&hose_spinlock); + phb->dn = dev; phb->is_dynamic = slab_is_available(); #ifdef CONFIG_PPC64 @@ -347,6 +364,7 @@ struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node) } return NULL; } +EXPORT_SYMBOL(pci_find_hose_for_OF_device); struct pci_controller *pci_find_controller_for_domain(int domain_nr) { @@ -1577,6 +1595,7 @@ int early_find_capability(struct pci_controller *hose, int bus, int devfn, { return pci_bus_find_capability(fake_pci_bus(hose, bus), devfn, cap); } +EXPORT_SYMBOL_GPL(early_find_capability); struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus) { @@ -1669,3 +1688,13 @@ static void fixup_hide_host_resource_fsl(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl); + + +static int __init discover_phbs(void) +{ + if (ppc_md.discover_phbs) + ppc_md.discover_phbs(); + + return 0; +} +core_initcall(discover_phbs); diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 9524009ca1ae4..d876eda926094 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -244,9 +244,22 @@ void remove_dev_pci_data(struct pci_dev *pdev) continue; #ifdef CONFIG_EEH - /* Release EEH device for the VF */ + /* + * Release EEH state for this VF. The PCI core + * has already torn down the pci_dev for this VF, but + * we're responsible to removing the eeh_dev since it + * has the same lifetime as the pci_dn that spawned it. + */ edev = pdn_to_eeh_dev(pdn); if (edev) { + /* + * We allocate pci_dn's for the totalvfs count, + * but only only the vfs that were activated + * have a configured PE. + */ + if (edev->pe) + eeh_rmv_from_parent_pe(edev); + pdn->edev = NULL; kfree(edev); } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 639ceae7da9d8..cf87573e6e785 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1218,29 +1218,31 @@ struct task_struct *__switch_to(struct task_struct *prev, static void show_instructions(struct pt_regs *regs) { int i; + unsigned long nip = regs->nip; unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int)); printk("Instruction dump:"); + /* + * If we were executing with the MMU off for instructions, adjust pc + * rather than printing XXXXXXXX. + */ + if (!IS_ENABLED(CONFIG_BOOKE) && !(regs->msr & MSR_IR)) { + pc = (unsigned long)phys_to_virt(pc); + nip = (unsigned long)phys_to_virt(regs->nip); + } + for (i = 0; i < NR_INSN_TO_PRINT; i++) { int instr; if (!(i % 8)) pr_cont("\n"); -#if !defined(CONFIG_BOOKE) - /* If executing with the IMMU off, adjust pc rather - * than print XXXXXXXX. - */ - if (!(regs->msr & MSR_IR)) - pc = (unsigned long)phys_to_virt(pc); -#endif - if (!__kernel_text_address(pc) || probe_kernel_address((const void *)pc, instr)) { pr_cont("XXXXXXXX "); } else { - if (regs->nip == pc) + if (nip == pc) pr_cont("<%08x> ", instr); else pr_cont("%08x ", instr); @@ -1717,7 +1719,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) tm_reclaim_current(0); #endif - memset(regs->gpr, 0, sizeof(regs->gpr)); + memset(®s->gpr[1], 0, sizeof(regs->gpr) - sizeof(regs->gpr[0])); regs->ctr = 0; regs->link = 0; regs->xer = 0; @@ -1999,12 +2001,12 @@ static unsigned long __get_wchan(struct task_struct *p) return 0; do { - sp = *(unsigned long *)sp; + sp = READ_ONCE_NOCHECK(*(unsigned long *)sp); if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) || p->state == TASK_RUNNING) return 0; if (count > 0) { - ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE]; + ip = READ_ONCE_NOCHECK(((unsigned long *)sp)[STACK_FRAME_LR_SAVE]); if (!in_sched_functions(ip)) return ip; } @@ -2079,7 +2081,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) * See if this is an exception frame. * We look for the "regshere" marker in the current frame. */ - if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE) + if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS) && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 6620f37abe732..d1ba175013430 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -266,7 +266,7 @@ static struct feature_property { }; #if defined(CONFIG_44x) && defined(CONFIG_PPC_FPU) -static inline void identical_pvr_fixup(unsigned long node) +static __init void identical_pvr_fixup(unsigned long node) { unsigned int pvr; const char *model = of_get_flat_dt_prop(node, "model", NULL); @@ -685,6 +685,23 @@ static void __init tm_init(void) static void tm_init(void) { } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +#ifdef CONFIG_PPC64 +static void __init save_fscr_to_task(void) +{ + /* + * Ensure the init_task (pid 0, aka swapper) uses the value of FSCR we + * have configured via the device tree features or via __init_FSCR(). + * That value will then be propagated to pid 1 (init) and all future + * processes. + */ + if (early_cpu_has_feature(CPU_FTR_ARCH_207S)) + init_task.thread.fscr = mfspr(SPRN_FSCR); +} +#else +static inline void save_fscr_to_task(void) {}; +#endif + + void __init early_init_devtree(void *params) { phys_addr_t limit; @@ -723,6 +740,13 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); + /* + * As generic code authors expect to be able to use static keys + * in early_param() handlers, we initialize the static keys just + * before parsing early params (it's fine to call jump_label_init() + * more than once). + */ + jump_label_init(); parse_early_param(); /* make sure we've parsed cmdline for mem= before this */ @@ -773,6 +797,8 @@ void __init early_init_devtree(void *params) BUG(); } + save_fscr_to_task(); + #if defined(CONFIG_SMP) && defined(CONFIG_PPC64) /* We'll later wait for secondaries to check in; there are * NCPUS-1 non-boot CPUs :-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 100f1b57ec2fd..7f4e2c031a9ab 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1053,7 +1053,7 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { .reserved2 = 0, .reserved3 = 0, .subprocessors = 1, - .byte22 = OV5_FEAT(OV5_DRMEM_V2), + .byte22 = OV5_FEAT(OV5_DRMEM_V2) | OV5_FEAT(OV5_DRC_INFO), .intarch = 0, .mmu = 0, .hash_ext = 0, @@ -1305,14 +1305,10 @@ static void __init prom_check_platform_support(void) if (prop_len > sizeof(vec)) prom_printf("WARNING: ibm,arch-vec-5-platform-support longer than expected (len: %d)\n", prop_len); - prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", - &vec, sizeof(vec)); - for (i = 0; i < sizeof(vec); i += 2) { - prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2 - , vec[i] - , vec[i + 1]); - prom_parse_platform_support(vec[i], vec[i + 1], - &supported); + prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", &vec, sizeof(vec)); + for (i = 0; i < prop_len; i += 2) { + prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2, vec[i], vec[i + 1]); + prom_parse_platform_support(vec[i], vec[i + 1], &supported); } } @@ -1761,6 +1757,9 @@ static void __init prom_rtas_os_term(char *str) if (token == 0) prom_panic("Could not get token for ibm,os-term\n"); os_term_args.token = cpu_to_be32(token); + os_term_args.nargs = cpu_to_be32(1); + os_term_args.nret = cpu_to_be32(1); + os_term_args.args[0] = cpu_to_be32(__pa(str)); prom_rtas_hcall((uint64_t)&os_term_args); } #endif /* CONFIG_PPC_SVM */ @@ -2920,7 +2919,7 @@ static void __init fixup_device_tree_efika_add_phy(void) /* Check if the phy-handle property exists - bail if it does */ rv = prom_getprop(node, "phy-handle", prop, sizeof(prop)); - if (!rv) + if (rv <= 0) return; /* diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index b183ab9c5107c..dfa5f729f774d 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -13,7 +13,7 @@ # If you really need to reference something from prom_init.o add # it to the list below: -grep "^CONFIG_KASAN=y$" .config >/dev/null +grep "^CONFIG_KASAN=y$" ${KCONFIG_CONFIG} >/dev/null if [ $? -eq 0 ] then MEM_FUNCS="__memcpy __memset" diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 8c92febf5f443..63bfc5250b67e 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -3014,8 +3014,13 @@ long arch_ptrace(struct task_struct *child, long request, flush_fp_to_thread(child); if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&tmp, &child->thread.TS_FPR(fpidx), - sizeof(long)); + if (IS_ENABLED(CONFIG_PPC32)) { + // On 32-bit the index we are passed refers to 32-bit words + tmp = ((u32 *)child->thread.fp_state.fpr)[fpidx]; + } else { + memcpy(&tmp, &child->thread.TS_FPR(fpidx), + sizeof(long)); + } else tmp = child->thread.fp_state.fpscr; } @@ -3047,8 +3052,13 @@ long arch_ptrace(struct task_struct *child, long request, flush_fp_to_thread(child); if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&child->thread.TS_FPR(fpidx), &data, - sizeof(long)); + if (IS_ENABLED(CONFIG_PPC32)) { + // On 32-bit the index we are passed refers to 32-bit words + ((u32 *)child->thread.fp_state.fpr)[fpidx] = data; + } else { + memcpy(&child->thread.TS_FPR(fpidx), &data, + sizeof(long)); + } else child->thread.fp_state.fpscr = data; ret = 0; @@ -3398,4 +3408,7 @@ void __init pt_regs_check(void) offsetof(struct user_pt_regs, result)); BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs)); + + // ptrace_get/put_fpr() rely on PPC32 and VSX being incompatible + BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_VSX)); } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index c5fa251b8950c..35e246e39705b 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -842,96 +842,6 @@ static void rtas_percpu_suspend_me(void *info) __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); } -enum rtas_cpu_state { - DOWN, - UP, -}; - -#ifndef CONFIG_SMP -static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, - cpumask_var_t cpus) -{ - if (!cpumask_empty(cpus)) { - cpumask_clear(cpus); - return -EINVAL; - } else - return 0; -} -#else -/* On return cpumask will be altered to indicate CPUs changed. - * CPUs with states changed will be set in the mask, - * CPUs with status unchanged will be unset in the mask. */ -static int rtas_cpu_state_change_mask(enum rtas_cpu_state state, - cpumask_var_t cpus) -{ - int cpu; - int cpuret = 0; - int ret = 0; - - if (cpumask_empty(cpus)) - return 0; - - for_each_cpu(cpu, cpus) { - struct device *dev = get_cpu_device(cpu); - - switch (state) { - case DOWN: - cpuret = device_offline(dev); - break; - case UP: - cpuret = device_online(dev); - break; - } - if (cpuret < 0) { - pr_debug("%s: cpu_%s for cpu#%d returned %d.\n", - __func__, - ((state == UP) ? "up" : "down"), - cpu, cpuret); - if (!ret) - ret = cpuret; - if (state == UP) { - /* clear bits for unchanged cpus, return */ - cpumask_shift_right(cpus, cpus, cpu); - cpumask_shift_left(cpus, cpus, cpu); - break; - } else { - /* clear bit for unchanged cpu, continue */ - cpumask_clear_cpu(cpu, cpus); - } - } - cond_resched(); - } - - return ret; -} -#endif - -int rtas_online_cpus_mask(cpumask_var_t cpus) -{ - int ret; - - ret = rtas_cpu_state_change_mask(UP, cpus); - - if (ret) { - cpumask_var_t tmp_mask; - - if (!alloc_cpumask_var(&tmp_mask, GFP_KERNEL)) - return ret; - - /* Use tmp_mask to preserve cpus mask from first failure */ - cpumask_copy(tmp_mask, cpus); - rtas_offline_cpus_mask(tmp_mask); - free_cpumask_var(tmp_mask); - } - - return ret; -} - -int rtas_offline_cpus_mask(cpumask_var_t cpus) -{ - return rtas_cpu_state_change_mask(DOWN, cpus); -} - int rtas_ibm_suspend_me(u64 handle) { long state; @@ -939,8 +849,6 @@ int rtas_ibm_suspend_me(u64 handle) unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; struct rtas_suspend_me_data data; DECLARE_COMPLETION_ONSTACK(done); - cpumask_var_t offline_mask; - int cpuret; if (!rtas_service_present("ibm,suspend-me")) return -ENOSYS; @@ -961,9 +869,6 @@ int rtas_ibm_suspend_me(u64 handle) return -EIO; } - if (!alloc_cpumask_var(&offline_mask, GFP_KERNEL)) - return -ENOMEM; - atomic_set(&data.working, 0); atomic_set(&data.done, 0); atomic_set(&data.error, 0); @@ -972,24 +877,8 @@ int rtas_ibm_suspend_me(u64 handle) lock_device_hotplug(); - /* All present CPUs must be online */ - cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask); - cpuret = rtas_online_cpus_mask(offline_mask); - if (cpuret) { - pr_err("%s: Could not bring present CPUs online.\n", __func__); - atomic_set(&data.error, cpuret); - goto out; - } - cpu_hotplug_disable(); - /* Check if we raced with a CPU-Offline Operation */ - if (!cpumask_equal(cpu_present_mask, cpu_online_mask)) { - pr_info("%s: Raced against a concurrent CPU-Offline\n", __func__); - atomic_set(&data.error, -EAGAIN); - goto out_hotplug_enable; - } - /* Call function on all CPUs. One of us will make the * rtas call */ @@ -1000,18 +889,11 @@ int rtas_ibm_suspend_me(u64 handle) if (atomic_read(&data.error) != 0) printk(KERN_ERR "Error doing global join\n"); -out_hotplug_enable: - cpu_hotplug_enable(); - /* Take down CPUs not online prior to suspend */ - cpuret = rtas_offline_cpus_mask(offline_mask); - if (cpuret) - pr_warn("%s: Could not restore CPUs to offline state.\n", - __func__); + cpu_hotplug_enable(); -out: unlock_device_hotplug(); - free_cpumask_var(offline_mask); + return atomic_read(&data.error); } #else /* CONFIG_PPC_PSERIES */ @@ -1058,6 +940,156 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, return NULL; } +#ifdef CONFIG_PPC_RTAS_FILTER + +/* + * The sys_rtas syscall, as originally designed, allows root to pass + * arbitrary physical addresses to RTAS calls. A number of RTAS calls + * can be abused to write to arbitrary memory and do other things that + * are potentially harmful to system integrity, and thus should only + * be used inside the kernel and not exposed to userspace. + * + * All known legitimate users of the sys_rtas syscall will only ever + * pass addresses that fall within the RMO buffer, and use a known + * subset of RTAS calls. + * + * Accordingly, we filter RTAS requests to check that the call is + * permitted, and that provided pointers fall within the RMO buffer. + * The rtas_filters list contains an entry for each permitted call, + * with the indexes of the parameters which are expected to contain + * addresses and sizes of buffers allocated inside the RMO buffer. + */ +struct rtas_filter { + const char *name; + int token; + /* Indexes into the args buffer, -1 if not used */ + int buf_idx1; + int size_idx1; + int buf_idx2; + int size_idx2; + + int fixed_size; +}; + +static struct rtas_filter rtas_filters[] __ro_after_init = { + { "ibm,activate-firmware", -1, -1, -1, -1, -1 }, + { "ibm,configure-connector", -1, 0, -1, 1, -1, 4096 }, /* Special cased */ + { "display-character", -1, -1, -1, -1, -1 }, + { "ibm,display-message", -1, 0, -1, -1, -1 }, + { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 }, + { "ibm,close-errinjct", -1, -1, -1, -1, -1 }, + { "ibm,open-errinjct", -1, -1, -1, -1, -1 }, + { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 }, + { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 }, + { "ibm,get-indices", -1, 2, 3, -1, -1 }, + { "get-power-level", -1, -1, -1, -1, -1 }, + { "get-sensor-state", -1, -1, -1, -1, -1 }, + { "ibm,get-system-parameter", -1, 1, 2, -1, -1 }, + { "get-time-of-day", -1, -1, -1, -1, -1 }, + { "ibm,get-vpd", -1, 0, -1, 1, 2 }, + { "ibm,lpar-perftools", -1, 2, 3, -1, -1 }, + { "ibm,platform-dump", -1, 4, 5, -1, -1 }, /* Special cased */ + { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 }, + { "ibm,scan-log-dump", -1, 0, 1, -1, -1 }, + { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 }, + { "ibm,set-eeh-option", -1, -1, -1, -1, -1 }, + { "set-indicator", -1, -1, -1, -1, -1 }, + { "set-power-level", -1, -1, -1, -1, -1 }, + { "set-time-for-power-on", -1, -1, -1, -1, -1 }, + { "ibm,set-system-parameter", -1, 1, -1, -1, -1 }, + { "set-time-of-day", -1, -1, -1, -1, -1 }, + { "ibm,suspend-me", -1, -1, -1, -1, -1 }, + { "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 }, + { "ibm,update-properties", -1, 0, -1, -1, -1, 4096 }, + { "ibm,physical-attestation", -1, 0, 1, -1, -1 }, +}; + +static bool in_rmo_buf(u32 base, u32 end) +{ + return base >= rtas_rmo_buf && + base < (rtas_rmo_buf + RTAS_RMOBUF_MAX) && + base <= end && + end >= rtas_rmo_buf && + end < (rtas_rmo_buf + RTAS_RMOBUF_MAX); +} + +static bool block_rtas_call(int token, int nargs, + struct rtas_args *args) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { + struct rtas_filter *f = &rtas_filters[i]; + u32 base, size, end; + + if (token != f->token) + continue; + + if (f->buf_idx1 != -1) { + base = be32_to_cpu(args->args[f->buf_idx1]); + if (f->size_idx1 != -1) + size = be32_to_cpu(args->args[f->size_idx1]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; + + end = base + size - 1; + + /* + * Special case for ibm,platform-dump - NULL buffer + * address is used to indicate end of dump processing + */ + if (!strcmp(f->name, "ibm,platform-dump") && + base == 0) + return false; + + if (!in_rmo_buf(base, end)) + goto err; + } + + if (f->buf_idx2 != -1) { + base = be32_to_cpu(args->args[f->buf_idx2]); + if (f->size_idx2 != -1) + size = be32_to_cpu(args->args[f->size_idx2]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; + end = base + size - 1; + + /* + * Special case for ibm,configure-connector where the + * address can be 0 + */ + if (!strcmp(f->name, "ibm,configure-connector") && + base == 0) + return false; + + if (!in_rmo_buf(base, end)) + goto err; + } + + return false; + } + +err: + pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n"); + pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n", + token, nargs, current->comm); + return true; +} + +#else + +static bool block_rtas_call(int token, int nargs, + struct rtas_args *args) +{ + return false; +} + +#endif /* CONFIG_PPC_RTAS_FILTER */ + /* We assume to be passed big endian arguments */ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { @@ -1095,6 +1127,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) args.rets = &args.args[nargs]; memset(args.rets, 0, nret * sizeof(rtas_arg_t)); + if (block_rtas_call(token, nargs, &args)) + return -EINVAL; + /* Need to handle ibm,suspend_me call specially */ if (token == ibm_suspend_me_token) { @@ -1156,6 +1191,9 @@ void __init rtas_initialize(void) unsigned long rtas_region = RTAS_INSTANTIATE_MAX; u32 base, size, entry; int no_base, no_size, no_entry; +#ifdef CONFIG_PPC_RTAS_FILTER + int i; +#endif /* Get RTAS dev node and fill up our "rtas" structure with infos * about it. @@ -1195,6 +1233,12 @@ void __init rtas_initialize(void) #ifdef CONFIG_RTAS_ERROR_LOGGING rtas_last_error_token = rtas_token("rtas-last-error"); #endif + +#ifdef CONFIG_PPC_RTAS_FILTER + for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { + rtas_filters[i].token = rtas_token(rtas_filters[i].name); + } +#endif } int __init early_init_dt_scan_rtas(unsigned long node, @@ -1209,6 +1253,12 @@ int __init early_init_dt_scan_rtas(unsigned long node, entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL); sizep = of_get_flat_dt_prop(node, "rtas-size", NULL); +#ifdef CONFIG_PPC64 + /* need this feature to decide the crashkernel offset */ + if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL)) + powerpc_firmware_features |= FW_FEATURE_LPAR; +#endif + if (basep && entryp && sizep) { rtas.base = *basep; rtas.entry = *entryp; diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c new file mode 100644 index 0000000000000..4b982324d3681 --- /dev/null +++ b/arch/powerpc/kernel/secure_boot.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 IBM Corporation + * Author: Nayna Jain + */ +#include +#include +#include + +static struct device_node *get_ppc_fw_sb_node(void) +{ + static const struct of_device_id ids[] = { + { .compatible = "ibm,secureboot", }, + { .compatible = "ibm,secureboot-v1", }, + { .compatible = "ibm,secureboot-v2", }, + {}, + }; + + return of_find_matching_node(NULL, ids); +} + +bool is_ppc_secureboot_enabled(void) +{ + struct device_node *node; + bool enabled = false; + + node = get_ppc_fw_sb_node(); + enabled = of_property_read_bool(node, "os-secureboot-enforcing"); + + of_node_put(node); + + pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled"); + + return enabled; +} + +bool is_ppc_trustedboot_enabled(void) +{ + struct device_node *node; + bool enabled = false; + + node = get_ppc_fw_sb_node(); + enabled = of_property_read_bool(node, "trusted-enabled"); + + of_node_put(node); + + pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled"); + + return enabled; +} diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 7cfcb294b11ca..ff022e7256934 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -16,7 +16,7 @@ #include -unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; +u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; enum count_cache_flush_type { COUNT_CACHE_FLUSH_NONE = 0x1, @@ -24,6 +24,7 @@ enum count_cache_flush_type { COUNT_CACHE_FLUSH_HW = 0x4, }; static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; +static bool link_stack_flush_enabled; bool barrier_nospec_enabled; static bool no_nospec; @@ -108,7 +109,7 @@ device_initcall(barrier_nospec_debugfs_init); static __init int security_feature_debugfs_init(void) { debugfs_create_x64("security_features", 0400, powerpc_debugfs_root, - (u64 *)&powerpc_security_features); + &powerpc_security_features); return 0; } device_initcall(security_feature_debugfs_init); @@ -141,32 +142,33 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV); - if (rfi_flush || thread_priv) { + if (rfi_flush) { struct seq_buf s; seq_buf_init(&s, buf, PAGE_SIZE - 1); - seq_buf_printf(&s, "Mitigation: "); - - if (rfi_flush) - seq_buf_printf(&s, "RFI Flush"); - - if (rfi_flush && thread_priv) - seq_buf_printf(&s, ", "); - + seq_buf_printf(&s, "Mitigation: RFI Flush"); if (thread_priv) - seq_buf_printf(&s, "L1D private per thread"); + seq_buf_printf(&s, ", L1D private per thread"); seq_buf_printf(&s, "\n"); return s.len; } + if (thread_priv) + return sprintf(buf, "Vulnerable: L1D private per thread\n"); + if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)) return sprintf(buf, "Not affected\n"); return sprintf(buf, "Vulnerable\n"); } + +ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_meltdown(dev, attr, buf); +} #endif ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) @@ -212,11 +214,19 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c if (ccd) seq_buf_printf(&s, "Indirect branch cache disabled"); + + if (link_stack_flush_enabled) + seq_buf_printf(&s, ", Software link stack flush"); + } else if (count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) { seq_buf_printf(&s, "Mitigation: Software count cache flush"); if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW) seq_buf_printf(&s, " (hardware accelerated)"); + + if (link_stack_flush_enabled) + seq_buf_printf(&s, ", Software link stack flush"); + } else if (btb_flush_enabled) { seq_buf_printf(&s, "Mitigation: Branch predictor state flush"); } else { @@ -246,6 +256,11 @@ static int __init handle_no_stf_barrier(char *p) early_param("no_stf_barrier", handle_no_stf_barrier); +enum stf_barrier_type stf_barrier_type_get(void) +{ + return stf_enabled_flush_types; +} + /* This is the generic flag used by other architectures */ static int __init handle_ssbd(char *p) { @@ -377,18 +392,49 @@ static __init int stf_barrier_debugfs_init(void) device_initcall(stf_barrier_debugfs_init); #endif /* CONFIG_DEBUG_FS */ +static void no_count_cache_flush(void) +{ + count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; + pr_info("count-cache-flush: software flush disabled.\n"); +} + static void toggle_count_cache_flush(bool enable) { - if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { + if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE) && + !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK)) + enable = false; + + if (!enable) { patch_instruction_site(&patch__call_flush_count_cache, PPC_INST_NOP); - count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; - pr_info("count-cache-flush: software flush disabled.\n"); +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + patch_instruction_site(&patch__call_kvm_flush_link_stack, PPC_INST_NOP); +#endif + pr_info("link-stack-flush: software flush disabled.\n"); + link_stack_flush_enabled = false; + no_count_cache_flush(); return; } + // This enables the branch from _switch to flush_count_cache patch_branch_site(&patch__call_flush_count_cache, (u64)&flush_count_cache, BRANCH_SET_LINK); +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + // This enables the branch from guest_exit_cont to kvm_flush_link_stack + patch_branch_site(&patch__call_kvm_flush_link_stack, + (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); +#endif + + pr_info("link-stack-flush: software flush enabled.\n"); + link_stack_flush_enabled = true; + + // If we just need to flush the link stack, patch an early return + if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { + patch_instruction_site(&patch__flush_link_stack_return, PPC_INST_BLR); + no_count_cache_flush(); + return; + } + if (!security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) { count_cache_flush_type = COUNT_CACHE_FLUSH_SW; pr_info("count-cache-flush: full software flush sequence enabled.\n"); @@ -407,11 +453,20 @@ void setup_count_cache_flush(void) if (no_spectrev2 || cpu_mitigations_off()) { if (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED) || security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED)) - pr_warn("Spectre v2 mitigations not under software control, can't disable\n"); + pr_warn("Spectre v2 mitigations not fully under software control, can't disable\n"); enable = false; } + /* + * There's no firmware feature flag/hypervisor bit to tell us we need to + * flush the link stack on context switch. So we set it here if we see + * either of the Spectre v2 mitigations that aim to protect userspace. + */ + if (security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED) || + security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) + security_ftr_set(SEC_FTR_FLUSH_LINK_STACK); + toggle_count_cache_flush(enable); } diff --git a/arch/powerpc/kernel/secvar-ops.c b/arch/powerpc/kernel/secvar-ops.c new file mode 100644 index 0000000000000..6a29777d6a2dd --- /dev/null +++ b/arch/powerpc/kernel/secvar-ops.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 IBM Corporation + * Author: Nayna Jain + * + * This file initializes secvar operations for PowerPC Secureboot + */ + +#include +#include + +const struct secvar_operations *secvar_ops __ro_after_init; + +void set_secvar_ops(const struct secvar_operations *ops) +{ + secvar_ops = ops; +} diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c new file mode 100644 index 0000000000000..a0a78aba2083e --- /dev/null +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2019 IBM Corporation + * + * This code exposes secure variables to user via sysfs + */ + +#define pr_fmt(fmt) "secvar-sysfs: "fmt + +#include +#include +#include +#include +#include + +#define NAME_MAX_SIZE 1024 + +static struct kobject *secvar_kobj; +static struct kset *secvar_kset; + +static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + ssize_t rc = 0; + struct device_node *node; + const char *format; + + node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); + if (!of_device_is_available(node)) + return -ENODEV; + + rc = of_property_read_string(node, "format", &format); + if (rc) + return rc; + + rc = sprintf(buf, "%s\n", format); + + of_node_put(node); + + return rc; +} + + +static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + uint64_t dsize; + int rc; + + rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize); + if (rc) { + pr_err("Error retrieving %s variable size %d\n", kobj->name, + rc); + return rc; + } + + return sprintf(buf, "%llu\n", dsize); +} + +static ssize_t data_read(struct file *filep, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, + size_t count) +{ + uint64_t dsize; + char *data; + int rc; + + rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize); + if (rc) { + pr_err("Error getting %s variable size %d\n", kobj->name, rc); + return rc; + } + pr_debug("dsize is %llu\n", dsize); + + data = kzalloc(dsize, GFP_KERNEL); + if (!data) + return -ENOMEM; + + rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, data, &dsize); + if (rc) { + pr_err("Error getting %s variable %d\n", kobj->name, rc); + goto data_fail; + } + + rc = memory_read_from_buffer(buf, count, &off, data, dsize); + +data_fail: + kfree(data); + return rc; +} + +static ssize_t update_write(struct file *filep, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t off, + size_t count) +{ + int rc; + + pr_debug("count is %ld\n", count); + rc = secvar_ops->set(kobj->name, strlen(kobj->name) + 1, buf, count); + if (rc) { + pr_err("Error setting the %s variable %d\n", kobj->name, rc); + return rc; + } + + return count; +} + +static struct kobj_attribute format_attr = __ATTR_RO(format); + +static struct kobj_attribute size_attr = __ATTR_RO(size); + +static struct bin_attribute data_attr = __BIN_ATTR_RO(data, 0); + +static struct bin_attribute update_attr = __BIN_ATTR_WO(update, 0); + +static struct bin_attribute *secvar_bin_attrs[] = { + &data_attr, + &update_attr, + NULL, +}; + +static struct attribute *secvar_attrs[] = { + &size_attr.attr, + NULL, +}; + +static const struct attribute_group secvar_attr_group = { + .attrs = secvar_attrs, + .bin_attrs = secvar_bin_attrs, +}; +__ATTRIBUTE_GROUPS(secvar_attr); + +static struct kobj_type secvar_ktype = { + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = secvar_attr_groups, +}; + +static int update_kobj_size(void) +{ + + struct device_node *node; + u64 varsize; + int rc = 0; + + node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); + if (!of_device_is_available(node)) { + rc = -ENODEV; + goto out; + } + + rc = of_property_read_u64(node, "max-var-size", &varsize); + if (rc) + goto out; + + data_attr.size = varsize; + update_attr.size = varsize; + +out: + of_node_put(node); + + return rc; +} + +static int secvar_sysfs_load(void) +{ + char *name; + uint64_t namesize = 0; + struct kobject *kobj; + int rc; + + name = kzalloc(NAME_MAX_SIZE, GFP_KERNEL); + if (!name) + return -ENOMEM; + + do { + rc = secvar_ops->get_next(name, &namesize, NAME_MAX_SIZE); + if (rc) { + if (rc != -ENOENT) + pr_err("error getting secvar from firmware %d\n", + rc); + break; + } + + kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); + if (!kobj) { + rc = -ENOMEM; + break; + } + + kobject_init(kobj, &secvar_ktype); + + rc = kobject_add(kobj, &secvar_kset->kobj, "%s", name); + if (rc) { + pr_warn("kobject_add error %d for attribute: %s\n", rc, + name); + kobject_put(kobj); + kobj = NULL; + } + + if (kobj) + kobject_uevent(kobj, KOBJ_ADD); + + } while (!rc); + + kfree(name); + return rc; +} + +static int secvar_sysfs_init(void) +{ + int rc; + + if (!secvar_ops) { + pr_warn("secvar: failed to retrieve secvar operations.\n"); + return -ENODEV; + } + + secvar_kobj = kobject_create_and_add("secvar", firmware_kobj); + if (!secvar_kobj) { + pr_err("secvar: Failed to create firmware kobj\n"); + return -ENOMEM; + } + + rc = sysfs_create_file(secvar_kobj, &format_attr.attr); + if (rc) { + kobject_put(secvar_kobj); + return -ENOMEM; + } + + secvar_kset = kset_create_and_add("vars", NULL, secvar_kobj); + if (!secvar_kset) { + pr_err("secvar: sysfs kobject registration failed.\n"); + kobject_put(secvar_kobj); + return -ENOMEM; + } + + rc = update_kobj_size(); + if (rc) { + pr_err("Cannot read the size of the attribute\n"); + return rc; + } + + secvar_sysfs_load(); + + return 0; +} + +late_initcall(secvar_sysfs_init); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 25aaa39030009..9331d15a00852 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -64,6 +65,7 @@ #include #include #include +#include #include "setup.h" @@ -855,6 +857,16 @@ void __init setup_arch(char **cmdline_p) */ initialize_cache_info(); + /* + * Lock down the kernel if booted in secure mode. This is required to + * maintain kernel integrity. + */ + if (IS_ENABLED(CONFIG_LOCK_DOWN_IN_SECURE_BOOT)) { + if (is_ppc_secureboot_enabled()) + security_lock_kernel_down("PowerNV Secure Boot mode", + LOCKDOWN_INTEGRITY_MAX); + } + /* Initialize RTAS if available. */ rtas_initialize(); @@ -903,8 +915,6 @@ void __init setup_arch(char **cmdline_p) /* On BookE, setup per-core TLB data structures. */ setup_tlb_core_data(); - - smp_release_cpus(); #endif /* Print various info about the machine that has been gathered so far. */ @@ -925,6 +935,8 @@ void __init setup_arch(char **cmdline_p) exc_lvl_early_init(); emergency_stack_init(); + smp_release_cpus(); + initmem_init(); early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 44b4c432a2736..5bc7e753df4d0 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -290,18 +290,36 @@ void __init early_setup(unsigned long dt_ptr) /* -------- printk is _NOT_ safe to use here ! ------- */ - /* Try new device tree based feature discovery ... */ - if (!dt_cpu_ftrs_init(__va(dt_ptr))) - /* Otherwise use the old style CPU table */ - identify_cpu(0, mfspr(SPRN_PVR)); - - /* Assume we're on cpu 0 for now. Don't write to the paca yet! */ + /* + * Assume we're on cpu 0 for now. + * + * We need to load a PACA very early for a few reasons. + * + * The stack protector canary is stored in the paca, so as soon as we + * call any stack protected code we need r13 pointing somewhere valid. + * + * If we are using kcov it will call in_task() in its instrumentation, + * which relies on the current task from the PACA. + * + * dt_cpu_ftrs_init() calls into generic OF/fdt code, as well as + * printk(), which can trigger both stack protector and kcov. + * + * percpu variables and spin locks also use the paca. + * + * So set up a temporary paca. It will be replaced below once we know + * what CPU we are on. + */ initialise_paca(&boot_paca, 0); setup_paca(&boot_paca); fixup_boot_paca(); /* -------- printk is now safe to use ------- */ + /* Try new device tree based feature discovery ... */ + if (!dt_cpu_ftrs_init(__va(dt_ptr))) + /* Otherwise use the old style CPU table */ + identify_cpu(0, mfspr(SPRN_PVR)); + /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); @@ -523,6 +541,8 @@ static bool __init parse_cache_info(struct device_node *np, lsizep = of_get_property(np, propnames[3], NULL); if (bsizep == NULL) bsizep = lsizep; + if (lsizep == NULL) + lsizep = bsizep; if (lsizep != NULL) lsize = be32_to_cpu(*lsizep); if (bsizep != NULL) @@ -839,7 +859,13 @@ early_initcall(disable_hardlockup_detector); static enum l1d_flush_type enabled_flush_types; static void *l1d_flush_fallback_area; static bool no_rfi_flush; +static bool no_entry_flush; +static bool no_uaccess_flush; bool rfi_flush; +bool entry_flush; +bool uaccess_flush; +DEFINE_STATIC_KEY_FALSE(uaccess_flush_key); +EXPORT_SYMBOL(uaccess_flush_key); static int __init handle_no_rfi_flush(char *p) { @@ -849,6 +875,22 @@ static int __init handle_no_rfi_flush(char *p) } early_param("no_rfi_flush", handle_no_rfi_flush); +static int __init handle_no_entry_flush(char *p) +{ + pr_info("entry-flush: disabled on command line."); + no_entry_flush = true; + return 0; +} +early_param("no_entry_flush", handle_no_entry_flush); + +static int __init handle_no_uaccess_flush(char *p) +{ + pr_info("uaccess-flush: disabled on command line."); + no_uaccess_flush = true; + return 0; +} +early_param("no_uaccess_flush", handle_no_uaccess_flush); + /* * The RFI flush is not KPTI, but because users will see doco that says to use * nopti we hijack that option here to also disable the RFI flush. @@ -880,6 +922,32 @@ void rfi_flush_enable(bool enable) rfi_flush = enable; } +void entry_flush_enable(bool enable) +{ + if (enable) { + do_entry_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else { + do_entry_flush_fixups(L1D_FLUSH_NONE); + } + + entry_flush = enable; +} + +void uaccess_flush_enable(bool enable) +{ + if (enable) { + do_uaccess_flush_fixups(enabled_flush_types); + static_branch_enable(&uaccess_flush_key); + on_each_cpu(do_nothing, NULL, 1); + } else { + static_branch_disable(&uaccess_flush_key); + do_uaccess_flush_fixups(L1D_FLUSH_NONE); + } + + uaccess_flush = enable; +} + static void __ref init_fallback_flush(void) { u64 l1d_size, limit; @@ -938,10 +1006,28 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable) enabled_flush_types = types; - if (!no_rfi_flush && !cpu_mitigations_off()) + if (!cpu_mitigations_off() && !no_rfi_flush) rfi_flush_enable(enable); } +void setup_entry_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_entry_flush) + entry_flush_enable(enable); +} + +void setup_uaccess_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_uaccess_flush) + uaccess_flush_enable(enable); +} + #ifdef CONFIG_DEBUG_FS static int rfi_flush_set(void *data, u64 val) { @@ -969,9 +1055,63 @@ static int rfi_flush_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); +static int entry_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != entry_flush) + entry_flush_enable(enable); + + return 0; +} + +static int entry_flush_get(void *data, u64 *val) +{ + *val = entry_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n"); + +static int uaccess_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != uaccess_flush) + uaccess_flush_enable(enable); + + return 0; +} + +static int uaccess_flush_get(void *data, u64 *val) +{ + *val = uaccess_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n"); + static __init int rfi_flush_debugfs_init(void) { debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); + debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush); return 0; } device_initcall(rfi_flush_debugfs_init); diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index e6c30cee6abf1..d215f95545537 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -200,14 +200,27 @@ unsigned long get_tm_stackpointer(struct task_struct *tsk) * normal/non-checkpointed stack pointer. */ + unsigned long ret = tsk->thread.regs->gpr[1]; + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BUG_ON(tsk != current); if (MSR_TM_ACTIVE(tsk->thread.regs->msr)) { + preempt_disable(); tm_reclaim_current(TM_CAUSE_SIGNAL); if (MSR_TM_TRANSACTIONAL(tsk->thread.regs->msr)) - return tsk->thread.ckpt_regs.gpr[1]; + ret = tsk->thread.ckpt_regs.gpr[1]; + + /* + * If we treclaim, we must clear the current thread's TM bits + * before re-enabling preemption. Otherwise we might be + * preempted and have the live MSR[TS] changed behind our back + * (tm_recheckpoint_new_task() would recheckpoint). Besides, we + * enter the signal handler in non-transactional state. + */ + tsk->thread.regs->msr &= ~MSR_TS_MASK; + preempt_enable(); } #endif - return tsk->thread.regs->gpr[1]; + return ret; } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 98600b276f764..1b090a76b4444 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -489,19 +489,11 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, */ static int save_tm_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, int sigret) + struct mcontext __user *tm_frame, int sigret, + unsigned long msr) { - unsigned long msr = regs->msr; - WARN_ON(tm_suspend_disabled); - /* Remove TM bits from thread's MSR. The MSR in the sigcontext - * just indicates to userland that we were doing a transaction, but we - * don't want to return in transactional state. This also ensures - * that flush_fp_to_thread won't set TIF_RESTORE_TM again. - */ - regs->msr &= ~MSR_TS_MASK; - /* Save both sets of general registers */ if (save_general_regs(¤t->thread.ckpt_regs, frame) || save_general_regs(regs, tm_frame)) @@ -912,6 +904,10 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, int sigret; unsigned long tramp; struct pt_regs *regs = tsk->thread.regs; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Save the thread's msr before get_tm_stackpointer() changes it */ + unsigned long msr = regs->msr; +#endif BUG_ON(tsk != current); @@ -944,13 +940,13 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_frame = &rt_sf->uc_transact.uc_mcontext; - if (MSR_TM_ACTIVE(regs->msr)) { + if (MSR_TM_ACTIVE(msr)) { if (__put_user((unsigned long)&rt_sf->uc_transact, &rt_sf->uc.uc_link) || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs)) goto badframe; - if (save_tm_user_regs(regs, frame, tm_frame, sigret)) + if (save_tm_user_regs(regs, frame, tm_frame, sigret, msr)) goto badframe; } else @@ -1369,6 +1365,10 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, int sigret; unsigned long tramp; struct pt_regs *regs = tsk->thread.regs; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Save the thread's msr before get_tm_stackpointer() changes it */ + unsigned long msr = regs->msr; +#endif BUG_ON(tsk != current); @@ -1402,9 +1402,9 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_mctx = &frame->mctx_transact; - if (MSR_TM_ACTIVE(regs->msr)) { + if (MSR_TM_ACTIVE(msr)) { if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, - sigret)) + sigret, msr)) goto badframe; } else diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 117515564ec7a..adfde59cf4ba8 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -192,7 +192,8 @@ static long setup_sigcontext(struct sigcontext __user *sc, static long setup_tm_sigcontexts(struct sigcontext __user *sc, struct sigcontext __user *tm_sc, struct task_struct *tsk, - int signr, sigset_t *set, unsigned long handler) + int signr, sigset_t *set, unsigned long handler, + unsigned long msr) { /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the * process never used altivec yet (MSR_VEC is zero in pt_regs of @@ -207,12 +208,11 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc); #endif struct pt_regs *regs = tsk->thread.regs; - unsigned long msr = tsk->thread.regs->msr; long err = 0; BUG_ON(tsk != current); - BUG_ON(!MSR_TM_ACTIVE(regs->msr)); + BUG_ON(!MSR_TM_ACTIVE(msr)); WARN_ON(tm_suspend_disabled); @@ -222,13 +222,6 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, */ msr |= tsk->thread.ckpt_regs.msr & (MSR_FP | MSR_VEC | MSR_VSX); - /* Remove TM bits from thread's MSR. The MSR in the sigcontext - * just indicates to userland that we were doing a transaction, but we - * don't want to return in transactional state. This also ensures - * that flush_fp_to_thread won't set TIF_RESTORE_TM again. - */ - regs->msr &= ~MSR_TS_MASK; - #ifdef CONFIG_ALTIVEC err |= __put_user(v_regs, &sc->v_regs); err |= __put_user(tm_v_regs, &tm_sc->v_regs); @@ -480,8 +473,10 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, err |= __get_user(tsk->thread.ckpt_regs.ccr, &sc->gp_regs[PT_CCR]); + /* Don't allow userspace to set the trap value */ + regs->trap = 0; + /* These regs are not checkpointed; they can go in 'regs'. */ - err |= __get_user(regs->trap, &sc->gp_regs[PT_TRAP]); err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]); err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]); err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]); @@ -824,6 +819,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, unsigned long newsp = 0; long err = 0; struct pt_regs *regs = tsk->thread.regs; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Save the thread's msr before get_tm_stackpointer() changes it */ + unsigned long msr = regs->msr; +#endif BUG_ON(tsk != current); @@ -841,7 +840,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, err |= __put_user(0, &frame->uc.uc_flags); err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (MSR_TM_ACTIVE(regs->msr)) { + if (MSR_TM_ACTIVE(msr)) { /* The ucontext_t passed to userland points to the second * ucontext_t (for transactional state) with its uc_link ptr. */ @@ -849,7 +848,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext, &frame->uc_transact.uc_mcontext, tsk, ksig->sig, NULL, - (unsigned long)ksig->ka.sa.sa_handler); + (unsigned long)ksig->ka.sa.sa_handler, + msr); } else #endif { diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index ea6adbf6a2211..4de63ec2e1551 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -582,12 +582,44 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) } #endif +#ifdef CONFIG_NMI_IPI +static void crash_stop_this_cpu(struct pt_regs *regs) +#else +static void crash_stop_this_cpu(void *dummy) +#endif +{ + /* + * Just busy wait here and avoid marking CPU as offline to ensure + * register data is captured appropriately. + */ + while (1) + cpu_relax(); +} + +void crash_smp_send_stop(void) +{ + static bool stopped = false; + + if (stopped) + return; + + stopped = true; + +#ifdef CONFIG_NMI_IPI + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_stop_this_cpu, 1000000); +#else + smp_call_function(crash_stop_this_cpu, NULL, 0); +#endif /* CONFIG_NMI_IPI */ +} + #ifdef CONFIG_NMI_IPI static void nmi_stop_this_cpu(struct pt_regs *regs) { /* * IRQs are already hard disabled by the smp_handle_nmi_ipi. */ + set_cpu_online(smp_processor_id(), false); + spin_begin(); while (1) spin_cpu_relax(); @@ -603,6 +635,15 @@ void smp_send_stop(void) static void stop_this_cpu(void *dummy) { hard_irq_disable(); + + /* + * Offlining CPUs in stop_this_cpu can result in scheduler warnings, + * (see commit de6e5d38417e), but printk_safe_flush_on_panic() wants + * to know other CPUs are offline before it breaks locks to flush + * printk buffers, in case we panic()ed while holding the lock. + */ + set_cpu_online(smp_processor_id(), false); + spin_begin(); while (1) spin_cpu_relax(); @@ -1254,6 +1295,9 @@ void start_secondary(void *unused) vdso_getcpu_init(); #endif + set_numa_node(numa_cpu_lookup_table[cpu]); + set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); + /* Update topology CPU masks */ add_cpu_to_masks(cpu); @@ -1266,9 +1310,6 @@ void start_secondary(void *unused) if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu))) shared_caches = true; - set_numa_node(numa_cpu_lookup_table[cpu]); - set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu])); - smp_wmb(); notify_cpu_starting(cpu); set_cpu_online(cpu, true); @@ -1285,10 +1326,12 @@ void start_secondary(void *unused) BUG(); } +#ifdef CONFIG_PROFILING int setup_profiling_timer(unsigned int multiplier) { return 0; } +#endif #ifdef CONFIG_SCHED_SMT /* cpumask of CPUs with asymetric SMT dependancy */ diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index e2a46cfed5fd1..890f95151fb44 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -8,6 +8,7 @@ * Copyright 2018 Nick Piggin, Michael Ellerman, IBM Corp. */ +#include #include #include #include @@ -19,6 +20,7 @@ #include #include #include +#include #include #include @@ -230,17 +232,31 @@ static void handle_backtrace_ipi(struct pt_regs *regs) static void raise_backtrace_ipi(cpumask_t *mask) { + struct paca_struct *p; unsigned int cpu; + u64 delay_us; for_each_cpu(cpu, mask) { - if (cpu == smp_processor_id()) + if (cpu == smp_processor_id()) { handle_backtrace_ipi(NULL); - else - smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, 5 * USEC_PER_SEC); - } + continue; + } - for_each_cpu(cpu, mask) { - struct paca_struct *p = paca_ptrs[cpu]; + delay_us = 5 * USEC_PER_SEC; + + if (smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, delay_us)) { + // Now wait up to 5s for the other CPU to do its backtrace + while (cpumask_test_cpu(cpu, mask) && delay_us) { + udelay(1); + delay_us--; + } + + // Other CPU cleared itself from the mask + if (delay_us) + continue; + } + + p = paca_ptrs[cpu]; cpumask_clear_cpu(cpu, mask); diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index cbdf86228eaaa..54c44aea338c4 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -181,7 +181,7 @@ _GLOBAL(swsusp_arch_resume) #ifdef CONFIG_ALTIVEC /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif sync diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S index 6d3189830dd32..068a268a8013e 100644 --- a/arch/powerpc/kernel/swsusp_asm64.S +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -142,7 +142,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR) _GLOBAL(swsusp_arch_resume) /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 80a676da11cbc..f08ca604a3941 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -31,29 +31,27 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); -/* - * SMT snooze delay stuff, 64-bit only for now - */ - #ifdef CONFIG_PPC64 -/* Time in microseconds we delay before sleeping in the idle loop */ -static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 }; +/* + * Snooze delay has not been hooked up since 3fa8cad82b94 ("powerpc/pseries/cpuidle: + * smt-snooze-delay cleanup.") and has been broken even longer. As was foretold in + * 2014: + * + * "ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean + * up the kernel code." + * + * powerpc-utils stopped using it as of 1.3.8. At some point in the future this + * code should be removed. + */ static ssize_t store_smt_snooze_delay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct cpu *cpu = container_of(dev, struct cpu, dev); - ssize_t ret; - long snooze; - - ret = sscanf(buf, "%ld", &snooze); - if (ret != 1) - return -EINVAL; - - per_cpu(smt_snooze_delay, cpu->dev.id) = snooze; + pr_warn_once("%s (%d) stored to unsupported smt_snooze_delay, which has no effect.\n", + current->comm, current->pid); return count; } @@ -61,9 +59,9 @@ static ssize_t show_smt_snooze_delay(struct device *dev, struct device_attribute *attr, char *buf) { - struct cpu *cpu = container_of(dev, struct cpu, dev); - - return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id)); + pr_warn_once("%s (%d) read from unsupported smt_snooze_delay\n", + current->comm, current->pid); + return sprintf(buf, "100\n"); } static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, @@ -71,16 +69,10 @@ static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay, static int __init setup_smt_snooze_delay(char *str) { - unsigned int cpu; - long snooze; - if (!cpu_has_feature(CPU_FTR_SMT)) return 1; - snooze = simple_strtol(str, NULL, 10); - for_each_possible_cpu(cpu) - per_cpu(smt_snooze_delay, cpu) = snooze; - + pr_warn("smt-snooze-delay command line option has no effect\n"); return 1; } __setup("smt-snooze-delay=", setup_smt_snooze_delay); diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index e2ab8a111b693..0b4694b8d2482 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -13,13 +13,14 @@ */ #include -#include #include #include #include #include #include #include +#include +#include #include #include @@ -39,9 +40,7 @@ static struct tau_temp unsigned char grew; } tau[NR_CPUS]; -struct timer_list tau_timer; - -#undef DEBUG +static bool tau_int_enable; /* TODO: put these in a /proc interface, with some sanity checks, and maybe * dynamic adjustment to minimize # of interrupts */ @@ -50,72 +49,49 @@ struct timer_list tau_timer; #define step_size 2 /* step size when temp goes out of range */ #define window_expand 1 /* expand the window by this much */ /* configurable values for shrinking the window */ -#define shrink_timer 2*HZ /* period between shrinking the window */ +#define shrink_timer 2000 /* period between shrinking the window */ #define min_window 2 /* minimum window size, degrees C */ static void set_thresholds(unsigned long cpu) { -#ifdef CONFIG_TAU_INT - /* - * setup THRM1, - * threshold, valid bit, enable interrupts, interrupt when below threshold - */ - mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID); + u32 maybe_tie = tau_int_enable ? THRM1_TIE : 0; - /* setup THRM2, - * threshold, valid bit, enable interrupts, interrupt when above threshold - */ - mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE); -#else - /* same thing but don't enable interrupts */ - mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TID); - mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V); -#endif + /* setup THRM1, threshold, valid bit, interrupt when below threshold */ + mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | maybe_tie | THRM1_TID); + + /* setup THRM2, threshold, valid bit, interrupt when above threshold */ + mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | maybe_tie); } static void TAUupdate(int cpu) { - unsigned thrm; - -#ifdef DEBUG - printk("TAUupdate "); -#endif + u32 thrm; + u32 bits = THRM1_TIV | THRM1_TIN | THRM1_V; /* if both thresholds are crossed, the step_sizes cancel out * and the window winds up getting expanded twice. */ - if((thrm = mfspr(SPRN_THRM1)) & THRM1_TIV){ /* is valid? */ - if(thrm & THRM1_TIN){ /* crossed low threshold */ - if (tau[cpu].low >= step_size){ - tau[cpu].low -= step_size; - tau[cpu].high -= (step_size - window_expand); - } - tau[cpu].grew = 1; -#ifdef DEBUG - printk("low threshold crossed "); -#endif + thrm = mfspr(SPRN_THRM1); + if ((thrm & bits) == bits) { + mtspr(SPRN_THRM1, 0); + + if (tau[cpu].low >= step_size) { + tau[cpu].low -= step_size; + tau[cpu].high -= (step_size - window_expand); } + tau[cpu].grew = 1; + pr_debug("%s: low threshold crossed\n", __func__); } - if((thrm = mfspr(SPRN_THRM2)) & THRM1_TIV){ /* is valid? */ - if(thrm & THRM1_TIN){ /* crossed high threshold */ - if (tau[cpu].high <= 127-step_size){ - tau[cpu].low += (step_size - window_expand); - tau[cpu].high += step_size; - } - tau[cpu].grew = 1; -#ifdef DEBUG - printk("high threshold crossed "); -#endif + thrm = mfspr(SPRN_THRM2); + if ((thrm & bits) == bits) { + mtspr(SPRN_THRM2, 0); + + if (tau[cpu].high <= 127 - step_size) { + tau[cpu].low += (step_size - window_expand); + tau[cpu].high += step_size; } + tau[cpu].grew = 1; + pr_debug("%s: high threshold crossed\n", __func__); } - -#ifdef DEBUG - printk("grew = %d\n", tau[cpu].grew); -#endif - -#ifndef CONFIG_TAU_INT /* tau_timeout will do this if not using interrupts */ - set_thresholds(cpu); -#endif - } #ifdef CONFIG_TAU_INT @@ -140,17 +116,16 @@ void TAUException(struct pt_regs * regs) static void tau_timeout(void * info) { int cpu; - unsigned long flags; int size; int shrink; - /* disabling interrupts *should* be okay */ - local_irq_save(flags); cpu = smp_processor_id(); -#ifndef CONFIG_TAU_INT - TAUupdate(cpu); -#endif + if (!tau_int_enable) + TAUupdate(cpu); + + /* Stop thermal sensor comparisons and interrupts */ + mtspr(SPRN_THRM3, 0); size = tau[cpu].high - tau[cpu].low; if (size > min_window && ! tau[cpu].grew) { @@ -173,32 +148,26 @@ static void tau_timeout(void * info) set_thresholds(cpu); - /* - * Do the enable every time, since otherwise a bunch of (relatively) - * complex sleep code needs to be added. One mtspr every time - * tau_timeout is called is probably not a big deal. - * - * Enable thermal sensor and set up sample interval timer - * need 20 us to do the compare.. until a nice 'cpu_speed' function - * call is implemented, just assume a 500 mhz clock. It doesn't really - * matter if we take too long for a compare since it's all interrupt - * driven anyway. - * - * use a extra long time.. (60 us @ 500 mhz) + /* Restart thermal sensor comparisons and interrupts. + * The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet" + * recommends that "the maximum value be set in THRM3 under all + * conditions." */ - mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E); - - local_irq_restore(flags); + mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E); } -static void tau_timeout_smp(struct timer_list *unused) -{ +static struct workqueue_struct *tau_workq; - /* schedule ourselves to be run again */ - mod_timer(&tau_timer, jiffies + shrink_timer) ; +static void tau_work_func(struct work_struct *work) +{ + msleep(shrink_timer); on_each_cpu(tau_timeout, NULL, 0); + /* schedule ourselves to be run again */ + queue_work(tau_workq, work); } +DECLARE_WORK(tau_work, tau_work_func); + /* * setup the TAU * @@ -231,21 +200,19 @@ static int __init TAU_init(void) return 1; } + tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) && + !strcmp(cur_cpu_spec->platform, "ppc750"); - /* first, set up the window shrinking timer */ - timer_setup(&tau_timer, tau_timeout_smp, 0); - tau_timer.expires = jiffies + shrink_timer; - add_timer(&tau_timer); + tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0); + if (!tau_workq) + return -ENOMEM; on_each_cpu(TAU_init_smp, NULL, 0); - printk("Thermal assist unit "); -#ifdef CONFIG_TAU_INT - printk("using interrupts, "); -#else - printk("using timers, "); -#endif - printk("shrink_timer: %d jiffies\n", shrink_timer); + queue_work(tau_workq, &tau_work); + + pr_info("Thermal assist unit using %s, shrink_timer: %d ms\n", + tau_int_enable ? "interrupts" : "workqueue", shrink_timer); tau_initialized = 1; return 0; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 694522308cd51..0e0a2227af7d7 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -232,7 +232,7 @@ static u64 scan_dispatch_log(u64 stop_tb) * Accumulate stolen time by scanning the dispatch trace log. * Called on entry from user mode. */ -void accumulate_stolen_time(void) +void notrace accumulate_stolen_time(void) { u64 sst, ust; unsigned long save_irq_soft_mask = irq_soft_mask_return(); @@ -522,35 +522,6 @@ static inline void clear_irq_work_pending(void) "i" (offsetof(struct paca_struct, irq_work_pending))); } -void arch_irq_work_raise(void) -{ - preempt_disable(); - set_irq_work_pending_flag(); - /* - * Non-nmi code running with interrupts disabled will replay - * irq_happened before it re-enables interrupts, so setthe - * decrementer there instead of causing a hardware exception - * which would immediately hit the masked interrupt handler - * and have the net effect of setting the decrementer in - * irq_happened. - * - * NMI interrupts can not check this when they return, so the - * decrementer hardware exception is raised, which will fire - * when interrupts are next enabled. - * - * BookE does not support this yet, it must audit all NMI - * interrupt handlers to ensure they call nmi_enter() so this - * check would be correct. - */ - if (IS_ENABLED(CONFIG_BOOKE) || !irqs_disabled() || in_nmi()) { - set_dec(1); - } else { - hard_irq_disable(); - local_paca->irq_happened |= PACA_IRQ_DEC; - } - preempt_enable(); -} - #else /* 32-bit */ DEFINE_PER_CPU(u8, irq_work_pending); @@ -559,16 +530,27 @@ DEFINE_PER_CPU(u8, irq_work_pending); #define test_irq_work_pending() __this_cpu_read(irq_work_pending) #define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0) +#endif /* 32 vs 64 bit */ + void arch_irq_work_raise(void) { + /* + * 64-bit code that uses irq soft-mask can just cause an immediate + * interrupt here that gets soft masked, if this is called under + * local_irq_disable(). It might be possible to prevent that happening + * by noticing interrupts are disabled and setting decrementer pending + * to be replayed when irqs are enabled. The problem there is that + * tracing can call irq_work_raise, including in code that does low + * level manipulations of irq soft-mask state (e.g., trace_hardirqs_on) + * which could get tangled up if we're messing with the same state + * here. + */ preempt_disable(); set_irq_work_pending_flag(); set_dec(1); preempt_enable(); } -#endif /* 32 vs 64 bit */ - #else /* CONFIG_IRQ_WORK */ #define test_irq_work_pending() 0 @@ -959,6 +941,7 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec; vdso_data->stamp_xtime = xt; vdso_data->stamp_sec_fraction = frac_sec; + vdso_data->hrtimer_res = hrtimer_resolution; smp_wmb(); ++(vdso_data->tb_update_count); } diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 7ea0ca044b650..d816e714f2f48 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -328,9 +328,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) /* Is this a known long jump tramp? */ for (i = 0; i < NUM_FTRACE_TRAMPS; i++) - if (!ftrace_tramps[i]) - break; - else if (ftrace_tramps[i] == tramp) + if (ftrace_tramps[i] == tramp) return 0; /* Is this a known plt tramp? */ @@ -868,6 +866,17 @@ void arch_ftrace_update_code(int command) extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; +void ftrace_free_init_tramp(void) +{ + int i; + + for (i = 0; i < NUM_FTRACE_TRAMPS && ftrace_tramps[i]; i++) + if (ftrace_tramps[i] == (unsigned long)ftrace_tramp_init) { + ftrace_tramps[i] = 0; + return; + } +} + int __init ftrace_dyn_arch_init(void) { int i; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 82f43535e6867..ecfa460f66d17 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -250,15 +250,22 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, } NOKPROBE_SYMBOL(oops_end); +static char *get_mmu_str(void) +{ + if (early_radix_enabled()) + return " MMU=Radix"; + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) + return " MMU=Hash"; + return ""; +} + static int __die(const char *str, struct pt_regs *regs, long err) { printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); - printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s%s %s\n", + printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n", IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE", - PAGE_SIZE / 1024, - early_radix_enabled() ? " MMU=Radix" : "", - early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ? " MMU=Hash" : "", + PAGE_SIZE / 1024, get_mmu_str(), IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", IS_ENABLED(CONFIG_SMP) ? " SMP" : "", IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "", @@ -503,11 +510,14 @@ void system_reset_exception(struct pt_regs *regs) #ifdef CONFIG_PPC_BOOK3S_64 BUG_ON(get_paca()->in_nmi == 0); if (get_paca()->in_nmi > 1) - nmi_panic(regs, "Unrecoverable nested System Reset"); + die("Unrecoverable nested System Reset", regs, SIGABRT); #endif /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) - nmi_panic(regs, "Unrecoverable System Reset"); + if (!(regs->msr & MSR_RI)) { + /* For the reason explained in die_mce, nmi_exit before die */ + nmi_exit(); + die("Unrecoverable System Reset", regs, SIGABRT); + } if (saved_hsrrs) { mtspr(SPRN_HSRR0, hsrr0); @@ -851,7 +861,7 @@ void machine_check_exception(struct pt_regs *regs) /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) - nmi_panic(regs, "Unrecoverable Machine check"); + die("Unrecoverable Machine check", regs, SIGBUS); return; @@ -870,7 +880,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs) { unsigned int ra, rb, t, i, sel, instr, rc; const void __user *addr; - u8 vbuf[16], *vdst; + u8 vbuf[16] __aligned(16), *vdst; unsigned long ea, msr, msr_mask; bool swap; diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index eae9ddaecbcf4..efb1ba40274a8 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -682,7 +682,7 @@ int vdso_getcpu_init(void) node = cpu_to_node(cpu); WARN_ON_ONCE(node > 0xffff); - val = (cpu & 0xfff) | ((node & 0xffff) << 16); + val = (cpu & 0xffff) | ((node & 0xffff) << 16); mtspr(SPRN_SPRG_VDSO_WRITE, val); get_paca()->sprg_vdso = val; diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index becd9f8767ede..a967e795b96d9 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -156,12 +156,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) cror cr0*4+eq,cr0*4+eq,cr1*4+eq bne cr0,99f + mflr r12 + .cfi_register lr,r12 + bl __get_datapage@local /* get data page */ + lwz r5, CLOCK_HRTIMER_RES(r3) + mtlr r12 li r3,0 cmpli cr0,r4,0 crclr cr0*4+so beqlr - lis r5,CLOCK_REALTIME_RES@h - ori r5,r5,CLOCK_REALTIME_RES@l stw r3,TSPC32_TV_SEC(r4) stw r5,TSPC32_TV_NSEC(r4) blr diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S index 3f92561a64c4d..526f5ba2593e2 100644 --- a/arch/powerpc/kernel/vdso64/cacheflush.S +++ b/arch/powerpc/kernel/vdso64/cacheflush.S @@ -35,7 +35,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ crclr cr0*4+so beqlr /* nothing to do? */ mtctr r8 @@ -52,7 +52,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) subf r8,r6,r4 /* compute length */ add r8,r8,r5 lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ crclr cr0*4+so beqlr /* nothing to do? */ mtctr r8 diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 07bfe33fe8745..81757f06bbd7a 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -186,12 +186,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) cror cr0*4+eq,cr0*4+eq,cr1*4+eq bne cr0,99f + mflr r12 + .cfi_register lr,r12 + bl V_LOCAL_FUNC(__get_datapage) + lwz r5, CLOCK_HRTIMER_RES(r3) + mtlr r12 li r3,0 cmpldi cr0,r4,0 crclr cr0*4+so beqlr - lis r5,CLOCK_REALTIME_RES@h - ori r5,r5,CLOCK_REALTIME_RES@l std r3,TSPC64_TV_SEC(r4) std r5,TSPC64_TV_NSEC(r4) blr diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 060a1acd7c6d7..3ea360cad337b 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -98,10 +98,11 @@ SECTIONS ALIGN_FUNCTION(); #endif /* careful! __ftr_alt_* sections need to be close to .text */ - *(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text); + *(.text.hot .text.hot.* TEXT_MAIN .text.fixup .text.unlikely .text.unlikely.* .fixup __ftr_alt_* .ref.text); #ifdef CONFIG_PPC64 *(.tramp.ftrace.text); #endif + NOINSTR_TEXT SCHED_TEXT CPUIDLE_TEXT LOCK_TEXT @@ -143,6 +144,20 @@ SECTIONS __stop___stf_entry_barrier_fixup = .; } + . = ALIGN(8); + __uaccess_flush_fixup : AT(ADDR(__uaccess_flush_fixup) - LOAD_OFFSET) { + __start___uaccess_flush_fixup = .; + *(__uaccess_flush_fixup) + __stop___uaccess_flush_fixup = .; + } + + . = ALIGN(8); + __entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) { + __start___entry_flush_fixup = .; + *(__entry_flush_fixup) + __stop___entry_flush_fixup = .; + } + . = ALIGN(8); __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) { __start___stf_exit_barrier_fixup = .; @@ -196,6 +211,12 @@ SECTIONS .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { _sinittext = .; INIT_TEXT + + /* + *.init.text might be RO so we must ensure this section ends on + * a page boundary. + */ + . = ALIGN(PAGE_SIZE); _einittext = .; #ifdef CONFIG_PPC64 *(.tramp.ftrace.init); @@ -209,21 +230,9 @@ SECTIONS EXIT_TEXT } - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { - INIT_DATA - } - - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { - INIT_SETUP(16) - } - - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - INIT_CALLS - } + . = ALIGN(PAGE_SIZE); - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - CON_INITCALL - } + INIT_DATA_SECTION(16) . = ALIGN(8); __ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) { @@ -251,9 +260,6 @@ SECTIONS __stop___fw_ftr_fixup = .; } #endif - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { - INIT_RAM_FS - } PERCPU_SECTION(L1_CACHE_BYTES) diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index af3c15a1d41eb..75b2a6c4db5a5 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -132,6 +132,10 @@ static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb) { cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask); cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask); + /* + * See wd_smp_clear_cpu_pending() + */ + smp_mb(); if (cpumask_empty(&wd_smp_cpus_pending)) { wd_smp_last_reset_tb = tb; cpumask_andnot(&wd_smp_cpus_pending, @@ -217,13 +221,44 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb) cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck); wd_smp_unlock(&flags); + } else { + /* + * The last CPU to clear pending should have reset the + * watchdog so we generally should not find it empty + * here if our CPU was clear. However it could happen + * due to a rare race with another CPU taking the + * last CPU out of the mask concurrently. + * + * We can't add a warning for it. But just in case + * there is a problem with the watchdog that is causing + * the mask to not be reset, try to kick it along here. + */ + if (unlikely(cpumask_empty(&wd_smp_cpus_pending))) + goto none_pending; } return; } + cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); + + /* + * Order the store to clear pending with the load(s) to check all + * words in the pending mask to check they are all empty. This orders + * with the same barrier on another CPU. This prevents two CPUs + * clearing the last 2 pending bits, but neither seeing the other's + * store when checking if the mask is empty, and missing an empty + * mask, which ends with a false positive. + */ + smp_mb(); if (cpumask_empty(&wd_smp_cpus_pending)) { unsigned long flags; +none_pending: + /* + * Double check under lock because more than one CPU could see + * a clear mask with the lockless check after clearing their + * pending bits. + */ wd_smp_lock(&flags); if (cpumask_empty(&wd_smp_cpus_pending)) { wd_smp_last_reset_tb = tb; @@ -314,8 +349,12 @@ void arch_touch_nmi_watchdog(void) { unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000; int cpu = smp_processor_id(); - u64 tb = get_tb(); + u64 tb; + if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) + return; + + tb = get_tb(); if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) { per_cpu(wd_timer_tb, cpu) = tb; wd_smp_clear_cpu_pending(cpu, tb); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index ec2547cc5ecbe..1ff971f3b06f9 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -867,7 +867,8 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm, kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new, change); } -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, + unsigned flags) { return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 2d415c36a61d3..9d73448354698 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -38,7 +38,8 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, /* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */ if (kvmhv_on_pseries()) return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr, - __pa(to), __pa(from), n); + (to != NULL) ? __pa(to): 0, + (from != NULL) ? __pa(from): 0, n); quadrant = 1; if (!pid) @@ -353,7 +354,13 @@ static struct kmem_cache *kvm_pmd_cache; static pte_t *kvmppc_pte_alloc(void) { - return kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL); + pte_t *pte; + + pte = kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL); + /* pmd_populate() will only reference _pa(pte). */ + kmemleak_ignore(pte); + + return pte; } static void kvmppc_pte_free(pte_t *ptep) @@ -363,7 +370,13 @@ static void kvmppc_pte_free(pte_t *ptep) static pmd_t *kvmppc_pmd_alloc(void) { - return kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL); + pmd_t *pmd; + + pmd = kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL); + /* pud_populate() will only reference _pa(pmd). */ + kmemleak_ignore(pmd); + + return pmd; } static void kvmppc_pmd_free(pmd_t *pmdp) @@ -1091,6 +1104,11 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm, kvm->arch.lpid); gpa += PAGE_SIZE; } + /* + * Increase the mmu notifier sequence number to prevent any page + * fault that read the memslot earlier from writing a PTE. + */ + kvm->mmu_notifier_seq++; spin_unlock(&kvm->mmu_lock); } diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 5834db0a54c66..4518a0f2d6c69 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -74,6 +74,7 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, struct kvmppc_spapr_tce_iommu_table *stit, *tmp; struct iommu_table_group *table_group = NULL; + rcu_read_lock(); list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { table_group = iommu_group_get_iommudata(grp); @@ -88,7 +89,9 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, kref_put(&stit->kref, kvm_spapr_tce_liobn_put); } } + cond_resched_rcu(); } + rcu_read_unlock(); } extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, @@ -106,12 +109,14 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, if (!f.file) return -EBADF; + rcu_read_lock(); list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { if (stt == f.file->private_data) { found = true; break; } } + rcu_read_unlock(); fdput(f); @@ -144,6 +149,7 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, if (!tbl) return -EINVAL; + rcu_read_lock(); list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { if (tbl != stit->tbl) continue; @@ -151,14 +157,17 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, if (!kref_get_unless_zero(&stit->kref)) { /* stit is being destroyed */ iommu_tce_table_put(tbl); + rcu_read_unlock(); return -ENOTTY; } /* * The table is already known to this KVM, we just increased * its KVM reference counter and can return. */ + rcu_read_unlock(); return 0; } + rcu_read_unlock(); stit = kzalloc(sizeof(*stit), GFP_KERNEL); if (!stit) { @@ -364,18 +373,19 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, if (kvmppc_tce_to_ua(stt->kvm, tce, &ua)) return H_TOO_HARD; + rcu_read_lock(); list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { unsigned long hpa = 0; struct mm_iommu_table_group_mem_t *mem; long shift = stit->tbl->it_page_shift; mem = mm_iommu_lookup(stt->kvm->mm, ua, 1ULL << shift); - if (!mem) - return H_TOO_HARD; - - if (mm_iommu_ua_to_hpa(mem, ua, shift, &hpa)) + if (!mem || mm_iommu_ua_to_hpa(mem, ua, shift, &hpa)) { + rcu_read_unlock(); return H_TOO_HARD; + } } + rcu_read_unlock(); return H_SUCCESS; } @@ -410,13 +420,19 @@ static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt, tbl[idx % TCES_PER_PAGE] = tce; } -static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl, - unsigned long entry) +static void kvmppc_clear_tce(struct mm_struct *mm, struct kvmppc_spapr_tce_table *stt, + struct iommu_table *tbl, unsigned long entry) { - unsigned long hpa = 0; - enum dma_data_direction dir = DMA_NONE; + unsigned long i; + unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); + unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift); + + for (i = 0; i < subpages; ++i) { + unsigned long hpa = 0; + enum dma_data_direction dir = DMA_NONE; - iommu_tce_xchg_no_kill(mm, tbl, entry, &hpa, &dir); + iommu_tce_xchg_no_kill(mm, tbl, io_entry + i, &hpa, &dir); + } } static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm, @@ -475,6 +491,8 @@ static long kvmppc_tce_iommu_unmap(struct kvm *kvm, break; } + iommu_tce_kill(tbl, io_entry, subpages); + return ret; } @@ -534,6 +552,8 @@ static long kvmppc_tce_iommu_map(struct kvm *kvm, break; } + iommu_tce_kill(tbl, io_entry, subpages); + return ret; } @@ -580,10 +600,9 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry, ua, dir); - iommu_tce_kill(stit->tbl, entry, 1); if (ret != H_SUCCESS) { - kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry); + kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry); goto unlock_exit; } } @@ -659,13 +678,13 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, */ if (get_user(tce, tces + i)) { ret = H_TOO_HARD; - goto invalidate_exit; + goto unlock_exit; } tce = be64_to_cpu(tce); if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) { ret = H_PARAMETER; - goto invalidate_exit; + goto unlock_exit; } list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { @@ -674,19 +693,15 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, iommu_tce_direction(tce)); if (ret != H_SUCCESS) { - kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, - entry); - goto invalidate_exit; + kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, + entry + i); + goto unlock_exit; } } kvmppc_tce_put(stt, entry + i, tce); } -invalidate_exit: - list_for_each_entry_lockless(stit, &stt->iommu_tables, next) - iommu_tce_kill(stit->tbl, entry, npages); - unlock_exit: srcu_read_unlock(&vcpu->kvm->srcu, idx); @@ -725,20 +740,16 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, continue; if (ret == H_TOO_HARD) - goto invalidate_exit; + return ret; WARN_ON_ONCE(1); - kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry); + kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry + i); } } for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); -invalidate_exit: - list_for_each_entry_lockless(stit, &stt->iommu_tables, next) - iommu_tce_kill(stit->tbl, ioba >> stt->page_shift, npages); - return ret; } EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce); diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index ab6eeb8e753e5..abb49d8633298 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -177,10 +177,13 @@ static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt, idx -= stt->offset; page = stt->pages[idx / TCES_PER_PAGE]; /* - * page must not be NULL in real mode, - * kvmppc_rm_ioba_validate() must have taken care of this. + * kvmppc_rm_ioba_validate() allows pages not be allocated if TCE is + * being cleared, otherwise it returns H_TOO_HARD and we skip this. */ - WARN_ON_ONCE_RM(!page); + if (!page) { + WARN_ON_ONCE_RM(tce != 0); + return; + } tbl = kvmppc_page_address(page); tbl[idx % TCES_PER_PAGE] = tce; @@ -248,13 +251,19 @@ extern void iommu_tce_kill_rm(struct iommu_table *tbl, tbl->it_ops->tce_kill(tbl, entry, pages, true); } -static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl, - unsigned long entry) +static void kvmppc_rm_clear_tce(struct kvm *kvm, struct kvmppc_spapr_tce_table *stt, + struct iommu_table *tbl, unsigned long entry) { - unsigned long hpa = 0; - enum dma_data_direction dir = DMA_NONE; + unsigned long i; + unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift); + unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift); - iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir); + for (i = 0; i < subpages; ++i) { + unsigned long hpa = 0; + enum dma_data_direction dir = DMA_NONE; + + iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, io_entry + i, &hpa, &dir); + } } static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, @@ -317,6 +326,8 @@ static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm, break; } + iommu_tce_kill_rm(tbl, io_entry, subpages); + return ret; } @@ -380,6 +391,8 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, break; } + iommu_tce_kill_rm(tbl, io_entry, subpages); + return ret; } @@ -425,10 +438,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry, ua, dir); - iommu_tce_kill_rm(stit->tbl, entry, 1); - if (ret != H_SUCCESS) { - kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); + kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry); return ret; } } @@ -568,7 +579,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, ua = 0; if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { ret = H_PARAMETER; - goto invalidate_exit; + goto unlock_exit; } list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { @@ -577,19 +588,15 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, iommu_tce_direction(tce)); if (ret != H_SUCCESS) { - kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, - entry); - goto invalidate_exit; + kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, + entry + i); + goto unlock_exit; } } kvmppc_rm_tce_put(stt, entry + i, tce); } -invalidate_exit: - list_for_each_entry_lockless(stit, &stt->iommu_tables, next) - iommu_tce_kill_rm(stit->tbl, entry, npages); - unlock_exit: if (rmap) unlock_rmap(rmap); @@ -632,20 +639,16 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, continue; if (ret == H_TOO_HARD) - goto invalidate_exit; + return ret; WARN_ON_ONCE_RM(1); - kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); + kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry + i); } } for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value); -invalidate_exit: - list_for_each_entry_lockless(stit, &stt->iommu_tables, next) - iommu_tce_kill_rm(stit->tbl, ioba >> stt->page_shift, npages); - return ret; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 709cf1fd4cf46..6c99ccc3bfcb0 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include @@ -2306,8 +2307,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP; if (cpu_has_feature(CPU_FTR_HVMODE)) { vcpu->arch.hfscr &= mfspr(SPRN_HFSCR); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) vcpu->arch.hfscr |= HFSCR_TM; +#endif } if (cpu_has_feature(CPU_FTR_TM_COMP)) vcpu->arch.hfscr |= HFSCR_TM; @@ -2354,7 +2357,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, mutex_unlock(&kvm->lock); if (!vcore) - goto free_vcpu; + goto uninit_vcpu; spin_lock(&vcore->lock); ++vcore->num_threads; @@ -2371,6 +2374,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, return vcpu; +uninit_vcpu: + kvm_vcpu_uninit(vcpu); free_vcpu: kmem_cache_free(kvm_vcpu_cache, vcpu); out: @@ -2534,7 +2539,7 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) cpumask_t *cpu_in_guest; int i; - cpu = cpu_first_thread_sibling(cpu); + cpu = cpu_first_tlb_thread_sibling(cpu); if (nested) { cpumask_set_cpu(cpu, &nested->need_tlb_flush); cpu_in_guest = &nested->cpu_in_guest; @@ -2548,9 +2553,10 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu) * the other side is the first smp_mb() in kvmppc_run_core(). */ smp_mb(); - for (i = 0; i < threads_per_core; ++i) - if (cpumask_test_cpu(cpu + i, cpu_in_guest)) - smp_call_function_single(cpu + i, do_nothing, NULL, 1); + for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu); + i += cpu_tlb_thread_sibling_step()) + if (cpumask_test_cpu(i, cpu_in_guest)) + smp_call_function_single(i, do_nothing, NULL, 1); } static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) @@ -2581,8 +2587,8 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu) */ if (prev_cpu != pcpu) { if (prev_cpu >= 0 && - cpu_first_thread_sibling(prev_cpu) != - cpu_first_thread_sibling(pcpu)) + cpu_first_tlb_thread_sibling(prev_cpu) != + cpu_first_tlb_thread_sibling(pcpu)) radix_flush_cpu(kvm, prev_cpu, vcpu); if (nested) nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu; @@ -3554,6 +3560,18 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true); +#ifdef CONFIG_PPC_PSERIES + if (kvmhv_on_pseries()) { + barrier(); + if (vcpu->arch.vpa.pinned_addr) { + struct lppaca *lp = vcpu->arch.vpa.pinned_addr; + get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use; + } else { + get_lppaca()->pmcregs_in_use = 1; + } + barrier(); + } +#endif kvmhv_load_guest_pmu(vcpu); msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX); @@ -3621,6 +3639,7 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested && kvmppc_get_gpr(vcpu, 3) == H_CEDE) { kvmppc_nested_cede(vcpu); + kvmppc_set_gpr(vcpu, 3, 0); trap = 0; } } else { @@ -3635,7 +3654,10 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vcpu->arch.dec_expires = dec + tb; vcpu->cpu = -1; vcpu->arch.thread_cpu = -1; + /* Save guest CTRL register, set runlatch to 1 */ vcpu->arch.ctrl = mfspr(SPRN_CTRLF); + if (!(vcpu->arch.ctrl & 1)) + mtspr(SPRN_CTRLT, vcpu->arch.ctrl | 1); vcpu->arch.iamr = mfspr(SPRN_IAMR); vcpu->arch.pspb = mfspr(SPRN_PSPB); @@ -3684,6 +3706,13 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, save_pmu |= nesting_enabled(vcpu->kvm); kvmhv_save_guest_pmu(vcpu, save_pmu); +#ifdef CONFIG_PPC_PSERIES + if (kvmhv_on_pseries()) { + barrier(); + get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse(); + barrier(); + } +#endif vc->entry_exit_map = 0x101; vc->in_guest = 0; @@ -5188,6 +5217,12 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, case KVM_PPC_ALLOCATE_HTAB: { u32 htab_order; + /* If we're a nested hypervisor, we currently only support radix */ + if (kvmhv_on_pseries()) { + r = -EOPNOTSUPP; + break; + } + r = -EFAULT; if (get_user(htab_order, (u32 __user *)argp)) break; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7c1909657b556..6d34b69729854 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -821,6 +821,7 @@ static void flush_guest_tlb(struct kvm *kvm) "r" (0) : "memory"); } asm volatile("ptesync": : :"memory"); + // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory"); } else { for (set = 0; set < kvm->arch.tlb_sets; ++set) { @@ -831,7 +832,9 @@ static void flush_guest_tlb(struct kvm *kvm) rb += PPC_BIT(51); /* increment set number */ } asm volatile("ptesync": : :"memory"); - asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); + // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. + if (cpu_has_feature(CPU_FTR_ARCH_300)) + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); } } @@ -847,7 +850,7 @@ void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu, * Thus we make all 4 threads use the same bit. */ if (cpu_has_feature(CPU_FTR_ARCH_300)) - pcpu = cpu_first_thread_sibling(pcpu); + pcpu = cpu_first_tlb_thread_sibling(pcpu); if (nested) need_tlb_flush = &nested->need_tlb_flush; diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index cdf30c6eaf542..613d24b707abe 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -51,7 +51,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) hr->ppr = vcpu->arch.ppr; } -static void byteswap_pt_regs(struct pt_regs *regs) +/* Use noinline_for_stack due to https://bugs.llvm.org/show_bug.cgi?id=49610 */ +static noinline_for_stack void byteswap_pt_regs(struct pt_regs *regs) { unsigned long *addr = (unsigned long *) regs; @@ -231,6 +232,9 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) if (vcpu->kvm->arch.l1_ptcr == 0) return H_NOT_AVAILABLE; + if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr)) + return H_BAD_MODE; + /* copy parameters in */ hv_ptr = kvmppc_get_gpr(vcpu, 4); err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv, @@ -252,6 +256,23 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) if (l2_hv.vcpu_token >= NR_CPUS) return H_PARAMETER; + /* + * L1 must have set up a suspended state to enter the L2 in a + * transactional state, and only in that case. These have to be + * filtered out here to prevent causing a TM Bad Thing in the + * host HRFID. We could synthesize a TM Bad Thing back to the L1 + * here but there doesn't seem like much point. + */ + if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) { + if (!MSR_TM_ACTIVE(l2_regs.msr)) + return H_BAD_MODE; + } else { + if (l2_regs.msr & MSR_TS_MASK) + return H_BAD_MODE; + if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_TS_MASK)) + return H_BAD_MODE; + } + /* translate lpid */ l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true); if (!l2) @@ -489,7 +510,7 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu) if (eaddr & (0xFFFUL << 52)) return H_PARAMETER; - buf = kzalloc(n, GFP_KERNEL); + buf = kzalloc(n, GFP_KERNEL | __GFP_NOWARN); if (!buf) return H_NO_MEM; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 220305454c23c..9bf3be438ac58 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -67,7 +67,7 @@ static int global_invalidates(struct kvm *kvm) * so use the bit for the first thread to represent the core. */ if (cpu_has_feature(CPU_FTR_ARCH_300)) - cpu = cpu_first_thread_sibling(cpu); + cpu = cpu_first_tlb_thread_sibling(cpu); cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush); } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index faebcbb8c4db3..c9c6619564ffa 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -11,6 +11,7 @@ */ #include +#include #include #include #include @@ -291,13 +292,16 @@ kvm_novcpu_exit: * r3 contains the SRR1 wakeup value, SRR1 is trashed. */ _GLOBAL(idle_kvm_start_guest) - ld r4,PACAEMERGSP(r13) mfcr r5 mflr r0 - std r1,0(r4) - std r5,8(r4) - std r0,16(r4) - subi r1,r4,STACK_FRAME_OVERHEAD + std r5, 8(r1) // Save CR in caller's frame + std r0, 16(r1) // Save LR in caller's frame + // Create frame on emergency stack + ld r4, PACAEMERGSP(r13) + stdu r1, -SWITCH_FRAME_SIZE(r4) + // Switch to new frame on emergency stack + mr r1, r4 + std r3, 32(r1) // Save SRR1 wakeup value SAVE_NVGPRS(r1) /* @@ -349,6 +353,10 @@ kvm_unsplit_wakeup: kvm_secondary_got_guest: + // About to go to guest, clear saved SRR1 + li r0, 0 + std r0, 32(r1) + /* Set HSTATE_DSCR(r13) to something sensible */ ld r6, PACA_DSCR_DEFAULT(r13) std r6, HSTATE_DSCR(r13) @@ -440,13 +448,12 @@ kvm_no_guest: mfspr r4, SPRN_LPCR rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 mtspr SPRN_LPCR, r4 - /* set up r3 for return */ - mfspr r3,SPRN_SRR1 + // Return SRR1 wakeup value, or 0 if we went into the guest + ld r3, 32(r1) REST_NVGPRS(r1) - addi r1, r1, STACK_FRAME_OVERHEAD - ld r0, 16(r1) - ld r5, 8(r1) - ld r1, 0(r1) + ld r1, 0(r1) // Switch back to caller stack + ld r0, 16(r1) // Reload LR + ld r5, 8(r1) // Reload CR mtlr r0 mtcr r5 blr @@ -1116,7 +1123,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) ld r7, VCPU_GPR(R7)(r4) bne ret_to_ultra - lwz r0, VCPU_CR(r4) + ld r0, VCPU_CR(r4) mtcr r0 ld r0, VCPU_GPR(R0)(r4) @@ -1136,7 +1143,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) * R3 = UV_RETURN */ ret_to_ultra: - lwz r0, VCPU_CR(r4) + ld r0, VCPU_CR(r4) mtcr r0 ld r0, VCPU_GPR(R3)(r4) @@ -1487,6 +1494,13 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ 1: #endif /* CONFIG_KVM_XICS */ + /* + * Possibly flush the link stack here, before we do a blr in + * guest_exit_short_path. + */ +1: nop + patch_site 1b patch__call_kvm_flush_link_stack + /* If we came in through the P9 short path, go back out to C now */ lwz r0, STACK_SLOT_SHORT_PATH(r1) cmpwi r0, 0 @@ -1963,6 +1977,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) mtlr r0 blr +.balign 32 +.global kvm_flush_link_stack +kvm_flush_link_stack: + /* Save LR into r0 */ + mflr r0 + + /* Flush the link stack. On Power8 it's up to 32 entries in size. */ + .rept 32 + bl .+4 + .endr + + /* And on Power9 it's up to 64. */ +BEGIN_FTR_SECTION + .rept 32 + bl .+4 + .endr +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + + /* Restore LR */ + mtlr r0 + blr + kvmppc_guest_external: /* External interrupt, first check for host_ipi. If this is * set, we know the host wants us out so let's do it now @@ -2499,7 +2535,7 @@ hcall_real_table: .globl hcall_real_table_end hcall_real_table_end: -_GLOBAL(kvmppc_h_set_xdabr) +_GLOBAL_TOC(kvmppc_h_set_xdabr) EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr) andi. r0, r5, DABRX_USER | DABRX_KERNEL beq 6f @@ -2509,7 +2545,7 @@ EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr) 6: li r3, H_PARAMETER blr -_GLOBAL(kvmppc_h_set_dabr) +_GLOBAL_TOC(kvmppc_h_set_dabr) EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr) li r5, DABRX_USER | DABRX_KERNEL 3: @@ -3107,7 +3143,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST) /* The following code handles the fake_suspend = 1 case */ mflr r0 std r0, PPC_LR_STKOFF(r1) - stdu r1, -PPC_MIN_STKFRM(r1) + stdu r1, -TM_FRAME_SIZE(r1) /* Turn on TM. */ mfmsr r8 @@ -3122,10 +3158,42 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) nop + /* + * It's possible that treclaim. may modify registers, if we have lost + * track of fake-suspend state in the guest due to it using rfscv. + * Save and restore registers in case this occurs. + */ + mfspr r3, SPRN_DSCR + mfspr r4, SPRN_XER + mfspr r5, SPRN_AMR + /* SPRN_TAR would need to be saved here if the kernel ever used it */ + mfcr r12 + SAVE_NVGPRS(r1) + SAVE_GPR(2, r1) + SAVE_GPR(3, r1) + SAVE_GPR(4, r1) + SAVE_GPR(5, r1) + stw r12, 8(r1) + std r1, HSTATE_HOST_R1(r13) + /* We have to treclaim here because that's the only way to do S->N */ li r3, TM_CAUSE_KVM_RESCHED TRECLAIM(R3) + GET_PACA(r13) + ld r1, HSTATE_HOST_R1(r13) + REST_GPR(2, r1) + REST_GPR(3, r1) + REST_GPR(4, r1) + REST_GPR(5, r1) + lwz r12, 8(r1) + REST_NVGPRS(r1) + mtspr SPRN_DSCR, r3 + mtspr SPRN_XER, r4 + mtspr SPRN_AMR, r5 + mtcr r12 + HMT_MEDIUM + /* * We were in fake suspend, so we are not going to save the * register state as the guest checkpointed state (since @@ -3153,7 +3221,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) std r5, VCPU_TFHAR(r9) std r6, VCPU_TFIAR(r9) - addi r1, r1, PPC_MIN_STKFRM + addi r1, r1, TM_FRAME_SIZE ld r0, PPC_LR_STKOFF(r1) mtlr r0 blr diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c index 0db9374971697..cc90b8b823291 100644 --- a/arch/powerpc/kvm/book3s_hv_tm.c +++ b/arch/powerpc/kvm/book3s_hv_tm.c @@ -3,6 +3,8 @@ * Copyright 2017 Paul Mackerras, IBM Corp. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include @@ -44,7 +46,18 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) u64 newmsr, bescr; int ra, rs; - switch (instr & 0xfc0007ff) { + /* + * rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit + * in these instructions, so masking bit 31 out doesn't change these + * instructions. For treclaim., tsr., and trechkpt. instructions if bit + * 31 = 0 then they are per ISA invalid forms, however P9 UM, in section + * 4.6.10 Book II Invalid Forms, informs specifically that ignoring bit + * 31 is an acceptable way to handle these invalid forms that have + * bit 31 = 0. Moreover, for emulation purposes both forms (w/ and wo/ + * bit 31 set) can generate a softpatch interrupt. Hence both forms + * are handled below for these instructions so they behave the same way. + */ + switch (instr & PO_XOP_OPCODE_MASK) { case PPC_INST_RFID: /* XXX do we need to check for PR=0 here? */ newmsr = vcpu->arch.shregs.srr1; @@ -105,7 +118,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) vcpu->arch.shregs.msr = newmsr; return RESUME_GUEST; - case PPC_INST_TSR: + /* ignore bit 31, see comment above */ + case (PPC_INST_TSR & PO_XOP_OPCODE_MASK): /* check for PR=1 and arch 2.06 bit set in PCR */ if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) { /* generate an illegal instruction interrupt */ @@ -140,7 +154,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) vcpu->arch.shregs.msr = msr; return RESUME_GUEST; - case PPC_INST_TRECLAIM: + /* ignore bit 31, see comment above */ + case (PPC_INST_TRECLAIM & PO_XOP_OPCODE_MASK): /* check for TM disabled in the HFSCR or MSR */ if (!(vcpu->arch.hfscr & HFSCR_TM)) { /* generate an illegal instruction interrupt */ @@ -176,7 +191,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) vcpu->arch.shregs.msr &= ~MSR_TS_MASK; return RESUME_GUEST; - case PPC_INST_TRECHKPT: + /* ignore bit 31, see comment above */ + case (PPC_INST_TRECHKPT & PO_XOP_OPCODE_MASK): /* XXX do we need to check for PR=0 here? */ /* check for TM disabled in the HFSCR or MSR */ if (!(vcpu->arch.hfscr & HFSCR_TM)) { @@ -208,6 +224,8 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) } /* What should we do here? We didn't recognize the instruction */ - WARN_ON_ONCE(1); + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + pr_warn_ratelimited("Unrecognized TM-related instruction %#x for emulation", instr); + return RESUME_GUEST; } diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c index 217246279dfae..fad931f224efd 100644 --- a/arch/powerpc/kvm/book3s_hv_tm_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c @@ -23,7 +23,18 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu) u64 newmsr, msr, bescr; int rs; - switch (instr & 0xfc0007ff) { + /* + * rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit + * in these instructions, so masking bit 31 out doesn't change these + * instructions. For the tsr. instruction if bit 31 = 0 then it is per + * ISA an invalid form, however P9 UM, in section 4.6.10 Book II Invalid + * Forms, informs specifically that ignoring bit 31 is an acceptable way + * to handle TM-related invalid forms that have bit 31 = 0. Moreover, + * for emulation purposes both forms (w/ and wo/ bit 31 set) can + * generate a softpatch interrupt. Hence both forms are handled below + * for tsr. to make them behave the same way. + */ + switch (instr & PO_XOP_OPCODE_MASK) { case PPC_INST_RFID: /* XXX do we need to check for PR=0 here? */ newmsr = vcpu->arch.shregs.srr1; @@ -73,7 +84,8 @@ int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu) vcpu->arch.shregs.msr = newmsr; return 1; - case PPC_INST_TSR: + /* ignore bit 31, see comment above */ + case (PPC_INST_TSR & PO_XOP_OPCODE_MASK): /* we know the MSR has the TS field = S (0b01) here */ msr = vcpu->arch.shregs.msr; /* check for PR=1 and arch 2.06 bit set in PCR */ diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index cc65af8fe6f7e..3f6ad3f586287 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1769,10 +1769,12 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, err = kvmppc_mmu_init(vcpu); if (err < 0) - goto uninit_vcpu; + goto free_shared_page; return vcpu; +free_shared_page: + free_page((unsigned long)vcpu->arch.shared); uninit_vcpu: kvm_vcpu_uninit(vcpu); free_shadow_vcpu: diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 26b25994c9697..41137ec727622 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -240,6 +240,17 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) * value so we can restore it on the way out. */ orig_rets = args.rets; + if (be32_to_cpu(args.nargs) >= ARRAY_SIZE(args.args)) { + /* + * Don't overflow our args array: ensure there is room for + * at least rets[0] (even if the call specifies 0 nret). + * + * Each handler must then check for the correct nargs and nret + * values, but they may always return failure in rets[0]. + */ + rc = -EINVAL; + goto fail; + } args.rets = &args.args[be32_to_cpu(args.nargs)]; mutex_lock(&vcpu->kvm->arch.rtas_token_lock); @@ -267,9 +278,17 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) fail: /* * We only get here if the guest has called RTAS with a bogus - * args pointer. That means we can't get to the args, and so we - * can't fail the RTAS call. So fail right out to userspace, - * which should kill the guest. + * args pointer or nargs/nret values that would overflow the + * array. That means we can't get to the args, and so we can't + * fail the RTAS call. So fail right out to userspace, which + * should kill the guest. + * + * SLOF should actually pass the hcall return value from the + * rtas handler call in r3, so enter_rtas could be modified to + * return a failure indication in r3 and we could return such + * errors to the guest rather than failing to host userspace. + * However old guests that don't test for failure could then + * continue silently after errors, so for now we won't do this. */ return rc; } diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index a3f9c665bb5ba..baa740815b3cb 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -2005,6 +2005,10 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) pr_devel("Creating xive for partition\n"); + /* Already there ? */ + if (kvm->arch.xive) + return -EEXIST; + xive = kvmppc_xive_get_device(kvm, type); if (!xive) return -ENOMEM; @@ -2014,12 +2018,6 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) xive->kvm = kvm; mutex_init(&xive->lock); - /* Already there ? */ - if (kvm->arch.xive) - ret = -EEXIST; - else - kvm->arch.xive = xive; - /* We use the default queue size set by the host */ xive->q_order = xive_native_default_eq_shift(); if (xive->q_order < PAGE_SHIFT) @@ -2039,6 +2037,7 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) if (ret) return ret; + kvm->arch.xive = xive; return 0; } diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 78b906ffa0d2b..d78d8487c1d6b 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -50,6 +50,24 @@ static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio) } } +static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q, + u8 prio, __be32 *qpage, + u32 order, bool can_escalate) +{ + int rc; + __be32 *qpage_prev = q->qpage; + + rc = xive_native_configure_queue(vp_id, q, prio, qpage, order, + can_escalate); + if (rc) + return rc; + + if (qpage_prev) + put_page(virt_to_page(qpage_prev)); + + return rc; +} + void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; @@ -234,6 +252,13 @@ static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf) } state = &sb->irq_state[src]; + + /* Some sanity checking */ + if (!state->valid) { + pr_devel("%s: source %lx invalid !\n", __func__, irq); + return VM_FAULT_SIGBUS; + } + kvmppc_xive_select_irq(state, &hw_num, &xd); arch_spin_lock(&sb->lock); @@ -582,19 +607,14 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, q->guest_qaddr = 0; q->guest_qshift = 0; - rc = xive_native_configure_queue(xc->vp_id, q, priority, - NULL, 0, true); + rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority, + NULL, 0, true); if (rc) { pr_err("Failed to reset queue %d for VCPU %d: %d\n", priority, xc->server_num, rc); return rc; } - if (q->qpage) { - put_page(virt_to_page(q->qpage)); - q->qpage = NULL; - } - return 0; } @@ -624,17 +644,18 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, srcu_idx = srcu_read_lock(&kvm->srcu); gfn = gpa_to_gfn(kvm_eq.qaddr); - page = gfn_to_page(kvm, gfn); - if (is_error_page(page)) { + + page_size = kvm_host_page_size(vcpu, gfn); + if (1ull << kvm_eq.qshift > page_size) { srcu_read_unlock(&kvm->srcu, srcu_idx); - pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr); + pr_warn("Incompatible host page size %lx!\n", page_size); return -EINVAL; } - page_size = kvm_host_page_size(kvm, gfn); - if (1ull << kvm_eq.qshift > page_size) { + page = gfn_to_page(kvm, gfn); + if (is_error_page(page)) { srcu_read_unlock(&kvm->srcu, srcu_idx); - pr_warn("Incompatible host page size %lx!\n", page_size); + pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr); return -EINVAL; } @@ -653,8 +674,8 @@ static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, * OPAL level because the use of END ESBs is not supported by * Linux. */ - rc = xive_native_configure_queue(xc->vp_id, q, priority, - (__be32 *) qaddr, kvm_eq.qshift, true); + rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority, + (__be32 *) qaddr, kvm_eq.qshift, true); if (rc) { pr_err("Failed to configure queue %d for VCPU %d: %d\n", priority, xc->server_num, rc); @@ -1081,7 +1102,6 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) dev->private = xive; xive->dev = dev; xive->kvm = kvm; - kvm->arch.xive = xive; mutex_init(&xive->mapping_lock); mutex_init(&xive->lock); @@ -1102,6 +1122,7 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) if (ret) return ret; + kvm->arch.xive = xive; return 0; } diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 321db0fdb9db8..7154bd424d243 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -734,7 +734,8 @@ static int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) return 0; } -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, + unsigned flags) { /* kvm_unmap_hva flushes everything anyways */ kvm_unmap_hva(kvm, start); diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 2e496eb86e94a..1139bc56e0045 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -73,7 +73,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; u32 inst; - int ra, rs, rt; enum emulation_result emulated = EMULATE_FAIL; int advance = 1; struct instruction_op op; @@ -85,10 +84,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) if (emulated != EMULATE_DONE) return emulated; - ra = get_ra(inst); - rs = get_rs(inst); - rt = get_rt(inst); - vcpu->arch.mmio_vsx_copy_nums = 0; vcpu->arch.mmio_vsx_offset = 0; vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3a77bb6434521..eb8c72846b7fc 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1495,7 +1495,7 @@ int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, { enum emulation_result emulated = EMULATE_DONE; - if (vcpu->arch.mmio_vsx_copy_nums > 2) + if (vcpu->arch.mmio_vmx_copy_nums > 2) return EMULATE_FAIL; while (vcpu->arch.mmio_vmx_copy_nums) { @@ -1513,7 +1513,7 @@ int kvmppc_handle_vmx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, return emulated; } -int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -1531,7 +1531,7 @@ int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -1549,7 +1549,7 @@ int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -1567,7 +1567,7 @@ int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -1592,7 +1592,7 @@ int kvmppc_handle_vmx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int index = rs & KVM_MMIO_REG_MASK; enum emulation_result emulated = EMULATE_DONE; - if (vcpu->arch.mmio_vsx_copy_nums > 2) + if (vcpu->arch.mmio_vmx_copy_nums > 2) return EMULATE_FAIL; vcpu->arch.io_gpr = rs; @@ -2035,9 +2035,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, { struct kvm_enable_cap cap; r = -EFAULT; - vcpu_load(vcpu); if (copy_from_user(&cap, argp, sizeof(cap))) goto out; + vcpu_load(vcpu); r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); vcpu_put(vcpu); break; @@ -2061,9 +2061,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_DIRTY_TLB: { struct kvm_dirty_tlb dirty; r = -EFAULT; - vcpu_load(vcpu); if (copy_from_user(&dirty, argp, sizeof(dirty))) goto out; + vcpu_load(vcpu); r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty); vcpu_put(vcpu); break; diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index b8de3be10eb47..8656b8d2ce555 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -16,6 +16,9 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING endif +CFLAGS_code-patching.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) +CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + obj-y += alloc.o code-patching.o feature-fixups.o pmem.o ifndef CONFIG_KASAN diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 3345f039a8769..a05f289e613ed 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -221,6 +221,11 @@ bool is_offset_in_branch_range(long offset) return (offset >= -0x2000000 && offset <= 0x1fffffc && !(offset & 0x3)); } +bool is_offset_in_cond_branch_range(long offset) +{ + return offset >= -0x8000 && offset <= 0x7fff && !(offset & 0x3); +} + /* * Helper to check if a given instruction is a conditional branch * Derived from the conditional checks in analyse_instr() @@ -274,7 +279,7 @@ unsigned int create_cond_branch(const unsigned int *addr, offset = offset - (unsigned long)addr; /* Check we can represent the target in the instruction format */ - if (offset < -0x8000 || offset > 0x7FFF || offset & 0x3) + if (!is_offset_in_cond_branch_range(offset)) return 0; /* Mask out the flags and target, so they don't step on each other. */ diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 4ba634b89ce53..c8e260e29f2c4 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -221,11 +222,143 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types) : "unknown"); } +static int __do_stf_barrier_fixups(void *data) +{ + enum stf_barrier_type *types = data; + + do_stf_entry_barrier_fixups(*types); + do_stf_exit_barrier_fixups(*types); + + return 0; +} void do_stf_barrier_fixups(enum stf_barrier_type types) { - do_stf_entry_barrier_fixups(types); - do_stf_exit_barrier_fixups(types); + /* + * The call to the fallback entry flush, and the fallback/sync-ori exit + * flush can not be safely patched in/out while other CPUs are executing + * them. So call __do_stf_barrier_fixups() on one CPU while all other CPUs + * spin in the stop machine core with interrupts hard disabled. + */ + stop_machine(__do_stf_barrier_fixups, &types, NULL); +} + +void do_uaccess_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[4], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___uaccess_flush_fixup); + end = PTRRELOC(&__stop___uaccess_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + instrs[3] = 0x4e800020; /* blr */ + + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[3] = 0x60000000; /* nop */ + /* fallthrough to fallback flush */ + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + + patch_instruction((dest + 1), instrs[1]); + patch_instruction((dest + 2), instrs[2]); + patch_instruction((dest + 3), instrs[3]); + } + + printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); +} + +static int __do_entry_flush_fixups(void *data) +{ + enum l1d_flush_type types = *(enum l1d_flush_type *)data; + unsigned int instrs[3], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___entry_flush_fixup); + end = PTRRELOC(&__stop___entry_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[i++] = 0x7d4802a6; /* mflr r10 */ + instrs[i++] = 0x60000000; /* branch patched below */ + instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + + if (types == L1D_FLUSH_FALLBACK) + patch_branch((dest + 1), (unsigned long)&entry_flush_fallback, + BRANCH_SET_LINK); + else + patch_instruction((dest + 1), instrs[1]); + + patch_instruction((dest + 2), instrs[2]); + } + + printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); + + return 0; +} + +void do_entry_flush_fixups(enum l1d_flush_type types) +{ + /* + * The call to the fallback flush can not be safely patched in/out while + * other CPUs are executing it. So call __do_entry_flush_fixups() on one + * CPU while all other CPUs spin in the stop machine core with interrupts + * hard disabled. + */ + stop_machine(__do_entry_flush_fixups, &types, NULL); } void do_rfi_flush_fixups(enum l1d_flush_type types) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index c077acb983a19..c70e8910bd737 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -906,7 +907,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __put_user_asmx(x, addr, err, op, cr) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: " op " %2,0,%3\n" \ + ".machine pop\n" \ " mfcr %1\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ @@ -919,7 +923,10 @@ NOKPROBE_SYMBOL(emulate_dcbz); #define __get_user_asmx(x, addr, err, op) \ __asm__ __volatile__( \ + ".machine push\n" \ + ".machine power8\n" \ "1: "op" %1,0,%2\n" \ + ".machine pop\n" \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3: li %0,%3\n" \ @@ -2787,12 +2794,14 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) case BARRIER_EIEIO: eieio(); break; +#ifdef CONFIG_PPC64 case BARRIER_LWSYNC: asm volatile("lwsync" : : : "memory"); break; case BARRIER_PTESYNC: asm volatile("ptesync" : : : "memory"); break; +#endif } break; @@ -2910,7 +2919,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) __put_user_asmx(op->val, ea, err, "stbcx.", cr); break; case 2: - __put_user_asmx(op->val, ea, err, "stbcx.", cr); + __put_user_asmx(op->val, ea, err, "sthcx.", cr); break; #endif case 4: diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S index f69a6aab7bfbb..1ddb26394e8ac 100644 --- a/arch/powerpc/lib/string_32.S +++ b/arch/powerpc/lib/string_32.S @@ -17,7 +17,7 @@ CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT CACHELINE_MASK = (L1_CACHE_BYTES-1) -_GLOBAL(__clear_user) +_GLOBAL(__arch_clear_user) /* * Use dcbz on the complete cache lines in the destination * to set them to zero. This requires that the destination @@ -87,4 +87,4 @@ _GLOBAL(__clear_user) EX_TABLE(8b, 91b) EX_TABLE(9b, 91b) -EXPORT_SYMBOL(__clear_user) +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S index 507b18b1660e6..169872bc08928 100644 --- a/arch/powerpc/lib/string_64.S +++ b/arch/powerpc/lib/string_64.S @@ -17,7 +17,7 @@ PPC64_CACHES: .section ".text" /** - * __clear_user: - Zero a block of memory in user space, with less checking. + * __arch_clear_user: - Zero a block of memory in user space, with less checking. * @to: Destination address, in user space. * @n: Number of bytes to zero. * @@ -58,7 +58,7 @@ err3; stb r0,0(r3) mr r3,r4 blr -_GLOBAL_TOC(__clear_user) +_GLOBAL_TOC(__arch_clear_user) cmpdi r4,32 neg r6,r3 li r0,0 @@ -181,4 +181,4 @@ err1; dcbz 0,r3 cmpdi r4,32 blt .Lshort_clear b .Lmedium_clear -EXPORT_SYMBOL(__clear_user) +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 84d5fab94f8f8..1424a120710e4 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -187,6 +187,7 @@ void mmu_mark_initmem_nx(void) int i; unsigned long base = (unsigned long)_stext - PAGE_OFFSET; unsigned long top = (unsigned long)_etext - PAGE_OFFSET; + unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; unsigned long size; if (IS_ENABLED(CONFIG_PPC_BOOK3S_601)) @@ -201,9 +202,10 @@ void mmu_mark_initmem_nx(void) size = block_size(base, top); size = max(size, 128UL << 10); if ((top - base) > size) { - if (strict_kernel_rwx_enabled()) - pr_warn("Kernel _etext not properly aligned\n"); size <<= 1; + if (strict_kernel_rwx_enabled() && base + size > border) + pr_warn("Some RW data is getting mapped X. " + "Adjust CONFIG_DATA_SHIFT to avoid that.\n"); } setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT); base += size; diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 6c123760164e8..83c51a7d7eee6 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -294,10 +294,18 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot, HPTE_V_BOLTED, psize, psize, ssize); - + if (ret == -1) { + /* Try to remove a non bolted entry */ + ret = mmu_hash_ops.hpte_remove(hpteg); + if (ret != -1) + ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot, + HPTE_V_BOLTED, psize, psize, + ssize); + } if (ret < 0) break; + cond_resched(); #ifdef CONFIG_DEBUG_PAGEALLOC if (debug_pagealloc_enabled() && (paddr >> PAGE_SHIFT) < linear_map_hash_count) diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c index 56cc845205779..ef164851738b8 100644 --- a/arch/powerpc/mm/book3s64/iommu_api.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c @@ -121,24 +121,6 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, goto free_exit; } - pageshift = PAGE_SHIFT; - for (i = 0; i < entries; ++i) { - struct page *page = mem->hpages[i]; - - /* - * Allow to use larger than 64k IOMMU pages. Only do that - * if we are backed by hugetlb. - */ - if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) - pageshift = page_shift(compound_head(page)); - mem->pageshift = min(mem->pageshift, pageshift); - /* - * We don't need struct page reference any more, switch - * to physical address. - */ - mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; - } - good_exit: atomic64_set(&mem->mapped, 1); mem->used = 1; @@ -158,6 +140,27 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, } } + if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) { + /* + * Allow to use larger than 64k IOMMU pages. Only do that + * if we are backed by hugetlb. Skip device memory as it is not + * backed with page structs. + */ + pageshift = PAGE_SHIFT; + for (i = 0; i < entries; ++i) { + struct page *page = mem->hpages[i]; + + if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) + pageshift = page_shift(compound_head(page)); + mem->pageshift = min(mem->pageshift, pageshift); + /* + * We don't need struct page reference any more, switch + * to physical address. + */ + mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; + } + } + list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list); mutex_unlock(&mem_list_mutex); diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 75483b40fcb13..2bf7e1b4fd826 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -378,7 +378,6 @@ static inline void pgtable_free(void *table, int index) } } -#ifdef CONFIG_SMP void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index) { unsigned long pgf = (unsigned long)table; @@ -395,12 +394,6 @@ void __tlb_remove_table(void *_table) return pgtable_free(table, index); } -#else -void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index) -{ - return pgtable_free(table, index); -} -#endif #ifdef CONFIG_PROC_FS atomic_long_t direct_pages_count[MMU_PAGE_COUNT]; diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index ae7fca40e5b37..432fd9fa8c3f0 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -83,13 +83,17 @@ static int pkey_initialize(void) scan_pkey_feature(); /* - * Let's assume 32 pkeys on P8 bare metal, if its not defined by device - * tree. We make this exception since skiboot forgot to expose this - * property on power8. + * Let's assume 32 pkeys on P8/P9 bare metal, if its not defined by device + * tree. We make this exception since some version of skiboot forgot to + * expose this property on power8/9. */ - if (!pkeys_devtree_defined && !firmware_has_feature(FW_FEATURE_LPAR) && - cpu_has_feature(CPU_FTRS_POWER8)) - pkeys_total = 32; + if (!pkeys_devtree_defined && !firmware_has_feature(FW_FEATURE_LPAR)) { + unsigned long pvr = mfspr(SPRN_PVR); + + if (PVR_VER(pvr) == PVR_POWER8 || PVR_VER(pvr) == PVR_POWER8E || + PVR_VER(pvr) == PVR_POWER8NVL || PVR_VER(pvr) == PVR_POWER9) + pkeys_total = 32; + } /* * Adjust the upper limit, based on the number of bits supported by @@ -367,12 +371,14 @@ static bool pkey_access_permitted(int pkey, bool write, bool execute) return true; pkey_shift = pkeyshift(pkey); - if (execute && !(read_iamr() & (IAMR_EX_BIT << pkey_shift))) - return true; + if (execute) + return !(read_iamr() & (IAMR_EX_BIT << pkey_shift)); + + amr = read_amr(); + if (write) + return !(amr & (AMR_WR_BIT << pkey_shift)); - amr = read_amr(); /* Delay reading amr until absolutely needed */ - return ((!write && !(amr & (AMR_RD_BIT << pkey_shift))) || - (write && !(amr & (AMR_WR_BIT << pkey_shift)))); + return !(amr & (AMR_RD_BIT << pkey_shift)); } bool arch_pte_access_permitted(u64 pte, bool write, bool execute) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 6ee17d09649c3..bdcb07a98cd37 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -97,7 +97,7 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa, set_the_pte: set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); - smp_wmb(); + asm volatile("ptesync": : :"memory"); return 0; } @@ -155,7 +155,7 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa, set_the_pte: set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); - smp_wmb(); + asm volatile("ptesync": : :"memory"); return 0; } @@ -643,21 +643,6 @@ void radix__mmu_cleanup_all(void) } } -void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, - phys_addr_t first_memblock_size) -{ - /* - * We don't currently support the first MEMBLOCK not mapping 0 - * physical on those processors - */ - BUG_ON(first_memblock_base != 0); - - /* - * Radix mode is not limited by RMA / VRMA addressing. - */ - ppc64_rma_size = ULONG_MAX; -} - #ifdef CONFIG_MEMORY_HOTPLUG static void free_pte_table(pte_t *pte_start, pmd_t *pmd) { diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 67af871190c6d..b0f240afffa22 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -639,19 +639,29 @@ static void do_exit_flush_lazy_tlb(void *arg) struct mm_struct *mm = arg; unsigned long pid = mm->context.id; + /* + * A kthread could have done a mmget_not_zero() after the flushing CPU + * checked mm_is_singlethreaded, and be in the process of + * kthread_use_mm when interrupted here. In that case, current->mm will + * be set to mm, because kthread_use_mm() setting ->mm and switching to + * the mm is done with interrupts off. + */ if (current->mm == mm) - return; /* Local CPU */ + goto out_flush; if (current->active_mm == mm) { - /* - * Must be a kernel thread because sender is single-threaded. - */ - BUG_ON(current->mm); + WARN_ON_ONCE(current->mm != NULL); + /* Is a kernel thread and is using mm as the lazy tlb */ mmgrab(&init_mm); - switch_mm(mm, &init_mm, current); current->active_mm = &init_mm; + switch_mm_irqs_off(mm, &init_mm, current); mmdrop(mm); } + + atomic_dec(&mm->context.active_cpus); + cpumask_clear_cpu(smp_processor_id(), mm_cpumask(mm)); + +out_flush: _tlbiel_pid(pid, RIC_FLUSH_ALL); } @@ -666,7 +676,6 @@ static void exit_flush_lazy_tlbs(struct mm_struct *mm) */ smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb, (void *)mm, 1); - mm_reset_thread_local(mm); } void radix__flush_tlb_mm(struct mm_struct *mm) diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 59327cefbc6a6..873fcfc7b8756 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -362,10 +362,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop) if (!drmem_info->lmbs) return; - for_each_drmem_lmb(lmb) { + for_each_drmem_lmb(lmb) read_drconf_v1_cell(lmb, &prop); - lmb_set_nid(lmb); - } } static void __init init_drmem_v2_lmbs(const __be32 *prop) @@ -410,8 +408,6 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop) lmb->aa_index = dr_cell.aa_index; lmb->flags = dr_cell.flags; - - lmb_set_nid(lmb); } } } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 8432c281de921..9f4a78e3cde9e 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -204,9 +204,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, { int is_exec = TRAP(regs) == 0x400; - /* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */ - if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT | - DSISR_PROTFAULT))) { + if (is_exec) { pr_crit_ratelimited("kernel tried to execute %s page (%lx) - exploit attempt? (uid: %d)\n", address >= TASK_SIZE ? "exec-protected" : "user", address, @@ -233,7 +231,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, // Read/write fault in a valid region (the exception table search passed // above), but blocked by KUAP is bad, it can never succeed. - if (bad_kuap_fault(regs, is_write)) + if (bad_kuap_fault(regs, address, is_write)) return true; // What's left? Kernel fault on user in well defined regions (extable @@ -241,6 +239,9 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, return false; } +// This comes from 64-bit struct rt_sigframe + __SIGNAL_FRAMESIZE +#define SIGFRAME_MAX_SIZE (4096 + 128) + static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, struct vm_area_struct *vma, unsigned int flags, bool *must_retry) @@ -248,7 +249,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, /* * N.B. The POWER/Open ABI allows programs to access up to * 288 bytes below the stack pointer. - * The kernel signal delivery code writes up to about 1.5kB + * The kernel signal delivery code writes a bit over 4KB * below the stack pointer (r1) before decrementing it. * The exec code can write slightly over 640kB to the stack * before setting the user r1. Thus we allow the stack to @@ -273,7 +274,7 @@ static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, * between the last mapped region and the stack will * expand the stack rather than segfaulting. */ - if (address + 2048 >= uregs->gpr[1]) + if (address + SIGFRAME_MAX_SIZE >= uregs->gpr[1]) return false; if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) && @@ -346,7 +347,6 @@ static inline void cmo_account_page_fault(void) static inline void cmo_account_page_fault(void) { } #endif /* CONFIG_PPC_SMLPAR */ -#ifdef CONFIG_PPC_BOOK3S static void sanity_check_fault(bool is_write, bool is_user, unsigned long error_code, unsigned long address) { @@ -354,12 +354,18 @@ static void sanity_check_fault(bool is_write, bool is_user, * Userspace trying to access kernel address, we get PROTFAULT for that. */ if (is_user && address >= TASK_SIZE) { + if ((long)address == -1) + return; + pr_crit_ratelimited("%s[%d]: User access of kernel address (%lx) - exploit attempt? (uid: %d)\n", current->comm, current->pid, address, from_kuid(&init_user_ns, current_uid())); return; } + if (!IS_ENABLED(CONFIG_PPC_BOOK3S)) + return; + /* * For hash translation mode, we should never get a * PROTFAULT. Any update to pte to reduce access will result in us @@ -394,10 +400,6 @@ static void sanity_check_fault(bool is_write, bool is_user, WARN_ON_ONCE(error_code & DSISR_PROTFAULT); } -#else -static void sanity_check_fault(bool is_write, bool is_user, - unsigned long error_code, unsigned long address) { } -#endif /* CONFIG_PPC_BOOK3S */ /* * Define the correct "is_write" bit in error_code based diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 73d4873fc7f85..33b3461d91e8d 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -53,20 +53,24 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (pshift >= pdshift) { cachep = PGT_CACHE(PTE_T_ORDER); num_hugepd = 1 << (pshift - pdshift); + new = NULL; } else if (IS_ENABLED(CONFIG_PPC_8xx)) { - cachep = PGT_CACHE(PTE_INDEX_SIZE); + cachep = NULL; num_hugepd = 1; + new = pte_alloc_one(mm); } else { cachep = PGT_CACHE(pdshift - pshift); num_hugepd = 1; + new = NULL; } - if (!cachep) { + if (!cachep && !new) { WARN_ONCE(1, "No page table cache created for hugetlb tables"); return -ENOMEM; } - new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); + if (cachep) + new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); @@ -97,7 +101,10 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (i < num_hugepd) { for (i = i - 1 ; i >= 0; i--, hpdp--) *hpdp = __hugepd(0); - kmem_cache_free(cachep, new); + if (cachep) + kmem_cache_free(cachep, new); + else + pte_free(mm, new); } else { kmemleak_ignore(new); } @@ -324,8 +331,7 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (shift >= pdshift) hugepd_free(tlb, hugepte); else if (IS_ENABLED(CONFIG_PPC_8xx)) - pgtable_free_tlb(tlb, hugepte, - get_hugepd_cache_index(PTE_INDEX_SIZE)); + pgtable_free_tlb(tlb, hugepte, 0); else pgtable_free_tlb(tlb, hugepte, get_hugepd_cache_index(pdshift - shift)); @@ -639,12 +645,13 @@ static int __init hugetlbpage_init(void) * if we have pdshift and shift value same, we don't * use pgt cache for hugepd. */ - if (pdshift > shift && IS_ENABLED(CONFIG_PPC_8xx)) - pgtable_cache_add(PTE_INDEX_SIZE); - else if (pdshift > shift) - pgtable_cache_add(pdshift - shift); - else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || IS_ENABLED(CONFIG_PPC_8xx)) + if (pdshift > shift) { + if (!IS_ENABLED(CONFIG_PPC_8xx)) + pgtable_cache_add(pdshift - shift); + } else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || + IS_ENABLED(CONFIG_PPC_8xx)) { pgtable_cache_add(PTE_T_ORDER); + } configured = true; } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 4e08246acd79a..210f1c28b8e41 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -415,9 +415,16 @@ void __init mmu_early_init_devtree(void) if (!(mfmsr() & MSR_HV)) early_check_vec5(); - if (early_radix_enabled()) + if (early_radix_enabled()) { radix__early_init_devtree(); - else + /* + * We have finalized the translation we are going to use by now. + * Radix mode is not limited by RMA / VRMA addressing. + * Hence don't limit memblock allocations. + */ + ppc64_rma_size = ULONG_MAX; + memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); + } else hash__early_init_devtree(); } #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 0e6ed4413eeac..3f78007a72822 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -117,11 +117,11 @@ static void __init kasan_remap_early_shadow_ro(void) kasan_populate_pte(kasan_early_shadow_pte, prot); - for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { + for (k_cur = k_start & PAGE_MASK; k_cur != k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); pte_t *ptep = pte_offset_kernel(pmd, k_cur); - if ((pte_val(*ptep) & PTE_RPN_MASK) != pa) + if (pte_page(*ptep) != virt_to_page(lm_alias(kasan_early_shadow_page))) continue; __set_pte_at(&init_mm, k_cur, ptep, pfn_pte(PHYS_PFN(pa), prot), 0); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index be941d382c8d1..d427f70556eab 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -48,6 +48,7 @@ #include #include #include +#include #include @@ -104,6 +105,27 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } +#define FLUSH_CHUNK_SIZE SZ_1G +/** + * flush_dcache_range_chunked(): Write any modified data cache blocks out to + * memory and invalidate them, in chunks of up to FLUSH_CHUNK_SIZE + * Does not invalidate the corresponding instruction cache blocks. + * + * @start: the start address + * @stop: the stop address (exclusive) + * @chunk: the max size of the chunks + */ +static void flush_dcache_range_chunked(unsigned long start, unsigned long stop, + unsigned long chunk) +{ + unsigned long i; + + for (i = start; i < stop; i += chunk) { + flush_dcache_range(i, min(stop, i + chunk)); + cond_resched(); + } +} + int __ref arch_add_memory(int nid, u64 start, u64 size, struct mhp_restrictions *restrictions) { @@ -120,7 +142,8 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, start, start + size, rc); return -EFAULT; } - flush_dcache_range(start, start + size); + + flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE); return __add_pages(nid, start_pfn, nr_pages, restrictions); } @@ -130,14 +153,14 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap); int ret; - __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); /* Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(start); - flush_dcache_range(start, start + size); + flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE); + ret = remove_section_mapping(start, start + size); WARN_ON_ONCE(ret); @@ -260,6 +283,14 @@ void __init mem_init(void) BUILD_BUG_ON(MMU_PAGE_COUNT > 16); #ifdef CONFIG_SWIOTLB + /* + * Some platforms (e.g. 85xx) limit DMA-able memory way below + * 4G. We force memblock to bottom-up mode to ensure that the + * memory allocated in swiotlb_init() is DMA-able. + * As it's the last memblock allocation, no need to reset it + * back to to-down. + */ + memblock_set_bottom_up(true); swiotlb_init(0); #endif @@ -316,7 +347,124 @@ void free_initmem(void) mark_initmem_nx(); init_mem_is_free = true; free_initmem_default(POISON_FREE_INITMEM); + ftrace_free_init_tramp(); +} + +/** + * flush_coherent_icache() - if a CPU has a coherent icache, flush it + * @addr: The base address to use (can be any valid address, the whole cache will be flushed) + * Return true if the cache was flushed, false otherwise + */ +static inline bool flush_coherent_icache(unsigned long addr) +{ + /* + * For a snooping icache, we still need a dummy icbi to purge all the + * prefetched instructions from the ifetch buffers. We also need a sync + * before the icbi to order the the actual stores to memory that might + * have modified instructions with the icbi. + */ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { + mb(); /* sync */ + allow_read_from_user((const void __user *)addr, L1_CACHE_BYTES); + icbi((void *)addr); + prevent_read_from_user((const void __user *)addr, L1_CACHE_BYTES); + mb(); /* sync */ + isync(); + return true; + } + + return false; +} + +/** + * invalidate_icache_range() - Flush the icache by issuing icbi across an address range + * @start: the start address + * @stop: the stop address (exclusive) + */ +static void invalidate_icache_range(unsigned long start, unsigned long stop) +{ + unsigned long shift = l1_icache_shift(); + unsigned long bytes = l1_icache_bytes(); + char *addr = (char *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); + unsigned long i; + + for (i = 0; i < size >> shift; i++, addr += bytes) + icbi(addr); + + mb(); /* sync */ + isync(); +} + +/** + * flush_icache_range: Write any modified data cache blocks out to memory + * and invalidate the corresponding blocks in the instruction cache + * + * Generic code will call this after writing memory, before executing from it. + * + * @start: the start address + * @stop: the stop address (exclusive) + */ +void flush_icache_range(unsigned long start, unsigned long stop) +{ + if (flush_coherent_icache(start)) + return; + + clean_dcache_range(start, stop); + + if (IS_ENABLED(CONFIG_44x)) { + /* + * Flash invalidate on 44x because we are passed kmapped + * addresses and this doesn't work for userspace pages due to + * the virtually tagged icache. + */ + iccci((void *)start); + mb(); /* sync */ + isync(); + } else + invalidate_icache_range(start, stop); +} +EXPORT_SYMBOL(flush_icache_range); + +#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) +/** + * flush_dcache_icache_phys() - Flush a page by it's physical address + * @physaddr: the physical address of the page + */ +static void flush_dcache_icache_phys(unsigned long physaddr) +{ + unsigned long bytes = l1_dcache_bytes(); + unsigned long nb = PAGE_SIZE / bytes; + unsigned long addr = physaddr & PAGE_MASK; + unsigned long msr, msr0; + unsigned long loop1 = addr, loop2 = addr; + + msr0 = mfmsr(); + msr = msr0 & ~MSR_DR; + /* + * This must remain as ASM to prevent potential memory accesses + * while the data MMU is disabled + */ + asm volatile( + " mtctr %2;\n" + " mtmsr %3;\n" + " isync;\n" + "0: dcbst 0, %0;\n" + " addi %0, %0, %4;\n" + " bdnz 0b;\n" + " sync;\n" + " mtctr %2;\n" + "1: icbi 0, %1;\n" + " addi %1, %1, %4;\n" + " bdnz 1b;\n" + " sync;\n" + " mtmsr %5;\n" + " isync;\n" + : "+&r" (loop1), "+&r" (loop2) + : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0) + : "ctr", "memory"); } +#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64) /* * This is called when a page has been modified by the kernel. @@ -350,12 +498,46 @@ void flush_dcache_icache_page(struct page *page) __flush_dcache_icache(start); kunmap_atomic(start); } else { - __flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT); + unsigned long addr = page_to_pfn(page) << PAGE_SHIFT; + + if (flush_coherent_icache(addr)) + return; + flush_dcache_icache_phys(addr); } #endif } EXPORT_SYMBOL(flush_dcache_icache_page); +/** + * __flush_dcache_icache(): Flush a particular page from the data cache to RAM. + * Note: this is necessary because the instruction cache does *not* + * snoop from the data cache. + * + * @page: the address of the page to flush + */ +void __flush_dcache_icache(void *p) +{ + unsigned long addr = (unsigned long)p; + + if (flush_coherent_icache(addr)) + return; + + clean_dcache_range(addr, addr + PAGE_SIZE); + + /* + * We don't flush the icache on 44x. Those have a virtual icache and we + * don't have access to the virtual address here (it's not the page + * vaddr but where it's mapped in user space). The flushing of the + * icache on these is handled elsewhere, when a change in the address + * space occurs, before returning to user space. + */ + + if (mmu_has_feature(MMU_FTR_TYPE_44x)) + return; + + invalidate_icache_range(addr, addr + PAGE_SIZE); +} + void clear_user_page(void *page, unsigned long vaddr, struct page *pg) { clear_page(page); diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index 18f20da0d3483..64290d343b557 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -79,7 +79,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * context */ if (cpu_has_feature(CPU_FTR_ALTIVEC)) - asm volatile ("dssall"); + asm volatile (PPC_DSSALL); if (new_on_cpu) radix_kvm_prefetch_workaround(next); diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S index 2ca407cedbe79..eaeee402f96e8 100644 --- a/arch/powerpc/mm/nohash/tlb_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S @@ -397,7 +397,7 @@ _GLOBAL(set_context) * extern void loadcam_entry(unsigned int index) * * Load TLBCAM[index] entry in to the L2 CAM MMU - * Must preserve r7, r8, r9, and r10 + * Must preserve r7, r8, r9, r10 and r11 */ _GLOBAL(loadcam_entry) mflr r5 @@ -433,6 +433,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) */ _GLOBAL(loadcam_multi) mflr r8 + /* Don't switch to AS=1 if already there */ + mfmsr r11 + andi. r11,r11,MSR_IS + bne 10f /* * Set up temporary TLB entry that is the same as what we're @@ -458,6 +462,7 @@ _GLOBAL(loadcam_multi) mtmsr r6 isync +10: mr r9,r3 add r10,r3,r4 2: bl loadcam_entry @@ -466,6 +471,10 @@ _GLOBAL(loadcam_multi) mr r3,r9 blt 2b + /* Don't return to AS=0 if we were in AS=1 at function start */ + andi. r11,r11,MSR_IS + bne 3f + /* Return to AS=0 and clear the temporary entry */ mfmsr r6 rlwinm. r6,r6,0,~(MSR_IS|MSR_DS) @@ -481,6 +490,7 @@ _GLOBAL(loadcam_multi) tlbwe isync +3: mtlr r8 blr #endif diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 8ec5dfb65b2eb..da9f722d9f168 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -207,7 +207,7 @@ void mark_initmem_nx(void) unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); - if (v_block_mapped((unsigned long)_stext + 1)) + if (v_block_mapped((unsigned long)_sinittext)) mmu_mark_initmem_nx(); else change_page_attr(page, numpages, PAGE_KERNEL); @@ -219,8 +219,9 @@ void mark_rodata_ro(void) struct page *page; unsigned long numpages; - if (v_block_mapped((unsigned long)_sinittext)) { + if (v_block_mapped((unsigned long)_stext + 1)) { mmu_mark_rodata_ro(); + ptdump_check_wx(); return; } diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index a07278027c6f4..a2e8c3b2cf351 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -259,7 +259,7 @@ static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 * for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) { lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes); - if (lpar_rc != H_SUCCESS) + if (lpar_rc) continue; for (j = 0; j < 4; j++) { if (HPTE_V_COMPARE(ptes[j].v, want_v) && diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 2f9ddc29c5355..633711bf1cae4 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -58,6 +58,7 @@ struct pg_state { unsigned long start_address; unsigned long start_pa; unsigned long last_pa; + unsigned long page_size; unsigned int level; u64 current_flags; bool check_wx; @@ -155,9 +156,9 @@ static void dump_addr(struct pg_state *st, unsigned long addr) #endif pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1); - if (st->start_pa == st->last_pa && st->start_address + PAGE_SIZE != addr) { + if (st->start_pa == st->last_pa && st->start_address + st->page_size != addr) { pt_dump_seq_printf(st->seq, "[" REG "]", st->start_pa); - delta = PAGE_SIZE >> 10; + delta = st->page_size >> 10; } else { pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa); delta = (addr - st->start_address) >> 10; @@ -173,10 +174,12 @@ static void dump_addr(struct pg_state *st, unsigned long addr) static void note_prot_wx(struct pg_state *st, unsigned long addr) { + pte_t pte = __pte(st->current_flags); + if (!IS_ENABLED(CONFIG_PPC_DEBUG_WX) || !st->check_wx) return; - if (!((st->current_flags & pgprot_val(PAGE_KERNEL_X)) == pgprot_val(PAGE_KERNEL_X))) + if (!pte_write(pte) || !pte_exec(pte)) return; WARN_ONCE(1, "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n", @@ -186,7 +189,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) } static void note_page(struct pg_state *st, unsigned long addr, - unsigned int level, u64 val) + unsigned int level, u64 val, unsigned long page_size) { u64 flag = val & pg_level[level].mask; u64 pa = val & PTE_RPN_MASK; @@ -198,6 +201,7 @@ static void note_page(struct pg_state *st, unsigned long addr, st->start_address = addr; st->start_pa = pa; st->last_pa = pa; + st->page_size = page_size; pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); /* * Dump the section of virtual memory when: @@ -209,7 +213,7 @@ static void note_page(struct pg_state *st, unsigned long addr, */ } else if (flag != st->current_flags || level != st->level || addr >= st->marker[1].start_address || - (pa != st->last_pa + PAGE_SIZE && + (pa != st->last_pa + st->page_size && (pa != st->start_pa || st->start_pa != st->last_pa))) { /* Check the PTE flags */ @@ -237,6 +241,7 @@ static void note_page(struct pg_state *st, unsigned long addr, st->start_address = addr; st->start_pa = pa; st->last_pa = pa; + st->page_size = page_size; st->current_flags = flag; st->level = level; } else { @@ -252,7 +257,7 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) for (i = 0; i < PTRS_PER_PTE; i++, pte++) { addr = start + i * PAGE_SIZE; - note_page(st, addr, 4, pte_val(*pte)); + note_page(st, addr, 4, pte_val(*pte), PAGE_SIZE); } } @@ -269,7 +274,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) /* pmd exists */ walk_pte(st, pmd, addr); else - note_page(st, addr, 3, pmd_val(*pmd)); + note_page(st, addr, 3, pmd_val(*pmd), PMD_SIZE); } } @@ -285,7 +290,7 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) /* pud exists */ walk_pmd(st, pud, addr); else - note_page(st, addr, 2, pud_val(*pud)); + note_page(st, addr, 2, pud_val(*pud), PUD_SIZE); } } @@ -304,7 +309,7 @@ static void walk_pagetables(struct pg_state *st) /* pgd exists */ walk_pud(st, pgd, addr); else - note_page(st, addr, 1, pgd_val(*pgd)); + note_page(st, addr, 1, pgd_val(*pgd), PGDIR_SIZE); } } @@ -359,7 +364,7 @@ static int ptdump_show(struct seq_file *m, void *v) /* Traverse kernel page tables */ walk_pagetables(&st); - note_page(&st, 0, 0, 0); + note_page(&st, 0, 0, 0, 0); return 0; } diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c index f7ed2f187cb01..f10f2158c8afb 100644 --- a/arch/powerpc/mm/ptdump/shared.c +++ b/arch/powerpc/mm/ptdump/shared.c @@ -17,9 +17,9 @@ static const struct flag_info flag_array[] = { .clear = " ", }, { .mask = _PAGE_RW, - .val = _PAGE_RW, - .set = "rw", - .clear = "r ", + .val = 0, + .set = "r ", + .clear = "rw", }, { .mask = _PAGE_EXEC, .val = _PAGE_EXEC, @@ -30,6 +30,11 @@ static const struct flag_info flag_array[] = { .val = _PAGE_PRESENT, .set = "present", .clear = " ", + }, { + .mask = _PAGE_COHERENT, + .val = _PAGE_COHERENT, + .set = "coherent", + .clear = " ", }, { .mask = _PAGE_GUARDED, .val = _PAGE_GUARDED, diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 42bbcd47cc85f..dffe1a45b6ed4 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -50,7 +50,7 @@ static void slice_print_mask(const char *label, const struct slice_mask *mask) { #endif -static inline bool slice_addr_is_low(unsigned long addr) +static inline notrace bool slice_addr_is_low(unsigned long addr) { u64 tmp = (u64)addr; @@ -659,7 +659,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, mm_ctx_user_psize(¤t->mm->context), 1); } -unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) +unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr) { unsigned char *psizes; int index, mask_index; diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 55d4377ccfae3..6d2a268528dfc 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -11,6 +11,7 @@ #ifndef __ASSEMBLY__ #include +#include #ifdef PPC64_ELF_ABI_v1 #define FUNCTION_DESCR_SIZE 24 @@ -180,13 +181,26 @@ #define PPC_NEG(d, a) EMIT(PPC_INST_NEG | ___PPC_RT(d) | ___PPC_RA(a)) /* Long jump; (unconditional 'branch') */ -#define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \ - (((dest) - (ctx->idx * 4)) & 0x03fffffc)) +#define PPC_JMP(dest) \ + do { \ + long offset = (long)(dest) - (ctx->idx * 4); \ + if (!is_offset_in_branch_range(offset)) { \ + pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ + return -ERANGE; \ + } \ + EMIT(PPC_INST_BRANCH | (offset & 0x03fffffc)); \ + } while (0) /* "cond" here covers BO:BI fields. */ -#define PPC_BCC_SHORT(cond, dest) EMIT(PPC_INST_BRANCH_COND | \ - (((cond) & 0x3ff) << 16) | \ - (((dest) - (ctx->idx * 4)) & \ - 0xfffc)) +#define PPC_BCC_SHORT(cond, dest) \ + do { \ + long offset = (long)(dest) - (ctx->idx * 4); \ + if (!is_offset_in_cond_branch_range(offset)) { \ + pr_err_ratelimited("Conditional branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ + return -ERANGE; \ + } \ + EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \ + } while (0) + /* Sign-extended 32-bit immediate load */ #define PPC_LI32(d, i) do { \ if ((int)(uintptr_t)(i) >= -32768 && \ @@ -225,11 +239,6 @@ #define PPC_FUNC_ADDR(d,i) do { PPC_LI32(d, i); } while(0) #endif -static inline bool is_nearbranch(int offset) -{ - return (offset < 32768) && (offset >= -32768); -} - /* * The fly in the ointment of code size changing from pass to pass is * avoided by padding the short branch case with a NOP. If code size differs @@ -238,7 +247,7 @@ static inline bool is_nearbranch(int offset) * state. */ #define PPC_BCC(cond, dest) do { \ - if (is_nearbranch((dest) - (ctx->idx * 4))) { \ + if (is_offset_in_cond_branch_range((long)(dest) - (ctx->idx * 4))) { \ PPC_BCC_SHORT(cond, dest); \ PPC_NOP(); \ } else { \ diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index cf3a7e337f025..68ddf3502ab03 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -16,18 +16,18 @@ * with our redzone usage. * * [ prev sp ] <------------- - * [ nv gpr save area ] 6*8 | + * [ nv gpr save area ] 5*8 | * [ tail_call_cnt ] 8 | - * [ local_tmp_var ] 8 | + * [ local_tmp_var ] 16 | * fp (r31) --> [ ebpf stack space ] upto 512 | * [ frame header ] 32/112 | * sp (r1) ---> [ stack pointer ] -------------- */ /* for gpr non volatile registers BPG_REG_6 to 10 */ -#define BPF_PPC_STACK_SAVE (6*8) +#define BPF_PPC_STACK_SAVE (5*8) /* for bpf JIT code internal usage */ -#define BPF_PPC_STACK_LOCALS 16 +#define BPF_PPC_STACK_LOCALS 24 /* stack frame excluding BPF stack, ensure this is quadword aligned */ #define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index be3517ef0574d..687cd352648aa 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "bpf_jit64.h" @@ -56,9 +57,9 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * [ prev sp ] <------------- * [ ... ] | * sp (r1) ---> [ stack pointer ] -------------- - * [ nv gpr save area ] 6*8 + * [ nv gpr save area ] 5*8 * [ tail_call_cnt ] 8 - * [ local_tmp_var ] 8 + * [ local_tmp_var ] 16 * [ unused red zone ] 208 bytes protected */ static int bpf_jit_stack_local(struct codegen_context *ctx) @@ -66,12 +67,12 @@ static int bpf_jit_stack_local(struct codegen_context *ctx) if (bpf_has_stack_frame(ctx)) return STACK_FRAME_MIN_SIZE + ctx->stack_size; else - return -(BPF_PPC_STACK_SAVE + 16); + return -(BPF_PPC_STACK_SAVE + 24); } static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx) { - return bpf_jit_stack_local(ctx) + 8; + return bpf_jit_stack_local(ctx) + 16; } static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) @@ -224,7 +225,7 @@ static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, PPC_BLRL(); } -static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) +static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) { /* * By now, the eBPF program has already setup parameters in r3, r4 and r5 @@ -285,14 +286,39 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 bpf_jit_emit_common_epilogue(image, ctx); PPC_BCTR(); + /* out: */ + return 0; } +/* + * We spill into the redzone always, even if the bpf program has its own stackframe. + * Offsets hardcoded based on BPF_PPC_STACK_SAVE -- see bpf_jit_stack_local() + */ +void bpf_stf_barrier(void); + +asm ( +" .global bpf_stf_barrier ;" +" bpf_stf_barrier: ;" +" std 21,-64(1) ;" +" std 22,-56(1) ;" +" sync ;" +" ld 21,-64(1) ;" +" ld 22,-56(1) ;" +" ori 31,31,0 ;" +" .rept 14 ;" +" b 1f ;" +" 1: ;" +" .endr ;" +" blr ;" +); + /* Assemble the body code between the prologue & epilogue */ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, u32 *addrs, bool extra_pass) { + enum stf_barrier_type stf_barrier = stf_barrier_type_get(); const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; int i, ret; @@ -347,18 +373,25 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_SUB(dst_reg, dst_reg, src_reg); goto bpf_alu32_trunc; case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */ - case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */ + if (!imm) { + goto bpf_alu32_trunc; + } else if (imm >= -32768 && imm < 32768) { + PPC_ADDI(dst_reg, dst_reg, IMM_L(imm)); + } else { + PPC_LI32(b2p[TMP_REG_1], imm); + PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]); + } + goto bpf_alu32_trunc; + case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */ - if (BPF_OP(code) == BPF_SUB) - imm = -imm; - if (imm) { - if (imm >= -32768 && imm < 32768) - PPC_ADDI(dst_reg, dst_reg, IMM_L(imm)); - else { - PPC_LI32(b2p[TMP_REG_1], imm); - PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]); - } + if (!imm) { + goto bpf_alu32_trunc; + } else if (imm > -32768 && imm <= 32768) { + PPC_ADDI(dst_reg, dst_reg, IMM_L(-imm)); + } else { + PPC_LI32(b2p[TMP_REG_1], imm); + PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); } goto bpf_alu32_trunc; case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ @@ -408,8 +441,14 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */ if (imm == 0) return -EINVAL; - else if (imm == 1) - goto bpf_alu32_trunc; + if (imm == 1) { + if (BPF_OP(code) == BPF_DIV) { + goto bpf_alu32_trunc; + } else { + PPC_LI(dst_reg, 0); + break; + } + } PPC_LI32(b2p[TMP_REG_1], imm); switch (BPF_CLASS(code)) { @@ -644,6 +683,36 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, } break; + /* + * BPF_ST NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + if (!security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) || + (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) && + (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) || !cpu_has_feature(CPU_FTR_HVMODE)))) + break; + + switch (stf_barrier) { + case STF_BARRIER_EIEIO: + EMIT(0x7c0006ac | 0x02000000); + break; + case STF_BARRIER_SYNC_ORI: + EMIT(PPC_INST_SYNC); + PPC_LD(b2p[TMP_REG_1], 13, 0); + PPC_ORI(31, 31, 0); + break; + case STF_BARRIER_FALLBACK: + EMIT(PPC_INST_MFLR | ___PPC_RT(b2p[TMP_REG_1])); + PPC_LI64(12, dereference_kernel_function_descriptor(bpf_stf_barrier)); + PPC_MTCTR(12); + EMIT(PPC_INST_BCTR | 0x1); + PPC_MTLR(b2p[TMP_REG_1]); + break; + case STF_BARRIER_NONE: + break; + } + break; + /* * BPF_ST(X) */ @@ -989,7 +1058,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, */ case BPF_JMP | BPF_TAIL_CALL: ctx->seen |= SEEN_TAILCALL; - bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); + ret = bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); + if (ret < 0) + return ret; break; default: diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index ca92e01d0bd1b..6f013e4188349 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -133,6 +133,9 @@ static void pmao_restore_workaround(bool ebb) { } bool is_sier_available(void) { + if (!ppmu) + return false; + if (ppmu->flags & PPMU_HAS_SIER) return true; @@ -1522,9 +1525,16 @@ static int power_pmu_add(struct perf_event *event, int ef_flags) ret = 0; out: if (has_branch_stack(event)) { - power_pmu_bhrb_enable(event); - cpuhw->bhrb_filter = ppmu->bhrb_filter_map( - event->attr.branch_sample_type); + u64 bhrb_filter = -1; + + if (ppmu->bhrb_filter_map) + bhrb_filter = ppmu->bhrb_filter_map( + event->attr.branch_sample_type); + + if (bhrb_filter != -1) { + cpuhw->bhrb_filter = bhrb_filter; + power_pmu_bhrb_enable(event); + } } perf_pmu_enable(event->pmu); @@ -1846,7 +1856,6 @@ static int power_pmu_event_init(struct perf_event *event) int n; int err; struct cpu_hw_events *cpuhw; - u64 bhrb_filter; if (!ppmu) return -ENOENT; @@ -1952,7 +1961,10 @@ static int power_pmu_event_init(struct perf_event *event) err = power_check_constraints(cpuhw, events, cflags, n + 1); if (has_branch_stack(event)) { - bhrb_filter = ppmu->bhrb_filter_map( + u64 bhrb_filter = -1; + + if (ppmu->bhrb_filter_map) + bhrb_filter = ppmu->bhrb_filter_map( event->attr.branch_sample_type); if (bhrb_filter == -1) { @@ -2065,7 +2077,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val, left += period; if (left <= 0) left = period; - record = siar_valid(regs); + + /* + * If address is not requested in the sample via + * PERF_SAMPLE_IP, just record that sample irrespective + * of SIAR valid check. + */ + if (event->attr.sample_type & PERF_SAMPLE_IP) + record = siar_valid(regs); + else + record = 1; + event->hw.last_period = event->hw.sample_period; } if (left < 0x80000000LL) @@ -2077,6 +2099,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val, local64_set(&event->hw.period_left, left); perf_event_update_userpage(event); + /* + * Due to hardware limitation, sometimes SIAR could sample a kernel + * address even when freeze on supervisor state (kernel) is set in + * MMCR2. Check attr.exclude_kernel and address to drop the sample in + * these cases. + */ + if (event->attr.exclude_kernel && + (event->attr.sample_type & PERF_SAMPLE_IP) && + is_kernel_addr(mfspr(SPRN_SIAR))) + record = 0; + /* * Finally record data if requested. */ @@ -2106,6 +2139,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (perf_event_overflow(event, &data, regs)) power_pmu_stop(event, 0); + } else if (period) { + /* Account for interrupt in case of invalid SIAR */ + if (perf_event_account_interrupt(event)) + power_pmu_stop(event, 0); } } diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index 573e0b309c0ca..48e8f4b17b91b 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -1400,16 +1400,6 @@ static void h_24x7_event_read(struct perf_event *event) h24x7hw = &get_cpu_var(hv_24x7_hw); h24x7hw->events[i] = event; put_cpu_var(h24x7hw); - /* - * Clear the event count so we can compute the _change_ - * in the 24x7 raw counter value at the end of the txn. - * - * Note that we could alternatively read the 24x7 value - * now and save its value in event->hw.prev_count. But - * that would require issuing a hcall, which would then - * defeat the purpose of using the txn interface. - */ - local64_set(&event->count, 0); } put_cpu_var(hv_24x7_reqb); diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h index e608f9db12ddc..8965b4463d433 100644 --- a/arch/powerpc/perf/hv-gpci-requests.h +++ b/arch/powerpc/perf/hv-gpci-requests.h @@ -95,7 +95,7 @@ REQUEST(__field(0, 8, partition_id) #define REQUEST_NAME system_performance_capabilities #define REQUEST_NUM 0x40 -#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#define REQUEST_IDX_KIND "starting_index=0xffffffff" #include I(REQUEST_BEGIN) REQUEST(__field(0, 1, perf_collect_privileged) __field(0x1, 1, capability_mask) @@ -223,7 +223,7 @@ REQUEST(__field(0, 2, partition_id) #define REQUEST_NAME system_hypervisor_times #define REQUEST_NUM 0xF0 -#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#define REQUEST_IDX_KIND "starting_index=0xffffffff" #include I(REQUEST_BEGIN) REQUEST(__count(0, 8, time_spent_to_dispatch_virtual_processors) __count(0x8, 8, time_spent_processing_virtual_processor_timers) @@ -234,7 +234,7 @@ REQUEST(__count(0, 8, time_spent_to_dispatch_virtual_processors) #define REQUEST_NAME system_tlbie_count_and_time #define REQUEST_NUM 0xF4 -#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#define REQUEST_IDX_KIND "starting_index=0xffffffff" #include I(REQUEST_BEGIN) REQUEST(__count(0, 8, tlbie_instructions_issued) /* diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 6884d16ec19b9..732cfc53e260d 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -164,7 +164,7 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index, */ count = 0; for (i = offset; i < offset + length; i++) - count |= arg->bytes[i] << (i - offset); + count |= (u64)(arg->bytes[i]) << ((length - 1 - (i - offset)) * 8); *value = count; out: diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index cb50a9e1fd2d7..d76e800a1f337 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -44,6 +44,16 @@ static DEFINE_PER_CPU(u64 *, trace_imc_mem); static struct imc_pmu_ref *trace_imc_refc; static int trace_imc_mem_size; +/* + * Global data structure used to avoid races between thread, + * core and trace-imc + */ +static struct imc_pmu_ref imc_global_refc = { + .lock = __MUTEX_INITIALIZER(imc_global_refc.lock), + .id = 0, + .refc = 0, +}; + static struct imc_pmu *imc_event_to_pmu(struct perf_event *event) { return container_of(event->pmu, struct imc_pmu, pmu); @@ -698,6 +708,16 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu) return -EINVAL; ref->refc = 0; + /* + * Reduce the global reference count, if this is the + * last cpu in this core and core-imc event running + * in this cpu. + */ + mutex_lock(&imc_global_refc.lock); + if (imc_global_refc.id == IMC_DOMAIN_CORE) + imc_global_refc.refc--; + + mutex_unlock(&imc_global_refc.lock); } return 0; } @@ -710,6 +730,23 @@ static int core_imc_pmu_cpumask_init(void) ppc_core_imc_cpu_offline); } +static void reset_global_refc(struct perf_event *event) +{ + mutex_lock(&imc_global_refc.lock); + imc_global_refc.refc--; + + /* + * If no other thread is running any + * event for this domain(thread/core/trace), + * set the global id to zero. + */ + if (imc_global_refc.refc <= 0) { + imc_global_refc.refc = 0; + imc_global_refc.id = 0; + } + mutex_unlock(&imc_global_refc.lock); +} + static void core_imc_counters_release(struct perf_event *event) { int rc, core_id; @@ -759,6 +796,8 @@ static void core_imc_counters_release(struct perf_event *event) ref->refc = 0; } mutex_unlock(&ref->lock); + + reset_global_refc(event); } static int core_imc_event_init(struct perf_event *event) @@ -819,6 +858,29 @@ static int core_imc_event_init(struct perf_event *event) ++ref->refc; mutex_unlock(&ref->lock); + /* + * Since the system can run either in accumulation or trace-mode + * of IMC at a time, core-imc events are allowed only if no other + * trace/thread imc events are enabled/monitored. + * + * Take the global lock, and check the refc.id + * to know whether any other trace/thread imc + * events are running. + */ + mutex_lock(&imc_global_refc.lock); + if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_CORE) { + /* + * No other trace/thread imc events are running in + * the system, so set the refc.id to core-imc. + */ + imc_global_refc.id = IMC_DOMAIN_CORE; + imc_global_refc.refc++; + } else { + mutex_unlock(&imc_global_refc.lock); + return -EBUSY; + } + mutex_unlock(&imc_global_refc.lock); + event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK); event->destroy = core_imc_counters_release; return 0; @@ -877,7 +939,23 @@ static int ppc_thread_imc_cpu_online(unsigned int cpu) static int ppc_thread_imc_cpu_offline(unsigned int cpu) { - mtspr(SPRN_LDBAR, 0); + /* + * Set the bit 0 of LDBAR to zero. + * + * If bit 0 of LDBAR is unset, it will stop posting + * the counter data to memory. + * For thread-imc, bit 0 of LDBAR will be set to 1 in the + * event_add function. So reset this bit here, to stop the updates + * to memory in the cpu_offline path. + */ + mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); + + /* Reduce the refc if thread-imc event running on this cpu */ + mutex_lock(&imc_global_refc.lock); + if (imc_global_refc.id == IMC_DOMAIN_THREAD) + imc_global_refc.refc--; + mutex_unlock(&imc_global_refc.lock); + return 0; } @@ -916,7 +994,22 @@ static int thread_imc_event_init(struct perf_event *event) if (!target) return -EINVAL; + mutex_lock(&imc_global_refc.lock); + /* + * Check if any other trace/core imc events are running in the + * system, if not set the global id to thread-imc. + */ + if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_THREAD) { + imc_global_refc.id = IMC_DOMAIN_THREAD; + imc_global_refc.refc++; + } else { + mutex_unlock(&imc_global_refc.lock); + return -EBUSY; + } + mutex_unlock(&imc_global_refc.lock); + event->pmu->task_ctx_nr = perf_sw_context; + event->destroy = reset_global_refc; return 0; } @@ -1063,10 +1156,12 @@ static void thread_imc_event_del(struct perf_event *event, int flags) int core_id; struct imc_pmu_ref *ref; - mtspr(SPRN_LDBAR, 0); - core_id = smp_processor_id() / threads_per_core; ref = &core_imc_refc[core_id]; + if (!ref) { + pr_debug("imc: Failed to get event reference count\n"); + return; + } mutex_lock(&ref->lock); ref->refc--; @@ -1082,6 +1177,10 @@ static void thread_imc_event_del(struct perf_event *event, int flags) ref->refc = 0; } mutex_unlock(&ref->lock); + + /* Set bit 0 of LDBAR to zero, to stop posting updates to memory */ + mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); + /* * Take a snapshot and calculate the delta and update * the event counter values. @@ -1133,7 +1232,18 @@ static int ppc_trace_imc_cpu_online(unsigned int cpu) static int ppc_trace_imc_cpu_offline(unsigned int cpu) { - mtspr(SPRN_LDBAR, 0); + /* + * No need to set bit 0 of LDBAR to zero, as + * it is set to zero for imc trace-mode + * + * Reduce the refc if any trace-imc event running + * on this cpu. + */ + mutex_lock(&imc_global_refc.lock); + if (imc_global_refc.id == IMC_DOMAIN_TRACE) + imc_global_refc.refc--; + mutex_unlock(&imc_global_refc.lock); + return 0; } @@ -1226,15 +1336,14 @@ static int trace_imc_event_add(struct perf_event *event, int flags) local_mem = get_trace_imc_event_base_addr(); ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE; - if (core_imc_refc) - ref = &core_imc_refc[core_id]; + /* trace-imc reference count */ + if (trace_imc_refc) + ref = &trace_imc_refc[core_id]; if (!ref) { - /* If core-imc is not enabled, use trace-imc reference count */ - if (trace_imc_refc) - ref = &trace_imc_refc[core_id]; - if (!ref) - return -EINVAL; + pr_debug("imc: Failed to get the event reference count\n"); + return -EINVAL; } + mtspr(SPRN_LDBAR, ldbar_value); mutex_lock(&ref->lock); if (ref->refc == 0) { @@ -1242,13 +1351,11 @@ static int trace_imc_event_add(struct perf_event *event, int flags) get_hard_smp_processor_id(smp_processor_id()))) { mutex_unlock(&ref->lock); pr_err("trace-imc: Unable to start the counters for core %d\n", core_id); - mtspr(SPRN_LDBAR, 0); return -EINVAL; } } ++ref->refc; mutex_unlock(&ref->lock); - return 0; } @@ -1274,16 +1381,13 @@ static void trace_imc_event_del(struct perf_event *event, int flags) int core_id = smp_processor_id() / threads_per_core; struct imc_pmu_ref *ref = NULL; - if (core_imc_refc) - ref = &core_imc_refc[core_id]; + if (trace_imc_refc) + ref = &trace_imc_refc[core_id]; if (!ref) { - /* If core-imc is not enabled, use trace-imc reference count */ - if (trace_imc_refc) - ref = &trace_imc_refc[core_id]; - if (!ref) - return; + pr_debug("imc: Failed to get event reference count\n"); + return; } - mtspr(SPRN_LDBAR, 0); + mutex_lock(&ref->lock); ref->refc--; if (ref->refc == 0) { @@ -1297,6 +1401,7 @@ static void trace_imc_event_del(struct perf_event *event, int flags) ref->refc = 0; } mutex_unlock(&ref->lock); + trace_imc_event_stop(event, flags); } @@ -1314,10 +1419,34 @@ static int trace_imc_event_init(struct perf_event *event) if (event->attr.sample_period == 0) return -ENOENT; + /* + * Take the global lock, and make sure + * no other thread is running any core/thread imc + * events + */ + mutex_lock(&imc_global_refc.lock); + if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_TRACE) { + /* + * No core/thread imc events are running in the + * system, so set the refc.id to trace-imc. + */ + imc_global_refc.id = IMC_DOMAIN_TRACE; + imc_global_refc.refc++; + } else { + mutex_unlock(&imc_global_refc.lock); + return -EBUSY; + } + mutex_unlock(&imc_global_refc.lock); + event->hw.idx = -1; target = event->hw.target; - event->pmu->task_ctx_nr = perf_hw_context; + /* + * There can only be a single PMU for perf_hw_context events which is assigned to + * core PMU. Hence use "perf_sw_context" for trace_imc. + */ + event->pmu->task_ctx_nr = perf_sw_context; + event->destroy = reset_global_refc; return 0; } @@ -1429,10 +1558,10 @@ static void cleanup_all_core_imc_memory(void) static void thread_imc_ldbar_disable(void *dummy) { /* - * By Zeroing LDBAR, we disable thread-imc - * updates. + * By setting 0th bit of LDBAR to zero, we disable thread-imc + * updates to memory. */ - mtspr(SPRN_LDBAR, 0); + mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); } void thread_imc_disable(void) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 4c86da5eb28ab..25eda98f3b1bd 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -269,6 +269,15 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) mask |= CNST_PMC_MASK(pmc); value |= CNST_PMC_VAL(pmc); + + /* + * PMC5 and PMC6 are used to count cycles and instructions and + * they do not support most of the constraint bits. Add a check + * to exclude PMC5/6 from most of the constraints except for + * EBB/BHRB. + */ + if (pmc >= 5) + goto ebb_bhrb; } if (pmc <= 4) { @@ -317,7 +326,8 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) if (event_is_threshold(event) && is_thresh_cmp_valid(event)) { mask |= CNST_THRESH_MASK; value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT); - } + } else if (event_is_threshold(event)) + return -1; } else { /* * Special case for PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC, @@ -335,6 +345,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) } } +ebb_bhrb: if (!pmc && ebb) /* EBB events must specify the PMC */ return -1; @@ -353,8 +364,8 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) * EBB events are pinned & exclusive, so this should never actually * hit, but we leave it as a fallback in case. */ - mask |= CNST_EBB_VAL(ebb); - value |= CNST_EBB_MASK; + mask |= CNST_EBB_MASK; + value |= CNST_EBB_VAL(ebb); *maskp = mask; *valp = value; diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 08c3ef7961982..1225f53609a44 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -131,11 +131,11 @@ int p9_dd22_bl_ev[] = { /* Table of alternatives, sorted by column 0 */ static const unsigned int power9_event_alternatives[][MAX_ALT] = { - { PM_INST_DISP, PM_INST_DISP_ALT }, - { PM_RUN_CYC_ALT, PM_RUN_CYC }, - { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL }, - { PM_LD_MISS_L1, PM_LD_MISS_L1_ALT }, { PM_BR_2PATH, PM_BR_2PATH_ALT }, + { PM_INST_DISP, PM_INST_DISP_ALT }, + { PM_RUN_CYC_ALT, PM_RUN_CYC }, + { PM_LD_MISS_L1, PM_LD_MISS_L1_ALT }, + { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL }, }; static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[]) diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c index b299e43f5ef94..823397c802def 100644 --- a/arch/powerpc/platforms/44x/fsp2.c +++ b/arch/powerpc/platforms/44x/fsp2.c @@ -208,6 +208,7 @@ static void node_irq_request(const char *compat, irq_handler_t errirq_handler) if (irq == NO_IRQ) { pr_err("device tree node %pOFn is missing a interrupt", np); + of_node_put(np); return; } @@ -215,6 +216,7 @@ static void node_irq_request(const char *compat, irq_handler_t errirq_handler) if (rc) { pr_err("fsp_of_probe: request_irq failed: np=%pOF rc=%d", np, rc); + of_node_put(np); return; } } diff --git a/arch/powerpc/platforms/4xx/cpm.c b/arch/powerpc/platforms/4xx/cpm.c index ae8b812c92029..2481e78c04234 100644 --- a/arch/powerpc/platforms/4xx/cpm.c +++ b/arch/powerpc/platforms/4xx/cpm.c @@ -327,6 +327,6 @@ late_initcall(cpm_init); static int __init cpm_powersave_off(char *arg) { cpm.powersave_off = 1; - return 0; + return 1; } __setup("powersave=off", cpm_powersave_off); diff --git a/arch/powerpc/platforms/4xx/pci.c b/arch/powerpc/platforms/4xx/pci.c index e6e2adcc7b64c..c13d64c3b0194 100644 --- a/arch/powerpc/platforms/4xx/pci.c +++ b/arch/powerpc/platforms/4xx/pci.c @@ -1242,7 +1242,7 @@ static void __init ppc460sx_pciex_check_link(struct ppc4xx_pciex_port *port) if (mbase == NULL) { printk(KERN_ERR "%pOF: Can't map internal config space !", port->node); - goto done; + return; } while (attempt && (0 == (in_le32(mbase + PECFG_460SX_DLLSTA) @@ -1252,9 +1252,7 @@ static void __init ppc460sx_pciex_check_link(struct ppc4xx_pciex_port *port) } if (attempt) port->link = 1; -done: iounmap(mbase); - } static struct ppc4xx_pciex_hwops ppc460sx_pcie_hwops __initdata = { diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S index 3a9969c429b39..054f927bfef93 100644 --- a/arch/powerpc/platforms/52xx/lite5200_sleep.S +++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S @@ -181,7 +181,7 @@ sram_code: udelay: /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */ mullw r12, r12, r11 mftb r13 /* start */ - addi r12, r13, r12 /* end */ + add r12, r13, r12 /* end */ 1: mftb r13 /* current */ cmp cr0, r13, r12 diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c index 7c0133f558d02..ffa8a7a6a2db7 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c @@ -94,9 +94,8 @@ int __init mpc85xx_setup_pmc(void) pr_err("Could not map guts node address\n"); return -ENOMEM; } + qoriq_pm_ops = &mpc85xx_pm_ops; } - qoriq_pm_ops = &mpc85xx_pm_ops; - return 0; } diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index 0f65c51271db9..ec6dc2d7a9db3 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -292,6 +292,7 @@ cpm_setbrg(uint brg, uint rate) out_be32(bp, (((BRG_UART_CLK_DIV16 / rate) - 1) << 1) | CPM_BRG_EN | CPM_BRG_DIV16); } +EXPORT_SYMBOL(cpm_setbrg); struct cpm_ioport16 { __be16 dir, par, odr_sor, dat, intr; diff --git a/arch/powerpc/platforms/8xx/micropatch.c b/arch/powerpc/platforms/8xx/micropatch.c index c80bd7afd6c5e..b06c6d26dd722 100644 --- a/arch/powerpc/platforms/8xx/micropatch.c +++ b/arch/powerpc/platforms/8xx/micropatch.c @@ -361,6 +361,17 @@ void __init cpm_load_patch(cpm8xx_t *cp) if (IS_ENABLED(CONFIG_SMC_UCODE_PATCH)) { smc_uart_t *smp; + if (IS_ENABLED(CONFIG_PPC_EARLY_DEBUG_CPM)) { + int i; + + for (i = 0; i < sizeof(*smp); i += 4) { + u32 __iomem *src = (u32 __iomem *)&cp->cp_dparam[PROFF_SMC1 + i]; + u32 __iomem *dst = (u32 __iomem *)&cp->cp_dparam[PROFF_DSP1 + i]; + + out_be32(dst, in_be32(src)); + } + } + smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC1]; out_be16(&smp->smc_rpbase, 0x1ec0); smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC2]; diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c index e9617d35fd1f6..209b12323aa4c 100644 --- a/arch/powerpc/platforms/8xx/pic.c +++ b/arch/powerpc/platforms/8xx/pic.c @@ -153,6 +153,7 @@ int mpc8xx_pic_init(void) if (mpc8xx_pic_host == NULL) { printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n"); ret = -ENOMEM; + goto out; } ret = 0; diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index d82e3664ffdf8..18792a5b003a0 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -219,12 +219,11 @@ config TAU temperature within 2-4 degrees Celsius. This option shows the current on-die temperature in /proc/cpuinfo if the cpu supports it. - Unfortunately, on some chip revisions, this sensor is very inaccurate - and in many cases, does not work at all, so don't assume the cpu - temp is actually what /proc/cpuinfo says it is. + Unfortunately, this sensor is very inaccurate when uncalibrated, so + don't assume the cpu temp is actually what /proc/cpuinfo says it is. config TAU_INT - bool "Interrupt driven TAU driver (DANGEROUS)" + bool "Interrupt driven TAU driver (EXPERIMENTAL)" depends on TAU ---help--- The TAU supports an interrupt driven mode which causes an interrupt @@ -232,12 +231,7 @@ config TAU_INT to get notified the temp has exceeded a range. With this option off, a timer is used to re-check the temperature periodically. - However, on some cpus it appears that the TAU interrupt hardware - is buggy and can cause a situation which would lead unexplained hard - lockups. - - Unless you are extending the TAU driver, or enjoy kernel/hardware - debugging, leave this option off. + If in doubt, say N here. config TAU_AVERAGE bool "Average high and low temp" diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 12543e53fa96a..325dc8b534228 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -118,9 +118,9 @@ config GENERIC_CPU depends on PPC64 && CPU_LITTLE_ENDIAN select ARCH_HAS_FAST_MULTIPLIER -config GENERIC_CPU +config POWERPC_CPU bool "Generic 32 bits powerpc" - depends on PPC32 && !PPC_8xx + depends on PPC32 && !PPC_8xx && !PPC_85xx config CELL_CPU bool "Cell Broadband Engine" @@ -151,11 +151,11 @@ config POWER9_CPU config E5500_CPU bool "Freescale e5500" - depends on E500 + depends on PPC64 && E500 config E6500_CPU bool "Freescale e6500" - depends on E500 + depends on PPC64 && E500 config 860_CPU bool "8xx family" @@ -174,11 +174,23 @@ config G4_CPU depends on PPC_BOOK3S_32 select ALTIVEC +config E500_CPU + bool "e500 (8540)" + depends on PPC_85xx && !PPC_E500MC + +config E500MC_CPU + bool "e500mc" + depends on PPC_85xx && PPC_E500MC + +config TOOLCHAIN_DEFAULT_CPU + bool "Rely on the toolchain's implicit default CPU" + depends on PPC32 + endchoice config TARGET_CPU_BOOL bool - default !GENERIC_CPU + default !GENERIC_CPU && !TOOLCHAIN_DEFAULT_CPU config TARGET_CPU string @@ -193,6 +205,9 @@ config TARGET_CPU default "e300c2" if E300C2_CPU default "e300c3" if E300C3_CPU default "G4" if G4_CPU + default "8540" if E500_CPU + default "e500mc" if E500MC_CPU + default "powerpc" if POWERPC_CPU config PPC_BOOK3S def_bool y @@ -389,7 +404,7 @@ config PPC_KUAP config PPC_KUAP_DEBUG bool "Extra debugging for Kernel Userspace Access Protection" - depends on PPC_HAVE_KUAP && (PPC_RADIX_MMU || PPC_32) + depends on PPC_KUAP && (PPC_RADIX_MMU || PPC32) help Add extra debugging for Kernel Userspace Access Protection (KUAP) If you're unsure, say N. diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 0f7c8241912b9..f2ff359041eec 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -44,6 +44,7 @@ config SPU_FS tristate "SPU file system" default m depends on PPC_CELL + depends on COREDUMP select SPU_BASE help The SPU file system is used to access Synergistic Processing diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index 57c4e0e86c884..ba33140e671da 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -226,6 +226,7 @@ static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg) if (!prop) { dev_dbg(&dev->dev, "axon_msi: no msi-address-(32|64) properties found\n"); + of_node_put(dn); return -ENOENT; } diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index ca9ffc1c8685d..a6a60e2b8f453 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -976,6 +976,7 @@ static int __init cell_iommu_fixed_mapping_init(void) if (hbase < dbase || (hend > (dbase + dsize))) { pr_debug("iommu: hash window doesn't fit in" "real DMA window\n"); + of_node_put(np); return -1; } } diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c index 6af3a6e600a70..80c9c4d715505 100644 --- a/arch/powerpc/platforms/cell/pervasive.c +++ b/arch/powerpc/platforms/cell/pervasive.c @@ -77,6 +77,7 @@ static int cbe_system_reset_exception(struct pt_regs *regs) switch (regs->msr & SRR1_WAKEMASK) { case SRR1_WAKEDEC: set_dec(1); + break; case SRR1_WAKEEE: /* * Handle these when interrupts get re-enabled and we take diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index c0f950a3f4e1f..f4a4dfb191e7d 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -1978,8 +1978,9 @@ static ssize_t __spufs_mbox_info_read(struct spu_context *ctx, static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { - int ret; struct spu_context *ctx = file->private_data; + u32 stat, data; + int ret; if (!access_ok(buf, len)) return -EFAULT; @@ -1988,11 +1989,16 @@ static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf, if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_mbox_info_read(ctx, buf, len, pos); + stat = ctx->csa.prob.mb_stat_R; + data = ctx->csa.prob.pu_mb_R; spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + /* EOF if there's no entry in the mbox */ + if (!(stat & 0x0000ff)) + return 0; + + return simple_read_from_buffer(buf, len, pos, &data, sizeof(data)); } static const struct file_operations spufs_mbox_info_fops = { @@ -2019,6 +2025,7 @@ static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; + u32 stat, data; int ret; if (!access_ok(buf, len)) @@ -2028,11 +2035,16 @@ static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf, if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_ibox_info_read(ctx, buf, len, pos); + stat = ctx->csa.prob.mb_stat_R; + data = ctx->csa.priv2.puint_mb_R; spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + /* EOF if there's no entry in the ibox */ + if (!(stat & 0xff0000)) + return 0; + + return simple_read_from_buffer(buf, len, pos, &data, sizeof(data)); } static const struct file_operations spufs_ibox_info_fops = { @@ -2041,6 +2053,11 @@ static const struct file_operations spufs_ibox_info_fops = { .llseek = generic_file_llseek, }; +static size_t spufs_wbox_info_cnt(struct spu_context *ctx) +{ + return (4 - ((ctx->csa.prob.mb_stat_R & 0x00ff00) >> 8)) * sizeof(u32); +} + static ssize_t __spufs_wbox_info_read(struct spu_context *ctx, char __user *buf, size_t len, loff_t *pos) { @@ -2049,7 +2066,7 @@ static ssize_t __spufs_wbox_info_read(struct spu_context *ctx, u32 wbox_stat; wbox_stat = ctx->csa.prob.mb_stat_R; - cnt = 4 - ((wbox_stat & 0x00ff00) >> 8); + cnt = spufs_wbox_info_cnt(ctx); for (i = 0; i < cnt; i++) { data[i] = ctx->csa.spu_mailbox_data[i]; } @@ -2062,7 +2079,8 @@ static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; - int ret; + u32 data[ARRAY_SIZE(ctx->csa.spu_mailbox_data)]; + int ret, count; if (!access_ok(buf, len)) return -EFAULT; @@ -2071,11 +2089,13 @@ static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf, if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_wbox_info_read(ctx, buf, len, pos); + count = spufs_wbox_info_cnt(ctx); + memcpy(&data, &ctx->csa.spu_mailbox_data, sizeof(data)); spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + return simple_read_from_buffer(buf, len, pos, &data, + count * sizeof(u32)); } static const struct file_operations spufs_wbox_info_fops = { @@ -2084,27 +2104,33 @@ static const struct file_operations spufs_wbox_info_fops = { .llseek = generic_file_llseek, }; -static ssize_t __spufs_dma_info_read(struct spu_context *ctx, - char __user *buf, size_t len, loff_t *pos) +static void spufs_get_dma_info(struct spu_context *ctx, + struct spu_dma_info *info) { - struct spu_dma_info info; - struct mfc_cq_sr *qp, *spuqp; int i; - info.dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW; - info.dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0]; - info.dma_info_status = ctx->csa.spu_chnldata_RW[24]; - info.dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25]; - info.dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27]; + info->dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW; + info->dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0]; + info->dma_info_status = ctx->csa.spu_chnldata_RW[24]; + info->dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25]; + info->dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27]; for (i = 0; i < 16; i++) { - qp = &info.dma_info_command_data[i]; - spuqp = &ctx->csa.priv2.spuq[i]; + struct mfc_cq_sr *qp = &info->dma_info_command_data[i]; + struct mfc_cq_sr *spuqp = &ctx->csa.priv2.spuq[i]; qp->mfc_cq_data0_RW = spuqp->mfc_cq_data0_RW; qp->mfc_cq_data1_RW = spuqp->mfc_cq_data1_RW; qp->mfc_cq_data2_RW = spuqp->mfc_cq_data2_RW; qp->mfc_cq_data3_RW = spuqp->mfc_cq_data3_RW; } +} + +static ssize_t __spufs_dma_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + struct spu_dma_info info; + + spufs_get_dma_info(ctx, &info); return simple_read_from_buffer(buf, len, pos, &info, sizeof info); @@ -2114,6 +2140,7 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; + struct spu_dma_info info; int ret; if (!access_ok(buf, len)) @@ -2123,11 +2150,12 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_dma_info_read(ctx, buf, len, pos); + spufs_get_dma_info(ctx, &info); spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + return simple_read_from_buffer(buf, len, pos, &info, + sizeof(info)); } static const struct file_operations spufs_dma_info_fops = { @@ -2136,13 +2164,31 @@ static const struct file_operations spufs_dma_info_fops = { .llseek = no_llseek, }; +static void spufs_get_proxydma_info(struct spu_context *ctx, + struct spu_proxydma_info *info) +{ + int i; + + info->proxydma_info_type = ctx->csa.prob.dma_querytype_RW; + info->proxydma_info_mask = ctx->csa.prob.dma_querymask_RW; + info->proxydma_info_status = ctx->csa.prob.dma_tagstatus_R; + + for (i = 0; i < 8; i++) { + struct mfc_cq_sr *qp = &info->proxydma_info_command_data[i]; + struct mfc_cq_sr *puqp = &ctx->csa.priv2.puq[i]; + + qp->mfc_cq_data0_RW = puqp->mfc_cq_data0_RW; + qp->mfc_cq_data1_RW = puqp->mfc_cq_data1_RW; + qp->mfc_cq_data2_RW = puqp->mfc_cq_data2_RW; + qp->mfc_cq_data3_RW = puqp->mfc_cq_data3_RW; + } +} + static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx, char __user *buf, size_t len, loff_t *pos) { struct spu_proxydma_info info; - struct mfc_cq_sr *qp, *puqp; int ret = sizeof info; - int i; if (len < ret) return -EINVAL; @@ -2150,18 +2196,7 @@ static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx, if (!access_ok(buf, len)) return -EFAULT; - info.proxydma_info_type = ctx->csa.prob.dma_querytype_RW; - info.proxydma_info_mask = ctx->csa.prob.dma_querymask_RW; - info.proxydma_info_status = ctx->csa.prob.dma_tagstatus_R; - for (i = 0; i < 8; i++) { - qp = &info.proxydma_info_command_data[i]; - puqp = &ctx->csa.priv2.puq[i]; - - qp->mfc_cq_data0_RW = puqp->mfc_cq_data0_RW; - qp->mfc_cq_data1_RW = puqp->mfc_cq_data1_RW; - qp->mfc_cq_data2_RW = puqp->mfc_cq_data2_RW; - qp->mfc_cq_data3_RW = puqp->mfc_cq_data3_RW; - } + spufs_get_proxydma_info(ctx, &info); return simple_read_from_buffer(buf, len, pos, &info, sizeof info); @@ -2171,17 +2206,19 @@ static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; + struct spu_proxydma_info info; int ret; ret = spu_acquire_saved(ctx); if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_proxydma_info_read(ctx, buf, len, pos); + spufs_get_proxydma_info(ctx, &info); spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + return simple_read_from_buffer(buf, len, pos, &info, + sizeof(info)); } static const struct file_operations spufs_proxydma_info_fops = { diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 2dd452a047cd6..99e688498a9cb 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -671,6 +671,7 @@ spufs_init_isolated_loader(void) return; loader = of_get_property(dn, "loader", &size); + of_node_put(dn); if (!loader) return; diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index a1b7f79a8a152..de10c13de15c6 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -215,6 +215,7 @@ void hlwd_pic_probe(void) irq_set_chained_handler(cascade_virq, hlwd_pic_irq_cascade); hlwd_irq_host = host; + of_node_put(np); break; } } diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 9cd6f3e1000b3..09a0594350b69 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -294,23 +294,6 @@ static int __init maple_probe(void) return 1; } -define_machine(maple) { - .name = "Maple", - .probe = maple_probe, - .setup_arch = maple_setup_arch, - .init_IRQ = maple_init_IRQ, - .pci_irq_fixup = maple_pci_irq_fixup, - .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, - .restart = maple_restart, - .halt = maple_halt, - .get_boot_time = maple_get_boot_time, - .set_rtc_time = maple_set_rtc_time, - .get_rtc_time = maple_get_rtc_time, - .calibrate_decr = generic_calibrate_decr, - .progress = maple_progress, - .power_save = power4_idle, -}; - #ifdef CONFIG_EDAC /* * Register a platform device for CPC925 memory controller on @@ -367,3 +350,20 @@ static int __init maple_cpc925_edac_setup(void) } machine_device_initcall(maple, maple_cpc925_edac_setup); #endif + +define_machine(maple) { + .name = "Maple", + .probe = maple_probe, + .setup_arch = maple_setup_arch, + .init_IRQ = maple_init_IRQ, + .pci_irq_fixup = maple_pci_irq_fixup, + .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, + .restart = maple_restart, + .halt = maple_halt, + .get_boot_time = maple_get_boot_time, + .set_rtc_time = maple_set_rtc_time, + .get_rtc_time = maple_get_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = maple_progress, + .power_save = power4_idle, +}; diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S index da69e0fcb4f15..9b85b030cbebb 100644 --- a/arch/powerpc/platforms/powermac/cache.S +++ b/arch/powerpc/platforms/powermac/cache.S @@ -48,7 +48,7 @@ flush_disable_75x: /* Stop DST streams */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) @@ -196,7 +196,7 @@ flush_disable_745x: isync /* Stop prefetch streams */ - DSSALL + PPC_DSSALL sync /* Disable L2 prefetching */ diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index bf4be4b53b44d..210435a43bf95 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -582,6 +582,7 @@ static void __init kw_i2c_add(struct pmac_i2c_host_kw *host, bus->close = kw_i2c_close; bus->xfer = kw_i2c_xfer; mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); lockdep_set_class(&bus->mutex, &bus->lock_key); if (controller == busnode) bus->flags = pmac_i2c_multibus; @@ -811,6 +812,7 @@ static void __init pmu_i2c_probe(void) bus->hostdata = bus + 1; bus->xfer = pmu_i2c_xfer; mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); lockdep_set_class(&bus->mutex, &bus->lock_key); bus->flags = pmac_i2c_multibus; list_add(&bus->link, &pmac_i2c_busses); @@ -934,6 +936,7 @@ static void __init smu_i2c_probe(void) bus->hostdata = bus + 1; bus->xfer = smu_i2c_xfer; mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); lockdep_set_class(&bus->mutex, &bus->lock_key); bus->flags = 0; list_add(&bus->link, &pmac_i2c_busses); diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index bd6085b470b7a..935dcac4c02d3 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -293,14 +293,7 @@ grackle_wake_up: * we do any r1 memory access as we are not sure they * are in a sane state above the first 256Mb region */ - li r0,16 /* load up segment register values */ - mtctr r0 /* for context 0 */ - lis r3,0x2000 /* Ku = 1, VSID = 0 */ - li r4,0 -3: mtsrin r3,r4 - addi r3,r3,0x111 /* increment VSID */ - addis r4,r4,0x1000 /* address of next segment */ - bdnz 3b + bl load_segment_registers sync isync diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index a3ac9646119d0..c0f8120045c3a 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -20,3 +20,4 @@ obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o obj-$(CONFIG_OCXL_BASE) += ocxl.o obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o +obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index eb2e75dac369a..b45ab455a18e8 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -30,6 +30,7 @@ struct memtrace_entry { char name[16]; }; +static DEFINE_MUTEX(memtrace_mutex); static u64 memtrace_size; static struct memtrace_entry *memtrace_array; @@ -67,6 +68,23 @@ static int change_memblock_state(struct memory_block *mem, void *arg) return 0; } +static void memtrace_clear_range(unsigned long start_pfn, + unsigned long nr_pages) +{ + unsigned long pfn; + + /* + * As pages are offline, we cannot trust the memmap anymore. As HIGHMEM + * does not apply, avoid passing around "struct page" and use + * clear_page() instead directly. + */ + for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { + if (IS_ALIGNED(pfn, PAGES_PER_SECTION)) + cond_resched(); + clear_page(__va(PFN_PHYS(pfn))); + } +} + /* called with device_hotplug_lock held */ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) { @@ -111,6 +129,11 @@ static u64 memtrace_alloc_node(u32 nid, u64 size) lock_device_hotplug(); for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) { + /* + * Clear the range while we still have a linear + * mapping. + */ + memtrace_clear_range(base_pfn, nr_pages); /* * Remove memory in memory block size chunks so that * iomem resources are always split to the same size and @@ -268,6 +291,7 @@ static int memtrace_online(void) static int memtrace_enable_set(void *data, u64 val) { + int rc = -EAGAIN; u64 bytes; /* @@ -280,25 +304,31 @@ static int memtrace_enable_set(void *data, u64 val) return -EINVAL; } + mutex_lock(&memtrace_mutex); + /* Re-add/online previously removed/offlined memory */ if (memtrace_size) { if (memtrace_online()) - return -EAGAIN; + goto out_unlock; } - if (!val) - return 0; + if (!val) { + rc = 0; + goto out_unlock; + } /* Offline and remove memory */ if (memtrace_init_regions_runtime(val)) - return -EINVAL; + goto out_unlock; if (memtrace_init_debugfs()) - return -EINVAL; + goto out_unlock; memtrace_size = val; - - return 0; + rc = 0; +out_unlock: + mutex_unlock(&memtrace_mutex); + return rc; } static int memtrace_enable_get(void *data, u64 *val) diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index b95b9e3c4c98e..c640cb993209a 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -384,7 +384,8 @@ static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group) for (i = 0; i < npucomp->pe_num; ++i) { struct pnv_ioda_pe *pe = npucomp->pe[i]; - if (!pe->table_group.ops->take_ownership) + if (!pe->table_group.ops || + !pe->table_group.ops->take_ownership) continue; pe->table_group.ops->take_ownership(&pe->table_group); } @@ -400,7 +401,8 @@ static void pnv_npu_peers_release_ownership( for (i = 0; i < npucomp->pe_num; ++i) { struct pnv_ioda_pe *pe = npucomp->pe[i]; - if (!pe->table_group.ops->release_ownership) + if (!pe->table_group.ops || + !pe->table_group.ops->release_ownership) continue; pe->table_group.ops->release_ownership(&pe->table_group); } @@ -560,6 +562,11 @@ int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid, return -ENODEV; hose = pci_bus_to_host(npdev->bus); + if (hose->npu == NULL) { + dev_info_once(&npdev->dev, "Nvlink1 does not support contexts"); + return 0; + } + nphb = hose->private_data; dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n", @@ -607,6 +614,11 @@ int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev) return -ENODEV; hose = pci_bus_to_host(npdev->bus); + if (hose->npu == NULL) { + dev_info_once(&npdev->dev, "Nvlink1 does not support contexts"); + return 0; + } + nphb = hose->private_data; dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n", diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c index a2aa5e433ac84..5cd0f52d258f6 100644 --- a/arch/powerpc/platforms/powernv/opal-call.c +++ b/arch/powerpc/platforms/powernv/opal-call.c @@ -290,3 +290,6 @@ OPAL_CALL(opal_nx_coproc_init, OPAL_NX_COPROC_INIT); OPAL_CALL(opal_mpipl_update, OPAL_MPIPL_UPDATE); OPAL_CALL(opal_mpipl_register_tag, OPAL_MPIPL_REGISTER_TAG); OPAL_CALL(opal_mpipl_query_tag, OPAL_MPIPL_QUERY_TAG); +OPAL_CALL(opal_secvar_get, OPAL_SECVAR_GET); +OPAL_CALL(opal_secvar_get_next, OPAL_SECVAR_GET_NEXT); +OPAL_CALL(opal_secvar_enqueue_update, OPAL_SECVAR_ENQUEUE_UPDATE); diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 543c816fa99ef..0e6693bacb7e7 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -318,15 +318,14 @@ static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj, return count; } -static struct dump_obj *create_dump_obj(uint32_t id, size_t size, - uint32_t type) +static void create_dump_obj(uint32_t id, size_t size, uint32_t type) { struct dump_obj *dump; int rc; dump = kzalloc(sizeof(*dump), GFP_KERNEL); if (!dump) - return NULL; + return; dump->kobj.kset = dump_kset; @@ -346,21 +345,39 @@ static struct dump_obj *create_dump_obj(uint32_t id, size_t size, rc = kobject_add(&dump->kobj, NULL, "0x%x-0x%x", type, id); if (rc) { kobject_put(&dump->kobj); - return NULL; + return; } + /* + * As soon as the sysfs file for this dump is created/activated there is + * a chance the opal_errd daemon (or any userspace) might read and + * acknowledge the dump before kobject_uevent() is called. If that + * happens then there is a potential race between + * dump_ack_store->kobject_put() and kobject_uevent() which leads to a + * use-after-free of a kernfs object resulting in a kernel crash. + * + * To avoid that, we need to take a reference on behalf of the bin file, + * so that our reference remains valid while we call kobject_uevent(). + * We then drop our reference before exiting the function, leaving the + * bin file to drop the last reference (if it hasn't already). + */ + + /* Take a reference for the bin file */ + kobject_get(&dump->kobj); rc = sysfs_create_bin_file(&dump->kobj, &dump->dump_attr); - if (rc) { + if (rc == 0) { + kobject_uevent(&dump->kobj, KOBJ_ADD); + + pr_info("%s: New platform dump. ID = 0x%x Size %u\n", + __func__, dump->id, dump->size); + } else { + /* Drop reference count taken for bin file */ kobject_put(&dump->kobj); - return NULL; } - pr_info("%s: New platform dump. ID = 0x%x Size %u\n", - __func__, dump->id, dump->size); - - kobject_uevent(&dump->kobj, KOBJ_ADD); - - return dump; + /* Drop our reference */ + kobject_put(&dump->kobj); + return; } static irqreturn_t process_dump(int irq, void *data) diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 62ef7ad995da3..5e33b1fc67c2b 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -179,14 +179,14 @@ static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj, return count; } -static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) +static void create_elog_obj(uint64_t id, size_t size, uint64_t type) { struct elog_obj *elog; int rc; elog = kzalloc(sizeof(*elog), GFP_KERNEL); if (!elog) - return NULL; + return; elog->kobj.kset = elog_kset; @@ -219,18 +219,37 @@ static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) rc = kobject_add(&elog->kobj, NULL, "0x%llx", id); if (rc) { kobject_put(&elog->kobj); - return NULL; + return; } + /* + * As soon as the sysfs file for this elog is created/activated there is + * a chance the opal_errd daemon (or any userspace) might read and + * acknowledge the elog before kobject_uevent() is called. If that + * happens then there is a potential race between + * elog_ack_store->kobject_put() and kobject_uevent() which leads to a + * use-after-free of a kernfs object resulting in a kernel crash. + * + * To avoid that, we need to take a reference on behalf of the bin file, + * so that our reference remains valid while we call kobject_uevent(). + * We then drop our reference before exiting the function, leaving the + * bin file to drop the last reference (if it hasn't already). + */ + + /* Take a reference for the bin file */ + kobject_get(&elog->kobj); rc = sysfs_create_bin_file(&elog->kobj, &elog->raw_attr); - if (rc) { + if (rc == 0) { + kobject_uevent(&elog->kobj, KOBJ_ADD); + } else { + /* Drop the reference taken for the bin file */ kobject_put(&elog->kobj); - return NULL; } - kobject_uevent(&elog->kobj, KOBJ_ADD); + /* Drop our reference */ + kobject_put(&elog->kobj); - return elog; + return; } static irqreturn_t elog_event(int irq, void *data) diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c index d361d37d975f3..f5cea068f0bdc 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.c +++ b/arch/powerpc/platforms/powernv/opal-fadump.c @@ -60,7 +60,7 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) addr = be64_to_cpu(addr); pr_debug("Kernel metadata addr: %llx\n", addr); opal_fdm_active = (void *)addr; - if (opal_fdm_active->registered_regions == 0) + if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) return; ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_BOOT_MEM, &addr); @@ -95,17 +95,17 @@ static int opal_fadump_unregister(struct fw_dump *fadump_conf); static void opal_fadump_update_config(struct fw_dump *fadump_conf, const struct opal_fadump_mem_struct *fdm) { - pr_debug("Boot memory regions count: %d\n", fdm->region_cnt); + pr_debug("Boot memory regions count: %d\n", be16_to_cpu(fdm->region_cnt)); /* * The destination address of the first boot memory region is the * destination address of boot memory regions. */ - fadump_conf->boot_mem_dest_addr = fdm->rgn[0].dest; + fadump_conf->boot_mem_dest_addr = be64_to_cpu(fdm->rgn[0].dest); pr_debug("Destination address of boot memory regions: %#016llx\n", fadump_conf->boot_mem_dest_addr); - fadump_conf->fadumphdr_addr = fdm->fadumphdr_addr; + fadump_conf->fadumphdr_addr = be64_to_cpu(fdm->fadumphdr_addr); } /* @@ -126,9 +126,9 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, fadump_conf->boot_memory_size = 0; pr_debug("Boot memory regions:\n"); - for (i = 0; i < fdm->region_cnt; i++) { - base = fdm->rgn[i].src; - size = fdm->rgn[i].size; + for (i = 0; i < be16_to_cpu(fdm->region_cnt); i++) { + base = be64_to_cpu(fdm->rgn[i].src); + size = be64_to_cpu(fdm->rgn[i].size); pr_debug("\t[%03d] base: 0x%lx, size: 0x%lx\n", i, base, size); fadump_conf->boot_mem_addr[i] = base; @@ -143,7 +143,7 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, * Start address of reserve dump area (permanent reservation) for * re-registering FADump after dump capture. */ - fadump_conf->reserve_dump_area_start = fdm->rgn[0].dest; + fadump_conf->reserve_dump_area_start = be64_to_cpu(fdm->rgn[0].dest); /* * Rarely, but it can so happen that system crashes before all @@ -155,13 +155,14 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, * Hope the memory that could not be preserved only has pages * that are usually filtered out while saving the vmcore. */ - if (fdm->region_cnt > fdm->registered_regions) { + if (be16_to_cpu(fdm->region_cnt) > be16_to_cpu(fdm->registered_regions)) { pr_warn("Not all memory regions were saved!!!\n"); pr_warn(" Unsaved memory regions:\n"); - i = fdm->registered_regions; - while (i < fdm->region_cnt) { + i = be16_to_cpu(fdm->registered_regions); + while (i < be16_to_cpu(fdm->region_cnt)) { pr_warn("\t[%03d] base: 0x%llx, size: 0x%llx\n", - i, fdm->rgn[i].src, fdm->rgn[i].size); + i, be64_to_cpu(fdm->rgn[i].src), + be64_to_cpu(fdm->rgn[i].size)); i++; } @@ -170,7 +171,7 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, } fadump_conf->boot_mem_top = (fadump_conf->boot_memory_size + hole_size); - fadump_conf->boot_mem_regs_cnt = fdm->region_cnt; + fadump_conf->boot_mem_regs_cnt = be16_to_cpu(fdm->region_cnt); opal_fadump_update_config(fadump_conf, fdm); } @@ -178,35 +179,38 @@ static void opal_fadump_get_config(struct fw_dump *fadump_conf, static void opal_fadump_init_metadata(struct opal_fadump_mem_struct *fdm) { fdm->version = OPAL_FADUMP_VERSION; - fdm->region_cnt = 0; - fdm->registered_regions = 0; - fdm->fadumphdr_addr = 0; + fdm->region_cnt = cpu_to_be16(0); + fdm->registered_regions = cpu_to_be16(0); + fdm->fadumphdr_addr = cpu_to_be64(0); } static u64 opal_fadump_init_mem_struct(struct fw_dump *fadump_conf) { u64 addr = fadump_conf->reserve_dump_area_start; + u16 reg_cnt; int i; opal_fdm = __va(fadump_conf->kernel_metadata); opal_fadump_init_metadata(opal_fdm); /* Boot memory regions */ + reg_cnt = be16_to_cpu(opal_fdm->region_cnt); for (i = 0; i < fadump_conf->boot_mem_regs_cnt; i++) { - opal_fdm->rgn[i].src = fadump_conf->boot_mem_addr[i]; - opal_fdm->rgn[i].dest = addr; - opal_fdm->rgn[i].size = fadump_conf->boot_mem_sz[i]; + opal_fdm->rgn[i].src = cpu_to_be64(fadump_conf->boot_mem_addr[i]); + opal_fdm->rgn[i].dest = cpu_to_be64(addr); + opal_fdm->rgn[i].size = cpu_to_be64(fadump_conf->boot_mem_sz[i]); - opal_fdm->region_cnt++; + reg_cnt++; addr += fadump_conf->boot_mem_sz[i]; } + opal_fdm->region_cnt = cpu_to_be16(reg_cnt); /* * Kernel metadata is passed to f/w and retrieved in capture kerenl. * So, use it to save fadump header address instead of calculating it. */ - opal_fdm->fadumphdr_addr = (opal_fdm->rgn[0].dest + - fadump_conf->boot_memory_size); + opal_fdm->fadumphdr_addr = cpu_to_be64(be64_to_cpu(opal_fdm->rgn[0].dest) + + fadump_conf->boot_memory_size); opal_fadump_update_config(fadump_conf, opal_fdm); @@ -269,18 +273,21 @@ static u64 opal_fadump_get_bootmem_min(void) static int opal_fadump_register(struct fw_dump *fadump_conf) { s64 rc = OPAL_PARAMETER; + u16 registered_regs; int i, err = -EIO; - for (i = 0; i < opal_fdm->region_cnt; i++) { + registered_regs = be16_to_cpu(opal_fdm->registered_regions); + for (i = 0; i < be16_to_cpu(opal_fdm->region_cnt); i++) { rc = opal_mpipl_update(OPAL_MPIPL_ADD_RANGE, - opal_fdm->rgn[i].src, - opal_fdm->rgn[i].dest, - opal_fdm->rgn[i].size); + be64_to_cpu(opal_fdm->rgn[i].src), + be64_to_cpu(opal_fdm->rgn[i].dest), + be64_to_cpu(opal_fdm->rgn[i].size)); if (rc != OPAL_SUCCESS) break; - opal_fdm->registered_regions++; + registered_regs++; } + opal_fdm->registered_regions = cpu_to_be16(registered_regs); switch (rc) { case OPAL_SUCCESS: @@ -291,7 +298,8 @@ static int opal_fadump_register(struct fw_dump *fadump_conf) case OPAL_RESOURCE: /* If MAX regions limit in f/w is hit, warn and proceed. */ pr_warn("%d regions could not be registered for MPIPL as MAX limit is reached!\n", - (opal_fdm->region_cnt - opal_fdm->registered_regions)); + (be16_to_cpu(opal_fdm->region_cnt) - + be16_to_cpu(opal_fdm->registered_regions))); fadump_conf->dump_registered = 1; err = 0; break; @@ -312,7 +320,7 @@ static int opal_fadump_register(struct fw_dump *fadump_conf) * If some regions were registered before OPAL_MPIPL_ADD_RANGE * OPAL call failed, unregister all regions. */ - if ((err < 0) && (opal_fdm->registered_regions > 0)) + if ((err < 0) && (be16_to_cpu(opal_fdm->registered_regions) > 0)) opal_fadump_unregister(fadump_conf); return err; @@ -328,7 +336,7 @@ static int opal_fadump_unregister(struct fw_dump *fadump_conf) return -EIO; } - opal_fdm->registered_regions = 0; + opal_fdm->registered_regions = cpu_to_be16(0); fadump_conf->dump_registered = 0; return 0; } @@ -563,19 +571,20 @@ static void opal_fadump_region_show(struct fw_dump *fadump_conf, else fdm_ptr = opal_fdm; - for (i = 0; i < fdm_ptr->region_cnt; i++) { + for (i = 0; i < be16_to_cpu(fdm_ptr->region_cnt); i++) { /* * Only regions that are registered for MPIPL * would have dump data. */ if ((fadump_conf->dump_active) && - (i < fdm_ptr->registered_regions)) - dumped_bytes = fdm_ptr->rgn[i].size; + (i < be16_to_cpu(fdm_ptr->registered_regions))) + dumped_bytes = be64_to_cpu(fdm_ptr->rgn[i].size); seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ", - fdm_ptr->rgn[i].src, fdm_ptr->rgn[i].dest); + be64_to_cpu(fdm_ptr->rgn[i].src), + be64_to_cpu(fdm_ptr->rgn[i].dest)); seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n", - fdm_ptr->rgn[i].size, dumped_bytes); + be64_to_cpu(fdm_ptr->rgn[i].size), dumped_bytes); } /* Dump is active. Show reserved area start address. */ @@ -624,6 +633,7 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) { const __be32 *prop; unsigned long dn; + __be64 be_addr; u64 addr = 0; int i, len; s64 ret; @@ -680,13 +690,13 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) if (!prop) return; - ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr); - if ((ret != OPAL_SUCCESS) || !addr) { + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &be_addr); + if ((ret != OPAL_SUCCESS) || !be_addr) { pr_err("Failed to get Kernel metadata (%lld)\n", ret); return; } - addr = be64_to_cpu(addr); + addr = be64_to_cpu(be_addr); pr_debug("Kernel metadata addr: %llx\n", addr); opal_fdm_active = __va(addr); @@ -697,14 +707,14 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) } /* Kernel regions not registered with f/w for MPIPL */ - if (opal_fdm_active->registered_regions == 0) { + if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) { opal_fdm_active = NULL; return; } - ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &addr); - if (addr) { - addr = be64_to_cpu(addr); + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &be_addr); + if (be_addr) { + addr = be64_to_cpu(be_addr); pr_debug("CPU metadata addr: %llx\n", addr); opal_cpu_metadata = __va(addr); } diff --git a/arch/powerpc/platforms/powernv/opal-fadump.h b/arch/powerpc/platforms/powernv/opal-fadump.h index f1e9ecf548c5d..3f715efb0aa6e 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.h +++ b/arch/powerpc/platforms/powernv/opal-fadump.h @@ -31,14 +31,14 @@ * OPAL FADump kernel metadata * * The address of this structure will be registered with f/w for retrieving - * and processing during crash dump. + * in the capture kernel to process the crash dump. */ struct opal_fadump_mem_struct { u8 version; u8 reserved[3]; - u16 region_cnt; /* number of regions */ - u16 registered_regions; /* Regions registered for MPIPL */ - u64 fadumphdr_addr; + __be16 region_cnt; /* number of regions */ + __be16 registered_regions; /* Regions registered for MPIPL */ + __be64 fadumphdr_addr; struct opal_mpipl_region rgn[FADUMP_MAX_MEM_REGS]; } __packed; @@ -135,7 +135,7 @@ static inline void opal_fadump_read_regs(char *bufp, unsigned int regs_cnt, for (i = 0; i < regs_cnt; i++, bufp += reg_entry_size) { reg_entry = (struct hdat_fadump_reg_entry *)bufp; val = (cpu_endian ? be64_to_cpu(reg_entry->reg_val) : - reg_entry->reg_val); + (u64)(reg_entry->reg_val)); opal_fadump_set_regval_regnum(regs, be32_to_cpu(reg_entry->reg_type), be32_to_cpu(reg_entry->reg_num), diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index e04b20625cb94..000b350d4060c 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -59,10 +59,6 @@ static void export_imc_mode_and_cmd(struct device_node *node, imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root); - /* - * Return here, either because 'imc' directory already exists, - * Or failed to create a new one. - */ if (!imc_debugfs_parent) return; @@ -135,7 +131,6 @@ static int imc_get_mem_addr_nest(struct device_node *node, } pmu_ptr->imc_counter_mmaped = true; - export_imc_mode_and_cmd(node, pmu_ptr); kfree(base_addr_arr); kfree(chipid_arr); return 0; @@ -151,7 +146,7 @@ static int imc_get_mem_addr_nest(struct device_node *node, * and domain as the inputs. * Allocates memory for the struct imc_pmu, sets up its domain, size and offsets */ -static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain) +static struct imc_pmu *imc_pmu_create(struct device_node *parent, int pmu_index, int domain) { int ret = 0; struct imc_pmu *pmu_ptr; @@ -159,27 +154,23 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain) /* Return for unknown domain */ if (domain < 0) - return -EINVAL; + return NULL; /* memory for pmu */ pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL); if (!pmu_ptr) - return -ENOMEM; + return NULL; /* Set the domain */ pmu_ptr->domain = domain; ret = of_property_read_u32(parent, "size", &pmu_ptr->counter_mem_size); - if (ret) { - ret = -EINVAL; + if (ret) goto free_pmu; - } if (!of_property_read_u32(parent, "offset", &offset)) { - if (imc_get_mem_addr_nest(parent, pmu_ptr, offset)) { - ret = -EINVAL; + if (imc_get_mem_addr_nest(parent, pmu_ptr, offset)) goto free_pmu; - } } /* Function to register IMC pmu */ @@ -190,14 +181,14 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain) if (pmu_ptr->domain == IMC_DOMAIN_NEST) kfree(pmu_ptr->mem_info); kfree(pmu_ptr); - return ret; + return NULL; } - return 0; + return pmu_ptr; free_pmu: kfree(pmu_ptr); - return ret; + return NULL; } static void disable_nest_pmu_counters(void) @@ -254,6 +245,7 @@ int get_max_nest_dev(void) static int opal_imc_counters_probe(struct platform_device *pdev) { struct device_node *imc_dev = pdev->dev.of_node; + struct imc_pmu *pmu; int pmu_count = 0, domain; bool core_imc_reg = false, thread_imc_reg = false; u32 type; @@ -269,6 +261,7 @@ static int opal_imc_counters_probe(struct platform_device *pdev) } for_each_compatible_node(imc_dev, NULL, IMC_DTB_UNIT_COMPAT) { + pmu = NULL; if (of_property_read_u32(imc_dev, "type", &type)) { pr_warn("IMC Device without type property\n"); continue; @@ -285,7 +278,14 @@ static int opal_imc_counters_probe(struct platform_device *pdev) domain = IMC_DOMAIN_THREAD; break; case IMC_TYPE_TRACE: - domain = IMC_DOMAIN_TRACE; + /* + * FIXME. Using trace_imc events to monitor application + * or KVM thread performance can cause a checkstop + * (system crash). + * Disable it for now. + */ + pr_info_once("IMC: disabling trace_imc PMU\n"); + domain = -1; break; default: pr_warn("IMC Unknown Device type \n"); @@ -293,9 +293,13 @@ static int opal_imc_counters_probe(struct platform_device *pdev) break; } - if (!imc_pmu_create(imc_dev, pmu_count, domain)) { - if (domain == IMC_DOMAIN_NEST) + pmu = imc_pmu_create(imc_dev, pmu_count, domain); + if (pmu != NULL) { + if (domain == IMC_DOMAIN_NEST) { + if (!imc_debugfs_parent) + export_imc_mode_and_cmd(imc_dev, pmu); pmu_count++; + } if (domain == IMC_DOMAIN_CORE) core_imc_reg = true; if (domain == IMC_DOMAIN_THREAD) @@ -303,10 +307,6 @@ static int opal_imc_counters_probe(struct platform_device *pdev) } } - /* If none of the nest units are registered, remove debugfs interface */ - if (pmu_count == 0) - debugfs_remove_recursive(imc_debugfs_parent); - /* If core imc is not registered, unregister thread-imc */ if (!core_imc_reg && thread_imc_reg) unregister_thread_imc(); diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c index 608569082ba0b..123a0e799b7bd 100644 --- a/arch/powerpc/platforms/powernv/opal-lpc.c +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -396,6 +396,7 @@ void __init opal_lpc_init(void) if (!of_get_property(np, "primary", NULL)) continue; opal_lpc_chip_id = of_get_ibm_chip_id(np); + of_node_put(np); break; } if (opal_lpc_chip_id < 0) diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index 45f4223a790f2..6ebc906e07b0a 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -372,6 +372,12 @@ static struct notifier_block opal_prd_event_nb = { .priority = 0, }; +static struct notifier_block opal_prd_event_nb2 = { + .notifier_call = opal_prd_msg_notifier, + .next = NULL, + .priority = 0, +}; + static int opal_prd_probe(struct platform_device *pdev) { int rc; @@ -393,9 +399,10 @@ static int opal_prd_probe(struct platform_device *pdev) return rc; } - rc = opal_message_notifier_register(OPAL_MSG_PRD2, &opal_prd_event_nb); + rc = opal_message_notifier_register(OPAL_MSG_PRD2, &opal_prd_event_nb2); if (rc) { pr_err("Couldn't register PRD2 event notifier\n"); + opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); return rc; } @@ -404,6 +411,8 @@ static int opal_prd_probe(struct platform_device *pdev) pr_err("failed to register miscdev\n"); opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); + opal_message_notifier_unregister(OPAL_MSG_PRD2, + &opal_prd_event_nb2); return rc; } @@ -414,6 +423,7 @@ static int opal_prd_remove(struct platform_device *pdev) { misc_deregister(&opal_prd_dev); opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); + opal_message_notifier_unregister(OPAL_MSG_PRD2, &opal_prd_event_nb2); return 0; } diff --git a/arch/powerpc/platforms/powernv/opal-secvar.c b/arch/powerpc/platforms/powernv/opal-secvar.c new file mode 100644 index 0000000000000..14133e120bdd2 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-secvar.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PowerNV code for secure variables + * + * Copyright (C) 2019 IBM Corporation + * Author: Claudio Carvalho + * Nayna Jain + * + * APIs to access secure variables managed by OPAL. + */ + +#define pr_fmt(fmt) "secvar: "fmt + +#include +#include +#include +#include +#include +#include + +static int opal_status_to_err(int rc) +{ + int err; + + switch (rc) { + case OPAL_SUCCESS: + err = 0; + break; + case OPAL_UNSUPPORTED: + err = -ENXIO; + break; + case OPAL_PARAMETER: + err = -EINVAL; + break; + case OPAL_RESOURCE: + err = -ENOSPC; + break; + case OPAL_HARDWARE: + err = -EIO; + break; + case OPAL_NO_MEM: + err = -ENOMEM; + break; + case OPAL_EMPTY: + err = -ENOENT; + break; + case OPAL_PARTIAL: + err = -EFBIG; + break; + default: + err = -EINVAL; + } + + return err; +} + +static int opal_get_variable(const char *key, uint64_t ksize, + u8 *data, uint64_t *dsize) +{ + int rc; + + if (!key || !dsize) + return -EINVAL; + + *dsize = cpu_to_be64(*dsize); + + rc = opal_secvar_get(key, ksize, data, dsize); + + *dsize = be64_to_cpu(*dsize); + + return opal_status_to_err(rc); +} + +static int opal_get_next_variable(const char *key, uint64_t *keylen, + uint64_t keybufsize) +{ + int rc; + + if (!key || !keylen) + return -EINVAL; + + *keylen = cpu_to_be64(*keylen); + + rc = opal_secvar_get_next(key, keylen, keybufsize); + + *keylen = be64_to_cpu(*keylen); + + return opal_status_to_err(rc); +} + +static int opal_set_variable(const char *key, uint64_t ksize, u8 *data, + uint64_t dsize) +{ + int rc; + + if (!key || !data) + return -EINVAL; + + rc = opal_secvar_enqueue_update(key, ksize, data, dsize); + + return opal_status_to_err(rc); +} + +static const struct secvar_operations opal_secvar_ops = { + .get = opal_get_variable, + .get_next = opal_get_next_variable, + .set = opal_set_variable, +}; + +static int opal_secvar_probe(struct platform_device *pdev) +{ + if (!opal_check_token(OPAL_SECVAR_GET) + || !opal_check_token(OPAL_SECVAR_GET_NEXT) + || !opal_check_token(OPAL_SECVAR_ENQUEUE_UPDATE)) { + pr_err("OPAL doesn't support secure variables\n"); + return -ENODEV; + } + + set_secvar_ops(&opal_secvar_ops); + + return 0; +} + +static const struct of_device_id opal_secvar_match[] = { + { .compatible = "ibm,secvar-backend",}, + {}, +}; + +static struct platform_driver opal_secvar_driver = { + .driver = { + .name = "secvar", + .of_match_table = opal_secvar_match, + }, +}; + +static int __init opal_secvar_init(void) +{ + return platform_driver_probe(&opal_secvar_driver, opal_secvar_probe); +} +device_initcall(opal_secvar_init); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 38e90270280bf..8355bcd00f93f 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -1002,6 +1002,9 @@ static int __init opal_init(void) /* Initialise OPAL Power control interface */ opal_power_control_init(); + /* Initialize OPAL secure variables */ + opal_pdev_init("ibm,secvar-backend"); + return 0; } machine_subsys_initcall(powernv, opal_init); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index c28d0d9b7ee0f..b76b5322beb5a 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -188,7 +188,7 @@ static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) unsigned int pe_num = pe->pe_number; WARN_ON(pe->pdev); - WARN_ON(pe->npucomp); /* NPUs are not supposed to be freed */ + WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */ kfree(pe->npucomp); memset(pe, 0, sizeof(struct pnv_ioda_pe)); clear_bit(pe_num, phb->ioda.pe_alloc); @@ -777,6 +777,34 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb, return 0; } +static void pnv_ioda_unset_peltv(struct pnv_phb *phb, + struct pnv_ioda_pe *pe, + struct pci_dev *parent) +{ + int64_t rc; + + while (parent) { + struct pci_dn *pdn = pci_get_pdn(parent); + + if (pdn && pdn->pe_number != IODA_INVALID_PE) { + rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, + pe->pe_number, + OPAL_REMOVE_PE_FROM_DOMAIN); + /* XXX What to do in case of error ? */ + } + parent = parent->bus->self; + } + + opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + + /* Disassociate PE in PELT */ + rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, + pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); + if (rc) + pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc); +} + static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct pci_dev *parent; @@ -827,25 +855,13 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) for (rid = pe->rid; rid < rid_end; rid++) phb->ioda.pe_rmap[rid] = IODA_INVALID_PE; - /* Release from all parents PELT-V */ - while (parent) { - struct pci_dn *pdn = pci_get_pdn(parent); - if (pdn && pdn->pe_number != IODA_INVALID_PE) { - rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, - pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); - /* XXX What to do in case of error ? */ - } - parent = parent->bus->self; - } - - opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, - OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + /* + * Release from all parents PELT-V. NPUs don't have a PELTV + * table + */ + if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI) + pnv_ioda_unset_peltv(phb, pe, parent); - /* Disassociate PE in PELT */ - rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, - pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); - if (rc) - pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc); rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, bcomp, dcomp, fcomp, OPAL_UNMAP_PE); if (rc) @@ -1062,20 +1078,20 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) return NULL; } - /* NOTE: We get only one ref to the pci_dev for the pdn, not for the - * pointer in the PE data structure, both should be destroyed at the - * same time. However, this needs to be looked at more closely again - * once we actually start removing things (Hotplug, SR-IOV, ...) + /* NOTE: We don't get a reference for the pointer in the PE + * data structure, both the device and PE structures should be + * destroyed at the same time. However, removing nvlink + * devices will need some work. * * At some point we want to remove the PDN completely anyways */ - pci_dev_get(dev); pdn->pe_number = pe->pe_number; pe->flags = PNV_IODA_PE_DEV; pe->pdev = dev; pe->pbus = NULL; pe->mve_number = -1; pe->rid = dev->bus->number << 8 | pdn->devfn; + pe->device_count++; pe_info(pe, "Associated device to PE\n"); @@ -1084,13 +1100,13 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) pnv_ioda_free_pe(pe); pdn->pe_number = IODA_INVALID_PE; pe->pdev = NULL; - pci_dev_put(dev); return NULL; } /* Put PE to the list */ + mutex_lock(&phb->ioda.pe_list_mutex); list_add_tail(&pe->list, &phb->ioda.pe_list); - + mutex_unlock(&phb->ioda.pe_list_mutex); return pe; } @@ -1205,6 +1221,14 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus); struct pnv_phb *phb = hose->private_data; + /* + * Intentionally leak a reference on the npu device (for + * nvlink only; this is not an opencapi path) to make sure it + * never goes away, as it's been the case all along and some + * work is needed otherwise. + */ + pci_dev_get(npu_pdev); + /* * Due to a hardware errata PE#0 on the NPU is reserved for * error handling. This means we only have three PEs remaining @@ -1228,11 +1252,11 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) */ dev_info(&npu_pdev->dev, "Associating to existing PE %x\n", pe_num); - pci_dev_get(npu_pdev); npu_pdn = pci_get_pdn(npu_pdev); rid = npu_pdev->bus->number << 8 | npu_pdn->devfn; npu_pdn->pe_number = pe_num; phb->ioda.pe_rmap[rid] = pe->pe_number; + pe->device_count++; /* Map the PE to this link */ rc = opal_pci_set_pe(phb->opal_id, pe_num, rid, @@ -1268,8 +1292,6 @@ static void pnv_pci_ioda_setup_PEs(void) { struct pci_controller *hose; struct pnv_phb *phb; - struct pci_bus *bus; - struct pci_dev *pdev; struct pnv_ioda_pe *pe; list_for_each_entry(hose, &hose_list, list_node) { @@ -1281,11 +1303,6 @@ static void pnv_pci_ioda_setup_PEs(void) if (phb->model == PNV_PHB_MODEL_NPU2) WARN_ON_ONCE(pnv_npu2_init(hose)); } - if (phb->type == PNV_PHB_NPU_OCAPI) { - bus = hose->bus; - list_for_each_entry(pdev, &bus->devices, bus_list) - pnv_ioda_setup_dev_PE(pdev); - } } list_for_each_entry(hose, &hose_list, list_node) { phb = hose->private_data; @@ -1558,6 +1575,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) /* Reserve PE for each VF */ for (vf_index = 0; vf_index < num_vfs; vf_index++) { + int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index); + int vf_bus = pci_iov_virtfn_bus(pdev, vf_index); + struct pci_dn *vf_pdn; + if (pdn->m64_single_mode) pe_num = pdn->pe_num_map[vf_index]; else @@ -1570,13 +1591,11 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) pe->pbus = NULL; pe->parent_dev = pdev; pe->mve_number = -1; - pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) | - pci_iov_virtfn_devfn(pdev, vf_index); + pe->rid = (vf_bus << 8) | vf_devfn; pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n", hose->global_number, pdev->bus->number, - PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)), - PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num); + PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num); if (pnv_ioda_configure_pe(phb, pe)) { /* XXX What do we do here ? */ @@ -1590,6 +1609,15 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) list_add_tail(&pe->list, &phb->ioda.pe_list); mutex_unlock(&phb->ioda.pe_list_mutex); + /* associate this pe to it's pdn */ + list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) { + if (vf_pdn->busno == vf_bus && + vf_pdn->devfn == vf_devfn) { + vf_pdn->pe_number = pe_num; + break; + } + } + pnv_pci_ioda2_setup_dma_pe(phb, pe); #ifdef CONFIG_IOMMU_API iommu_register_group(&pe->table_group, @@ -2889,9 +2917,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) struct pci_dn *pdn; int mul, total_vfs; - if (!pdev->is_physfn || pci_dev_is_added(pdev)) - return; - pdn = pci_get_pdn(pdev); pdn->vfs_expanded = 0; pdn->m64_single_mode = false; @@ -2966,6 +2991,30 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) res->end = res->start - 1; } } + +static void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev) +{ + if (WARN_ON(pci_dev_is_added(pdev))) + return; + + if (pdev->is_virtfn) { + struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev); + + /* + * VF PEs are single-device PEs so their pdev pointer needs to + * be set. The pdev doesn't exist when the PE is allocated (in + * (pcibios_sriov_enable()) so we fix it up here. + */ + pe->pdev = pdev; + WARN_ON(!(pe->flags & PNV_IODA_PE_VF)); + } else if (pdev->is_physfn) { + /* + * For PFs adjust their allocated IOV resources to match what + * the PHB can support using it's M64 BAR table. + */ + pnv_pci_ioda_fixup_iov_resources(pdev); + } +} #endif /* CONFIG_PCI_IOV */ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, @@ -3383,6 +3432,28 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) return true; } +static bool pnv_ocapi_enable_device_hook(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct pci_dn *pdn; + struct pnv_ioda_pe *pe; + + if (!phb->initialized) + return true; + + pdn = pci_get_pdn(dev); + if (!pdn) + return false; + + if (pdn->pe_number == IODA_INVALID_PE) { + pe = pnv_ioda_setup_dev_PE(dev); + if (!pe) + return false; + } + return true; +} + static long pnv_pci_ioda1_unset_window(struct iommu_table_group *table_group, int num) { @@ -3512,7 +3583,10 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) struct pnv_phb *phb = pe->phb; struct pnv_ioda_pe *slave, *tmp; + mutex_lock(&phb->ioda.pe_list_mutex); list_del(&pe->list); + mutex_unlock(&phb->ioda.pe_list_mutex); + switch (phb->type) { case PNV_PHB_IODA1: pnv_pci_ioda1_release_pe_dma(pe); @@ -3520,6 +3594,8 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) case PNV_PHB_IODA2: pnv_pci_ioda2_release_pe_dma(pe); break; + case PNV_PHB_NPU_OCAPI: + break; default: WARN_ON(1); } @@ -3620,7 +3696,8 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { }; static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = { - .enable_device_hook = pnv_pci_enable_device_hook, + .enable_device_hook = pnv_ocapi_enable_device_hook, + .release_device = pnv_pci_release_device, .window_alignment = pnv_pci_window_alignment, .reset_secondary_bus = pnv_pci_reset_secondary_bus, .shutdown = pnv_pci_ioda_shutdown, @@ -3862,7 +3939,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, ppc_md.pcibios_default_alignment = pnv_pci_default_alignment; #ifdef CONFIG_PCI_IOV - ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources; + ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov; ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment; ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable; ppc_md.pcibios_sriov_disable = pnv_pcibios_sriov_disable; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 2825d004dece2..72ffc98dd43f0 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -38,7 +38,7 @@ static DEFINE_MUTEX(tunnel_mutex); int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id) { - struct device_node *parent = np; + struct device_node *node = np; u32 bdfn; u64 phbid; int ret; @@ -48,25 +48,29 @@ int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id) return -ENXIO; bdfn = ((bdfn & 0x00ffff00) >> 8); - while ((parent = of_get_parent(parent))) { - if (!PCI_DN(parent)) { - of_node_put(parent); + for (node = np; node; node = of_get_parent(node)) { + if (!PCI_DN(node)) { + of_node_put(node); break; } - if (!of_device_is_compatible(parent, "ibm,ioda2-phb") && - !of_device_is_compatible(parent, "ibm,ioda3-phb")) { - of_node_put(parent); + if (!of_device_is_compatible(node, "ibm,ioda2-phb") && + !of_device_is_compatible(node, "ibm,ioda3-phb") && + !of_device_is_compatible(node, "ibm,ioda2-npu2-opencapi-phb")) { + of_node_put(node); continue; } - ret = of_property_read_u64(parent, "ibm,opal-phbid", &phbid); + ret = of_property_read_u64(node, "ibm,opal-phbid", &phbid); if (ret) { - of_node_put(parent); + of_node_put(node); return -ENXIO; } - *id = PCI_SLOT_ID(phbid, bdfn); + if (of_device_is_compatible(node, "ibm,ioda2-npu2-opencapi-phb")) + *id = PCI_PHB_SLOT_ID(phbid); + else + *id = PCI_SLOT_ID(phbid, bdfn); return 0; } @@ -814,24 +818,6 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; -#ifdef CONFIG_PCI_IOV - struct pnv_ioda_pe *pe; - struct pci_dn *pdn; - - /* Fix the VF pdn PE number */ - if (pdev->is_virtfn) { - pdn = pci_get_pdn(pdev); - WARN_ON(pdn->pe_number != IODA_INVALID_PE); - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - if (pe->rid == ((pdev->bus->number << 8) | - (pdev->devfn & 0xff))) { - pdn->pe_number = pe->pe_number; - pe->pdev = pdev; - break; - } - } - } -#endif /* CONFIG_PCI_IOV */ if (phb && phb->dma_dev_setup) phb->dma_dev_setup(phb, pdev); @@ -945,6 +931,23 @@ void __init pnv_pci_init(void) if (!firmware_has_feature(FW_FEATURE_OPAL)) return; +#ifdef CONFIG_PCIEPORTBUS + /* + * On PowerNV PCIe devices are (currently) managed in cooperation + * with firmware. This isn't *strictly* required, but there's enough + * assumptions baked into both firmware and the platform code that + * it's unwise to allow the portbus services to be used. + * + * We need to fix this eventually, but for now set this flag to disable + * the portbus driver. The AER service isn't required since that AER + * events are handled via EEH. The pciehp hotplug driver can't work + * without kernel changes (and portbus binding breaks pnv_php). The + * other services also require some thinking about how we're going + * to integrate them. + */ + pcie_ports_disabled = true; +#endif + /* Look for IODA IO-Hubs. */ for_each_compatible_node(np, NULL, "ibm,ioda-hub") { pnv_pci_init_ioda_hub(np); diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h index 1aa51c4fa9045..4825e4cf4410d 100644 --- a/arch/powerpc/platforms/powernv/powernv.h +++ b/arch/powerpc/platforms/powernv/powernv.h @@ -35,4 +35,6 @@ ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count); u32 memcons_get_size(struct memcons *mc); struct memcons *memcons_init(struct device_node *node, const char *mc_prop_name); +void pnv_rng_init(void); + #endif /* _POWERNV_H */ diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 8035caf6e297d..ace6497017830 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -17,6 +17,7 @@ #include #include #include +#include "powernv.h" #define DARN_ERR 0xFFFFFFFFFFFFFFFFul @@ -28,7 +29,6 @@ struct powernv_rng { static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng); - int powernv_hwrng_present(void) { struct powernv_rng *rng; @@ -43,7 +43,11 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) unsigned long parity; /* Calculate the parity of the value */ - asm ("popcntd %0,%1" : "=r" (parity) : "r" (val)); + asm (".machine push; \ + .machine power7; \ + popcntd %0,%1; \ + .machine pop;" + : "=r" (parity) : "r" (val)); /* xor our value with the previous mask */ val ^= rng->mask; @@ -59,6 +63,8 @@ int powernv_get_random_real_mode(unsigned long *v) struct powernv_rng *rng; rng = raw_cpu_read(powernv_rng); + if (!rng) + return 0; *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real)); @@ -94,9 +100,6 @@ static int initialise_darn(void) return 0; } } - - pr_warn("Unable to use DARN for get_random_seed()\n"); - return -EIO; } @@ -159,32 +162,59 @@ static __init int rng_create(struct device_node *dn) rng_init_per_cpu(rng, dn); - pr_info_once("Registering arch random hook.\n"); - ppc_md.get_random_seed = powernv_get_random_long; return 0; } -static __init int rng_init(void) +static int __init pnv_get_random_long_early(unsigned long *v) { struct device_node *dn; - int rc; - - for_each_compatible_node(dn, NULL, "ibm,power-rng") { - rc = rng_create(dn); - if (rc) { - pr_err("Failed creating rng for %pOF (%d).\n", - dn, rc); - continue; - } - /* Create devices for hwrng driver */ - of_platform_device_create(dn, NULL, NULL); - } + if (!slab_is_available()) + return 0; + + if (cmpxchg(&ppc_md.get_random_seed, pnv_get_random_long_early, + NULL) != pnv_get_random_long_early) + return 0; - initialise_darn(); + for_each_compatible_node(dn, NULL, "ibm,power-rng") + rng_create(dn); + + if (!ppc_md.get_random_seed) + return 0; + return ppc_md.get_random_seed(v); +} + +void __init pnv_rng_init(void) +{ + struct device_node *dn; + + /* Prefer darn over the rest. */ + if (!initialise_darn()) + return; + + dn = of_find_compatible_node(NULL, NULL, "ibm,power-rng"); + if (dn) + ppc_md.get_random_seed = pnv_get_random_long_early; + + of_node_put(dn); +} + +static int __init pnv_rng_late_init(void) +{ + struct device_node *dn; + unsigned long v; + + /* In case it wasn't called during init for some other reason. */ + if (ppc_md.get_random_seed == pnv_get_random_long_early) + pnv_get_random_long_early(&v); + + if (ppc_md.get_random_seed == powernv_get_random_long) { + for_each_compatible_node(dn, NULL, "ibm,power-rng") + of_platform_device_create(dn, NULL, NULL); + } return 0; } -machine_subsys_initcall(powernv, rng_init); +machine_subsys_initcall(powernv, pnv_rng_late_init); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 83498604d322b..d9e26614d7ed8 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -122,12 +122,29 @@ static void pnv_setup_rfi_flush(void) type = L1D_FLUSH_ORI; } + /* + * If we are non-Power9 bare metal, we don't need to flush on kernel + * entry or after user access: they fix a P9 specific vulnerability. + */ + if (!pvr_version_is(PVR_POWER9)) { + security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); + } + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \ security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); setup_rfi_flush(type, enable); setup_count_cache_flush(); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); + setup_entry_flush(enable); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); + setup_uaccess_flush(enable); } static void __init pnv_setup_arch(void) @@ -151,6 +168,8 @@ static void __init pnv_setup_arch(void) powersave_nap = 1; /* XXX PMCS */ + + pnv_rng_init(); } static void __init pnv_init(void) @@ -169,11 +188,16 @@ static void __init pnv_init(void) add_preferred_console("hvc", 0, NULL); if (!radix_enabled()) { + size_t size = sizeof(struct slb_entry) * mmu_slb_size; int i; /* Allocate per cpu area to save old slb contents during MCE */ - for_each_possible_cpu(i) - paca_ptrs[i]->mce_faulty_slbs = memblock_alloc_node(mmu_slb_size, __alignof__(*paca_ptrs[i]->mce_faulty_slbs), cpu_to_node(i)); + for_each_possible_cpu(i) { + paca_ptrs[i]->mce_faulty_slbs = + memblock_alloc_node(size, + __alignof__(struct slb_entry), + cpu_to_node(i)); + } } } diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 13e2516993466..bbf361f23ae86 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -43,7 +43,7 @@ #include #define DBG(fmt...) udbg_printf(fmt) #else -#define DBG(fmt...) +#define DBG(fmt...) do { } while (0) #endif static void pnv_smp_setup_cpu(int cpu) @@ -167,7 +167,6 @@ static void pnv_smp_cpu_kill_self(void) /* Standard hot unplug procedure */ idle_task_exit(); - current->active_mm = NULL; /* for sanity */ cpu = smp_processor_id(); DBG("CPU%d offline\n", cpu); generic_set_cpu_dead(cpu); diff --git a/arch/powerpc/platforms/powernv/ultravisor.c b/arch/powerpc/platforms/powernv/ultravisor.c index e4a00ad06f9d3..67c8c4b2d8b17 100644 --- a/arch/powerpc/platforms/powernv/ultravisor.c +++ b/arch/powerpc/platforms/powernv/ultravisor.c @@ -55,6 +55,7 @@ static int __init uv_init(void) return -ENODEV; uv_memcons = memcons_init(node, "memcons"); + of_node_put(node); if (!uv_memcons) return -ENOENT; diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index 423be34f0f5fb..c3e3746694003 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -6,6 +6,7 @@ * Copyright 2006 Sony Corp. */ +#include #include #include #include @@ -200,13 +201,14 @@ void ps3_mm_vas_destroy(void) { int result; - DBG("%s:%d: map.vas_id = %llu\n", __func__, __LINE__, map.vas_id); - if (map.vas_id) { result = lv1_select_virtual_address_space(0); - BUG_ON(result); - result = lv1_destruct_virtual_address_space(map.vas_id); - BUG_ON(result); + result += lv1_destruct_virtual_address_space(map.vas_id); + + if (result) { + lv1_panic(0); + } + map.vas_id = 0; } } @@ -304,19 +306,20 @@ static void ps3_mm_region_destroy(struct mem_region *r) int result; if (!r->destroy) { - pr_info("%s:%d: Not destroying high region: %llxh %llxh\n", - __func__, __LINE__, r->base, r->size); return; } - DBG("%s:%d: r->base = %llxh\n", __func__, __LINE__, r->base); - if (r->base) { result = lv1_release_memory(r->base); - BUG_ON(result); + + if (result) { + lv1_panic(0); + } + r->size = r->base = r->offset = 0; map.total = map.rm.size; } + ps3_mm_set_repository_highmem(NULL); } @@ -1116,6 +1119,7 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev, enum ps3_dma_region_type region_type, void *addr, unsigned long len) { unsigned long lpar_addr; + int result; lpar_addr = addr ? ps3_mm_phys_to_lpar(__pa(addr)) : 0; @@ -1127,6 +1131,16 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev, r->offset -= map.r1.offset; r->len = len ? len : _ALIGN_UP(map.total, 1 << r->page_size); + dev->core.dma_mask = &r->dma_mask; + + result = dma_set_mask_and_coherent(&dev->core, DMA_BIT_MASK(32)); + + if (result < 0) { + dev_err(&dev->core, "%s:%d: dma_set_mask_and_coherent failed: %d\n", + __func__, __LINE__, result); + return result; + } + switch (dev->dev_type) { case PS3_DEVICE_TYPE_SB: r->region_ops = (USE_DYNAMIC_DMA) diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index b33251d75927b..572651a5c87bb 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -411,6 +411,10 @@ static struct bus_type cmm_subsys = { .dev_name = "cmm", }; +static void cmm_release_device(struct device *dev) +{ +} + /** * cmm_sysfs_register - Register with sysfs * @@ -426,6 +430,7 @@ static int cmm_sysfs_register(struct device *dev) dev->id = 0; dev->bus = &cmm_subsys; + dev->release = cmm_release_device; if ((rc = device_register(dev))) goto subsys_unregister; diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 16e86ba8aa209..f6b7749d6ada7 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -127,7 +127,6 @@ void dlpar_free_cc_nodes(struct device_node *dn) #define NEXT_PROPERTY 3 #define PREV_PARENT 4 #define MORE_MEMORY 5 -#define CALL_AGAIN -2 #define ERR_CFG_USE -9003 struct device_node *dlpar_configure_connector(__be32 drc_index, @@ -168,6 +167,9 @@ struct device_node *dlpar_configure_connector(__be32 drc_index, spin_unlock(&rtas_data_buf_lock); + if (rtas_busy_delay(rc)) + continue; + switch (rc) { case COMPLETE: break; @@ -216,9 +218,6 @@ struct device_node *dlpar_configure_connector(__be32 drc_index, last_dn = last_dn->parent; break; - case CALL_AGAIN: - break; - case MORE_MEMORY: case ERR_CFG_USE: default: diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index bbda646b63b54..210e6f563eb41 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -91,9 +91,6 @@ static void rtas_stop_self(void) BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE); - printk("cpu %u (hwid %u) Ready to die...\n", - smp_processor_id(), hard_smp_processor_id()); - rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL); panic("Alas, I survived.\n"); diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 8e700390f3d6c..f364909d0c08d 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -27,7 +27,7 @@ static bool rtas_hp_event; unsigned long pseries_memory_block_size(void) { struct device_node *np; - unsigned int memblock_size = MIN_MEMORY_BLOCK_SIZE; + u64 memblock_size = MIN_MEMORY_BLOCK_SIZE; struct resource r; np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); @@ -223,7 +223,7 @@ static int get_lmb_range(u32 drc_index, int n_lmbs, struct drmem_lmb **end_lmb) { struct drmem_lmb *lmb, *start, *end; - struct drmem_lmb *last_lmb; + struct drmem_lmb *limit; start = NULL; for_each_drmem_lmb(lmb) { @@ -236,10 +236,10 @@ static int get_lmb_range(u32 drc_index, int n_lmbs, if (!start) return -EINVAL; - end = &start[n_lmbs - 1]; + end = &start[n_lmbs]; - last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1]; - if (end > last_lmb) + limit = &drmem_info->lmbs[drmem_info->n_lmbs]; + if (end > limit) return -EINVAL; *start_lmb = start; @@ -279,7 +279,7 @@ static int dlpar_offline_lmb(struct drmem_lmb *lmb) return dlpar_change_lmb_state(lmb, false); } -static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) +static int pseries_remove_memblock(unsigned long base, unsigned long memblock_size) { unsigned long block_sz, start_pfn; int sections_per_block; @@ -310,10 +310,11 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz static int pseries_remove_mem_node(struct device_node *np) { - const __be32 *regs; + const __be32 *prop; unsigned long base; - unsigned int lmb_size; + unsigned long lmb_size; int ret = -EINVAL; + int addr_cells, size_cells; /* * Check to see if we are actually removing memory @@ -324,12 +325,19 @@ static int pseries_remove_mem_node(struct device_node *np) /* * Find the base address and size of the memblock */ - regs = of_get_property(np, "reg", NULL); - if (!regs) + prop = of_get_property(np, "reg", NULL); + if (!prop) return ret; - base = be64_to_cpu(*(unsigned long *)regs); - lmb_size = be32_to_cpu(regs[3]); + addr_cells = of_n_addr_cells(np); + size_cells = of_n_size_cells(np); + + /* + * "reg" property represents (addr,size) tuple. + */ + base = of_read_number(prop, addr_cells); + prop += addr_cells; + lmb_size = of_read_number(prop, size_cells); pseries_remove_memblock(base, lmb_size); return 0; @@ -360,8 +368,10 @@ static bool lmb_is_removable(struct drmem_lmb *lmb) for (i = 0; i < scns_per_block; i++) { pfn = PFN_DOWN(phys_addr); - if (!pfn_present(pfn)) + if (!pfn_present(pfn)) { + phys_addr += MIN_MEMORY_BLOCK_SIZE; continue; + } rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION); phys_addr += MIN_MEMORY_BLOCK_SIZE; @@ -374,25 +384,32 @@ static int dlpar_add_lmb(struct drmem_lmb *); static int dlpar_remove_lmb(struct drmem_lmb *lmb) { + struct memory_block *mem_block; unsigned long block_sz; int rc; if (!lmb_is_removable(lmb)) return -EINVAL; + mem_block = lmb_to_memblock(lmb); + if (mem_block == NULL) + return -EINVAL; + rc = dlpar_offline_lmb(lmb); - if (rc) + if (rc) { + put_device(&mem_block->dev); return rc; + } block_sz = pseries_memory_block_size(); - __remove_memory(lmb->nid, lmb->base_addr, block_sz); + __remove_memory(mem_block->nid, lmb->base_addr, block_sz); + put_device(&mem_block->dev); /* Update memory regions for memory remove */ memblock_remove(lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); - lmb_clear_nid(lmb); lmb->flags &= ~DRCONF_MEM_ASSIGNED; return 0; @@ -611,7 +628,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) #else static inline int pseries_remove_memblock(unsigned long base, - unsigned int memblock_size) + unsigned long memblock_size) { return -EOPNOTSUPP; } @@ -649,7 +666,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) static int dlpar_add_lmb(struct drmem_lmb *lmb) { unsigned long block_sz; - int rc; + int nid, rc; if (lmb->flags & DRCONF_MEM_ASSIGNED) return -EINVAL; @@ -660,11 +677,13 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) return rc; } - lmb_set_nid(lmb); block_sz = memory_block_size_bytes(); + /* Find the node id for this address. */ + nid = memory_add_physaddr_to_nid(lmb->base_addr); + /* Add the memory */ - rc = __add_memory(lmb->nid, lmb->base_addr, block_sz); + rc = __add_memory(nid, lmb->base_addr, block_sz); if (rc) { invalidate_lmb_associativity_index(lmb); return rc; @@ -672,9 +691,8 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) rc = dlpar_online_lmb(lmb); if (rc) { - __remove_memory(lmb->nid, lmb->base_addr, block_sz); + __remove_memory(nid, lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); - lmb_clear_nid(lmb); } else { lmb->flags |= DRCONF_MEM_ASSIGNED; } @@ -943,10 +961,11 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog) static int pseries_add_mem_node(struct device_node *np) { - const __be32 *regs; + const __be32 *prop; unsigned long base; - unsigned int lmb_size; + unsigned long lmb_size; int ret = -EINVAL; + int addr_cells, size_cells; /* * Check to see if we are actually adding memory @@ -957,12 +976,18 @@ static int pseries_add_mem_node(struct device_node *np) /* * Find the base and size of the memblock */ - regs = of_get_property(np, "reg", NULL); - if (!regs) + prop = of_get_property(np, "reg", NULL); + if (!prop) return ret; - base = be64_to_cpu(*(unsigned long *)regs); - lmb_size = be32_to_cpu(regs[3]); + addr_cells = of_n_addr_cells(np); + size_cells = of_n_size_cells(np); + /* + * "reg" property represents (addr,size) tuple. + */ + base = of_read_number(prop, addr_cells); + prop += addr_cells; + lmb_size = of_read_number(prop, size_cells); /* * Update memory region to represent the memory add diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 6ba081dd61c93..b4ce9d472dfe0 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -36,7 +36,6 @@ #include #include #include -#include #include "pseries.h" @@ -133,10 +132,10 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) return be64_to_cpu(*tcep); } -static void tce_free_pSeriesLP(struct iommu_table*, long, long); +static void tce_free_pSeriesLP(unsigned long liobn, long, long); static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long); -static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, +static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, long npages, unsigned long uaddr, enum dma_data_direction direction, unsigned long attrs) @@ -147,25 +146,25 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, int ret = 0; long tcenum_start = tcenum, npages_start = npages; - rpn = __pa(uaddr) >> TCE_SHIFT; + rpn = __pa(uaddr) >> tceshift; proto_tce = TCE_PCI_READ; if (direction != DMA_TO_DEVICE) proto_tce |= TCE_PCI_WRITE; while (npages--) { - tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; - rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce); + tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift; + rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce); if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { ret = (int)rc; - tce_free_pSeriesLP(tbl, tcenum_start, + tce_free_pSeriesLP(liobn, tcenum_start, (npages_start - (npages + 1))); break; } if (rc && printk_ratelimit()) { printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); - printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\tindex = 0x%llx\n", (u64)liobn); printk("\ttcenum = 0x%llx\n", (u64)tcenum); printk("\ttce val = 0x%llx\n", tce ); dump_stack(); @@ -194,7 +193,8 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, unsigned long flags; if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) { - return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, + return tce_build_pSeriesLP(tbl->it_index, tcenum, + tbl->it_page_shift, npages, uaddr, direction, attrs); } @@ -210,8 +210,9 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, /* If allocation fails, fall back to the loop implementation */ if (!tcep) { local_irq_restore(flags); - return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, - direction, attrs); + return tce_build_pSeriesLP(tbl->it_index, tcenum, + tbl->it_page_shift, + npages, uaddr, direction, attrs); } __this_cpu_write(tce_page, tcep); } @@ -262,16 +263,16 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, return ret; } -static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) +static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages) { u64 rc; while (npages--) { - rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0); + rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0); if (rc && printk_ratelimit()) { printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); - printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\tindex = 0x%llx\n", (u64)liobn); printk("\ttcenum = 0x%llx\n", (u64)tcenum); dump_stack(); } @@ -286,7 +287,7 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n u64 rc; if (!firmware_has_feature(FW_FEATURE_MULTITCE)) - return tce_free_pSeriesLP(tbl, tcenum, npages); + return tce_free_pSeriesLP(tbl->it_index, tcenum, npages); rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages); @@ -401,6 +402,19 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, u64 rc = 0; long l, limit; + if (!firmware_has_feature(FW_FEATURE_MULTITCE)) { + unsigned long tceshift = be32_to_cpu(maprange->tce_shift); + unsigned long dmastart = (start_pfn << PAGE_SHIFT) + + be64_to_cpu(maprange->dma_base); + unsigned long tcenum = dmastart >> tceshift; + unsigned long npages = num_pfn << PAGE_SHIFT >> tceshift; + void *uaddr = __va(start_pfn << PAGE_SHIFT); + + return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn), + tcenum, tceshift, npages, (unsigned long) uaddr, + DMA_BIDIRECTIONAL, 0); + } + local_irq_disable(); /* to protect tcep and the page behind it */ tcep = __this_cpu_read(tce_page); @@ -1320,15 +1334,7 @@ void iommu_init_early_pSeries(void) of_reconfig_notifier_register(&iommu_reconfig_nb); register_memory_notifier(&iommu_mem_nb); - /* - * Secure guest memory is inacessible to devices so regular DMA isn't - * possible. - * - * In that case keep devices' dma_map_ops as NULL so that the generic - * DMA code path will use SWIOTLB to bounce buffers for DMA. - */ - if (!is_secure_guest()) - set_pci_dma_ops(&dma_iommu_ops); + set_pci_dma_ops(&dma_iommu_ops); } static int __init disable_multitce(char *str) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index f87a5c64e24dc..c93b9a3bf237e 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1992,7 +1992,7 @@ static int __init vpa_debugfs_init(void) { char name[16]; long i; - static struct dentry *vpa_dir; + struct dentry *vpa_dir; if (!firmware_has_feature(FW_FEATURE_SPLPAR)) return 0; diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index e33e8bc4b69bd..38c306551f76b 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -435,10 +435,10 @@ static void maxmem_data(struct seq_file *m) { unsigned long maxmem = 0; - maxmem += drmem_info->n_lmbs * drmem_info->lmb_size; + maxmem += (unsigned long)drmem_info->n_lmbs * drmem_info->lmb_size; maxmem += hugetlb_total_pages() * PAGE_SIZE; - seq_printf(m, "MaxMem=%ld\n", maxmem); + seq_printf(m, "MaxMem=%lu\n", maxmem); } static int pseries_lparcfg_data(struct seq_file *m, void *v) diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 133f6adcb39cb..637300330507f 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -4,6 +4,7 @@ * Copyright 2006-2007 Michael Ellerman, IBM Corp. */ +#include #include #include #include @@ -458,7 +459,28 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) return hwirq; } - virq = irq_create_mapping(NULL, hwirq); + /* + * Depending on the number of online CPUs in the original + * kernel, it is likely for CPU #0 to be offline in a kdump + * kernel. The associated IRQs in the affinity mappings + * provided by irq_create_affinity_masks() are thus not + * started by irq_startup(), as per-design for managed IRQs. + * This can be a problem with multi-queue block devices driven + * by blk-mq : such a non-started IRQ is very likely paired + * with the single queue enforced by blk-mq during kdump (see + * blk_mq_alloc_tag_set()). This causes the device to remain + * silent and likely hangs the guest at some point. + * + * We don't really care for fine-grained affinity when doing + * kdump actually : simply ignore the pre-computed affinity + * masks in this case and let the default mask with all CPUs + * be used when creating the IRQ mappings. + */ + if (is_kdump_kernel()) + virq = irq_create_mapping(NULL, hwirq); + else + virq = irq_create_mapping_affinity(NULL, hwirq, + entry->affinity); if (!virq) { pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq); diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 61883291defc3..66fd517c48164 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -152,7 +152,7 @@ static int papr_scm_meta_get(struct papr_scm_priv *p, int len, read; int64_t ret; - if ((hdr->in_offset + hdr->in_length) >= p->metadata_size) + if ((hdr->in_offset + hdr->in_length) > p->metadata_size) return -EINVAL; for (len = hdr->in_length; len; len -= read) { @@ -206,7 +206,7 @@ static int papr_scm_meta_set(struct papr_scm_priv *p, __be64 data_be; int64_t ret; - if ((hdr->in_offset + hdr->in_length) >= p->metadata_size) + if ((hdr->in_offset + hdr->in_length) > p->metadata_size) return -EINVAL; for (len = hdr->in_length; len; len -= wrote) { @@ -342,6 +342,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) p->bus = nvdimm_bus_register(NULL, &p->bus_desc); if (!p->bus) { dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn); + kfree(p->bus_desc.provider_name); return -ENXIO; } @@ -498,6 +499,7 @@ static int papr_scm_remove(struct platform_device *pdev) nvdimm_bus_unregister(p->bus); drc_pmem_unbind(p); + kfree(p->bus_desc.provider_name); kfree(p); return 0; diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 561917fa54a8a..afca4b737e80f 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -66,6 +66,7 @@ EXPORT_SYMBOL_GPL(init_phb_dynamic); int remove_phb_dynamic(struct pci_controller *phb) { struct pci_bus *b = phb->bus; + struct pci_host_bridge *host_bridge = to_pci_host_bridge(b->bridge); struct resource *res; int rc, i; @@ -92,7 +93,8 @@ int remove_phb_dynamic(struct pci_controller *phb) /* Remove the PCI bus and unregister the bridge device from sysfs */ phb->bus = NULL; pci_remove_bus(b); - device_unregister(b->bridge); + host_bridge->bus = NULL; + device_unregister(&host_bridge->dev); /* Now release the IO resource */ if (res->flags & IORESOURCE_IO) diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 13fa370a87e4e..5fd56230b01c5 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -114,4 +114,6 @@ int dlpar_workqueue_init(void); void pseries_setup_rfi_flush(void); void pseries_lpar_read_hblkrm_characteristics(void); +void pseries_rng_init(void); + #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 3acdcc3bb908c..b658fa627a34b 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -184,7 +184,6 @@ static void handle_system_shutdown(char event_modifier) case EPOW_SHUTDOWN_ON_UPS: pr_emerg("Loss of system power detected. System is running on" " UPS/battery. Check RTAS error log for details\n"); - orderly_poweroff(true); break; case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: @@ -395,10 +394,11 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) /* * Some versions of FWNMI place the buffer inside the 4kB page starting at * 0x7000. Other versions place it inside the rtas buffer. We check both. + * Minimum size of the buffer is 16 bytes. */ #define VALID_FWNMI_BUFFER(A) \ - ((((A) >= 0x7000) && ((A) < 0x7ff0)) || \ - (((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16)))) + ((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \ + (((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16)))) static inline struct rtas_error_log *fwnmi_get_errlog(void) { @@ -494,18 +494,55 @@ int pSeries_system_reset_exception(struct pt_regs *regs) return 0; /* need to perform reset */ } +static int mce_handle_err_realmode(int disposition, u8 error_type) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + if (disposition == RTAS_DISP_NOT_RECOVERED) { + switch (error_type) { + case MC_ERROR_TYPE_SLB: + case MC_ERROR_TYPE_ERAT: + /* + * Store the old slb content in paca before flushing. + * Print this when we go to virtual mode. + * There are chances that we may hit MCE again if there + * is a parity error on the SLB entry we trying to read + * for saving. Hence limit the slb saving to single + * level of recursion. + */ + if (local_paca->in_mce == 1) + slb_save_contents(local_paca->mce_faulty_slbs); + flush_and_reload_slb(); + disposition = RTAS_DISP_FULLY_RECOVERED; + break; + default: + break; + } + } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { + /* Platform corrected itself but could be degraded */ + pr_err("MCE: limited recovery, system may be degraded\n"); + disposition = RTAS_DISP_FULLY_RECOVERED; + } +#endif + return disposition; +} -static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) +static int mce_handle_err_virtmode(struct pt_regs *regs, + struct rtas_error_log *errp, + struct pseries_mc_errorlog *mce_log, + int disposition) { struct mce_error_info mce_err = { 0 }; - unsigned long eaddr = 0, paddr = 0; - struct pseries_errorlog *pseries_log; - struct pseries_mc_errorlog *mce_log; - int disposition = rtas_error_disposition(errp); int initiator = rtas_error_initiator(errp); int severity = rtas_error_severity(errp); + unsigned long eaddr = 0, paddr = 0; u8 error_type, err_sub_type; + if (!mce_log) + goto out; + + error_type = mce_log->error_type; + err_sub_type = rtas_mc_error_sub_type(mce_log); + if (initiator == RTAS_INITIATOR_UNKNOWN) mce_err.initiator = MCE_INITIATOR_UNKNOWN; else if (initiator == RTAS_INITIATOR_CPU) @@ -544,18 +581,7 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; mce_err.error_class = MCE_ECLASS_UNKNOWN; - if (!rtas_error_extended(errp)) - goto out; - - pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); - if (pseries_log == NULL) - goto out; - - mce_log = (struct pseries_mc_errorlog *)pseries_log->data; - error_type = mce_log->error_type; - err_sub_type = rtas_mc_error_sub_type(mce_log); - - switch (mce_log->error_type) { + switch (error_type) { case MC_ERROR_TYPE_UE: mce_err.error_type = MCE_ERROR_TYPE_UE; switch (err_sub_type) { @@ -652,40 +678,45 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; break; } - -#ifdef CONFIG_PPC_BOOK3S_64 - if (disposition == RTAS_DISP_NOT_RECOVERED) { - switch (error_type) { - case MC_ERROR_TYPE_SLB: - case MC_ERROR_TYPE_ERAT: - /* - * Store the old slb content in paca before flushing. - * Print this when we go to virtual mode. - * There are chances that we may hit MCE again if there - * is a parity error on the SLB entry we trying to read - * for saving. Hence limit the slb saving to single - * level of recursion. - */ - if (local_paca->in_mce == 1) - slb_save_contents(local_paca->mce_faulty_slbs); - flush_and_reload_slb(); - disposition = RTAS_DISP_FULLY_RECOVERED; - break; - default: - break; - } - } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { - /* Platform corrected itself but could be degraded */ - printk(KERN_ERR "MCE: limited recovery, system may " - "be degraded\n"); - disposition = RTAS_DISP_FULLY_RECOVERED; - } -#endif - out: save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED, - &mce_err, regs->nip, eaddr, paddr); + &mce_err, regs->nip, eaddr, paddr); + return disposition; +} + +static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) +{ + struct pseries_errorlog *pseries_log; + struct pseries_mc_errorlog *mce_log = NULL; + int disposition = rtas_error_disposition(errp); + u8 error_type; + + if (!rtas_error_extended(errp)) + goto out; + + pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); + if (!pseries_log) + goto out; + + mce_log = (struct pseries_mc_errorlog *)pseries_log->data; + error_type = mce_log->error_type; + + disposition = mce_handle_err_realmode(disposition, error_type); + /* + * Enable translation as we will be accessing per-cpu variables + * in save_mce_event() which may fall outside RMO region, also + * leave it enabled because subsequently we will be queuing work + * to workqueues where again per-cpu variables accessed, besides + * fwnmi_release_errinfo() crashes when called in realmode on + * pseries. + * Note: All the realmode handling like flushing SLB entries for + * SLB multihit is done by now. + */ +out: + mtmsr(mfmsr() | MSR_IR | MSR_DR); + disposition = mce_handle_err_virtmode(regs, errp, mce_log, + disposition); return disposition; } diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c index bbb97169bf63e..6ddfdeaace9ef 100644 --- a/arch/powerpc/platforms/pseries/rng.c +++ b/arch/powerpc/platforms/pseries/rng.c @@ -10,6 +10,7 @@ #include #include #include +#include "pseries.h" static int pseries_get_random_long(unsigned long *v) @@ -24,18 +25,13 @@ static int pseries_get_random_long(unsigned long *v) return 0; } -static __init int rng_init(void) +void __init pseries_rng_init(void) { struct device_node *dn; dn = of_find_compatible_node(NULL, NULL, "ibm,random"); if (!dn) - return -ENODEV; - - pr_info("Registering arch random hook.\n"); - + return; ppc_md.get_random_seed = pseries_get_random_long; - - return 0; + of_node_put(dn); } -machine_subsys_initcall(pseries, rng_init); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 0a40201f315ff..d5abb25865e36 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -74,6 +74,9 @@ #include "pseries.h" #include "../../../../drivers/pci/pci.h" +DEFINE_STATIC_KEY_FALSE(shared_processor); +EXPORT_SYMBOL(shared_processor); + int CMO_PrPSP = -1; int CMO_SecPSP = -1; unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K); @@ -558,6 +561,14 @@ void pseries_setup_rfi_flush(void) setup_rfi_flush(types, enable); setup_count_cache_flush(); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); + setup_entry_flush(enable); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); + setup_uaccess_flush(enable); } #ifdef CONFIG_PCI_IOV @@ -758,6 +769,10 @@ static void __init pSeries_setup_arch(void) if (firmware_has_feature(FW_FEATURE_LPAR)) { vpa_init(boot_cpuid); + + if (lppaca_shared_proc(get_lppaca())) + static_branch_enable(&shared_processor); + ppc_md.power_save = pseries_lpar_idle; ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; #ifdef CONFIG_PCI_IOV @@ -777,6 +792,8 @@ static void __init pSeries_setup_arch(void) if (swiotlb_force == SWIOTLB_FORCE) ppc_swiotlb_enable = 1; + + pseries_rng_init(); } static void pseries_panic(char *str) diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 0a24a5a185f02..e5ecadfb5dea2 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -13,7 +13,6 @@ #include #include #include -#include "../../kernel/cacheinfo.h" static u64 stream_id; static struct device suspend_dev; @@ -78,9 +77,7 @@ static void pseries_suspend_enable_irqs(void) * Update configuration which can be modified based on device tree * changes during resume. */ - cacheinfo_cpu_offline(smp_processor_id()); post_mobility_fixup(); - cacheinfo_cpu_online(smp_processor_id()); } /** @@ -132,15 +129,11 @@ static ssize_t store_hibernate(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - cpumask_var_t offline_mask; int rc; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!alloc_cpumask_var(&offline_mask, GFP_KERNEL)) - return -ENOMEM; - stream_id = simple_strtoul(buf, NULL, 16); do { @@ -150,32 +143,16 @@ static ssize_t store_hibernate(struct device *dev, } while (rc == -EAGAIN); if (!rc) { - /* All present CPUs must be online */ - cpumask_andnot(offline_mask, cpu_present_mask, - cpu_online_mask); - rc = rtas_online_cpus_mask(offline_mask); - if (rc) { - pr_err("%s: Could not bring present CPUs online.\n", - __func__); - goto out; - } - stop_topology_update(); rc = pm_suspend(PM_SUSPEND_MEM); start_topology_update(); - - /* Take down CPUs not online prior to suspend */ - if (!rtas_offline_cpus_mask(offline_mask)) - pr_warn("%s: Could not restore CPUs to offline " - "state.\n", __func__); } stream_id = 0; if (!rc) rc = count; -out: - free_cpumask_var(offline_mask); + return rc; } @@ -210,7 +187,6 @@ static struct bus_type suspend_subsys = { static const struct platform_suspend_ops pseries_suspend_ops = { .valid = suspend_valid_only_mem, - .begin = pseries_suspend_begin, .prepare_late = pseries_prepare_late, .enter = pseries_suspend_enter, }; diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 79e2287991dbb..91ca34966838b 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -36,7 +36,6 @@ static struct vio_dev vio_bus_device = { /* fake "parent" device */ .name = "vio", .type = "", .dev.init_name = "vio", - .dev.bus = &vio_bus_type, }; #ifdef CONFIG_PPC_SMLPAR @@ -1176,6 +1175,8 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) if (tbl == NULL) return NULL; + kref_init(&tbl->it_kref); + of_parse_dma_window(dev->dev.of_node, dma_window, &tbl->it_index, &offset, &size); diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 6b4a34b36d987..8ff9bcfe4b8d4 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -403,9 +403,10 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) } /* Initialize the DART HW */ - if (dart_init(dn) != 0) + if (dart_init(dn) != 0) { + of_node_put(dn); return; - + } /* * U4 supports a DART bypass, we use it for 64-bit capable devices to * improve performance. However, that only works for devices connected @@ -418,6 +419,7 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) /* Setup pci_dma ops */ set_pci_dma_ops(&dma_iommu_ops); + of_node_put(dn); } #ifdef CONFIG_PM diff --git a/arch/powerpc/sysdev/dcr-low.S b/arch/powerpc/sysdev/dcr-low.S index efeeb1b885a17..329b9c4ae5429 100644 --- a/arch/powerpc/sysdev/dcr-low.S +++ b/arch/powerpc/sysdev/dcr-low.S @@ -11,7 +11,7 @@ #include #define DCR_ACCESS_PROLOG(table) \ - cmpli cr0,r3,1024; \ + cmplwi cr0,r3,1024; \ rlwinm r3,r3,4,18,27; \ lis r5,table@h; \ ori r5,r5,table@l; \ diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c index 8963eaffb1b7b..39186ad6b3c3a 100644 --- a/arch/powerpc/sysdev/fsl_gtm.c +++ b/arch/powerpc/sysdev/fsl_gtm.c @@ -86,7 +86,7 @@ static LIST_HEAD(gtms); */ struct gtm_timer *gtm_get_timer16(void) { - struct gtm *gtm = NULL; + struct gtm *gtm; int i; list_for_each_entry(gtm, >ms, list_node) { @@ -103,7 +103,7 @@ struct gtm_timer *gtm_get_timer16(void) spin_unlock_irq(>m->lock); } - if (gtm) + if (!list_empty(>ms)) return ERR_PTR(-EBUSY); return ERR_PTR(-ENODEV); } diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index ff0e2b156cb5f..6adf2fdec799e 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -520,6 +520,7 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary) struct resource rsrc; const int *bus_range; u8 hdr_type, progif; + u32 class_code; struct device_node *dev; struct ccsr_pci __iomem *pci; u16 temp; @@ -593,6 +594,13 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary) PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS; if (fsl_pcie_check_link(hose)) hose->indirect_type |= PPC_INDIRECT_TYPE_NO_PCIE_LINK; + /* Fix Class Code to PCI_CLASS_BRIDGE_PCI_NORMAL for pre-3.0 controller */ + if (in_be32(&pci->block_rev1) < PCIE_IP_REV_3_0) { + early_read_config_dword(hose, 0, 0, PCIE_FSL_CSR_CLASSCODE, &class_code); + class_code &= 0xff; + class_code |= PCI_CLASS_BRIDGE_PCI_NORMAL << 8; + early_write_config_dword(hose, 0, 0, PCIE_FSL_CSR_CLASSCODE, class_code); + } } else { /* * Set PBFR(PCI Bus Function Register)[10] = 1 to diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h index 1d7a412056959..5ffaa60f1fa09 100644 --- a/arch/powerpc/sysdev/fsl_pci.h +++ b/arch/powerpc/sysdev/fsl_pci.h @@ -18,6 +18,7 @@ struct platform_device; #define PCIE_LTSSM 0x0404 /* PCIE Link Training and Status */ #define PCIE_LTSSM_L0 0x16 /* L0 state */ +#define PCIE_FSL_CSR_CLASSCODE 0x474 /* FSL GPEX CSR */ #define PCIE_IP_REV_2_2 0x02080202 /* PCIE IP block version Rev2.2 */ #define PCIE_IP_REV_3_0 0x02080300 /* PCIE IP block version Rev3.0 */ #define PIWAR_EN 0x80000000 /* Enable */ diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 07c164f7f8cfe..3f9f78621cf3c 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -505,8 +505,10 @@ int fsl_rio_setup(struct platform_device *dev) if (rc) { dev_err(&dev->dev, "Can't get %pOF property 'reg'\n", rmu_node); + of_node_put(rmu_node); goto err_rmu; } + of_node_put(rmu_node); rmu_regs_win = ioremap(rmu_regs.start, resource_size(&rmu_regs)); if (!rmu_regs_win) { dev_err(&dev->dev, "Unable to map rmu register window\n"); diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c index f6b253e2be409..36ec0bdd8b63c 100644 --- a/arch/powerpc/sysdev/mpic_msgr.c +++ b/arch/powerpc/sysdev/mpic_msgr.c @@ -191,7 +191,7 @@ static int mpic_msgr_probe(struct platform_device *dev) /* IO map the message register block. */ of_address_to_resource(np, 0, &rsrc); - msgr_block_addr = ioremap(rsrc.start, resource_size(&rsrc)); + msgr_block_addr = devm_ioremap(&dev->dev, rsrc.start, resource_size(&rsrc)); if (!msgr_block_addr) { dev_err(&dev->dev, "Failed to iomap MPIC message registers"); return -EFAULT; diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c index ad8117148ea3b..21b9d1bf39ff6 100644 --- a/arch/powerpc/sysdev/xics/icp-hv.c +++ b/arch/powerpc/sysdev/xics/icp-hv.c @@ -174,6 +174,7 @@ int icp_hv_init(void) icp_ops = &icp_hv_ops; + of_node_put(np); return 0; } diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index 68fd2540b0931..7fa520efcefa0 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -195,6 +195,7 @@ int icp_opal_init(void) printk("XICS: Using OPAL ICP fallbacks\n"); + of_node_put(np); return 0; } diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index df832b09e3e9c..d4467da279668 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -68,13 +69,6 @@ static u32 xive_ipi_irq; /* Xive state for each CPU */ static DEFINE_PER_CPU(struct xive_cpu *, xive_cpu); -/* - * A "disabled" interrupt should never fire, to catch problems - * we set its logical number to this - */ -#define XIVE_BAD_IRQ 0x7fffffff -#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1) - /* An invalid CPU target */ #define XIVE_INVALID_TARGET (-1) @@ -263,6 +257,13 @@ notrace void xmon_xive_do_dump(int cpu) xmon_printf("\n"); } +static struct irq_data *xive_get_irq_data(u32 hw_irq) +{ + unsigned int irq = irq_find_mapping(xive_irq_domain, hw_irq); + + return irq ? irq_get_irq_data(irq) : NULL; +} + int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) { int rc; @@ -279,6 +280,9 @@ int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d) xmon_printf("IRQ 0x%08x : target=0x%x prio=%02x lirq=0x%x ", hw_irq, target, prio, lirq); + if (!d) + d = xive_get_irq_data(hw_irq); + if (d) { struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); u64 val = xive_esb_read(xd, XIVE_ESB_GET); @@ -972,12 +976,22 @@ static int xive_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, bool *state) { struct xive_irq_data *xd = irq_data_get_irq_handler_data(data); + u8 pq; switch (which) { case IRQCHIP_STATE_ACTIVE: - *state = !xd->stale_p && - (xd->saved_p || - !!(xive_esb_read(xd, XIVE_ESB_GET) & XIVE_ESB_VAL_P)); + pq = xive_esb_read(xd, XIVE_ESB_GET); + + /* + * The esb value being all 1's means we couldn't get + * the PQ state of the interrupt through mmio. It may + * happen, for example when querying a PHB interrupt + * while the PHB is in an error state. We consider the + * interrupt to be inactive in that case. + */ + *state = (pq != XIVE_ESB_INVALID) && !xd->stale_p && + (xd->saved_p || (!!(pq & XIVE_ESB_VAL_P) && + !irqd_irq_disabled(data))); return 0; default: return -EINVAL; @@ -1007,12 +1021,16 @@ EXPORT_SYMBOL_GPL(is_xive_irq); void xive_cleanup_irq_data(struct xive_irq_data *xd) { if (xd->eoi_mmio) { + unmap_kernel_range((unsigned long)xd->eoi_mmio, + 1u << xd->esb_shift); iounmap(xd->eoi_mmio); if (xd->eoi_mmio == xd->trig_mmio) xd->trig_mmio = NULL; xd->eoi_mmio = NULL; } if (xd->trig_mmio) { + unmap_kernel_range((unsigned long)xd->trig_mmio, + 1u << xd->esb_shift); iounmap(xd->trig_mmio); xd->trig_mmio = NULL; } @@ -1035,6 +1053,15 @@ static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw) xd->target = XIVE_INVALID_TARGET; irq_set_handler_data(virq, xd); + /* + * Turn OFF by default the interrupt being mapped. A side + * effect of this check is the mapping the ESB page of the + * interrupt in the Linux address space. This prevents page + * fault issues in the crash handler which masks all + * interrupts. + */ + xive_esb_read(xd, XIVE_ESB_SET_PQ_01); + return 0; } @@ -1132,7 +1159,7 @@ static int xive_setup_cpu_ipi(unsigned int cpu) xc = per_cpu(xive_cpu, cpu); /* Check if we are already setup */ - if (xc->hw_ipi != 0) + if (xc->hw_ipi != XIVE_BAD_IRQ) return 0; /* Grab an IPI from the backend, this will populate xc->hw_ipi */ @@ -1169,7 +1196,7 @@ static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc) /* Disable the IPI and free the IRQ data */ /* Already cleaned up ? */ - if (xc->hw_ipi == 0) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; /* Mask the IPI */ @@ -1325,6 +1352,7 @@ static int xive_prepare_cpu(unsigned int cpu) if (np) xc->chip_id = of_get_ibm_chip_id(np); of_node_put(np); + xc->hw_ipi = XIVE_BAD_IRQ; per_cpu(xive_cpu, cpu) = xc; } diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 0ff6b739052c8..3fd086533dcfc 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -312,7 +313,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) s64 rc; /* Free the IPI */ - if (!xc->hw_ipi) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; for (;;) { rc = opal_xive_free_irq(xc->hw_ipi); @@ -320,7 +321,7 @@ static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc) msleep(OPAL_BUSY_DELAY_MS); continue; } - xc->hw_ipi = 0; + xc->hw_ipi = XIVE_BAD_IRQ; break; } } @@ -646,6 +647,7 @@ static bool xive_native_provision_pages(void) pr_err("Failed to allocate provisioning page\n"); return false; } + kmemleak_ignore(p); opal_xive_donate_page(chip, __pa(p)); } return true; diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 33c10749edec9..b21d71badaec9 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -392,20 +392,28 @@ static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) data->esb_shift = esb_shift; data->trig_page = trig_page; + data->hw_irq = hw_irq; + /* * No chip-id for the sPAPR backend. This has an impact how we * pick a target. See xive_pick_irq_target(). */ data->src_chip = XIVE_INVALID_CHIP_ID; + /* + * When the H_INT_ESB flag is set, the H_INT_ESB hcall should + * be used for interrupt management. Skip the remapping of the + * ESB pages which are not available. + */ + if (data->flags & XIVE_IRQ_FLAG_H_INT_ESB) + return 0; + data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift); if (!data->eoi_mmio) { pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq); return -ENOMEM; } - data->hw_irq = hw_irq; - /* Full function page supports trigger */ if (flags & XIVE_SRC_TRIGGER) { data->trig_mmio = data->eoi_mmio; @@ -552,11 +560,11 @@ static int xive_spapr_get_ipi(unsigned int cpu, struct xive_cpu *xc) static void xive_spapr_put_ipi(unsigned int cpu, struct xive_cpu *xc) { - if (!xc->hw_ipi) + if (xc->hw_ipi == XIVE_BAD_IRQ) return; xive_irq_bitmap_free(xc->hw_ipi); - xc->hw_ipi = 0; + xc->hw_ipi = XIVE_BAD_IRQ; } #endif /* CONFIG_SMP */ @@ -675,6 +683,7 @@ static bool xive_get_max_prio(u8 *max_prio) } reg = of_get_property(rootdn, "ibm,plat-res-int-priorities", &len); + of_node_put(rootdn); if (!reg) { pr_err("Failed to read 'ibm,plat-res-int-priorities' property\n"); return false; diff --git a/arch/powerpc/sysdev/xive/xive-internal.h b/arch/powerpc/sysdev/xive/xive-internal.h index 59cd366e7933a..382980f4de2d2 100644 --- a/arch/powerpc/sysdev/xive/xive-internal.h +++ b/arch/powerpc/sysdev/xive/xive-internal.h @@ -5,6 +5,13 @@ #ifndef __XIVE_INTERNAL_H #define __XIVE_INTERNAL_H +/* + * A "disabled" interrupt should never fire, to catch problems + * we set its logical number to this + */ +#define XIVE_BAD_IRQ 0x7fffffff +#define XIVE_MAX_IRQ (XIVE_BAD_IRQ - 1) + /* Each CPU carry one of these with various per-CPU state */ struct xive_cpu { #ifdef CONFIG_SMP diff --git a/arch/powerpc/tools/relocs_check.sh b/arch/powerpc/tools/relocs_check.sh index 2b4e959caa365..014e00e74d2b6 100755 --- a/arch/powerpc/tools/relocs_check.sh +++ b/arch/powerpc/tools/relocs_check.sh @@ -10,24 +10,29 @@ # based on relocs_check.pl # Copyright © 2009 IBM Corporation -if [ $# -lt 2 ]; then - echo "$0 [path to objdump] [path to vmlinux]" 1>&2 +if [ $# -lt 3 ]; then + echo "$0 [path to objdump] [path to nm] [path to vmlinux]" 1>&2 exit 1 fi -# Have Kbuild supply the path to objdump so we handle cross compilation. +# Have Kbuild supply the path to objdump and nm so we handle cross compilation. objdump="$1" -vmlinux="$2" +nm="$2" +vmlinux="$3" + +# Remove from the bad relocations those that match an undefined weak symbol +# which will result in an absolute relocation to 0. +# Weak unresolved symbols are of that form in nm output: +# " w _binary__btf_vmlinux_bin_end" +undef_weak_symbols=$($nm "$vmlinux" | awk '$1 ~ /w/ { print $2 }') bad_relocs=$( -"$objdump" -R "$vmlinux" | +$objdump -R "$vmlinux" | # Only look at relocation lines. grep -E '\ - # R_PPC64_ADDR64 __crc_ # On PPC: # R_PPC_RELATIVE, R_PPC_ADDR16_HI, # R_PPC_ADDR16_HA,R_PPC_ADDR16_LO, @@ -39,8 +44,7 @@ R_PPC_ADDR16_HI R_PPC_ADDR16_HA R_PPC_RELATIVE R_PPC_NONE' | - grep -E -v '\:' | awk '{print $1}' ) BRANCHES=$( -"$objdump" -R "$vmlinux" -D --start-address=0xc000000000000000 \ +$objdump -R "$vmlinux" -D --start-address=0xc000000000000000 \ --stop-address=${end_intr} | grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]]*b" | grep -v '\<__start_initialization_multiplatform>' | diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index f142570ad8606..6f9cccea54f3b 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -1,9 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for xmon -# Disable clang warning for using setjmp without setjmp.h header -subdir-ccflags-y := $(call cc-disable-warning, builtin-requires-header) - GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n diff --git a/arch/powerpc/xmon/nonstdio.c b/arch/powerpc/xmon/nonstdio.c index 5c1a50912229a..9b0d85bff021e 100644 --- a/arch/powerpc/xmon/nonstdio.c +++ b/arch/powerpc/xmon/nonstdio.c @@ -178,7 +178,7 @@ void xmon_printf(const char *format, ...) if (n && rc == 0) { /* No udbg hooks, fallback to printk() - dangerous */ - printk("%s", xmon_outbuf); + pr_cont("%s", xmon_outbuf); } } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index d83364ebc5c5a..6d130c89fbd85 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -187,6 +188,8 @@ static void dump_tlb_44x(void); static void dump_tlb_book3e(void); #endif +static void clear_all_bpt(void); + #ifdef CONFIG_PPC64 #define REG "%.16lx" #else @@ -283,10 +286,38 @@ Commands:\n\ " U show uptime information\n" " ? help\n" " # n limit output to n lines per page (for dp, dpa, dl)\n" -" zr reboot\n\ - zh halt\n" +" zr reboot\n" +" zh halt\n" ; +#ifdef CONFIG_SECURITY +static bool xmon_is_locked_down(void) +{ + static bool lockdown; + + if (!lockdown) { + lockdown = !!security_locked_down(LOCKDOWN_XMON_RW); + if (lockdown) { + printf("xmon: Disabled due to kernel lockdown\n"); + xmon_is_ro = true; + } + } + + if (!xmon_is_ro) { + xmon_is_ro = !!security_locked_down(LOCKDOWN_XMON_WR); + if (xmon_is_ro) + printf("xmon: Read-only due to kernel lockdown\n"); + } + + return lockdown; +} +#else /* CONFIG_SECURITY */ +static inline bool xmon_is_locked_down(void) +{ + return false; +} +#endif + static struct pt_regs *xmon_regs; static inline void sync(void) @@ -438,7 +469,10 @@ static bool wait_for_other_cpus(int ncpus) return false; } -#endif /* CONFIG_SMP */ +#else /* CONFIG_SMP */ +static inline void get_output_lock(void) {} +static inline void release_output_lock(void) {} +#endif static inline int unrecoverable_excp(struct pt_regs *regs) { @@ -455,6 +489,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi) int cmd = 0; struct bpt *bp; long recurse_jmp[JMP_BUF_LEN]; + bool locked_down; unsigned long offset; unsigned long flags; #ifdef CONFIG_SMP @@ -465,6 +500,8 @@ static int xmon_core(struct pt_regs *regs, int fromipi) local_irq_save(flags); hard_irq_disable(); + locked_down = xmon_is_locked_down(); + if (!fromipi) { tracing_enabled = tracing_is_on(); tracing_off(); @@ -518,7 +555,8 @@ static int xmon_core(struct pt_regs *regs, int fromipi) if (!fromipi) { get_output_lock(); - excprint(regs); + if (!locked_down) + excprint(regs); if (bp) { printf("cpu 0x%x stopped at breakpoint 0x%tx (", cpu, BP_NUM(bp)); @@ -570,10 +608,14 @@ static int xmon_core(struct pt_regs *regs, int fromipi) } remove_bpts(); disable_surveillance(); - /* for breakpoint or single step, print the current instr. */ - if (bp || TRAP(regs) == 0xd00) - ppc_inst_dump(regs->nip, 1, 0); - printf("enter ? for help\n"); + + if (!locked_down) { + /* for breakpoint or single step, print curr insn */ + if (bp || TRAP(regs) == 0xd00) + ppc_inst_dump(regs->nip, 1, 0); + printf("enter ? for help\n"); + } + mb(); xmon_gate = 1; barrier(); @@ -597,8 +639,9 @@ static int xmon_core(struct pt_regs *regs, int fromipi) spin_cpu_relax(); touch_nmi_watchdog(); } else { - cmd = cmds(regs); - if (cmd != 0) { + if (!locked_down) + cmd = cmds(regs); + if (locked_down || cmd != 0) { /* exiting xmon */ insert_bpts(); xmon_gate = 0; @@ -635,13 +678,16 @@ static int xmon_core(struct pt_regs *regs, int fromipi) "can't continue\n"); remove_bpts(); disable_surveillance(); - /* for breakpoint or single step, print the current instr. */ - if (bp || TRAP(regs) == 0xd00) - ppc_inst_dump(regs->nip, 1, 0); - printf("enter ? for help\n"); + if (!locked_down) { + /* for breakpoint or single step, print current insn */ + if (bp || TRAP(regs) == 0xd00) + ppc_inst_dump(regs->nip, 1, 0); + printf("enter ? for help\n"); + } } - cmd = cmds(regs); + if (!locked_down) + cmd = cmds(regs); insert_bpts(); in_xmon = 0; @@ -670,7 +716,10 @@ static int xmon_core(struct pt_regs *regs, int fromipi) } } #endif - insert_cpu_bpts(); + if (locked_down) + clear_all_bpt(); + else + insert_cpu_bpts(); touch_nmi_watchdog(); local_irq_restore(flags); @@ -1894,15 +1943,14 @@ static void dump_300_sprs(void) printf("pidr = %.16lx tidr = %.16lx\n", mfspr(SPRN_PID), mfspr(SPRN_TIDR)); - printf("asdr = %.16lx psscr = %.16lx\n", - mfspr(SPRN_ASDR), hv ? mfspr(SPRN_PSSCR) - : mfspr(SPRN_PSSCR_PR)); + printf("psscr = %.16lx\n", + hv ? mfspr(SPRN_PSSCR) : mfspr(SPRN_PSSCR_PR)); if (!hv) return; - printf("ptcr = %.16lx\n", - mfspr(SPRN_PTCR)); + printf("ptcr = %.16lx asdr = %.16lx\n", + mfspr(SPRN_PTCR), mfspr(SPRN_ASDR)); #endif } @@ -3762,6 +3810,11 @@ static void xmon_init(int enable) #ifdef CONFIG_MAGIC_SYSRQ static void sysrq_handle_xmon(int key) { + if (xmon_is_locked_down()) { + clear_all_bpt(); + xmon_init(0); + return; + } /* ensure xmon is enabled */ xmon_init(1); debugger(get_irq_regs()); @@ -3783,7 +3836,6 @@ static int __init setup_xmon_sysrq(void) device_initcall(setup_xmon_sysrq); #endif /* CONFIG_MAGIC_SYSRQ */ -#ifdef CONFIG_DEBUG_FS static void clear_all_bpt(void) { int i; @@ -3801,18 +3853,22 @@ static void clear_all_bpt(void) iabr = NULL; dabr.enabled = 0; } - - printf("xmon: All breakpoints cleared\n"); } +#ifdef CONFIG_DEBUG_FS static int xmon_dbgfs_set(void *data, u64 val) { xmon_on = !!val; xmon_init(xmon_on); /* make sure all breakpoints removed when disabling */ - if (!xmon_on) + if (!xmon_on) { clear_all_bpt(); + get_output_lock(); + printf("xmon: All breakpoints cleared\n"); + release_output_lock(); + } + return 0; } @@ -3838,7 +3894,11 @@ static int xmon_early __initdata; static int __init early_parse_xmon(char *p) { - if (!p || strncmp(p, "early", 5) == 0) { + if (xmon_is_locked_down()) { + xmon_init(0); + xmon_early = 0; + xmon_on = 0; + } else if (!p || strncmp(p, "early", 5) == 0) { /* just "xmon" is equivalent to "xmon=early" */ xmon_init(1); xmon_early = 1; diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8eebbc8860bbd..9c719832cf77e 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -31,6 +31,7 @@ config RISCV select GENERIC_SMP_IDLE_THREAD select GENERIC_ATOMIC64 if !64BIT select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_SECCOMP_FILTER select HAVE_ASM_MODVERSIONS select HAVE_MEMBLOCK_NODE_MAP select HAVE_DMA_CONTIGUOUS @@ -58,9 +59,9 @@ config RISCV select EDAC_SUPPORT select ARCH_HAS_GIGANTIC_PAGE select ARCH_WANT_HUGE_PMD_SHARE if 64BIT - select SPARSEMEM_STATIC if 32BIT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select HAVE_ARCH_MMAP_RND_BITS + select HAVE_COPY_THREAD_TLS config ARCH_MMAP_RND_BITS_MIN default 18 if 64BIT @@ -100,7 +101,9 @@ config ARCH_FLATMEM_ENABLE config ARCH_SPARSEMEM_ENABLE def_bool y - select SPARSEMEM_VMEMMAP_ENABLE + depends on MMU + select SPARSEMEM_STATIC if 32BIT && SPARSEMEM + select SPARSEMEM_VMEMMAP_ENABLE if 64BIT config ARCH_SELECT_MEMORY_MODEL def_bool ARCH_SPARSEMEM_ENABLE @@ -272,6 +275,19 @@ menu "Kernel features" source "kernel/Kconfig.hz" +config SECCOMP + bool "Enable seccomp to safely compute untrusted bytecode" + help + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + endmenu menu "Boot options" diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index f5e9142102454..d1a9615391010 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -34,11 +34,26 @@ else KBUILD_LDFLAGS += -melf32lriscv endif +ifeq ($(CONFIG_LD_IS_LLD),y) + KBUILD_CFLAGS += -mno-relax + KBUILD_AFLAGS += -mno-relax +ifneq ($(LLVM_IAS),1) + KBUILD_CFLAGS += -Wa,-mno-relax + KBUILD_AFLAGS += -Wa,-mno-relax +endif +endif + # ISA string setting riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c + +# Newer binutils versions default to ISA spec version 20191213 which moves some +# instructions from the I extension to the Zicsr and Zifencei extensions. +toolchain-need-zicsr-zifencei := $(call cc-option-yn, -march=$(riscv-march-y)_zicsr_zifencei) +riscv-march-$(toolchain-need-zicsr-zifencei) := $(riscv-march-y)_zicsr_zifencei + KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y)) KBUILD_AFLAGS += -march=$(riscv-march-y) @@ -59,6 +74,7 @@ ifeq ($(CONFIG_PERF_EVENTS),y) endif KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) +KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax) # GCC versions that support the "-mstrict-align" option default to allowing # unaligned accesses. While unaligned accesses are explicitly allowed in the diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 88cfcb96bf233..cc04e66752aac 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -83,6 +83,7 @@ phy-mode = "gmii"; phy-handle = <&phy0>; phy0: ethernet-phy@0 { + compatible = "ethernet-phy-id0007.0771"; reg = <0>; }; }; diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 420a0dbef3866..3c656fe97e583 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -62,6 +62,8 @@ CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y CONFIG_SPI=y CONFIG_SPI_SIFIVE=y +CONFIG_GPIOLIB=y +CONFIG_GPIO_SIFIVE=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_DRM=y CONFIG_DRM_RADEON=y diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index c9fecd120d187..8ae9708a8eee8 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -4,4 +4,8 @@ #include #include +long long __lshrti3(long long a, int b); +long long __ashrti3(long long a, int b); +long long __ashlti3(long long a, int b); + #endif /* _ASM_RISCV_PROTOTYPES_H */ diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index 3f1737f301ccb..d0e24aaa2aa06 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -58,8 +58,16 @@ do { \ * The AQ/RL pair provides a RCpc critical section, but there's not really any * way we can take advantage of that here because the ordering is only enforced * on that one lock. Thus, we're just doing a full fence. + * + * Since we allow writeX to be called from preemptive regions we need at least + * an "o" in the predecessor set to ensure device writes are visible before the + * task is marked as available for scheduling on a new hart. While I don't see + * any concrete reason we need a full IO fence, it seems safer to just upgrade + * this in order to avoid any IO crossing a scheduling boundary. In both + * instances the scheduler pairs this with an mb(), so nothing is necessary on + * the new hart. */ -#define smp_mb__after_spinlock() RISCV_FENCE(rw,rw) +#define smp_mb__after_spinlock() RISCV_FENCE(iorw,iorw) #include diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index d969bab4a26b5..262e5bbb27760 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -179,7 +179,7 @@ " bnez %1, 0b\n" \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ @@ -224,7 +224,7 @@ RISCV_ACQUIRE_BARRIER \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ @@ -270,7 +270,7 @@ " bnez %1, 0b\n" \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ @@ -316,7 +316,7 @@ " fence rw, rw\n" \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index c6dcc5291f972..693c3839a7dfe 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -10,9 +10,19 @@ #endif #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR +/* + * Clang prior to 13 had "mcount" instead of "_mcount": + * https://reviews.llvm.org/D98881 + */ +#if defined(CONFIG_CC_IS_GCC) || CONFIG_CLANG_VERSION >= 130000 +#define MCOUNT_NAME _mcount +#else +#define MCOUNT_NAME mcount +#endif + #define ARCH_SUPPORTS_FTRACE_OPS 1 #ifndef __ASSEMBLY__ -void _mcount(void); +void MCOUNT_NAME(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; @@ -33,7 +43,7 @@ struct dyn_arch_ftrace { * both auipc and jalr at the same time. */ -#define MCOUNT_ADDR ((unsigned long)_mcount) +#define MCOUNT_ADDR ((unsigned long)MCOUNT_NAME) #define JALR_SIGN_MASK (0x00000800) #define JALR_OFFSET_MASK (0x00000fff) #define AUIPC_OFFSET_MASK (0xfffff000) @@ -63,4 +73,11 @@ do { \ * Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here. */ #define MCOUNT_INSN_SIZE 8 + +#ifndef __ASSEMBLY__ +struct dyn_ftrace; +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); +#define ftrace_init_nop ftrace_init_nop +#endif + #endif diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 3db261c4810fc..6a30794aa1eea 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -119,7 +119,10 @@ extern unsigned long min_low_pfn; #endif /* __ASSEMBLY__ */ -#define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr))) +#define virt_addr_valid(vaddr) ({ \ + unsigned long _addr = (unsigned long)vaddr; \ + (unsigned long)(_addr) >= PAGE_OFFSET && pfn_valid(virt_to_pfn(_addr)); \ +}) #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) diff --git a/arch/riscv/include/asm/pgtable-32.h b/arch/riscv/include/asm/pgtable-32.h index b0ab66e5fdb1d..5b2e79e5bfa5b 100644 --- a/arch/riscv/include/asm/pgtable-32.h +++ b/arch/riscv/include/asm/pgtable-32.h @@ -14,4 +14,6 @@ #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 34 + #endif /* _ASM_RISCV_PGTABLE_32_H */ diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index f539149d04c20..8c5b11a640dd3 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -22,6 +22,8 @@ #ifndef __ASSEMBLY__ +#include + struct task_struct; struct pt_regs; diff --git a/arch/riscv/include/asm/seccomp.h b/arch/riscv/include/asm/seccomp.h new file mode 100644 index 0000000000000..bf7744ee3b3d0 --- /dev/null +++ b/arch/riscv/include/asm/seccomp.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_SECCOMP_H +#define _ASM_SECCOMP_H + +#include + +#include + +#endif /* _ASM_SECCOMP_H */ diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 905372d7eeb81..464a2bbc97ea3 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -12,7 +12,11 @@ #include /* thread information allocation */ +#ifdef CONFIG_64BIT +#define THREAD_SIZE_ORDER (2) +#else #define THREAD_SIZE_ORDER (1) +#endif #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #ifndef __ASSEMBLY__ @@ -75,6 +79,7 @@ struct thread_info { #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ +#define TIF_SECCOMP 8 /* syscall secure computing */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -82,11 +87,13 @@ struct thread_info { #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_WORK_MASK \ (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED) #define _TIF_SYSCALL_WORK \ - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT | \ + _TIF_SECCOMP) #endif /* _ASM_RISCV_THREAD_INFO_H */ diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h index d86cb17bbabe6..22e0ae8884061 100644 --- a/arch/riscv/include/uapi/asm/auxvec.h +++ b/arch/riscv/include/uapi/asm/auxvec.h @@ -10,4 +10,7 @@ /* vDSO location */ #define AT_SYSINFO_EHDR 33 +/* entries in ARCH_DLINFO */ +#define AT_VECTOR_SIZE_ARCH 1 + #endif /* _UAPI_ASM_RISCV_AUXVEC_H */ diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h index 13ce76cc5affe..80dff2c2bf677 100644 --- a/arch/riscv/include/uapi/asm/unistd.h +++ b/arch/riscv/include/uapi/asm/unistd.h @@ -18,9 +18,10 @@ #ifdef __LP64__ #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SET_GET_RLIMIT -#define __ARCH_WANT_SYS_CLONE3 #endif /* __LP64__ */ +#define __ARCH_WANT_SYS_CLONE3 + #include /* diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index 4c90c07d8c39d..d930bd073b7b2 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -16,7 +16,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, this_leaf->type = type; } -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct device_node *np = of_cpu_device_node_get(cpu); @@ -58,7 +58,7 @@ static int __init_cache_level(unsigned int cpu) return 0; } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *this_leaf = this_cpu_ci->info_list; @@ -95,6 +95,3 @@ static int __populate_cache_leaves(unsigned int cpu) return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 8ca4798311429..06c133cc95817 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -226,8 +226,26 @@ check_syscall_nr: /* Check to make sure we don't jump to a bogus syscall number. */ li t0, __NR_syscalls la s0, sys_ni_syscall - /* Syscall number held in a7 */ - bgeu a7, t0, 1f + /* + * The tracer can change syscall number to valid/invalid value. + * We use syscall_set_nr helper in syscall_trace_enter thus we + * cannot trust the current value in a7 and have to reload from + * the current task pt_regs. + */ + REG_L a7, PT_A7(sp) + /* + * Syscall number held in a7. + * If syscall number is above allowed value, redirect to ni_syscall. + */ + bge a7, t0, 1f + /* + * Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1. + * If yes, we pretend it was executed. + */ + li t1, -1 + beq a7, t1, ret_from_syscall_rejected + blt a7, t1, 1f + /* Call syscall */ la s0, sys_call_table slli t0, a7, RISCV_LGPTR add s0, s0, t0 @@ -238,6 +256,12 @@ check_syscall_nr: ret_from_syscall: /* Set user a0 to kernel a0 */ REG_S a0, PT_A0(sp) + /* + * We didn't execute the actual syscall. + * Seccomp already set return value for the current task pt_regs. + * (If it was configured with SECCOMP_RET_ERRNO/TRACE) + */ +ret_from_syscall_rejected: /* Trace syscalls, but only if requested by the user. */ REG_L t0, TASK_TI_FLAGS(tp) andi t0, t0, _TIF_SYSCALL_WORK @@ -387,6 +411,7 @@ ENTRY(__switch_to) ENDPROC(__switch_to) .section ".rodata" + .align LGREG /* Exception vector table */ ENTRY(excp_vect_table) RISCV_PTR do_trap_insn_misaligned diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index b94d8db5ddccb..291c579e12457 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -88,6 +88,25 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return __ftrace_modify_call(rec->ip, addr, false); } + +/* + * This is called early on, and isn't wrapped by + * ftrace_arch_code_modify_{prepare,post_process}() and therefor doesn't hold + * text_mutex, which triggers a lockdep failure. SMP isn't running so we could + * just directly poke the text, but it's simpler to just take the lock + * ourselves. + */ +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) +{ + int out; + + ftrace_arch_code_modify_prepare(); + out = ftrace_make_nop(mod, rec, MCOUNT_ADDR); + ftrace_arch_code_modify_post_process(); + + return out; +} + int ftrace_update_ftrace_func(ftrace_func_t func) { int ret = __ftrace_modify_call((unsigned long)&ftrace_call, @@ -142,7 +161,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, */ old = *parent; - if (function_graph_enter(old, self_addr, frame_pointer, parent)) + if (!function_graph_enter(old, self_addr, frame_pointer, parent)) *parent = return_hooker; } diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 72f89b7590dd6..344793159b97d 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -26,12 +26,17 @@ ENTRY(_start) /* reserved */ .word 0 .balign 8 +#ifdef CONFIG_RISCV_M_MODE + /* Image load offset (0MB) from start of RAM for M-mode */ + .dword 0 +#else #if __riscv_xlen == 64 /* Image load offset(2MB) from start of RAM */ .dword 0x200000 #else /* Image load offset(4MB) from start of RAM */ .dword 0x400000 +#endif #endif /* Effective size of kernel image */ .dword _end - _start diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 8a5593ff9ff3d..6d462681c9c02 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -47,8 +47,8 @@ ENTRY(ftrace_stub) #ifdef CONFIG_DYNAMIC_FTRACE - .global _mcount - .set _mcount, ftrace_stub + .global MCOUNT_NAME + .set MCOUNT_NAME, ftrace_stub #endif ret ENDPROC(ftrace_stub) @@ -78,7 +78,7 @@ ENDPROC(return_to_handler) #endif #ifndef CONFIG_DYNAMIC_FTRACE -ENTRY(_mcount) +ENTRY(MCOUNT_NAME) la t4, ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER la t0, ftrace_graph_return @@ -124,6 +124,6 @@ do_trace: jalr t5 RESTORE_ABI_STATE ret -ENDPROC(_mcount) +ENDPROC(MCOUNT_NAME) #endif -EXPORT_SYMBOL(_mcount) +EXPORT_SYMBOL(MCOUNT_NAME) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 70bb94ae61c59..a963b761e1a39 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -8,6 +8,23 @@ #include #include #include +#include +#include +#include +#include + +/* + * The auipc+jalr instruction pair can reach any PC-relative offset + * in the range [-2^31 - 2^11, 2^31 - 2^11) + */ +static bool riscv_insn_valid_32bit_offset(ptrdiff_t val) +{ +#ifdef CONFIG_32BIT + return true; +#else + return (-(1L << 31) - (1L << 11)) <= val && val < ((1L << 31) - (1L << 11)); +#endif +} static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) { @@ -91,7 +108,7 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, ptrdiff_t offset = (void *)v - (void *)location; s32 hi20; - if (offset != (s32)offset) { + if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", me->name, (long long)v, location); @@ -193,10 +210,9 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, Elf_Addr v) { ptrdiff_t offset = (void *)v - (void *)location; - s32 fill_v = offset; u32 hi20, lo12; - if (offset != fill_v) { + if (!riscv_insn_valid_32bit_offset(offset)) { /* Only emit the plt entry if offset over 32-bit range */ if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { offset = module_emit_plt_entry(me, v); @@ -220,10 +236,9 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location, Elf_Addr v) { ptrdiff_t offset = (void *)v - (void *)location; - s32 fill_v = offset; u32 hi20, lo12; - if (offset != fill_v) { + if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", me->name, (long long)v, location); @@ -386,3 +401,15 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, return 0; } + +#if defined(CONFIG_MMU) && defined(CONFIG_64BIT) +#define VMALLOC_MODULE_START \ + max(PFN_ALIGN((unsigned long)&_end - SZ_2G), VMALLOC_START) +void *module_alloc(unsigned long size) +{ + return __vmalloc_node_range(size, 1, VMALLOC_MODULE_START, + VMALLOC_END, GFP_KERNEL, + PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + __builtin_return_address(0)); +} +#endif diff --git a/arch/riscv/kernel/module.lds b/arch/riscv/kernel/module.lds index 295ecfb341a29..18ec719899e28 100644 --- a/arch/riscv/kernel/module.lds +++ b/arch/riscv/kernel/module.lds @@ -2,7 +2,7 @@ /* Copyright (C) 2017 Andes Technology Corporation */ SECTIONS { - .plt (NOLOAD) : { BYTE(0) } - .got (NOLOAD) : { BYTE(0) } - .got.plt (NOLOAD) : { BYTE(0) } + .plt : { BYTE(0) } + .got : { BYTE(0) } + .got.plt : { BYTE(0) } } diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c index 8d2804f05cf93..1de5991916eb9 100644 --- a/arch/riscv/kernel/perf_callchain.c +++ b/arch/riscv/kernel/perf_callchain.c @@ -60,10 +60,11 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); unsigned long fp = 0; /* RISC-V does not support perf in guest mode. */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) + if (guest_cbs && guest_cbs->is_in_guest()) return; fp = regs->s0; @@ -76,7 +77,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, bool fill_callchain(unsigned long pc, void *entry) { - return perf_callchain_store(entry, pc); + return perf_callchain_store(entry, pc) == 0; } void notrace walk_stackframe(struct task_struct *task, @@ -84,8 +85,10 @@ void notrace walk_stackframe(struct task_struct *task, void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + /* RISC-V does not support perf in guest mode. */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { pr_warn("RISC-V does not support perf in guest mode!"); return; } diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 85e3c39bb60bc..330b34706aa02 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -99,8 +99,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) return 0; } -int copy_thread(unsigned long clone_flags, unsigned long usp, - unsigned long arg, struct task_struct *p) +int copy_thread_tls(unsigned long clone_flags, unsigned long usp, + unsigned long arg, struct task_struct *p, unsigned long tls) { struct pt_regs *childregs = task_pt_regs(p); @@ -120,7 +120,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, if (usp) /* User fork */ childregs->sp = usp; if (clone_flags & CLONE_SETTLS) - childregs->tp = childregs->a5; + childregs->tp = tls; childregs->a0 = 0; /* Return value of fork() */ p->thread.ra = (unsigned long)ret_from_fork; } diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 1252113ef8b2c..0f84628b9385f 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -154,6 +154,16 @@ __visible void do_syscall_trace_enter(struct pt_regs *regs) if (tracehook_report_syscall_entry(regs)) syscall_set_nr(current, regs, -1); + /* + * Do the secure computing after ptrace; failures should be fast. + * If this fails we might have return value in a0 from seccomp + * (via SECCOMP_RET_ERRNO/TRACE). + */ + if (secure_computing(NULL) == -1) { + syscall_set_nr(current, regs, -1); + return; + } + #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, syscall_get_nr(current, regs)); diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 5c9ec78422c22..098c04adbaaf6 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -51,7 +51,7 @@ int riscv_hartid_to_cpuid(int hartid) return i; pr_err("Couldn't find cpu id for hartid [%d]\n", hartid); - return i; + return -ENOENT; } void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 0940681d2f682..19e46f4160cc3 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -63,7 +63,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, #else /* !CONFIG_FRAME_POINTER */ -static void notrace walk_stackframe(struct task_struct *task, +void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(unsigned long, void *), void *arg) { unsigned long sp, pc; diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index f3619f59d85cc..8a7880b9c433e 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -8,6 +8,7 @@ #include #include #include +#include static long riscv_sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -16,6 +17,10 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, { if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) return -EINVAL; + + if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ))) + return -EINVAL; + return ksys_mmap_pgoff(addr, len, prot, flags, fd, offset >> (PAGE_SHIFT - page_shift_offset)); } diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c index 6a53c02e9c734..8aa70b519e04f 100644 --- a/arch/riscv/kernel/time.c +++ b/arch/riscv/kernel/time.c @@ -4,6 +4,7 @@ * Copyright (C) 2017 SiFive */ +#include #include #include #include @@ -24,5 +25,7 @@ void __init time_init(void) riscv_timebase = prop; lpj_fine = riscv_timebase / HZ; + + of_clk_init(NULL); timer_probe(); } diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 473de3ae8bb75..ae462037910be 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -43,6 +44,9 @@ void die(struct pt_regs *regs, const char *str) ret = notify_die(DIE_OOPS, str, regs, 0, regs->scause, SIGSEGV); + if (regs && kexec_should_crash(current)) + crash_kexec(regs); + bust_spinlocks(0); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); spin_unlock_irq(&die_lock); diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 49a5852fd07dd..a4ee3a0e7d20d 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -33,15 +33,15 @@ $(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE $(call if_changed,vdsold) # We also create a special relocatable object that should mirror the symbol -# table and layout of the linked DSO. With ld -R we can then refer to -# these symbols in the kernel code rather than hand-coded addresses. +# table and layout of the linked DSO. With ld --just-symbols we can then +# refer to these symbols in the kernel code rather than hand-coded addresses. SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ -Wl,--build-id -Wl,--hash-style=both $(obj)/vdso-dummy.o: $(src)/vdso.lds $(obj)/rt_sigreturn.o FORCE $(call if_changed,vdsold) -LDFLAGS_vdso-syms.o := -r -R +LDFLAGS_vdso-syms.o := -r --just-symbols $(obj)/vdso-syms.o: $(obj)/vdso-dummy.o FORCE $(call if_changed,ld) @@ -58,7 +58,8 @@ quiet_cmd_vdsold = VDSOLD $@ cmd_vdsold = $(CC) $(KBUILD_CFLAGS) $(call cc-option, -no-pie) -nostdlib -nostartfiles $(SYSCFLAGS_$(@F)) \ -Wl,-T,$(filter-out FORCE,$^) -o $@.tmp && \ $(CROSS_COMPILE)objcopy \ - $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ + $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \ + rm $@.tmp # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/riscv/lib/tishift.S b/arch/riscv/lib/tishift.S index 15f9d54c7db63..ef90075c4b0a9 100644 --- a/arch/riscv/lib/tishift.S +++ b/arch/riscv/lib/tishift.S @@ -4,34 +4,73 @@ */ #include +#include -ENTRY(__lshrti3) +SYM_FUNC_START(__lshrti3) beqz a2, .L1 li a5,64 sub a5,a5,a2 - addi sp,sp,-16 sext.w a4,a5 blez a5, .L2 sext.w a2,a2 - sll a4,a1,a4 srl a0,a0,a2 - srl a1,a1,a2 + sll a4,a1,a4 + srl a2,a1,a2 or a0,a0,a4 - sd a1,8(sp) - sd a0,0(sp) - ld a0,0(sp) - ld a1,8(sp) - addi sp,sp,16 - ret + mv a1,a2 .L1: ret .L2: - negw a4,a4 - srl a1,a1,a4 - sd a1,0(sp) - sd zero,8(sp) - ld a0,0(sp) - ld a1,8(sp) - addi sp,sp,16 + negw a0,a4 + li a2,0 + srl a0,a1,a0 + mv a1,a2 + ret +SYM_FUNC_END(__lshrti3) +EXPORT_SYMBOL(__lshrti3) + +SYM_FUNC_START(__ashrti3) + beqz a2, .L3 + li a5,64 + sub a5,a5,a2 + sext.w a4,a5 + blez a5, .L4 + sext.w a2,a2 + srl a0,a0,a2 + sll a4,a1,a4 + sra a2,a1,a2 + or a0,a0,a4 + mv a1,a2 +.L3: + ret +.L4: + negw a0,a4 + srai a2,a1,0x3f + sra a0,a1,a0 + mv a1,a2 + ret +SYM_FUNC_END(__ashrti3) +EXPORT_SYMBOL(__ashrti3) + +SYM_FUNC_START(__ashlti3) + beqz a2, .L5 + li a5,64 + sub a5,a5,a2 + sext.w a4,a5 + blez a5, .L6 + sext.w a2,a2 + sll a1,a1,a2 + srl a4,a0,a4 + sll a2,a0,a2 + or a1,a1,a4 + mv a0,a2 +.L5: + ret +.L6: + negw a1,a4 + li a2,0 + sll a1,a0,a1 + mv a0,a2 ret -ENDPROC(__lshrti3) +SYM_FUNC_END(__ashlti3) +EXPORT_SYMBOL(__ashlti3) diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index 3f15938dec893..c54bd3c799552 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -14,6 +14,7 @@ void flush_icache_all(void) { sbi_remote_fence_i(NULL); } +EXPORT_SYMBOL(flush_icache_all); /* * Performs an icache flush for the given MM context. RISC-V has no direct diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 573463d1c799a..d49e334071d45 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -98,7 +98,7 @@ void __init setup_bootmem(void) for_each_memblock(memory, reg) { phys_addr_t end = reg->base + reg->size; - if (reg->base <= vmlinux_end && vmlinux_end <= end) { + if (reg->base <= vmlinux_start && vmlinux_end <= end) { mem_size = min(reg->size, (phys_addr_t)-PAGE_OFFSET); /* @@ -115,8 +115,9 @@ void __init setup_bootmem(void) /* Reserve from the start of the kernel to the end of the kernel */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); - set_max_mapnr(PFN_DOWN(mem_size)); - max_low_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_low_pfn = max_pfn; + set_max_mapnr(max_low_pfn); #ifdef CONFIG_BLK_DEV_INITRD setup_initrd(); @@ -166,12 +167,11 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) ptep = &fixmap_pte[pte_index(addr)]; - if (pgprot_val(prot)) { + if (pgprot_val(prot)) set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot)); - } else { + else pte_clear(&init_mm, addr, ptep); - local_flush_tlb_page(addr); - } + local_flush_tlb_page(addr); } static pte_t *__init get_pte_virt(phys_addr_t pa) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 24cd33d2c48f3..720b443c4528f 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -2,6 +2,7 @@ #include #include +#include #include void flush_tlb_all(void) @@ -9,13 +10,33 @@ void flush_tlb_all(void) sbi_remote_sfence_vma(NULL, 0, -1); } +/* + * This function must not be called with cmask being null. + * Kernel may panic if cmask is NULL. + */ static void __sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start, unsigned long size) { struct cpumask hmask; + unsigned int cpuid; - riscv_cpuid_to_hartid_mask(cmask, &hmask); - sbi_remote_sfence_vma(hmask.bits, start, size); + if (cpumask_empty(cmask)) + return; + + cpuid = get_cpu(); + + if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) { + /* local cpu is the only cpu present in cpumask */ + if (size <= PAGE_SIZE) + local_flush_tlb_page(start); + else + local_flush_tlb_all(); + } else { + riscv_cpuid_to_hartid_mask(cmask, &hmask); + sbi_remote_sfence_vma(cpumask_bits(&hmask), start, size); + } + + put_cpu(); } void flush_tlb_mm(struct mm_struct *mm) diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp.c index 5451ef3845f29..0eefe6193253b 100644 --- a/arch/riscv/net/bpf_jit_comp.c +++ b/arch/riscv/net/bpf_jit_comp.c @@ -120,6 +120,11 @@ static bool seen_reg(int reg, struct rv_jit_context *ctx) return false; } +static void mark_fp(struct rv_jit_context *ctx) +{ + __set_bit(RV_CTX_F_SEEN_S5, &ctx->flags); +} + static void mark_call(struct rv_jit_context *ctx) { __set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags); @@ -596,7 +601,8 @@ static void __build_epilogue(u8 reg, struct rv_jit_context *ctx) emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx); /* Set return value. */ - emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx); + if (reg == RV_REG_RA) + emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx); emit(rv_jalr(RV_REG_ZERO, reg, 0), ctx); } @@ -631,14 +637,14 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) return -1; emit(rv_bgeu(RV_REG_A2, RV_REG_T1, off >> 1), ctx); - /* if (--TCC < 0) + /* if (TCC-- < 0) * goto out; */ emit(rv_addi(RV_REG_T1, tcc, -1), ctx); off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; if (is_13b_check(off, insn)) return -1; - emit(rv_blt(RV_REG_T1, RV_REG_ZERO, off >> 1), ctx); + emit(rv_blt(tcc, RV_REG_ZERO, off >> 1), ctx); /* prog = array->ptrs[index]; * if (!prog) @@ -1307,6 +1313,10 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, emit(rv_ld(rd, 0, RV_REG_T1), ctx); break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_B: emit_imm(RV_REG_T1, imm, ctx); @@ -1426,6 +1436,10 @@ static void build_prologue(struct rv_jit_context *ctx) { int stack_adjust = 0, store_offset, bpf_stack_adjust; + bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); + if (bpf_stack_adjust) + mark_fp(ctx); + if (seen_reg(RV_REG_RA, ctx)) stack_adjust += 8; stack_adjust += 8; /* RV_REG_FP */ @@ -1443,7 +1457,6 @@ static void build_prologue(struct rv_jit_context *ctx) stack_adjust += 8; stack_adjust = round_up(stack_adjust, 16); - bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); stack_adjust += bpf_stack_adjust; store_offset = stack_adjust - 8; diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 43a81d0ad5074..9f3f0a892e003 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -155,6 +155,7 @@ config S390 select HAVE_KERNEL_UNCOMPRESSED select HAVE_KERNEL_XZ select HAVE_KPROBES + select HAVE_KPROBES_ON_FTRACE select HAVE_KRETPROBES select HAVE_KVM select HAVE_LIVEPATCH @@ -541,7 +542,6 @@ config KEXEC config KEXEC_FILE bool "kexec file based system call" select KEXEC_CORE - select BUILD_BIN2C depends on CRYPTO depends on CRYPTO_SHA256 depends on CRYPTO_SHA256_S390 @@ -920,7 +920,7 @@ config CMM_IUCV config APPLDATA_BASE def_bool n prompt "Linux - VM Monitor Stream, base infrastructure" - depends on PROC_FS + depends on PROC_SYSCTL help This provides a kernel interface for creating and updating z/VM APPLDATA monitor records. The monitor records are updated at certain time @@ -1006,3 +1006,11 @@ config S390_GUEST the KVM hypervisor. endmenu + +config KMSG_IDS + def_bool y + prompt "Kernel message numbers" + help + Select this option if you want to include a message number to the + prefix for kernel messages issued by the s390 architecture and + driver code. See "Documentation/s390/kmsg.txt" for more details. diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 478b645b20ddb..71e3d7c0b8709 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -31,6 +31,16 @@ KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding) KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,)) + +ifdef CONFIG_CC_IS_GCC + ifeq ($(call cc-ifversion, -ge, 1200, y), y) + ifeq ($(call cc-ifversion, -lt, 1300, y), y) + KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds) + KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, array-bounds) + endif + endif +endif + UTS_MACHINE := s390x STACK_SIZE := $(if $(CONFIG_KASAN),65536,16384) CHECKFLAGS += -D__s390__ -D__s390x__ @@ -69,7 +79,7 @@ cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include # cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls -ifeq ($(call cc-option-yn,-mpacked-stack),y) +ifeq ($(call cc-option-yn,-mpacked-stack -mbackchain -msoft-float),y) cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK endif @@ -146,7 +156,7 @@ all: bzImage #KBUILD_IMAGE is necessary for packaging targets like rpm-pkg, deb-pkg... KBUILD_IMAGE := $(boot)/bzImage -install: vmlinux +install: $(Q)$(MAKE) $(build)=$(boot) $@ bzImage: vmlinux diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 54f375627532a..8bf46d7059573 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -75,7 +75,7 @@ struct appldata_os_data { (waiting for I/O) */ /* per cpu data */ - struct appldata_os_per_cpu os_cpu[0]; + struct appldata_os_per_cpu os_cpu[]; } __attribute__((packed)); static struct appldata_os_data *appldata_os_data; diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index e2c47d3a1c891..45b33b83de08c 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -37,7 +37,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o obj-y += version.o pgm_check_info.o ctype.o text_dma.o -obj-$(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) += uv.o +obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o targets := bzImage startup.a section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y) @@ -70,7 +70,7 @@ $(obj)/compressed/vmlinux: $(obj)/startup.a FORCE $(obj)/startup.a: $(OBJECTS) FORCE $(call if_changed,ar) -install: $(CONFIGURE) $(obj)/bzImage +install: sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ System.map "$(INSTALL_PATH)" diff --git a/arch/s390/boot/compressed/decompressor.c b/arch/s390/boot/compressed/decompressor.c index 45046630c56ac..368fd372c8757 100644 --- a/arch/s390/boot/compressed/decompressor.c +++ b/arch/s390/boot/compressed/decompressor.c @@ -30,13 +30,13 @@ extern unsigned char _compressed_start[]; extern unsigned char _compressed_end[]; #ifdef CONFIG_HAVE_KERNEL_BZIP2 -#define HEAP_SIZE 0x400000 +#define BOOT_HEAP_SIZE 0x400000 #else -#define HEAP_SIZE 0x10000 +#define BOOT_HEAP_SIZE 0x10000 #endif static unsigned long free_mem_ptr = (unsigned long) _end; -static unsigned long free_mem_end_ptr = (unsigned long) _end + HEAP_SIZE; +static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE; #ifdef CONFIG_KERNEL_GZIP #include "../../../../lib/decompress_inflate.c" @@ -62,7 +62,7 @@ static unsigned long free_mem_end_ptr = (unsigned long) _end + HEAP_SIZE; #include "../../../../lib/decompress_unxz.c" #endif -#define decompress_offset ALIGN((unsigned long)_end + HEAP_SIZE, PAGE_SIZE) +#define decompress_offset ALIGN((unsigned long)_end + BOOT_HEAP_SIZE, PAGE_SIZE) unsigned long mem_safe_offset(void) { diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 4b86a8d3c1219..e6bf5f40bff34 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -360,22 +360,23 @@ ENTRY(startup_kdump) # the save area and does disabled wait with a faulty address. # ENTRY(startup_pgm_check_handler) - stmg %r0,%r15,__LC_SAVE_AREA_SYNC - la %r1,4095 - stctg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r1) - mvc __LC_GPREGS_SAVE_AREA-4095(128,%r1),__LC_SAVE_AREA_SYNC - mvc __LC_PSW_SAVE_AREA-4095(16,%r1),__LC_PGM_OLD_PSW + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + la %r8,4095 + stctg %c0,%c15,__LC_CREGS_SAVE_AREA-4095(%r8) + stmg %r0,%r7,__LC_GPREGS_SAVE_AREA-4095(%r8) + mvc __LC_GPREGS_SAVE_AREA-4095+64(64,%r8),__LC_SAVE_AREA_SYNC + mvc __LC_PSW_SAVE_AREA-4095(16,%r8),__LC_PGM_OLD_PSW mvc __LC_RETURN_PSW(16),__LC_PGM_OLD_PSW ni __LC_RETURN_PSW,0xfc # remove IO and EX bits ni __LC_RETURN_PSW+1,0xfb # remove MCHK bit oi __LC_RETURN_PSW+1,0x2 # set wait state bit - larl %r2,.Lold_psw_disabled_wait - stg %r2,__LC_PGM_NEW_PSW+8 - l %r15,.Ldump_info_stack-.Lold_psw_disabled_wait(%r2) + larl %r9,.Lold_psw_disabled_wait + stg %r9,__LC_PGM_NEW_PSW+8 + l %r15,.Ldump_info_stack-.Lold_psw_disabled_wait(%r9) brasl %r14,print_pgm_check_info .Lold_psw_disabled_wait: - la %r1,4095 - lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1) + la %r8,4095 + lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r8) lpswe __LC_RETURN_PSW # disabled wait .Ldump_info_stack: .long 0x5000 + PAGE_SIZE - STACK_FRAME_OVERHEAD diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 24ef67eb1ceff..0de2d79a3c941 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -14,6 +14,7 @@ char __bootdata(early_command_line)[COMMAND_LINE_SIZE]; struct ipl_parameter_block __bootdata_preserved(ipl_block); int __bootdata_preserved(ipl_block_valid); +unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL; unsigned long __bootdata(vmalloc_size) = VMALLOC_DEFAULT_SIZE; unsigned long __bootdata(memory_end); @@ -27,22 +28,25 @@ static inline int __diag308(unsigned long subcode, void *addr) register unsigned long _addr asm("0") = (unsigned long)addr; register unsigned long _rc asm("1") = 0; unsigned long reg1, reg2; - psw_t old = S390_lowcore.program_new_psw; + psw_t old; asm volatile( + " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" " epsw %0,%1\n" - " st %0,%[psw_pgm]\n" - " st %1,%[psw_pgm]+4\n" + " st %0,0(%[psw_pgm])\n" + " st %1,4(%[psw_pgm])\n" " larl %0,1f\n" - " stg %0,%[psw_pgm]+8\n" + " stg %0,8(%[psw_pgm])\n" " diag %[addr],%[subcode],0x308\n" - "1: nopr %%r7\n" + "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" : "=&d" (reg1), "=&a" (reg2), - [psw_pgm] "=Q" (S390_lowcore.program_new_psw), + "+Q" (S390_lowcore.program_new_psw), + "=Q" (old), [addr] "+d" (_addr), "+d" (_rc) - : [subcode] "d" (subcode) + : [subcode] "d" (subcode), + [psw_old] "a" (&old), + [psw_pgm] "a" (&S390_lowcore.program_new_psw) : "cc", "memory"); - S390_lowcore.program_new_psw = old; return _rc; } @@ -69,30 +73,44 @@ static size_t scpdata_length(const u8 *buf, size_t count) static size_t ipl_block_get_ascii_scpdata(char *dest, size_t size, const struct ipl_parameter_block *ipb) { - size_t count; - size_t i; + const __u8 *scp_data; + __u32 scp_data_len; int has_lowercase; + size_t count = 0; + size_t i; + + switch (ipb->pb0_hdr.pbt) { + case IPL_PBT_FCP: + scp_data_len = ipb->fcp.scp_data_len; + scp_data = ipb->fcp.scp_data; + break; + case IPL_PBT_NVME: + scp_data_len = ipb->nvme.scp_data_len; + scp_data = ipb->nvme.scp_data; + break; + default: + goto out; + } - count = min(size - 1, scpdata_length(ipb->fcp.scp_data, - ipb->fcp.scp_data_len)); + count = min(size - 1, scpdata_length(scp_data, scp_data_len)); if (!count) goto out; has_lowercase = 0; for (i = 0; i < count; i++) { - if (!isascii(ipb->fcp.scp_data[i])) { + if (!isascii(scp_data[i])) { count = 0; goto out; } - if (!has_lowercase && islower(ipb->fcp.scp_data[i])) + if (!has_lowercase && islower(scp_data[i])) has_lowercase = 1; } if (has_lowercase) - memcpy(dest, ipb->fcp.scp_data, count); + memcpy(dest, scp_data, count); else for (i = 0; i < count; i++) - dest[i] = tolower(ipb->fcp.scp_data[i]); + dest[i] = tolower(scp_data[i]); out: dest[count] = '\0'; return count; @@ -114,6 +132,7 @@ static void append_ipl_block_parm(void) parm, COMMAND_LINE_SIZE - len - 1, &ipl_block); break; case IPL_PBT_FCP: + case IPL_PBT_NVME: rc = ipl_block_get_ascii_scpdata( parm, COMMAND_LINE_SIZE - len - 1, &ipl_block); break; @@ -229,6 +248,19 @@ void parse_boot_command_line(void) if (!strcmp(param, "vmalloc") && val) vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE); + if (!strcmp(param, "dfltcc")) { + if (!strcmp(val, "off")) + zlib_dfltcc_support = ZLIB_DFLTCC_DISABLED; + else if (!strcmp(val, "on")) + zlib_dfltcc_support = ZLIB_DFLTCC_FULL; + else if (!strcmp(val, "def_only")) + zlib_dfltcc_support = ZLIB_DFLTCC_DEFLATE_ONLY; + else if (!strcmp(val, "inf_only")) + zlib_dfltcc_support = ZLIB_DFLTCC_INFLATE_ONLY; + else if (!strcmp(val, "always")) + zlib_dfltcc_support = ZLIB_DFLTCC_FULL_DEBUG; + } + if (!strcmp(param, "noexec")) { rc = kstrtobool(val, &enabled); if (!rc && !enabled) diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index 5d12352545c55..5591243d673e8 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -75,7 +75,7 @@ static unsigned long get_random(unsigned long limit) *(unsigned long *) prng.parm_block ^= seed; for (i = 0; i < 16; i++) { cpacf_kmc(CPACF_KMC_PRNG, prng.parm_block, - (char *) entropy, (char *) entropy, + (u8 *) entropy, (u8 *) entropy, sizeof(entropy)); memcpy(prng.parm_block, entropy, sizeof(entropy)); } diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c index 62e7c13ce85c7..85049541c191e 100644 --- a/arch/s390/boot/mem_detect.c +++ b/arch/s390/boot/mem_detect.c @@ -70,24 +70,27 @@ static int __diag260(unsigned long rx1, unsigned long rx2) register unsigned long _ry asm("4") = 0x10; /* storage configuration */ int rc = -1; /* fail */ unsigned long reg1, reg2; - psw_t old = S390_lowcore.program_new_psw; + psw_t old; asm volatile( + " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" " epsw %0,%1\n" - " st %0,%[psw_pgm]\n" - " st %1,%[psw_pgm]+4\n" + " st %0,0(%[psw_pgm])\n" + " st %1,4(%[psw_pgm])\n" " larl %0,1f\n" - " stg %0,%[psw_pgm]+8\n" + " stg %0,8(%[psw_pgm])\n" " diag %[rx],%[ry],0x260\n" " ipm %[rc]\n" " srl %[rc],28\n" - "1:\n" + "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" : "=&d" (reg1), "=&a" (reg2), - [psw_pgm] "=Q" (S390_lowcore.program_new_psw), + "+Q" (S390_lowcore.program_new_psw), + "=Q" (old), [rc] "+&d" (rc), [ry] "+d" (_ry) - : [rx] "d" (_rx1), "d" (_rx2) + : [rx] "d" (_rx1), "d" (_rx2), + [psw_old] "a" (&old), + [psw_pgm] "a" (&S390_lowcore.program_new_psw) : "cc", "memory"); - S390_lowcore.program_new_psw = old; return rc == 0 ? _ry : -1; } @@ -112,24 +115,30 @@ static int diag260(void) static int tprot(unsigned long addr) { - unsigned long pgm_addr; + unsigned long reg1, reg2; int rc = -EFAULT; - psw_t old = S390_lowcore.program_new_psw; + psw_t old; - S390_lowcore.program_new_psw.mask = __extract_psw(); asm volatile( - " larl %[pgm_addr],1f\n" - " stg %[pgm_addr],%[psw_pgm_addr]\n" + " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" + " epsw %[reg1],%[reg2]\n" + " st %[reg1],0(%[psw_pgm])\n" + " st %[reg2],4(%[psw_pgm])\n" + " larl %[reg1],1f\n" + " stg %[reg1],8(%[psw_pgm])\n" " tprot 0(%[addr]),0\n" " ipm %[rc]\n" " srl %[rc],28\n" - "1:\n" - : [pgm_addr] "=&d"(pgm_addr), - [psw_pgm_addr] "=Q"(S390_lowcore.program_new_psw.addr), - [rc] "+&d"(rc) - : [addr] "a"(addr) + "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + : [reg1] "=&d" (reg1), + [reg2] "=&a" (reg2), + [rc] "+&d" (rc), + "=Q" (S390_lowcore.program_new_psw.addr), + "=Q" (old) + : [psw_old] "a" (&old), + [psw_pgm] "a" (&S390_lowcore.program_new_psw), + [addr] "a" (addr) : "cc", "memory"); - S390_lowcore.program_new_psw = old; return rc; } diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 5367950510f69..fa0150285d388 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -170,6 +170,11 @@ void startup_kernel(void) handle_relocs(__kaslr_offset); if (__kaslr_offset) { + /* + * Save KASLR offset for early dumps, before vmcore_info is set. + * Mark as uneven to distinguish from real vmcore_info pointer. + */ + S390_lowcore.vmcore_info = __kaslr_offset | 0x1UL; /* Clear non-relocated kernel */ if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) memset(img, 0, vmlinux.image_size); diff --git a/arch/s390/boot/text_dma.S b/arch/s390/boot/text_dma.S index 9715715c4c28d..ea93314f44976 100644 --- a/arch/s390/boot/text_dma.S +++ b/arch/s390/boot/text_dma.S @@ -9,16 +9,6 @@ #include #include -#ifdef CC_USING_EXPOLINE - .pushsection .dma.text.__s390_indirect_jump_r14,"axG" -__dma__s390_indirect_jump_r14: - larl %r1,0f - ex 0,0(%r1) - j . -0: br %r14 - .popsection -#endif - .section .dma.text,"ax" /* * Simplified version of expoline thunk. The normal thunks can not be used here, @@ -27,11 +17,10 @@ __dma__s390_indirect_jump_r14: * affects a few functions that are not performance-relevant. */ .macro BR_EX_DMA_r14 -#ifdef CC_USING_EXPOLINE - jg __dma__s390_indirect_jump_r14 -#else - br %r14 -#endif + larl %r1,0f + ex 0,0(%r1) + j . +0: br %r14 .endm /* diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c index ed007f4a6444f..8fde561f1d07e 100644 --- a/arch/s390/boot/uv.c +++ b/arch/s390/boot/uv.c @@ -3,7 +3,13 @@ #include #include +/* will be used in arch/s390/kernel/uv.c */ +#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST int __bootdata_preserved(prot_virt_guest); +#endif +#if IS_ENABLED(CONFIG_KVM) +struct uv_info __bootdata_preserved(uv_info); +#endif void uv_query_info(void) { @@ -15,10 +21,25 @@ void uv_query_info(void) if (!test_facility(158)) return; - if (uv_call(0, (uint64_t)&uvcb)) + /* rc==0x100 means that there is additional data we do not process */ + if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100) return; + if (IS_ENABLED(CONFIG_KVM)) { + memcpy(uv_info.inst_calls_list, uvcb.inst_calls_list, sizeof(uv_info.inst_calls_list)); + uv_info.uv_base_stor_len = uvcb.uv_base_stor_len; + uv_info.guest_base_stor_len = uvcb.conf_base_phys_stor_len; + uv_info.guest_virt_base_stor_len = uvcb.conf_base_virt_stor_len; + uv_info.guest_virt_var_stor_len = uvcb.conf_virt_var_stor_len; + uv_info.guest_cpu_stor_len = uvcb.cpu_stor_len; + uv_info.max_sec_stor_addr = ALIGN(uvcb.max_guest_stor_addr, PAGE_SIZE); + uv_info.max_num_sec_conf = uvcb.max_num_sec_conf; + uv_info.max_guest_cpus = uvcb.max_guest_cpus; + } + +#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) && test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list)) prot_virt_guest = 1; +#endif } diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 38d64030aacf6..2e60c80395ab0 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -62,7 +62,6 @@ CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_STATIC_KEYS_SELFTEST=y -CONFIG_REFCOUNT_FULL=y CONFIG_LOCK_EVENT_COUNTS=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 9803e96d29247..558cfe570ccf8 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -861,7 +861,7 @@ static inline void _gcm_sg_unmap_and_advance(struct gcm_sg_walk *gw, unsigned int nbytes) { gw->walk_bytes_remain -= nbytes; - scatterwalk_unmap(&gw->walk); + scatterwalk_unmap(gw->walk_ptr); scatterwalk_advance(&gw->walk, nbytes); scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain); gw->walk_ptr = NULL; @@ -936,7 +936,7 @@ static int gcm_out_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) goto out; } - scatterwalk_unmap(&gw->walk); + scatterwalk_unmap(gw->walk_ptr); gw->walk_ptr = NULL; gw->ptr = gw->buf; diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index dd95cdbd22ce8..1f2d40993c4d2 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -2,122 +2,17 @@ /* * s390 arch random implementation. * - * Copyright IBM Corp. 2017, 2018 + * Copyright IBM Corp. 2017, 2020 * Author(s): Harald Freudenberger - * - * The s390_arch_random_generate() function may be called from random.c - * in interrupt context. So this implementation does the best to be very - * fast. There is a buffer of random data which is asynchronously checked - * and filled by a workqueue thread. - * If there are enough bytes in the buffer the s390_arch_random_generate() - * just delivers these bytes. Otherwise false is returned until the - * worker thread refills the buffer. - * The worker fills the rng buffer by pulling fresh entropy from the - * high quality (but slow) true hardware random generator. This entropy - * is then spread over the buffer with an pseudo random generator PRNG. - * As the arch_get_random_seed_long() fetches 8 bytes and the calling - * function add_interrupt_randomness() counts this as 1 bit entropy the - * distribution needs to make sure there is in fact 1 bit entropy contained - * in 8 bytes of the buffer. The current values pull 32 byte entropy - * and scatter this into a 2048 byte buffer. So 8 byte in the buffer - * will contain 1 bit of entropy. - * The worker thread is rescheduled based on the charge level of the - * buffer but at least with 500 ms delay to avoid too much CPU consumption. - * So the max. amount of rng data delivered via arch_get_random_seed is - * limited to 4k bytes per second. */ #include #include #include -#include #include -#include #include DEFINE_STATIC_KEY_FALSE(s390_arch_random_available); atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0); EXPORT_SYMBOL(s390_arch_random_counter); - -#define ARCH_REFILL_TICKS (HZ/2) -#define ARCH_PRNG_SEED_SIZE 32 -#define ARCH_RNG_BUF_SIZE 2048 - -static DEFINE_SPINLOCK(arch_rng_lock); -static u8 *arch_rng_buf; -static unsigned int arch_rng_buf_idx; - -static void arch_rng_refill_buffer(struct work_struct *); -static DECLARE_DELAYED_WORK(arch_rng_work, arch_rng_refill_buffer); - -bool s390_arch_random_generate(u8 *buf, unsigned int nbytes) -{ - /* lock rng buffer */ - if (!spin_trylock(&arch_rng_lock)) - return false; - - /* try to resolve the requested amount of bytes from the buffer */ - arch_rng_buf_idx -= nbytes; - if (arch_rng_buf_idx < ARCH_RNG_BUF_SIZE) { - memcpy(buf, arch_rng_buf + arch_rng_buf_idx, nbytes); - atomic64_add(nbytes, &s390_arch_random_counter); - spin_unlock(&arch_rng_lock); - return true; - } - - /* not enough bytes in rng buffer, refill is done asynchronously */ - spin_unlock(&arch_rng_lock); - - return false; -} -EXPORT_SYMBOL(s390_arch_random_generate); - -static void arch_rng_refill_buffer(struct work_struct *unused) -{ - unsigned int delay = ARCH_REFILL_TICKS; - - spin_lock(&arch_rng_lock); - if (arch_rng_buf_idx > ARCH_RNG_BUF_SIZE) { - /* buffer is exhausted and needs refill */ - u8 seed[ARCH_PRNG_SEED_SIZE]; - u8 prng_wa[240]; - /* fetch ARCH_PRNG_SEED_SIZE bytes of entropy */ - cpacf_trng(NULL, 0, seed, sizeof(seed)); - /* blow this entropy up to ARCH_RNG_BUF_SIZE with PRNG */ - memset(prng_wa, 0, sizeof(prng_wa)); - cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, - &prng_wa, NULL, 0, seed, sizeof(seed)); - cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, - &prng_wa, arch_rng_buf, ARCH_RNG_BUF_SIZE, NULL, 0); - arch_rng_buf_idx = ARCH_RNG_BUF_SIZE; - } - delay += (ARCH_REFILL_TICKS * arch_rng_buf_idx) / ARCH_RNG_BUF_SIZE; - spin_unlock(&arch_rng_lock); - - /* kick next check */ - queue_delayed_work(system_long_wq, &arch_rng_work, delay); -} - -static int __init s390_arch_random_init(void) -{ - /* all the needed PRNO subfunctions available ? */ - if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG) && - cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) { - - /* alloc arch random working buffer */ - arch_rng_buf = kmalloc(ARCH_RNG_BUF_SIZE, GFP_KERNEL); - if (!arch_rng_buf) - return -ENOMEM; - - /* kick worker queue job to fill the random buffer */ - queue_delayed_work(system_long_wq, - &arch_rng_work, ARCH_REFILL_TICKS); - - /* enable arch random to the outside world */ - static_branch_enable(&s390_arch_random_available); - } - - return 0; -} -arch_initcall(s390_arch_random_init); diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 6184dceed340b..2ad237cec2dfd 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -5,7 +5,7 @@ * s390 implementation of the AES Cipher Algorithm with protected keys. * * s390 Version: - * Copyright IBM Corp. 2017,2019 + * Copyright IBM Corp. 2017,2020 * Author(s): Martin Schwidefsky * Harald Freudenberger */ @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -31,11 +32,11 @@ * is called. As paes can handle different kinds of key blobs * and padding is also possible, the limits need to be generous. */ -#define PAES_MIN_KEYSIZE 64 -#define PAES_MAX_KEYSIZE 256 +#define PAES_MIN_KEYSIZE 16 +#define PAES_MAX_KEYSIZE 320 static u8 *ctrblk; -static DEFINE_SPINLOCK(ctrblk_lock); +static DEFINE_MUTEX(ctrblk_lock); static cpacf_mask_t km_functions, kmc_functions, kmctr_functions; @@ -52,19 +53,46 @@ struct key_blob { unsigned int keylen; }; -static inline int _copy_key_to_kb(struct key_blob *kb, - const u8 *key, - unsigned int keylen) -{ - if (keylen <= sizeof(kb->keybuf)) +static inline int _key_to_kb(struct key_blob *kb, + const u8 *key, + unsigned int keylen) +{ + struct clearkey_header { + u8 type; + u8 res0[3]; + u8 version; + u8 res1[3]; + u32 keytype; + u32 len; + } __packed * h; + + switch (keylen) { + case 16: + case 24: + case 32: + /* clear key value, prepare pkey clear key token in keybuf */ + memset(kb->keybuf, 0, sizeof(kb->keybuf)); + h = (struct clearkey_header *) kb->keybuf; + h->version = 0x02; /* TOKVER_CLEAR_KEY */ + h->keytype = (keylen - 8) >> 3; + h->len = keylen; + memcpy(kb->keybuf + sizeof(*h), key, keylen); + kb->keylen = sizeof(*h) + keylen; kb->key = kb->keybuf; - else { - kb->key = kmalloc(keylen, GFP_KERNEL); - if (!kb->key) - return -ENOMEM; + break; + default: + /* other key material, let pkey handle this */ + if (keylen <= sizeof(kb->keybuf)) + kb->key = kb->keybuf; + else { + kb->key = kmalloc(keylen, GFP_KERNEL); + if (!kb->key) + return -ENOMEM; + } + memcpy(kb->key, key, keylen); + kb->keylen = keylen; + break; } - memcpy(kb->key, key, keylen); - kb->keylen = keylen; return 0; } @@ -81,16 +109,18 @@ static inline void _free_kb_keybuf(struct key_blob *kb) struct s390_paes_ctx { struct key_blob kb; struct pkey_protkey pk; + spinlock_t pk_lock; unsigned long fc; }; struct s390_pxts_ctx { struct key_blob kb[2]; struct pkey_protkey pk[2]; + spinlock_t pk_lock; unsigned long fc; }; -static inline int __paes_convert_key(struct key_blob *kb, +static inline int __paes_keyblob2pkey(struct key_blob *kb, struct pkey_protkey *pk) { int i, ret; @@ -105,22 +135,18 @@ static inline int __paes_convert_key(struct key_blob *kb, return ret; } -static int __paes_set_key(struct s390_paes_ctx *ctx) +static inline int __paes_convert_key(struct s390_paes_ctx *ctx) { - unsigned long fc; + struct pkey_protkey pkey; - if (__paes_convert_key(&ctx->kb, &ctx->pk)) + if (__paes_keyblob2pkey(&ctx->kb, &pkey)) return -EINVAL; - /* Pick the correct function code based on the protected key type */ - fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PAES_128 : - (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KM_PAES_192 : - (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KM_PAES_256 : 0; - - /* Check if the function code is available */ - ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0; + spin_lock_bh(&ctx->pk_lock); + memcpy(&ctx->pk, &pkey, sizeof(pkey)); + spin_unlock_bh(&ctx->pk_lock); - return ctx->fc ? 0 : -EINVAL; + return 0; } static int ecb_paes_init(struct crypto_tfm *tfm) @@ -128,6 +154,7 @@ static int ecb_paes_init(struct crypto_tfm *tfm) struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); ctx->kb.key = NULL; + spin_lock_init(&ctx->pk_lock); return 0; } @@ -139,6 +166,24 @@ static void ecb_paes_exit(struct crypto_tfm *tfm) _free_kb_keybuf(&ctx->kb); } +static inline int __ecb_paes_set_key(struct s390_paes_ctx *ctx) +{ + unsigned long fc; + + if (__paes_convert_key(ctx)) + return -EINVAL; + + /* Pick the correct function code based on the protected key type */ + fc = (ctx->pk.type == PKEY_KEYTYPE_AES_128) ? CPACF_KM_PAES_128 : + (ctx->pk.type == PKEY_KEYTYPE_AES_192) ? CPACF_KM_PAES_192 : + (ctx->pk.type == PKEY_KEYTYPE_AES_256) ? CPACF_KM_PAES_256 : 0; + + /* Check if the function code is available */ + ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0; + + return ctx->fc ? 0 : -EINVAL; +} + static int ecb_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { @@ -146,13 +191,14 @@ static int ecb_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); _free_kb_keybuf(&ctx->kb); - rc = _copy_key_to_kb(&ctx->kb, in_key, key_len); + rc = _key_to_kb(&ctx->kb, in_key, key_len); if (rc) return rc; - if (__paes_set_key(ctx)) { + if (__ecb_paes_set_key(ctx)) { tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; return -EINVAL; + } return 0; } @@ -164,18 +210,31 @@ static int ecb_paes_crypt(struct blkcipher_desc *desc, struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int nbytes, n, k; int ret; + struct { + u8 key[MAXPROTKEYSIZE]; + } param; ret = blkcipher_walk_virt(desc, walk); + if (ret) + return ret; + + spin_lock_bh(&ctx->pk_lock); + memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); + spin_unlock_bh(&ctx->pk_lock); + while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); - k = cpacf_km(ctx->fc | modifier, ctx->pk.protkey, + k = cpacf_km(ctx->fc | modifier, ¶m, walk->dst.virt.addr, walk->src.virt.addr, n); if (k) ret = blkcipher_walk_done(desc, walk, nbytes - k); if (k < n) { - if (__paes_set_key(ctx) != 0) + if (__paes_convert_key(ctx)) return blkcipher_walk_done(desc, walk, -EIO); + spin_lock_bh(&ctx->pk_lock); + memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); + spin_unlock_bh(&ctx->pk_lock); } } return ret; @@ -229,6 +288,7 @@ static int cbc_paes_init(struct crypto_tfm *tfm) struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); ctx->kb.key = NULL; + spin_lock_init(&ctx->pk_lock); return 0; } @@ -240,11 +300,11 @@ static void cbc_paes_exit(struct crypto_tfm *tfm) _free_kb_keybuf(&ctx->kb); } -static int __cbc_paes_set_key(struct s390_paes_ctx *ctx) +static inline int __cbc_paes_set_key(struct s390_paes_ctx *ctx) { unsigned long fc; - if (__paes_convert_key(&ctx->kb, &ctx->pk)) + if (__paes_convert_key(ctx)) return -EINVAL; /* Pick the correct function code based on the protected key type */ @@ -265,7 +325,7 @@ static int cbc_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); _free_kb_keybuf(&ctx->kb); - rc = _copy_key_to_kb(&ctx->kb, in_key, key_len); + rc = _key_to_kb(&ctx->kb, in_key, key_len); if (rc) return rc; @@ -288,22 +348,31 @@ static int cbc_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, } param; ret = blkcipher_walk_virt(desc, walk); + if (ret) + return ret; + memcpy(param.iv, walk->iv, AES_BLOCK_SIZE); + spin_lock_bh(&ctx->pk_lock); memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); + spin_unlock_bh(&ctx->pk_lock); + while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { /* only use complete blocks */ n = nbytes & ~(AES_BLOCK_SIZE - 1); k = cpacf_kmc(ctx->fc | modifier, ¶m, walk->dst.virt.addr, walk->src.virt.addr, n); - if (k) + if (k) { + memcpy(walk->iv, param.iv, AES_BLOCK_SIZE); ret = blkcipher_walk_done(desc, walk, nbytes - k); + } if (k < n) { - if (__cbc_paes_set_key(ctx) != 0) + if (__paes_convert_key(ctx)) return blkcipher_walk_done(desc, walk, -EIO); + spin_lock_bh(&ctx->pk_lock); memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); + spin_unlock_bh(&ctx->pk_lock); } } - memcpy(walk->iv, param.iv, AES_BLOCK_SIZE); return ret; } @@ -357,6 +426,7 @@ static int xts_paes_init(struct crypto_tfm *tfm) ctx->kb[0].key = NULL; ctx->kb[1].key = NULL; + spin_lock_init(&ctx->pk_lock); return 0; } @@ -369,12 +439,27 @@ static void xts_paes_exit(struct crypto_tfm *tfm) _free_kb_keybuf(&ctx->kb[1]); } -static int __xts_paes_set_key(struct s390_pxts_ctx *ctx) +static inline int __xts_paes_convert_key(struct s390_pxts_ctx *ctx) +{ + struct pkey_protkey pkey0, pkey1; + + if (__paes_keyblob2pkey(&ctx->kb[0], &pkey0) || + __paes_keyblob2pkey(&ctx->kb[1], &pkey1)) + return -EINVAL; + + spin_lock_bh(&ctx->pk_lock); + memcpy(&ctx->pk[0], &pkey0, sizeof(pkey0)); + memcpy(&ctx->pk[1], &pkey1, sizeof(pkey1)); + spin_unlock_bh(&ctx->pk_lock); + + return 0; +} + +static inline int __xts_paes_set_key(struct s390_pxts_ctx *ctx) { unsigned long fc; - if (__paes_convert_key(&ctx->kb[0], &ctx->pk[0]) || - __paes_convert_key(&ctx->kb[1], &ctx->pk[1])) + if (__xts_paes_convert_key(ctx)) return -EINVAL; if (ctx->pk[0].type != ctx->pk[1].type) @@ -406,10 +491,10 @@ static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, _free_kb_keybuf(&ctx->kb[0]); _free_kb_keybuf(&ctx->kb[1]); - rc = _copy_key_to_kb(&ctx->kb[0], in_key, key_len); + rc = _key_to_kb(&ctx->kb[0], in_key, key_len); if (rc) return rc; - rc = _copy_key_to_kb(&ctx->kb[1], in_key + key_len, key_len); + rc = _key_to_kb(&ctx->kb[1], in_key + key_len, key_len); if (rc) return rc; @@ -449,15 +534,19 @@ static int xts_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, } xts_param; ret = blkcipher_walk_virt(desc, walk); + if (ret) + return ret; + keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64; offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 16 : 0; -retry: + memset(&pcc_param, 0, sizeof(pcc_param)); memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak)); + spin_lock_bh(&ctx->pk_lock); memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen); - cpacf_pcc(ctx->fc, pcc_param.key + offset); - memcpy(xts_param.key + offset, ctx->pk[0].protkey, keylen); + spin_unlock_bh(&ctx->pk_lock); + cpacf_pcc(ctx->fc, pcc_param.key + offset); memcpy(xts_param.init, pcc_param.xts, 16); while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { @@ -468,11 +557,15 @@ static int xts_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, if (k) ret = blkcipher_walk_done(desc, walk, nbytes - k); if (k < n) { - if (__xts_paes_set_key(ctx) != 0) + if (__xts_paes_convert_key(ctx)) return blkcipher_walk_done(desc, walk, -EIO); - goto retry; + spin_lock_bh(&ctx->pk_lock); + memcpy(xts_param.key + offset, + ctx->pk[0].protkey, keylen); + spin_unlock_bh(&ctx->pk_lock); } } + return ret; } @@ -525,6 +618,7 @@ static int ctr_paes_init(struct crypto_tfm *tfm) struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); ctx->kb.key = NULL; + spin_lock_init(&ctx->pk_lock); return 0; } @@ -536,11 +630,11 @@ static void ctr_paes_exit(struct crypto_tfm *tfm) _free_kb_keybuf(&ctx->kb); } -static int __ctr_paes_set_key(struct s390_paes_ctx *ctx) +static inline int __ctr_paes_set_key(struct s390_paes_ctx *ctx) { unsigned long fc; - if (__paes_convert_key(&ctx->kb, &ctx->pk)) + if (__paes_convert_key(ctx)) return -EINVAL; /* Pick the correct function code based on the protected key type */ @@ -562,7 +656,7 @@ static int ctr_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key, struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm); _free_kb_keybuf(&ctx->kb); - rc = _copy_key_to_kb(&ctx->kb, in_key, key_len); + rc = _key_to_kb(&ctx->kb, in_key, key_len); if (rc) return rc; @@ -595,47 +689,62 @@ static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, u8 buf[AES_BLOCK_SIZE], *ctrptr; unsigned int nbytes, n, k; int ret, locked; - - locked = spin_trylock(&ctrblk_lock); + struct { + u8 key[MAXPROTKEYSIZE]; + } param; ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE); + if (ret) + return ret; + + spin_lock_bh(&ctx->pk_lock); + memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); + spin_unlock_bh(&ctx->pk_lock); + + locked = mutex_trylock(&ctrblk_lock); + + while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { n = AES_BLOCK_SIZE; if (nbytes >= 2*AES_BLOCK_SIZE && locked) n = __ctrblk_init(ctrblk, walk->iv, nbytes); ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv; - k = cpacf_kmctr(ctx->fc | modifier, ctx->pk.protkey, - walk->dst.virt.addr, walk->src.virt.addr, - n, ctrptr); + k = cpacf_kmctr(ctx->fc | modifier, ¶m, walk->dst.virt.addr, + walk->src.virt.addr, n, ctrptr); if (k) { if (ctrptr == ctrblk) memcpy(walk->iv, ctrptr + k - AES_BLOCK_SIZE, AES_BLOCK_SIZE); crypto_inc(walk->iv, AES_BLOCK_SIZE); - ret = blkcipher_walk_done(desc, walk, nbytes - n); + ret = blkcipher_walk_done(desc, walk, nbytes - k); } if (k < n) { - if (__ctr_paes_set_key(ctx) != 0) { + if (__paes_convert_key(ctx)) { if (locked) - spin_unlock(&ctrblk_lock); + mutex_unlock(&ctrblk_lock); return blkcipher_walk_done(desc, walk, -EIO); } + spin_lock_bh(&ctx->pk_lock); + memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); + spin_unlock_bh(&ctx->pk_lock); } } if (locked) - spin_unlock(&ctrblk_lock); + mutex_unlock(&ctrblk_lock); /* * final block may be < AES_BLOCK_SIZE, copy only nbytes */ if (nbytes) { while (1) { - if (cpacf_kmctr(ctx->fc | modifier, - ctx->pk.protkey, buf, + if (cpacf_kmctr(ctx->fc | modifier, ¶m, buf, walk->src.virt.addr, AES_BLOCK_SIZE, walk->iv) == AES_BLOCK_SIZE) break; - if (__ctr_paes_set_key(ctx) != 0) + if (__paes_convert_key(ctx)) return blkcipher_walk_done(desc, walk, -EIO); + spin_lock_bh(&ctx->pk_lock); + memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE); + spin_unlock_bh(&ctx->pk_lock); } memcpy(walk->dst.virt.addr, buf, nbytes); crypto_inc(walk->iv, AES_BLOCK_SIZE); @@ -697,12 +806,12 @@ static inline void __crypto_unregister_alg(struct crypto_alg *alg) static void paes_s390_fini(void) { - if (ctrblk) - free_page((unsigned long) ctrblk); __crypto_unregister_alg(&ctr_paes_alg); __crypto_unregister_alg(&xts_paes_alg); __crypto_unregister_alg(&cbc_paes_alg); __crypto_unregister_alg(&ecb_paes_alg); + if (ctrblk) + free_page((unsigned long) ctrblk); } static int __init paes_s390_init(void) @@ -740,14 +849,14 @@ static int __init paes_s390_init(void) if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_128) || cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_192) || cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_256)) { - ret = crypto_register_alg(&ctr_paes_alg); - if (ret) - goto out_err; ctrblk = (u8 *) __get_free_page(GFP_KERNEL); if (!ctrblk) { ret = -ENOMEM; goto out_err; } + ret = crypto_register_alg(&ctr_paes_alg); + if (ret) + goto out_err; } return 0; diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index d39e0f0792170..686fe7aa192f4 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -74,14 +74,17 @@ int s390_sha_final(struct shash_desc *desc, u8 *out) struct s390_sha_ctx *ctx = shash_desc_ctx(desc); unsigned int bsize = crypto_shash_blocksize(desc->tfm); u64 bits; - unsigned int n, mbl_offset; + unsigned int n; + int mbl_offset; n = ctx->count % bsize; bits = ctx->count * 8; - mbl_offset = s390_crypto_shash_parmsize(ctx->func) / sizeof(u32); + mbl_offset = s390_crypto_shash_parmsize(ctx->func); if (mbl_offset < 0) return -EINVAL; + mbl_offset = mbl_offset / sizeof(u32); + /* set total msg bit length (mbl) in CPACF parmblock */ switch (ctx->func) { case CPACF_KLMD_SHA_1: diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index f0bc4dc3e9bf0..6511d15ace45e 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -437,7 +437,7 @@ __init int hypfs_diag_init(void) int rc; if (diag204_probe()) { - pr_err("The hardware system does not support hypfs\n"); + pr_info("The hardware system does not support hypfs\n"); return -ENODATA; } diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index e1fcc03159ef2..a927adccb4ba7 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -20,6 +20,7 @@ static char local_guest[] = " "; static char all_guests[] = "* "; +static char *all_groups = all_guests; static char *guest_query; struct diag2fc_data { @@ -62,10 +63,11 @@ static int diag2fc(int size, char* query, void *addr) memcpy(parm_list.userid, query, NAME_LEN); ASCEBC(parm_list.userid, NAME_LEN); - parm_list.addr = (unsigned long) addr ; + memcpy(parm_list.aci_grp, all_groups, NAME_LEN); + ASCEBC(parm_list.aci_grp, NAME_LEN); + parm_list.addr = (unsigned long)addr; parm_list.size = size; parm_list.fmt = 0x02; - memset(parm_list.aci_grp, 0x40, NAME_LEN); rc = -1; diag_stat_inc(DIAG_STAT_X2FC); diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 70139d0791b61..ca4fc66a361fb 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -501,9 +501,9 @@ static int __init hypfs_init(void) hypfs_vm_exit(); fail_hypfs_diag_exit: hypfs_diag_exit(); + pr_err("Initialization of hypfs failed with rc=%i\n", rc); fail_dbfs_exit: hypfs_dbfs_exit(); - pr_err("Initialization of hypfs failed with rc=%i\n", rc); return rc; } device_initcall(hypfs_init) diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h index c67b82dfa558e..4120c428dc378 100644 --- a/arch/s390/include/asm/archrandom.h +++ b/arch/s390/include/asm/archrandom.h @@ -2,7 +2,7 @@ /* * Kernel interface for the s390 arch_random_* functions * - * Copyright IBM Corp. 2017 + * Copyright IBM Corp. 2017, 2022 * * Author: Harald Freudenberger * @@ -14,47 +14,41 @@ #ifdef CONFIG_ARCH_RANDOM #include +#include #include +#include DECLARE_STATIC_KEY_FALSE(s390_arch_random_available); extern atomic64_t s390_arch_random_counter; -bool s390_arch_random_generate(u8 *buf, unsigned int nbytes); - -static inline bool arch_has_random(void) -{ - return false; -} - -static inline bool arch_has_random_seed(void) -{ - if (static_branch_likely(&s390_arch_random_available)) - return true; - return false; -} - -static inline bool arch_get_random_long(unsigned long *v) +static inline bool __must_check arch_get_random_long(unsigned long *v) { return false; } -static inline bool arch_get_random_int(unsigned int *v) +static inline bool __must_check arch_get_random_int(unsigned int *v) { return false; } -static inline bool arch_get_random_seed_long(unsigned long *v) +static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { - if (static_branch_likely(&s390_arch_random_available)) { - return s390_arch_random_generate((u8 *)v, sizeof(*v)); + if (static_branch_likely(&s390_arch_random_available) && + in_task()) { + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); + atomic64_add(sizeof(*v), &s390_arch_random_counter); + return true; } return false; } -static inline bool arch_get_random_seed_int(unsigned int *v) +static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { - if (static_branch_likely(&s390_arch_random_available)) { - return s390_arch_random_generate((u8 *)v, sizeof(*v)); + if (static_branch_likely(&s390_arch_random_available) && + in_task()) { + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); + atomic64_add(sizeof(*v), &s390_arch_random_counter); + return true; } return false; } diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 819803a97c2b2..4dfebb8705e43 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -109,7 +109,9 @@ struct hws_basic_entry { unsigned int AS:2; /* 29-30 PSW address-space control */ unsigned int I:1; /* 31 entry valid or invalid */ unsigned int CL:2; /* 32-33 Configuration Level */ - unsigned int:14; + unsigned int H:1; /* 34 Host Indicator */ + unsigned int LS:1; /* 35 Limited Sampling */ + unsigned int:12; unsigned int prim_asn:16; /* primary ASN */ unsigned long long ia; /* Instruction Address */ unsigned long long gpp; /* Guest Program Parameter */ diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index 60f9075163358..f79e5e9cf6c89 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -11,6 +11,8 @@ #include #define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10) +#define CR0_FETCH_PROTECTION_OVERRIDE BIT(63 - 38) +#define CR0_STORAGE_PROTECTION_OVERRIDE BIT(63 - 39) #define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49) #define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50) #define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52) diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 0036eab14391d..ca8f85b53a902 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -298,10 +298,8 @@ struct diag26c_mac_resp { union diag318_info { unsigned long val; struct { - unsigned int cpnc : 8; - unsigned int cpvc_linux : 24; - unsigned char cpvc_distro[3]; - unsigned char zero; + unsigned long cpnc : 8; + unsigned long cpvc : 56; }; }; diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 68d362f8d6c17..c72c179a5aae9 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -27,6 +27,7 @@ void ftrace_caller(void); extern char ftrace_graph_caller_end; extern unsigned long ftrace_plt; +extern void *ftrace_func; struct dyn_arch_ftrace { }; diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 37f96b6f0e611..a816fb4734b8b 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -9,6 +9,7 @@ #ifndef _ASM_S390_GMAP_H #define _ASM_S390_GMAP_H +#include #include /* Generic bits for GMAP notification on DAT table entry changes. */ @@ -31,6 +32,7 @@ * @table: pointer to the page directory * @asce: address space control element for gmap page table * @pfault_enabled: defines if pfaults are applicable for the guest + * @guest_handle: protected virtual machine handle for the ultravisor * @host_to_rmap: radix tree with gmap_rmap lists * @children: list of shadow gmap structures * @pt_list: list of all page tables used in the shadow guest address space @@ -54,6 +56,8 @@ struct gmap { unsigned long asce_end; void *private; bool pfault_enabled; + /* only set for protected virtual machines */ + unsigned long guest_handle; /* Additional data for shadow guest address spaces */ struct radix_tree_root host_to_rmap; struct list_head children; @@ -144,4 +148,6 @@ int gmap_mprotect_notify(struct gmap *, unsigned long start, void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4], unsigned long gaddr, unsigned long vmaddr); +int gmap_mark_unmergeable(void); +void s390_reset_acc(struct mm_struct *mm); #endif /* _ASM_S390_GMAP_H */ diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 084e71b7272af..81da530c8e665 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -21,6 +21,7 @@ struct ipl_parameter_block { struct ipl_pb0_common common; struct ipl_pb0_fcp fcp; struct ipl_pb0_ccw ccw; + struct ipl_pb0_nvme nvme; char raw[PAGE_SIZE - sizeof(struct ipl_pl_hdr)]; }; } __packed __aligned(PAGE_SIZE); @@ -30,6 +31,11 @@ struct ipl_parameter_block { #define IPL_BP_FCP_LEN (sizeof(struct ipl_pl_hdr) + \ sizeof(struct ipl_pb0_fcp)) #define IPL_BP0_FCP_LEN (sizeof(struct ipl_pb0_fcp)) + +#define IPL_BP_NVME_LEN (sizeof(struct ipl_pl_hdr) + \ + sizeof(struct ipl_pb0_nvme)) +#define IPL_BP0_NVME_LEN (sizeof(struct ipl_pb0_nvme)) + #define IPL_BP_CCW_LEN (sizeof(struct ipl_pl_hdr) + \ sizeof(struct ipl_pb0_ccw)) #define IPL_BP0_CCW_LEN (sizeof(struct ipl_pb0_ccw)) @@ -59,6 +65,7 @@ enum ipl_type { IPL_TYPE_FCP = 4, IPL_TYPE_FCP_DUMP = 8, IPL_TYPE_NSS = 16, + IPL_TYPE_NVME = 32, }; struct ipl_info @@ -73,6 +80,10 @@ struct ipl_info u64 wwpn; u64 lun; } fcp; + struct { + u32 fid; + u32 nsid; + } nvme; struct { char name[NSS_NAME_SIZE + 1]; } nss; @@ -109,6 +120,7 @@ int ipl_report_add_component(struct ipl_report *report, struct kexec_buf *kbuf, unsigned char flags, unsigned short cert); int ipl_report_add_certificate(struct ipl_report *report, void *key, unsigned long addr, unsigned long len); +bool ipl_get_secureboot(void); /* * DIAG 308 support diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index ea398a05f6432..63098df81c9f2 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -9,6 +9,8 @@ #ifndef _S390_KEXEC_H #define _S390_KEXEC_H +#include + #include #include #include @@ -74,7 +76,21 @@ void *kexec_file_add_components(struct kimage *image, int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val, unsigned long addr); +#define ARCH_HAS_KIMAGE_ARCH + +struct kimage_arch { + void *ipl_buf; +}; + extern const struct kexec_file_ops s390_kexec_image_ops; extern const struct kexec_file_ops s390_kexec_elf_ops; +#ifdef CONFIG_KEXEC_FILE +struct purgatory_info; +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab); +#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add +#endif #endif /*_S390_KEXEC_H */ diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index b106aa29bf55c..09cdb632a490e 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -54,7 +54,6 @@ typedef u16 kprobe_opcode_t; struct arch_specific_insn { /* copy of original instruction */ kprobe_opcode_t *insn; - unsigned int is_ftrace_insn : 1; }; struct prev_kprobe { diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index abe60268335d2..5802dab2e8a1e 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -31,12 +31,12 @@ #define KVM_USER_MEM_SLOTS 32 /* - * These seem to be used for allocating ->chip in the routing table, - * which we don't use. 4096 is an out-of-thin-air value. If we need - * to look at ->chip later on, we'll need to revisit this. + * These seem to be used for allocating ->chip in the routing table, which we + * don't use. 1 is as small as we can get to reduce the needed memory. If we + * need to look at ->chip later on, we'll need to revisit this. */ #define KVM_NR_IRQCHIPS 1 -#define KVM_IRQCHIP_NUM_PINS 4096 +#define KVM_IRQCHIP_NUM_PINS 1 #define KVM_HALT_POLL_NS_DEFAULT 50000 /* s390-specific vcpu->requests bit members */ @@ -122,6 +122,17 @@ struct mcck_volatile_info { __u32 reserved; }; +#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \ + CR0_MEASUREMENT_ALERT_SUBMASK) +#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \ + CR14_EXTERNAL_DAMAGE_SUBMASK) + +#define SIDAD_SIZE_MASK 0xff +#define sida_origin(sie_block) \ + ((sie_block)->sidad & PAGE_MASK) +#define sida_size(sie_block) \ + ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE) + #define CPUSTAT_STOPPED 0x80000000 #define CPUSTAT_WAIT 0x10000000 #define CPUSTAT_ECALL_PEND 0x08000000 @@ -155,7 +166,13 @@ struct kvm_s390_sie_block { __u8 reserved08[4]; /* 0x0008 */ #define PROG_IN_SIE (1<<0) __u32 prog0c; /* 0x000c */ - __u8 reserved10[16]; /* 0x0010 */ + union { + __u8 reserved10[16]; /* 0x0010 */ + struct { + __u64 pv_handle_cpu; + __u64 pv_handle_config; + }; + }; #define PROG_BLOCK_SIE (1<<0) #define PROG_REQUEST (1<<1) atomic_t prog20; /* 0x0020 */ @@ -204,10 +221,23 @@ struct kvm_s390_sie_block { #define ICPT_PARTEXEC 0x38 #define ICPT_IOINST 0x40 #define ICPT_KSS 0x5c +#define ICPT_MCHKREQ 0x60 +#define ICPT_INT_ENABLE 0x64 +#define ICPT_PV_INSTR 0x68 +#define ICPT_PV_NOTIFY 0x6c +#define ICPT_PV_PREF 0x70 __u8 icptcode; /* 0x0050 */ __u8 icptstatus; /* 0x0051 */ __u16 ihcpu; /* 0x0052 */ - __u8 reserved54[2]; /* 0x0054 */ + __u8 reserved54; /* 0x0054 */ +#define IICTL_CODE_NONE 0x00 +#define IICTL_CODE_MCHK 0x01 +#define IICTL_CODE_EXT 0x02 +#define IICTL_CODE_IO 0x03 +#define IICTL_CODE_RESTART 0x04 +#define IICTL_CODE_SPECIFICATION 0x10 +#define IICTL_CODE_OPERAND 0x11 + __u8 iictl; /* 0x0055 */ __u16 ipa; /* 0x0056 */ __u32 ipb; /* 0x0058 */ __u32 scaoh; /* 0x005c */ @@ -228,9 +258,10 @@ struct kvm_s390_sie_block { #define ECB3_RI 0x01 __u8 ecb3; /* 0x0063 */ __u32 scaol; /* 0x0064 */ - __u8 reserved68; /* 0x0068 */ + __u8 sdf; /* 0x0068 */ __u8 epdx; /* 0x0069 */ - __u8 reserved6a[2]; /* 0x006a */ + __u8 cpnc; /* 0x006a */ + __u8 reserved6b; /* 0x006b */ __u32 todpr; /* 0x006c */ #define GISA_FORMAT1 0x00000001 __u32 gd; /* 0x0070 */ @@ -244,31 +275,58 @@ struct kvm_s390_sie_block { #define HPID_KVM 0x4 #define HPID_VSIE 0x5 __u8 hpid; /* 0x00b8 */ - __u8 reservedb9[11]; /* 0x00b9 */ - __u16 extcpuaddr; /* 0x00c4 */ - __u16 eic; /* 0x00c6 */ + __u8 reservedb9[7]; /* 0x00b9 */ + union { + struct { + __u32 eiparams; /* 0x00c0 */ + __u16 extcpuaddr; /* 0x00c4 */ + __u16 eic; /* 0x00c6 */ + }; + __u64 mcic; /* 0x00c0 */ + } __packed; __u32 reservedc8; /* 0x00c8 */ - __u16 pgmilc; /* 0x00cc */ - __u16 iprcc; /* 0x00ce */ - __u32 dxc; /* 0x00d0 */ - __u16 mcn; /* 0x00d4 */ - __u8 perc; /* 0x00d6 */ - __u8 peratmid; /* 0x00d7 */ + union { + struct { + __u16 pgmilc; /* 0x00cc */ + __u16 iprcc; /* 0x00ce */ + }; + __u32 edc; /* 0x00cc */ + } __packed; + union { + struct { + __u32 dxc; /* 0x00d0 */ + __u16 mcn; /* 0x00d4 */ + __u8 perc; /* 0x00d6 */ + __u8 peratmid; /* 0x00d7 */ + }; + __u64 faddr; /* 0x00d0 */ + } __packed; __u64 peraddr; /* 0x00d8 */ __u8 eai; /* 0x00e0 */ __u8 peraid; /* 0x00e1 */ __u8 oai; /* 0x00e2 */ __u8 armid; /* 0x00e3 */ __u8 reservede4[4]; /* 0x00e4 */ - __u64 tecmc; /* 0x00e8 */ - __u8 reservedf0[12]; /* 0x00f0 */ + union { + __u64 tecmc; /* 0x00e8 */ + struct { + __u16 subchannel_id; /* 0x00e8 */ + __u16 subchannel_nr; /* 0x00ea */ + __u32 io_int_parm; /* 0x00ec */ + __u32 io_int_word; /* 0x00f0 */ + }; + } __packed; + __u8 reservedf4[8]; /* 0x00f4 */ #define CRYCB_FORMAT_MASK 0x00000003 #define CRYCB_FORMAT0 0x00000000 #define CRYCB_FORMAT1 0x00000001 #define CRYCB_FORMAT2 0x00000003 __u32 crycbd; /* 0x00fc */ __u64 gcr[16]; /* 0x0100 */ - __u64 gbea; /* 0x0180 */ + union { + __u64 gbea; /* 0x0180 */ + __u64 sidad; + }; __u8 reserved188[8]; /* 0x0188 */ __u64 sdnxo; /* 0x0190 */ __u8 reserved198[8]; /* 0x0198 */ @@ -296,7 +354,9 @@ struct kvm_s390_itdb { struct sie_page { struct kvm_s390_sie_block sie_block; struct mcck_volatile_info mcck_info; /* 0x0200 */ - __u8 reserved218[1000]; /* 0x0218 */ + __u8 reserved218[360]; /* 0x0218 */ + __u64 pv_grregs[16]; /* 0x0380 */ + __u8 reserved400[512]; /* 0x0400 */ struct kvm_s390_itdb itdb; /* 0x0600 */ __u8 reserved700[2304]; /* 0x0700 */ }; @@ -470,6 +530,7 @@ enum irq_types { IRQ_PEND_PFAULT_INIT, IRQ_PEND_EXT_HOST, IRQ_PEND_EXT_SERVICE, + IRQ_PEND_EXT_SERVICE_EV, IRQ_PEND_EXT_TIMING, IRQ_PEND_EXT_CPU_TIMER, IRQ_PEND_EXT_CLOCK_COMP, @@ -514,6 +575,7 @@ enum irq_types { (1UL << IRQ_PEND_EXT_TIMING) | \ (1UL << IRQ_PEND_EXT_HOST) | \ (1UL << IRQ_PEND_EXT_SERVICE) | \ + (1UL << IRQ_PEND_EXT_SERVICE_EV) | \ (1UL << IRQ_PEND_VIRTIO) | \ (1UL << IRQ_PEND_PFAULT_INIT) | \ (1UL << IRQ_PEND_PFAULT_DONE)) @@ -530,6 +592,13 @@ enum irq_types { #define IRQ_PEND_MCHK_MASK ((1UL << IRQ_PEND_MCHK_REP) | \ (1UL << IRQ_PEND_MCHK_EX)) +#define IRQ_PEND_EXT_II_MASK ((1UL << IRQ_PEND_EXT_CPU_TIMER) | \ + (1UL << IRQ_PEND_EXT_CLOCK_COMP) | \ + (1UL << IRQ_PEND_EXT_EMERGENCY) | \ + (1UL << IRQ_PEND_EXT_EXTERNAL) | \ + (1UL << IRQ_PEND_EXT_SERVICE) | \ + (1UL << IRQ_PEND_EXT_SERVICE_EV)) + struct kvm_s390_interrupt_info { struct list_head list; u64 type; @@ -588,6 +657,7 @@ struct kvm_s390_local_interrupt { struct kvm_s390_float_interrupt { unsigned long pending_irqs; + unsigned long masked_irqs; spinlock_t lock; struct list_head lists[FIRQ_LIST_COUNT]; int counters[FIRQ_MAX_COUNT]; @@ -639,6 +709,11 @@ struct kvm_guestdbg_info_arch { unsigned long last_bp; }; +struct kvm_s390_pv_vcpu { + u64 handle; + unsigned long stor_base; +}; + struct kvm_vcpu_arch { struct kvm_s390_sie_block *sie_block; /* if vsie is active, currently executed shadow sie control block */ @@ -667,6 +742,8 @@ struct kvm_vcpu_arch { __u64 cputm_start; bool gs_enabled; bool skey_enabled; + struct kvm_s390_pv_vcpu pv; + union diag318_info diag318_info; }; struct kvm_vm_stat { @@ -695,9 +772,6 @@ struct s390_io_adapter { bool masked; bool swap; bool suppressible; - struct rw_semaphore maps_lock; - struct list_head maps; - atomic_t nr_maps; }; #define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8) @@ -840,6 +914,13 @@ struct kvm_s390_gisa_interrupt { DECLARE_BITMAP(kicked_mask, KVM_MAX_VCPUS); }; +struct kvm_s390_pv { + u64 handle; + u64 guest_len; + unsigned long stor_base; + void *stor_var; +}; + struct kvm_arch{ void *sca; int use_esca; @@ -873,8 +954,10 @@ struct kvm_arch{ atomic64_t cmma_dirty_pages; /* subset of available cpu features enabled by user space */ DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); + /* indexed by vcpu_idx */ DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS); struct kvm_s390_gisa_interrupt gisa_int; + struct kvm_s390_pv pv; }; #define KVM_HVA_ERR_BAD (-1UL) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 237ee0c4169f7..612ed3c6d5813 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -141,7 +141,9 @@ struct lowcore { /* br %r1 trampoline */ __u16 br_r1_trampoline; /* 0x0400 */ - __u8 pad_0x0402[0x0e00-0x0402]; /* 0x0402 */ + __u32 return_lpswe; /* 0x0402 */ + __u32 return_mcck_lpswe; /* 0x0406 */ + __u8 pad_0x040a[0x0e00-0x040a]; /* 0x040a */ /* * 0xe00 contains the address of the IPL Parameter Information diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index bcfb6371086f2..e21b618ad4326 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -16,6 +16,8 @@ typedef struct { unsigned long asce; unsigned long asce_limit; unsigned long vdso_base; + /* The mmu context belongs to a secure guest. */ + atomic_t is_protected; /* * The following bitfields need a down_write on the mm * semaphore when they are written to. As they are only diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 8d04e6f3f7964..afa8360140764 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -23,6 +23,7 @@ static inline int init_new_context(struct task_struct *tsk, INIT_LIST_HEAD(&mm->context.gmap_list); cpumask_clear(&mm->context.cpu_attach_mask); atomic_set(&mm->context.flush_count, 0); + atomic_set(&mm->context.is_protected, 0); mm->context.gmap_asce = 0; mm->context.flush_mm = 0; mm->context.compat_mm = test_thread_flag(TIF_31BIT); diff --git a/arch/s390/include/asm/numa.h b/arch/s390/include/asm/numa.h index 35f8cbe7e5bb0..c759dcffa9eaf 100644 --- a/arch/s390/include/asm/numa.h +++ b/arch/s390/include/asm/numa.h @@ -17,7 +17,6 @@ void numa_setup(void); int numa_pfn_to_nid(unsigned long pfn); -int __node_distance(int a, int b); void numa_update_cpu_topology(void); extern cpumask_t node_to_cpumask_map[MAX_NUMNODES]; diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 823578c6b9e2c..30f19b358a56d 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -20,6 +20,8 @@ #define PAGE_SIZE _PAGE_SIZE #define PAGE_MASK _PAGE_MASK #define PAGE_DEFAULT_ACC 0 +/* storage-protection override */ +#define PAGE_SPO_ACC 9 #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) #define HPAGE_SHIFT 20 @@ -33,6 +35,8 @@ #define ARCH_HAS_PREPARE_HUGEPAGE #define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA + #include #ifndef __ASSEMBLY__ @@ -40,7 +44,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end); static inline void storage_key_init_range(unsigned long start, unsigned long end) { - if (PAGE_DEFAULT_KEY) + if (PAGE_DEFAULT_KEY != 0) __storage_key_init_range(start, end); } @@ -151,6 +155,11 @@ static inline int devmem_is_allowed(unsigned long pfn) #define HAVE_ARCH_FREE_PAGE #define HAVE_ARCH_ALLOC_PAGE +#if IS_ENABLED(CONFIG_PGSTE) +int arch_make_page_accessible(struct page *page); +#define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE +#endif + #endif /* !__ASSEMBLY__ */ #define __PAGE_OFFSET 0x0UL diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index a2399eff84ca4..74177c11611a0 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -24,10 +25,16 @@ int pci_domain_nr(struct pci_bus *); int pci_proc_domain(struct pci_bus *); #define ZPCI_BUS_NR 0 /* default bus number */ -#define ZPCI_DEVFN 0 /* default device number */ #define ZPCI_NR_DMA_SPACES 1 #define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS +#define ZPCI_DOMAIN_BITMAP_SIZE (1 << 16) + +#ifdef PCI +#if (ZPCI_NR_DEVICES > ZPCI_DOMAIN_BITMAP_SIZE) +# error ZPCI_NR_DEVICES can not be bigger than ZPCI_DOMAIN_BITMAP_SIZE +#endif +#endif /* PCI */ /* PCI Function Controls */ #define ZPCI_FC_FN_ENABLED 0x80 @@ -95,10 +102,26 @@ struct zpci_bar_struct { struct s390_domain; +#define ZPCI_FUNCTIONS_PER_BUS 256 +struct zpci_bus { + struct kref kref; + struct pci_bus *bus; + struct zpci_dev *function[ZPCI_FUNCTIONS_PER_BUS]; + struct list_head resources; + struct list_head bus_next; + int pchid; + int domain_nr; + bool multifunction; + enum pci_bus_speed max_bus_speed; +}; + /* Private data per function */ struct zpci_dev { - struct pci_bus *bus; + struct zpci_bus *zbus; struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */ + struct list_head bus_next; + struct kref kref; + struct hotplug_slot hotplug_slot; enum zpci_state state; u32 fid; /* function ID, used by sclp */ @@ -107,7 +130,12 @@ struct zpci_dev { u16 pchid; /* physical channel ID */ u8 pfgid; /* function group ID */ u8 pft; /* pci function type */ - u16 domain; + u8 port; + u8 rid_available : 1; + u8 has_hp_slot : 1; + u8 is_physfn : 1; + u8 reserved : 5; + unsigned int devfn; /* DEVFN part of the RID*/ struct mutex lock; u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ @@ -167,15 +195,19 @@ static inline bool zdev_enabled(struct zpci_dev *zdev) extern const struct attribute_group *zpci_attr_groups[]; extern unsigned int s390_pci_force_floating __initdata; +extern unsigned int s390_pci_no_rid; /* ----------------------------------------------------------------------------- Prototypes ----------------------------------------------------------------------------- */ /* Base stuff */ -int zpci_create_device(struct zpci_dev *); -void zpci_remove_device(struct zpci_dev *zdev); +int zpci_create_device(struct zpci_dev *zdev); +void zpci_remove_device(struct zpci_dev *zdev, bool set_error); int zpci_enable_device(struct zpci_dev *); int zpci_disable_device(struct zpci_dev *); +void zpci_device_reserved(struct zpci_dev *zdev); +bool zpci_is_device_configured(struct zpci_dev *zdev); + int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); int zpci_unregister_ioat(struct zpci_dev *, u8); void zpci_remove_reserved_devices(void); @@ -183,12 +215,15 @@ void zpci_remove_reserved_devices(void); /* CLP */ int clp_scan_pci_devices(void); int clp_rescan_pci_devices(void); -int clp_rescan_pci_devices_simple(void); +int clp_rescan_pci_devices_simple(u32 *fid); int clp_add_pci_device(u32, u32, int); int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); int clp_get_state(u32 fid, enum zpci_state *state); +/* UID */ +void update_uid_checking(bool new); + /* IOMMU Interface */ int zpci_init_iommu(struct zpci_dev *zdev); void zpci_destroy_iommu(struct zpci_dev *zdev); @@ -224,7 +259,14 @@ static inline void zpci_exit_slot(struct zpci_dev *zdev) {} /* Helpers */ static inline struct zpci_dev *to_zpci(struct pci_dev *pdev) { - return pdev->sysdata; + struct zpci_bus *zbus = pdev->sysdata; + + return zbus->function[pdev->devfn]; +} + +static inline struct zpci_dev *to_zpci_dev(struct device *dev) +{ + return to_zpci(to_pci_dev(dev)); } struct zpci_dev *get_zdev_by_fid(u32); diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index 50359172cc488..641b7e8d02268 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -93,7 +93,10 @@ struct clp_req_query_pci { struct clp_rsp_query_pci { struct clp_rsp_hdr hdr; u16 vfn; /* virtual fn number */ - u16 : 6; + u16 : 3; + u16 rid_avail : 1; + u16 is_physfn : 1; + u16 reserved1 : 1; u16 mio_addr_avail : 1; u16 util_str_avail : 1; /* utility string available? */ u16 pfgid : 8; /* pci function group id */ @@ -102,12 +105,16 @@ struct clp_rsp_query_pci { u16 pchid; __le32 bar[PCI_BAR_COUNT]; u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ - u32 : 16; + u16 : 12; + u16 port : 4; u8 fmb_len; u8 pft; /* pci function type */ u64 sdma; /* start dma as */ u64 edma; /* end dma as */ - u32 reserved[11]; +#define ZPCI_RID_MASK_DEVFN 0x00ff + u16 rid; /* BUS/DEVFN PCI address */ + u16 reserved0; + u32 reserved[10]; u32 uid; /* user defined id */ u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */ u32 reserved2[16]; diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index cd060b5dd8fdd..287bb88f76986 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -8,14 +8,19 @@ #include #include +/* I/O size constraints */ +#define ZPCI_MAX_READ_SIZE 8 +#define ZPCI_MAX_WRITE_SIZE 128 + /* I/O Map */ #define ZPCI_IOMAP_SHIFT 48 -#define ZPCI_IOMAP_ADDR_BASE 0x8000000000000000UL +#define ZPCI_IOMAP_ADDR_SHIFT 62 +#define ZPCI_IOMAP_ADDR_BASE (1UL << ZPCI_IOMAP_ADDR_SHIFT) #define ZPCI_IOMAP_ADDR_OFF_MASK ((1UL << ZPCI_IOMAP_SHIFT) - 1) #define ZPCI_IOMAP_MAX_ENTRIES \ - ((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT)) + (1UL << (ZPCI_IOMAP_ADDR_SHIFT - ZPCI_IOMAP_SHIFT)) #define ZPCI_IOMAP_ADDR_IDX_MASK \ - (~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE) + ((ZPCI_IOMAP_ADDR_BASE - 1) & ~ZPCI_IOMAP_ADDR_OFF_MASK) struct zpci_iomap_entry { u32 fh; @@ -140,7 +145,8 @@ static inline int zpci_memcpy_fromio(void *dst, while (n > 0) { size = zpci_get_max_write_size((u64 __force) src, - (u64) dst, n, 8); + (u64) dst, n, + ZPCI_MAX_READ_SIZE); rc = zpci_read_single(dst, src, size); if (rc) break; @@ -161,7 +167,8 @@ static inline int zpci_memcpy_toio(volatile void __iomem *dst, while (n > 0) { size = zpci_get_max_write_size((u64 __force) dst, - (u64) src, n, 128); + (u64) src, n, + ZPCI_MAX_WRITE_SIZE); if (size > 8) /* main path */ rc = zpci_write_block(dst, src, size); else diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 50b4ce8cddfdc..918f0ba4f4d20 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -29,7 +29,7 @@ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ old__, new__, prev__; \ pcp_op_T__ *ptr__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ prev__ = *ptr__; \ do { \ @@ -37,7 +37,7 @@ new__ = old__ op (val); \ prev__ = cmpxchg(ptr__, old__, new__); \ } while (prev__ != old__); \ - preempt_enable(); \ + preempt_enable_notrace(); \ new__; \ }) @@ -68,7 +68,7 @@ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ val__ = (val); \ pcp_op_T__ old__, *ptr__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ if (__builtin_constant_p(val__) && \ ((szcast)val__ > -129) && ((szcast)val__ < 128)) { \ @@ -84,7 +84,7 @@ : [val__] "d" (val__) \ : "cc"); \ } \ - preempt_enable(); \ + preempt_enable_notrace(); \ } #define this_cpu_add_4(pcp, val) arch_this_cpu_add(pcp, val, "laa", "asi", int) @@ -95,14 +95,14 @@ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ val__ = (val); \ pcp_op_T__ old__, *ptr__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ asm volatile( \ op " %[old__],%[val__],%[ptr__]\n" \ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ : [val__] "d" (val__) \ : "cc"); \ - preempt_enable(); \ + preempt_enable_notrace(); \ old__ + val__; \ }) @@ -114,14 +114,14 @@ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ val__ = (val); \ pcp_op_T__ old__, *ptr__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ asm volatile( \ op " %[old__],%[val__],%[ptr__]\n" \ : [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \ : [val__] "d" (val__) \ : "cc"); \ - preempt_enable(); \ + preempt_enable_notrace(); \ } #define this_cpu_and_4(pcp, val) arch_this_cpu_to_op(pcp, val, "lan") @@ -136,10 +136,10 @@ typedef typeof(pcp) pcp_op_T__; \ pcp_op_T__ ret__; \ pcp_op_T__ *ptr__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ ret__ = cmpxchg(ptr__, oval, nval); \ - preempt_enable(); \ + preempt_enable_notrace(); \ ret__; \ }) @@ -152,10 +152,10 @@ ({ \ typeof(pcp) *ptr__; \ typeof(pcp) ret__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ ret__ = xchg(ptr__, nval); \ - preempt_enable(); \ + preempt_enable_notrace(); \ ret__; \ }) @@ -171,11 +171,11 @@ typeof(pcp1) *p1__; \ typeof(pcp2) *p2__; \ int ret__; \ - preempt_disable(); \ + preempt_disable_notrace(); \ p1__ = raw_cpu_ptr(&(pcp1)); \ p2__ = raw_cpu_ptr(&(pcp2)); \ ret__ = __cmpxchg_double(p1__, p2__, o1__, o2__, n1__, n2__); \ - preempt_enable(); \ + preempt_enable_notrace(); \ ret__; \ }) diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index bccb8f4a63e20..77606c4acd58d 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -56,7 +56,12 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address) crst_table_init(table, _REGION2_ENTRY_EMPTY); return (p4d_t *) table; } -#define p4d_free(mm, p4d) crst_table_free(mm, (unsigned long *) p4d) + +static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) +{ + if (!mm_p4d_folded(mm)) + crst_table_free(mm, (unsigned long *) p4d); +} static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { @@ -65,7 +70,12 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) crst_table_init(table, _REGION3_ENTRY_EMPTY); return (pud_t *) table; } -#define pud_free(mm, pud) crst_table_free(mm, (unsigned long *) pud) + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + if (!mm_pud_folded(mm)) + crst_table_free(mm, (unsigned long *) pud); +} static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) { @@ -83,6 +93,8 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { + if (mm_pmd_folded(mm)) + return; pgtable_pmd_page_dtor(virt_to_page(pmd)); crst_table_free(mm, (unsigned long *) pmd); } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 5ff98d76a66cd..e5bd69c6d5e2d 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -19,6 +19,7 @@ #include #include #include +#include extern pgd_t swapper_pg_dir[]; extern void paging_init(void); @@ -522,6 +523,15 @@ static inline int mm_has_pgste(struct mm_struct *mm) return 0; } +static inline int mm_is_protected(struct mm_struct *mm) +{ +#ifdef CONFIG_PGSTE + if (unlikely(atomic_read(&mm->context.is_protected))) + return 1; +#endif + return 0; +} + static inline int mm_alloc_pgste(struct mm_struct *mm) { #ifdef CONFIG_PGSTE @@ -756,6 +766,12 @@ static inline int pmd_write(pmd_t pmd) return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0; } +#define pud_write pud_write +static inline int pud_write(pud_t pud) +{ + return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0; +} + static inline int pmd_dirty(pmd_t pmd) { int dirty = 1; @@ -1071,7 +1087,12 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); + pte_t res; + + res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); + if (mm_is_protected(mm) && pte_present(res)) + uv_convert_from_secure(pte_val(res) & PAGE_MASK); + return res; } #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION @@ -1083,7 +1104,12 @@ void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long, static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { - return ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID)); + pte_t res; + + res = ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID)); + if (mm_is_protected(vma->vm_mm) && pte_present(res)) + uv_convert_from_secure(pte_val(res) & PAGE_MASK); + return res; } /* @@ -1098,12 +1124,17 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) { + pte_t res; + if (full) { - pte_t pte = *ptep; + res = *ptep; *ptep = __pte(_PAGE_INVALID); - return pte; + } else { + res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); } - return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); + if (mm_is_protected(mm) && pte_present(res)) + uv_convert_from_secure(pte_val(res) & PAGE_MASK); + return res; } #define __HAVE_ARCH_PTEP_SET_WRPROTECT @@ -1173,8 +1204,6 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr); static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { - if (!MACHINE_HAS_NX) - pte_val(entry) &= ~_PAGE_NOEXEC; if (pte_present(entry)) pte_val(entry) &= ~_PAGE_UNUSED; if (mm_has_pgste(mm)) @@ -1191,6 +1220,8 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); + if (!MACHINE_HAS_NX) + pte_val(__pte) &= ~_PAGE_NOEXEC; return pte_mkyoung(__pte); } @@ -1241,26 +1272,46 @@ static inline pgd_t *pgd_offset_raw(pgd_t *pgd, unsigned long address) #define pgd_offset(mm, address) pgd_offset_raw(READ_ONCE((mm)->pgd), address) #define pgd_offset_k(address) pgd_offset(&init_mm, address) -static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +static inline p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long address) +{ + if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1) + return (p4d_t *) pgd_deref(pgd) + p4d_index(address); + return (p4d_t *) pgdp; +} +#define p4d_offset_lockless p4d_offset_lockless + +static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long address) +{ + return p4d_offset_lockless(pgdp, *pgdp, address); +} + +static inline pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long address) +{ + if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2) + return (pud_t *) p4d_deref(p4d) + pud_index(address); + return (pud_t *) p4dp; +} +#define pud_offset_lockless pud_offset_lockless + +static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long address) { - if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1) - return (p4d_t *) pgd_deref(*pgd) + p4d_index(address); - return (p4d_t *) pgd; + return pud_offset_lockless(p4dp, *p4dp, address); } +#define pud_offset pud_offset -static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) +static inline pmd_t *pmd_offset_lockless(pud_t *pudp, pud_t pud, unsigned long address) { - if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2) - return (pud_t *) p4d_deref(*p4d) + pud_index(address); - return (pud_t *) p4d; + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3) + return (pmd_t *) pud_deref(pud) + pmd_index(address); + return (pmd_t *) pudp; } +#define pmd_offset_lockless pmd_offset_lockless -static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) +static inline pmd_t *pmd_offset(pud_t *pudp, unsigned long address) { - if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3) - return (pmd_t *) pud_deref(*pud) + pmd_index(address); - return (pmd_t *) pud; + return pmd_offset_lockless(pudp, *pudp, address); } +#define pmd_offset pmd_offset static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address) { diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index b5ea9e14c017a..3dcd8ab3db73b 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -52,10 +52,17 @@ static inline bool test_preempt_need_resched(void) static inline void __preempt_count_add(int val) { - if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) - __atomic_add_const(val, &S390_lowcore.preempt_count); - else - __atomic_add(val, &S390_lowcore.preempt_count); + /* + * With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES + * enabled, gcc 12 fails to handle __builtin_constant_p(). + */ + if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES)) { + if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) { + __atomic_add_const(val, &S390_lowcore.preempt_count); + return; + } + } + __atomic_add(val, &S390_lowcore.preempt_count); } static inline void __preempt_count_sub(int val) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 51a0e4a2dc961..48d6ccdef5f77 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -162,6 +162,7 @@ typedef struct thread_struct thread_struct; #define INIT_THREAD { \ .ksp = sizeof(init_stack) + (unsigned long) &init_stack, \ .fpu.regs = (void *) init_task.thread.fpu.fprs, \ + .last_break = 1, \ } /* @@ -214,7 +215,7 @@ static inline unsigned long current_stack_pointer(void) return sp; } -static __no_kasan_or_inline unsigned short stap(void) +static __always_inline unsigned short stap(void) { unsigned short cpu_address; @@ -253,7 +254,7 @@ static inline void __load_psw(psw_t psw) * Set PSW mask to specified value, while leaving the * PSW addr pointing to the next instruction. */ -static __no_kasan_or_inline void __load_psw_mask(unsigned long mask) +static __always_inline void __load_psw_mask(unsigned long mask) { unsigned long addr; psw_t psw; diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index e3f238e8c6116..4f35b1055f30a 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -227,7 +227,7 @@ struct qdio_buffer { * @sbal: absolute SBAL address */ struct sl_element { - unsigned long sbal; + u64 sbal; } __attribute__ ((packed)); /** diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index c563f8368b19b..3b6410deba436 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -129,6 +129,8 @@ int sclp_chp_deconfigure(struct chp_id chpid); int sclp_chp_read_info(struct sclp_chp_info *info); int sclp_pci_configure(u32 fid); int sclp_pci_deconfigure(u32 fid); +int sclp_ap_configure(u32 apid); +int sclp_ap_deconfigure(u32 apid); int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid); int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count); int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count); diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 6dc6c4fbc8e2b..694af4fd5cd23 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -8,6 +8,7 @@ #include #include +#include #define EP_OFFSET 0x10008 #define EP_STRING "S390EP" @@ -38,6 +39,7 @@ #define MACHINE_FLAG_NX BIT(15) #define MACHINE_FLAG_GS BIT(16) #define MACHINE_FLAG_SCC BIT(17) +#define MACHINE_FLAG_PCI_MIO BIT(18) #define LPP_MAGIC BIT(31) #define LPP_PID_MASK _AC(0xffffffff, UL) @@ -80,6 +82,13 @@ struct parmarea { char command_line[ARCH_COMMAND_LINE_SIZE]; /* 0x10480 */ }; +extern unsigned int zlib_dfltcc_support; +#define ZLIB_DFLTCC_DISABLED 0 +#define ZLIB_DFLTCC_FULL 1 +#define ZLIB_DFLTCC_DEFLATE_ONLY 2 +#define ZLIB_DFLTCC_INFLATE_ONLY 3 +#define ZLIB_DFLTCC_FULL_DEBUG 4 + extern int noexec_disabled; extern int memory_end_set; extern unsigned long memory_end; @@ -105,6 +114,7 @@ extern unsigned long __swsusp_reset_dma; #define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX) #define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS) #define MACHINE_HAS_SCC (S390_lowcore.machine_flags & MACHINE_FLAG_SCC) +#define MACHINE_HAS_PCI_MIO (S390_lowcore.machine_flags & MACHINE_FLAG_PCI_MIO) /* * Console mode. Override with conmode= @@ -157,6 +167,12 @@ static inline unsigned long kaslr_offset(void) return __kaslr_offset; } +static inline u32 gen_lpswe(unsigned long addr) +{ + BUILD_BUG_ON(addr > 0xfff); + return 0xb2b20000 | addr; +} + #else /* __ASSEMBLY__ */ #define IPL_DEVICE (IPL_DEVICE_OFFSET) diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index 0ae4bbf7779c8..e192681f83e10 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -79,12 +79,16 @@ struct stack_frame { CALL_ARGS_4(arg1, arg2, arg3, arg4); \ register unsigned long r4 asm("6") = (unsigned long)(arg5) -#define CALL_FMT_0 "=&d" (r2) : -#define CALL_FMT_1 "+&d" (r2) : -#define CALL_FMT_2 CALL_FMT_1 "d" (r3), -#define CALL_FMT_3 CALL_FMT_2 "d" (r4), -#define CALL_FMT_4 CALL_FMT_3 "d" (r5), -#define CALL_FMT_5 CALL_FMT_4 "d" (r6), +/* + * To keep this simple mark register 2-6 as being changed (volatile) + * by the called function, even though register 6 is saved/nonvolatile. + */ +#define CALL_FMT_0 "=&d" (r2) +#define CALL_FMT_1 "+&d" (r2) +#define CALL_FMT_2 CALL_FMT_1, "+&d" (r3) +#define CALL_FMT_3 CALL_FMT_2, "+&d" (r4) +#define CALL_FMT_4 CALL_FMT_3, "+&d" (r5) +#define CALL_FMT_5 CALL_FMT_4, "+&d" (r6) #define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory" #define CALL_CLOBBER_4 CALL_CLOBBER_5 @@ -105,10 +109,118 @@ struct stack_frame { " brasl 14,%[_fn]\n" \ " la 15,0(%[_prev])\n" \ : [_prev] "=&a" (prev), CALL_FMT_##nr \ - [_stack] "a" (stack), \ + : [_stack] "a" (stack), \ [_bc] "i" (offsetof(struct stack_frame, back_chain)), \ [_fn] "X" (fn) : CALL_CLOBBER_##nr); \ r2; \ }) +#define CALL_LARGS_0(...) \ + long dummy = 0 +#define CALL_LARGS_1(t1, a1) \ + long arg1 = (long)(t1)(a1) +#define CALL_LARGS_2(t1, a1, t2, a2) \ + CALL_LARGS_1(t1, a1); \ + long arg2 = (long)(t2)(a2) +#define CALL_LARGS_3(t1, a1, t2, a2, t3, a3) \ + CALL_LARGS_2(t1, a1, t2, a2); \ + long arg3 = (long)(t3)(a3) +#define CALL_LARGS_4(t1, a1, t2, a2, t3, a3, t4, a4) \ + CALL_LARGS_3(t1, a1, t2, a2, t3, a3); \ + long arg4 = (long)(t4)(a4) +#define CALL_LARGS_5(t1, a1, t2, a2, t3, a3, t4, a4, t5, a5) \ + CALL_LARGS_4(t1, a1, t2, a2, t3, a3, t4, a4); \ + long arg5 = (long)(t5)(a5) + +#define CALL_REGS_0 \ + register long r2 asm("2") = dummy +#define CALL_REGS_1 \ + register long r2 asm("2") = arg1 +#define CALL_REGS_2 \ + CALL_REGS_1; \ + register long r3 asm("3") = arg2 +#define CALL_REGS_3 \ + CALL_REGS_2; \ + register long r4 asm("4") = arg3 +#define CALL_REGS_4 \ + CALL_REGS_3; \ + register long r5 asm("5") = arg4 +#define CALL_REGS_5 \ + CALL_REGS_4; \ + register long r6 asm("6") = arg5 + +#define CALL_TYPECHECK_0(...) +#define CALL_TYPECHECK_1(t, a, ...) \ + typecheck(t, a) +#define CALL_TYPECHECK_2(t, a, ...) \ + CALL_TYPECHECK_1(__VA_ARGS__); \ + typecheck(t, a) +#define CALL_TYPECHECK_3(t, a, ...) \ + CALL_TYPECHECK_2(__VA_ARGS__); \ + typecheck(t, a) +#define CALL_TYPECHECK_4(t, a, ...) \ + CALL_TYPECHECK_3(__VA_ARGS__); \ + typecheck(t, a) +#define CALL_TYPECHECK_5(t, a, ...) \ + CALL_TYPECHECK_4(__VA_ARGS__); \ + typecheck(t, a) + +#define CALL_PARM_0(...) void +#define CALL_PARM_1(t, a, ...) t +#define CALL_PARM_2(t, a, ...) t, CALL_PARM_1(__VA_ARGS__) +#define CALL_PARM_3(t, a, ...) t, CALL_PARM_2(__VA_ARGS__) +#define CALL_PARM_4(t, a, ...) t, CALL_PARM_3(__VA_ARGS__) +#define CALL_PARM_5(t, a, ...) t, CALL_PARM_4(__VA_ARGS__) +#define CALL_PARM_6(t, a, ...) t, CALL_PARM_5(__VA_ARGS__) + +/* + * Use call_on_stack() to call a function switching to a specified + * stack. Proper sign and zero extension of function arguments is + * done. Usage: + * + * rc = call_on_stack(nr, stack, rettype, fn, t1, a1, t2, a2, ...) + * + * - nr specifies the number of function arguments of fn. + * - stack specifies the stack to be used. + * - fn is the function to be called. + * - rettype is the return type of fn. + * - t1, a1, ... are pairs, where t1 must match the type of the first + * argument of fn, t2 the second, etc. a1 is the corresponding + * first function argument (not name), etc. + */ +#define call_on_stack(nr, stack, rettype, fn, ...) \ +({ \ + rettype (*__fn)(CALL_PARM_##nr(__VA_ARGS__)) = fn; \ + unsigned long frame = current_frame_address(); \ + unsigned long __stack = stack; \ + unsigned long prev; \ + CALL_LARGS_##nr(__VA_ARGS__); \ + CALL_REGS_##nr; \ + \ + CALL_TYPECHECK_##nr(__VA_ARGS__); \ + asm volatile( \ + " lgr %[_prev],15\n" \ + " lg 15,%[_stack]\n" \ + " stg %[_frame],%[_bc](15)\n" \ + " brasl 14,%[_fn]\n" \ + " lgr 15,%[_prev]\n" \ + : [_prev] "=&d" (prev), CALL_FMT_##nr \ + : [_stack] "R" (__stack), \ + [_bc] "i" (offsetof(struct stack_frame, back_chain)), \ + [_frame] "d" (frame), \ + [_fn] "X" (__fn) : CALL_CLOBBER_##nr); \ + (rettype)r2; \ +}) + +#define CALL_ON_STACK_NORETURN(fn, stack) \ +({ \ + asm volatile( \ + " la 15,0(%[_stack])\n" \ + " xc %[_bc](8,15),%[_bc](15)\n" \ + " brasl 14,%[_fn]\n" \ + ::[_bc] "i" (offsetof(struct stack_frame, back_chain)), \ + [_stack] "a" (stack), [_fn] "X" (fn)); \ + BUG(); \ +}) + #endif /* _ASM_S390_STACKTRACE_H */ diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index f073292e9fdb1..d9d5de0f67ffe 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -33,7 +33,17 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return IS_ERR_VALUE(regs->gprs[2]) ? regs->gprs[2] : 0; + unsigned long error = regs->gprs[2]; +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(task, TIF_31BIT)) { + /* + * Sign-extend the value so (int)-EFOO becomes (long)-EFOO + * and will match correctly in comparisons. + */ + error = (long)(int)error; + } +#endif + return IS_ERR_VALUE(error) ? error : 0; } static inline long syscall_get_return_value(struct task_struct *task, diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 64539c221672b..99a7e028232d8 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -10,8 +10,9 @@ #ifndef _ASM_S390_TIMEX_H #define _ASM_S390_TIMEX_H -#include +#include #include +#include /* The value of the TOD clock for 1.1.1970. */ #define TOD_UNIX_EPOCH 0x7d91048bca000000ULL @@ -154,7 +155,7 @@ static inline void get_tod_clock_ext(char *clk) static inline unsigned long long get_tod_clock(void) { - unsigned char clk[STORE_CLOCK_EXT_SIZE]; + char clk[STORE_CLOCK_EXT_SIZE]; get_tod_clock_ext(clk); return *((unsigned long long *)&clk[1]); @@ -176,6 +177,7 @@ static inline cycles_t get_cycles(void) { return (cycles_t) get_tod_clock() >> 2; } +#define get_cycles get_cycles int get_phys_clock(unsigned long *clock); void init_cpu_timer(void); @@ -186,15 +188,18 @@ extern unsigned char tod_clock_base[16] __aligned(8); /** * get_clock_monotonic - returns current time in clock rate units * - * The caller must ensure that preemption is disabled. * The clock and tod_clock_base get changed via stop_machine. - * Therefore preemption must be disabled when calling this - * function, otherwise the returned value is not guaranteed to - * be monotonic. + * Therefore preemption must be disabled, otherwise the returned + * value is not guaranteed to be monotonic. */ static inline unsigned long long get_tod_clock_monotonic(void) { - return get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; + unsigned long long tod; + + preempt_disable_notrace(); + tod = get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; + preempt_enable_notrace(); + return tod; } /** diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index cca406fdbe51f..e44679bad9f80 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -68,11 +68,8 @@ static inline void topology_expect_change(void) { } #ifdef CONFIG_NUMA -#define cpu_to_node cpu_to_node -static inline int cpu_to_node(int cpu) -{ - return cpu_topology[cpu].node_id; -} +extern int __cpu_to_node(int cpu); +#define cpu_to_node __cpu_to_node /* Returns a pointer to the cpumask of CPUs on node 'node'. */ #define cpumask_of_node cpumask_of_node @@ -83,8 +80,6 @@ static inline const struct cpumask *cpumask_of_node(int node) #define pcibus_to_node(bus) __pcibus_to_node(bus) -#define node_distance(a, b) __node_distance(a, b) - #else /* !CONFIG_NUMA */ #define numa_node_id numa_node_id diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index a470f1fa9f2af..f5518c649507f 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -60,54 +60,109 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n); #define INLINE_COPY_TO_USER #endif +unsigned long __must_check +_copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key); + +static __always_inline unsigned long __must_check +copy_from_user_key(void *to, const void __user *from, unsigned long n, unsigned long key) +{ + if (likely(check_copy_size(to, n, false))) + n = _copy_from_user_key(to, from, n, key); + return n; +} + +unsigned long __must_check +_copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key); + +static __always_inline unsigned long __must_check +copy_to_user_key(void __user *to, const void *from, unsigned long n, unsigned long key) +{ + if (likely(check_copy_size(from, n, true))) + n = _copy_to_user_key(to, from, n, key); + return n; +} + +union oac { + unsigned int val; + struct { + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac1; + struct { + unsigned short key : 4; + unsigned short : 4; + unsigned short as : 2; + unsigned short : 4; + unsigned short k : 1; + unsigned short a : 1; + } oac2; + }; +}; + #ifdef CONFIG_HAVE_MARCH_Z10_FEATURES -#define __put_get_user_asm(to, from, size, spec) \ -({ \ - register unsigned long __reg0 asm("0") = spec; \ - int __rc; \ - \ - asm volatile( \ - "0: mvcos %1,%3,%2\n" \ - "1: xr %0,%0\n" \ - "2:\n" \ - ".pushsection .fixup, \"ax\"\n" \ - "3: lhi %0,%5\n" \ - " jg 2b\n" \ - ".popsection\n" \ - EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ - : "=d" (__rc), "+Q" (*(to)) \ - : "d" (size), "Q" (*(from)), \ - "d" (__reg0), "K" (-EFAULT) \ - : "cc"); \ - __rc; \ +#define __put_get_user_asm(to, from, size, oac_spec) \ +({ \ + int __rc; \ + \ + asm volatile( \ + " lr 0,%[spec]\n" \ + "0: mvcos %[_to],%[_from],%[_size]\n" \ + "1: xr %[rc],%[rc]\n" \ + "2:\n" \ + ".pushsection .fixup, \"ax\"\n" \ + "3: lhi %[rc],%[retval]\n" \ + " jg 2b\n" \ + ".popsection\n" \ + EX_TABLE(0b,3b) EX_TABLE(1b,3b) \ + : [rc] "=&d" (__rc), [_to] "+Q" (*(to)) \ + : [_size] "d" (size), [_from] "Q" (*(from)), \ + [retval] "K" (-EFAULT), [spec] "d" (oac_spec.val) \ + : "cc", "0"); \ + __rc; \ }) +#define __put_user_asm(to, from, size) \ + __put_get_user_asm(to, from, size, ((union oac) { \ + .oac1.as = PSW_BITS_AS_PRIMARY, \ + .oac1.a = 1 \ + })) + +#define __get_user_asm(to, from, size) \ + __put_get_user_asm(to, from, size, ((union oac) { \ + .oac2.as = PSW_BITS_AS_PRIMARY, \ + .oac2.a = 1 \ + })) \ + static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) { - unsigned long spec = 0x010000UL; int rc; switch (size) { case 1: - rc = __put_get_user_asm((unsigned char __user *)ptr, - (unsigned char *)x, - size, spec); + rc = __put_user_asm((unsigned char __user *)ptr, + (unsigned char *)x, + size); break; case 2: - rc = __put_get_user_asm((unsigned short __user *)ptr, - (unsigned short *)x, - size, spec); + rc = __put_user_asm((unsigned short __user *)ptr, + (unsigned short *)x, + size); break; case 4: - rc = __put_get_user_asm((unsigned int __user *)ptr, - (unsigned int *)x, - size, spec); + rc = __put_user_asm((unsigned int __user *)ptr, + (unsigned int *)x, + size); break; case 8: - rc = __put_get_user_asm((unsigned long __user *)ptr, - (unsigned long *)x, - size, spec); + rc = __put_user_asm((unsigned long __user *)ptr, + (unsigned long *)x, + size); break; } return rc; @@ -115,29 +170,28 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) { - unsigned long spec = 0x01UL; int rc; switch (size) { case 1: - rc = __put_get_user_asm((unsigned char *)x, - (unsigned char __user *)ptr, - size, spec); + rc = __get_user_asm((unsigned char *)x, + (unsigned char __user *)ptr, + size); break; case 2: - rc = __put_get_user_asm((unsigned short *)x, - (unsigned short __user *)ptr, - size, spec); + rc = __get_user_asm((unsigned short *)x, + (unsigned short __user *)ptr, + size); break; case 4: - rc = __put_get_user_asm((unsigned int *)x, - (unsigned int __user *)ptr, - size, spec); + rc = __get_user_asm((unsigned int *)x, + (unsigned int __user *)ptr, + size); break; case 8: - rc = __put_get_user_asm((unsigned long *)x, - (unsigned long __user *)ptr, - size, spec); + rc = __get_user_asm((unsigned long *)x, + (unsigned long __user *)ptr, + size); break; } return rc; @@ -276,6 +330,6 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo } int copy_to_user_real(void __user *dest, void *src, unsigned long count); -void s390_kernel_write(void *dst, const void *src, size_t size); +void *s390_kernel_write(void *dst, const void *src, size_t size); #endif /* __S390_UACCESS_H */ diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index ef3c00b049ab4..595704e7feaeb 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -14,23 +14,67 @@ #include #include #include +#include #include +#include + +#define UVC_CC_OK 0 +#define UVC_CC_ERROR 1 +#define UVC_CC_BUSY 2 +#define UVC_CC_PARTIAL 3 #define UVC_RC_EXECUTED 0x0001 #define UVC_RC_INV_CMD 0x0002 #define UVC_RC_INV_STATE 0x0003 #define UVC_RC_INV_LEN 0x0005 #define UVC_RC_NO_RESUME 0x0007 +#define UVC_RC_NEED_DESTROY 0x8000 #define UVC_CMD_QUI 0x0001 +#define UVC_CMD_INIT_UV 0x000f +#define UVC_CMD_CREATE_SEC_CONF 0x0100 +#define UVC_CMD_DESTROY_SEC_CONF 0x0101 +#define UVC_CMD_CREATE_SEC_CPU 0x0120 +#define UVC_CMD_DESTROY_SEC_CPU 0x0121 +#define UVC_CMD_CONV_TO_SEC_STOR 0x0200 +#define UVC_CMD_CONV_FROM_SEC_STOR 0x0201 +#define UVC_CMD_SET_SEC_CONF_PARAMS 0x0300 +#define UVC_CMD_UNPACK_IMG 0x0301 +#define UVC_CMD_VERIFY_IMG 0x0302 +#define UVC_CMD_CPU_RESET 0x0310 +#define UVC_CMD_CPU_RESET_INITIAL 0x0311 +#define UVC_CMD_PREPARE_RESET 0x0320 +#define UVC_CMD_CPU_RESET_CLEAR 0x0321 +#define UVC_CMD_CPU_SET_STATE 0x0330 +#define UVC_CMD_SET_UNSHARE_ALL 0x0340 +#define UVC_CMD_PIN_PAGE_SHARED 0x0341 +#define UVC_CMD_UNPIN_PAGE_SHARED 0x0342 #define UVC_CMD_SET_SHARED_ACCESS 0x1000 #define UVC_CMD_REMOVE_SHARED_ACCESS 0x1001 /* Bits in installed uv calls */ enum uv_cmds_inst { BIT_UVC_CMD_QUI = 0, + BIT_UVC_CMD_INIT_UV = 1, + BIT_UVC_CMD_CREATE_SEC_CONF = 2, + BIT_UVC_CMD_DESTROY_SEC_CONF = 3, + BIT_UVC_CMD_CREATE_SEC_CPU = 4, + BIT_UVC_CMD_DESTROY_SEC_CPU = 5, + BIT_UVC_CMD_CONV_TO_SEC_STOR = 6, + BIT_UVC_CMD_CONV_FROM_SEC_STOR = 7, BIT_UVC_CMD_SET_SHARED_ACCESS = 8, BIT_UVC_CMD_REMOVE_SHARED_ACCESS = 9, + BIT_UVC_CMD_SET_SEC_PARMS = 11, + BIT_UVC_CMD_UNPACK_IMG = 13, + BIT_UVC_CMD_VERIFY_IMG = 14, + BIT_UVC_CMD_CPU_RESET = 15, + BIT_UVC_CMD_CPU_RESET_INITIAL = 16, + BIT_UVC_CMD_CPU_SET_STATE = 17, + BIT_UVC_CMD_PREPARE_RESET = 18, + BIT_UVC_CMD_CPU_PERFORM_CLEAR_RESET = 19, + BIT_UVC_CMD_UNSHARE_ALL = 20, + BIT_UVC_CMD_PIN_PAGE_SHARED = 21, + BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22, }; struct uv_cb_header { @@ -40,13 +84,127 @@ struct uv_cb_header { u16 rrc; /* Return Reason Code */ } __packed __aligned(8); +/* Query Ultravisor Information */ struct uv_cb_qui { struct uv_cb_header header; u64 reserved08; u64 inst_calls_list[4]; - u64 reserved30[15]; + u64 reserved30[2]; + u64 uv_base_stor_len; + u64 reserved48; + u64 conf_base_phys_stor_len; + u64 conf_base_virt_stor_len; + u64 conf_virt_var_stor_len; + u64 cpu_stor_len; + u32 reserved70[3]; + u32 max_num_sec_conf; + u64 max_guest_stor_addr; + u8 reserved88[158 - 136]; + u16 max_guest_cpus; + u8 reserveda0[200 - 160]; +} __packed __aligned(8); + +/* Initialize Ultravisor */ +struct uv_cb_init { + struct uv_cb_header header; + u64 reserved08[2]; + u64 stor_origin; + u64 stor_len; + u64 reserved28[4]; +} __packed __aligned(8); + +/* Create Guest Configuration */ +struct uv_cb_cgc { + struct uv_cb_header header; + u64 reserved08[2]; + u64 guest_handle; + u64 conf_base_stor_origin; + u64 conf_virt_stor_origin; + u64 reserved30; + u64 guest_stor_origin; + u64 guest_stor_len; + u64 guest_sca; + u64 guest_asce; + u64 reserved58[5]; +} __packed __aligned(8); + +/* Create Secure CPU */ +struct uv_cb_csc { + struct uv_cb_header header; + u64 reserved08[2]; + u64 cpu_handle; + u64 guest_handle; + u64 stor_origin; + u8 reserved30[6]; + u16 num; + u64 state_origin; + u64 reserved40[4]; +} __packed __aligned(8); + +/* Convert to Secure */ +struct uv_cb_cts { + struct uv_cb_header header; + u64 reserved08[2]; + u64 guest_handle; + u64 gaddr; +} __packed __aligned(8); + +/* Convert from Secure / Pin Page Shared */ +struct uv_cb_cfs { + struct uv_cb_header header; + u64 reserved08[2]; + u64 paddr; +} __packed __aligned(8); + +/* Set Secure Config Parameter */ +struct uv_cb_ssc { + struct uv_cb_header header; + u64 reserved08[2]; + u64 guest_handle; + u64 sec_header_origin; + u32 sec_header_len; + u32 reserved2c; + u64 reserved30[4]; } __packed __aligned(8); +/* Unpack */ +struct uv_cb_unp { + struct uv_cb_header header; + u64 reserved08[2]; + u64 guest_handle; + u64 gaddr; + u64 tweak[2]; + u64 reserved38[3]; +} __packed __aligned(8); + +#define PV_CPU_STATE_OPR 1 +#define PV_CPU_STATE_STP 2 +#define PV_CPU_STATE_CHKSTP 3 +#define PV_CPU_STATE_OPR_LOAD 5 + +struct uv_cb_cpu_set_state { + struct uv_cb_header header; + u64 reserved08[2]; + u64 cpu_handle; + u8 reserved20[7]; + u8 state; + u64 reserved28[5]; +}; + +/* + * A common UV call struct for calls that take no payload + * Examples: + * Destroy cpu/config + * Verify + */ +struct uv_cb_nodata { + struct uv_cb_header header; + u64 reserved08[2]; + u64 handle; + u64 reserved20[4]; +} __packed __aligned(8); + +/* Set Shared Access */ struct uv_cb_share { struct uv_cb_header header; u64 reserved08[3]; @@ -54,21 +212,76 @@ struct uv_cb_share { u64 reserved28; } __packed __aligned(8); -static inline int uv_call(unsigned long r1, unsigned long r2) +static inline int __uv_call(unsigned long r1, unsigned long r2) { int cc; asm volatile( - "0: .insn rrf,0xB9A40000,%[r1],%[r2],0,0\n" - " brc 3,0b\n" - " ipm %[cc]\n" - " srl %[cc],28\n" + " .insn rrf,0xB9A40000,%[r1],%[r2],0,0\n" + " ipm %[cc]\n" + " srl %[cc],28\n" : [cc] "=d" (cc) : [r1] "a" (r1), [r2] "a" (r2) : "memory", "cc"); return cc; } +static inline int uv_call(unsigned long r1, unsigned long r2) +{ + int cc; + + do { + cc = __uv_call(r1, r2); + } while (cc > 1); + return cc; +} + +/* Low level uv_call that avoids stalls for long running busy conditions */ +static inline int uv_call_sched(unsigned long r1, unsigned long r2) +{ + int cc; + + do { + cc = __uv_call(r1, r2); + cond_resched(); + } while (cc > 1); + return cc; +} + +/* + * special variant of uv_call that only transports the cpu or guest + * handle and the command, like destroy or verify. + */ +static inline int uv_cmd_nodata(u64 handle, u16 cmd, u16 *rc, u16 *rrc) +{ + struct uv_cb_nodata uvcb = { + .header.cmd = cmd, + .header.len = sizeof(uvcb), + .handle = handle, + }; + int cc; + + WARN(!handle, "No handle provided to Ultravisor call cmd %x\n", cmd); + cc = uv_call_sched(0, (u64)&uvcb); + *rc = uvcb.header.rc; + *rrc = uvcb.header.rrc; + return cc ? -EINVAL : 0; +} + +struct uv_info { + unsigned long inst_calls_list[4]; + unsigned long uv_base_stor_len; + unsigned long guest_base_stor_len; + unsigned long guest_virt_base_stor_len; + unsigned long guest_virt_var_stor_len; + unsigned long guest_cpu_stor_len; + unsigned long max_sec_stor_addr; + unsigned int max_num_sec_conf; + unsigned short max_guest_cpus; +}; + +extern struct uv_info uv_info; + #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST extern int prot_virt_guest; @@ -121,11 +334,40 @@ static inline int uv_remove_shared(unsigned long addr) return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS); } -void uv_query_info(void); #else #define is_prot_virt_guest() 0 static inline int uv_set_shared(unsigned long addr) { return 0; } static inline int uv_remove_shared(unsigned long addr) { return 0; } +#endif + +#if IS_ENABLED(CONFIG_KVM) +extern int prot_virt_host; + +static inline int is_prot_virt_host(void) +{ + return prot_virt_host; +} + +int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); +int uv_convert_from_secure(unsigned long paddr); +int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); + +void setup_uv(void); +void adjust_to_uv_max(unsigned long *vmax); +#else +#define is_prot_virt_host() 0 +static inline void setup_uv(void) {} +static inline void adjust_to_uv_max(unsigned long *vmax) {} + +static inline int uv_convert_from_secure(unsigned long paddr) +{ + return 0; +} +#endif + +#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) +void uv_query_info(void); +#else static inline void uv_query_info(void) {} #endif diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 169d7604eb804..f3ba84fa9bd18 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -36,6 +36,7 @@ struct vdso_data { __u32 tk_shift; /* Shift used for xtime_nsec 0x60 */ __u32 ts_dir; /* TOD steering direction 0x64 */ __u64 ts_end; /* TOD steering end 0x68 */ + __u32 hrtimer_res; /* hrtimer resolution 0x70 */ }; struct vdso_per_cpu_data { diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h index 451ba7d08905c..d1ecd5d722a0a 100644 --- a/arch/s390/include/uapi/asm/ipl.h +++ b/arch/s390/include/uapi/asm/ipl.h @@ -27,6 +27,7 @@ enum ipl_pbt { IPL_PBT_FCP = 0, IPL_PBT_SCP_DATA = 1, IPL_PBT_CCW = 2, + IPL_PBT_NVME = 4, }; /* IPL Parameter Block 0 with common fields */ @@ -67,6 +68,30 @@ struct ipl_pb0_fcp { #define IPL_PB0_FCP_OPT_IPL 0x10 #define IPL_PB0_FCP_OPT_DUMP 0x20 +/* IPL Parameter Block 0 for NVMe */ +struct ipl_pb0_nvme { + __u32 len; + __u8 pbt; + __u8 reserved1[3]; + __u8 loadparm[8]; + __u8 reserved2[304]; + __u8 opt; + __u8 reserved3[3]; + __u32 fid; + __u8 reserved4[12]; + __u32 nsid; + __u8 reserved5[4]; + __u32 bootprog; + __u8 reserved6[12]; + __u64 br_lba; + __u32 scp_data_len; + __u8 reserved7[260]; + __u8 scp_data[]; +} __packed; + +#define IPL_PB0_NVME_OPT_IPL 0x10 +#define IPL_PB0_NVME_OPT_DUMP 0x20 + /* IPL Parameter Block 0 for CCW */ struct ipl_pb0_ccw { __u32 len; diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 436ec76369273..7a6b14874d65c 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -231,11 +231,13 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_GSCB (1UL << 9) #define KVM_SYNC_BPBC (1UL << 10) #define KVM_SYNC_ETOKEN (1UL << 11) +#define KVM_SYNC_DIAG318 (1UL << 12) #define KVM_SYNC_S390_VALID_FIELDS \ (KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \ KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \ - KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN) + KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN | \ + KVM_SYNC_DIAG318) /* length and alignment of the sdnx as a power of two */ #define SDNXC 8 @@ -264,7 +266,8 @@ struct kvm_sync_regs { __u8 reserved2 : 7; __u8 padding1[51]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ - __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ + __u64 diag318; /* diagnose 0x318 info */ + __u8 padding2[184]; /* sdnx needs to be 256byte aligned */ union { __u8 sdnx[SDNXL]; /* state description annex */ struct { diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h index e22f0720bbb8b..d27d7d3292634 100644 --- a/arch/s390/include/uapi/asm/pkey.h +++ b/arch/s390/include/uapi/asm/pkey.h @@ -25,10 +25,11 @@ #define MAXPROTKEYSIZE 64 /* a protected key blob may be up to 64 bytes */ #define MAXCLRKEYSIZE 32 /* a clear key value may be up to 32 bytes */ #define MAXAESCIPHERKEYSIZE 136 /* our aes cipher keys have always 136 bytes */ +#define MINEP11AESKEYBLOBSIZE 256 /* min EP11 AES key blob size */ +#define MAXEP11AESKEYBLOBSIZE 320 /* max EP11 AES key blob size */ -/* Minimum and maximum size of a key blob */ +/* Minimum size of a key blob */ #define MINKEYBLOBSIZE SECKEYBLOBSIZE -#define MAXKEYBLOBSIZE MAXAESCIPHERKEYSIZE /* defines for the type field within the pkey_protkey struct */ #define PKEY_KEYTYPE_AES_128 1 @@ -39,6 +40,7 @@ enum pkey_key_type { PKEY_TYPE_CCA_DATA = (__u32) 1, PKEY_TYPE_CCA_CIPHER = (__u32) 2, + PKEY_TYPE_EP11 = (__u32) 3, }; /* the newer ioctls use a pkey_key_size enum for key size information */ @@ -200,7 +202,7 @@ struct pkey_kblob2pkey { /* * Generate secure key, version 2. - * Generate either a CCA AES secure key or a CCA AES cipher key. + * Generate CCA AES secure key, CCA AES cipher key or EP11 AES secure key. * There needs to be a list of apqns given with at least one entry in there. * All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain * is not supported. The implementation walks through the list of apqns and @@ -210,10 +212,13 @@ struct pkey_kblob2pkey { * (return -1 with errno ENODEV). You may use the PKEY_APQNS4KT ioctl to * generate a list of apqns based on the key type to generate. * The keygenflags argument is passed to the low level generation functions - * individual for the key type and has a key type specific meaning. Currently - * only CCA AES cipher keys react to this parameter: Use one or more of the - * PKEY_KEYGEN_* flags to widen the export possibilities. By default a cipher - * key is only exportable for CPACF (PKEY_KEYGEN_XPRT_CPAC). + * individual for the key type and has a key type specific meaning. When + * generating CCA cipher keys you can use one or more of the PKEY_KEYGEN_* + * flags to widen the export possibilities. By default a cipher key is + * only exportable for CPACF (PKEY_KEYGEN_XPRT_CPAC). + * The keygenflag argument for generating an EP11 AES key should either be 0 + * to use the defaults which are XCP_BLOB_ENCRYPT, XCP_BLOB_DECRYPT and + * XCP_BLOB_PROTKEY_EXTRACTABLE or a valid combination of XCP_BLOB_* flags. */ struct pkey_genseck2 { struct pkey_apqn __user *apqns; /* in: ptr to list of apqn targets*/ @@ -229,8 +234,8 @@ struct pkey_genseck2 { /* * Generate secure key from clear key value, version 2. - * Construct a CCA AES secure key or CCA AES cipher key from a given clear key - * value. + * Construct an CCA AES secure key, CCA AES cipher key or EP11 AES secure + * key from a given clear key value. * There needs to be a list of apqns given with at least one entry in there. * All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain * is not supported. The implementation walks through the list of apqns and @@ -240,10 +245,13 @@ struct pkey_genseck2 { * (return -1 with errno ENODEV). You may use the PKEY_APQNS4KT ioctl to * generate a list of apqns based on the key type to generate. * The keygenflags argument is passed to the low level generation functions - * individual for the key type and has a key type specific meaning. Currently - * only CCA AES cipher keys react to this parameter: Use one or more of the - * PKEY_KEYGEN_* flags to widen the export possibilities. By default a cipher - * key is only exportable for CPACF (PKEY_KEYGEN_XPRT_CPAC). + * individual for the key type and has a key type specific meaning. When + * generating CCA cipher keys you can use one or more of the PKEY_KEYGEN_* + * flags to widen the export possibilities. By default a cipher key is + * only exportable for CPACF (PKEY_KEYGEN_XPRT_CPAC). + * The keygenflag argument for generating an EP11 AES key should either be 0 + * to use the defaults which are XCP_BLOB_ENCRYPT, XCP_BLOB_DECRYPT and + * XCP_BLOB_PROTKEY_EXTRACTABLE or a valid combination of XCP_BLOB_* flags. */ struct pkey_clr2seck2 { struct pkey_apqn __user *apqns; /* in: ptr to list of apqn targets */ @@ -266,14 +274,19 @@ struct pkey_clr2seck2 { * with one apqn able to handle this key. * The function also checks for the master key verification patterns * of the key matching to the current or alternate mkvp of the apqn. - * Currently CCA AES secure keys and CCA AES cipher keys are supported. - * The flags field is updated with some additional info about the apqn mkvp + * For CCA AES secure keys and CCA AES cipher keys this means to check + * the key's mkvp against the current or old mkvp of the apqns. The flags + * field is updated with some additional info about the apqn mkvp * match: If the current mkvp matches to the key's mkvp then the * PKEY_FLAGS_MATCH_CUR_MKVP bit is set, if the alternate mkvp matches to * the key's mkvp the PKEY_FLAGS_MATCH_ALT_MKVP is set. For CCA keys the * alternate mkvp is the old master key verification pattern. * CCA AES secure keys are also checked to have the CPACF export allowed * bit enabled (XPRTCPAC) in the kmf1 field. + * EP11 keys are also supported and the wkvp of the key is checked against + * the current wkvp of the apqns. There is no alternate for this type of + * key and so on a match the flag PKEY_FLAGS_MATCH_CUR_MKVP always is set. + * EP11 keys are also checked to have XCP_BLOB_PROTKEY_EXTRACTABLE set. * The ioctl returns 0 as long as the given or found apqn matches to * matches with the current or alternate mkvp to the key's mkvp. If the given * apqn does not match or there is no such apqn found, -1 with errno @@ -313,16 +326,20 @@ struct pkey_kblob2pkey2 { /* * Build a list of APQNs based on a key blob given. * Is able to find out which type of secure key is given (CCA AES secure - * key or CCA AES cipher key) and tries to find all matching crypto cards - * based on the MKVP and maybe other criterias (like CCA AES cipher keys - * need a CEX5C or higher). The list of APQNs is further filtered by the key's - * mkvp which needs to match to either the current mkvp or the alternate mkvp - * (which is the old mkvp on CCA adapters) of the apqns. The flags argument may - * be used to limit the matching apqns. If the PKEY_FLAGS_MATCH_CUR_MKVP is - * given, only the current mkvp of each apqn is compared. Likewise with the - * PKEY_FLAGS_MATCH_ALT_MKVP. If both are given, it is assumed to - * return apqns where either the current or the alternate mkvp + * key, CCA AES cipher key or EP11 AES key) and tries to find all matching + * crypto cards based on the MKVP and maybe other criterias (like CCA AES + * cipher keys need a CEX5C or higher, EP11 keys with BLOB_PKEY_EXTRACTABLE + * need a CEX7 and EP11 api version 4). The list of APQNs is further filtered + * by the key's mkvp which needs to match to either the current mkvp (CCA and + * EP11) or the alternate mkvp (old mkvp, CCA adapters only) of the apqns. The + * flags argument may be used to limit the matching apqns. If the + * PKEY_FLAGS_MATCH_CUR_MKVP is given, only the current mkvp of each apqn is + * compared. Likewise with the PKEY_FLAGS_MATCH_ALT_MKVP. If both are given, it + * is assumed to return apqns where either the current or the alternate mkvp * matches. At least one of the matching flags needs to be given. + * The flags argument for EP11 keys has no further action and is currently + * ignored (but needs to be given as PKEY_FLAGS_MATCH_CUR_MKVP) as there is only + * the wkvp from the key to match against the apqn's wkvp. * The list of matching apqns is stored into the space given by the apqns * argument and the number of stored entries goes into apqn_entries. If the list * is empty (apqn_entries is 0) the apqn_entries field is updated to the number @@ -356,6 +373,10 @@ struct pkey_apqns4key { * If both are given, it is assumed to return apqns where either the * current or the alternate mkvp matches. If no match flag is given * (flags is 0) the mkvp values are ignored for the match process. + * For EP11 keys there is only the current wkvp. So if the apqns should also + * match to a given wkvp, then the PKEY_FLAGS_MATCH_CUR_MKVP flag should be + * set. The wkvp value is 32 bytes but only the leftmost 16 bytes are compared + * against the leftmost 16 byte of the wkvp of the apqn. * The list of matching apqns is stored into the space given by the apqns * argument and the number of stored entries goes into apqn_entries. If the list * is empty (apqn_entries is 0) the apqn_entries field is updated to the number diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h index f9e5e1f0821d4..22fd202856bc7 100644 --- a/arch/s390/include/uapi/asm/zcrypt.h +++ b/arch/s390/include/uapi/asm/zcrypt.h @@ -36,12 +36,12 @@ * - length(n_modulus) = inputdatalength */ struct ica_rsa_modexpo { - char __user *inputdata; - unsigned int inputdatalength; - char __user *outputdata; - unsigned int outputdatalength; - char __user *b_key; - char __user *n_modulus; + __u8 __user *inputdata; + __u32 inputdatalength; + __u8 __user *outputdata; + __u32 outputdatalength; + __u8 __user *b_key; + __u8 __user *n_modulus; }; /** @@ -59,15 +59,15 @@ struct ica_rsa_modexpo { * - length(u_mult_inv) = inputdatalength/2 + 8 */ struct ica_rsa_modexpo_crt { - char __user *inputdata; - unsigned int inputdatalength; - char __user *outputdata; - unsigned int outputdatalength; - char __user *bp_key; - char __user *bq_key; - char __user *np_prime; - char __user *nq_prime; - char __user *u_mult_inv; + __u8 __user *inputdata; + __u32 inputdatalength; + __u8 __user *outputdata; + __u32 outputdatalength; + __u8 __user *bp_key; + __u8 __user *bq_key; + __u8 __user *np_prime; + __u8 __user *nq_prime; + __u8 __user *u_mult_inv; }; /** @@ -83,67 +83,67 @@ struct ica_rsa_modexpo_crt { * key block */ struct CPRBX { - unsigned short cprb_len; /* CPRB length 220 */ - unsigned char cprb_ver_id; /* CPRB version id. 0x02 */ - unsigned char pad_000[3]; /* Alignment pad bytes */ - unsigned char func_id[2]; /* function id 0x5432 */ - unsigned char cprb_flags[4]; /* Flags */ - unsigned int req_parml; /* request parameter buffer len */ - unsigned int req_datal; /* request data buffer */ - unsigned int rpl_msgbl; /* reply message block length */ - unsigned int rpld_parml; /* replied parameter block len */ - unsigned int rpl_datal; /* reply data block len */ - unsigned int rpld_datal; /* replied data block len */ - unsigned int req_extbl; /* request extension block len */ - unsigned char pad_001[4]; /* reserved */ - unsigned int rpld_extbl; /* replied extension block len */ - unsigned char padx000[16 - sizeof(char *)]; - unsigned char *req_parmb; /* request parm block 'address' */ - unsigned char padx001[16 - sizeof(char *)]; - unsigned char *req_datab; /* request data block 'address' */ - unsigned char padx002[16 - sizeof(char *)]; - unsigned char *rpl_parmb; /* reply parm block 'address' */ - unsigned char padx003[16 - sizeof(char *)]; - unsigned char *rpl_datab; /* reply data block 'address' */ - unsigned char padx004[16 - sizeof(char *)]; - unsigned char *req_extb; /* request extension block 'addr'*/ - unsigned char padx005[16 - sizeof(char *)]; - unsigned char *rpl_extb; /* reply extension block 'address'*/ - unsigned short ccp_rtcode; /* server return code */ - unsigned short ccp_rscode; /* server reason code */ - unsigned int mac_data_len; /* Mac Data Length */ - unsigned char logon_id[8]; /* Logon Identifier */ - unsigned char mac_value[8]; /* Mac Value */ - unsigned char mac_content_flgs;/* Mac content flag byte */ - unsigned char pad_002; /* Alignment */ - unsigned short domain; /* Domain */ - unsigned char usage_domain[4];/* Usage domain */ - unsigned char cntrl_domain[4];/* Control domain */ - unsigned char S390enf_mask[4];/* S/390 enforcement mask */ - unsigned char pad_004[36]; /* reserved */ + __u16 cprb_len; /* CPRB length 220 */ + __u8 cprb_ver_id; /* CPRB version id. 0x02 */ + __u8 pad_000[3]; /* Alignment pad bytes */ + __u8 func_id[2]; /* function id 0x5432 */ + __u8 cprb_flags[4]; /* Flags */ + __u32 req_parml; /* request parameter buffer len */ + __u32 req_datal; /* request data buffer */ + __u32 rpl_msgbl; /* reply message block length */ + __u32 rpld_parml; /* replied parameter block len */ + __u32 rpl_datal; /* reply data block len */ + __u32 rpld_datal; /* replied data block len */ + __u32 req_extbl; /* request extension block len */ + __u8 pad_001[4]; /* reserved */ + __u32 rpld_extbl; /* replied extension block len */ + __u8 padx000[16 - sizeof(__u8 *)]; + __u8 __user *req_parmb; /* request parm block 'address' */ + __u8 padx001[16 - sizeof(__u8 *)]; + __u8 __user *req_datab; /* request data block 'address' */ + __u8 padx002[16 - sizeof(__u8 *)]; + __u8 __user *rpl_parmb; /* reply parm block 'address' */ + __u8 padx003[16 - sizeof(__u8 *)]; + __u8 __user *rpl_datab; /* reply data block 'address' */ + __u8 padx004[16 - sizeof(__u8 *)]; + __u8 __user *req_extb; /* request extension block 'addr'*/ + __u8 padx005[16 - sizeof(__u8 *)]; + __u8 __user *rpl_extb; /* reply extension block 'address'*/ + __u16 ccp_rtcode; /* server return code */ + __u16 ccp_rscode; /* server reason code */ + __u32 mac_data_len; /* Mac Data Length */ + __u8 logon_id[8]; /* Logon Identifier */ + __u8 mac_value[8]; /* Mac Value */ + __u8 mac_content_flgs; /* Mac content flag byte */ + __u8 pad_002; /* Alignment */ + __u16 domain; /* Domain */ + __u8 usage_domain[4]; /* Usage domain */ + __u8 cntrl_domain[4]; /* Control domain */ + __u8 S390enf_mask[4]; /* S/390 enforcement mask */ + __u8 pad_004[36]; /* reserved */ } __attribute__((packed)); /** * xcRB */ struct ica_xcRB { - unsigned short agent_ID; - unsigned int user_defined; - unsigned short request_ID; - unsigned int request_control_blk_length; - unsigned char padding1[16 - sizeof(char *)]; - char __user *request_control_blk_addr; - unsigned int request_data_length; - char padding2[16 - sizeof(char *)]; - char __user *request_data_address; - unsigned int reply_control_blk_length; - char padding3[16 - sizeof(char *)]; - char __user *reply_control_blk_addr; - unsigned int reply_data_length; - char padding4[16 - sizeof(char *)]; - char __user *reply_data_addr; - unsigned short priority_window; - unsigned int status; + __u16 agent_ID; + __u32 user_defined; + __u16 request_ID; + __u32 request_control_blk_length; + __u8 _padding1[16 - sizeof(__u8 *)]; + __u8 __user *request_control_blk_addr; + __u32 request_data_length; + __u8 _padding2[16 - sizeof(__u8 *)]; + __u8 __user *request_data_address; + __u32 reply_control_blk_length; + __u8 _padding3[16 - sizeof(__u8 *)]; + __u8 __user *reply_control_blk_addr; + __u32 reply_data_length; + __u8 __padding4[16 - sizeof(__u8 *)]; + __u8 __user *reply_data_addr; + __u16 priority_window; + __u32 status; } __attribute__((packed)); /** @@ -161,17 +161,17 @@ struct ica_xcRB { * @payload_len: Payload length */ struct ep11_cprb { - __u16 cprb_len; - unsigned char cprb_ver_id; - unsigned char pad_000[2]; - unsigned char flags; - unsigned char func_id[2]; - __u32 source_id; - __u32 target_id; - __u32 ret_code; - __u32 reserved1; - __u32 reserved2; - __u32 payload_len; + __u16 cprb_len; + __u8 cprb_ver_id; + __u8 pad_000[2]; + __u8 flags; + __u8 func_id[2]; + __u32 source_id; + __u32 target_id; + __u32 ret_code; + __u32 reserved1; + __u32 reserved2; + __u32 payload_len; } __attribute__((packed)); /** @@ -197,13 +197,13 @@ struct ep11_target_dev { */ struct ep11_urb { __u16 targets_num; - __u64 targets; + __u8 __user *targets; __u64 weight; __u64 req_no; __u64 req_len; - __u64 req; + __u8 __user *req; __u64 resp_len; - __u64 resp; + __u8 __user *resp; } __attribute__((packed)); /** @@ -237,7 +237,9 @@ struct zcrypt_device_matrix_ext { struct zcrypt_device_status_ext device[MAX_ZDEV_ENTRIES_EXT]; }; -#define AUTOSELECT 0xFFFFFFFF +#define AUTOSELECT 0xFFFFFFFF +#define AUTOSEL_AP ((__u16) 0xFFFF) +#define AUTOSEL_DOM ((__u16) 0xFFFF) #define ZCRYPT_IOCTL_MAGIC 'z' diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 7edbbcd8228ab..1efbad2727764 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -78,7 +78,11 @@ obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_diag.o obj-$(CONFIG_TRACEPOINTS) += trace.o +obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o # vdso obj-y += vdso64/ obj-$(CONFIG_COMPAT_VDSO) += vdso32/ + +# kernel message catalog +obj-$(CONFIG_KMSG_IDS) += kmsg.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 41ac4ad213110..a65cb4924bdbd 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -76,6 +76,7 @@ int main(void) OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift); OFFSET(__VDSO_TS_DIR, vdso_data, ts_dir); OFFSET(__VDSO_TS_END, vdso_data, ts_end); + OFFSET(__VDSO_CLOCK_REALTIME_RES, vdso_data, hrtimer_res); OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base); OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time); OFFSET(__VDSO_CPU_NR, vdso_per_cpu_data, cpu_nr); @@ -87,7 +88,6 @@ int main(void) DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID); - DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC); BLANK(); /* idle data offsets */ @@ -125,6 +125,8 @@ int main(void) OFFSET(__LC_EXT_DAMAGE_CODE, lowcore, external_damage_code); OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, failing_storage_address); OFFSET(__LC_LAST_BREAK, lowcore, breaking_event_addr); + OFFSET(__LC_RETURN_LPSWE, lowcore, return_lpswe); + OFFSET(__LC_RETURN_MCCK_LPSWE, lowcore, return_mcck_lpswe); OFFSET(__LC_RST_OLD_PSW, lowcore, restart_old_psw); OFFSET(__LC_EXT_OLD_PSW, lowcore, external_old_psw); OFFSET(__LC_SVC_OLD_PSW, lowcore, svc_old_psw); diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index af013b4244d34..2da0273597989 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -37,10 +37,12 @@ static int diag8_noresponse(int cmdlen) static int diag8_response(int cmdlen, char *response, int *rlen) { + unsigned long _cmdlen = cmdlen | 0x40000000L; + unsigned long _rlen = *rlen; register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf; register unsigned long reg3 asm ("3") = (addr_t) response; - register unsigned long reg4 asm ("4") = cmdlen | 0x40000000L; - register unsigned long reg5 asm ("5") = *rlen; + register unsigned long reg4 asm ("4") = _cmdlen; + register unsigned long reg5 asm ("5") = _rlen; asm volatile( " diag %2,%0,0x8\n" diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 6d321f5f101d6..b1aadc3ad065d 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -198,9 +198,10 @@ static debug_entry_t ***debug_areas_alloc(int pages_per_area, int nr_areas) if (!areas) goto fail_malloc_areas; for (i = 0; i < nr_areas; i++) { + /* GFP_NOWARN to avoid user triggerable WARN, we handle fails */ areas[i] = kmalloc_array(pages_per_area, sizeof(debug_entry_t *), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (!areas[i]) goto fail_malloc_areas2; for (j = 0; j < pages_per_area; j++) { @@ -326,24 +327,6 @@ static debug_info_t *debug_info_create(const char *name, int pages_per_area, goto out; rc->mode = mode & ~S_IFMT; - - /* create root directory */ - rc->debugfs_root_entry = debugfs_create_dir(rc->name, - debug_debugfs_root_entry); - - /* append new element to linked list */ - if (!debug_area_first) { - /* first element in list */ - debug_area_first = rc; - rc->prev = NULL; - } else { - /* append element to end of list */ - debug_area_last->next = rc; - rc->prev = debug_area_last; - } - debug_area_last = rc; - rc->next = NULL; - refcount_set(&rc->ref_count, 1); out: return rc; @@ -403,27 +386,10 @@ static void debug_info_get(debug_info_t *db_info) */ static void debug_info_put(debug_info_t *db_info) { - int i; - if (!db_info) return; - if (refcount_dec_and_test(&db_info->ref_count)) { - for (i = 0; i < DEBUG_MAX_VIEWS; i++) { - if (!db_info->views[i]) - continue; - debugfs_remove(db_info->debugfs_entries[i]); - } - debugfs_remove(db_info->debugfs_root_entry); - if (db_info == debug_area_first) - debug_area_first = db_info->next; - if (db_info == debug_area_last) - debug_area_last = db_info->prev; - if (db_info->prev) - db_info->prev->next = db_info->next; - if (db_info->next) - db_info->next->prev = db_info->prev; + if (refcount_dec_and_test(&db_info->ref_count)) debug_info_free(db_info); - } } /* @@ -647,6 +613,31 @@ static int debug_close(struct inode *inode, struct file *file) return 0; /* success */ } +/* Create debugfs entries and add to internal list. */ +static void _debug_register(debug_info_t *id) +{ + /* create root directory */ + id->debugfs_root_entry = debugfs_create_dir(id->name, + debug_debugfs_root_entry); + + /* append new element to linked list */ + if (!debug_area_first) { + /* first element in list */ + debug_area_first = id; + id->prev = NULL; + } else { + /* append element to end of list */ + debug_area_last->next = id; + id->prev = debug_area_last; + } + debug_area_last = id; + id->next = NULL; + + debug_register_view(id, &debug_level_view); + debug_register_view(id, &debug_flush_view); + debug_register_view(id, &debug_pages_view); +} + /** * debug_register_mode() - creates and initializes debug area. * @@ -676,19 +667,16 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area, if ((uid != 0) || (gid != 0)) pr_warn("Root becomes the owner of all s390dbf files in sysfs\n"); BUG_ON(!initialized); - mutex_lock(&debug_mutex); /* create new debug_info */ rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode); - if (!rc) - goto out; - debug_register_view(rc, &debug_level_view); - debug_register_view(rc, &debug_flush_view); - debug_register_view(rc, &debug_pages_view); -out: - if (!rc) + if (rc) { + mutex_lock(&debug_mutex); + _debug_register(rc); + mutex_unlock(&debug_mutex); + } else { pr_err("Registering debug feature %s failed\n", name); - mutex_unlock(&debug_mutex); + } return rc; } EXPORT_SYMBOL(debug_register_mode); @@ -717,6 +705,27 @@ debug_info_t *debug_register(const char *name, int pages_per_area, } EXPORT_SYMBOL(debug_register); +/* Remove debugfs entries and remove from internal list. */ +static void _debug_unregister(debug_info_t *id) +{ + int i; + + for (i = 0; i < DEBUG_MAX_VIEWS; i++) { + if (!id->views[i]) + continue; + debugfs_remove(id->debugfs_entries[i]); + } + debugfs_remove(id->debugfs_root_entry); + if (id == debug_area_first) + debug_area_first = id->next; + if (id == debug_area_last) + debug_area_last = id->prev; + if (id->prev) + id->prev->next = id->next; + if (id->next) + id->next->prev = id->prev; +} + /** * debug_unregister() - give back debug area. * @@ -730,8 +739,10 @@ void debug_unregister(debug_info_t *id) if (!id) return; mutex_lock(&debug_mutex); - debug_info_put(id); + _debug_unregister(id); mutex_unlock(&debug_mutex); + + debug_info_put(id); } EXPORT_SYMBOL(debug_unregister); diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index e9dac9a24d3fc..ccba63aaeb470 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -84,7 +84,7 @@ static int show_diag_stat(struct seq_file *m, void *v) static void *show_diag_stat_start(struct seq_file *m, loff_t *pos) { - return *pos <= nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL; + return *pos <= NR_DIAG_STAT ? (void *)((unsigned long) *pos + 1) : NULL; } static void *show_diag_stat_next(struct seq_file *m, void *v, loff_t *pos) @@ -133,7 +133,7 @@ void diag_stat_inc(enum diag_stat_enum nr) } EXPORT_SYMBOL(diag_stat_inc); -void diag_stat_inc_norecursion(enum diag_stat_enum nr) +void notrace diag_stat_inc_norecursion(enum diag_stat_enum nr) { this_cpu_inc(diag_stat.counter[nr]); trace_s390_diagnose_norecursion(diag_map[nr].code); diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 7abe6ae261b42..03e35b399c4ee 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -461,10 +461,11 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) ptr += sprintf(ptr, "%%c%i", value); else if (operand->flags & OPERAND_VR) ptr += sprintf(ptr, "%%v%i", value); - else if (operand->flags & OPERAND_PCREL) - ptr += sprintf(ptr, "%lx", (signed int) value - + addr); - else if (operand->flags & OPERAND_SIGNED) + else if (operand->flags & OPERAND_PCREL) { + void *pcrel = (void *)((int)value + addr); + + ptr += sprintf(ptr, "%px", pcrel); + } else if (operand->flags & OPERAND_SIGNED) ptr += sprintf(ptr, "%i", value); else ptr += sprintf(ptr, "%u", value); @@ -536,7 +537,7 @@ void show_code(struct pt_regs *regs) else *ptr++ = ' '; addr = regs->psw.addr + start - 32; - ptr += sprintf(ptr, "%016lx: ", addr); + ptr += sprintf(ptr, "%px: ", (void *)addr); if (start + opsize >= end) break; for (i = 0; i < opsize; i++) @@ -556,7 +557,7 @@ void show_code(struct pt_regs *regs) void print_fn_code(unsigned char *code, unsigned long len) { - char buffer[64], *ptr; + char buffer[128], *ptr; int opsize, i; while (len) { @@ -564,7 +565,7 @@ void print_fn_code(unsigned char *code, unsigned long len) opsize = insn_length(*code); if (opsize > len) break; - ptr += sprintf(ptr, "%p: ", code); + ptr += sprintf(ptr, "%px: ", code); for (i = 0; i < opsize; i++) ptr += sprintf(ptr, "%02x", code[i]); *ptr++ = '\t'; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index b432d63d0b373..eb89cb0aa60b4 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -169,6 +169,8 @@ static noinline __init void setup_lowcore_early(void) psw_t psw; psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; + if (IS_ENABLED(CONFIG_KASAN)) + psw.mask |= PSW_MASK_DAT; psw.addr = (unsigned long) s390_base_ext_handler; S390_lowcore.external_new_psw = psw; psw.addr = (unsigned long) s390_base_pgm_handler; @@ -250,6 +252,10 @@ static __init void detect_machine_facilities(void) clock_comparator_max = -1ULL >> 1; __ctl_set_bit(0, 53); } + if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) { + S390_lowcore.machine_flags |= MACHINE_FLAG_PCI_MIO; + /* the control bit is set during PCI initialization */ + } } static inline void save_vector_registers(void) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 270d1d145761b..5cba1815b8f8f 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -115,26 +115,29 @@ _LPP_OFFSET = __LC_LPP .macro SWITCH_ASYNC savearea,timer tmhh %r8,0x0001 # interrupting from user ? - jnz 1f + jnz 2f lgr %r14,%r9 + cghi %r14,__LC_RETURN_LPSWE + je 0f slg %r14,BASED(.Lcritical_start) clg %r14,BASED(.Lcritical_length) - jhe 0f + jhe 1f +0: lghi %r11,\savearea # inside critical section, do cleanup brasl %r14,cleanup_critical tmhh %r8,0x0001 # retest problem state after cleanup - jnz 1f -0: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? + jnz 2f +1: lg %r14,__LC_ASYNC_STACK # are we already on the target stack? slgr %r14,%r15 srag %r14,%r14,STACK_SHIFT - jnz 2f + jnz 3f CHECK_STACK \savearea aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 3f -1: UPDATE_VTIME %r14,%r15,\timer + j 4f +2: UPDATE_VTIME %r14,%r15,\timer BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP -2: lg %r15,__LC_ASYNC_STACK # load async stack -3: la %r11,STACK_FRAME_OVERHEAD(%r15) +3: lg %r15,__LC_ASYNC_STACK # load async stack +4: la %r11,STACK_FRAME_OVERHEAD(%r15) .endm .macro UPDATE_VTIME w1,w2,enter_timer @@ -365,9 +368,9 @@ ENTRY(system_call) jnz .Lsysc_nr_ok # svc 0: system call number in %r1 llgfr %r1,%r1 # clear high word in r1 + sth %r1,__PT_INT_CODE+2(%r11) cghi %r1,NR_syscalls jnl .Lsysc_nr_ok - sth %r1,__PT_INT_CODE+2(%r11) slag %r8,%r1,3 .Lsysc_nr_ok: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) @@ -401,7 +404,7 @@ ENTRY(system_call) stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER lmg %r11,%r15,__PT_R11(%r11) - lpswe __LC_RETURN_PSW + b __LC_RETURN_LPSWE(%r0) .Lsysc_done: # @@ -608,43 +611,50 @@ ENTRY(pgm_check_handler) BPOFF stmg %r8,%r15,__LC_SAVE_AREA_SYNC lg %r10,__LC_LAST_BREAK - lg %r12,__LC_CURRENT + srag %r11,%r10,12 + jnz 0f + /* if __LC_LAST_BREAK is < 4096, it contains one of + * the lpswe addresses in lowcore. Set it to 1 (initial state) + * to prevent leaking that address to userspace. + */ + lghi %r10,1 +0: lg %r12,__LC_CURRENT lghi %r11,0 larl %r13,cleanup_critical lmg %r8,%r9,__LC_PGM_OLD_PSW tmhh %r8,0x0001 # test problem state bit - jnz 2f # -> fault in user space + jnz 3f # -> fault in user space #if IS_ENABLED(CONFIG_KVM) # cleanup critical section for program checks in sie64a lgr %r14,%r9 slg %r14,BASED(.Lsie_critical_start) clg %r14,BASED(.Lsie_critical_length) - jhe 0f + jhe 1f lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit lghi %r11,_PIF_GUEST_FAULT #endif -0: tmhh %r8,0x4000 # PER bit set in old PSW ? - jnz 1f # -> enabled, can't be a double fault +1: tmhh %r8,0x4000 # PER bit set in old PSW ? + jnz 2f # -> enabled, can't be a double fault tm __LC_PGM_ILC+3,0x80 # check for per exception jnz .Lpgm_svcper # -> single stepped svc -1: CHECK_STACK __LC_SAVE_AREA_SYNC +2: CHECK_STACK __LC_SAVE_AREA_SYNC aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - # CHECK_VMAP_STACK branches to stack_overflow or 4f - CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f -2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER + # CHECK_VMAP_STACK branches to stack_overflow or 5f + CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,5f +3: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP lg %r15,__LC_KERNEL_STACK lgr %r14,%r12 aghi %r14,__TASK_thread # pointer to thread_struct lghi %r13,__LC_PGM_TDB tm __LC_PGM_ILC+2,0x02 # check for transaction abort - jz 3f + jz 4f mvc __THREAD_trap_tdb(256,%r14),0(%r13) -3: stg %r10,__THREAD_last_break(%r14) -4: lgr %r13,%r11 +4: stg %r10,__THREAD_last_break(%r14) +5: lgr %r13,%r11 la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) # clear user controlled registers to prevent speculative use @@ -663,14 +673,14 @@ ENTRY(pgm_check_handler) stg %r13,__PT_FLAGS(%r11) stg %r10,__PT_ARGS(%r11) tm __LC_PGM_ILC+3,0x80 # check for per exception - jz 5f + jz 6f tmhh %r8,0x0001 # kernel per event ? jz .Lpgm_kprobe oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID -5: REENABLE_IRQS +6: REENABLE_IRQS xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) larl %r1,pgm_check_table llgh %r10,__PT_INT_CODE+2(%r11) @@ -775,7 +785,7 @@ ENTRY(io_int_handler) mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER .Lio_exit_kernel: lmg %r11,%r15,__PT_R11(%r11) - lpswe __LC_RETURN_PSW + b __LC_RETURN_LPSWE(%r0) .Lio_done: # @@ -983,6 +993,7 @@ ENDPROC(ext_int_handler) * Load idle PSW. The second "half" of this function is in .Lcleanup_idle. */ ENTRY(psw_idle) + stg %r14,(__SF_GPRS+8*8)(%r15) stg %r3,__SF_EMPTY(%r15) larl %r1,.Lpsw_idle_lpsw+4 stg %r1,__SF_EMPTY+8(%r15) @@ -1214,7 +1225,7 @@ ENTRY(mcck_int_handler) stpt __LC_EXIT_TIMER mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER 0: lmg %r11,%r15,__PT_R11(%r11) - lpswe __LC_RETURN_MCCK_PSW + b __LC_RETURN_MCCK_LPSWE .Lmcck_panic: lg %r15,__LC_NODAT_STACK @@ -1271,6 +1282,8 @@ ENDPROC(stack_overflow) #endif ENTRY(cleanup_critical) + cghi %r9,__LC_RETURN_LPSWE + je .Lcleanup_lpswe #if IS_ENABLED(CONFIG_KVM) clg %r9,BASED(.Lcleanup_table_sie) # .Lsie_gmap jl 0f @@ -1424,6 +1437,7 @@ ENDPROC(cleanup_critical) mvc __LC_RETURN_PSW(16),__PT_PSW(%r9) mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) +.Lcleanup_lpswe: 1: lmg %r8,%r9,__LC_RETURN_PSW BR_EX %r14,%r11 .Lcleanup_sysc_restore_insn: diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index b2956d49b6ad7..6460bfa688cfc 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -24,6 +24,8 @@ asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); void do_protection_exception(struct pt_regs *regs); void do_dat_exception(struct pt_regs *regs); +void do_secure_storage_access(struct pt_regs *regs); +void do_non_secure_storage_access(struct pt_regs *regs); void addressing_exception(struct pt_regs *regs); void data_exception(struct pt_regs *regs); diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 1bb85f60c0dd5..4348c01cde224 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -57,6 +57,7 @@ * > brasl %r0,ftrace_caller # offset 0 */ +void *ftrace_func __read_mostly = ftrace_stub; unsigned long ftrace_plt; static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn) @@ -72,15 +73,6 @@ static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn) #endif } -static inline int is_kprobe_on_ftrace(struct ftrace_insn *insn) -{ -#ifdef CONFIG_KPROBES - if (insn->opc == BREAKPOINT_INSTRUCTION) - return 1; -#endif - return 0; -} - static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn) { #ifdef CONFIG_KPROBES @@ -114,16 +106,6 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, /* Initial code replacement */ ftrace_generate_orig_insn(&orig); ftrace_generate_nop_insn(&new); - } else if (is_kprobe_on_ftrace(&old)) { - /* - * If we find a breakpoint instruction, a kprobe has been - * placed at the beginning of the function. We write the - * constant KPROBE_ON_FTRACE_NOP into the remaining four - * bytes of the original instruction so that the kprobes - * handler can execute a nop, if it reaches this breakpoint. - */ - ftrace_generate_kprobe_call_insn(&orig); - ftrace_generate_kprobe_nop_insn(&new); } else { /* Replace ftrace call with a nop. */ ftrace_generate_call_insn(&orig, rec->ip); @@ -142,21 +124,10 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; - if (is_kprobe_on_ftrace(&old)) { - /* - * If we find a breakpoint instruction, a kprobe has been - * placed at the beginning of the function. We write the - * constant KPROBE_ON_FTRACE_CALL into the remaining four - * bytes of the original instruction so that the kprobes - * handler can execute a brasl if it reaches this breakpoint. - */ - ftrace_generate_kprobe_nop_insn(&orig); - ftrace_generate_kprobe_call_insn(&new); - } else { - /* Replace nop with an ftrace call. */ - ftrace_generate_nop_insn(&orig); - ftrace_generate_call_insn(&new, rec->ip); - } + /* Replace nop with an ftrace call. */ + ftrace_generate_nop_insn(&orig); + ftrace_generate_call_insn(&new, rec->ip); + /* Verify that the to be replaced code matches what we expect. */ if (memcmp(&orig, &old, sizeof(old))) return -EINVAL; @@ -166,6 +137,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) int ftrace_update_ftrace_func(ftrace_func_t func) { + ftrace_func = func; return 0; } @@ -241,3 +213,45 @@ int ftrace_disable_ftrace_graph_caller(void) } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#ifdef CONFIG_KPROBES_ON_FTRACE +void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb; + struct kprobe *p = get_kprobe((kprobe_opcode_t *)ip); + + if (unlikely(!p) || kprobe_disabled(p)) + return; + + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + return; + } + + __this_cpu_write(current_kprobe, p); + + kcb = get_kprobe_ctlblk(); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + instruction_pointer_set(regs, ip); + + if (!p->pre_handler || !p->pre_handler(p, regs)) { + + instruction_pointer_set(regs, ip + MCOUNT_INSN_SIZE); + + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + } + __this_cpu_write(current_kprobe, NULL); +} +NOKPROBE_SYMBOL(kprobe_ftrace_handler); + +int arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.insn = NULL; + return 0; +} +#endif diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 6837affc19e81..0e4a985c6f0ff 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -39,6 +39,7 @@ #define IPL_CCW_STR "ccw" #define IPL_FCP_STR "fcp" #define IPL_FCP_DUMP_STR "fcp_dump" +#define IPL_NVME_STR "nvme" #define IPL_NSS_STR "nss" #define DUMP_CCW_STR "ccw" @@ -93,6 +94,8 @@ static char *ipl_type_str(enum ipl_type type) return IPL_FCP_DUMP_STR; case IPL_TYPE_NSS: return IPL_NSS_STR; + case IPL_TYPE_NVME: + return IPL_NVME_STR; case IPL_TYPE_UNKNOWN: default: return IPL_UNKNOWN_STR; @@ -133,6 +136,7 @@ static int reipl_capabilities = IPL_TYPE_UNKNOWN; static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN; static struct ipl_parameter_block *reipl_block_fcp; +static struct ipl_parameter_block *reipl_block_nvme; static struct ipl_parameter_block *reipl_block_ccw; static struct ipl_parameter_block *reipl_block_nss; static struct ipl_parameter_block *reipl_block_actual; @@ -258,6 +262,8 @@ static __init enum ipl_type get_ipl_type(void) return IPL_TYPE_FCP_DUMP; else return IPL_TYPE_FCP; + case IPL_PBT_NVME: + return IPL_TYPE_NVME; } return IPL_TYPE_UNKNOWN; } @@ -314,6 +320,8 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj, case IPL_TYPE_FCP: case IPL_TYPE_FCP_DUMP: return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno); + case IPL_TYPE_NVME: + return sprintf(page, "%08ux\n", ipl_block.nvme.fid); default: return 0; } @@ -342,15 +350,35 @@ static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj, return memory_read_from_buffer(buf, count, &off, scp_data, size); } + +static ssize_t ipl_nvme_scp_data_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + unsigned int size = ipl_block.nvme.scp_data_len; + void *scp_data = &ipl_block.nvme.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + static struct bin_attribute ipl_scp_data_attr = __BIN_ATTR(scp_data, S_IRUGO, ipl_scp_data_read, NULL, PAGE_SIZE); +static struct bin_attribute ipl_nvme_scp_data_attr = + __BIN_ATTR(scp_data, S_IRUGO, ipl_nvme_scp_data_read, NULL, PAGE_SIZE); + static struct bin_attribute *ipl_fcp_bin_attrs[] = { &ipl_parameter_attr, &ipl_scp_data_attr, NULL, }; +static struct bin_attribute *ipl_nvme_bin_attrs[] = { + &ipl_parameter_attr, + &ipl_nvme_scp_data_attr, + NULL, +}; + /* FCP ipl device attributes */ DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n", @@ -362,6 +390,16 @@ DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n", DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n", (unsigned long long)ipl_block.fcp.br_lba); +/* NVMe ipl device attributes */ +DEFINE_IPL_ATTR_RO(ipl_nvme, fid, "0x%08llx\n", + (unsigned long long)ipl_block.nvme.fid); +DEFINE_IPL_ATTR_RO(ipl_nvme, nsid, "0x%08llx\n", + (unsigned long long)ipl_block.nvme.nsid); +DEFINE_IPL_ATTR_RO(ipl_nvme, bootprog, "%lld\n", + (unsigned long long)ipl_block.nvme.bootprog); +DEFINE_IPL_ATTR_RO(ipl_nvme, br_lba, "%lld\n", + (unsigned long long)ipl_block.nvme.br_lba); + static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { @@ -396,6 +434,24 @@ static struct attribute_group ipl_fcp_attr_group = { .bin_attrs = ipl_fcp_bin_attrs, }; +static struct attribute *ipl_nvme_attrs[] = { + &sys_ipl_type_attr.attr, + &sys_ipl_nvme_fid_attr.attr, + &sys_ipl_nvme_nsid_attr.attr, + &sys_ipl_nvme_bootprog_attr.attr, + &sys_ipl_nvme_br_lba_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, + &sys_ipl_secure_attr.attr, + &sys_ipl_has_secure_attr.attr, + NULL, +}; + +static struct attribute_group ipl_nvme_attr_group = { + .attrs = ipl_nvme_attrs, + .bin_attrs = ipl_nvme_bin_attrs, +}; + + /* CCW ipl device attributes */ static struct attribute *ipl_ccw_attrs_vm[] = { @@ -471,6 +527,9 @@ static int __init ipl_init(void) case IPL_TYPE_FCP_DUMP: rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group); break; + case IPL_TYPE_NVME: + rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nvme_attr_group); + break; default: rc = sysfs_create_group(&ipl_kset->kobj, &ipl_unknown_attr_group); @@ -706,6 +765,93 @@ static struct attribute_group reipl_fcp_attr_group = { .bin_attrs = reipl_fcp_bin_attrs, }; +/* NVME reipl device attributes */ + +static ssize_t reipl_nvme_scpdata_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t size = reipl_block_nvme->nvme.scp_data_len; + void *scp_data = reipl_block_nvme->nvme.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + +static ssize_t reipl_nvme_scpdata_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t scpdata_len = count; + size_t padding; + + if (off) + return -EINVAL; + + memcpy(reipl_block_nvme->nvme.scp_data, buf, count); + if (scpdata_len % 8) { + padding = 8 - (scpdata_len % 8); + memset(reipl_block_nvme->nvme.scp_data + scpdata_len, + 0, padding); + scpdata_len += padding; + } + + reipl_block_nvme->hdr.len = IPL_BP_FCP_LEN + scpdata_len; + reipl_block_nvme->nvme.len = IPL_BP0_FCP_LEN + scpdata_len; + reipl_block_nvme->nvme.scp_data_len = scpdata_len; + + return count; +} + +static struct bin_attribute sys_reipl_nvme_scp_data_attr = + __BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_nvme_scpdata_read, + reipl_nvme_scpdata_write, DIAG308_SCPDATA_SIZE); + +static struct bin_attribute *reipl_nvme_bin_attrs[] = { + &sys_reipl_nvme_scp_data_attr, + NULL, +}; + +DEFINE_IPL_ATTR_RW(reipl_nvme, fid, "0x%08llx\n", "%llx\n", + reipl_block_nvme->nvme.fid); +DEFINE_IPL_ATTR_RW(reipl_nvme, nsid, "0x%08llx\n", "%llx\n", + reipl_block_nvme->nvme.nsid); +DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n", + reipl_block_nvme->nvme.bootprog); +DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n", + reipl_block_nvme->nvme.br_lba); + +/* nvme wrapper */ +static ssize_t reipl_nvme_loadparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return reipl_generic_loadparm_show(reipl_block_nvme, page); +} + +static ssize_t reipl_nvme_loadparm_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return reipl_generic_loadparm_store(reipl_block_nvme, buf, len); +} + +static struct kobj_attribute sys_reipl_nvme_loadparm_attr = + __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nvme_loadparm_show, + reipl_nvme_loadparm_store); + +static struct attribute *reipl_nvme_attrs[] = { + &sys_reipl_nvme_fid_attr.attr, + &sys_reipl_nvme_nsid_attr.attr, + &sys_reipl_nvme_bootprog_attr.attr, + &sys_reipl_nvme_br_lba_attr.attr, + &sys_reipl_nvme_loadparm_attr.attr, + NULL, +}; + +static struct attribute_group reipl_nvme_attr_group = { + .attrs = reipl_nvme_attrs, + .bin_attrs = reipl_nvme_bin_attrs +}; + /* CCW reipl device attributes */ DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw); @@ -850,6 +996,9 @@ static int reipl_set_type(enum ipl_type type) case IPL_TYPE_FCP: reipl_block_actual = reipl_block_fcp; break; + case IPL_TYPE_NVME: + reipl_block_actual = reipl_block_nvme; + break; case IPL_TYPE_NSS: reipl_block_actual = reipl_block_nss; break; @@ -876,6 +1025,8 @@ static ssize_t reipl_type_store(struct kobject *kobj, rc = reipl_set_type(IPL_TYPE_CCW); else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0) rc = reipl_set_type(IPL_TYPE_FCP); + else if (strncmp(buf, IPL_NVME_STR, strlen(IPL_NVME_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_NVME); else if (strncmp(buf, IPL_NSS_STR, strlen(IPL_NSS_STR)) == 0) rc = reipl_set_type(IPL_TYPE_NSS); return (rc != 0) ? rc : len; @@ -886,6 +1037,7 @@ static struct kobj_attribute reipl_type_attr = static struct kset *reipl_kset; static struct kset *reipl_fcp_kset; +static struct kset *reipl_nvme_kset; static void __reipl_run(void *unused) { @@ -898,6 +1050,10 @@ static void __reipl_run(void *unused) diag308(DIAG308_SET, reipl_block_fcp); diag308(DIAG308_LOAD_CLEAR, NULL); break; + case IPL_TYPE_NVME: + diag308(DIAG308_SET, reipl_block_nvme); + diag308(DIAG308_LOAD_CLEAR, NULL); + break; case IPL_TYPE_NSS: diag308(DIAG308_SET, reipl_block_nss); diag308(DIAG308_LOAD_CLEAR, NULL); @@ -1034,6 +1190,49 @@ static int __init reipl_fcp_init(void) return 0; } +static int __init reipl_nvme_init(void) +{ + int rc; + + reipl_block_nvme = (void *) get_zeroed_page(GFP_KERNEL); + if (!reipl_block_nvme) + return -ENOMEM; + + /* sysfs: create kset for mixing attr group and bin attrs */ + reipl_nvme_kset = kset_create_and_add(IPL_NVME_STR, NULL, + &reipl_kset->kobj); + if (!reipl_nvme_kset) { + free_page((unsigned long) reipl_block_nvme); + return -ENOMEM; + } + + rc = sysfs_create_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group); + if (rc) { + kset_unregister(reipl_nvme_kset); + free_page((unsigned long) reipl_block_nvme); + return rc; + } + + if (ipl_info.type == IPL_TYPE_NVME) { + memcpy(reipl_block_nvme, &ipl_block, sizeof(ipl_block)); + /* + * Fix loadparm: There are systems where the (SCSI) LOADPARM + * is invalid in the IPL parameter block, so take it + * always from sclp_ipl_info. + */ + memcpy(reipl_block_nvme->nvme.loadparm, sclp_ipl_info.loadparm, + LOADPARM_LEN); + } else { + reipl_block_nvme->hdr.len = IPL_BP_NVME_LEN; + reipl_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION; + reipl_block_nvme->nvme.len = IPL_BP0_NVME_LEN; + reipl_block_nvme->nvme.pbt = IPL_PBT_NVME; + reipl_block_nvme->nvme.opt = IPL_PB0_NVME_OPT_IPL; + } + reipl_capabilities |= IPL_TYPE_NVME; + return 0; +} + static int __init reipl_type_init(void) { enum ipl_type reipl_type = ipl_info.type; @@ -1049,6 +1248,9 @@ static int __init reipl_type_init(void) if (reipl_block->pb0_hdr.pbt == IPL_PBT_FCP) { memcpy(reipl_block_fcp, reipl_block, size); reipl_type = IPL_TYPE_FCP; + } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_NVME) { + memcpy(reipl_block_nvme, reipl_block, size); + reipl_type = IPL_TYPE_NVME; } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_CCW) { memcpy(reipl_block_ccw, reipl_block, size); reipl_type = IPL_TYPE_CCW; @@ -1073,6 +1275,9 @@ static int __init reipl_init(void) if (rc) return rc; rc = reipl_fcp_init(); + if (rc) + return rc; + rc = reipl_nvme_init(); if (rc) return rc; rc = reipl_nss_init(); @@ -1691,6 +1896,10 @@ void __init setup_ipl(void) ipl_info.data.fcp.wwpn = ipl_block.fcp.wwpn; ipl_info.data.fcp.lun = ipl_block.fcp.lun; break; + case IPL_TYPE_NVME: + ipl_info.data.nvme.fid = ipl_block.nvme.fid; + ipl_info.data.nvme.nsid = ipl_block.nvme.nsid; + break; case IPL_TYPE_NSS: case IPL_TYPE_UNKNOWN: /* We have no info to copy */ @@ -1783,7 +1992,7 @@ void *ipl_report_finish(struct ipl_report *report) buf = vzalloc(report->size); if (!buf) - return ERR_PTR(-ENOMEM); + goto out; ptr = buf; memcpy(ptr, report->ipib, report->ipib->hdr.len); @@ -1822,6 +2031,7 @@ void *ipl_report_finish(struct ipl_report *report) } BUG_ON(ptr > buf + report->size); +out: return buf; } @@ -1842,3 +2052,8 @@ int ipl_report_free(struct ipl_report *report) } #endif + +bool ipl_get_secureboot(void) +{ + return !!ipl_secure_flag; +} diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 8371855042dc2..da550cb8b31bd 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -294,11 +294,6 @@ static irqreturn_t do_ext_interrupt(int irq, void *dummy) return IRQ_HANDLED; } -static struct irqaction external_interrupt = { - .name = "EXT", - .handler = do_ext_interrupt, -}; - void __init init_ext_interrupts(void) { int idx; @@ -308,7 +303,8 @@ void __init init_ext_interrupts(void) irq_set_chip_and_handler(EXT_INTERRUPT, &dummy_irq_chip, handle_percpu_irq); - setup_irq(EXT_INTERRUPT, &external_interrupt); + if (request_irq(EXT_INTERRUPT, do_ext_interrupt, 0, "EXT", NULL)) + panic("Failed to register EXT interrupt\n"); } static DEFINE_SPINLOCK(irq_subclass_lock); diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c index ab584e8e35275..9156653b56f69 100644 --- a/arch/s390/kernel/jump_label.c +++ b/arch/s390/kernel/jump_label.c @@ -36,7 +36,7 @@ static void jump_label_bug(struct jump_entry *entry, struct insn *expected, unsigned char *ipe = (unsigned char *)expected; unsigned char *ipn = (unsigned char *)new; - pr_emerg("Jump label code mismatch at %pS [%p]\n", ipc, ipc); + pr_emerg("Jump label code mismatch at %pS [%px]\n", ipc, ipc); pr_emerg("Found: %6ph\n", ipc); pr_emerg("Expected: %6ph\n", ipe); pr_emerg("New: %6ph\n", ipn); diff --git a/arch/s390/kernel/kmsg.c b/arch/s390/kernel/kmsg.c new file mode 100644 index 0000000000000..7d33182a99df4 --- /dev/null +++ b/arch/s390/kernel/kmsg.c @@ -0,0 +1,114 @@ +/* + * Message printing with message catalog prefixes. + * + * Copyright IBM Corp. 2012 + */ + +#include +#include +#include +#include +#include + +static inline u32 __printk_jhash(const void *key, u32 length) +{ + u32 a, b, c, len; + const u8 *k; + u8 zk[12]; + + a = b = 0x9e3779b9; + c = 0; + for (len = length + 12, k = key; len >= 12; len -= 12, k += 12) { + if (len >= 24) { + a += k[0] | k[1] << 8 | k[2] << 16 | k[3] << 24; + b += k[4] | k[5] << 8 | k[6] << 16 | k[7] << 24; + c += k[8] | k[9] << 8 | k[10] << 16 | k[11] << 24; + } else { + memset(zk, 0, 12); + memcpy(zk, k, len - 12); + a += zk[0] | zk[1] << 8 | zk[2] << 16 | zk[3] << 24; + b += zk[4] | zk[5] << 8 | zk[6] << 16 | zk[7] << 24; + c += (u32) zk[8] << 8; + c += (u32) zk[9] << 16; + c += (u32) zk[10] << 24; + c += length; + } + a -= b + c; a ^= (c>>13); + b -= a + c; b ^= (a<<8); + c -= a + b; c ^= (b>>13); + a -= b + c; a ^= (c>>12); + b -= a + c; b ^= (a<<16); + c -= a + b; c ^= (b>>5); + a -= b + c; a ^= (c>>3); + b -= a + c; b ^= (a<<10); + c -= a + b; c ^= (b>>15); + } + return c; +} + +/** + * __jhash_string - calculate the six digit jhash of a string + * @str: string to calculate the jhash + */ +unsigned long long __jhash_string(const char *str) +{ + return __printk_jhash(str, strlen(str)) & 0xffffff; +} +EXPORT_SYMBOL(__jhash_string); + +static int __dev_printk_hash(const char *level, const struct device *dev, + struct va_format *vaf) +{ + if (!dev) + return printk("%s(NULL device *): %pV", level, vaf); + + return printk("%s%s.%06x: %pV", level, dev_driver_string(dev), + __printk_jhash(vaf->fmt, strlen(vaf->fmt)) & 0xffffff, + vaf); +} + +int dev_printk_hash(const char *level, const struct device *dev, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + int r; + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + r = __dev_printk_hash(level, dev, &vaf); + va_end(args); + + return r; +} +EXPORT_SYMBOL(dev_printk_hash); + +#define define_dev_printk_hash_level(func, kern_level) \ +int func(const struct device *dev, const char *fmt, ...) \ +{ \ + struct va_format vaf; \ + va_list args; \ + int r; \ + \ + va_start(args, fmt); \ + \ + vaf.fmt = fmt; \ + vaf.va = &args; \ + \ + r = __dev_printk_hash(kern_level, dev, &vaf); \ + va_end(args); \ + \ + return r; \ +} \ +EXPORT_SYMBOL(func); + +define_dev_printk_hash_level(dev_emerg_hash, KERN_EMERG); +define_dev_printk_hash_level(dev_alert_hash, KERN_ALERT); +define_dev_printk_hash_level(dev_crit_hash, KERN_CRIT); +define_dev_printk_hash_level(dev_err_hash, KERN_ERR); +define_dev_printk_hash_level(dev_warn_hash, KERN_WARNING); +define_dev_printk_hash_level(dev_notice_hash, KERN_NOTICE); +define_dev_printk_hash_level(_dev_info_hash, KERN_INFO); diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 6f1388391620a..548d0ea9808d2 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -56,21 +56,10 @@ struct kprobe_insn_cache kprobe_s390_insn_slots = { static void copy_instruction(struct kprobe *p) { - unsigned long ip = (unsigned long) p->addr; s64 disp, new_disp; u64 addr, new_addr; - if (ftrace_location(ip) == ip) { - /* - * If kprobes patches the instruction that is morphed by - * ftrace make sure that kprobes always sees the branch - * "jg .+24" that skips the mcount block or the "brcl 0,0" - * in case of hotpatch. - */ - ftrace_generate_nop_insn((struct ftrace_insn *)p->ainsn.insn); - p->ainsn.is_ftrace_insn = 1; - } else - memcpy(p->ainsn.insn, p->addr, insn_length(*p->addr >> 8)); + memcpy(p->ainsn.insn, p->addr, insn_length(*p->addr >> 8)); p->opcode = p->ainsn.insn[0]; if (!probe_is_insn_relative_long(p->ainsn.insn)) return; @@ -136,11 +125,6 @@ int arch_prepare_kprobe(struct kprobe *p) } NOKPROBE_SYMBOL(arch_prepare_kprobe); -int arch_check_ftrace_location(struct kprobe *p) -{ - return 0; -} - struct swap_insn_args { struct kprobe *p; unsigned int arm_kprobe : 1; @@ -149,28 +133,11 @@ struct swap_insn_args { static int swap_instruction(void *data) { struct swap_insn_args *args = data; - struct ftrace_insn new_insn, *insn; struct kprobe *p = args->p; - size_t len; - - new_insn.opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode; - len = sizeof(new_insn.opc); - if (!p->ainsn.is_ftrace_insn) - goto skip_ftrace; - len = sizeof(new_insn); - insn = (struct ftrace_insn *) p->addr; - if (args->arm_kprobe) { - if (is_ftrace_nop(insn)) - new_insn.disp = KPROBE_ON_FTRACE_NOP; - else - new_insn.disp = KPROBE_ON_FTRACE_CALL; - } else { - ftrace_generate_call_insn(&new_insn, (unsigned long)p->addr); - if (insn->disp == KPROBE_ON_FTRACE_NOP) - ftrace_generate_nop_insn(&new_insn); - } -skip_ftrace: - s390_kernel_write(p->addr, &new_insn, len); + u16 opc; + + opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode; + s390_kernel_write(p->addr, &opc, sizeof(opc)); return 0; } NOKPROBE_SYMBOL(swap_instruction); @@ -464,24 +431,6 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs) unsigned long ip = regs->psw.addr; int fixup = probe_get_fixup_type(p->ainsn.insn); - /* Check if the kprobes location is an enabled ftrace caller */ - if (p->ainsn.is_ftrace_insn) { - struct ftrace_insn *insn = (struct ftrace_insn *) p->addr; - struct ftrace_insn call_insn; - - ftrace_generate_call_insn(&call_insn, (unsigned long) p->addr); - /* - * A kprobe on an enabled ftrace call site actually single - * stepped an unconditional branch (ftrace nop equivalent). - * Now we need to fixup things and pretend that a brasl r0,... - * was executed instead. - */ - if (insn->disp == KPROBE_ON_FTRACE_CALL) { - ip += call_insn.disp * 2 - MCOUNT_INSN_SIZE; - regs->gprs[0] = (unsigned long)p->addr + sizeof(*insn); - } - } - if (fixup & FIXUP_PSW_NORMAL) ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn; diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 444a19125a815..cb8b1cc285c96 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -164,7 +164,9 @@ static bool kdump_csum_valid(struct kimage *image) #ifdef CONFIG_CRASH_DUMP int rc; + preempt_disable(); rc = CALL_ON_STACK(do_start_kdump, S390_lowcore.nodat_stack, 1, image); + preempt_enable(); return rc == 0; #else return false; @@ -254,10 +256,10 @@ void arch_crash_save_vmcoreinfo(void) VMCOREINFO_SYMBOL(lowcore_ptr); VMCOREINFO_SYMBOL(high_memory); VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); - mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note()); vmcoreinfo_append_str("SDMA=%lx\n", __sdma); vmcoreinfo_append_str("EDMA=%lx\n", __edma); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); + mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note()); } void machine_shutdown(void) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 8415ae7d2a23f..53da174754d97 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,7 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len) const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1; struct module_signature *ms; unsigned long sig_len; + int ret; /* Skip signature verification when not secure IPLed. */ if (!ipl_secure_flag) @@ -62,11 +64,18 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len) return -EBADMSG; } - return verify_pkcs7_signature(kernel, kernel_len, - kernel + kernel_len, sig_len, - VERIFY_USE_PLATFORM_KEYRING, - VERIFYING_MODULE_SIGNATURE, - NULL, NULL); + ret = verify_pkcs7_signature(kernel, kernel_len, + kernel + kernel_len, sig_len, + VERIFY_USE_SECONDARY_KEYRING, + VERIFYING_MODULE_SIGNATURE, + NULL, NULL); + if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING)) + ret = verify_pkcs7_signature(kernel, kernel_len, + kernel + kernel_len, sig_len, + VERIFY_USE_PLATFORM_KEYRING, + VERIFYING_MODULE_SIGNATURE, + NULL, NULL); + return ret; } #endif /* CONFIG_KEXEC_SIG */ @@ -151,7 +160,7 @@ static int kexec_file_add_initrd(struct kimage *image, buf.mem += crashk_res.start; buf.memsz = buf.bufsz; - data->parm->initrd_start = buf.mem; + data->parm->initrd_start = data->memsz; data->parm->initrd_size = buf.memsz; data->memsz += buf.memsz; @@ -170,6 +179,7 @@ static int kexec_file_add_ipl_report(struct kimage *image, struct kexec_buf buf; unsigned long addr; void *ptr, *end; + int ret; buf.image = image; @@ -199,9 +209,13 @@ static int kexec_file_add_ipl_report(struct kimage *image, ptr += len; } + ret = -ENOMEM; buf.buffer = ipl_report_finish(data->report); + if (!buf.buffer) + goto out; buf.bufsz = data->report->size; buf.memsz = buf.bufsz; + image->arch.ipl_buf = buf.buffer; data->memsz += buf.memsz; @@ -209,7 +223,9 @@ static int kexec_file_add_ipl_report(struct kimage *image, data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr); *lc_ipl_parmblock_ptr = (__u32)buf.mem; - return kexec_add_buffer(&buf); + ret = kexec_add_buffer(&buf); +out: + return ret; } void *kexec_file_add_components(struct kimage *image, @@ -269,6 +285,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, { Elf_Rela *relas; int i, r_type; + int ret; relas = (void *)pi->ehdr + relsec->sh_offset; @@ -303,7 +320,11 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, addr = section->sh_addr + relas[i].r_offset; r_type = ELF64_R_TYPE(relas[i].r_info); - arch_kexec_do_relocs(r_type, loc, val, addr); + ret = arch_kexec_do_relocs(r_type, loc, val, addr); + if (ret) { + pr_err("Unknown rela relocation: %d\n", r_type); + return -ENOEXEC; + } } return 0; } @@ -321,3 +342,11 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, return kexec_image_probe_default(image, buf, buf_len); } + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + vfree(image->arch.ipl_buf); + image->arch.ipl_buf = NULL; + + return kexec_image_post_load_cleanup_default(image); +} diff --git a/arch/s390/kernel/machine_kexec_reloc.c b/arch/s390/kernel/machine_kexec_reloc.c index d5035de9020e7..b7182cec48dc4 100644 --- a/arch/s390/kernel/machine_kexec_reloc.c +++ b/arch/s390/kernel/machine_kexec_reloc.c @@ -28,6 +28,7 @@ int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val, break; case R_390_64: /* Direct 64 bit. */ case R_390_GLOB_DAT: + case R_390_JMP_SLOT: *(u64 *)loc = val; break; case R_390_PC16: /* PC relative 16 bit. */ diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 9e1660a6b9db6..be9e85e7558ae 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -26,6 +26,12 @@ ENDPROC(ftrace_stub) #define STACK_PTREGS (STACK_FRAME_OVERHEAD) #define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) +#ifdef __PACK_STACK +/* allocate just enough for r14, r15 and backchain */ +#define TRACED_FUNC_FRAME_SIZE 24 +#else +#define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD +#endif ENTRY(_mcount) BR_EX %r14 @@ -35,25 +41,39 @@ EXPORT_SYMBOL(_mcount) ENTRY(ftrace_caller) .globl ftrace_regs_caller .set ftrace_regs_caller,ftrace_caller + stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller + lghi %r14,0 # save condition code + ipm %r14 # don't put any instructions + sllg %r14,%r14,16 # clobbering CC before this point lgr %r1,%r15 #if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)) aghi %r0,MCOUNT_RETURN_FIXUP #endif - aghi %r15,-STACK_FRAME_SIZE + # allocate stack frame for ftrace_caller to contain traced function + aghi %r15,-TRACED_FUNC_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) + stg %r0,(__SF_GPRS+8*8)(%r15) + stg %r15,(__SF_GPRS+9*8)(%r15) + # allocate pt_regs and stack frame for ftrace_trace_function + aghi %r15,-STACK_FRAME_SIZE stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) + stg %r14,(STACK_PTREGS_PSW)(%r15) + lg %r14,(__SF_GPRS+8*8)(%r1) # restore original return address + stosm (STACK_PTREGS_PSW)(%r15),0 + aghi %r1,-TRACED_FUNC_FRAME_SIZE + stg %r1,__SF_BACKCHAIN(%r15) stg %r0,(STACK_PTREGS_PSW+8)(%r15) stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES aghik %r2,%r0,-MCOUNT_INSN_SIZE lgrl %r4,function_trace_op - lgrl %r1,ftrace_trace_function + lgrl %r1,ftrace_func #else lgr %r2,%r0 aghi %r2,-MCOUNT_INSN_SIZE larl %r4,function_trace_op lg %r4,0(%r4) - larl %r1,ftrace_trace_function + larl %r1,ftrace_func lg %r1,0(%r1) #endif lgr %r3,%r14 diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 48d48b6187c0d..dddb32e53db8b 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -199,7 +199,7 @@ static const int cpumf_generic_events_user[] = { [PERF_COUNT_HW_BUS_CYCLES] = -1, }; -static int __hw_perf_event_init(struct perf_event *event) +static int __hw_perf_event_init(struct perf_event *event, unsigned int type) { struct perf_event_attr *attr = &event->attr; struct hw_perf_event *hwc = &event->hw; @@ -207,7 +207,7 @@ static int __hw_perf_event_init(struct perf_event *event) int err = 0; u64 ev; - switch (attr->type) { + switch (type) { case PERF_TYPE_RAW: /* Raw events are used to access counters directly, * hence do not permit excludes */ @@ -292,19 +292,38 @@ static int __hw_perf_event_init(struct perf_event *event) return err; } +/* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different + * attribute::type values: + * - PERF_TYPE_HARDWARE: + * - pmu->type: + * Handle both type of invocations identical. They address the same hardware. + * The result is different when event modifiers exclude_kernel and/or + * exclude_user are also set. + */ +static int cpumf_pmu_event_type(struct perf_event *event) +{ + u64 ev = event->attr.config; + + if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev) + return PERF_TYPE_HARDWARE; + return PERF_TYPE_RAW; +} + static int cpumf_pmu_event_init(struct perf_event *event) { + unsigned int type = event->attr.type; int err; - switch (event->attr.type) { - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - case PERF_TYPE_RAW: - err = __hw_perf_event_init(event); - break; - default: + if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) + err = __hw_perf_event_init(event, type); + else if (event->pmu->type == type) + /* Registered as unknown PMU */ + err = __hw_perf_event_init(event, cpumf_pmu_event_type(event)); + else return -ENOENT; - } if (unlikely(err) && event->destroy) event->destroy(event); @@ -553,7 +572,7 @@ static int __init cpumf_pmu_init(void) return -ENODEV; cpumf_pmu.attr_groups = cpumf_cf_event_group(); - rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); + rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1); if (rc) pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); return rc; diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index 2654e348801a1..bfe55a07239db 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -243,13 +243,13 @@ static int cf_diag_event_init(struct perf_event *event) int err = -ENOENT; debug_sprintf_event(cf_diag_dbg, 5, - "%s event %p cpu %d config %#llx " + "%s event %p cpu %d config %#llx type:%u " "sample_type %#llx cf_diag_events %d\n", __func__, - event, event->cpu, attr->config, attr->sample_type, - atomic_read(&cf_diag_events)); + event, event->cpu, attr->config, event->pmu->type, + attr->sample_type, atomic_read(&cf_diag_events)); if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG || - event->attr.type != PERF_TYPE_RAW) + event->attr.type != event->pmu->type) goto out; /* Raw events are used to access counters directly, @@ -308,7 +308,7 @@ static size_t cf_diag_ctrset_size(enum cpumf_ctr_set ctrset, case CPUMF_CTR_SET_CRYPTO: if (info->csvn >= 1 && info->csvn <= 5) ctrset_size = 16; - else if (info->csvn == 6) + else if (info->csvn == 6 || info->csvn == 7) ctrset_size = 20; break; case CPUMF_CTR_SET_EXT: @@ -318,7 +318,7 @@ static size_t cf_diag_ctrset_size(enum cpumf_ctr_set ctrset, ctrset_size = 48; else if (info->csvn >= 3 && info->csvn <= 5) ctrset_size = 128; - else if (info->csvn == 6) + else if (info->csvn == 6 || info->csvn == 7) ctrset_size = 160; break; case CPUMF_CTR_SET_MT_DIAG: @@ -693,7 +693,7 @@ static int __init cf_diag_init(void) } debug_register_view(cf_diag_dbg, &debug_sprintf_view); - rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", PERF_TYPE_RAW); + rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1); if (rc) { debug_unregister_view(cf_diag_dbg, &debug_sprintf_view); debug_unregister(cf_diag_dbg); diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index 8b33e03e47b83..0d64aafd158f2 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -238,6 +238,134 @@ CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5); CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); +CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z15, DTLB2_HPAGE_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z15, DTLB2_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z15, L1D_L2D_SOURCED_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_z15, ITLB2_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z15, ITLB2_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z15, L1I_L2I_SOURCED_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z15, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z15, TLB2_CRSTE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z15, TLB2_ENGINES_BUSY, 0x008b); +CPUMF_EVENT_ATTR(cf_z15, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z15, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z15, L1C_TLB2_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0092); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x0095); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES, 0x0096); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x0097); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x0098); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x009a); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x009b); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x009c); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L4_SOURCED_WRITES, 0x009d); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO, 0x009e); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES, 0x00a3); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a4); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES, 0x00a5); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x00a6); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x00a7); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES, 0x00a8); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x00a9); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x00aa); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES, 0x00ab); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x00ac); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x00ad); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00ae); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES, 0x00af); +CPUMF_EVENT_ATTR(cf_z15, BCD_DFP_EXECUTION_SLOTS, 0x00e0); +CPUMF_EVENT_ATTR(cf_z15, VX_BCD_EXECUTION_SLOTS, 0x00e1); +CPUMF_EVENT_ATTR(cf_z15, DECIMAL_INSTRUCTIONS, 0x00e2); +CPUMF_EVENT_ATTR(cf_z15, LAST_HOST_TRANSLATIONS, 0x00e8); +CPUMF_EVENT_ATTR(cf_z15, TX_NC_TABORT, 0x00f3); +CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4); +CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5); +CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109); +CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); +CPUMF_EVENT_ATTR(cf_z16, L1D_RO_EXCL_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z16, DTLB2_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z16, DTLB2_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z16, CRSTE_1MB_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z16, DTLB2_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z16, ITLB2_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z16, ITLB2_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z16, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z16, TLB2_CRSTE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z16, TLB2_ENGINES_BUSY, 0x008b); +CPUMF_EVENT_ATTR(cf_z16, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z16, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z16, L1C_TLB2_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z16, DCW_REQ, 0x0091); +CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_IV, 0x0092); +CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_CHIP_HIT, 0x0093); +CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_DRAWER_HIT, 0x0094); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP, 0x0095); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_CHIP_HIT, 0x0097); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_DRAWER_HIT, 0x0098); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_MODULE, 0x0099); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_DRAWER, 0x009a); +CPUMF_EVENT_ATTR(cf_z16, DCW_OFF_DRAWER, 0x009b); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_MEMORY, 0x009c); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_MODULE_MEMORY, 0x009d); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_DRAWER_MEMORY, 0x009e); +CPUMF_EVENT_ATTR(cf_z16, DCW_OFF_DRAWER_MEMORY, 0x009f); +CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_IV, 0x00a0); +CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_CHIP_HIT, 0x00a1); +CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2); +CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_IV, 0x00a3); +CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4); +CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5); +CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_IV, 0x00a6); +CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7); +CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8); +CPUMF_EVENT_ATTR(cf_z16, ICW_REQ, 0x00a9); +CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_IV, 0x00aa); +CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_CHIP_HIT, 0x00ab); +CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_DRAWER_HIT, 0x00ac); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP, 0x00ad); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_IV, 0x00ae); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_CHIP_HIT, 0x00af); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_DRAWER_HIT, 0x00b0); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_MODULE, 0x00b1); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_DRAWER, 0x00b2); +CPUMF_EVENT_ATTR(cf_z16, ICW_OFF_DRAWER, 0x00b3); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_MEMORY, 0x00b4); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_MODULE_MEMORY, 0x00b5); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_DRAWER_MEMORY, 0x00b6); +CPUMF_EVENT_ATTR(cf_z16, ICW_OFF_DRAWER_MEMORY, 0x00b7); +CPUMF_EVENT_ATTR(cf_z16, BCD_DFP_EXECUTION_SLOTS, 0x00e0); +CPUMF_EVENT_ATTR(cf_z16, VX_BCD_EXECUTION_SLOTS, 0x00e1); +CPUMF_EVENT_ATTR(cf_z16, DECIMAL_INSTRUCTIONS, 0x00e2); +CPUMF_EVENT_ATTR(cf_z16, LAST_HOST_TRANSLATIONS, 0x00e8); +CPUMF_EVENT_ATTR(cf_z16, TX_NC_TABORT, 0x00f4); +CPUMF_EVENT_ATTR(cf_z16, TX_C_TABORT_NO_SPECIAL, 0x00f5); +CPUMF_EVENT_ATTR(cf_z16, TX_C_TABORT_SPECIAL, 0x00f6); +CPUMF_EVENT_ATTR(cf_z16, DFLT_ACCESS, 0x00f8); +CPUMF_EVENT_ATTR(cf_z16, DFLT_CYCLES, 0x00fd); +CPUMF_EVENT_ATTR(cf_z16, SORTL, 0x0100); +CPUMF_EVENT_ATTR(cf_z16, DFLT_CC, 0x0109); +CPUMF_EVENT_ATTR(cf_z16, DFLT_CCFINISH, 0x010a); +CPUMF_EVENT_ATTR(cf_z16, NNPA_INVOCATIONS, 0x010b); +CPUMF_EVENT_ATTR(cf_z16, NNPA_COMPLETIONS, 0x010c); +CPUMF_EVENT_ATTR(cf_z16, NNPA_WAIT_LOCK, 0x010d); +CPUMF_EVENT_ATTR(cf_z16, NNPA_HOLD_LOCK, 0x010e); +CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); + static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES), CPUMF_EVENT_PTR(cf_fvn1, INSTRUCTIONS), @@ -286,7 +414,7 @@ static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = { NULL, }; -static struct attribute *cpumcf_svn_6_pmu_event_attr[] __initdata = { +static struct attribute *cpumcf_svn_67_pmu_event_attr[] __initdata = { CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS), CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES), CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS), @@ -516,6 +644,141 @@ static struct attribute *cpumcf_z14_pmu_event_attr[] __initdata = { NULL, }; +static struct attribute *cpumcf_z15_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z15, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z15, DTLB2_WRITES), + CPUMF_EVENT_PTR(cf_z15, DTLB2_MISSES), + CPUMF_EVENT_PTR(cf_z15, DTLB2_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z15, DTLB2_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_L2D_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, ITLB2_WRITES), + CPUMF_EVENT_PTR(cf_z15, ITLB2_MISSES), + CPUMF_EVENT_PTR(cf_z15, L1I_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z15, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z15, TLB2_ENGINES_BUSY), + CPUMF_EVENT_PTR(cf_z15, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z15, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z15, L1C_TLB2_MISSES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, BCD_DFP_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z15, VX_BCD_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z15, DECIMAL_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_z15, LAST_HOST_TRANSLATIONS), + CPUMF_EVENT_PTR(cf_z15, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z15, DFLT_ACCESS), + CPUMF_EVENT_PTR(cf_z15, DFLT_CYCLES), + CPUMF_EVENT_PTR(cf_z15, DFLT_CC), + CPUMF_EVENT_PTR(cf_z15, DFLT_CCFINISH), + CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + +static struct attribute *cpumcf_z16_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z16, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z16, DTLB2_WRITES), + CPUMF_EVENT_PTR(cf_z16, DTLB2_MISSES), + CPUMF_EVENT_PTR(cf_z16, CRSTE_1MB_WRITES), + CPUMF_EVENT_PTR(cf_z16, DTLB2_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z16, ITLB2_WRITES), + CPUMF_EVENT_PTR(cf_z16, ITLB2_MISSES), + CPUMF_EVENT_PTR(cf_z16, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z16, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z16, TLB2_ENGINES_BUSY), + CPUMF_EVENT_PTR(cf_z16, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z16, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z16, L1C_TLB2_MISSES), + CPUMF_EVENT_PTR(cf_z16, DCW_REQ), + CPUMF_EVENT_PTR(cf_z16, DCW_REQ_IV), + CPUMF_EVENT_PTR(cf_z16, DCW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, DCW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z16, DCW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_MEMORY), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_MODULE_MEMORY), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z16, DCW_OFF_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_IV), + CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, ICW_REQ), + CPUMF_EVENT_PTR(cf_z16, ICW_REQ_IV), + CPUMF_EVENT_PTR(cf_z16, ICW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, ICW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z16, ICW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_MEMORY), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_MODULE_MEMORY), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z16, ICW_OFF_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z16, BCD_DFP_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z16, VX_BCD_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z16, DECIMAL_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_z16, LAST_HOST_TRANSLATIONS), + CPUMF_EVENT_PTR(cf_z16, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z16, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z16, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z16, DFLT_ACCESS), + CPUMF_EVENT_PTR(cf_z16, DFLT_CYCLES), + CPUMF_EVENT_PTR(cf_z16, SORTL), + CPUMF_EVENT_PTR(cf_z16, DFLT_CC), + CPUMF_EVENT_PTR(cf_z16, DFLT_CCFINISH), + CPUMF_EVENT_PTR(cf_z16, NNPA_INVOCATIONS), + CPUMF_EVENT_PTR(cf_z16, NNPA_COMPLETIONS), + CPUMF_EVENT_PTR(cf_z16, NNPA_WAIT_LOCK), + CPUMF_EVENT_PTR(cf_z16, NNPA_HOLD_LOCK), + CPUMF_EVENT_PTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ static struct attribute_group cpumcf_pmu_events_group = { @@ -596,8 +859,8 @@ __init const struct attribute_group **cpumf_cf_event_group(void) case 1 ... 5: csvn = cpumcf_svn_12345_pmu_event_attr; break; - case 6: - csvn = cpumcf_svn_6_pmu_event_attr; + case 6 ... 7: + csvn = cpumcf_svn_67_pmu_event_attr; break; default: csvn = none; @@ -624,9 +887,15 @@ __init const struct attribute_group **cpumf_cf_event_group(void) break; case 0x3906: case 0x3907: + model = cpumcf_z14_pmu_event_attr; + break; case 0x8561: case 0x8562: - model = cpumcf_z14_pmu_event_attr; + model = cpumcf_z15_pmu_event_attr; + break; + case 0x3931: + case 0x3932: + model = cpumcf_z16_pmu_event_attr; break; default: model = none; diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 3d8b12a9a6ff4..15f055947b665 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -193,7 +193,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb, gfp_t gfp_flags) { int i, rc; - unsigned long *new, *tail; + unsigned long *new, *tail, *tail_prev = NULL; if (!sfb->sdbt || !sfb->tail) return -EINVAL; @@ -232,6 +232,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, sfb->num_sdbt++; /* Link current page to tail of chain */ *tail = (unsigned long)(void *) new + 1; + tail_prev = tail; tail = new; } @@ -241,10 +242,22 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, * issue, a new realloc call (if required) might succeed. */ rc = alloc_sample_data_block(tail, gfp_flags); - if (rc) + if (rc) { + /* Undo last SDBT. An SDBT with no SDB at its first + * entry but with an SDBT entry instead can not be + * handled by the interrupt handler code. + * Avoid this situation. + */ + if (tail_prev) { + sfb->num_sdbt--; + free_page((unsigned long) new); + tail = tail_prev; + } break; + } sfb->num_sdb++; tail++; + tail_prev = new = NULL; /* Allocated at least one SBD */ } /* Link sampling buffer to its origin */ @@ -1169,7 +1182,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, sample = (struct hws_basic_entry *) *sdbt; while ((unsigned long *) sample < (unsigned long *) te) { /* Check for an empty sample */ - if (!sample->def) + if (!sample->def || sample->LS) break; /* Update perf event period */ @@ -1300,18 +1313,28 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) */ if (flush_all && done) break; - - /* If an event overflow happened, discard samples by - * processing any remaining sample-data-blocks. - */ - if (event_overflow) - flush_all = 1; } /* Account sample overflows in the event hardware structure */ if (sampl_overflow) OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + sampl_overflow, 1 + num_sdb); + + /* Perf_event_overflow() and perf_event_account_interrupt() limit + * the interrupt rate to an upper limit. Roughly 1000 samples per + * task tick. + * Hitting this limit results in a large number + * of throttled REF_REPORT_THROTTLE entries and the samples + * are dropped. + * Slightly increase the interval to avoid hitting this limit. + */ + if (event_overflow) { + SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10); + debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n", + __func__, + DIV_ROUND_UP(SAMPL_RATE(hwc), 10)); + } + if (sampl_overflow || event_overflow) debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " "overflow stats: sample=%llu event=%llu\n", @@ -1406,8 +1429,8 @@ static int aux_output_begin(struct perf_output_handle *handle, idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); - te->flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; - te->flags = te->flags & ~SDB_TE_ALERT_REQ_MASK; + te->flags &= ~(SDB_TE_BUFFER_FULL_MASK | + SDB_TE_ALERT_REQ_MASK); te->overflow = 0; } /* Save the position of empty SDBs */ @@ -1454,8 +1477,7 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, te = aux_sdb_trailer(aux, alert_index); do { orig_flags = te->flags; - orig_overflow = te->overflow; - *overflow = orig_overflow; + *overflow = orig_overflow = te->overflow; if (orig_flags & SDB_TE_BUFFER_FULL_MASK) { /* * SDB is already set by hardware. @@ -1566,6 +1588,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) perf_aux_output_end(handle, size); num_sdb = aux->sfb.num_sdb; + num_sdb = aux->sfb.num_sdb; while (!done) { /* Get an output handle */ aux = perf_aux_output_begin(handle, cpuhw->event); @@ -1688,7 +1711,7 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages, } /* Allocate aux_buffer struct for the event */ - aux = kmalloc(sizeof(struct aux_buffer), GFP_KERNEL); + aux = kzalloc(sizeof(struct aux_buffer), GFP_KERNEL); if (!aux) goto no_aux; sfb = &aux->sfb; @@ -2194,4 +2217,4 @@ static int __init init_cpum_sampling_pmu(void) } arch_initcall(init_cpum_sampling_pmu); -core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640); +core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index fcb6c2e92b071..1e75cc9835468 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -224,9 +224,13 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { struct unwind_state state; + unsigned long addr; - unwind_for_each_frame(&state, current, regs, 0) - perf_callchain_store(entry, state.ip); + unwind_for_each_frame(&state, current, regs, 0) { + addr = unwind_get_return_address(&state); + if (!addr || perf_callchain_store(entry, addr)) + return; + } } /* Perf definitions for PMU event attributes in sysfs */ diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S index 59dee9d3bebf1..27ac4f324c707 100644 --- a/arch/s390/kernel/pgm_check.S +++ b/arch/s390/kernel/pgm_check.S @@ -78,8 +78,8 @@ PGM_CHECK(do_dat_exception) /* 39 */ PGM_CHECK(do_dat_exception) /* 3a */ PGM_CHECK(do_dat_exception) /* 3b */ PGM_CHECK_DEFAULT /* 3c */ -PGM_CHECK_DEFAULT /* 3d */ -PGM_CHECK_DEFAULT /* 3e */ +PGM_CHECK(do_secure_storage_access) /* 3d */ +PGM_CHECK(do_non_secure_storage_access) /* 3e */ PGM_CHECK_DEFAULT /* 3f */ PGM_CHECK_DEFAULT /* 40 */ PGM_CHECK_DEFAULT /* 41 */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index b0afec673f778..fdd5f37ac1fb8 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -76,6 +76,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) memcpy(dst, src, arch_task_struct_size); dst->thread.fpu.regs = dst->thread.fpu.fprs; + + /* + * Don't transfer over the runtime instrumentation or the guarded + * storage control block pointers. These fields are cleared here instead + * of in copy_thread() to avoid premature freeing of associated memory + * on fork() failure. Wait to clear the RI flag because ->stack still + * refers to the source thread. + */ + dst->thread.ri_cb = NULL; + dst->thread.gs_cb = NULL; + dst->thread.gs_bc_cb = NULL; + return 0; } @@ -105,6 +117,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, p->thread.system_timer = 0; p->thread.hardirq_timer = 0; p->thread.softirq_timer = 0; + p->thread.last_break = 1; frame->sf.back_chain = 0; /* new return point is ret_from_fork */ @@ -132,13 +145,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, frame->childregs.flags = 0; if (new_stackp) frame->childregs.gprs[15] = new_stackp; - - /* Don't copy runtime instrumentation info */ - p->thread.ri_cb = NULL; + /* + * Clear the runtime instrumentation flag after the above childregs + * copy. The CB pointer was already cleared in arch_dup_task_struct(). + */ frame->childregs.psw.mask &= ~PSW_MASK_RI; - /* Don't copy guarded storage control block */ - p->thread.gs_cb = NULL; - p->thread.gs_bc_cb = NULL; /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) { diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 6ebc2117c66c7..91b9b3f73de6e 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -165,8 +165,9 @@ static void show_cpu_mhz(struct seq_file *m, unsigned long n) static int show_cpuinfo(struct seq_file *m, void *v) { unsigned long n = (unsigned long) v - 1; + unsigned long first = cpumask_first(cpu_online_mask); - if (!n) + if (n == first) show_cpu_summary(m, v); if (!machine_has_cpu_mhz) return 0; @@ -179,6 +180,8 @@ static inline void *c_update(loff_t *pos) { if (*pos) *pos = cpumask_next(*pos - 1, cpu_online_mask); + else + *pos = cpumask_first(cpu_online_mask); return *pos < nr_cpu_ids ? (void *)*pos + 1 : NULL; } diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ad71132374f0c..ad74472ce967e 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -324,6 +324,25 @@ static inline void __poke_user_per(struct task_struct *child, child->thread.per_user.end = data; } +static void fixup_int_code(struct task_struct *child, addr_t data) +{ + struct pt_regs *regs = task_pt_regs(child); + int ilc = regs->int_code >> 16; + u16 insn; + + if (ilc > 6) + return; + + if (ptrace_access_vm(child, regs->psw.addr - (regs->int_code >> 16), + &insn, sizeof(insn), FOLL_FORCE) != sizeof(insn)) + return; + + /* double check that tracee stopped on svc instruction */ + if ((insn >> 8) != 0xa) + return; + + regs->int_code = 0x20000 | (data & 0xffff); +} /* * Write a word to the user area of a process at location addr. This * operation does have an additional problem compared to peek_user. @@ -335,7 +354,9 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) struct user *dummy = NULL; addr_t offset; + if (addr < (addr_t) &dummy->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ @@ -353,7 +374,11 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) /* Invalid addressing mode bits */ return -EINVAL; } - *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; + + if (test_pt_regs_flag(regs, PIF_SYSCALL) && + addr == offsetof(struct user, regs.gprs[2])) + fixup_int_code(child, data); + *(addr_t *)((addr_t) ®s->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { /* @@ -719,6 +744,10 @@ static int __poke_user_compat(struct task_struct *child, regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | (__u64)(tmp & PSW32_ADDR_AMODE); } else { + + if (test_pt_regs_flag(regs, PIF_SYSCALL) && + addr == offsetof(struct compat_user, regs.gprs[2])) + fixup_int_code(child, data); /* gpr 0-15 */ *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; } @@ -838,40 +867,45 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { unsigned long mask = -1UL; + long ret = -1; /* * The sysc_tracesys code in entry.S stored the system * call number to gprs[2]. */ if (test_thread_flag(TIF_SYSCALL_TRACE) && - (tracehook_report_syscall_entry(regs) || - regs->gprs[2] >= NR_syscalls)) { + tracehook_report_syscall_entry(regs)) { /* - * Tracing decided this syscall should not happen or the - * debugger stored an invalid system call number. Skip + * Tracing decided this syscall should not happen. Skip * the system call and the system call restart handling. */ - clear_pt_regs_flag(regs, PIF_SYSCALL); - return -1; + goto skip; } /* Do the secure computing check after ptrace. */ if (secure_computing(NULL)) { /* seccomp failures shouldn't expose any additional code. */ - return -1; + goto skip; } if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_enter(regs, regs->gprs[2]); + trace_sys_enter(regs, regs->int_code & 0xffff); if (is_compat_task()) mask = 0xffffffff; - audit_syscall_entry(regs->gprs[2], regs->orig_gpr2 & mask, + audit_syscall_entry(regs->int_code & 0xffff, regs->orig_gpr2 & mask, regs->gprs[3] &mask, regs->gprs[4] &mask, regs->gprs[5] &mask); + if ((signed long)regs->gprs[2] >= NR_syscalls) { + regs->gprs[2] = -ENOSYS; + ret = -ENOSYS; + } return regs->gprs[2]; +skip: + clear_pt_regs_flag(regs, PIF_SYSCALL); + return ret; } asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) @@ -1256,7 +1290,6 @@ static bool is_ri_cb_valid(struct runtime_instr_cb *cb) cb->pc == 1 && cb->qc == 0 && cb->reserved2 == 0 && - cb->key == PAGE_DEFAULT_KEY && cb->reserved3 == 0 && cb->reserved4 == 0 && cb->reserved5 == 0 && @@ -1320,7 +1353,11 @@ static int s390_runtime_instr_set(struct task_struct *target, kfree(data); return -EINVAL; } - + /* + * Override access key in any case, since user space should + * not be able to set it, nor should it care about it. + */ + ri_cb.key = PAGE_DEFAULT_KEY >> 4; preempt_disable(); if (!target->thread.ri_cb) target->thread.ri_cb = data; diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c index 125c7f6e87150..1788a5454b6fc 100644 --- a/arch/s390/kernel/runtime_instr.c +++ b/arch/s390/kernel/runtime_instr.c @@ -57,7 +57,7 @@ static void init_runtime_instr_cb(struct runtime_instr_cb *cb) cb->k = 1; cb->ps = 1; cb->pc = 1; - cb->key = PAGE_DEFAULT_KEY; + cb->key = PAGE_DEFAULT_KEY >> 4; cb->v = 1; } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 3ff291bc63b7e..c36c18542cb45 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -73,6 +74,7 @@ #include #include #include +#include #include "entry.h" /* @@ -92,10 +94,6 @@ char elf_platform[ELF_PLATFORM_SIZE]; unsigned long int_hwcap = 0; -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST -int __bootdata_preserved(prot_virt_guest); -#endif - int __bootdata(noexec_disabled); int __bootdata(memory_end_set); unsigned long __bootdata(memory_end); @@ -111,6 +109,8 @@ unsigned long __bootdata_preserved(__etext_dma); unsigned long __bootdata_preserved(__sdma); unsigned long __bootdata_preserved(__edma); unsigned long __bootdata_preserved(__kaslr_offset); +unsigned int __bootdata_preserved(zlib_dfltcc_support); +EXPORT_SYMBOL(zlib_dfltcc_support); unsigned long VMALLOC_START; EXPORT_SYMBOL(VMALLOC_START); @@ -161,7 +161,7 @@ static void __init set_preferred_console(void) else if (CONSOLE_IS_3270) add_preferred_console("tty3270", 0, NULL); else if (CONSOLE_IS_VT220) - add_preferred_console("ttyS", 1, NULL); + add_preferred_console("ttysclp", 0, NULL); else if (CONSOLE_IS_HVC) add_preferred_console("hvc", 0, NULL); } @@ -355,7 +355,6 @@ early_initcall(async_stack_realloc); void __init arch_call_rest_init(void) { - struct stack_frame *frame; unsigned long stack; stack = stack_alloc(); @@ -368,13 +367,7 @@ void __init arch_call_rest_init(void) set_task_stack_end_magic(current); stack += STACK_INIT_OFFSET; S390_lowcore.kernel_stack = stack; - frame = (struct stack_frame *) stack; - memset(frame, 0, sizeof(*frame)); - /* Branch to rest_init on the new stack, never returns */ - asm volatile( - " la 15,0(%[_frame])\n" - " jg rest_init\n" - : : [_frame] "a" (frame)); + CALL_ON_STACK_NORETURN(rest_init, stack); } static void __init setup_lowcore_dat_off(void) @@ -457,6 +450,8 @@ static void __init setup_lowcore_dat_off(void) lc->spinlock_index = 0; arch_spin_lock_setup(0); lc->br_r1_trampoline = 0x07f1; /* br %r1 */ + lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); + lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); set_prefix((u32)(unsigned long) lc); lowcore_ptr[0] = lc; @@ -571,6 +566,9 @@ static void __init setup_memory_end(void) vmax = _REGION1_SIZE; /* 4-level kernel page table */ } + if (is_prot_virt_host()) + adjust_to_uv_max(&vmax); + /* module area is at the end of the kernel address space. */ MODULES_END = vmax; MODULES_VADDR = MODULES_END - MODULES_LEN; @@ -631,7 +629,7 @@ static struct notifier_block kdump_mem_nb = { /* * Make sure that the area behind memory_end is protected */ -static void reserve_memory_end(void) +static void __init reserve_memory_end(void) { if (memory_end_set) memblock_reserve(memory_end, ULONG_MAX); @@ -640,7 +638,7 @@ static void reserve_memory_end(void) /* * Make sure that oldmem, where the dump is stored, is protected */ -static void reserve_oldmem(void) +static void __init reserve_oldmem(void) { #ifdef CONFIG_CRASH_DUMP if (OLDMEM_BASE) @@ -652,7 +650,7 @@ static void reserve_oldmem(void) /* * Make sure that oldmem, where the dump is stored, is protected */ -static void remove_oldmem(void) +static void __init remove_oldmem(void) { #ifdef CONFIG_CRASH_DUMP if (OLDMEM_BASE) @@ -845,9 +843,6 @@ static void __init setup_memory(void) storage_key_init_range(reg->base, reg->base + reg->size); } psw_set_key(PAGE_DEFAULT_KEY); - - /* Only cosmetics */ - memblock_enforce_memory_limit(memblock_end_of_DRAM()); } /* @@ -926,9 +921,9 @@ static int __init setup_hwcaps(void) if (MACHINE_HAS_VX) { elf_hwcap |= HWCAP_S390_VXRS; if (test_facility(134)) - elf_hwcap |= HWCAP_S390_VXRS_EXT; - if (test_facility(135)) elf_hwcap |= HWCAP_S390_VXRS_BCD; + if (test_facility(135)) + elf_hwcap |= HWCAP_S390_VXRS_EXT; if (test_facility(148)) elf_hwcap |= HWCAP_S390_VXRS_EXT2; if (test_facility(152)) @@ -1012,6 +1007,11 @@ static void __init setup_randomness(void) if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count); memblock_free((unsigned long) vmms, PAGE_SIZE); + +#ifdef CONFIG_ARCH_RANDOM + if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) + static_branch_enable(&s390_arch_random_available); +#endif } /* @@ -1038,8 +1038,7 @@ static void __init setup_control_program_code(void) { union diag318_info diag318_info = { .cpnc = CPNC_LINUX, - .cpvc_linux = 0, - .cpvc_distro = {0}, + .cpvc = 0, }; if (!sclp.has_diag318) @@ -1059,7 +1058,7 @@ static void __init log_component_list(void) if (!early_ipl_comp_list_addr) return; - if (ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR) + if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL) pr_info("Linux is running with Secure-IPL enabled\n"); else pr_info("Linux is running with Secure-IPL disabled\n"); @@ -1103,6 +1102,12 @@ void __init setup_arch(char **cmdline_p) log_component_list(); +#ifdef CONFIG_LOCK_DOWN_IN_SECURE_BOOT + if (ipl_get_secureboot()) + security_lock_kernel_down("Secure IPL", + LOCKDOWN_INTEGRITY_MAX); +#endif + /* Have one command line that is parsed and saved in /proc/cmdline */ /* boot_command_line has been already set up in early.c */ *cmdline_p = boot_command_line; @@ -1117,6 +1122,7 @@ void __init setup_arch(char **cmdline_p) if (IS_ENABLED(CONFIG_EXPOLINE_AUTO)) nospec_auto_detect(); + jump_label_init(); parse_early_param(); #ifdef CONFIG_CRASH_DUMP /* Deactivate elfcorehdr= kernel parameter */ @@ -1151,6 +1157,8 @@ void __init setup_arch(char **cmdline_p) */ memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT)); + if (is_prot_virt_host()) + setup_uv(); setup_memory_end(); setup_memory(); dma_contiguous_reserve(memory_end); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 44974654cbd0c..8c51462f13fd1 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -212,6 +212,8 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_index = 0; lc->br_r1_trampoline = 0x07f1; /* br %r1 */ + lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); + lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); if (nmi_alloc_per_cpu(lc)) goto out_async; if (vdso_alloc_per_cpu(lc)) @@ -262,10 +264,13 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->spinlock_index = 0; lc->percpu_offset = __per_cpu_offset[cpu]; lc->kernel_asce = S390_lowcore.kernel_asce; + lc->user_asce = S390_lowcore.kernel_asce; lc->machine_flags = S390_lowcore.machine_flags; lc->user_timer = lc->system_timer = lc->steal_timer = lc->avg_steal_timer = 0; __ctl_store(lc->cregs_save_area, 0, 15); + lc->cregs_save_area[1] = lc->kernel_asce; + lc->cregs_save_area[7] = lc->vdso_asce; save_access_regs((unsigned int *) lc->access_regs_save_area); memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, sizeof(lc->stfle_fac_list)); @@ -398,7 +403,7 @@ int smp_find_processor_id(u16 address) return -1; } -bool arch_vcpu_is_preempted(int cpu) +bool notrace arch_vcpu_is_preempted(int cpu) { if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu)) return false; @@ -408,7 +413,7 @@ bool arch_vcpu_is_preempted(int cpu) } EXPORT_SYMBOL(arch_vcpu_is_preempted); -void smp_yield_cpu(int cpu) +void notrace smp_yield_cpu(int cpu) { if (MACHINE_HAS_DIAG9C) { diag_stat_inc_norecursion(DIAG_STAT_X09C); @@ -724,39 +729,67 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early) static int smp_add_present_cpu(int cpu); -static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add) +static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, + bool configured, bool early) { struct pcpu *pcpu; - cpumask_t avail; - int cpu, nr, i, j; + int cpu, nr, i; u16 address; nr = 0; - cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); - cpu = cpumask_first(&avail); - for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { - if (sclp.has_core_type && info->core[i].type != boot_core_type) + if (sclp.has_core_type && core->type != boot_core_type) + return nr; + cpu = cpumask_first(avail); + address = core->core_id << smp_cpu_mt_shift; + for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) { + if (pcpu_find_address(cpu_present_mask, address + i)) continue; - address = info->core[i].core_id << smp_cpu_mt_shift; - for (j = 0; j <= smp_cpu_mtid; j++) { - if (pcpu_find_address(cpu_present_mask, address + j)) - continue; - pcpu = pcpu_devices + cpu; - pcpu->address = address + j; - pcpu->state = - (cpu >= info->configured*(smp_cpu_mtid + 1)) ? - CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; - smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); - set_cpu_present(cpu, true); - if (sysfs_add && smp_add_present_cpu(cpu) != 0) - set_cpu_present(cpu, false); - else - nr++; - cpu = cpumask_next(cpu, &avail); - if (cpu >= nr_cpu_ids) + pcpu = pcpu_devices + cpu; + pcpu->address = address + i; + if (configured) + pcpu->state = CPU_STATE_CONFIGURED; + else + pcpu->state = CPU_STATE_STANDBY; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + set_cpu_present(cpu, true); + if (!early && smp_add_present_cpu(cpu) != 0) + set_cpu_present(cpu, false); + else + nr++; + cpumask_clear_cpu(cpu, avail); + cpu = cpumask_next(cpu, avail); + } + return nr; +} + +static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) +{ + struct sclp_core_entry *core; + static cpumask_t avail; + bool configured; + u16 core_id; + int nr, i; + + nr = 0; + cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); + /* + * Add IPL core first (which got logical CPU number 0) to make sure + * that all SMT threads get subsequent logical CPU numbers. + */ + if (early) { + core_id = pcpu_devices[0].address >> smp_cpu_mt_shift; + for (i = 0; i < info->configured; i++) { + core = &info->core[i]; + if (core->core_id == core_id) { + nr += smp_add_core(core, &avail, true, early); break; + } } } + for (i = 0; i < info->combined; i++) { + configured = i < info->configured; + nr += smp_add_core(&info->core[i], &avail, configured, early); + } return nr; } @@ -805,18 +838,21 @@ void __init smp_detect_cpus(void) /* Add CPUs present at boot */ get_online_cpus(); - __smp_rescan_cpus(info, 0); + __smp_rescan_cpus(info, true); put_online_cpus(); memblock_free_early((unsigned long)info, sizeof(*info)); } static void smp_init_secondary(void) { - int cpu = smp_processor_id(); + int cpu = raw_smp_processor_id(); S390_lowcore.last_update_clock = get_tod_clock(); restore_access_regs(S390_lowcore.access_regs_save_area); + set_cpu_flag(CIF_ASCE_PRIMARY); + set_cpu_flag(CIF_ASCE_SECONDARY); cpu_init(); + rcu_cpu_starting(cpu); preempt_disable(); init_cpu_timer(); vtime_init(); @@ -843,30 +879,18 @@ static void __no_sanitize_address smp_start_secondary(void *cpuvoid) S390_lowcore.restart_source = -1UL; __ctl_load(S390_lowcore.cregs_save_area, 0, 15); __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); - CALL_ON_STACK(smp_init_secondary, S390_lowcore.kernel_stack, 0); + CALL_ON_STACK_NORETURN(smp_init_secondary, S390_lowcore.kernel_stack); } /* Upping and downing of CPUs */ int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - struct pcpu *pcpu; - int base, i, rc; + struct pcpu *pcpu = pcpu_devices + cpu; + int rc; - pcpu = pcpu_devices + cpu; if (pcpu->state != CPU_STATE_CONFIGURED) return -EIO; - base = smp_get_base_cpu(cpu); - for (i = 0; i <= smp_cpu_mtid; i++) { - if (base + i < nr_cpu_ids) - if (cpu_online(base + i)) - break; - } - /* - * If this is the first CPU of the core to get online - * do an initial CPU reset. - */ - if (i > smp_cpu_mtid && - pcpu_sigp_retry(pcpu_devices + base, SIGP_INITIAL_CPU_RESET, 0) != + if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; @@ -1148,7 +1172,7 @@ int __ref smp_rescan_cpus(void) smp_get_core_info(info, 0); get_online_cpus(); mutex_lock(&smp_cpu_state_mutex); - nr = __smp_rescan_cpus(info, 1); + nr = __smp_rescan_cpus(info, false); mutex_unlock(&smp_cpu_state_mutex); put_online_cpus(); kfree(info); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index e8766beee5ad8..11c32b228f518 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -310,6 +310,7 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->tk_mult = tk->tkr_mono.mult; vdso_data->tk_shift = tk->tkr_mono.shift; + vdso_data->hrtimer_res = hrtimer_resolution; smp_wmb(); ++vdso_data->tb_update_count; } @@ -353,8 +354,9 @@ static DEFINE_PER_CPU(atomic_t, clock_sync_word); static DEFINE_MUTEX(clock_sync_mutex); static unsigned long clock_sync_flags; -#define CLOCK_SYNC_HAS_STP 0 -#define CLOCK_SYNC_STP 1 +#define CLOCK_SYNC_HAS_STP 0 +#define CLOCK_SYNC_STP 1 +#define CLOCK_SYNC_STPINFO_VALID 2 /* * The get_clock function for the physical clock. It will get the current @@ -591,6 +593,22 @@ void stp_queue_work(void) queue_work(time_sync_wq, &stp_work); } +static int __store_stpinfo(void) +{ + int rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); + + if (rc) + clear_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); + else + set_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); + return rc; +} + +static int stpinfo_valid(void) +{ + return stp_online && test_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); +} + static int stp_sync_clock(void *data) { struct clock_sync_data *sync = data; @@ -612,8 +630,7 @@ static int stp_sync_clock(void *data) if (rc == 0) { sync->clock_delta = clock_delta; clock_sync_global(clock_delta); - rc = chsc_sstpi(stp_page, &stp_info, - sizeof(struct stp_sstpi)); + rc = __store_stpinfo(); if (rc == 0 && stp_info.tmd != 2) rc = -EAGAIN; } @@ -658,7 +675,7 @@ static void stp_work_fn(struct work_struct *work) if (rc) goto out_unlock; - rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); + rc = __store_stpinfo(); if (rc || stp_info.c == 0) goto out_unlock; @@ -695,10 +712,14 @@ static ssize_t stp_ctn_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%016llx\n", - *(unsigned long long *) stp_info.ctnid); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%016llx\n", + *(unsigned long long *) stp_info.ctnid); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL); @@ -707,9 +728,13 @@ static ssize_t stp_ctn_type_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.ctn); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.ctn); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL); @@ -718,9 +743,13 @@ static ssize_t stp_dst_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x2000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x2000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL); @@ -729,9 +758,13 @@ static ssize_t stp_leap_seconds_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x8000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x8000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL); @@ -740,9 +773,13 @@ static ssize_t stp_stratum_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL); @@ -751,9 +788,13 @@ static ssize_t stp_time_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x0800)) - return -ENODATA; - return sprintf(buf, "%i\n", (int) stp_info.tto); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x0800)) + ret = sprintf(buf, "%i\n", (int) stp_info.tto); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL); @@ -762,9 +803,13 @@ static ssize_t stp_time_zone_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online || !(stp_info.vbits & 0x4000)) - return -ENODATA; - return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x4000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(time_zone_offset, 0400, @@ -774,9 +819,13 @@ static ssize_t stp_timing_mode_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.tmd); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.tmd); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL); @@ -785,9 +834,13 @@ static ssize_t stp_timing_state_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (!stp_online) - return -ENODATA; - return sprintf(buf, "%i\n", stp_info.tst); + ssize_t ret = -ENODATA; + + mutex_lock(&stp_work_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.tst); + mutex_unlock(&stp_work_mutex); + return ret; } static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 3627953007eda..2e2500c1404ae 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -65,6 +65,13 @@ EXPORT_SYMBOL_GPL(cpu_topology); cpumask_t cpus_with_topology; +int __cpu_to_node(int cpu) +{ + return cpu_topology[cpu].node_id; +} + +EXPORT_SYMBOL(__cpu_to_node); + static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) { cpumask_t mask; diff --git a/arch/s390/kernel/trace.c b/arch/s390/kernel/trace.c index 490b52e850145..11a669f3cc93c 100644 --- a/arch/s390/kernel/trace.c +++ b/arch/s390/kernel/trace.c @@ -14,7 +14,7 @@ EXPORT_TRACEPOINT_SYMBOL(s390_diagnose); static DEFINE_PER_CPU(unsigned int, diagnose_trace_depth); -void trace_s390_diagnose_norecursion(int diag_nr) +void notrace trace_s390_diagnose_norecursion(int diag_nr) { unsigned long flags; unsigned int *depth; diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index a8204f952315d..6e609b13c0cec 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -60,6 +60,11 @@ bool unwind_next_frame(struct unwind_state *state) ip = READ_ONCE_NOCHECK(sf->gprs[8]); reliable = false; regs = NULL; + if (!__kernel_text_address(ip)) { + /* skip bogus %r14 */ + state->regs = NULL; + return unwind_next_frame(state); + } } else { sf = (struct stack_frame *) state->sp; sp = READ_ONCE_NOCHECK(sf->back_chain); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c new file mode 100644 index 0000000000000..094d5a1d1380a --- /dev/null +++ b/arch/s390/kernel/uv.c @@ -0,0 +1,472 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common Ultravisor functions and initialization + * + * Copyright IBM Corp. 2019, 2020 + */ +#define KMSG_COMPONENT "prot_virt" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */ +#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST +int __bootdata_preserved(prot_virt_guest); +#endif + +#if IS_ENABLED(CONFIG_KVM) +int prot_virt_host; +EXPORT_SYMBOL(prot_virt_host); +struct uv_info __bootdata_preserved(uv_info); +EXPORT_SYMBOL(uv_info); + +static int __init prot_virt_setup(char *val) +{ + bool enabled; + int rc; + + rc = kstrtobool(val, &enabled); + if (!rc && enabled) + prot_virt_host = 1; + + if (is_prot_virt_guest() && prot_virt_host) { + prot_virt_host = 0; + pr_warn("Protected virtualization not available in protected guests."); + } + + if (prot_virt_host && !test_facility(158)) { + prot_virt_host = 0; + pr_warn("Protected virtualization not supported by the hardware."); + } + + return rc; +} +early_param("prot_virt", prot_virt_setup); + +static int __init uv_init(unsigned long stor_base, unsigned long stor_len) +{ + struct uv_cb_init uvcb = { + .header.cmd = UVC_CMD_INIT_UV, + .header.len = sizeof(uvcb), + .stor_origin = stor_base, + .stor_len = stor_len, + }; + + if (uv_call(0, (uint64_t)&uvcb)) { + pr_err("Ultravisor init failed with rc: 0x%x rrc: 0%x\n", + uvcb.header.rc, uvcb.header.rrc); + return -1; + } + return 0; +} + +void __init setup_uv(void) +{ + unsigned long uv_stor_base; + + uv_stor_base = (unsigned long)memblock_alloc_try_nid( + uv_info.uv_base_stor_len, SZ_1M, SZ_2G, + MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); + if (!uv_stor_base) { + pr_warn("Failed to reserve %lu bytes for ultravisor base storage\n", + uv_info.uv_base_stor_len); + goto fail; + } + + if (uv_init(uv_stor_base, uv_info.uv_base_stor_len)) { + memblock_free(uv_stor_base, uv_info.uv_base_stor_len); + goto fail; + } + + pr_info("Reserving %luMB as ultravisor base storage\n", + uv_info.uv_base_stor_len >> 20); + return; +fail: + pr_info("Disabling support for protected virtualization"); + prot_virt_host = 0; +} + +void adjust_to_uv_max(unsigned long *vmax) +{ + *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr); +} + +/* + * Requests the Ultravisor to pin the page in the shared state. This will + * cause an intercept when the guest attempts to unshare the pinned page. + */ +static int uv_pin_shared(unsigned long paddr) +{ + struct uv_cb_cfs uvcb = { + .header.cmd = UVC_CMD_PIN_PAGE_SHARED, + .header.len = sizeof(uvcb), + .paddr = paddr, + }; + + if (uv_call(0, (u64)&uvcb)) + return -EINVAL; + return 0; +} + +/* + * Requests the Ultravisor to encrypt a guest page and make it + * accessible to the host for paging (export). + * + * @paddr: Absolute host address of page to be exported + */ +int uv_convert_from_secure(unsigned long paddr) +{ + struct uv_cb_cfs uvcb = { + .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR, + .header.len = sizeof(uvcb), + .paddr = paddr + }; + + if (uv_call(0, (u64)&uvcb)) + return -EINVAL; + return 0; +} + +/* + * Calculate the expected ref_count for a page that would otherwise have no + * further pins. This was cribbed from similar functions in other places in + * the kernel, but with some slight modifications. We know that a secure + * page can not be a huge page for example. + */ +static int expected_page_refs(struct page *page) +{ + int res; + + res = page_mapcount(page); + if (PageSwapCache(page)) { + res++; + } else if (page_mapping(page)) { + res++; + if (page_has_private(page)) + res++; + } + return res; +} + +static int make_secure_pte(pte_t *ptep, unsigned long addr, + struct page *exp_page, struct uv_cb_header *uvcb) +{ + pte_t entry = READ_ONCE(*ptep); + struct page *page; + int expected, cc = 0; + + if (!pte_present(entry)) + return -ENXIO; + if (pte_val(entry) & _PAGE_INVALID) + return -ENXIO; + + page = pte_page(entry); + if (page != exp_page) + return -ENXIO; + if (PageWriteback(page)) + return -EAGAIN; + expected = expected_page_refs(page); + if (!page_ref_freeze(page, expected)) + return -EBUSY; + set_bit(PG_arch_1, &page->flags); + /* + * If the UVC does not succeed or fail immediately, we don't want to + * loop for long, or we might get stall notifications. + * On the other hand, this is a complex scenario and we are holding a lot of + * locks, so we can't easily sleep and reschedule. We try only once, + * and if the UVC returned busy or partial completion, we return + * -EAGAIN and we let the callers deal with it. + */ + cc = __uv_call(0, (u64)uvcb); + page_ref_unfreeze(page, expected); + /* + * Return -ENXIO if the page was not mapped, -EINVAL for other errors. + * If busy or partially completed, return -EAGAIN. + */ + if (cc == UVC_CC_OK) + return 0; + else if (cc == UVC_CC_BUSY || cc == UVC_CC_PARTIAL) + return -EAGAIN; + return uvcb->rc == 0x10a ? -ENXIO : -EINVAL; +} + +/* + * Requests the Ultravisor to make a page accessible to a guest. + * If it's brought in the first time, it will be cleared. If + * it has been exported before, it will be decrypted and integrity + * checked. + */ +int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) +{ + struct vm_area_struct *vma; + bool local_drain = false; + spinlock_t *ptelock; + unsigned long uaddr; + struct page *page; + pte_t *ptep; + int rc; + +again: + rc = -EFAULT; + down_read(&gmap->mm->mmap_sem); + + uaddr = __gmap_translate(gmap, gaddr); + if (IS_ERR_VALUE(uaddr)) + goto out; + vma = find_vma(gmap->mm, uaddr); + if (!vma) + goto out; + /* + * Secure pages cannot be huge and userspace should not combine both. + * In case userspace does it anyway this will result in an -EFAULT for + * the unpack. The guest is thus never reaching secure mode. If + * userspace is playing dirty tricky with mapping huge pages later + * on this will result in a segmentation fault. + */ + if (is_vm_hugetlb_page(vma)) + goto out; + + rc = -ENXIO; + page = follow_page(vma, uaddr, FOLL_WRITE); + if (IS_ERR_OR_NULL(page)) + goto out; + + lock_page(page); + ptep = get_locked_pte(gmap->mm, uaddr, &ptelock); + rc = make_secure_pte(ptep, uaddr, page, uvcb); + pte_unmap_unlock(ptep, ptelock); + unlock_page(page); +out: + up_read(&gmap->mm->mmap_sem); + + if (rc == -EAGAIN) { + /* + * If we are here because the UVC returned busy or partial + * completion, this is just a useless check, but it is safe. + */ + wait_on_page_writeback(page); + } else if (rc == -EBUSY) { + /* + * If we have tried a local drain and the page refcount + * still does not match our expected safe value, try with a + * system wide drain. This is needed if the pagevecs holding + * the page are on a different CPU. + */ + if (local_drain) { + lru_add_drain_all(); + /* We give up here, and let the caller try again */ + return -EAGAIN; + } + /* + * We are here if the page refcount does not match the + * expected safe value. The main culprits are usually + * pagevecs. With lru_add_drain() we drain the pagevecs + * on the local CPU so that hopefully the refcount will + * reach the expected safe value. + */ + lru_add_drain(); + local_drain = true; + /* And now we try again immediately after draining */ + goto again; + } else if (rc == -ENXIO) { + if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE)) + return -EFAULT; + return -EAGAIN; + } + return rc; +} +EXPORT_SYMBOL_GPL(gmap_make_secure); + +int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr) +{ + struct uv_cb_cts uvcb = { + .header.cmd = UVC_CMD_CONV_TO_SEC_STOR, + .header.len = sizeof(uvcb), + .guest_handle = gmap->guest_handle, + .gaddr = gaddr, + }; + + return gmap_make_secure(gmap, gaddr, &uvcb); +} +EXPORT_SYMBOL_GPL(gmap_convert_to_secure); + +/* + * To be called with the page locked or with an extra reference! This will + * prevent gmap_make_secure from touching the page concurrently. Having 2 + * parallel make_page_accessible is fine, as the UV calls will become a + * no-op if the page is already exported. + */ +int arch_make_page_accessible(struct page *page) +{ + int rc = 0; + + /* Hugepage cannot be protected, so nothing to do */ + if (PageHuge(page)) + return 0; + + /* + * PG_arch_1 is used in 3 places: + * 1. for kernel page tables during early boot + * 2. for storage keys of huge pages and KVM + * 3. As an indication that this page might be secure. This can + * overindicate, e.g. we set the bit before calling + * convert_to_secure. + * As secure pages are never huge, all 3 variants can co-exists. + */ + if (!test_bit(PG_arch_1, &page->flags)) + return 0; + + rc = uv_pin_shared(page_to_phys(page)); + if (!rc) { + clear_bit(PG_arch_1, &page->flags); + return 0; + } + + rc = uv_convert_from_secure(page_to_phys(page)); + if (!rc) { + clear_bit(PG_arch_1, &page->flags); + return 0; + } + + return rc; +} +EXPORT_SYMBOL_GPL(arch_make_page_accessible); + +#endif + +#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) +static ssize_t uv_query_facilities(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return snprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n", + uv_info.inst_calls_list[0], + uv_info.inst_calls_list[1], + uv_info.inst_calls_list[2], + uv_info.inst_calls_list[3]); +} + +static struct kobj_attribute uv_query_facilities_attr = + __ATTR(facilities, 0444, uv_query_facilities, NULL); + +static ssize_t uv_query_max_guest_cpus(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return snprintf(page, PAGE_SIZE, "%d\n", + uv_info.max_guest_cpus); +} + +static struct kobj_attribute uv_query_max_guest_cpus_attr = + __ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL); + +static ssize_t uv_query_max_guest_vms(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return snprintf(page, PAGE_SIZE, "%d\n", + uv_info.max_num_sec_conf); +} + +static struct kobj_attribute uv_query_max_guest_vms_attr = + __ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL); + +static ssize_t uv_query_max_guest_addr(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return snprintf(page, PAGE_SIZE, "%lx\n", + uv_info.max_sec_stor_addr); +} + +static struct kobj_attribute uv_query_max_guest_addr_attr = + __ATTR(max_address, 0444, uv_query_max_guest_addr, NULL); + +static struct attribute *uv_query_attrs[] = { + &uv_query_facilities_attr.attr, + &uv_query_max_guest_cpus_attr.attr, + &uv_query_max_guest_vms_attr.attr, + &uv_query_max_guest_addr_attr.attr, + NULL, +}; + +static struct attribute_group uv_query_attr_group = { + .attrs = uv_query_attrs, +}; + +static ssize_t uv_is_prot_virt_guest(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + int val = 0; + +#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST + val = prot_virt_guest; +#endif + return scnprintf(page, PAGE_SIZE, "%d\n", val); +} + +static ssize_t uv_is_prot_virt_host(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + int val = 0; + +#if IS_ENABLED(CONFIG_KVM) + val = prot_virt_host; +#endif + + return scnprintf(page, PAGE_SIZE, "%d\n", val); +} + +static struct kobj_attribute uv_prot_virt_guest = + __ATTR(prot_virt_guest, 0444, uv_is_prot_virt_guest, NULL); + +static struct kobj_attribute uv_prot_virt_host = + __ATTR(prot_virt_host, 0444, uv_is_prot_virt_host, NULL); + +static const struct attribute *uv_prot_virt_attrs[] = { + &uv_prot_virt_guest.attr, + &uv_prot_virt_host.attr, + NULL, +}; + +static struct kset *uv_query_kset; +static struct kobject *uv_kobj; + +static int __init uv_info_init(void) +{ + int rc = -ENOMEM; + + if (!test_facility(158)) + return 0; + + uv_kobj = kobject_create_and_add("uv", firmware_kobj); + if (!uv_kobj) + return -ENOMEM; + + rc = sysfs_create_files(uv_kobj, uv_prot_virt_attrs); + if (rc) + goto out_kobj; + + uv_query_kset = kset_create_and_add("query", NULL, uv_kobj); + if (!uv_query_kset) + goto out_ind_files; + + rc = sysfs_create_group(&uv_query_kset->kobj, &uv_query_attr_group); + if (!rc) + return 0; + + kset_unregister(uv_query_kset); +out_ind_files: + sysfs_remove_files(uv_kobj, uv_prot_virt_attrs); +out_kobj: + kobject_del(uv_kobj); + kobject_put(uv_kobj); + return rc; +} +device_initcall(uv_info_init); +#endif diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index bec19e7e6e1cf..4a66a1cb919b1 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -18,8 +18,8 @@ KBUILD_AFLAGS_64 += -m64 -s KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin -KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ - -Wl,--hash-style=both +ldflags-y := -fPIC -shared -nostdlib -soname=linux-vdso64.so.1 \ + --hash-style=both --build-id -T $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64) @@ -37,8 +37,8 @@ KASAN_SANITIZE := n $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE - $(call if_changed,vdso64ld) +$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) FORCE + $(call if_changed,ld) # strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -50,8 +50,6 @@ $(obj-vdso64): %.o: %.S FORCE $(call if_changed_dep,vdso64as) # actual build commands -quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@ quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index 081435398e0a1..0c79caa32b592 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -17,12 +17,14 @@ .type __kernel_clock_getres,@function __kernel_clock_getres: CFI_STARTPROC - larl %r1,4f + larl %r1,3f + lg %r0,0(%r1) cghi %r2,__CLOCK_REALTIME_COARSE je 0f cghi %r2,__CLOCK_MONOTONIC_COARSE je 0f - larl %r1,3f + larl %r1,_vdso_data + llgf %r0,__VDSO_CLOCK_REALTIME_RES(%r1) cghi %r2,__CLOCK_REALTIME je 0f cghi %r2,__CLOCK_MONOTONIC @@ -36,7 +38,6 @@ __kernel_clock_getres: jz 2f 0: ltgr %r3,%r3 jz 1f /* res == NULL */ - lg %r0,0(%r1) xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */ stg %r0,8(%r3) /* store tp->tv_usec */ 1: lghi %r2,0 @@ -45,6 +46,5 @@ __kernel_clock_getres: svc 0 br %r14 CFI_ENDPROC -3: .quad __CLOCK_REALTIME_RES -4: .quad __CLOCK_COARSE_RES +3: .quad __CLOCK_COARSE_RES .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index c475ca49cfc6b..6e60cc2443b2e 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -136,7 +136,8 @@ static int do_account_vtime(struct task_struct *tsk) " stck %1" /* Store current tod clock value */ #endif : "=Q" (S390_lowcore.last_update_timer), - "=Q" (S390_lowcore.last_update_clock)); + "=Q" (S390_lowcore.last_update_clock) + : : "cc"); clock = S390_lowcore.last_update_clock - clock; timer -= S390_lowcore.last_update_timer; @@ -216,7 +217,7 @@ void vtime_flush(struct task_struct *tsk) avg_steal = S390_lowcore.avg_steal_timer / 2; if ((s64) steal > 0) { S390_lowcore.steal_timer = 0; - account_steal_time(steal); + account_steal_time(cputime_to_nsecs(steal)); avg_steal += steal; } S390_lowcore.avg_steal_timer = avg_steal; diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 05ee90a5ea08b..12decca22e7c8 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -9,6 +9,6 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqch ccflags-y := -Ivirt/kvm -Iarch/s390/kvm kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o -kvm-objs += diag.o gaccess.o guestdbg.o vsie.o +kvm-objs += diag.o gaccess.o guestdbg.o vsie.o pv.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 45634b3d2e0ae..a2209f021ecae 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -2,7 +2,7 @@ /* * handling diagnose instructions * - * Copyright IBM Corp. 2008, 2011 + * Copyright IBM Corp. 2008, 2020 * * Author(s): Carsten Otte * Christian Borntraeger @@ -187,6 +187,10 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } + /* + * no need to check the return value of vcpu_stop as it can only have + * an error for protvirt, but protvirt means user cpu state + */ if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) kvm_s390_vcpu_stop(vcpu); vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM; diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 07d30ffcfa412..a760722364ac8 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -794,48 +795,268 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, return 1; } -static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, - unsigned long *pages, unsigned long nr_pages, - const union asce asce, enum gacc_mode mode) +static int vm_check_access_key(struct kvm *kvm, u8 access_key, + enum gacc_mode mode, gpa_t gpa) +{ + u8 storage_key, access_control; + bool fetch_protected; + unsigned long hva; + int r; + + if (access_key == 0) + return 0; + + hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); + if (kvm_is_error_hva(hva)) + return PGM_ADDRESSING; + + down_read(¤t->mm->mmap_sem); + r = get_guest_storage_key(current->mm, hva, &storage_key); + up_read(¤t->mm->mmap_sem); + if (r) + return r; + access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key); + if (access_control == access_key) + return 0; + fetch_protected = storage_key & _PAGE_FP_BIT; + if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !fetch_protected) + return 0; + return PGM_PROTECTION; +} + +static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode, + union asce asce) { psw_t *psw = &vcpu->arch.sie_block->gpsw; + unsigned long override; + + if (mode == GACC_FETCH || mode == GACC_IFETCH) { + /* check if fetch protection override enabled */ + override = vcpu->arch.sie_block->gcr[0]; + override &= CR0_FETCH_PROTECTION_OVERRIDE; + /* not applicable if subject to DAT && private space */ + override = override && !(psw_bits(*psw).dat && asce.p); + return override; + } + return false; +} + +static bool fetch_prot_override_applies(unsigned long ga, unsigned int len) +{ + return ga < 2048 && ga + len <= 2048; +} + +static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu) +{ + /* check if storage protection override enabled */ + return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE; +} + +static bool storage_prot_override_applies(u8 access_control) +{ + /* matches special storage protection override key (9) -> allow */ + return access_control == PAGE_SPO_ACC; +} + +static int vcpu_check_access_key(struct kvm_vcpu *vcpu, u8 access_key, + enum gacc_mode mode, union asce asce, gpa_t gpa, + unsigned long ga, unsigned int len) +{ + u8 storage_key, access_control; + unsigned long hva; + int r; + + /* access key 0 matches any storage key -> allow */ + if (access_key == 0) + return 0; + /* + * caller needs to ensure that gfn is accessible, so we can + * assume that this cannot fail + */ + hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa)); + down_read(¤t->mm->mmap_sem); + r = get_guest_storage_key(current->mm, hva, &storage_key); + up_read(¤t->mm->mmap_sem); + if (r) + return r; + access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key); + /* access key matches storage key -> allow */ + if (access_control == access_key) + return 0; + if (mode == GACC_FETCH || mode == GACC_IFETCH) { + /* it is a fetch and fetch protection is off -> allow */ + if (!(storage_key & _PAGE_FP_BIT)) + return 0; + if (fetch_prot_override_applicable(vcpu, mode, asce) && + fetch_prot_override_applies(ga, len)) + return 0; + } + if (storage_prot_override_applicable(vcpu) && + storage_prot_override_applies(access_control)) + return 0; + return PGM_PROTECTION; +} + +/** + * guest_range_to_gpas() - Calculate guest physical addresses of page fragments + * covering a logical range + * @vcpu: virtual cpu + * @ga: guest address, start of range + * @ar: access register + * @gpas: output argument, may be NULL + * @len: length of range in bytes + * @asce: address-space-control element to use for translation + * @mode: access mode + * @access_key: access key to mach the range's storage keys against + * + * Translate a logical range to a series of guest absolute addresses, + * such that the concatenation of page fragments starting at each gpa make up + * the whole range. + * The translation is performed as if done by the cpu for the given @asce, @ar, + * @mode and state of the @vcpu. + * If the translation causes an exception, its program interruption code is + * returned and the &struct kvm_s390_pgm_info pgm member of @vcpu is modified + * such that a subsequent call to kvm_s390_inject_prog_vcpu() will inject + * a correct exception into the guest. + * The resulting gpas are stored into @gpas, unless it is NULL. + * + * Note: All fragments except the first one start at the beginning of a page. + * When deriving the boundaries of a fragment from a gpa, all but the last + * fragment end at the end of the page. + * + * Return: + * * 0 - success + * * <0 - translation could not be performed, for example if guest + * memory could not be accessed + * * >0 - an access exception occurred. In this case the returned value + * is the program interruption code and the contents of pgm may + * be used to inject an exception into the guest. + */ +static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, + unsigned long *gpas, unsigned long len, + const union asce asce, enum gacc_mode mode, + u8 access_key) +{ + psw_t *psw = &vcpu->arch.sie_block->gpsw; + unsigned int offset = offset_in_page(ga); + unsigned int fragment_len; int lap_enabled, rc = 0; enum prot_type prot; + unsigned long gpa; lap_enabled = low_address_protection_enabled(vcpu, asce); - while (nr_pages) { + while (min(PAGE_SIZE - offset, len) > 0) { + fragment_len = min(PAGE_SIZE - offset, len); ga = kvm_s390_logical_to_effective(vcpu, ga); if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode, PROT_TYPE_LA); - ga &= PAGE_MASK; if (psw_bits(*psw).dat) { - rc = guest_translate(vcpu, ga, pages, asce, mode, &prot); + rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot); if (rc < 0) return rc; } else { - *pages = kvm_s390_real_to_abs(vcpu, ga); - if (kvm_is_error_gpa(vcpu->kvm, *pages)) + gpa = kvm_s390_real_to_abs(vcpu, ga); + if (kvm_is_error_gpa(vcpu->kvm, gpa)) rc = PGM_ADDRESSING; } if (rc) return trans_exc(vcpu, rc, ga, ar, mode, prot); - ga += PAGE_SIZE; - pages++; - nr_pages--; + rc = vcpu_check_access_key(vcpu, access_key, mode, asce, gpa, ga, + fragment_len); + if (rc) + return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC); + if (gpas) + *gpas++ = gpa; + offset = 0; + ga += fragment_len; + len -= fragment_len; } return 0; } -int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, - unsigned long len, enum gacc_mode mode) +static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, + void *data, unsigned int len) +{ + const unsigned int offset = offset_in_page(gpa); + const gfn_t gfn = gpa_to_gfn(gpa); + int rc; + + if (mode == GACC_STORE) + rc = kvm_write_guest_page(kvm, gfn, data, offset, len); + else + rc = kvm_read_guest_page(kvm, gfn, data, offset, len); + return rc; +} + +static int +access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, + void *data, unsigned int len, u8 access_key) +{ + struct kvm_memory_slot *slot; + bool writable; + gfn_t gfn; + hva_t hva; + int rc; + + gfn = gpa >> PAGE_SHIFT; + slot = gfn_to_memslot(kvm, gfn); + hva = gfn_to_hva_memslot_prot(slot, gfn, &writable); + + if (kvm_is_error_hva(hva)) + return PGM_ADDRESSING; + /* + * Check if it's a ro memslot, even tho that can't occur (they're unsupported). + * Don't try to actually handle that case. + */ + if (!writable && mode == GACC_STORE) + return -EOPNOTSUPP; + hva += offset_in_page(gpa); + if (mode == GACC_STORE) + rc = copy_to_user_key((void __user *)hva, data, len, access_key); + else + rc = copy_from_user_key(data, (void __user *)hva, len, access_key); + if (rc) + return PGM_PROTECTION; + if (mode == GACC_STORE) + mark_page_dirty(kvm, gfn); + return 0; +} + +int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data, + unsigned long len, enum gacc_mode mode, u8 access_key) +{ + int offset = offset_in_page(gpa); + int fragment_len; + int rc; + + while (min(PAGE_SIZE - offset, len) > 0) { + fragment_len = min(PAGE_SIZE - offset, len); + rc = access_guest_page_with_key(kvm, mode, gpa, data, fragment_len, access_key); + if (rc) + return rc; + offset = 0; + len -= fragment_len; + data += fragment_len; + gpa += fragment_len; + } + return 0; +} + +int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, + void *data, unsigned long len, enum gacc_mode mode, + u8 access_key) { psw_t *psw = &vcpu->arch.sie_block->gpsw; - unsigned long _len, nr_pages, gpa, idx; - unsigned long pages_array[2]; - unsigned long *pages; + unsigned long nr_pages, idx; + unsigned long gpa_array[2]; + unsigned int fragment_len; + unsigned long *gpas; + enum prot_type prot; int need_ipte_lock; union asce asce; + bool try_storage_prot_override; + bool try_fetch_prot_override; int rc; if (!len) @@ -845,55 +1066,85 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, if (rc) return rc; nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; - pages = pages_array; - if (nr_pages > ARRAY_SIZE(pages_array)) - pages = vmalloc(array_size(nr_pages, sizeof(unsigned long))); - if (!pages) + gpas = gpa_array; + if (nr_pages > ARRAY_SIZE(gpa_array)) + gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long))); + if (!gpas) return -ENOMEM; + try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce); + try_storage_prot_override = storage_prot_override_applicable(vcpu); need_ipte_lock = psw_bits(*psw).dat && !asce.r; if (need_ipte_lock) ipte_lock(vcpu); - rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode); - for (idx = 0; idx < nr_pages && !rc; idx++) { - gpa = *(pages + idx) + (ga & ~PAGE_MASK); - _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); - if (mode == GACC_STORE) - rc = kvm_write_guest(vcpu->kvm, gpa, data, _len); - else - rc = kvm_read_guest(vcpu->kvm, gpa, data, _len); - len -= _len; - ga += _len; - data += _len; + /* + * Since we do the access further down ultimately via a move instruction + * that does key checking and returns an error in case of a protection + * violation, we don't need to do the check during address translation. + * Skip it by passing access key 0, which matches any storage key, + * obviating the need for any further checks. As a result the check is + * handled entirely in hardware on access, we only need to take care to + * forego key protection checking if fetch protection override applies or + * retry with the special key 9 in case of storage protection override. + */ + rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0); + if (rc) + goto out_unlock; + for (idx = 0; idx < nr_pages; idx++) { + fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len); + if (try_fetch_prot_override && fetch_prot_override_applies(ga, fragment_len)) { + rc = access_guest_page(vcpu->kvm, mode, gpas[idx], + data, fragment_len); + } else { + rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx], + data, fragment_len, access_key); + } + if (rc == PGM_PROTECTION && try_storage_prot_override) + rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx], + data, fragment_len, PAGE_SPO_ACC); + if (rc == PGM_PROTECTION) + prot = PROT_TYPE_KEYC; + if (rc) + break; + len -= fragment_len; + data += fragment_len; + ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len); } + if (rc > 0) + rc = trans_exc(vcpu, rc, ga, ar, mode, prot); +out_unlock: if (need_ipte_lock) ipte_unlock(vcpu); - if (nr_pages > ARRAY_SIZE(pages_array)) - vfree(pages); + if (nr_pages > ARRAY_SIZE(gpa_array)) + vfree(gpas); return rc; } int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, unsigned long len, enum gacc_mode mode) { - unsigned long _len, gpa; + unsigned int fragment_len; + unsigned long gpa; int rc = 0; while (len && !rc) { gpa = kvm_s390_real_to_abs(vcpu, gra); - _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); - if (mode) - rc = write_guest_abs(vcpu, gpa, data, _len); - else - rc = read_guest_abs(vcpu, gpa, data, _len); - len -= _len; - gra += _len; - data += _len; + fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len); + rc = access_guest_page(vcpu->kvm, mode, gpa, data, fragment_len); + len -= fragment_len; + gra += fragment_len; + data += fragment_len; } return rc; } /** - * guest_translate_address - translate guest logical into guest absolute address + * guest_translate_address_with_key - translate guest logical into guest absolute address + * @vcpu: virtual cpu + * @gva: Guest virtual address + * @ar: Access register + * @gpa: Guest physical address + * @mode: Translation access mode + * @access_key: access key to mach the storage key with * * Parameter semantics are the same as the ones from guest_translate. * The memory contents at the guest address are not changed. @@ -901,11 +1152,10 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * Note: The IPTE lock is not taken during this function, so the caller * has to take care of this. */ -int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, - unsigned long *gpa, enum gacc_mode mode) +int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, + unsigned long *gpa, enum gacc_mode mode, + u8 access_key) { - psw_t *psw = &vcpu->arch.sie_block->gpsw; - enum prot_type prot; union asce asce; int rc; @@ -913,47 +1163,59 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode); if (rc) return rc; - if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) { - if (mode == GACC_STORE) - return trans_exc(vcpu, PGM_PROTECTION, gva, 0, - mode, PROT_TYPE_LA); - } - - if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? */ - rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot); - if (rc > 0) - return trans_exc(vcpu, rc, gva, 0, mode, prot); - } else { - *gpa = kvm_s390_real_to_abs(vcpu, gva); - if (kvm_is_error_gpa(vcpu->kvm, *gpa)) - return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0); - } - - return rc; + return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode, + access_key); } /** * check_gva_range - test a range of guest virtual addresses for accessibility + * @vcpu: virtual cpu + * @gva: Guest virtual address + * @ar: Access register + * @length: Length of test range + * @mode: Translation access mode + * @access_key: access key to mach the storage keys with */ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, - unsigned long length, enum gacc_mode mode) + unsigned long length, enum gacc_mode mode, u8 access_key) { - unsigned long gpa; - unsigned long currlen; + union asce asce; int rc = 0; + rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode); + if (rc) + return rc; ipte_lock(vcpu); - while (length > 0 && !rc) { - currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE)); - rc = guest_translate_address(vcpu, gva, ar, &gpa, mode); - gva += currlen; - length -= currlen; - } + rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode, + access_key); ipte_unlock(vcpu); return rc; } +/** + * check_gpa_range - test a range of guest physical addresses for accessibility + * @kvm: virtual machine instance + * @gpa: guest physical address + * @length: length of test range + * @mode: access mode to test, relevant for storage keys + * @access_key: access key to mach the storage keys with + */ +int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length, + enum gacc_mode mode, u8 access_key) +{ + unsigned int fragment_len; + int rc = 0; + + while (length && !rc) { + fragment_len = min(PAGE_SIZE - offset_in_page(gpa), length); + rc = vm_check_access_key(kvm, access_key, mode, gpa); + length -= fragment_len; + gpa += fragment_len; + } + return rc; +} + /** * kvm_s390_check_low_addr_prot_real - check for low-address protection * @gra: Guest real address diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index f4c51756c4623..e0c296b56e39b 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -18,17 +18,14 @@ /** * kvm_s390_real_to_abs - convert guest real address to guest absolute address - * @vcpu - guest virtual cpu + * @prefix - guest prefix * @gra - guest real address * * Returns the guest absolute address that corresponds to the passed guest real - * address @gra of a virtual guest cpu by applying its prefix. + * address @gra of by applying the given prefix. */ -static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu, - unsigned long gra) +static inline unsigned long _kvm_s390_real_to_abs(u32 prefix, unsigned long gra) { - unsigned long prefix = kvm_s390_get_prefix(vcpu); - if (gra < 2 * PAGE_SIZE) gra += prefix; else if (gra >= prefix && gra < prefix + 2 * PAGE_SIZE) @@ -36,6 +33,43 @@ static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu, return gra; } +/** + * kvm_s390_real_to_abs - convert guest real address to guest absolute address + * @vcpu - guest virtual cpu + * @gra - guest real address + * + * Returns the guest absolute address that corresponds to the passed guest real + * address @gra of a virtual guest cpu by applying its prefix. + */ +static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu, + unsigned long gra) +{ + return _kvm_s390_real_to_abs(kvm_s390_get_prefix(vcpu), gra); +} + +/** + * _kvm_s390_logical_to_effective - convert guest logical to effective address + * @psw: psw of the guest + * @ga: guest logical address + * + * Convert a guest logical address to an effective address by applying the + * rules of the addressing mode defined by bits 31 and 32 of the given PSW + * (extendended/basic addressing mode). + * + * Depending on the addressing mode, the upper 40 bits (24 bit addressing + * mode), 33 bits (31 bit addressing mode) or no bits (64 bit addressing + * mode) of @ga will be zeroed and the remaining bits will be returned. + */ +static inline unsigned long _kvm_s390_logical_to_effective(psw_t *psw, + unsigned long ga) +{ + if (psw_bits(*psw).eaba == PSW_BITS_AMODE_64BIT) + return ga; + if (psw_bits(*psw).eaba == PSW_BITS_AMODE_31BIT) + return ga & ((1UL << 31) - 1); + return ga & ((1UL << 24) - 1); +} + /** * kvm_s390_logical_to_effective - convert guest logical to effective address * @vcpu: guest virtual cpu @@ -52,13 +86,7 @@ static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu, static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu, unsigned long ga) { - psw_t *psw = &vcpu->arch.sie_block->gpsw; - - if (psw_bits(*psw).eaba == PSW_BITS_AMODE_64BIT) - return ga; - if (psw_bits(*psw).eaba == PSW_BITS_AMODE_31BIT) - return ga & ((1UL << 31) - 1); - return ga & ((1UL << 24) - 1); + return _kvm_s390_logical_to_effective(&vcpu->arch.sie_block->gpsw, ga); } /* @@ -158,24 +186,34 @@ enum gacc_mode { GACC_IFETCH, }; -int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, - u8 ar, unsigned long *gpa, enum gacc_mode mode); +int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, + unsigned long *gpa, enum gacc_mode mode, + u8 access_key); + int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, - unsigned long length, enum gacc_mode mode); + unsigned long length, enum gacc_mode mode, u8 access_key); + +int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length, + enum gacc_mode mode, u8 access_key); -int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, - unsigned long len, enum gacc_mode mode); +int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data, + unsigned long len, enum gacc_mode mode, u8 access_key); + +int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, + void *data, unsigned long len, enum gacc_mode mode, + u8 access_key); int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, unsigned long len, enum gacc_mode mode); /** - * write_guest - copy data from kernel space to guest space + * write_guest_with_key - copy data from kernel space to guest space * @vcpu: virtual cpu * @ga: guest address * @ar: access register * @data: source address in kernel space * @len: number of bytes to copy + * @access_key: access key the storage key needs to match * * Copy @len bytes from @data (kernel space) to @ga (guest address). * In order to copy data to guest space the PSW of the vcpu is inspected: @@ -186,8 +224,8 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * The addressing mode of the PSW is also inspected, so that address wrap * around is taken into account for 24-, 31- and 64-bit addressing mode, * if the to be copied data crosses page boundaries in guest address space. - * In addition also low address and DAT protection are inspected before - * copying any data (key protection is currently not implemented). + * In addition low address, DAT and key protection checks are performed before + * copying any data. * * This function modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu. * In case of an access exception (e.g. protection exception) pgm will contain @@ -215,10 +253,53 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * if data has been changed in guest space in case of an exception. */ static inline __must_check +int write_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, + void *data, unsigned long len, u8 access_key) +{ + return access_guest_with_key(vcpu, ga, ar, data, len, GACC_STORE, + access_key); +} + +/** + * write_guest - copy data from kernel space to guest space + * @vcpu: virtual cpu + * @ga: guest address + * @ar: access register + * @data: source address in kernel space + * @len: number of bytes to copy + * + * The behaviour of write_guest is identical to write_guest_with_key, except + * that the PSW access key is used instead of an explicit argument. + */ +static inline __must_check int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, unsigned long len) { - return access_guest(vcpu, ga, ar, data, len, GACC_STORE); + u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key; + + return write_guest_with_key(vcpu, ga, ar, data, len, access_key); +} + +/** + * read_guest_with_key - copy data from guest space to kernel space + * @vcpu: virtual cpu + * @ga: guest address + * @ar: access register + * @data: destination address in kernel space + * @len: number of bytes to copy + * @access_key: access key the storage key needs to match + * + * Copy @len bytes from @ga (guest address) to @data (kernel space). + * + * The behaviour of read_guest_with_key is identical to write_guest_with_key, + * except that data will be copied from guest space to kernel space. + */ +static inline __must_check +int read_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, + void *data, unsigned long len, u8 access_key) +{ + return access_guest_with_key(vcpu, ga, ar, data, len, GACC_FETCH, + access_key); } /** @@ -231,14 +312,16 @@ int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, * * Copy @len bytes from @ga (guest address) to @data (kernel space). * - * The behaviour of read_guest is identical to write_guest, except that - * data will be copied from guest space to kernel space. + * The behaviour of read_guest is identical to read_guest_with_key, except + * that the PSW access key is used instead of an explicit argument. */ static inline __must_check int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, unsigned long len) { - return access_guest(vcpu, ga, ar, data, len, GACC_FETCH); + u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key; + + return read_guest_with_key(vcpu, ga, ar, data, len, access_key); } /** @@ -259,7 +342,10 @@ static inline __must_check int read_guest_instr(struct kvm_vcpu *vcpu, unsigned long ga, void *data, unsigned long len) { - return access_guest(vcpu, ga, 0, data, len, GACC_IFETCH); + u8 access_key = psw_bits(vcpu->arch.sie_block->gpsw).key; + + return access_guest_with_key(vcpu, ga, 0, data, len, GACC_IFETCH, + access_key); } /** diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index a389fa85cca2d..5c10f96a50b12 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -2,7 +2,7 @@ /* * in-kernel handling for sie intercepts * - * Copyright IBM Corp. 2008, 2014 + * Copyright IBM Corp. 2008, 2020 * * Author(s): Carsten Otte * Christian Borntraeger @@ -16,6 +16,7 @@ #include #include #include +#include #include "kvm-s390.h" #include "gaccess.h" @@ -79,6 +80,10 @@ static int handle_stop(struct kvm_vcpu *vcpu) return rc; } + /* + * no need to check the return value of vcpu_stop as it can only have + * an error for protvirt, but protvirt means user cpu state + */ if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) kvm_s390_vcpu_stop(vcpu); return -EOPNOTSUPP; @@ -231,6 +236,13 @@ static int handle_prog(struct kvm_vcpu *vcpu) vcpu->stat.exit_program_interruption++; + /* + * Intercept 8 indicates a loop of specification exceptions + * for protected guests. + */ + if (kvm_s390_pv_cpu_is_protected(vcpu)) + return -EOPNOTSUPP; + if (guestdbg_enabled(vcpu) && per_event(vcpu)) { rc = kvm_s390_handle_per_event(vcpu); if (rc) @@ -318,18 +330,18 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) kvm_s390_get_regs_rre(vcpu, ®1, ®2); - /* Make sure that the source is paged-in */ - rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2], - reg2, &srcaddr, GACC_FETCH); + /* Ensure that the source is paged-in, no actual access -> no key checking */ + rc = guest_translate_address_with_key(vcpu, vcpu->run->s.regs.gprs[reg2], + reg2, &srcaddr, GACC_FETCH, 0); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0); if (rc != 0) return rc; - /* Make sure that the destination is paged-in */ - rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1], - reg1, &dstaddr, GACC_STORE); + /* Ensure that the source is paged-in, no actual access -> no key checking */ + rc = guest_translate_address_with_key(vcpu, vcpu->run->s.regs.gprs[reg1], + reg1, &dstaddr, GACC_STORE, 0); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1); @@ -384,7 +396,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu) goto out; } - if (addr & ~PAGE_MASK) + if (!kvm_s390_pv_cpu_is_protected(vcpu) && (addr & ~PAGE_MASK)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); sctns = (void *)get_zeroed_page(GFP_KERNEL); @@ -395,10 +407,15 @@ int handle_sthyi(struct kvm_vcpu *vcpu) out: if (!cc) { - r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE); - if (r) { - free_page((unsigned long)sctns); - return kvm_s390_inject_prog_cond(vcpu, r); + if (kvm_s390_pv_cpu_is_protected(vcpu)) { + memcpy((void *)(sida_origin(vcpu->arch.sie_block)), + sctns, PAGE_SIZE); + } else { + r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE); + if (r) { + free_page((unsigned long)sctns); + return kvm_s390_inject_prog_cond(vcpu, r); + } } } @@ -444,6 +461,82 @@ static int handle_operexc(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); } +static int handle_pv_spx(struct kvm_vcpu *vcpu) +{ + u32 pref = *(u32 *)vcpu->arch.sie_block->sidad; + + kvm_s390_set_prefix(vcpu, pref); + trace_kvm_s390_handle_prefix(vcpu, 1, pref); + return 0; +} + +static int handle_pv_sclp(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + + spin_lock(&fi->lock); + /* + * 2 cases: + * a: an sccb answering interrupt was already pending or in flight. + * As the sccb value is not known we can simply set some value to + * trigger delivery of a saved SCCB. UV will then use its saved + * copy of the SCCB value. + * b: an error SCCB interrupt needs to be injected so we also inject + * a fake SCCB address. Firmware will use the proper one. + * This makes sure, that both errors and real sccb returns will only + * be delivered after a notification intercept (instruction has + * finished) but not after others. + */ + fi->srv_signal.ext_params |= 0x43000; + set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs); + clear_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs); + spin_unlock(&fi->lock); + return 0; +} + +static int handle_pv_uvc(struct kvm_vcpu *vcpu) +{ + struct uv_cb_share *guest_uvcb = (void *)vcpu->arch.sie_block->sidad; + struct uv_cb_cts uvcb = { + .header.cmd = UVC_CMD_UNPIN_PAGE_SHARED, + .header.len = sizeof(uvcb), + .guest_handle = kvm_s390_pv_get_handle(vcpu->kvm), + .gaddr = guest_uvcb->paddr, + }; + int rc; + + if (guest_uvcb->header.cmd != UVC_CMD_REMOVE_SHARED_ACCESS) { + WARN_ONCE(1, "Unexpected notification intercept for UVC 0x%x\n", + guest_uvcb->header.cmd); + return 0; + } + rc = gmap_make_secure(vcpu->arch.gmap, uvcb.gaddr, &uvcb); + /* + * If the unpin did not succeed, the guest will exit again for the UVC + * and we will retry the unpin. + */ + if (rc == -EINVAL) + return 0; + /* + * If we got -EAGAIN here, we simply return it. It will eventually + * get propagated all the way to userspace, which should then try + * again. + */ + return rc; +} + +static int handle_pv_notification(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.sie_block->ipa == 0xb210) + return handle_pv_spx(vcpu); + if (vcpu->arch.sie_block->ipa == 0xb220) + return handle_pv_sclp(vcpu); + if (vcpu->arch.sie_block->ipa == 0xb9a4) + return handle_pv_uvc(vcpu); + + return handle_instruction(vcpu); +} + int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) { int rc, per_rc = 0; @@ -480,6 +573,28 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) case ICPT_KSS: rc = kvm_s390_skey_check_enable(vcpu); break; + case ICPT_MCHKREQ: + case ICPT_INT_ENABLE: + /* + * PSW bit 13 or a CR (0, 6, 14) changed and we might + * now be able to deliver interrupts. The pre-run code + * will take care of this. + */ + rc = 0; + break; + case ICPT_PV_INSTR: + rc = handle_instruction(vcpu); + break; + case ICPT_PV_NOTIFY: + rc = handle_pv_notification(vcpu); + break; + case ICPT_PV_PREF: + rc = 0; + gmap_convert_to_secure(vcpu->arch.gmap, + kvm_s390_get_prefix(vcpu)); + gmap_convert_to_secure(vcpu->arch.gmap, + kvm_s390_get_prefix(vcpu) + PAGE_SIZE); + break; default: return -EOPNOTSUPP; } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index d1ccc168c0714..4141c6e5fd59d 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2,7 +2,7 @@ /* * handling kvm guest interrupts * - * Copyright IBM Corp. 2008, 2015 + * Copyright IBM Corp. 2008, 2020 * * Author(s): Carsten Otte */ @@ -324,8 +324,11 @@ static inline int gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu) { - return vcpu->kvm->arch.float_int.pending_irqs | - vcpu->arch.local_int.pending_irqs; + unsigned long pending = vcpu->kvm->arch.float_int.pending_irqs | + vcpu->arch.local_int.pending_irqs; + + pending &= ~vcpu->kvm->arch.float_int.masked_irqs; + return pending; } static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) @@ -383,10 +386,18 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) __clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask); if (!(vcpu->arch.sie_block->gcr[0] & CR0_CPU_TIMER_SUBMASK)) __clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask); - if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) + if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) { __clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask); + __clear_bit(IRQ_PEND_EXT_SERVICE_EV, &active_mask); + } if (psw_mchk_disabled(vcpu)) active_mask &= ~IRQ_PEND_MCHK_MASK; + /* PV guest cpus can have a single interruption injected at a time. */ + if (kvm_s390_pv_cpu_is_protected(vcpu) && + vcpu->arch.sie_block->iictl != IICTL_CODE_NONE) + active_mask &= ~(IRQ_PEND_EXT_II_MASK | + IRQ_PEND_IO_MASK | + IRQ_PEND_MCHK_MASK); /* * Check both floating and local interrupt's cr14 because * bit IRQ_PEND_MCHK_REP could be set in both cases. @@ -408,13 +419,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) static void __set_cpu_idle(struct kvm_vcpu *vcpu) { kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT); - set_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); + set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); } static void __unset_cpu_idle(struct kvm_vcpu *vcpu) { kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT); - clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); + clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); } static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) @@ -479,19 +490,23 @@ static void set_intercept_indicators(struct kvm_vcpu *vcpu) static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - int rc; + int rc = 0; vcpu->stat.deliver_cputm++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER, 0, 0); - - rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER, - (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (kvm_s390_pv_cpu_is_protected(vcpu)) { + vcpu->arch.sie_block->iictl = IICTL_CODE_EXT; + vcpu->arch.sie_block->eic = EXT_IRQ_CPU_TIMER; + } else { + rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER, + (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + } clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); return rc ? -EFAULT : 0; } @@ -499,19 +514,23 @@ static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu) static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - int rc; + int rc = 0; vcpu->stat.deliver_ckc++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP, 0, 0); - - rc = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP, - (u16 __user *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (kvm_s390_pv_cpu_is_protected(vcpu)) { + vcpu->arch.sie_block->iictl = IICTL_CODE_EXT; + vcpu->arch.sie_block->eic = EXT_IRQ_CLK_COMP; + } else { + rc = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP, + (u16 __user *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + } clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); return rc ? -EFAULT : 0; } @@ -553,6 +572,20 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, union mci mci; int rc; + /* + * All other possible payload for a machine check (e.g. the register + * contents in the save area) will be handled by the ultravisor, as + * the hypervisor does not not have the needed information for + * protected guests. + */ + if (kvm_s390_pv_cpu_is_protected(vcpu)) { + vcpu->arch.sie_block->iictl = IICTL_CODE_MCHK; + vcpu->arch.sie_block->mcic = mchk->mcic; + vcpu->arch.sie_block->faddr = mchk->failing_storage_address; + vcpu->arch.sie_block->edc = mchk->ext_damage_code; + return 0; + } + mci.val = mchk->mcic; /* take care of lazy register loading */ save_fpu_regs(); @@ -696,17 +729,21 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu) static int __must_check __deliver_restart(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - int rc; + int rc = 0; VCPU_EVENT(vcpu, 3, "%s", "deliver: cpu restart"); vcpu->stat.deliver_restart_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0); - rc = write_guest_lc(vcpu, - offsetof(struct lowcore, restart_old_psw), - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, offsetof(struct lowcore, restart_psw), - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (kvm_s390_pv_cpu_is_protected(vcpu)) { + vcpu->arch.sie_block->iictl = IICTL_CODE_RESTART; + } else { + rc = write_guest_lc(vcpu, + offsetof(struct lowcore, restart_old_psw), + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, offsetof(struct lowcore, restart_psw), + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + } clear_bit(IRQ_PEND_RESTART, &li->pending_irqs); return rc ? -EFAULT : 0; } @@ -748,6 +785,12 @@ static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu) vcpu->stat.deliver_emergency_signal++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, cpu_addr, 0); + if (kvm_s390_pv_cpu_is_protected(vcpu)) { + vcpu->arch.sie_block->iictl = IICTL_CODE_EXT; + vcpu->arch.sie_block->eic = EXT_IRQ_EMERGENCY_SIG; + vcpu->arch.sie_block->extcpuaddr = cpu_addr; + return 0; + } rc = put_guest_lc(vcpu, EXT_IRQ_EMERGENCY_SIG, (u16 *)__LC_EXT_INT_CODE); @@ -776,6 +819,12 @@ static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu) trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL, extcall.code, 0); + if (kvm_s390_pv_cpu_is_protected(vcpu)) { + vcpu->arch.sie_block->iictl = IICTL_CODE_EXT; + vcpu->arch.sie_block->eic = EXT_IRQ_EXTERNAL_CALL; + vcpu->arch.sie_block->extcpuaddr = extcall.code; + return 0; + } rc = put_guest_lc(vcpu, EXT_IRQ_EXTERNAL_CALL, (u16 *)__LC_EXT_INT_CODE); @@ -787,6 +836,21 @@ static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu) return rc ? -EFAULT : 0; } +static int __deliver_prog_pv(struct kvm_vcpu *vcpu, u16 code) +{ + switch (code) { + case PGM_SPECIFICATION: + vcpu->arch.sie_block->iictl = IICTL_CODE_SPECIFICATION; + break; + case PGM_OPERAND: + vcpu->arch.sie_block->iictl = IICTL_CODE_OPERAND; + break; + default: + return -EINVAL; + } + return 0; +} + static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -807,6 +871,10 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, pgm_info.code, 0); + /* PER is handled by the ultravisor */ + if (kvm_s390_pv_cpu_is_protected(vcpu)) + return __deliver_prog_pv(vcpu, pgm_info.code & ~PGM_PER); + switch (pgm_info.code & ~PGM_PER) { case PGM_AFX_TRANSLATION: case PGM_ASX_TRANSLATION: @@ -902,20 +970,49 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) return rc ? -EFAULT : 0; } +#define SCCB_MASK 0xFFFFFFF8 +#define SCCB_EVENT_PENDING 0x3 + +static int write_sclp(struct kvm_vcpu *vcpu, u32 parm) +{ + int rc; + + if (kvm_s390_pv_cpu_get_handle(vcpu)) { + vcpu->arch.sie_block->iictl = IICTL_CODE_EXT; + vcpu->arch.sie_block->eic = EXT_IRQ_SERVICE_SIG; + vcpu->arch.sie_block->eiparams = parm; + return 0; + } + + rc = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + rc |= put_guest_lc(vcpu, parm, + (u32 *)__LC_EXT_PARAMS); + + return rc ? -EFAULT : 0; +} + static int __must_check __deliver_service(struct kvm_vcpu *vcpu) { struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; struct kvm_s390_ext_info ext; - int rc = 0; spin_lock(&fi->lock); - if (!(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) { + if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs) || + !(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) { spin_unlock(&fi->lock); return 0; } ext = fi->srv_signal; memset(&fi->srv_signal, 0, sizeof(ext)); clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs); + clear_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs); + if (kvm_s390_pv_cpu_is_protected(vcpu)) + set_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs); spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "deliver: sclp parameter 0x%x", @@ -924,16 +1021,31 @@ static int __must_check __deliver_service(struct kvm_vcpu *vcpu) trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE, ext.ext_params, 0); - rc = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, ext.ext_params, - (u32 *)__LC_EXT_PARAMS); + return write_sclp(vcpu, ext.ext_params); +} - return rc ? -EFAULT : 0; +static int __must_check __deliver_service_ev(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_ext_info ext; + + spin_lock(&fi->lock); + if (!(test_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs))) { + spin_unlock(&fi->lock); + return 0; + } + ext = fi->srv_signal; + /* only clear the event bit */ + fi->srv_signal.ext_params &= ~SCCB_EVENT_PENDING; + clear_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs); + spin_unlock(&fi->lock); + + VCPU_EVENT(vcpu, 4, "%s", "deliver: sclp parameter event"); + vcpu->stat.deliver_service_signal++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE, + ext.ext_params, 0); + + return write_sclp(vcpu, SCCB_EVENT_PENDING); } static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu) @@ -1028,6 +1140,15 @@ static int __do_deliver_io(struct kvm_vcpu *vcpu, struct kvm_s390_io_info *io) { int rc; + if (kvm_s390_pv_cpu_is_protected(vcpu)) { + vcpu->arch.sie_block->iictl = IICTL_CODE_IO; + vcpu->arch.sie_block->subchannel_id = io->subchannel_id; + vcpu->arch.sie_block->subchannel_nr = io->subchannel_nr; + vcpu->arch.sie_block->io_int_parm = io->io_int_parm; + vcpu->arch.sie_block->io_int_word = io->io_int_word; + return 0; + } + rc = put_guest_lc(vcpu, io->subchannel_id, (u16 *)__LC_SUBCHANNEL_ID); rc |= put_guest_lc(vcpu, io->subchannel_nr, (u16 *)__LC_SUBCHANNEL_NR); rc |= put_guest_lc(vcpu, io->io_int_parm, (u32 *)__LC_IO_INT_PARM); @@ -1329,6 +1450,9 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) case IRQ_PEND_EXT_SERVICE: rc = __deliver_service(vcpu); break; + case IRQ_PEND_EXT_SERVICE_EV: + rc = __deliver_service_ev(vcpu); + break; case IRQ_PEND_PFAULT_DONE: rc = __deliver_pfault_done(vcpu); break; @@ -1421,7 +1545,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL) return -EINVAL; - if (sclp.has_sigpif) + if (sclp.has_sigpif && !kvm_s390_pv_cpu_get_handle(vcpu)) return sca_inject_ext_call(vcpu, src_id); if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) @@ -1682,9 +1806,6 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, return inti; } -#define SCCB_MASK 0xFFFFFFF8 -#define SCCB_EVENT_PENDING 0x3 - static int __inject_service(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) { @@ -1693,6 +1814,11 @@ static int __inject_service(struct kvm *kvm, kvm->stat.inject_service_signal++; spin_lock(&fi->lock); fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING; + + /* We always allow events, track them separately from the sccb ints */ + if (fi->srv_signal.ext_params & SCCB_EVENT_PENDING) + set_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs); + /* * Early versions of the QEMU s390 bios will inject several * service interrupts after another without handling a @@ -1774,7 +1900,14 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) kvm->stat.inject_io++; isc = int_word_to_isc(inti->io.io_int_word); - if (gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) { + /* + * Do not make use of gisa in protected mode. We do not use the lock + * checking variant as this is just a performance optimization and we + * do not hold the lock here. This is ok as the code will pick + * interrupts from both "lists" for delivery. + */ + if (!kvm_s390_pv_get_handle(kvm) && + gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) { VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc); gisa_set_ipm_gisc(gi->origin, isc); kfree(inti); @@ -1835,7 +1968,8 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type) break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: if (!(type & KVM_S390_INT_IO_AI_MASK && - kvm->arch.gisa_int.origin)) + kvm->arch.gisa_int.origin) || + kvm_s390_pv_cpu_get_handle(dst_vcpu)) kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT); break; default: @@ -1982,6 +2116,13 @@ int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu) return test_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs); } +int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + return test_bit(IRQ_PEND_RESTART, &li->pending_irqs); +} + void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -2081,6 +2222,10 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm) struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; int i; + mutex_lock(&kvm->lock); + if (!kvm_s390_pv_is_protected(kvm)) + fi->masked_irqs = 0; + mutex_unlock(&kvm->lock); spin_lock(&fi->lock); fi->pending_irqs = 0; memset(&fi->srv_signal, 0, sizeof(fi->srv_signal)); @@ -2147,7 +2292,8 @@ static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) n++; } } - if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs)) { + if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs) || + test_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs)) { if (n == max_irqs) { /* signal userspace to try again */ ret = -ENOMEM; @@ -2191,7 +2337,7 @@ static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr) return -EINVAL; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; mutex_lock(&fi->ais_lock); ais.simm = fi->simm; @@ -2328,9 +2474,6 @@ static int register_io_adapter(struct kvm_device *dev, if (!adapter) return -ENOMEM; - INIT_LIST_HEAD(&adapter->maps); - init_rwsem(&adapter->maps_lock); - atomic_set(&adapter->nr_maps, 0); adapter->id = adapter_info.id; adapter->isc = adapter_info.isc; adapter->maskable = adapter_info.maskable; @@ -2355,87 +2498,12 @@ int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked) return ret; } -static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr) -{ - struct s390_io_adapter *adapter = get_io_adapter(kvm, id); - struct s390_map_info *map; - int ret; - - if (!adapter || !addr) - return -EINVAL; - - map = kzalloc(sizeof(*map), GFP_KERNEL); - if (!map) { - ret = -ENOMEM; - goto out; - } - INIT_LIST_HEAD(&map->list); - map->guest_addr = addr; - map->addr = gmap_translate(kvm->arch.gmap, addr); - if (map->addr == -EFAULT) { - ret = -EFAULT; - goto out; - } - ret = get_user_pages_fast(map->addr, 1, FOLL_WRITE, &map->page); - if (ret < 0) - goto out; - BUG_ON(ret != 1); - down_write(&adapter->maps_lock); - if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) { - list_add_tail(&map->list, &adapter->maps); - ret = 0; - } else { - put_page(map->page); - ret = -EINVAL; - } - up_write(&adapter->maps_lock); -out: - if (ret) - kfree(map); - return ret; -} - -static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr) -{ - struct s390_io_adapter *adapter = get_io_adapter(kvm, id); - struct s390_map_info *map, *tmp; - int found = 0; - - if (!adapter || !addr) - return -EINVAL; - - down_write(&adapter->maps_lock); - list_for_each_entry_safe(map, tmp, &adapter->maps, list) { - if (map->guest_addr == addr) { - found = 1; - atomic_dec(&adapter->nr_maps); - list_del(&map->list); - put_page(map->page); - kfree(map); - break; - } - } - up_write(&adapter->maps_lock); - - return found ? 0 : -EINVAL; -} - void kvm_s390_destroy_adapters(struct kvm *kvm) { int i; - struct s390_map_info *map, *tmp; - for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) { - if (!kvm->arch.adapters[i]) - continue; - list_for_each_entry_safe(map, tmp, - &kvm->arch.adapters[i]->maps, list) { - list_del(&map->list); - put_page(map->page); - kfree(map); - } + for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) kfree(kvm->arch.adapters[i]); - } } static int modify_io_adapter(struct kvm_device *dev, @@ -2457,11 +2525,14 @@ static int modify_io_adapter(struct kvm_device *dev, if (ret > 0) ret = 0; break; + /* + * The following operations are no longer needed and therefore no-ops. + * The gpa to hva translation is done when an IRQ route is set up. The + * set_irq code uses get_user_pages_remote() to do the actual write. + */ case KVM_S390_IO_ADAPTER_MAP: - ret = kvm_s390_adapter_map(dev->kvm, req.id, req.addr); - break; case KVM_S390_IO_ADAPTER_UNMAP: - ret = kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr); + ret = 0; break; default: ret = -EINVAL; @@ -2500,7 +2571,7 @@ static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr) int ret = 0; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req))) return -EFAULT; @@ -2580,7 +2651,7 @@ static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr) struct kvm_s390_ais_all ais; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais))) return -EFAULT; @@ -2700,19 +2771,15 @@ static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap) return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit; } -static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter, - u64 addr) +static struct page *get_map_page(struct kvm *kvm, u64 uaddr) { - struct s390_map_info *map; + struct page *page = NULL; - if (!adapter) - return NULL; - - list_for_each_entry(map, &adapter->maps, list) { - if (map->guest_addr == addr) - return map; - } - return NULL; + down_read(&kvm->mm->mmap_sem); + get_user_pages_remote(NULL, kvm->mm, uaddr, 1, FOLL_WRITE, + &page, NULL, NULL); + up_read(&kvm->mm->mmap_sem); + return page; } static int adapter_indicators_set(struct kvm *kvm, @@ -2721,30 +2788,35 @@ static int adapter_indicators_set(struct kvm *kvm, { unsigned long bit; int summary_set, idx; - struct s390_map_info *info; + struct page *ind_page, *summary_page; void *map; - info = get_map_info(adapter, adapter_int->ind_addr); - if (!info) + ind_page = get_map_page(kvm, adapter_int->ind_addr); + if (!ind_page) return -1; - map = page_address(info->page); - bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap); - set_bit(bit, map); - idx = srcu_read_lock(&kvm->srcu); - mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT); - set_page_dirty_lock(info->page); - info = get_map_info(adapter, adapter_int->summary_addr); - if (!info) { - srcu_read_unlock(&kvm->srcu, idx); + summary_page = get_map_page(kvm, adapter_int->summary_addr); + if (!summary_page) { + put_page(ind_page); return -1; } - map = page_address(info->page); - bit = get_ind_bit(info->addr, adapter_int->summary_offset, - adapter->swap); + + idx = srcu_read_lock(&kvm->srcu); + map = page_address(ind_page); + bit = get_ind_bit(adapter_int->ind_addr, + adapter_int->ind_offset, adapter->swap); + set_bit(bit, map); + mark_page_dirty(kvm, adapter_int->ind_addr >> PAGE_SHIFT); + set_page_dirty_lock(ind_page); + map = page_address(summary_page); + bit = get_ind_bit(adapter_int->summary_addr, + adapter_int->summary_offset, adapter->swap); summary_set = test_and_set_bit(bit, map); - mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT); - set_page_dirty_lock(info->page); + mark_page_dirty(kvm, adapter_int->summary_addr >> PAGE_SHIFT); + set_page_dirty_lock(summary_page); srcu_read_unlock(&kvm->srcu, idx); + + put_page(ind_page); + put_page(summary_page); return summary_set ? 0 : 1; } @@ -2766,9 +2838,7 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e, adapter = get_io_adapter(kvm, e->adapter.adapter_id); if (!adapter) return -1; - down_read(&adapter->maps_lock); ret = adapter_indicators_set(kvm, adapter, &e->adapter); - up_read(&adapter->maps_lock); if ((ret > 0) && !adapter->masked) { ret = kvm_s390_inject_airq(kvm, adapter); if (ret == 0) @@ -2819,23 +2889,27 @@ int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { - int ret; + u64 uaddr; switch (ue->type) { + /* we store the userspace addresses instead of the guest addresses */ case KVM_IRQ_ROUTING_S390_ADAPTER: e->set = set_adapter_int; - e->adapter.summary_addr = ue->u.adapter.summary_addr; - e->adapter.ind_addr = ue->u.adapter.ind_addr; + uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr); + if (uaddr == -EFAULT) + return -EFAULT; + e->adapter.summary_addr = uaddr; + uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr); + if (uaddr == -EFAULT) + return -EFAULT; + e->adapter.ind_addr = uaddr; e->adapter.summary_offset = ue->u.adapter.summary_offset; e->adapter.ind_offset = ue->u.adapter.ind_offset; e->adapter.adapter_id = ue->u.adapter.adapter_id; - ret = 0; - break; + return 0; default: - ret = -EINVAL; + return -EINVAL; } - - return ret; } int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, @@ -2984,18 +3058,19 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask) { - int vcpu_id, online_vcpus = atomic_read(&kvm->online_vcpus); + int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus); struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; struct kvm_vcpu *vcpu; + u8 vcpu_isc_mask; - for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) { - vcpu = kvm_get_vcpu(kvm, vcpu_id); + for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) { + vcpu = kvm_get_vcpu(kvm, vcpu_idx); if (psw_ioint_disabled(vcpu)) continue; - deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24); - if (deliverable_mask) { + vcpu_isc_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24); + if (deliverable_mask & vcpu_isc_mask) { /* lately kicked but not yet running */ - if (test_and_set_bit(vcpu_id, gi->kicked_mask)) + if (test_and_set_bit(vcpu_idx, gi->kicked_mask)) return; kvm_s390_vcpu_wakeup(vcpu); return; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d047e846e1b9f..1fb8b8a9fa0f5 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2,7 +2,7 @@ /* * hosting IBM Z kernel virtual machines (s390x) * - * Copyright IBM Corp. 2008, 2018 + * Copyright IBM Corp. 2008, 2020 * * Author(s): Carsten Otte * Christian Borntraeger @@ -44,6 +44,7 @@ #include #include #include +#include #include "kvm-s390.h" #include "gaccess.h" @@ -219,6 +220,7 @@ static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc; static struct gmap_notifier gmap_notifier; static struct gmap_notifier vsie_gmap_notifier; debug_info_t *kvm_s390_dbf; +debug_info_t *kvm_s390_dbf_uv; /* Section: not file related */ int kvm_arch_hardware_enable(void) @@ -232,8 +234,10 @@ int kvm_arch_check_processor_compat(void) return 0; } +/* forward declarations */ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, unsigned long end); +static int sca_switch_to_extended(struct kvm *kvm); static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta) { @@ -318,31 +322,31 @@ static void allow_cpu_feat(unsigned long nr) static inline int plo_test_bit(unsigned char nr) { - register unsigned long r0 asm("0") = (unsigned long) nr | 0x100; + unsigned long function = (unsigned long)nr | 0x100; int cc; asm volatile( + " lgr 0,%[function]\n" /* Parameter registers are ignored for "test bit" */ " plo 0,0,0,0(0)\n" " ipm %0\n" " srl %0,28\n" : "=d" (cc) - : "d" (r0) - : "cc"); + : [function] "d" (function) + : "cc", "0"); return cc == 0; } static __always_inline void __insn32_query(unsigned int opcode, u8 *query) { - register unsigned long r0 asm("0") = 0; /* query function */ - register unsigned long r1 asm("1") = (unsigned long) query; - asm volatile( - /* Parameter regs are ignored */ + " lghi 0,0\n" + " lgr 1,%[query]\n" + /* Parameter registers are ignored */ " .insn rrf,%[opc] << 16,2,4,6,0\n" : - : "d" (r0), "a" (r1), [opc] "i" (opcode) - : "cc", "memory"); + : [query] "d" ((unsigned long)query), [opc] "i" (opcode) + : "cc", "memory", "0", "1"); } #define INSN_SORTL 0xb938 @@ -453,16 +457,19 @@ static void kvm_s390_cpu_feat_init(void) int kvm_arch_init(void *opaque) { - int rc; + int rc = -ENOMEM; kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long)); if (!kvm_s390_dbf) return -ENOMEM; - if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) { - rc = -ENOMEM; - goto out_debug_unreg; - } + kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long)); + if (!kvm_s390_dbf_uv) + goto out; + + if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) || + debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view)) + goto out; kvm_s390_cpu_feat_init(); @@ -470,19 +477,17 @@ int kvm_arch_init(void *opaque) rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); if (rc) { pr_err("A FLIC registration call failed with rc=%d\n", rc); - goto out_debug_unreg; + goto out; } rc = kvm_s390_gib_init(GAL_ISC); if (rc) - goto out_gib_destroy; + goto out; return 0; -out_gib_destroy: - kvm_s390_gib_destroy(); -out_debug_unreg: - debug_unregister(kvm_s390_dbf); +out: + kvm_arch_exit(); return rc; } @@ -490,6 +495,7 @@ void kvm_arch_exit(void) { kvm_s390_gib_destroy(); debug_unregister(kvm_s390_dbf); + debug_unregister(kvm_s390_dbf_uv); } /* Section: device related */ @@ -532,6 +538,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_CMMA_MIGRATION: case KVM_CAP_S390_AIS: case KVM_CAP_S390_AIS_MIGRATION: + case KVM_CAP_S390_VCPU_RESETS: + case KVM_CAP_S390_DIAG318: + case KVM_CAP_S390_MEM_OP_EXTENSION: r = 1; break; case KVM_CAP_S390_HPAGE_1M: @@ -566,6 +575,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_BPB: r = test_facility(82); break; + case KVM_CAP_S390_PROTECTED: + r = is_prot_virt_host(); + break; default: r = 0; } @@ -1932,6 +1944,9 @@ static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn) start = slot + 1; } + if (start >= slots->used_slots) + return slots->used_slots - 1; + if (gfn >= memslots[start].base_gfn && gfn < memslots[start].base_gfn + memslots[start].npages) { atomic_set(&slots->lru_slot, start); @@ -2160,6 +2175,271 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm, return r; } +static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp) +{ + struct kvm_vcpu *vcpu; + u16 rc, rrc; + int ret = 0; + int i; + + /* + * We ignore failures and try to destroy as many CPUs as possible. + * At the same time we must not free the assigned resources when + * this fails, as the ultravisor has still access to that memory. + * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak + * behind. + * We want to return the first failure rc and rrc, though. + */ + kvm_for_each_vcpu(i, vcpu, kvm) { + mutex_lock(&vcpu->mutex); + if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) { + *rcp = rc; + *rrcp = rrc; + ret = -EIO; + } + mutex_unlock(&vcpu->mutex); + } + return ret; +} + +static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + int i, r = 0; + u16 dummy; + + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) { + mutex_lock(&vcpu->mutex); + r = kvm_s390_pv_create_cpu(vcpu, rc, rrc); + mutex_unlock(&vcpu->mutex); + if (r) + break; + } + if (r) + kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); + return r; +} + +static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) +{ + int r = 0; + u16 dummy; + void __user *argp = (void __user *)cmd->data; + + switch (cmd->cmd) { + case KVM_PV_ENABLE: { + r = -EINVAL; + if (kvm_s390_pv_is_protected(kvm)) + break; + + /* + * FMT 4 SIE needs esca. As we never switch back to bsca from + * esca, we need no cleanup in the error cases below + */ + r = sca_switch_to_extended(kvm); + if (r) + break; + + down_write(¤t->mm->mmap_sem); + r = gmap_mark_unmergeable(); + up_write(¤t->mm->mmap_sem); + if (r) + break; + + r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc); + if (r) + break; + + r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc); + if (r) + kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy); + + /* we need to block service interrupts from now on */ + set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); + break; + } + case KVM_PV_DISABLE: { + r = -EINVAL; + if (!kvm_s390_pv_is_protected(kvm)) + break; + + r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); + /* + * If a CPU could not be destroyed, destroy VM will also fail. + * There is no point in trying to destroy it. Instead return + * the rc and rrc from the first CPU that failed destroying. + */ + if (r) + break; + r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc); + + /* no need to block service interrupts any more */ + clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); + break; + } + case KVM_PV_SET_SEC_PARMS: { + struct kvm_s390_pv_sec_parm parms = {}; + void *hdr; + + r = -EINVAL; + if (!kvm_s390_pv_is_protected(kvm)) + break; + + r = -EFAULT; + if (copy_from_user(&parms, argp, sizeof(parms))) + break; + + /* Currently restricted to 8KB */ + r = -EINVAL; + if (parms.length > PAGE_SIZE * 2) + break; + + r = -ENOMEM; + hdr = vmalloc(parms.length); + if (!hdr) + break; + + r = -EFAULT; + if (!copy_from_user(hdr, (void __user *)parms.origin, + parms.length)) + r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length, + &cmd->rc, &cmd->rrc); + + vfree(hdr); + break; + } + case KVM_PV_UNPACK: { + struct kvm_s390_pv_unp unp = {}; + + r = -EINVAL; + if (!kvm_s390_pv_is_protected(kvm)) + break; + + r = -EFAULT; + if (copy_from_user(&unp, argp, sizeof(unp))) + break; + + r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak, + &cmd->rc, &cmd->rrc); + break; + } + case KVM_PV_VERIFY: { + r = -EINVAL; + if (!kvm_s390_pv_is_protected(kvm)) + break; + + r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), + UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc); + KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc, + cmd->rrc); + break; + } + case KVM_PV_PREP_RESET: { + r = -EINVAL; + if (!kvm_s390_pv_is_protected(kvm)) + break; + + r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), + UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc); + KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x", + cmd->rc, cmd->rrc); + break; + } + case KVM_PV_UNSHARE_ALL: { + r = -EINVAL; + if (!kvm_s390_pv_is_protected(kvm)) + break; + + r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), + UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc); + KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x", + cmd->rc, cmd->rrc); + break; + } + default: + r = -ENOTTY; + } + return r; +} + +static bool access_key_invalid(u8 access_key) +{ + return access_key > 0xf; +} + +static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) +{ + void __user *uaddr = (void __user *)mop->buf; + u64 supported_flags; + void *tmpbuf = NULL; + int r, srcu_idx; + + supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION + | KVM_S390_MEMOP_F_CHECK_ONLY; + if (mop->flags & ~supported_flags || !mop->size) + return -EINVAL; + if (mop->size > MEM_OP_MAX_SIZE) + return -E2BIG; + if (kvm_s390_pv_is_protected(kvm)) + return -EINVAL; + if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { + if (access_key_invalid(mop->key)) + return -EINVAL; + } else { + mop->key = 0; + } + if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { + tmpbuf = vmalloc(mop->size); + if (!tmpbuf) + return -ENOMEM; + } + + srcu_idx = srcu_read_lock(&kvm->srcu); + + if (kvm_is_error_gpa(kvm, mop->gaddr)) { + r = PGM_ADDRESSING; + goto out_unlock; + } + + switch (mop->op) { + case KVM_S390_MEMOP_ABSOLUTE_READ: { + if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { + r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key); + } else { + r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, + mop->size, GACC_FETCH, mop->key); + if (r == 0) { + if (copy_to_user(uaddr, tmpbuf, mop->size)) + r = -EFAULT; + } + } + break; + } + case KVM_S390_MEMOP_ABSOLUTE_WRITE: { + if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { + r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key); + } else { + if (copy_from_user(tmpbuf, uaddr, mop->size)) { + r = -EFAULT; + break; + } + r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, + mop->size, GACC_STORE, mop->key); + } + break; + } + default: + r = -EINVAL; + } + +out_unlock: + srcu_read_unlock(&kvm->srcu, srcu_idx); + + vfree(tmpbuf); + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2257,6 +2537,42 @@ long kvm_arch_vm_ioctl(struct file *filp, mutex_unlock(&kvm->slots_lock); break; } + case KVM_S390_PV_COMMAND: { + struct kvm_pv_cmd args; + + /* protvirt means user cpu state */ + kvm_s390_set_user_cpu_state_ctrl(kvm); + r = 0; + if (!is_prot_virt_host()) { + r = -EINVAL; + break; + } + if (copy_from_user(&args, argp, sizeof(args))) { + r = -EFAULT; + break; + } + if (args.flags) { + r = -EINVAL; + break; + } + mutex_lock(&kvm->lock); + r = kvm_s390_handle_pv(kvm, &args); + mutex_unlock(&kvm->lock); + if (copy_to_user(argp, &args, sizeof(args))) { + r = -EFAULT; + break; + } + break; + } + case KVM_S390_MEM_OP: { + struct kvm_s390_mem_op mem_op; + + if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) + r = kvm_s390_vm_mem_op(kvm, &mem_op); + else + r = -EFAULT; + break; + } default: r = -ENOTTY; } @@ -2520,6 +2836,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { + u16 rc, rrc; + VCPU_EVENT(vcpu, 3, "%s", "free cpu"); trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); kvm_s390_clear_local_irqs(vcpu); @@ -2532,6 +2850,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) if (vcpu->kvm->arch.use_cmma) kvm_s390_vcpu_unsetup_cmma(vcpu); + /* We can not hold the vcpu mutex here, we are already dying */ + if (kvm_s390_pv_cpu_get_handle(vcpu)) + kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc); free_page((unsigned long)(vcpu->arch.sie_block)); kvm_vcpu_uninit(vcpu); @@ -2556,10 +2877,20 @@ static void kvm_free_vcpus(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { + u16 rc, rrc; + kvm_free_vcpus(kvm); sca_dispose(kvm); - debug_unregister(kvm->arch.dbf); kvm_s390_gisa_destroy(kvm); + /* + * We are already at the end of life and kvm->lock is not taken. + * This is ok as the file descriptor is closed by now and nobody + * can mess with the pv state. To avoid lockdep_assert_held from + * complaining we do not use kvm_s390_pv_is_protected. + */ + if (kvm_s390_pv_get_handle(kvm)) + kvm_s390_pv_deinit_vm(kvm, &rc, &rrc); + debug_unregister(kvm->arch.dbf); free_page((unsigned long)kvm->arch.sie_page2); if (!kvm_is_ucontrol(kvm)) gmap_remove(kvm->arch.gmap); @@ -2655,6 +2986,9 @@ static int sca_switch_to_extended(struct kvm *kvm) unsigned int vcpu_idx; u32 scaol, scaoh; + if (kvm->arch.use_esca) + return 0; + new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO); if (!new_sca) return -ENOMEM; @@ -2715,7 +3049,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) KVM_SYNC_ACRS | KVM_SYNC_CRS | KVM_SYNC_ARCH0 | - KVM_SYNC_PFAULT; + KVM_SYNC_PFAULT | + KVM_SYNC_DIAG318; kvm_s390_set_prefix(vcpu, 0); if (test_kvm_facility(vcpu->kvm, 64)) vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; @@ -2847,35 +3182,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) } -static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) -{ - /* this equals initial cpu reset in pop, but we don't switch to ESA */ - vcpu->arch.sie_block->gpsw.mask = 0UL; - vcpu->arch.sie_block->gpsw.addr = 0UL; - kvm_s390_set_prefix(vcpu, 0); - kvm_s390_set_cpu_timer(vcpu, 0); - vcpu->arch.sie_block->ckc = 0UL; - vcpu->arch.sie_block->todpr = 0; - memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); - vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 | - CR0_INTERRUPT_KEY_SUBMASK | - CR0_MEASUREMENT_ALERT_SUBMASK; - vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 | - CR14_UNUSED_33 | - CR14_EXTERNAL_DAMAGE_SUBMASK; - /* make sure the new fpc will be lazily loaded */ - save_fpu_regs(); - current->thread.fpu.fpc = 0; - vcpu->arch.sie_block->gbea = 1; - vcpu->arch.sie_block->pp = 0; - vcpu->arch.sie_block->fpf &= ~FPF_BPBC; - vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; - kvm_clear_async_pf_completion_queue(vcpu); - if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) - kvm_s390_vcpu_stop(vcpu); - kvm_s390_clear_local_irqs(vcpu); -} - void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) { mutex_lock(&vcpu->kvm->lock); @@ -2968,6 +3274,7 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { int rc = 0; + u16 uvrc, uvrrc; atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | CPUSTAT_SM | @@ -3035,6 +3342,14 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) kvm_s390_vcpu_crypto_setup(vcpu); + mutex_lock(&vcpu->kvm->lock); + if (kvm_s390_pv_is_protected(vcpu->kvm)) { + rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc); + if (rc) + kvm_s390_vcpu_unsetup_cmma(vcpu); + } + mutex_unlock(&vcpu->kvm->lock); + return rc; } @@ -3091,6 +3406,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { + clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); return kvm_s390_vcpu_has_irq(vcpu, 0); } @@ -3290,10 +3606,60 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, return r; } -static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) +static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu) { - kvm_s390_vcpu_initial_reset(vcpu); - return 0; + vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI; + vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; + memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb)); + + kvm_clear_async_pf_completion_queue(vcpu); + if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) + kvm_s390_vcpu_stop(vcpu); + kvm_s390_clear_local_irqs(vcpu); +} + +static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) +{ + /* Initial reset is a superset of the normal reset */ + kvm_arch_vcpu_ioctl_normal_reset(vcpu); + + /* this equals initial cpu reset in pop, but we don't switch to ESA */ + vcpu->arch.sie_block->gpsw.mask = 0; + vcpu->arch.sie_block->gpsw.addr = 0; + kvm_s390_set_prefix(vcpu, 0); + kvm_s390_set_cpu_timer(vcpu, 0); + vcpu->arch.sie_block->ckc = 0; + memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr)); + vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK; + vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK; + vcpu->run->s.regs.fpc = 0; + /* + * Do not reset these registers in the protected case, as some of + * them are overlayed and they are not accessible in this case + * anyway. + */ + if (!kvm_s390_pv_cpu_is_protected(vcpu)) { + vcpu->arch.sie_block->gbea = 1; + vcpu->arch.sie_block->pp = 0; + vcpu->arch.sie_block->fpf &= ~FPF_BPBC; + vcpu->arch.sie_block->todpr = 0; + } +} + +static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu) +{ + struct kvm_sync_regs *regs = &vcpu->run->s.regs; + + /* Clear reset is a superset of the initial reset */ + kvm_arch_vcpu_ioctl_initial_reset(vcpu); + + memset(®s->gprs, 0, sizeof(regs->gprs)); + memset(®s->vrs, 0, sizeof(regs->vrs)); + memset(®s->acrs, 0, sizeof(regs->acrs)); + memset(®s->gscb, 0, sizeof(regs->gscb)); + + regs->etoken = 0; + regs->etoken_extension = 0; } int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) @@ -3463,16 +3829,22 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, vcpu_load(vcpu); /* user space knows about this interface - let it control the state */ - vcpu->kvm->arch.user_cpu_state_ctrl = 1; + kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm); switch (mp_state->mp_state) { case KVM_MP_STATE_STOPPED: - kvm_s390_vcpu_stop(vcpu); + rc = kvm_s390_vcpu_stop(vcpu); break; case KVM_MP_STATE_OPERATING: - kvm_s390_vcpu_start(vcpu); + rc = kvm_s390_vcpu_start(vcpu); break; case KVM_MP_STATE_LOAD: + if (!kvm_s390_pv_cpu_is_protected(vcpu)) { + rc = -ENXIO; + break; + } + rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD); + break; case KVM_MP_STATE_CHECK_STOP: /* fall through - CHECK_STOP and LOAD are not supported yet */ default: @@ -3725,7 +4097,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) kvm_s390_patch_guest_per_regs(vcpu); } - clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask); + clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask); vcpu->arch.sie_block->icptcode = 0; cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); @@ -3824,9 +4196,11 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) return vcpu_post_run_fault_in_sie(vcpu); } +#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK) static int __vcpu_run(struct kvm_vcpu *vcpu) { int rc, exit_reason; + struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block; /* * We try to hold kvm->srcu during most of vcpu_run (except when run- @@ -3848,8 +4222,28 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) guest_enter_irqoff(); __disable_cpu_timer_accounting(vcpu); local_irq_enable(); + if (kvm_s390_pv_cpu_is_protected(vcpu)) { + memcpy(sie_page->pv_grregs, + vcpu->run->s.regs.gprs, + sizeof(sie_page->pv_grregs)); + } exit_reason = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); + if (kvm_s390_pv_cpu_is_protected(vcpu)) { + memcpy(vcpu->run->s.regs.gprs, + sie_page->pv_grregs, + sizeof(sie_page->pv_grregs)); + /* + * We're not allowed to inject interrupts on intercepts + * that leave the guest state in an "in-between" state + * where the next SIE entry will do a continuation. + * Fence interrupts in our "internal" PSW. + */ + if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR || + vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) { + vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; + } + } local_irq_disable(); __enable_cpu_timer_accounting(vcpu); guest_exit_irqoff(); @@ -3863,7 +4257,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) return rc; } -static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct runtime_instr_cb *riccb; struct gs_cb *gscb; @@ -3872,16 +4266,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; - if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) - kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); - if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { - memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); - /* some control register changes require a tlb flush */ - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); - } if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { - kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); - vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; @@ -3893,6 +4278,11 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) kvm_clear_async_pf_completion_queue(vcpu); } + if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) { + vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318; + vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc; + VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc); + } /* * If userspace sets the riccb (e.g. after migration) to a valid state, * we should enable RI here instead of doing the lazy enablement. @@ -3922,6 +4312,36 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->arch.sie_block->fpf &= ~FPF_BPBC; vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0; } + if (MACHINE_HAS_GS) { + preempt_disable(); + __ctl_set_bit(2, 4); + if (current->thread.gs_cb) { + vcpu->arch.host_gscb = current->thread.gs_cb; + save_gs_cb(vcpu->arch.host_gscb); + } + if (vcpu->arch.gs_enabled) { + current->thread.gs_cb = (struct gs_cb *) + &vcpu->run->s.regs.gscb; + restore_gs_cb(current->thread.gs_cb); + } + preempt_enable(); + } + /* SIE will load etoken directly from SDNX and therefore kvm_run */ +} + +static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) + kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); + if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { + memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); + /* some control register changes require a tlb flush */ + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + } + if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { + kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm); + vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; + } save_access_regs(vcpu->arch.host_acrs); restore_access_regs(vcpu->run->s.regs.acrs); /* save host (userspace) fprs/vrs */ @@ -3936,23 +4356,48 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (test_fp_ctl(current->thread.fpu.fpc)) /* User space provided an invalid FPC, let's clear it */ current->thread.fpu.fpc = 0; + + /* Sync fmt2 only data */ + if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) { + sync_regs_fmt2(vcpu, kvm_run); + } else { + /* + * In several places we have to modify our internal view to + * not do things that are disallowed by the ultravisor. For + * example we must not inject interrupts after specific exits + * (e.g. 112 prefix page not secure). We do this by turning + * off the machine check, external and I/O interrupt bits + * of our PSW copy. To avoid getting validity intercepts, we + * do only accept the condition code from userspace. + */ + vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC; + vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask & + PSW_MASK_CC; + } + + kvm_run->kvm_dirty_regs = 0; +} + +static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; + kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; + kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; + kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; + kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val; if (MACHINE_HAS_GS) { preempt_disable(); __ctl_set_bit(2, 4); - if (current->thread.gs_cb) { - vcpu->arch.host_gscb = current->thread.gs_cb; - save_gs_cb(vcpu->arch.host_gscb); - } - if (vcpu->arch.gs_enabled) { - current->thread.gs_cb = (struct gs_cb *) - &vcpu->run->s.regs.gscb; - restore_gs_cb(current->thread.gs_cb); - } + if (vcpu->arch.gs_enabled) + save_gs_cb(current->thread.gs_cb); + current->thread.gs_cb = vcpu->arch.host_gscb; + restore_gs_cb(vcpu->arch.host_gscb); + if (!vcpu->arch.host_gscb) + __ctl_clear_bit(2, 4); + vcpu->arch.host_gscb = NULL; preempt_enable(); } - /* SIE will load etoken directly from SDNX and therefore kvm_run */ - - kvm_run->kvm_dirty_regs = 0; + /* SIE will save etoken directly into SDNX and therefore kvm_run */ } static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) @@ -3963,13 +4408,9 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu); kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; - kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; - kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; - kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; kvm_run->s.regs.pft = vcpu->arch.pfault_token; kvm_run->s.regs.pfs = vcpu->arch.pfault_select; kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; - kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC; save_access_regs(vcpu->run->s.regs.acrs); restore_access_regs(vcpu->arch.host_acrs); /* Save guest register state */ @@ -3978,19 +4419,8 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) /* Restore will be done lazily at return */ current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; - if (MACHINE_HAS_GS) { - __ctl_set_bit(2, 4); - if (vcpu->arch.gs_enabled) - save_gs_cb(current->thread.gs_cb); - preempt_disable(); - current->thread.gs_cb = vcpu->arch.host_gscb; - restore_gs_cb(vcpu->arch.host_gscb); - preempt_enable(); - if (!vcpu->arch.host_gscb) - __ctl_clear_bit(2, 4); - vcpu->arch.host_gscb = NULL; - } - /* SIE will save etoken directly into SDNX and therefore kvm_run */ + if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) + store_regs_fmt2(vcpu, kvm_run); } int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) @@ -4014,6 +4444,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_sigset_activate(vcpu); + /* + * no need to check the return value of vcpu_start as it can only have + * an error for protvirt, but protvirt means user cpu state + */ if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { kvm_s390_vcpu_start(vcpu); } else if (is_vcpu_stopped(vcpu)) { @@ -4151,18 +4585,27 @@ static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); } -void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) +int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) { - int i, online_vcpus, started_vcpus = 0; + int i, online_vcpus, r = 0, started_vcpus = 0; if (!is_vcpu_stopped(vcpu)) - return; + return 0; trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); /* Only one cpu at a time may enter/leave the STOPPED state. */ spin_lock(&vcpu->kvm->arch.start_stop_lock); online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); + /* Let's tell the UV that we want to change into the operating state */ + if (kvm_s390_pv_cpu_is_protected(vcpu)) { + r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR); + if (r) { + spin_unlock(&vcpu->kvm->arch.start_stop_lock); + return r; + } + } + for (i = 0; i < online_vcpus; i++) { if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) started_vcpus++; @@ -4181,32 +4624,53 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) } kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED); + /* + * The real PSW might have changed due to a RESTART interpreted by the + * ultravisor. We block all interrupts and let the next sie exit + * refresh our view. + */ + if (kvm_s390_pv_cpu_is_protected(vcpu)) + vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; /* * Another VCPU might have used IBS while we were offline. * Let's play safe and flush the VCPU at startup. */ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); spin_unlock(&vcpu->kvm->arch.start_stop_lock); - return; + return 0; } -void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) +int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) { - int i, online_vcpus, started_vcpus = 0; + int i, online_vcpus, r = 0, started_vcpus = 0; struct kvm_vcpu *started_vcpu = NULL; if (is_vcpu_stopped(vcpu)) - return; + return 0; trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); /* Only one cpu at a time may enter/leave the STOPPED state. */ spin_lock(&vcpu->kvm->arch.start_stop_lock); online_vcpus = atomic_read(&vcpu->kvm->online_vcpus); - /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */ - kvm_s390_clear_stop_irq(vcpu); + /* Let's tell the UV that we want to change into the stopped state */ + if (kvm_s390_pv_cpu_is_protected(vcpu)) { + r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP); + if (r) { + spin_unlock(&vcpu->kvm->arch.start_stop_lock); + return r; + } + } + /* + * Set the VCPU to STOPPED and THEN clear the interrupt flag, + * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders + * have been fully processed. This will ensure that the VCPU + * is kept BUSY if another VCPU is inquiring with SIGP SENSE. + */ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED); + kvm_s390_clear_stop_irq(vcpu); + __disable_ibs_on_vcpu(vcpu); for (i = 0; i < online_vcpus; i++) { @@ -4225,7 +4689,7 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) } spin_unlock(&vcpu->kvm->arch.start_stop_lock); - return; + return 0; } static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, @@ -4252,37 +4716,74 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return r; } -static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, +static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu, struct kvm_s390_mem_op *mop) +{ + void __user *uaddr = (void __user *)mop->buf; + int r = 0; + + if (mop->flags || !mop->size) + return -EINVAL; + if (mop->size + mop->sida_offset < mop->size) + return -EINVAL; + if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block)) + return -E2BIG; + if (!kvm_s390_pv_cpu_is_protected(vcpu)) + return -EINVAL; + + switch (mop->op) { + case KVM_S390_MEMOP_SIDA_READ: + if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) + + mop->sida_offset), mop->size)) + r = -EFAULT; + + break; + case KVM_S390_MEMOP_SIDA_WRITE: + if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) + + mop->sida_offset), uaddr, mop->size)) + r = -EFAULT; + break; + } + return r; +} + +static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu, + struct kvm_s390_mem_op *mop) { void __user *uaddr = (void __user *)mop->buf; void *tmpbuf = NULL; - int r, srcu_idx; + int r = 0; const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION - | KVM_S390_MEMOP_F_CHECK_ONLY; + | KVM_S390_MEMOP_F_CHECK_ONLY + | KVM_S390_MEMOP_F_SKEY_PROTECTION; if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size) return -EINVAL; - if (mop->size > MEM_OP_MAX_SIZE) return -E2BIG; - + if (kvm_s390_pv_cpu_is_protected(vcpu)) + return -EINVAL; + if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { + if (access_key_invalid(mop->key)) + return -EINVAL; + } else { + mop->key = 0; + } if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { tmpbuf = vmalloc(mop->size); if (!tmpbuf) return -ENOMEM; } - srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - switch (mop->op) { case KVM_S390_MEMOP_LOGICAL_READ: if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gva_range(vcpu, mop->gaddr, mop->ar, - mop->size, GACC_FETCH); + r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, + GACC_FETCH, mop->key); break; } - r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); + r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, + mop->size, mop->key); if (r == 0) { if (copy_to_user(uaddr, tmpbuf, mop->size)) r = -EFAULT; @@ -4290,22 +4791,19 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, break; case KVM_S390_MEMOP_LOGICAL_WRITE: if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gva_range(vcpu, mop->gaddr, mop->ar, - mop->size, GACC_STORE); + r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, + GACC_STORE, mop->key); break; } if (copy_from_user(tmpbuf, uaddr, mop->size)) { r = -EFAULT; break; } - r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); + r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, + mop->size, mop->key); break; - default: - r = -EINVAL; } - srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); - if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); @@ -4313,6 +4811,31 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, return r; } +static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu, + struct kvm_s390_mem_op *mop) +{ + int r, srcu_idx; + + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + + switch (mop->op) { + case KVM_S390_MEMOP_LOGICAL_READ: + case KVM_S390_MEMOP_LOGICAL_WRITE: + r = kvm_s390_vcpu_mem_op(vcpu, mop); + break; + case KVM_S390_MEMOP_SIDA_READ: + case KVM_S390_MEMOP_SIDA_WRITE: + /* we are locked against sida going away by the vcpu->mutex */ + r = kvm_s390_vcpu_sida_op(vcpu, mop); + break; + default: + r = -EINVAL; + } + + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); + return r; +} + long kvm_arch_vcpu_async_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -4348,13 +4871,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp, void __user *argp = (void __user *)arg; int idx; long r; + u16 rc, rrc; vcpu_load(vcpu); switch (ioctl) { case KVM_S390_STORE_STATUS: idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvm_s390_vcpu_store_status(vcpu, arg); + r = kvm_s390_store_status_unloaded(vcpu, arg); srcu_read_unlock(&vcpu->kvm->srcu, idx); break; case KVM_S390_SET_INITIAL_PSW: { @@ -4366,12 +4890,43 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); break; } + case KVM_S390_CLEAR_RESET: + r = 0; + kvm_arch_vcpu_ioctl_clear_reset(vcpu); + if (kvm_s390_pv_cpu_is_protected(vcpu)) { + r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), + UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc); + VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x", + rc, rrc); + } + break; case KVM_S390_INITIAL_RESET: - r = kvm_arch_vcpu_ioctl_initial_reset(vcpu); + r = 0; + kvm_arch_vcpu_ioctl_initial_reset(vcpu); + if (kvm_s390_pv_cpu_is_protected(vcpu)) { + r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), + UVC_CMD_CPU_RESET_INITIAL, + &rc, &rrc); + VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x", + rc, rrc); + } + break; + case KVM_S390_NORMAL_RESET: + r = 0; + kvm_arch_vcpu_ioctl_normal_reset(vcpu); + if (kvm_s390_pv_cpu_is_protected(vcpu)) { + r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), + UVC_CMD_CPU_RESET, &rc, &rrc); + VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x", + rc, rrc); + } break; case KVM_SET_ONE_REG: case KVM_GET_ONE_REG: { struct kvm_one_reg reg; + r = -EINVAL; + if (kvm_s390_pv_cpu_is_protected(vcpu)) + break; r = -EFAULT; if (copy_from_user(®, argp, sizeof(reg))) break; @@ -4434,7 +4989,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct kvm_s390_mem_op mem_op; if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) - r = kvm_s390_guest_mem_op(vcpu, &mem_op); + r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op); else r = -EFAULT; break; @@ -4520,6 +5075,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit) return -EINVAL; + /* When we are protected, we should not change the memory slots */ + if (kvm_s390_pv_get_handle(kvm)) + return -EINVAL; return 0; } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 6d9448dbd052b..817d213693e03 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -2,7 +2,7 @@ /* * definition for kvm on s390 * - * Copyright IBM Corp. 2008, 2009 + * Copyright IBM Corp. 2008, 2020 * * Author(s): Carsten Otte * Christian Borntraeger @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -25,6 +26,17 @@ #define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1)) extern debug_info_t *kvm_s390_dbf; +extern debug_info_t *kvm_s390_dbf_uv; + +#define KVM_UV_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ +do { \ + debug_sprintf_event((d_kvm)->arch.dbf, d_loglevel, d_string "\n", \ + d_args); \ + debug_sprintf_event(kvm_s390_dbf_uv, d_loglevel, \ + "%d: " d_string "\n", (d_kvm)->userspace_pid, \ + d_args); \ +} while (0) + #define KVM_EVENT(d_loglevel, d_string, d_args...)\ do { \ debug_sprintf_event(kvm_s390_dbf, d_loglevel, d_string "\n", \ @@ -67,7 +79,7 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) { - return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); + return test_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask); } static inline int kvm_is_ucontrol(struct kvm *kvm) @@ -196,6 +208,48 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) return kvm->arch.user_cpu_state_ctrl != 0; } +static inline void kvm_s390_set_user_cpu_state_ctrl(struct kvm *kvm) +{ + if (kvm->arch.user_cpu_state_ctrl) + return; + + VM_EVENT(kvm, 3, "%s", "ENABLE: Userspace CPU state control"); + kvm->arch.user_cpu_state_ctrl = 1; +} + +/* implemented in pv.c */ +int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); +int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); +int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc); +int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc); +int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc, + u16 *rrc); +int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size, + unsigned long tweak, u16 *rc, u16 *rrc); +int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state); + +static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm) +{ + return kvm->arch.pv.handle; +} + +static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.pv.handle; +} + +static inline bool kvm_s390_pv_is_protected(struct kvm *kvm) +{ + lockdep_assert_held(&kvm->lock); + return !!kvm_s390_pv_get_handle(kvm); +} + +static inline bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu) +{ + lockdep_assert_held(&vcpu->mutex); + return !!kvm_s390_pv_cpu_get_handle(vcpu); +} + /* implemented in interrupt.c */ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); @@ -286,8 +340,8 @@ void kvm_s390_set_tod_clock(struct kvm *kvm, long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); -void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu); -void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu); +int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu); +int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu); bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu); @@ -373,6 +427,7 @@ void kvm_s390_destroy_adapters(struct kvm *kvm); int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu); extern struct kvm_device_ops kvm_flic_ops; int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu); +int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu); void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu); int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, void __user *buf, int len); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index ed52ffa8d5d45..840b383ba7565 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -2,7 +2,7 @@ /* * handling privileged instructions * - * Copyright IBM Corp. 2008, 2018 + * Copyright IBM Corp. 2008, 2020 * * Author(s): Carsten Otte * Christian Borntraeger @@ -398,6 +398,8 @@ static int handle_sske(struct kvm_vcpu *vcpu) up_read(¤t->mm->mmap_sem); if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + if (rc == -EAGAIN) + continue; if (rc < 0) return rc; start += PAGE_SIZE; @@ -626,10 +628,12 @@ static int handle_pqap(struct kvm_vcpu *vcpu) * available for the guest are AQIC and TAPQ with the t bit set * since we do not set IC.3 (FIII) we currently will only intercept * the AQIC function code. + * Note: running nested under z/VM can result in intercepts for other + * function codes, e.g. PQAP(QCI). We do not support this and bail out. */ reg0 = vcpu->run->s.regs.gprs[0]; fc = (reg0 >> 24) & 0xff; - if (WARN_ON_ONCE(fc != 0x03)) + if (fc != 0x03) return -EOPNOTSUPP; /* PQAP instruction is allowed for guest kernel only */ @@ -872,7 +876,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) operand2 = kvm_s390_get_base_disp_s(vcpu, &ar); - if (operand2 & 0xfff) + if (!kvm_s390_pv_cpu_is_protected(vcpu) && (operand2 & 0xfff)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); switch (fc) { @@ -893,8 +897,13 @@ static int handle_stsi(struct kvm_vcpu *vcpu) handle_stsi_3_2_2(vcpu, (void *) mem); break; } - - rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE); + if (kvm_s390_pv_cpu_is_protected(vcpu)) { + memcpy((void *)sida_origin(vcpu->arch.sie_block), (void *)mem, + PAGE_SIZE); + rc = 0; + } else { + rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE); + } if (rc) { rc = kvm_s390_inject_prog_cond(vcpu, rc); goto out; @@ -1432,10 +1441,11 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) static int handle_tprot(struct kvm_vcpu *vcpu) { - u64 address1, address2; - unsigned long hva, gpa; - int ret = 0, cc = 0; + u64 address, operand2; + unsigned long gpa; + u8 access_key; bool writable; + int ret, cc; u8 ar; vcpu->stat.instruction_tprot++; @@ -1443,43 +1453,46 @@ static int handle_tprot(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - kvm_s390_get_base_disp_sse(vcpu, &address1, &address2, &ar, NULL); + kvm_s390_get_base_disp_sse(vcpu, &address, &operand2, &ar, NULL); + access_key = (operand2 & 0xf0) >> 4; - /* we only handle the Linux memory detection case: - * access key == 0 - * everything else goes to userspace. */ - if (address2 & 0xf0) - return -EOPNOTSUPP; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) ipte_lock(vcpu); - ret = guest_translate_address(vcpu, address1, ar, &gpa, GACC_STORE); - if (ret == PGM_PROTECTION) { + + ret = guest_translate_address_with_key(vcpu, address, ar, &gpa, + GACC_STORE, access_key); + if (ret == 0) { + gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable); + } else if (ret == PGM_PROTECTION) { + writable = false; /* Write protected? Try again with read-only... */ - cc = 1; - ret = guest_translate_address(vcpu, address1, ar, &gpa, - GACC_FETCH); + ret = guest_translate_address_with_key(vcpu, address, ar, &gpa, + GACC_FETCH, access_key); } - if (ret) { - if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) { - ret = kvm_s390_inject_program_int(vcpu, ret); - } else if (ret > 0) { - /* Translation not available */ - kvm_s390_set_psw_cc(vcpu, 3); + if (ret >= 0) { + cc = -1; + + /* Fetching permitted; storing permitted */ + if (ret == 0 && writable) + cc = 0; + /* Fetching permitted; storing not permitted */ + else if (ret == 0 && !writable) + cc = 1; + /* Fetching not permitted; storing not permitted */ + else if (ret == PGM_PROTECTION) + cc = 2; + /* Translation not available */ + else if (ret != PGM_ADDRESSING && ret != PGM_TRANSLATION_SPEC) + cc = 3; + + if (cc != -1) { + kvm_s390_set_psw_cc(vcpu, cc); ret = 0; + } else { + ret = kvm_s390_inject_program_int(vcpu, ret); } - goto out_unlock; } - hva = gfn_to_hva_prot(vcpu->kvm, gpa_to_gfn(gpa), &writable); - if (kvm_is_error_hva(hva)) { - ret = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - } else { - if (!writable) - cc = 1; /* Write not permitted ==> read-only */ - kvm_s390_set_psw_cc(vcpu, cc); - /* Note: CC2 only occurs for storage keys (not supported yet) */ - } -out_unlock: if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) ipte_unlock(vcpu); return ret; diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c new file mode 100644 index 0000000000000..06152f33cbb7f --- /dev/null +++ b/arch/s390/kvm/pv.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hosting Protected Virtual Machines + * + * Copyright IBM Corp. 2019, 2020 + * Author(s): Janosch Frank + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "kvm-s390.h" + +int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) +{ + int cc = 0; + + if (kvm_s390_pv_cpu_get_handle(vcpu)) { + cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), + UVC_CMD_DESTROY_SEC_CPU, rc, rrc); + + KVM_UV_EVENT(vcpu->kvm, 3, + "PROTVIRT DESTROY VCPU %d: rc %x rrc %x", + vcpu->vcpu_id, *rc, *rrc); + WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", + *rc, *rrc); + } + /* Intended memory leak for something that should never happen. */ + if (!cc) + free_pages(vcpu->arch.pv.stor_base, + get_order(uv_info.guest_cpu_stor_len)); + + free_page(sida_origin(vcpu->arch.sie_block)); + vcpu->arch.sie_block->pv_handle_cpu = 0; + vcpu->arch.sie_block->pv_handle_config = 0; + memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv)); + vcpu->arch.sie_block->sdf = 0; + /* + * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0). + * Use the reset value of gbea to avoid leaking the kernel pointer of + * the just freed sida. + */ + vcpu->arch.sie_block->gbea = 1; + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + + return cc ? EIO : 0; +} + +int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) +{ + struct uv_cb_csc uvcb = { + .header.cmd = UVC_CMD_CREATE_SEC_CPU, + .header.len = sizeof(uvcb), + }; + int cc; + + if (kvm_s390_pv_cpu_get_handle(vcpu)) + return -EINVAL; + + vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL, + get_order(uv_info.guest_cpu_stor_len)); + if (!vcpu->arch.pv.stor_base) + return -ENOMEM; + + /* Input */ + uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm); + uvcb.num = vcpu->arch.sie_block->icpua; + uvcb.state_origin = (u64)vcpu->arch.sie_block; + uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base; + + /* Alloc Secure Instruction Data Area Designation */ + vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL | __GFP_ZERO); + if (!vcpu->arch.sie_block->sidad) { + free_pages(vcpu->arch.pv.stor_base, + get_order(uv_info.guest_cpu_stor_len)); + return -ENOMEM; + } + + cc = uv_call(0, (u64)&uvcb); + *rc = uvcb.header.rc; + *rrc = uvcb.header.rrc; + KVM_UV_EVENT(vcpu->kvm, 3, + "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x", + vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc, + uvcb.header.rrc); + + if (cc) { + u16 dummy; + + kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy); + return -EIO; + } + + /* Output */ + vcpu->arch.pv.handle = uvcb.cpu_handle; + vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle; + vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm); + vcpu->arch.sie_block->sdf = 2; + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + return 0; +} + +/* only free resources when the destroy was successful */ +static void kvm_s390_pv_dealloc_vm(struct kvm *kvm) +{ + vfree(kvm->arch.pv.stor_var); + free_pages(kvm->arch.pv.stor_base, + get_order(uv_info.guest_base_stor_len)); + memset(&kvm->arch.pv, 0, sizeof(kvm->arch.pv)); +} + +static int kvm_s390_pv_alloc_vm(struct kvm *kvm) +{ + unsigned long base = uv_info.guest_base_stor_len; + unsigned long virt = uv_info.guest_virt_var_stor_len; + unsigned long npages = 0, vlen = 0; + struct kvm_memory_slot *memslot; + + kvm->arch.pv.stor_var = NULL; + kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL, get_order(base)); + if (!kvm->arch.pv.stor_base) + return -ENOMEM; + + /* + * Calculate current guest storage for allocation of the + * variable storage, which is based on the length in MB. + * + * Slots are sorted by GFN + */ + mutex_lock(&kvm->slots_lock); + memslot = kvm_memslots(kvm)->memslots; + npages = memslot->base_gfn + memslot->npages; + mutex_unlock(&kvm->slots_lock); + + kvm->arch.pv.guest_len = npages * PAGE_SIZE; + + /* Allocate variable storage */ + vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE); + vlen += uv_info.guest_virt_base_stor_len; + kvm->arch.pv.stor_var = vzalloc(vlen); + if (!kvm->arch.pv.stor_var) + goto out_err; + return 0; + +out_err: + kvm_s390_pv_dealloc_vm(kvm); + return -ENOMEM; +} + +/* this should not fail, but if it does, we must not free the donated memory */ +int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + int cc; + + /* make all pages accessible before destroying the guest */ + s390_reset_acc(kvm->mm); + + cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), + UVC_CMD_DESTROY_SEC_CONF, rc, rrc); + WRITE_ONCE(kvm->arch.gmap->guest_handle, 0); + atomic_set(&kvm->mm->context.is_protected, 0); + KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc); + WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc); + /* Inteded memory leak on "impossible" error */ + if (!cc) + kvm_s390_pv_dealloc_vm(kvm); + return cc ? -EIO : 0; +} + +int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + struct uv_cb_cgc uvcb = { + .header.cmd = UVC_CMD_CREATE_SEC_CONF, + .header.len = sizeof(uvcb) + }; + int cc, ret; + u16 dummy; + + ret = kvm_s390_pv_alloc_vm(kvm); + if (ret) + return ret; + + /* Inputs */ + uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */ + uvcb.guest_stor_len = kvm->arch.pv.guest_len; + uvcb.guest_asce = kvm->arch.gmap->asce; + uvcb.guest_sca = (unsigned long)kvm->arch.sca; + uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base; + uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var; + + cc = uv_call_sched(0, (u64)&uvcb); + *rc = uvcb.header.rc; + *rrc = uvcb.header.rrc; + KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x", + uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc); + + /* Outputs */ + kvm->arch.pv.handle = uvcb.guest_handle; + + if (cc) { + if (uvcb.header.rc & UVC_RC_NEED_DESTROY) + kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy); + else + kvm_s390_pv_dealloc_vm(kvm); + return -EIO; + } + kvm->arch.gmap->guest_handle = uvcb.guest_handle; + atomic_set(&kvm->mm->context.is_protected, 1); + return 0; +} + +int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc, + u16 *rrc) +{ + struct uv_cb_ssc uvcb = { + .header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS, + .header.len = sizeof(uvcb), + .sec_header_origin = (u64)hdr, + .sec_header_len = length, + .guest_handle = kvm_s390_pv_get_handle(kvm), + }; + int cc = uv_call(0, (u64)&uvcb); + + *rc = uvcb.header.rc; + *rrc = uvcb.header.rrc; + KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x", + *rc, *rrc); + return cc ? -EINVAL : 0; +} + +static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak, + u64 offset, u16 *rc, u16 *rrc) +{ + struct uv_cb_unp uvcb = { + .header.cmd = UVC_CMD_UNPACK_IMG, + .header.len = sizeof(uvcb), + .guest_handle = kvm_s390_pv_get_handle(kvm), + .gaddr = addr, + .tweak[0] = tweak, + .tweak[1] = offset, + }; + int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb); + + *rc = uvcb.header.rc; + *rrc = uvcb.header.rrc; + + if (ret && ret != -EAGAIN) + KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x", + uvcb.gaddr, *rc, *rrc); + return ret; +} + +int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size, + unsigned long tweak, u16 *rc, u16 *rrc) +{ + u64 offset = 0; + int ret = 0; + + if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK) + return -EINVAL; + + KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx", + addr, size); + + while (offset < size) { + ret = unpack_one(kvm, addr, tweak, offset, rc, rrc); + if (ret == -EAGAIN) { + cond_resched(); + if (fatal_signal_pending(current)) + break; + continue; + } + if (ret) + break; + addr += PAGE_SIZE; + offset += PAGE_SIZE; + } + if (!ret) + KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful"); + return ret; +} + +int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state) +{ + struct uv_cb_cpu_set_state uvcb = { + .header.cmd = UVC_CMD_CPU_SET_STATE, + .header.len = sizeof(uvcb), + .cpu_handle = kvm_s390_pv_cpu_get_handle(vcpu), + .state = state, + }; + int cc; + + cc = uv_call(0, (u64)&uvcb); + KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x", + vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc); + if (cc) + return -EINVAL; + return 0; +} diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 683036c1c92a8..8aaee2892ec35 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -151,22 +151,10 @@ static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu, static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter, u64 *status_reg) { - unsigned int i; - struct kvm_vcpu *v; - bool all_stopped = true; - - kvm_for_each_vcpu(i, v, vcpu->kvm) { - if (v == vcpu) - continue; - if (!is_vcpu_stopped(v)) - all_stopped = false; - } - *status_reg &= 0xffffffff00000000UL; /* Reject set arch order, with czam we're always in z/Arch mode. */ - *status_reg |= (all_stopped ? SIGP_STATUS_INVALID_PARAMETER : - SIGP_STATUS_INCORRECT_STATE); + *status_reg |= SIGP_STATUS_INVALID_PARAMETER; return SIGP_CC_STATUS_STORED; } @@ -288,6 +276,34 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code, if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; + /* + * SIGP RESTART, SIGP STOP, and SIGP STOP AND STORE STATUS orders + * are processed asynchronously. Until the affected VCPU finishes + * its work and calls back into KVM to clear the (RESTART or STOP) + * interrupt, we need to return any new non-reset orders "busy". + * + * This is important because a single VCPU could issue: + * 1) SIGP STOP $DESTINATION + * 2) SIGP SENSE $DESTINATION + * + * If the SIGP SENSE would not be rejected as "busy", it could + * return an incorrect answer as to whether the VCPU is STOPPED + * or OPERATING. + */ + if (order_code != SIGP_INITIAL_CPU_RESET && + order_code != SIGP_CPU_RESET) { + /* + * Lockless check. Both SIGP STOP and SIGP (RE)START + * properly synchronize everything while processing + * their orders, while the guest cannot observe a + * difference when issuing other orders from two + * different VCPUs. + */ + if (kvm_s390_is_stop_irq_pending(dst_vcpu) || + kvm_s390_is_restart_irq_pending(dst_vcpu)) + return SIGP_CC_BUSY; + } + switch (order_code) { case SIGP_SENSE: vcpu->stat.instruction_sigp_sense++; diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 076090f9e666c..6528716fbd51f 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -548,6 +548,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) scb_s->ecd |= scb_o->ecd & ECD_ETOKENF; scb_s->hpid = HPID_VSIE; + scb_s->cpnc = scb_o->cpnc; prepare_ibc(vcpu, vsie_page); rc = shadow_crycb(vcpu, vsie_page); @@ -1202,6 +1203,7 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) scb_s->iprcc = PGM_ADDRESSING; scb_s->pgmilc = 4; scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4); + rc = 1; } return rc; } diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index 0e30e6e43b0c5..18fbbb679851d 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -246,14 +246,13 @@ EXPORT_SYMBOL(strcmp); #ifdef __HAVE_ARCH_STRRCHR char *strrchr(const char *s, int c) { - size_t len = __strend(s) - s; - - if (len) - do { - if (s[len] == (char) c) - return (char *) s + len; - } while (--len > 0); - return NULL; + ssize_t len = __strend(s) - s; + + do { + if (s[len] == (char)c) + return (char *)s + len; + } while (--len >= 0); + return NULL; } EXPORT_SYMBOL(strrchr); #endif diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index c4f8039a35e8d..555c9a5831cb7 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -64,10 +64,13 @@ mm_segment_t enable_sacf_uaccess(void) { mm_segment_t old_fs; unsigned long asce, cr; + unsigned long flags; old_fs = current->thread.mm_segment; if (old_fs & 1) return old_fs; + /* protect against a concurrent page table upgrade */ + local_irq_save(flags); current->thread.mm_segment |= 1; asce = S390_lowcore.kernel_asce; if (likely(old_fs == USER_DS)) { @@ -83,6 +86,7 @@ mm_segment_t enable_sacf_uaccess(void) __ctl_load(asce, 7, 7); set_cpu_flag(CIF_ASCE_SECONDARY); } + local_irq_restore(flags); return old_fs; } EXPORT_SYMBOL(enable_sacf_uaccess); @@ -98,10 +102,16 @@ void disable_sacf_uaccess(mm_segment_t old_fs) EXPORT_SYMBOL(disable_sacf_uaccess); static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, - unsigned long size) + unsigned long size, unsigned long key) { - register unsigned long reg0 asm("0") = 0x01UL; unsigned long tmp1, tmp2; + union oac spec = { + .oac2.key = key, + .oac2.as = PSW_BITS_AS_PRIMARY, + .oac2.k = 1, + .oac2.a = 1, + }; + register unsigned long reg0 asm("0") = spec.val; tmp1 = -4096UL; asm volatile( @@ -128,7 +138,7 @@ static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr } static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, - unsigned long size) + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; mm_segment_t old_fs; @@ -137,12 +147,12 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, tmp1 = -256UL; asm volatile( " sacf 0\n" - "0: mvcp 0(%0,%2),0(%1),%3\n" + "0: mvcp 0(%0,%2),0(%1),%[key]\n" "7: jz 5f\n" "1: algr %0,%3\n" " la %1,256(%1)\n" " la %2,256(%2)\n" - "2: mvcp 0(%0,%2),0(%1),%3\n" + "2: mvcp 0(%0,%2),0(%1),%[key]\n" "8: jnz 1b\n" " j 5f\n" "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ @@ -151,7 +161,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, " slgr %4,%1\n" " clgr %0,%4\n" /* copy crosses next page boundary? */ " jnh 6f\n" - "4: mvcp 0(%4,%2),0(%1),%3\n" + "4: mvcp 0(%4,%2),0(%1),%[key]\n" "9: slgr %0,%4\n" " j 6f\n" "5: slgr %0,%0\n" @@ -159,24 +169,53 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); + : [key] "d" (key << 4) + : "cc", "memory"); disable_sacf_uaccess(old_fs); return size; } -unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) +static unsigned long raw_copy_from_user_key(void *to, const void __user *from, + unsigned long n, unsigned long key) { if (copy_with_mvcos()) - return copy_from_user_mvcos(to, from, n); - return copy_from_user_mvcp(to, from, n); + return copy_from_user_mvcos(to, from, n, key); + return copy_from_user_mvcp(to, from, n, key); +} + +unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) +{ + return raw_copy_from_user_key(to, from, n, 0); } EXPORT_SYMBOL(raw_copy_from_user); +unsigned long _copy_from_user_key(void *to, const void __user *from, + unsigned long n, unsigned long key) +{ + unsigned long res = n; + + might_fault(); + if (likely(access_ok(from, n))) { + kasan_check_write(to, n); + res = raw_copy_from_user_key(to, from, n, key); + } + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; +} +EXPORT_SYMBOL(_copy_from_user_key); + static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, - unsigned long size) + unsigned long size, unsigned long key) { - register unsigned long reg0 asm("0") = 0x010000UL; unsigned long tmp1, tmp2; + union oac spec = { + .oac1.key = key, + .oac1.as = PSW_BITS_AS_PRIMARY, + .oac1.k = 1, + .oac1.a = 1, + }; + register unsigned long reg0 asm("0") = spec.val; tmp1 = -4096UL; asm volatile( @@ -203,7 +242,7 @@ static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, } static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, - unsigned long size) + unsigned long size, unsigned long key) { unsigned long tmp1, tmp2; mm_segment_t old_fs; @@ -212,12 +251,12 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, tmp1 = -256UL; asm volatile( " sacf 0\n" - "0: mvcs 0(%0,%1),0(%2),%3\n" + "0: mvcs 0(%0,%1),0(%2),%[key]\n" "7: jz 5f\n" "1: algr %0,%3\n" " la %1,256(%1)\n" " la %2,256(%2)\n" - "2: mvcs 0(%0,%1),0(%2),%3\n" + "2: mvcs 0(%0,%1),0(%2),%[key]\n" "8: jnz 1b\n" " j 5f\n" "3: la %4,255(%1)\n" /* %4 = ptr + 255 */ @@ -226,7 +265,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, " slgr %4,%1\n" " clgr %0,%4\n" /* copy crosses next page boundary? */ " jnh 6f\n" - "4: mvcs 0(%4,%1),0(%2),%3\n" + "4: mvcs 0(%4,%1),0(%2),%[key]\n" "9: slgr %0,%4\n" " j 6f\n" "5: slgr %0,%0\n" @@ -234,24 +273,49 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, EX_TABLE(0b,3b) EX_TABLE(2b,3b) EX_TABLE(4b,6b) EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) - : : "cc", "memory"); + : [key] "d" (key << 4) + : "cc", "memory"); disable_sacf_uaccess(old_fs); return size; } -unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) +static unsigned long raw_copy_to_user_key(void __user *to, const void *from, + unsigned long n, unsigned long key) { if (copy_with_mvcos()) - return copy_to_user_mvcos(to, from, n); - return copy_to_user_mvcs(to, from, n); + return copy_to_user_mvcos(to, from, n, key); + return copy_to_user_mvcs(to, from, n, key); +} + +unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + return raw_copy_to_user_key(to, from, n, 0); } EXPORT_SYMBOL(raw_copy_to_user); +unsigned long _copy_to_user_key(void __user *to, const void *from, + unsigned long n, unsigned long key) +{ + might_fault(); + if (access_ok(to, n)) { + kasan_check_read(from, n); + n = raw_copy_to_user_key(to, from, n, key); + } + return n; +} +EXPORT_SYMBOL(_copy_to_user_key); + static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from, unsigned long size) { - register unsigned long reg0 asm("0") = 0x010001UL; unsigned long tmp1, tmp2; + union oac spec = { + .oac1.as = PSW_BITS_AS_PRIMARY, + .oac1.a = 1, + .oac2.as = PSW_BITS_AS_PRIMARY, + .oac2.a = 1, + }; + register unsigned long reg0 asm("0") = spec.val; tmp1 = -4096UL; /* FIXME: copy with reduced length. */ @@ -314,8 +378,12 @@ EXPORT_SYMBOL(raw_copy_in_user); static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) { - register unsigned long reg0 asm("0") = 0x010000UL; unsigned long tmp1, tmp2; + union oac spec = { + .oac1.as = PSW_BITS_AS_PRIMARY, + .oac1.a = 1, + }; + register unsigned long reg0 asm("0") = spec.val; tmp1 = -4096UL; asm volatile( diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 7b0bb475c1664..d6b11a8940705 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "../kernel/entry.h" #define __FAIL_ADDR_MASK -4096L @@ -432,7 +433,9 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (user_mode(regs)) flags |= FAULT_FLAG_USER; - if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) + if ((trans_exc_code & store_indication) == 0x400) + access = VM_WRITE; + if (access == VM_WRITE) flags |= FAULT_FLAG_WRITE; down_read(&mm->mmap_sem); @@ -816,3 +819,80 @@ static int __init pfault_irq_init(void) early_initcall(pfault_irq_init); #endif /* CONFIG_PFAULT */ + +#if IS_ENABLED(CONFIG_PGSTE) +void do_secure_storage_access(struct pt_regs *regs) +{ + unsigned long addr = regs->int_parm_long & __FAIL_ADDR_MASK; + struct vm_area_struct *vma; + struct mm_struct *mm; + struct page *page; + int rc; + + switch (get_fault_type(regs)) { + case USER_FAULT: + mm = current->mm; + down_read(&mm->mmap_sem); + vma = find_vma(mm, addr); + if (!vma) { + up_read(&mm->mmap_sem); + do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); + break; + } + page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET); + if (IS_ERR_OR_NULL(page)) { + up_read(&mm->mmap_sem); + break; + } + if (arch_make_page_accessible(page)) + send_sig(SIGSEGV, current, 0); + put_page(page); + up_read(&mm->mmap_sem); + break; + case KERNEL_FAULT: + page = phys_to_page(addr); + if (unlikely(!try_get_page(page))) + break; + rc = arch_make_page_accessible(page); + put_page(page); + if (rc) + BUG(); + break; + case VDSO_FAULT: + /* fallthrough */ + case GMAP_FAULT: + /* fallthrough */ + default: + do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); + WARN_ON_ONCE(1); + } +} +NOKPROBE_SYMBOL(do_secure_storage_access); + +void do_non_secure_storage_access(struct pt_regs *regs) +{ + unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK; + struct gmap *gmap = (struct gmap *)S390_lowcore.gmap; + + if (get_fault_type(regs) != GMAP_FAULT) { + do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP); + WARN_ON_ONCE(1); + return; + } + + if (gmap_convert_to_secure(gmap, gaddr) == -EINVAL) + send_sig(SIGSEGV, current, 0); +} +NOKPROBE_SYMBOL(do_non_secure_storage_access); + +#else +void do_secure_storage_access(struct pt_regs *regs) +{ + default_trap_handler(regs); +} + +void do_non_secure_storage_access(struct pt_regs *regs) +{ + default_trap_handler(regs); +} +#endif diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index edcdca97e85ee..e2d2b941be174 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -684,9 +684,10 @@ void __gmap_zap(struct gmap *gmap, unsigned long gaddr) vmaddr |= gaddr & ~PMD_MASK; /* Get pointer to the page table entry */ ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); - if (likely(ptep)) + if (likely(ptep)) { ptep_zap_unused(gmap->mm, vmaddr, ptep, 0); - pte_unmap_unlock(ptep, ptl); + pte_unmap_unlock(ptep, ptl); + } } } EXPORT_SYMBOL_GPL(__gmap_zap); @@ -787,14 +788,18 @@ static void gmap_call_notifier(struct gmap *gmap, unsigned long start, static inline unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level) { + const int asce_type = gmap->asce & _ASCE_TYPE_MASK; unsigned long *table; if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4)) return NULL; if (gmap_is_shadow(gmap) && gmap->removed) return NULL; - if (gaddr & (-1UL << (31 + ((gmap->asce & _ASCE_TYPE_MASK) >> 2)*11))) + + if (asce_type != _ASCE_TYPE_REGION1 && + gaddr & (-1UL << (31 + (asce_type >> 2) * 11))) return NULL; + table = gmap->table; switch (gmap->asce & _ASCE_TYPE_MASK) { case _ASCE_TYPE_REGION1: @@ -1173,6 +1178,7 @@ EXPORT_SYMBOL_GPL(gmap_read_table); static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr, struct gmap_rmap *rmap) { + struct gmap_rmap *temp; void __rcu **slot; BUG_ON(!gmap_is_shadow(sg)); @@ -1180,6 +1186,12 @@ static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr, if (slot) { rmap->next = radix_tree_deref_slot_protected(slot, &sg->guest_table_lock); + for (temp = rmap->next; temp; temp = temp->next) { + if (temp->raddr == rmap->raddr) { + kfree(rmap); + return; + } + } radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap); } else { rmap->next = NULL; @@ -1840,6 +1852,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, goto out_free; } else if (*table & _REGION_ENTRY_ORIGIN) { rc = -EAGAIN; /* Race with shadow */ + goto out_free; } crst_table_init(s_r3t, _REGION3_ENTRY_EMPTY); /* mark as invalid as long as the parent table is not protected */ @@ -2480,23 +2493,36 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4], } EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + struct vm_area_struct *vma = walk->vma; + + split_huge_pmd(vma, pmd, addr); + return 0; +} + +static const struct mm_walk_ops thp_split_walk_ops = { + .pmd_entry = thp_split_walk_pmd_entry, +}; + static inline void thp_split_mm(struct mm_struct *mm) { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE struct vm_area_struct *vma; - unsigned long addr; for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { - for (addr = vma->vm_start; - addr < vma->vm_end; - addr += PAGE_SIZE) - follow_page(vma, addr, FOLL_SPLIT); vma->vm_flags &= ~VM_HUGEPAGE; vma->vm_flags |= VM_NOHUGEPAGE; + walk_page_vma(vma, &thp_split_walk_ops, NULL); } mm->def_flags |= VM_NOHUGEPAGE; -#endif } +#else +static inline void thp_split_mm(struct mm_struct *mm) +{ +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* * Remove all empty zero pages from the mapping for lazy refaulting @@ -2548,6 +2574,22 @@ int s390_enable_sie(void) } EXPORT_SYMBOL_GPL(s390_enable_sie); +int gmap_mark_unmergeable(void) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (ksm_madvise(vma, vma->vm_start, vma->vm_end, + MADV_UNMERGEABLE, &vma->vm_flags)) { + return -ENOMEM; + } + } + mm->def_flags &= ~VM_MERGEABLE; + return 0; +} +EXPORT_SYMBOL_GPL(gmap_mark_unmergeable); + /* * Enable storage key handling from now on and initialize the storage * keys with the default key. @@ -2560,6 +2602,18 @@ static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr, return 0; } +/* + * Give a chance to schedule after setting a key to 256 pages. + * We only hold the mm lock, which is a rwsem and the kvm srcu. + * Both can sleep. + */ +static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + cond_resched(); + return 0; +} + static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, unsigned long hmask, unsigned long next, struct mm_walk *walk) @@ -2582,18 +2636,19 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, end = start + HPAGE_SIZE - 1; __storage_key_init_range(start, end); set_bit(PG_arch_1, &page->flags); + cond_resched(); return 0; } static const struct mm_walk_ops enable_skey_walk_ops = { .hugetlb_entry = __s390_enable_skey_hugetlb, .pte_entry = __s390_enable_skey_pte, + .pmd_entry = __s390_enable_skey_pmd, }; int s390_enable_skey(void) { struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; int rc = 0; down_write(&mm->mmap_sem); @@ -2601,16 +2656,11 @@ int s390_enable_skey(void) goto out_up; mm->context.uses_skeys = 1; - for (vma = mm->mmap; vma; vma = vma->vm_next) { - if (ksm_madvise(vma, vma->vm_start, vma->vm_end, - MADV_UNMERGEABLE, &vma->vm_flags)) { - mm->context.uses_skeys = 0; - rc = -ENOMEM; - goto out_up; - } + rc = gmap_mark_unmergeable(); + if (rc) { + mm->context.uses_skeys = 0; + goto out_up; } - mm->def_flags &= ~VM_MERGEABLE; - walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL); out_up: @@ -2640,3 +2690,38 @@ void s390_reset_cmma(struct mm_struct *mm) up_write(&mm->mmap_sem); } EXPORT_SYMBOL_GPL(s390_reset_cmma); + +/* + * make inaccessible pages accessible again + */ +static int __s390_reset_acc(pte_t *ptep, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pte_t pte = READ_ONCE(*ptep); + + if (pte_present(pte)) + WARN_ON_ONCE(uv_convert_from_secure(pte_val(pte) & PAGE_MASK)); + return 0; +} + +static const struct mm_walk_ops reset_acc_walk_ops = { + .pte_entry = __s390_reset_acc, +}; + +#include +void s390_reset_acc(struct mm_struct *mm) +{ + /* + * we might be called during + * reset: we walk the pages and clear + * close of all kvm file descriptors: we walk the pages and clear + * exit of process on fd closure: vma already gone, do nothing + */ + if (!mmget_not_zero(mm)) + return; + down_read(&mm->mmap_sem); + walk_page_range(mm, 0, TASK_SIZE, &reset_acc_walk_ops, NULL); + up_read(&mm->mmap_sem); + mmput(mm); +} +EXPORT_SYMBOL_GPL(s390_reset_acc); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index b0246c705a192..ff8234bca56cd 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -2,7 +2,7 @@ /* * IBM System z Huge TLB Page Support for Kernel. * - * Copyright IBM Corp. 2007,2016 + * Copyright IBM Corp. 2007,2020 * Author(s): Gerald Schaefer */ @@ -11,6 +11,9 @@ #include #include +#include +#include +#include /* * If the bit selected by single-bit bitmask "a" is set within "x", move @@ -114,7 +117,7 @@ static inline pte_t __rste_to_pte(unsigned long rste) _PAGE_YOUNG); #ifdef CONFIG_MEM_SOFT_DIRTY pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, - _PAGE_DIRTY); + _PAGE_SOFT_DIRTY); #endif pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC); @@ -156,10 +159,13 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, rste &= ~_SEGMENT_ENTRY_NOEXEC; /* Set correct table type for 2G hugepages */ - if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) - rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE; - else + if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) { + if (likely(pte_present(pte))) + rste |= _REGION3_ENTRY_LARGE; + rste |= _REGION_ENTRY_TYPE_R3; + } else if (likely(pte_present(pte))) rste |= _SEGMENT_ENTRY_LARGE; + clear_huge_pte_skeys(mm, rste); pte_val(*ptep) = rste; } @@ -267,3 +273,98 @@ static __init int setup_hugepagesz(char *opt) return 1; } __setup("hugepagesz=", setup_hugepagesz); + +static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct vm_unmapped_area_info info; + + info.flags = 0; + info.length = len; + info.low_limit = current->mm->mmap_base; + info.high_limit = TASK_SIZE; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + return vm_unmapped_area(&info); +} + +static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, + unsigned long addr0, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct vm_unmapped_area_info info; + unsigned long addr; + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = max(PAGE_SIZE, mmap_min_addr); + info.high_limit = current->mm->mmap_base; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + addr = vm_unmapped_area(&info); + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + addr = vm_unmapped_area(&info); + } + + return addr; +} + +unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc; + + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (len > TASK_SIZE - mmap_min_addr) + return -ENOMEM; + + if (flags & MAP_FIXED) { + if (prepare_hugepage_range(file, addr, len)) + return -EINVAL; + goto check_asce_limit; + } + + if (addr) { + addr = ALIGN(addr, huge_page_size(h)); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && + (!vma || addr + len <= vm_start_gap(vma))) + goto check_asce_limit; + } + + if (mm->get_unmapped_area == arch_get_unmapped_area) + addr = hugetlb_get_unmapped_area_bottomup(file, addr, len, + pgoff, flags); + else + addr = hugetlb_get_unmapped_area_topdown(file, addr, len, + pgoff, flags); + if (addr & ~PAGE_MASK) + return addr; + +check_asce_limit: + if (addr + len > current->mm->context.asce_limit && + addr + len <= TASK_SIZE) { + rc = crst_table_upgrade(mm, addr + len); + if (rc) + return (unsigned long) rc; + } + return addr; +} diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index a124f19f7b3cc..5521f593cd20a 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -168,9 +168,9 @@ static void pv_init(void) return; /* make sure bounce buffers are shared */ + swiotlb_force = SWIOTLB_FORCE; swiotlb_init(1); swiotlb_update_mem_attributes(); - swiotlb_force = SWIOTLB_FORCE; } void __init mem_init(void) @@ -291,10 +291,8 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); vmem_remove_mapping(start, size); } #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c index 460f255729402..5182e0836ca71 100644 --- a/arch/s390/mm/kasan_init.c +++ b/arch/s390/mm/kasan_init.c @@ -101,6 +101,9 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, pgt_prot = pgprot_val(PAGE_KERNEL_EXEC); sgt_prot = pgprot_val(SEGMENT_KERNEL_EXEC); + /* + * The first 1MB of 1:1 mapping is mapped with 4KB pages + */ while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { @@ -146,30 +149,26 @@ static void __init kasan_early_vmemmap_populate(unsigned long address, pm_dir = pmd_offset(pu_dir, address); if (pmd_none(*pm_dir)) { - if (mode == POPULATE_ZERO_SHADOW && - IS_ALIGNED(address, PMD_SIZE) && + if (IS_ALIGNED(address, PMD_SIZE) && end - address >= PMD_SIZE) { - pmd_populate(&init_mm, pm_dir, - kasan_early_shadow_pte); - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } - /* the first megabyte of 1:1 is mapped with 4k pages */ - if (has_edat && address && end - address >= PMD_SIZE && - mode != POPULATE_ZERO_SHADOW) { - void *page; - - if (mode == POPULATE_ONE2ONE) { - page = (void *)address; - } else { - page = kasan_early_alloc_segment(); - memset(page, 0, _SEGMENT_SIZE); + if (mode == POPULATE_ZERO_SHADOW) { + pmd_populate(&init_mm, pm_dir, kasan_early_shadow_pte); + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } else if (has_edat && address) { + void *page; + + if (mode == POPULATE_ONE2ONE) { + page = (void *)address; + } else { + page = kasan_early_alloc_segment(); + memset(page, 0, _SEGMENT_SIZE); + } + pmd_val(*pm_dir) = __pa(page) | sgt_prot; + address = (address + PMD_SIZE) & PMD_MASK; + continue; } - pmd_val(*pm_dir) = __pa(page) | sgt_prot; - address = (address + PMD_SIZE) & PMD_MASK; - continue; } - pt_dir = kasan_early_pte_alloc(); pmd_populate(&init_mm, pm_dir, pt_dir); } else if (pmd_large(*pm_dir)) { diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 1864a8bb9622f..1d17413b319a4 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -55,22 +55,29 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz */ static DEFINE_SPINLOCK(s390_kernel_write_lock); -void notrace s390_kernel_write(void *dst, const void *src, size_t size) +notrace void *s390_kernel_write(void *dst, const void *src, size_t size) { + void *tmp = dst; unsigned long flags; long copied; spin_lock_irqsave(&s390_kernel_write_lock, flags); - while (size) { - copied = s390_kernel_write_odd(dst, src, size); - dst += copied; - src += copied; - size -= copied; + if (!(flags & PSW_MASK_DAT)) { + memcpy(dst, src, size); + } else { + while (size) { + copied = s390_kernel_write_odd(tmp, src, size); + tmp += copied; + src += copied; + size -= copied; + } } spin_unlock_irqrestore(&s390_kernel_write_lock, flags); + + return dst; } -static int __memcpy_real(void *dest, void *src, size_t count) +static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count) { register unsigned long _dest asm("2") = (unsigned long) dest; register unsigned long _len1 asm("3") = (unsigned long) count; @@ -91,19 +98,23 @@ static int __memcpy_real(void *dest, void *src, size_t count) return rc; } -static unsigned long _memcpy_real(unsigned long dest, unsigned long src, - unsigned long count) +static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest, + unsigned long src, + unsigned long count) { int irqs_disabled, rc; unsigned long flags; if (!count) return 0; - flags = __arch_local_irq_stnsm(0xf8UL); + flags = arch_local_irq_save(); irqs_disabled = arch_irqs_disabled_flags(flags); if (!irqs_disabled) trace_hardirqs_off(); + __arch_local_irq_stnsm(0xf8); // disable DAT rc = __memcpy_real((void *) dest, (void *) src, (size_t) count); + if (flags & PSW_MASK_DAT) + __arch_local_irq_stosm(0x04); // enable DAT if (!irqs_disabled) trace_hardirqs_on(); __arch_local_irq_ssm(flags); @@ -115,9 +126,15 @@ static unsigned long _memcpy_real(unsigned long dest, unsigned long src, */ int memcpy_real(void *dest, void *src, size_t count) { - if (S390_lowcore.nodat_stack != 0) - return CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack, - 3, dest, src, count); + int rc; + + if (S390_lowcore.nodat_stack != 0) { + preempt_disable(); + rc = CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack, 3, + dest, src, count); + preempt_enable(); + return rc; + } /* * This is a really early memcpy_real call, the stacks are * not set up yet. Just call _memcpy_real on the early boot diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 3dd253f81a771..90943bf3e5eec 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -70,8 +70,20 @@ static void __crst_table_upgrade(void *arg) { struct mm_struct *mm = arg; - if (current->active_mm == mm) - set_user_asce(mm); + /* we must change all active ASCEs to avoid the creation of new TLBs */ + if (current->active_mm == mm) { + S390_lowcore.user_asce = mm->context.asce; + if (current->thread.mm_segment == USER_DS) { + __ctl_load(S390_lowcore.user_asce, 1, 1); + /* Mark user-ASCE present in CR1 */ + clear_cpu_flag(CIF_ASCE_PRIMARY); + } + if (current->thread.mm_segment == USER_DS_SACF) { + __ctl_load(S390_lowcore.user_asce, 7, 7); + /* enable_sacf_uaccess does all or nothing */ + WARN_ON(!test_cpu_flag(CIF_ASCE_SECONDARY)); + } + } __tlb_flush_local(); } @@ -243,13 +255,15 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) /* Free 2K page table fragment of a 4K page */ bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); spin_lock_bh(&mm->context.lock); - mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24)); + mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24)); mask >>= 24; if (mask & 3) list_add(&page->lru, &mm->context.pgtable_list); else list_del(&page->lru); spin_unlock_bh(&mm->context.lock); + mask = atomic_xor_bits(&page->_refcount, 0x10U << (bit + 24)); + mask >>= 24; if (mask != 0) return; } else { diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 9ebd01219812c..28ca07360e970 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -716,7 +716,7 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT; ptev = pte_val(*ptep); if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) - page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); + page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0); pgste_set_unlock(ptep, pgste); preempt_enable(); } @@ -970,6 +970,7 @@ EXPORT_SYMBOL(get_guest_storage_key); int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, unsigned long *oldpte, unsigned long *oldpgste) { + struct vm_area_struct *vma; unsigned long pgstev; spinlock_t *ptl; pgste_t pgste; @@ -979,6 +980,10 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, WARN_ON_ONCE(orc > ESSA_MAX); if (unlikely(orc > ESSA_MAX)) return -EINVAL; + + vma = find_vma(mm, hva); + if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma)) + return -EFAULT; ptep = get_locked_pte(mm, hva, &ptl); if (unlikely(!ptep)) return -EFAULT; @@ -1071,10 +1076,14 @@ EXPORT_SYMBOL(pgste_perform_essa); int set_pgste_bits(struct mm_struct *mm, unsigned long hva, unsigned long bits, unsigned long value) { + struct vm_area_struct *vma; spinlock_t *ptl; pgste_t new; pte_t *ptep; + vma = find_vma(mm, hva); + if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma)) + return -EFAULT; ptep = get_locked_pte(mm, hva, &ptl); if (unlikely(!ptep)) return -EFAULT; @@ -1099,9 +1108,13 @@ EXPORT_SYMBOL(set_pgste_bits); */ int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep) { + struct vm_area_struct *vma; spinlock_t *ptl; pte_t *ptep; + vma = find_vma(mm, hva); + if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma)) + return -EFAULT; ptep = get_locked_pte(mm, hva, &ptl); if (unlikely(!ptep)) return -EFAULT; diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index b403fa14847dc..f810930aff427 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -415,6 +415,10 @@ void __init vmem_map_init(void) SET_MEMORY_RO | SET_MEMORY_X); __set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); + + /* we need lowcore executable for our LPSWE instructions */ + set_memory_x(0, 1); + pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index ce88211b9c6cd..f63e4cb6c9b31 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -113,7 +114,7 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) { u32 r1 = reg2hex[b1]; - if (!jit->seen_reg[r1] && r1 >= 6 && r1 <= 15) + if (r1 >= 6 && r1 <= 15 && !jit->seen_reg[r1]) jit->seen_reg[r1] = 1; } @@ -568,10 +569,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4(0xb9080000, dst_reg, src_reg); break; case BPF_ALU | BPF_ADD | BPF_K: /* dst = (u32) dst + (u32) imm */ - if (!imm) - break; - /* alfi %dst,imm */ - EMIT6_IMM(0xc20b0000, dst_reg, imm); + if (imm != 0) { + /* alfi %dst,imm */ + EMIT6_IMM(0xc20b0000, dst_reg, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_ADD | BPF_K: /* dst = dst + imm */ @@ -593,17 +594,22 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4(0xb9090000, dst_reg, src_reg); break; case BPF_ALU | BPF_SUB | BPF_K: /* dst = (u32) dst - (u32) imm */ - if (!imm) - break; - /* alfi %dst,-imm */ - EMIT6_IMM(0xc20b0000, dst_reg, -imm); + if (imm != 0) { + /* alfi %dst,-imm */ + EMIT6_IMM(0xc20b0000, dst_reg, -imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */ if (!imm) break; - /* agfi %dst,-imm */ - EMIT6_IMM(0xc2080000, dst_reg, -imm); + if (imm == -0x80000000) { + /* algfi %dst,0x80000000 */ + EMIT6_IMM(0xc20a0000, dst_reg, 0x80000000); + } else { + /* agfi %dst,-imm */ + EMIT6_IMM(0xc2080000, dst_reg, -imm); + } break; /* * BPF_MUL @@ -618,10 +624,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4(0xb90c0000, dst_reg, src_reg); break; case BPF_ALU | BPF_MUL | BPF_K: /* dst = (u32) dst * (u32) imm */ - if (imm == 1) - break; - /* msfi %r5,imm */ - EMIT6_IMM(0xc2010000, dst_reg, imm); + if (imm != 1) { + /* msfi %r5,imm */ + EMIT6_IMM(0xc2010000, dst_reg, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_MUL | BPF_K: /* dst = dst * imm */ @@ -674,6 +680,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, if (BPF_OP(insn->code) == BPF_MOD) /* lhgi %dst,0 */ EMIT4_IMM(0xa7090000, dst_reg, 0); + else + EMIT_ZERO(dst_reg); break; } /* lhi %w0,0 */ @@ -768,10 +776,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT4(0xb9820000, dst_reg, src_reg); break; case BPF_ALU | BPF_XOR | BPF_K: /* dst = (u32) dst ^ (u32) imm */ - if (!imm) - break; - /* xilf %dst,imm */ - EMIT6_IMM(0xc0070000, dst_reg, imm); + if (imm != 0) { + /* xilf %dst,imm */ + EMIT6_IMM(0xc0070000, dst_reg, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */ @@ -792,10 +800,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, src_reg, 0); break; case BPF_ALU | BPF_LSH | BPF_K: /* dst = (u32) dst << (u32) imm */ - if (imm == 0) - break; - /* sll %dst,imm(%r0) */ - EMIT4_DISP(0x89000000, dst_reg, REG_0, imm); + if (imm != 0) { + /* sll %dst,imm(%r0) */ + EMIT4_DISP(0x89000000, dst_reg, REG_0, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_LSH | BPF_K: /* dst = dst << imm */ @@ -817,10 +825,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, src_reg, 0); break; case BPF_ALU | BPF_RSH | BPF_K: /* dst = (u32) dst >> (u32) imm */ - if (imm == 0) - break; - /* srl %dst,imm(%r0) */ - EMIT4_DISP(0x88000000, dst_reg, REG_0, imm); + if (imm != 0) { + /* srl %dst,imm(%r0) */ + EMIT4_DISP(0x88000000, dst_reg, REG_0, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_RSH | BPF_K: /* dst = dst >> imm */ @@ -842,10 +850,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, src_reg, 0); break; case BPF_ALU | BPF_ARSH | BPF_K: /* ((s32) dst >> imm */ - if (imm == 0) - break; - /* sra %dst,imm(%r0) */ - EMIT4_DISP(0x8a000000, dst_reg, REG_0, imm); + if (imm != 0) { + /* sra %dst,imm(%r0) */ + EMIT4_DISP(0x8a000000, dst_reg, REG_0, imm); + } EMIT_ZERO(dst_reg); break; case BPF_ALU64 | BPF_ARSH | BPF_K: /* ((s64) dst) >>= imm */ @@ -912,6 +920,11 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, break; } break; + /* + * BPF_NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + break; /* * BPF_ST(X) */ @@ -1369,10 +1382,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) } memset(&jit, 0, sizeof(jit)); - jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL); + jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL); if (jit.addrs == NULL) { fp = orig_fp; - goto out; + goto free_addrs; } /* * Three initial passes: @@ -1422,7 +1435,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) if (!fp->is_func || extra_pass) { bpf_prog_fill_jited_linfo(fp, jit.addrs + 1); free_addrs: - kfree(jit.addrs); + kvfree(jit.addrs); kfree(jit_data); fp->aux->jit_data = NULL; } diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index d2910fa834c8a..8386c58fdb3a0 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -49,12 +49,6 @@ void numa_update_cpu_topology(void) mode->update_cpu_topology(); } -int __node_distance(int a, int b) -{ - return mode->distance ? mode->distance(a, b) : 0; -} -EXPORT_SYMBOL(__node_distance); - int numa_debug_enabled; /* diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index 748626a330282..b4e3c84772a16 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -4,4 +4,5 @@ # obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \ - pci_event.o pci_debug.o pci_insn.o pci_mmio.o + pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ + pci_bus.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index c7fea9bea8cb5..563c9138d8769 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -35,17 +35,21 @@ #include #include +#include "pci_bus.h" + /* list of all detected zpci devices */ static LIST_HEAD(zpci_list); static DEFINE_SPINLOCK(zpci_list_lock); -static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); +static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE); static DEFINE_SPINLOCK(zpci_domain_lock); #define ZPCI_IOMAP_ENTRIES \ min(((unsigned long) ZPCI_NR_DEVICES * PCI_BAR_COUNT / 2), \ ZPCI_IOMAP_MAX_ENTRIES) +unsigned int s390_pci_no_rid; + static DEFINE_SPINLOCK(zpci_iomap_lock); static unsigned long *zpci_iomap_bitmap; struct zpci_iomap_entry *zpci_iomap_start; @@ -86,17 +90,12 @@ void zpci_remove_reserved_devices(void) spin_unlock(&zpci_list_lock); list_for_each_entry_safe(zdev, tmp, &remove, entry) - zpci_remove_device(zdev); -} - -static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus) -{ - return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL; + zpci_device_reserved(zdev); } int pci_domain_nr(struct pci_bus *bus) { - return ((struct zpci_dev *) bus->sysdata)->domain; + return ((struct zpci_bus *) bus->sysdata)->domain_nr; } EXPORT_SYMBOL_GPL(pci_domain_nr); @@ -371,29 +370,17 @@ EXPORT_SYMBOL(pci_iounmap); static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) { - struct zpci_dev *zdev = get_zdev_by_bus(bus); - int ret; - - if (!zdev || devfn != ZPCI_DEVFN) - ret = -ENODEV; - else - ret = zpci_cfg_load(zdev, where, val, size); + struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn); - return ret; + return (zdev) ? zpci_cfg_load(zdev, where, val, size) : -ENODEV; } static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val) { - struct zpci_dev *zdev = get_zdev_by_bus(bus); - int ret; - - if (!zdev || devfn != ZPCI_DEVFN) - ret = -ENODEV; - else - ret = zpci_cfg_store(zdev, where, val, size); + struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn); - return ret; + return (zdev) ? zpci_cfg_store(zdev, where, val, size) : -ENODEV; } static struct pci_ops pci_root_ops = { @@ -423,7 +410,7 @@ static void zpci_map_resources(struct pci_dev *pdev) if (zpci_use_mio(zdev)) pdev->resource[i].start = - (resource_size_t __force) zdev->bars[i].mio_wb; + (resource_size_t __force) zdev->bars[i].mio_wt; else pdev->resource[i].start = (resource_size_t __force) pci_iomap_range_fh(pdev, i, 0, 0); @@ -504,15 +491,15 @@ static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start, return r; } -static int zpci_setup_bus_resources(struct zpci_dev *zdev, - struct list_head *resources) +int zpci_setup_bus_resources(struct zpci_dev *zdev, + struct list_head *resources) { unsigned long addr, size, flags; struct resource *res; int i, entry; snprintf(zdev->res_name, sizeof(zdev->res_name), - "PCI Bus %04x:%02x", zdev->domain, ZPCI_BUS_NR); + "PCI Bus %04x:%02x", zdev->uid, ZPCI_BUS_NR); for (i = 0; i < PCI_BAR_COUNT; i++) { if (!zdev->bars[i].size) @@ -530,7 +517,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev, flags |= IORESOURCE_MEM_64; if (zpci_use_mio(zdev)) - addr = (unsigned long) zdev->bars[i].mio_wb; + addr = (unsigned long) zdev->bars[i].mio_wt; else addr = ZPCI_ADDR(entry); size = 1UL << zdev->bars[i].size; @@ -563,9 +550,12 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev) int pcibios_add_device(struct pci_dev *pdev) { + struct zpci_dev *zdev = to_zpci(pdev); struct resource *res; int i; + /* The pdev has a reference to the zdev via its bus */ + zpci_zdev_get(zdev); if (pdev->is_physfn) pdev->no_vf_scan = 1; @@ -585,7 +575,10 @@ int pcibios_add_device(struct pci_dev *pdev) void pcibios_release_device(struct pci_dev *pdev) { + struct zpci_dev *zdev = to_zpci(pdev); + zpci_unmap_resources(pdev); + zpci_zdev_put(zdev); } int pcibios_enable_device(struct pci_dev *pdev, int mask) @@ -649,86 +642,54 @@ struct dev_pm_ops pcibios_pm_ops = { }; #endif /* CONFIG_HIBERNATE_CALLBACKS */ -static int zpci_alloc_domain(struct zpci_dev *zdev) +static int __zpci_register_domain(int domain) { - if (zpci_unique_uid) { - zdev->domain = (u16) zdev->uid; - if (zdev->domain >= ZPCI_NR_DEVICES) - return 0; - - spin_lock(&zpci_domain_lock); - if (test_bit(zdev->domain, zpci_domain)) { - spin_unlock(&zpci_domain_lock); - return -EEXIST; - } - set_bit(zdev->domain, zpci_domain); - spin_unlock(&zpci_domain_lock); - return 0; - } - spin_lock(&zpci_domain_lock); - zdev->domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES); - if (zdev->domain == ZPCI_NR_DEVICES) { + if (test_bit(domain, zpci_domain)) { spin_unlock(&zpci_domain_lock); - return -ENOSPC; + pr_err("Domain %04x is already assigned\n", domain); + return -EEXIST; } - set_bit(zdev->domain, zpci_domain); + set_bit(domain, zpci_domain); spin_unlock(&zpci_domain_lock); - return 0; + return domain; } -static void zpci_free_domain(struct zpci_dev *zdev) +static int __zpci_alloc_domain(void) { - if (zdev->domain >= ZPCI_NR_DEVICES) - return; + int domain; spin_lock(&zpci_domain_lock); - clear_bit(zdev->domain, zpci_domain); + /* + * We can always auto allocate domains below ZPCI_NR_DEVICES. + * There is either a free domain or we have reached the maximum in + * which case we would have bailed earlier. + */ + domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES); + set_bit(domain, zpci_domain); spin_unlock(&zpci_domain_lock); + return domain; } -void pcibios_remove_bus(struct pci_bus *bus) +int zpci_alloc_domain(int domain) { - struct zpci_dev *zdev = get_zdev_by_bus(bus); - - zpci_exit_slot(zdev); - zpci_cleanup_bus_resources(zdev); - zpci_destroy_iommu(zdev); - zpci_free_domain(zdev); - - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); - - zpci_dbg(3, "rem fid:%x\n", zdev->fid); - kfree(zdev); + if (zpci_unique_uid) { + if (domain) + return __zpci_register_domain(domain); + pr_warn("UID checking was active but no UID is provided: switching to automatic domain allocation\n"); + update_uid_checking(false); + } + return __zpci_alloc_domain(); } -static int zpci_scan_bus(struct zpci_dev *zdev) +void zpci_free_domain(int domain) { - LIST_HEAD(resources); - int ret; - - ret = zpci_setup_bus_resources(zdev, &resources); - if (ret) - goto error; - - zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops, - zdev, &resources); - if (!zdev->bus) { - ret = -EIO; - goto error; - } - zdev->bus->max_bus_speed = zdev->max_bus_speed; - pci_bus_add_devices(zdev->bus); - return 0; - -error: - zpci_cleanup_bus_resources(zdev); - pci_free_resource_list(&resources); - return ret; + spin_lock(&zpci_domain_lock); + clear_bit(domain, zpci_domain); + spin_unlock(&zpci_domain_lock); } + int zpci_enable_device(struct zpci_dev *zdev) { int rc; @@ -754,21 +715,68 @@ EXPORT_SYMBOL_GPL(zpci_enable_device); int zpci_disable_device(struct zpci_dev *zdev) { zpci_dma_exit_device(zdev); + /* + * The zPCI function may already be disabled by the platform, this is + * detected in clp_disable_fh() which becomes a no-op. + */ return clp_disable_fh(zdev); } EXPORT_SYMBOL_GPL(zpci_disable_device); +bool zpci_is_device_configured(struct zpci_dev *zdev) +{ + enum zpci_state state = zdev->state; + + return state != ZPCI_FN_STATE_RESERVED && + state != ZPCI_FN_STATE_STANDBY; +} + +/* zpci_remove_device - Removes the given zdev from the PCI core + * @zdev: the zdev to be removed from the PCI core + * @set_error: if true the device's error state is set to permanent failure + * + * Sets a zPCI device to a configured but offline state; the zPCI + * device is still accessible through its hotplug slot and the zPCI + * API but is removed from the common code PCI bus, making it + * no longer available to drivers. + */ +void zpci_remove_device(struct zpci_dev *zdev, bool set_error) +{ + struct zpci_bus *zbus = zdev->zbus; + struct pci_dev *pdev; + + if (!zdev->zbus->bus) + return; + + pdev = pci_get_slot(zbus->bus, zdev->devfn); + if (pdev) { + if (set_error) + pdev->error_state = pci_channel_io_perm_failure; + if (pdev->is_virtfn) { + zpci_remove_virtfn(pdev, zdev->vfn); + /* balance pci_get_slot */ + pci_dev_put(pdev); + return; + } + pci_stop_and_remove_bus_device_locked(pdev); + /* balance pci_get_slot */ + pci_dev_put(pdev); + } +} + int zpci_create_device(struct zpci_dev *zdev) { int rc; - rc = zpci_alloc_domain(zdev); - if (rc) - goto out; + kref_init(&zdev->kref); + + spin_lock(&zpci_list_lock); + list_add_tail(&zdev->entry, &zpci_list); + spin_unlock(&zpci_list_lock); rc = zpci_init_iommu(zdev); if (rc) - goto out_free; + goto out; mutex_init(&zdev->lock); if (zdev->state == ZPCI_FN_STATE_CONFIGURED) { @@ -776,36 +784,81 @@ int zpci_create_device(struct zpci_dev *zdev) if (rc) goto out_destroy_iommu; } - rc = zpci_scan_bus(zdev); + + rc = zpci_bus_device_register(zdev, &pci_root_ops); if (rc) goto out_disable; - spin_lock(&zpci_list_lock); - list_add_tail(&zdev->entry, &zpci_list); - spin_unlock(&zpci_list_lock); - - zpci_init_slot(zdev); - return 0; out_disable: if (zdev->state == ZPCI_FN_STATE_ONLINE) zpci_disable_device(zdev); + out_destroy_iommu: zpci_destroy_iommu(zdev); -out_free: - zpci_free_domain(zdev); out: + spin_lock(&zpci_list_lock); + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); return rc; } -void zpci_remove_device(struct zpci_dev *zdev) +/** + * zpci_device_reserved() - Mark device as resverved + * @zdev: the zpci_dev that was reserved + * + * Handle the case that a given zPCI function was reserved by another system. + * After a call to this function the zpci_dev can not be found via + * get_zdev_by_fid() anymore but may still be accessible via existing + * references though it will not be functional anymore. + */ +void zpci_device_reserved(struct zpci_dev *zdev) +{ + if (zdev->has_hp_slot) + zpci_exit_slot(zdev); + /* + * Remove device from zpci_list as it is going away. This also + * makes sure we ignore subsequent zPCI events for this device. + */ + spin_lock(&zpci_list_lock); + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); + zdev->state = ZPCI_FN_STATE_RESERVED; + zpci_dbg(3, "rsv fid:%x\n", zdev->fid); + zpci_zdev_put(zdev); +} + +void zpci_release_device(struct kref *kref) { - if (!zdev->bus) - return; + struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); + + if (zdev->zbus->bus) + zpci_remove_device(zdev, false); - pci_stop_root_bus(zdev->bus); - pci_remove_root_bus(zdev->bus); + switch (zdev->state) { + case ZPCI_FN_STATE_ONLINE: + case ZPCI_FN_STATE_CONFIGURED: + zpci_disable_device(zdev); + fallthrough; + case ZPCI_FN_STATE_STANDBY: + if (zdev->has_hp_slot) + zpci_exit_slot(zdev); + spin_lock(&zpci_list_lock); + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); + zpci_dbg(3, "rsv fid:%x\n", zdev->fid); + fallthrough; + case ZPCI_FN_STATE_RESERVED: + zpci_cleanup_bus_resources(zdev); + zpci_bus_device_unregister(zdev); + zpci_destroy_iommu(zdev); + fallthrough; + default: + break; + } + zpci_dbg(3, "rem fid:%x\n", zdev->fid); + kfree(zdev); } int zpci_report_error(struct pci_dev *pdev, @@ -854,7 +907,6 @@ static void zpci_mem_exit(void) } static unsigned int s390_pci_probe __initdata = 1; -static unsigned int s390_pci_no_mio __initdata; unsigned int s390_pci_force_floating __initdata; static unsigned int s390_pci_initialized; @@ -865,13 +917,17 @@ char * __init pcibios_setup(char *str) return NULL; } if (!strcmp(str, "nomio")) { - s390_pci_no_mio = 1; + S390_lowcore.machine_flags &= ~MACHINE_FLAG_PCI_MIO; return NULL; } if (!strcmp(str, "force_floating")) { s390_pci_force_floating = 1; return NULL; } + if (!strcmp(str, "norid")) { + s390_pci_no_rid = 1; + return NULL; + } return str; } @@ -890,7 +946,7 @@ static int __init pci_base_init(void) if (!test_facility(69) || !test_facility(71)) return 0; - if (test_facility(153) && !s390_pci_no_mio) { + if (MACHINE_HAS_PCI_MIO) { static_branch_enable(&have_mio); ctl_set_bit(2, 5); } @@ -934,5 +990,5 @@ subsys_initcall_sync(pci_base_init); void zpci_rescan(void) { if (zpci_is_enabled()) - clp_rescan_pci_devices_simple(); + clp_rescan_pci_devices_simple(NULL); } diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c new file mode 100644 index 0000000000000..ebcd04514e8eb --- /dev/null +++ b/arch/s390/pci/pci_bus.c @@ -0,0 +1,332 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2020 + * + * Author(s): + * Pierre Morel + * + */ + +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "pci_bus.h" + +static LIST_HEAD(zbus_list); +static DEFINE_SPINLOCK(zbus_list_lock); +static int zpci_nb_devices; + +/* zpci_bus_scan + * @zbus: the zbus holding the zdevices + * @ops: the pci operations + * + * The domain number must be set before pci_scan_root_bus is called. + * This function can be called once the domain is known, hence + * when the function_0 is dicovered. + */ +static int zpci_bus_scan(struct zpci_bus *zbus, int domain, struct pci_ops *ops) +{ + struct pci_bus *bus; + int rc; + + rc = zpci_alloc_domain(domain); + if (rc < 0) + return rc; + zbus->domain_nr = rc; + + bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, ops, zbus, &zbus->resources); + if (!bus) { + zpci_free_domain(zbus->domain_nr); + return -EFAULT; + } + + zbus->bus = bus; + pci_bus_add_devices(bus); + return 0; +} + +static void zpci_bus_release(struct kref *kref) +{ + struct zpci_bus *zbus = container_of(kref, struct zpci_bus, kref); + + if (zbus->bus) { + pci_lock_rescan_remove(); + pci_stop_root_bus(zbus->bus); + + zpci_free_domain(zbus->domain_nr); + pci_free_resource_list(&zbus->resources); + + pci_remove_root_bus(zbus->bus); + pci_unlock_rescan_remove(); + } + + spin_lock(&zbus_list_lock); + list_del(&zbus->bus_next); + spin_unlock(&zbus_list_lock); + kfree(zbus); +} + +static void zpci_bus_put(struct zpci_bus *zbus) +{ + kref_put(&zbus->kref, zpci_bus_release); +} + +static struct zpci_bus *zpci_bus_get(int pchid) +{ + struct zpci_bus *zbus; + + spin_lock(&zbus_list_lock); + list_for_each_entry(zbus, &zbus_list, bus_next) { + if (pchid == zbus->pchid) { + kref_get(&zbus->kref); + goto out_unlock; + } + } + zbus = NULL; +out_unlock: + spin_unlock(&zbus_list_lock); + return zbus; +} + +static struct zpci_bus *zpci_bus_alloc(int pchid) +{ + struct zpci_bus *zbus; + + zbus = kzalloc(sizeof(*zbus), GFP_KERNEL); + if (!zbus) + return NULL; + + zbus->pchid = pchid; + INIT_LIST_HEAD(&zbus->bus_next); + spin_lock(&zbus_list_lock); + list_add_tail(&zbus->bus_next, &zbus_list); + spin_unlock(&zbus_list_lock); + + kref_init(&zbus->kref); + INIT_LIST_HEAD(&zbus->resources); + + return zbus; +} + +#ifdef CONFIG_PCI_IOV +static int zpci_bus_link_virtfn(struct pci_dev *pdev, + struct pci_dev *virtfn, int vfid) +{ + int rc; + + rc = pci_iov_sysfs_link(pdev, virtfn, vfid); + if (rc) + return rc; + + virtfn->is_virtfn = 1; + virtfn->multifunction = 0; + virtfn->physfn = pci_dev_get(pdev); + + return 0; +} + +static int zpci_bus_setup_virtfn(struct zpci_bus *zbus, + struct pci_dev *virtfn, int vfn) +{ + int i, cand_devfn; + struct zpci_dev *zdev; + struct pci_dev *pdev; + int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/ + int rc = 0; + + if (!zbus->multifunction) + return 0; + + /* If the parent PF for the given VF is also configured in the + * instance, it must be on the same zbus. + * We can then identify the parent PF by checking what + * devfn the VF would have if it belonged to that PF using the PF's + * stride and offset. Only if this candidate devfn matches the + * actual devfn will we link both functions. + */ + for (i = 0; i < ZPCI_FUNCTIONS_PER_BUS; i++) { + zdev = zbus->function[i]; + if (zdev && zdev->is_physfn) { + pdev = pci_get_slot(zbus->bus, zdev->devfn); + if (!pdev) + continue; + cand_devfn = pci_iov_virtfn_devfn(pdev, vfid); + if (cand_devfn == virtfn->devfn) { + rc = zpci_bus_link_virtfn(pdev, virtfn, vfid); + /* balance pci_get_slot() */ + pci_dev_put(pdev); + break; + } + /* balance pci_get_slot() */ + pci_dev_put(pdev); + } + } + return rc; +} +#else +static inline int zpci_bus_setup_virtfn(struct zpci_bus *zbus, + struct pci_dev *virtfn, int vfn) +{ + return 0; +} +#endif + +void pcibios_bus_add_device(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = to_zpci(pdev); + + /* + * With pdev->no_vf_scan the common PCI probing code does not + * perform PF/VF linking. + */ + if (zdev->vfn) { + zpci_bus_setup_virtfn(zdev->zbus, pdev, zdev->vfn); + pdev->no_command_memory = 1; + } +} + +static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev) +{ + struct pci_bus *bus; + struct resource_entry *window, *n; + struct resource *res; + struct pci_dev *pdev; + int rc; + + bus = zbus->bus; + if (!bus) + return -EINVAL; + + pdev = pci_get_slot(bus, zdev->devfn); + if (pdev) { + /* Device is already known. */ + pci_dev_put(pdev); + return 0; + } + + rc = zpci_init_slot(zdev); + if (rc) + return rc; + zdev->has_hp_slot = 1; + + resource_list_for_each_entry_safe(window, n, &zbus->resources) { + res = window->res; + pci_bus_add_resource(bus, res, 0); + } + + pdev = pci_scan_single_device(bus, zdev->devfn); + if (pdev) + pci_bus_add_device(pdev); + + return 0; +} + +static void zpci_bus_add_devices(struct zpci_bus *zbus) +{ + int i; + + for (i = 1; i < ZPCI_FUNCTIONS_PER_BUS; i++) + if (zbus->function[i]) + zpci_bus_add_device(zbus, zbus->function[i]); + + pci_lock_rescan_remove(); + pci_bus_add_devices(zbus->bus); + pci_unlock_rescan_remove(); +} + +int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops) +{ + struct zpci_bus *zbus = NULL; + int rc = -EBADF; + + if (zpci_nb_devices == ZPCI_NR_DEVICES) { + pr_warn("Adding PCI function %08x failed because the configured limit of %d is reached\n", + zdev->fid, ZPCI_NR_DEVICES); + return -ENOSPC; + } + zpci_nb_devices++; + + if (zdev->devfn >= ZPCI_FUNCTIONS_PER_BUS) + return -EINVAL; + + if (!s390_pci_no_rid && zdev->rid_available) + zbus = zpci_bus_get(zdev->pchid); + + if (!zbus) { + zbus = zpci_bus_alloc(zdev->pchid); + if (!zbus) + return -ENOMEM; + } + + zdev->zbus = zbus; + if (zbus->function[zdev->devfn]) { + pr_err("devfn %04x is already assigned\n", zdev->devfn); + goto error; /* rc already set */ + } + zbus->function[zdev->devfn] = zdev; + + zpci_setup_bus_resources(zdev, &zbus->resources); + + if (zbus->bus) { + if (!zbus->multifunction) { + WARN_ONCE(1, "zbus is not multifunction\n"); + goto error_bus; + } + if (!zdev->rid_available) { + WARN_ONCE(1, "rid_available not set for multifunction\n"); + goto error_bus; + } + rc = zpci_bus_add_device(zbus, zdev); + if (rc) + goto error_bus; + } else if (zdev->devfn == 0) { + if (zbus->multifunction && !zdev->rid_available) { + WARN_ONCE(1, "rid_available not set on function 0 for multifunction\n"); + goto error_bus; + } + rc = zpci_bus_scan(zbus, (u16)zdev->uid, ops); + if (rc) + goto error_bus; + zpci_bus_add_devices(zbus); + rc = zpci_init_slot(zdev); + if (rc) + goto error_bus; + zdev->has_hp_slot = 1; + zbus->multifunction = zdev->rid_available; + zbus->max_bus_speed = zdev->max_bus_speed; + } else { + zbus->multifunction = 1; + } + + return 0; + +error_bus: + zpci_nb_devices--; + zbus->function[zdev->devfn] = NULL; +error: + pr_err("Adding PCI function %08x failed\n", zdev->fid); + zpci_bus_put(zbus); + return rc; +} + +void zpci_bus_device_unregister(struct zpci_dev *zdev) +{ + struct zpci_bus *zbus = zdev->zbus; + + zpci_nb_devices--; + zbus->function[zdev->devfn] = NULL; + zpci_bus_put(zbus); +} diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h new file mode 100644 index 0000000000000..8808ff0835b80 --- /dev/null +++ b/arch/s390/pci/pci_bus.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2020 + * + * Author(s): + * Pierre Morel + * + */ + +int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops); +void zpci_bus_device_unregister(struct zpci_dev *zdev); +int zpci_bus_init(void); + +void zpci_release_device(struct kref *kref); +static inline void zpci_zdev_put(struct zpci_dev *zdev) +{ + kref_put(&zdev->kref, zpci_release_device); +} + +static inline void zpci_zdev_get(struct zpci_dev *zdev) +{ + kref_get(&zdev->kref); +} + +int zpci_alloc_domain(int domain); +void zpci_free_domain(int domain); +int zpci_setup_bus_resources(struct zpci_dev *zdev, + struct list_head *resources); + +static inline struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus, + unsigned int devfn) +{ + struct zpci_bus *zbus = bus->sysdata; + + return (devfn >= ZPCI_FUNCTIONS_PER_BUS) ? NULL : zbus->function[devfn]; +} + +#ifdef CONFIG_PCI_IOV +static inline void zpci_remove_virtfn(struct pci_dev *pdev, int vfn) +{ + + pci_lock_rescan_remove(); + /* Linux' vfid's start at 0 vfn at 1 */ + pci_iov_remove_virtfn(pdev->physfn, vfn - 1); + pci_unlock_rescan_remove(); +} +#else /* CONFIG_PCI_IOV */ +static inline void zpci_remove_virtfn(struct pci_dev *pdev, int vfn) {} +#endif /* CONFIG_PCI_IOV */ diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index e585a62d65300..48351b4eb0cd2 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -24,7 +24,7 @@ bool zpci_unique_uid; -static void update_uid_checking(bool new) +void update_uid_checking(bool new) { if (zpci_unique_uid != new) zpci_dbg(1, "uid checking:%d\n", new); @@ -155,8 +155,13 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev, zdev->pfgid = response->pfgid; zdev->pft = response->pft; zdev->vfn = response->vfn; + zdev->port = response->port; zdev->uid = response->uid; zdev->fmb_length = sizeof(u32) * response->fmb_len; + zdev->rid_available = response->rid_avail; + zdev->is_physfn = response->is_physfn; + if (!s390_pci_no_rid && zdev->rid_available) + zdev->devfn = response->rid & ZPCI_RID_MASK_DEVFN; memcpy(zdev->pfip, response->pfip, sizeof(zdev->pfip)); if (response->util_str_avail) { @@ -240,12 +245,14 @@ int clp_add_pci_device(u32 fid, u32 fh, int configured) } /* - * Enable/Disable a given PCI function defined by its function handle. + * Enable/Disable a given PCI function and update its function handle if + * necessary */ -static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) +static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command) { struct clp_req_rsp_set_pci *rrb; int rc, retries = 100; + u32 fid = zdev->fid; rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) @@ -256,7 +263,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) rrb->request.hdr.len = sizeof(rrb->request); rrb->request.hdr.cmd = CLP_SET_PCI_FN; rrb->response.hdr.len = sizeof(rrb->response); - rrb->request.fh = *fh; + rrb->request.fh = zdev->fh; rrb->request.oc = command; rrb->request.ndas = nr_dma_as; @@ -269,12 +276,17 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) } } while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY); - if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) - *fh = rrb->response.fh; - else { + if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { zpci_err("Set PCI FN:\n"); zpci_err_clp(rrb->response.hdr.rsp, rc); - rc = -EIO; + } + + if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { + zdev->fh = rrb->response.fh; + } else if (!rc && rrb->response.hdr.rsp == CLP_RC_SETPCIFN_ALRDY && + rrb->response.fh == 0) { + /* Function is already in desired state - update handle */ + rc = clp_rescan_pci_devices_simple(&fid); } clp_free_block(rrb); return rc; @@ -282,18 +294,17 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as) { - u32 fh = zdev->fh; int rc; - rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_PCI_FN); - zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); + rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_PCI_FN); + zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); if (rc) goto out; - zdev->fh = fh; if (zpci_use_mio(zdev)) { - rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_MIO); - zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); + rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_MIO); + zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", + zdev->fid, zdev->fh, rc); if (rc) clp_disable_fh(zdev); } @@ -303,17 +314,13 @@ int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as) int clp_disable_fh(struct zpci_dev *zdev) { - u32 fh = zdev->fh; int rc; if (!zdev_enabled(zdev)) return 0; - rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN); - zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); - if (!rc) - zdev->fh = fh; - + rc = clp_set_pci_fn(zdev, 0, CLP_SET_DISABLE_PCI_FN); + zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); return rc; } @@ -370,10 +377,14 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) static void __clp_update(struct clp_fh_list_entry *entry, void *data) { struct zpci_dev *zdev; + u32 *fid = data; if (!entry->vendor_id) return; + if (fid && *fid != entry->fid) + return; + zdev = get_zdev_by_fid(entry->fid); if (!zdev) return; @@ -413,7 +424,10 @@ int clp_rescan_pci_devices(void) return rc; } -int clp_rescan_pci_devices_simple(void) +/* Rescan PCI functions and refresh function handles. If fid is non-NULL only + * refresh the handle of the function matching @fid + */ +int clp_rescan_pci_devices_simple(u32 *fid) { struct clp_req_rsp_list_pci *rrb; int rc; @@ -422,7 +436,7 @@ int clp_rescan_pci_devices_simple(void) if (!rrb) return -ENOMEM; - rc = clp_list_pci(rrb, NULL, __clp_update); + rc = clp_list_pci(rrb, fid, __clp_update); clp_free_block(rrb); return rc; diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 8d6ee4af42303..bdcf288cf8ccc 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -14,6 +14,8 @@ #include #include +#include "pci_bus.h" + /* Content Code Description for PCI Function Error */ struct zpci_ccdf_err { u32 reserved1; @@ -53,7 +55,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) zpci_err_hex(ccdf, sizeof(*ccdf)); if (zdev) - pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN); + pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n", pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); @@ -74,46 +76,52 @@ void zpci_event_error(void *data) static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); - struct pci_dev *pdev = NULL; enum zpci_state state; + struct pci_dev *pdev; int ret; - if (zdev) - pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN); - - pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n", - pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); zpci_err("avail CCDF:\n"); zpci_err_hex(ccdf, sizeof(*ccdf)); switch (ccdf->pec) { case 0x0301: /* Reserved|Standby -> Configured */ if (!zdev) { - ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 0); - if (ret) - break; - zdev = get_zdev_by_fid(ccdf->fid); + ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 1); + break; } - if (!zdev || zdev->state != ZPCI_FN_STATE_STANDBY) + /* the configuration request may be stale */ + if (zdev->state != ZPCI_FN_STATE_STANDBY) break; - zdev->state = ZPCI_FN_STATE_CONFIGURED; zdev->fh = ccdf->fh; + zdev->state = ZPCI_FN_STATE_CONFIGURED; ret = zpci_enable_device(zdev); if (ret) break; + + /* the PCI function will be scanned once function 0 appears */ + if (!zdev->zbus->bus) + break; + + pdev = pci_scan_single_device(zdev->zbus->bus, zdev->devfn); + if (!pdev) + break; + + pci_bus_add_device(pdev); pci_lock_rescan_remove(); - pci_rescan_bus(zdev->bus); + pci_bus_add_devices(zdev->zbus->bus); pci_unlock_rescan_remove(); break; case 0x0302: /* Reserved -> Standby */ - if (!zdev) + if (!zdev) { clp_add_pci_device(ccdf->fid, ccdf->fh, 0); + break; + } + zdev->fh = ccdf->fh; break; case 0x0303: /* Deconfiguration requested */ if (!zdev) break; - if (pdev) - pci_stop_and_remove_bus_device_locked(pdev); + zpci_remove_device(zdev, false); ret = zpci_disable_device(zdev); if (ret) @@ -128,19 +136,17 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) case 0x0304: /* Configured -> Standby|Reserved */ if (!zdev) break; - if (pdev) { - /* Give the driver a hint that the function is - * already unusable. */ - pdev->error_state = pci_channel_io_perm_failure; - pci_stop_and_remove_bus_device_locked(pdev); - } + /* Give the driver a hint that the function is + * already unusable. + */ + zpci_remove_device(zdev, true); zdev->fh = ccdf->fh; zpci_disable_device(zdev); zdev->state = ZPCI_FN_STATE_STANDBY; if (!clp_get_state(ccdf->fid, &state) && state == ZPCI_FN_STATE_RESERVED) { - zpci_remove_device(zdev); + zpci_device_reserved(zdev); } break; case 0x0306: /* 0x308 or 0x302 for multiple devices */ @@ -149,12 +155,11 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) case 0x0308: /* Standby -> Reserved */ if (!zdev) break; - zpci_remove_device(zdev); + zpci_device_reserved(zdev); break; default: break; } - pci_dev_put(pdev); } void zpci_event_availability(void *data) diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index fbe97ab2e2286..743f257cf2cbd 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -115,7 +115,6 @@ static struct irq_chip zpci_irq_chip = { .name = "PCI-MSI", .irq_unmask = pci_msi_unmask_irq, .irq_mask = pci_msi_mask_irq, - .irq_set_affinity = zpci_set_irq_affinity, }; static void zpci_handle_cpu_local_irq(bool rescan) @@ -276,7 +275,9 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) rc = -EIO; if (hwirq - bit >= msi_vecs) break; - irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE, msi->affinity); + irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE, + (irq_delivery == DIRECTED) ? + msi->affinity : NULL); if (irq < 0) return -ENOMEM; rc = irq_set_msi_desc(irq, msi); diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 7d42a8794f10d..921f0fc12f1fa 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -11,6 +11,113 @@ #include #include #include +#include +#include + +static inline void zpci_err_mmio(u8 cc, u8 status, u64 offset) +{ + struct { + u64 offset; + u8 cc; + u8 status; + } data = {offset, cc, status}; + + zpci_err_hex(&data, sizeof(data)); +} + +static inline int __pcistb_mio_inuser( + void __iomem *ioaddr, const void __user *src, + u64 len, u8 *status) +{ + int cc = -ENXIO; + + asm volatile ( + " sacf 256\n" + "0: .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n" + "1: ipm %[cc]\n" + " srl %[cc],28\n" + "2: sacf 768\n" + EX_TABLE(0b, 2b) EX_TABLE(1b, 2b) + : [cc] "+d" (cc), [len] "+d" (len) + : [ioaddr] "a" (ioaddr), [src] "Q" (*((u8 __force *)src)) + : "cc", "memory"); + *status = len >> 24 & 0xff; + return cc; +} + +static inline int __pcistg_mio_inuser( + void __iomem *ioaddr, const void __user *src, + u64 ulen, u8 *status) +{ + register u64 addr asm("2") = (u64 __force) ioaddr; + register u64 len asm("3") = ulen; + int cc = -ENXIO; + u64 val = 0; + u64 cnt = ulen; + u8 tmp; + + /* + * copy 0 < @len <= 8 bytes from @src into the right most bytes of + * a register, then store it to PCI at @ioaddr while in secondary + * address space. pcistg then uses the user mappings. + */ + asm volatile ( + " sacf 256\n" + "0: llgc %[tmp],0(%[src])\n" + " sllg %[val],%[val],8\n" + " aghi %[src],1\n" + " ogr %[val],%[tmp]\n" + " brctg %[cnt],0b\n" + "1: .insn rre,0xb9d40000,%[val],%[ioaddr]\n" + "2: ipm %[cc]\n" + " srl %[cc],28\n" + "3: sacf 768\n" + EX_TABLE(0b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b) + : + [src] "+a" (src), [cnt] "+d" (cnt), + [val] "+d" (val), [tmp] "=d" (tmp), + [len] "+d" (len), [cc] "+d" (cc), + [ioaddr] "+a" (addr) + :: "cc", "memory"); + *status = len >> 24 & 0xff; + + /* did we read everything from user memory? */ + if (!cc && cnt != 0) + cc = -EFAULT; + + return cc; +} + +static inline int __memcpy_toio_inuser(void __iomem *dst, + const void __user *src, size_t n) +{ + int size, rc = 0; + u8 status = 0; + mm_segment_t old_fs; + + if (!src) + return -EINVAL; + + old_fs = enable_sacf_uaccess(); + while (n > 0) { + size = zpci_get_max_write_size((u64 __force) dst, + (u64 __force) src, n, + ZPCI_MAX_WRITE_SIZE); + if (size > 8) /* main path */ + rc = __pcistb_mio_inuser(dst, src, size, &status); + else + rc = __pcistg_mio_inuser(dst, src, size, &status); + if (rc) + break; + src += size; + dst += size; + n -= size; + } + disable_sacf_uaccess(old_fs); + if (rc) + zpci_err_mmio(rc, status, (__force u64) dst); + return rc; +} static long get_pfn(unsigned long user_addr, unsigned long access, unsigned long *pfn) @@ -21,7 +128,7 @@ static long get_pfn(unsigned long user_addr, unsigned long access, down_read(¤t->mm->mmap_sem); ret = -EINVAL; vma = find_vma(current->mm, user_addr); - if (!vma) + if (!vma || user_addr < vma->vm_start) goto out; ret = -EACCES; if (!(vma->vm_flags & access)) @@ -46,6 +153,20 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length) return -EINVAL; + + /* + * Only support read access to MIO capable devices on a MIO enabled + * system. Otherwise we would have to check for every address if it is + * a special ZPCI_ADDR and we would have to do a get_pfn() which we + * don't need for MIO capable devices. + */ + if (static_branch_likely(&have_mio)) { + ret = __memcpy_toio_inuser((void __iomem *) mmio_addr, + user_buffer, + length); + return ret; + } + if (length > 64) { buf = kmalloc(length, GFP_KERNEL); if (!buf) @@ -56,7 +177,8 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, ret = get_pfn(mmio_addr, VM_WRITE, &pfn); if (ret) goto out; - io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); + io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | + (mmio_addr & ~PAGE_MASK)); ret = -EFAULT; if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) @@ -72,6 +194,78 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, return ret; } +static inline int __pcilg_mio_inuser( + void __user *dst, const void __iomem *ioaddr, + u64 ulen, u8 *status) +{ + register u64 addr asm("2") = (u64 __force) ioaddr; + register u64 len asm("3") = ulen; + u64 cnt = ulen; + int shift = ulen * 8; + int cc = -ENXIO; + u64 val, tmp; + + /* + * read 0 < @len <= 8 bytes from the PCI memory mapped at @ioaddr (in + * user space) into a register using pcilg then store these bytes at + * user address @dst + */ + asm volatile ( + " sacf 256\n" + "0: .insn rre,0xb9d60000,%[val],%[ioaddr]\n" + "1: ipm %[cc]\n" + " srl %[cc],28\n" + " ltr %[cc],%[cc]\n" + " jne 4f\n" + "2: ahi %[shift],-8\n" + " srlg %[tmp],%[val],0(%[shift])\n" + "3: stc %[tmp],0(%[dst])\n" + " aghi %[dst],1\n" + " brctg %[cnt],2b\n" + "4: sacf 768\n" + EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b) + : + [cc] "+d" (cc), [val] "=d" (val), [len] "+d" (len), + [dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp), + [shift] "+d" (shift) + : + [ioaddr] "a" (addr) + : "cc", "memory"); + + /* did we write everything to the user space buffer? */ + if (!cc && cnt != 0) + cc = -EFAULT; + + *status = len >> 24 & 0xff; + return cc; +} + +static inline int __memcpy_fromio_inuser(void __user *dst, + const void __iomem *src, + unsigned long n) +{ + int size, rc = 0; + u8 status; + mm_segment_t old_fs; + + old_fs = enable_sacf_uaccess(); + while (n > 0) { + size = zpci_get_max_write_size((u64 __force) src, + (u64 __force) dst, n, + ZPCI_MAX_READ_SIZE); + rc = __pcilg_mio_inuser(dst, src, size, &status); + if (rc) + break; + src += size; + dst += size; + n -= size; + } + disable_sacf_uaccess(old_fs); + if (rc) + zpci_err_mmio(rc, status, (__force u64) dst); + return rc; +} + SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, void __user *, user_buffer, size_t, length) { @@ -86,12 +280,27 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length) return -EINVAL; + + /* + * Only support write access to MIO capable devices on a MIO enabled + * system. Otherwise we would have to check for every address if it is + * a special ZPCI_ADDR and we would have to do a get_pfn() which we + * don't need for MIO capable devices. + */ + if (static_branch_likely(&have_mio)) { + ret = __memcpy_fromio_inuser( + user_buffer, (const void __iomem *)mmio_addr, + length); + return ret; + } + if (length > 64) { buf = kmalloc(length, GFP_KERNEL); if (!buf) return -ENOMEM; - } else + } else { buf = local_buf; + } ret = get_pfn(mmio_addr, VM_READ, &pfn); if (ret) diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index a433ba01a3175..5c028bee91b90 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -13,6 +13,8 @@ #include #include +#include "../../../drivers/pci/pci.h" + #include #define zpci_attr(name, fmt, member) \ @@ -31,6 +33,7 @@ zpci_attr(pchid, "0x%04x\n", pchid); zpci_attr(pfgid, "0x%02x\n", pfgid); zpci_attr(vfn, "0x%04x\n", vfn); zpci_attr(pft, "0x%02x\n", pft); +zpci_attr(port, "%d\n", port); zpci_attr(uid, "0x%x\n", uid); zpci_attr(segment0, "0x%02x\n", pfip[0]); zpci_attr(segment1, "0x%02x\n", pfip[1]); @@ -49,31 +52,50 @@ static DEVICE_ATTR_RO(mio_enabled); static ssize_t recover_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { + struct kernfs_node *kn; struct pci_dev *pdev = to_pci_dev(dev); struct zpci_dev *zdev = to_zpci(pdev); - int ret; - - if (!device_remove_file_self(dev, attr)) - return count; - + int ret = 0; + + /* Can't use device_remove_self() here as that would lead us to lock + * the pci_rescan_remove_lock while holding the device' kernfs lock. + * This would create a possible deadlock with disable_slot() which is + * not directly protected by the device' kernfs lock but takes it + * during the device removal which happens under + * pci_rescan_remove_lock. + * + * This is analogous to sdev_store_delete() in + * drivers/scsi/scsi_sysfs.c + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + WARN_ON_ONCE(!kn); + /* device_remove_file() serializes concurrent calls ignoring all but + * the first + */ + device_remove_file(dev, attr); + + /* A concurrent call to recover_store() may slip between + * sysfs_break_active_protection() and the sysfs file removal. + * Once it unblocks from pci_lock_rescan_remove() the original pdev + * will already be removed. + */ pci_lock_rescan_remove(); - pci_stop_and_remove_bus_device(pdev); - ret = zpci_disable_device(zdev); - if (ret) - goto error; - - ret = zpci_enable_device(zdev); - if (ret) - goto error; - - pci_rescan_bus(zdev->bus); + if (pci_dev_is_added(pdev)) { + pci_stop_and_remove_bus_device(pdev); + ret = zpci_disable_device(zdev); + if (ret) + goto out; + + ret = zpci_enable_device(zdev); + if (ret) + goto out; + pci_rescan_bus(zdev->zbus->bus); + } +out: pci_unlock_rescan_remove(); - - return count; - -error: - pci_unlock_rescan_remove(); - return ret; + if (kn) + sysfs_unbreak_active_protection(kn); + return ret ? ret : count; } static DEVICE_ATTR_WO(recover); @@ -121,6 +143,7 @@ static struct attribute *zpci_dev_attrs[] = { &dev_attr_pchid.attr, &dev_attr_pfgid.attr, &dev_attr_pft.attr, + &dev_attr_port.attr, &dev_attr_vfn.attr, &dev_attr_uid.attr, &dev_attr_recover.attr, diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index bc0d7a0d03945..9de56065f28cf 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -15,8 +15,10 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) -$(obj)/string.o: $(srctree)/arch/s390/lib/string.c FORCE - $(call if_changed_rule,cc_o_c) +KCOV_INSTRUMENT := n +GCOV_PROFILE := n +UBSAN_SANITIZE := n +KASAN_SANITIZE := n KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S index 5a10ce34b95d1..3d1c31e0cf3dd 100644 --- a/arch/s390/purgatory/head.S +++ b/arch/s390/purgatory/head.S @@ -62,14 +62,15 @@ jh 10b .endm -.macro START_NEXT_KERNEL base +.macro START_NEXT_KERNEL base subcode lg %r4,kernel_entry-\base(%r13) lg %r5,load_psw_mask-\base(%r13) ogr %r4,%r5 stg %r4,0(%r0) xgr %r0,%r0 - diag %r0,%r0,0x308 + lghi %r1,\subcode + diag %r0,%r1,0x308 .endm .text @@ -123,7 +124,7 @@ ENTRY(purgatory_start) je .start_crash_kernel /* start normal kernel */ - START_NEXT_KERNEL .base_crash + START_NEXT_KERNEL .base_crash 0 .return_old_kernel: lmg %r6,%r15,gprregs-.base_crash(%r13) @@ -227,7 +228,7 @@ ENTRY(purgatory_start) MEMCPY %r9,%r10,%r11 /* start crash kernel */ - START_NEXT_KERNEL .base_dst + START_NEXT_KERNEL .base_dst 1 load_psw_mask: diff --git a/arch/s390/purgatory/string.c b/arch/s390/purgatory/string.c new file mode 100644 index 0000000000000..c98c22a72db71 --- /dev/null +++ b/arch/s390/purgatory/string.c @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: GPL-2.0 +#define __HAVE_ARCH_MEMCMP /* arch function */ +#include "../lib/string.c" diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug index 010b6c33bbba2..71acd3d9b9e83 100644 --- a/arch/sh/Kconfig.debug +++ b/arch/sh/Kconfig.debug @@ -58,6 +58,7 @@ config DUMP_CODE config DWARF_UNWINDER bool "Enable the DWARF unwinder for stacktraces" + depends on DEBUG_KERNEL select FRAME_POINTER depends on SUPERH32 default n diff --git a/arch/sh/boards/mach-landisk/setup.c b/arch/sh/boards/mach-landisk/setup.c index 16b4d8b0bb850..2c44b94f82fb2 100644 --- a/arch/sh/boards/mach-landisk/setup.c +++ b/arch/sh/boards/mach-landisk/setup.c @@ -82,6 +82,9 @@ device_initcall(landisk_devices_setup); static void __init landisk_setup(char **cmdline_p) { + /* I/O port identity mapping */ + __set_io_port_base(0); + /* LED ON */ __raw_writeb(__raw_readb(PA_LED) | 0x03, PA_LED); diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig index e5beb625ab888..87db9a84b5eca 100644 --- a/arch/sh/configs/sh03_defconfig +++ b/arch/sh/configs/sh03_defconfig @@ -46,7 +46,6 @@ CONFIG_BLK_DEV_IDETAPE=m CONFIG_SCSI=m CONFIG_BLK_DEV_SD=m CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_NETDEVICES=y CONFIG_NET_ETHERNET=y diff --git a/arch/sh/drivers/dma/Kconfig b/arch/sh/drivers/dma/Kconfig index d0de378beefe5..7d54f284ce10f 100644 --- a/arch/sh/drivers/dma/Kconfig +++ b/arch/sh/drivers/dma/Kconfig @@ -63,8 +63,7 @@ config PVR2_DMA config G2_DMA tristate "G2 Bus DMA support" - depends on SH_DREAMCAST - select SH_DMA_API + depends on SH_DREAMCAST && SH_DMA_API help This enables support for the DMA controller for the Dreamcast's G2 bus. Drivers that want this will generally enable this on diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h index 22d968bfe9bb6..d770da3f8b6fb 100644 --- a/arch/sh/include/asm/pgalloc.h +++ b/arch/sh/include/asm/pgalloc.h @@ -12,6 +12,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd); extern pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address); extern void pmd_free(struct mm_struct *mm, pmd_t *pmd); +#define __pmd_free_tlb(tlb, pmdp, addr) pmd_free((tlb)->mm, (pmdp)) #endif static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, @@ -33,13 +34,4 @@ do { \ tlb_remove_page((tlb), (pte)); \ } while (0) -#if CONFIG_PGTABLE_LEVELS > 2 -#define __pmd_free_tlb(tlb, pmdp, addr) \ -do { \ - struct page *page = virt_to_page(pmdp); \ - pgtable_pmd_page_dtor(page); \ - tlb_remove_page((tlb), page); \ -} while (0); -#endif - #endif /* __ASM_SH_PGALLOC_H */ diff --git a/arch/sh/include/asm/sfp-machine.h b/arch/sh/include/asm/sfp-machine.h index cbc7cf8c97ce6..2d2423478b71d 100644 --- a/arch/sh/include/asm/sfp-machine.h +++ b/arch/sh/include/asm/sfp-machine.h @@ -13,6 +13,14 @@ #ifndef _SFP_MACHINE_H #define _SFP_MACHINE_H +#ifdef __BIG_ENDIAN__ +#define __BYTE_ORDER __BIG_ENDIAN +#define __LITTLE_ENDIAN 0 +#else +#define __BYTE_ORDER __LITTLE_ENDIAN +#define __BIG_ENDIAN 0 +#endif + #define _FP_W_TYPE_SIZE 32 #define _FP_W_TYPE unsigned long #define _FP_WS_TYPE signed long diff --git a/arch/sh/include/cpu-sh2a/cpu/sh7269.h b/arch/sh/include/cpu-sh2a/cpu/sh7269.h index d516e5d488180..b887cc402b712 100644 --- a/arch/sh/include/cpu-sh2a/cpu/sh7269.h +++ b/arch/sh/include/cpu-sh2a/cpu/sh7269.h @@ -78,8 +78,15 @@ enum { GPIO_FN_WDTOVF, /* CAN */ - GPIO_FN_CTX1, GPIO_FN_CRX1, GPIO_FN_CTX0, GPIO_FN_CTX0_CTX1, - GPIO_FN_CRX0, GPIO_FN_CRX0_CRX1, GPIO_FN_CRX0_CRX1_CRX2, + GPIO_FN_CTX2, GPIO_FN_CRX2, + GPIO_FN_CTX1, GPIO_FN_CRX1, + GPIO_FN_CTX0, GPIO_FN_CRX0, + GPIO_FN_CTX0_CTX1, GPIO_FN_CRX0_CRX1, + GPIO_FN_CTX0_CTX1_CTX2, GPIO_FN_CRX0_CRX1_CRX2, + GPIO_FN_CTX2_PJ21, GPIO_FN_CRX2_PJ20, + GPIO_FN_CTX1_PJ23, GPIO_FN_CRX1_PJ22, + GPIO_FN_CTX0_CTX1_PJ23, GPIO_FN_CRX0_CRX1_PJ22, + GPIO_FN_CTX0_CTX1_CTX2_PJ21, GPIO_FN_CRX0_CRX1_CRX2_PJ20, /* DMAC */ GPIO_FN_TEND0, GPIO_FN_DACK0, GPIO_FN_DREQ0, diff --git a/arch/sh/include/cpu-sh4/cpu/sh7734.h b/arch/sh/include/cpu-sh4/cpu/sh7734.h index 96f0246ad2f2b..82b63208135ae 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7734.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7734.h @@ -134,7 +134,7 @@ enum { GPIO_FN_EX_WAIT1, GPIO_FN_SD1_DAT0_A, GPIO_FN_DREQ2, GPIO_FN_CAN1_TX_C, GPIO_FN_ET0_LINK_C, GPIO_FN_ET0_ETXD5_A, GPIO_FN_EX_WAIT0, GPIO_FN_TCLK1_B, - GPIO_FN_RD_WR, GPIO_FN_TCLK0, + GPIO_FN_RD_WR, GPIO_FN_TCLK0, GPIO_FN_CAN_CLK_B, GPIO_FN_ET0_ETXD4, GPIO_FN_EX_CS5, GPIO_FN_SD1_CMD_A, GPIO_FN_ATADIR, GPIO_FN_QSSL_B, GPIO_FN_ET0_ETXD3_A, GPIO_FN_EX_CS4, GPIO_FN_SD1_WP_A, GPIO_FN_ATAWR, GPIO_FN_QMI_QIO1_B, diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index ae354a2931e7e..fd6db0ab19288 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -62,18 +62,20 @@ void fpu_state_restore(struct pt_regs *regs) } if (!tsk_used_math(tsk)) { - local_irq_enable(); + int ret; /* * does a slab alloc which can sleep */ - if (init_fpu(tsk)) { + local_irq_enable(); + ret = init_fpu(tsk); + local_irq_disable(); + if (ret) { /* * ran out of memory! */ - do_group_exit(SIGKILL); + force_sig(SIGKILL); return; } - local_irq_disable(); } grab_fpu(regs); diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c index f8a2bec0f260b..1261dc7b84e8b 100644 --- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c +++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c @@ -73,8 +73,9 @@ static void shx3_prepare_cpus(unsigned int max_cpus) BUILD_BUG_ON(SMP_MSG_NR >= 8); for (i = 0; i < SMP_MSG_NR; i++) - request_irq(104 + i, ipi_interrupt_handler, - IRQF_PERCPU, "IPI", (void *)(long)i); + if (request_irq(104 + i, ipi_interrupt_handler, + IRQF_PERCPU, "IPI", (void *)(long)i)) + pr_err("Failed to request irq %d\n", i); for (i = 0; i < max_cpus; i++) set_cpu_present(i, true); diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S index d31f66e82ce51..4a8ec9e40cc2a 100644 --- a/arch/sh/kernel/entry-common.S +++ b/arch/sh/kernel/entry-common.S @@ -199,7 +199,7 @@ syscall_trace_entry: mov.l @(OFF_R7,r15), r7 ! arg3 mov.l @(OFF_R3,r15), r3 ! syscall_nr ! - mov.l 2f, r10 ! Number of syscalls + mov.l 6f, r10 ! Number of syscalls cmp/hs r10, r3 bf syscall_call mov #-ENOSYS, r0 @@ -353,7 +353,7 @@ ENTRY(system_call) tst r9, r8 bf syscall_trace_entry ! - mov.l 2f, r8 ! Number of syscalls + mov.l 6f, r8 ! Number of syscalls cmp/hs r8, r3 bt syscall_badsys ! @@ -392,7 +392,7 @@ syscall_exit: #if !defined(CONFIG_CPU_SH2) 1: .long TRA #endif -2: .long NR_syscalls +6: .long NR_syscalls 3: .long sys_call_table 7: .long do_syscall_trace_enter 8: .long do_syscall_trace_leave diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c index e8be0eca0444a..615ba932c398e 100644 --- a/arch/sh/math-emu/math.c +++ b/arch/sh/math-emu/math.c @@ -467,109 +467,6 @@ static int fpu_emulate(u16 code, struct sh_fpu_soft_struct *fregs, struct pt_reg return id_sys(fregs, regs, code); } -/** - * denormal_to_double - Given denormalized float number, - * store double float - * - * @fpu: Pointer to sh_fpu_soft structure - * @n: Index to FP register - */ -static void denormal_to_double(struct sh_fpu_soft_struct *fpu, int n) -{ - unsigned long du, dl; - unsigned long x = fpu->fpul; - int exp = 1023 - 126; - - if (x != 0 && (x & 0x7f800000) == 0) { - du = (x & 0x80000000); - while ((x & 0x00800000) == 0) { - x <<= 1; - exp--; - } - x &= 0x007fffff; - du |= (exp << 20) | (x >> 3); - dl = x << 29; - - fpu->fp_regs[n] = du; - fpu->fp_regs[n+1] = dl; - } -} - -/** - * ieee_fpe_handler - Handle denormalized number exception - * - * @regs: Pointer to register structure - * - * Returns 1 when it's handled (should not cause exception). - */ -static int ieee_fpe_handler(struct pt_regs *regs) -{ - unsigned short insn = *(unsigned short *)regs->pc; - unsigned short finsn; - unsigned long nextpc; - int nib[4] = { - (insn >> 12) & 0xf, - (insn >> 8) & 0xf, - (insn >> 4) & 0xf, - insn & 0xf}; - - if (nib[0] == 0xb || - (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */ - regs->pr = regs->pc + 4; - - if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */ - nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3); - finsn = *(unsigned short *) (regs->pc + 2); - } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */ - if (regs->sr & 1) - nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); - else - nextpc = regs->pc + 4; - finsn = *(unsigned short *) (regs->pc + 2); - } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */ - if (regs->sr & 1) - nextpc = regs->pc + 4; - else - nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); - finsn = *(unsigned short *) (regs->pc + 2); - } else if (nib[0] == 0x4 && nib[3] == 0xb && - (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */ - nextpc = regs->regs[nib[1]]; - finsn = *(unsigned short *) (regs->pc + 2); - } else if (nib[0] == 0x0 && nib[3] == 0x3 && - (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */ - nextpc = regs->pc + 4 + regs->regs[nib[1]]; - finsn = *(unsigned short *) (regs->pc + 2); - } else if (insn == 0x000b) { /* rts */ - nextpc = regs->pr; - finsn = *(unsigned short *) (regs->pc + 2); - } else { - nextpc = regs->pc + 2; - finsn = insn; - } - - if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */ - struct task_struct *tsk = current; - - if ((tsk->thread.xstate->softfpu.fpscr & (1 << 17))) { - /* FPU error */ - denormal_to_double (&tsk->thread.xstate->softfpu, - (finsn >> 8) & 0xf); - tsk->thread.xstate->softfpu.fpscr &= - ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); - task_thread_info(tsk)->status |= TS_USEDFPU; - } else { - force_sig_fault(SIGFPE, FPE_FLTINV, - (void __user *)regs->pc); - } - - regs->pc = nextpc; - return 1; - } - - return 0; -} - /** * fpu_init - Initialize FPU registers * @fpu: Pointer to software emulated FPU registers. diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index dfdbaa50946eb..d1b1ff2be17aa 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -434,9 +434,7 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index eb24cb1afc11f..349e27771ceaf 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -65,7 +65,6 @@ config SPARC64 select HAVE_KRETPROBES select HAVE_KPROBES select HAVE_RCU_TABLE_FREE if SMP - select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE select HAVE_MEMBLOCK_NODE_MAP select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_DYNAMIC_FTRACE @@ -525,7 +524,7 @@ config COMPAT bool depends on SPARC64 default y - select COMPAT_BINFMT_ELF + select COMPAT_BINFMT_ELF if BINFMT_ELF select HAVE_UID16 select ARCH_WANT_OLD_COMPAT_IPC select COMPAT_OLD_SIGACTION diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 6c325d53a20a0..bde4d21a8ac8e 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -73,7 +73,6 @@ CONFIG_RAID_ATTRS=m CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h index 688911051b446..f4afa301954a2 100644 --- a/arch/sparc/include/asm/io_64.h +++ b/arch/sparc/include/asm/io_64.h @@ -407,6 +407,7 @@ static inline void __iomem *ioremap(unsigned long offset, unsigned long size) } #define ioremap_nocache(X,Y) ioremap((X),(Y)) +#define ioremap_uc(X,Y) ioremap((X),(Y)) #define ioremap_wc(X,Y) ioremap((X),(Y)) #define ioremap_wt(X,Y) ioremap((X),(Y)) diff --git a/arch/sparc/include/asm/mman.h b/arch/sparc/include/asm/mman.h index f94532f25db14..274217e7ed702 100644 --- a/arch/sparc/include/asm/mman.h +++ b/arch/sparc/include/asm/mman.h @@ -57,35 +57,39 @@ static inline int sparc_validate_prot(unsigned long prot, unsigned long addr) { if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_ADI)) return 0; - if (prot & PROT_ADI) { - if (!adi_capable()) - return 0; + return 1; +} - if (addr) { - struct vm_area_struct *vma; +#define arch_validate_flags(vm_flags) arch_validate_flags(vm_flags) +/* arch_validate_flags() - Ensure combination of flags is valid for a + * VMA. + */ +static inline bool arch_validate_flags(unsigned long vm_flags) +{ + /* If ADI is being enabled on this VMA, check for ADI + * capability on the platform and ensure VMA is suitable + * for ADI + */ + if (vm_flags & VM_SPARC_ADI) { + if (!adi_capable()) + return false; - vma = find_vma(current->mm, addr); - if (vma) { - /* ADI can not be enabled on PFN - * mapped pages - */ - if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) - return 0; + /* ADI can not be enabled on PFN mapped pages */ + if (vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) + return false; - /* Mergeable pages can become unmergeable - * if ADI is enabled on them even if they - * have identical data on them. This can be - * because ADI enabled pages with identical - * data may still not have identical ADI - * tags on them. Disallow ADI on mergeable - * pages. - */ - if (vma->vm_flags & VM_MERGEABLE) - return 0; - } - } + /* Mergeable pages can become unmergeable + * if ADI is enabled on them even if they + * have identical data on them. This can be + * because ADI enabled pages with identical + * data may still not have identical ADI + * tags on them. Disallow ADI on mergeable + * pages. + */ + if (vm_flags & VM_MERGEABLE) + return false; } - return 1; + return true; } #endif /* CONFIG_SPARC64 */ diff --git a/arch/sparc/include/asm/timex_32.h b/arch/sparc/include/asm/timex_32.h index 542915b462097..f86326a6f89e0 100644 --- a/arch/sparc/include/asm/timex_32.h +++ b/arch/sparc/include/asm/timex_32.h @@ -9,8 +9,6 @@ #define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ -/* XXX Maybe do something better at some point... -DaveM */ -typedef unsigned long cycles_t; -#define get_cycles() (0) +#include #endif diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h index a2f3fa61ee36a..8cb8f3833239a 100644 --- a/arch/sparc/include/asm/tlb_64.h +++ b/arch/sparc/include/asm/tlb_64.h @@ -28,6 +28,15 @@ void flush_tlb_pending(void); #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) #define tlb_flush(tlb) flush_tlb_pending() +/* + * SPARC64's hardware TLB fill does not use the Linux page-tables + * and therefore we don't need a TLBI when freeing page-table pages. + */ + +#ifdef CONFIG_HAVE_RCU_TABLE_FREE +#define tlb_needs_table_invalidate() (false) +#endif + #include #endif /* _SPARC64_TLB_H */ diff --git a/arch/sparc/include/uapi/asm/ipcbuf.h b/arch/sparc/include/uapi/asm/ipcbuf.h index 9d0d125500e24..084b8949ddff6 100644 --- a/arch/sparc/include/uapi/asm/ipcbuf.h +++ b/arch/sparc/include/uapi/asm/ipcbuf.h @@ -15,19 +15,19 @@ struct ipc64_perm { - __kernel_key_t key; - __kernel_uid_t uid; - __kernel_gid_t gid; - __kernel_uid_t cuid; - __kernel_gid_t cgid; + __kernel_key_t key; + __kernel_uid32_t uid; + __kernel_gid32_t gid; + __kernel_uid32_t cuid; + __kernel_gid32_t cgid; #ifndef __arch64__ - unsigned short __pad0; + unsigned short __pad0; #endif - __kernel_mode_t mode; - unsigned short __pad1; - unsigned short seq; - unsigned long long __unused1; - unsigned long long __unused2; + __kernel_mode_t mode; + unsigned short __pad1; + unsigned short seq; + unsigned long long __unused1; + unsigned long long __unused2; }; #endif /* __SPARC_IPCBUF_H */ diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index f89603855f1ec..b87e0002131dd 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -356,7 +356,9 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { - if (!sparc_dma_free_resource(cpu_addr, PAGE_ALIGN(size))) + size = PAGE_ALIGN(size); + + if (!sparc_dma_free_resource(cpu_addr, size)) return; dma_make_coherent(dma_addr, size); diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 8e645ddac58e2..30f171b7b00c2 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -39,6 +39,7 @@ struct mdesc_hdr { u32 node_sz; /* node block size */ u32 name_sz; /* name block size */ u32 data_sz; /* data block size */ + char data[]; } __attribute__((aligned(16))); struct mdesc_elem { @@ -612,7 +613,7 @@ EXPORT_SYMBOL(mdesc_get_node_info); static struct mdesc_elem *node_block(struct mdesc_hdr *mdesc) { - return (struct mdesc_elem *) (mdesc + 1); + return (struct mdesc_elem *) mdesc->data; } static void *name_block(struct mdesc_hdr *mdesc) diff --git a/arch/sparc/kernel/ptrace_32.c b/arch/sparc/kernel/ptrace_32.c index 16b50afe7b52f..646dd58169ecb 100644 --- a/arch/sparc/kernel/ptrace_32.c +++ b/arch/sparc/kernel/ptrace_32.c @@ -46,82 +46,79 @@ enum sparc_regset { REGSET_FP, }; +static int regwindow32_get(struct task_struct *target, + const struct pt_regs *regs, + u32 *uregs) +{ + unsigned long reg_window = regs->u_regs[UREG_I6]; + int size = 16 * sizeof(u32); + + if (target == current) { + if (copy_from_user(uregs, (void __user *)reg_window, size)) + return -EFAULT; + } else { + if (access_process_vm(target, reg_window, uregs, size, + FOLL_FORCE) != size) + return -EFAULT; + } + return 0; +} + +static int regwindow32_set(struct task_struct *target, + const struct pt_regs *regs, + u32 *uregs) +{ + unsigned long reg_window = regs->u_regs[UREG_I6]; + int size = 16 * sizeof(u32); + + if (target == current) { + if (copy_to_user((void __user *)reg_window, uregs, size)) + return -EFAULT; + } else { + if (access_process_vm(target, reg_window, uregs, size, + FOLL_FORCE | FOLL_WRITE) != size) + return -EFAULT; + } + return 0; +} + static int genregs32_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { const struct pt_regs *regs = target->thread.kregs; - unsigned long __user *reg_window; - unsigned long *k = kbuf; - unsigned long __user *u = ubuf; - unsigned long reg; + u32 uregs[16]; + int ret; if (target == current) flush_user_windows(); - pos /= sizeof(reg); - count /= sizeof(reg); - - if (kbuf) { - for (; count > 0 && pos < 16; count--) - *k++ = regs->u_regs[pos++]; - - reg_window = (unsigned long __user *) regs->u_regs[UREG_I6]; - reg_window -= 16; - for (; count > 0 && pos < 32; count--) { - if (get_user(*k++, ®_window[pos++])) - return -EFAULT; - } - } else { - for (; count > 0 && pos < 16; count--) { - if (put_user(regs->u_regs[pos++], u++)) - return -EFAULT; - } - - reg_window = (unsigned long __user *) regs->u_regs[UREG_I6]; - reg_window -= 16; - for (; count > 0 && pos < 32; count--) { - if (get_user(reg, ®_window[pos++]) || - put_user(reg, u++)) - return -EFAULT; - } - } - while (count > 0) { - switch (pos) { - case 32: /* PSR */ - reg = regs->psr; - break; - case 33: /* PC */ - reg = regs->pc; - break; - case 34: /* NPC */ - reg = regs->npc; - break; - case 35: /* Y */ - reg = regs->y; - break; - case 36: /* WIM */ - case 37: /* TBR */ - reg = 0; - break; - default: - goto finish; - } + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + regs->u_regs, + 0, 16 * sizeof(u32)); + if (ret || !count) + return ret; - if (kbuf) - *k++ = reg; - else if (put_user(reg, u++)) + if (pos < 32 * sizeof(u32)) { + if (regwindow32_get(target, regs, uregs)) return -EFAULT; - pos++; - count--; + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + uregs, + 16 * sizeof(u32), 32 * sizeof(u32)); + if (ret || !count) + return ret; } -finish: - pos *= sizeof(reg); - count *= sizeof(reg); - return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, - 38 * sizeof(reg), -1); + uregs[0] = regs->psr; + uregs[1] = regs->pc; + uregs[2] = regs->npc; + uregs[3] = regs->y; + uregs[4] = 0; /* WIM */ + uregs[5] = 0; /* TBR */ + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + uregs, + 32 * sizeof(u32), 38 * sizeof(u32)); } static int genregs32_set(struct task_struct *target, @@ -130,82 +127,58 @@ static int genregs32_set(struct task_struct *target, const void *kbuf, const void __user *ubuf) { struct pt_regs *regs = target->thread.kregs; - unsigned long __user *reg_window; - const unsigned long *k = kbuf; - const unsigned long __user *u = ubuf; - unsigned long reg; + u32 uregs[16]; + u32 psr; + int ret; if (target == current) flush_user_windows(); - pos /= sizeof(reg); - count /= sizeof(reg); - - if (kbuf) { - for (; count > 0 && pos < 16; count--) - regs->u_regs[pos++] = *k++; - - reg_window = (unsigned long __user *) regs->u_regs[UREG_I6]; - reg_window -= 16; - for (; count > 0 && pos < 32; count--) { - if (put_user(*k++, ®_window[pos++])) - return -EFAULT; - } - } else { - for (; count > 0 && pos < 16; count--) { - if (get_user(reg, u++)) - return -EFAULT; - regs->u_regs[pos++] = reg; - } - - reg_window = (unsigned long __user *) regs->u_regs[UREG_I6]; - reg_window -= 16; - for (; count > 0 && pos < 32; count--) { - if (get_user(reg, u++) || - put_user(reg, ®_window[pos++])) - return -EFAULT; - } - } - while (count > 0) { - unsigned long psr; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + regs->u_regs, + 0, 16 * sizeof(u32)); + if (ret || !count) + return ret; - if (kbuf) - reg = *k++; - else if (get_user(reg, u++)) + if (pos < 32 * sizeof(u32)) { + if (regwindow32_get(target, regs, uregs)) return -EFAULT; - - switch (pos) { - case 32: /* PSR */ - psr = regs->psr; - psr &= ~(PSR_ICC | PSR_SYSCALL); - psr |= (reg & (PSR_ICC | PSR_SYSCALL)); - regs->psr = psr; - break; - case 33: /* PC */ - regs->pc = reg; - break; - case 34: /* NPC */ - regs->npc = reg; - break; - case 35: /* Y */ - regs->y = reg; - break; - case 36: /* WIM */ - case 37: /* TBR */ - break; - default: - goto finish; - } - - pos++; - count--; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + uregs, + 16 * sizeof(u32), 32 * sizeof(u32)); + if (ret) + return ret; + if (regwindow32_set(target, regs, uregs)) + return -EFAULT; + if (!count) + return 0; } -finish: - pos *= sizeof(reg); - count *= sizeof(reg); - + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &psr, + 32 * sizeof(u32), 33 * sizeof(u32)); + if (ret) + return ret; + regs->psr = (regs->psr & ~(PSR_ICC | PSR_SYSCALL)) | + (psr & (PSR_ICC | PSR_SYSCALL)); + if (!count) + return 0; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->pc, + 33 * sizeof(u32), 34 * sizeof(u32)); + if (ret || !count) + return ret; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->npc, + 34 * sizeof(u32), 35 * sizeof(u32)); + if (ret || !count) + return ret; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->y, + 35 * sizeof(u32), 36 * sizeof(u32)); + if (ret || !count) + return ret; return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, - 38 * sizeof(reg), -1); + 36 * sizeof(u32), 38 * sizeof(u32)); } static int fpregs32_get(struct task_struct *target, diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index c9d41a96468f7..3f5930bfab06f 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -572,19 +572,13 @@ static int genregs32_get(struct task_struct *target, for (; count > 0 && pos < 32; count--) { if (access_process_vm(target, (unsigned long) - ®_window[pos], + ®_window[pos++], ®, sizeof(reg), FOLL_FORCE) != sizeof(reg)) return -EFAULT; - if (access_process_vm(target, - (unsigned long) u, - ®, sizeof(reg), - FOLL_FORCE | FOLL_WRITE) - != sizeof(reg)) + if (put_user(reg, u++)) return -EFAULT; - pos++; - u++; } } } @@ -684,12 +678,7 @@ static int genregs32_set(struct task_struct *target, } } else { for (; count > 0 && pos < 32; count--) { - if (access_process_vm(target, - (unsigned long) - u, - ®, sizeof(reg), - FOLL_FORCE) - != sizeof(reg)) + if (get_user(reg, u++)) return -EFAULT; if (access_process_vm(target, (unsigned long) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index a8275fea4b70c..aa81c25b44cf3 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1039,38 +1039,9 @@ void smp_fetch_global_pmu(void) * are flush_tlb_*() routines, and these run after flush_cache_*() * which performs the flushw. * - * The SMP TLB coherency scheme we use works as follows: - * - * 1) mm->cpu_vm_mask is a bit mask of which cpus an address - * space has (potentially) executed on, this is the heuristic - * we use to avoid doing cross calls. - * - * Also, for flushing from kswapd and also for clones, we - * use cpu_vm_mask as the list of cpus to make run the TLB. - * - * 2) TLB context numbers are shared globally across all processors - * in the system, this allows us to play several games to avoid - * cross calls. - * - * One invariant is that when a cpu switches to a process, and - * that processes tsk->active_mm->cpu_vm_mask does not have the - * current cpu's bit set, that tlb context is flushed locally. - * - * If the address space is non-shared (ie. mm->count == 1) we avoid - * cross calls when we want to flush the currently running process's - * tlb state. This is done by clearing all cpu bits except the current - * processor's in current->mm->cpu_vm_mask and performing the - * flush locally only. This will force any subsequent cpus which run - * this task to flush the context from the local tlb if the process - * migrates to another cpu (again). - * - * 3) For shared address spaces (threads) and swapping we bite the - * bullet for most cases and perform the cross call (but only to - * the cpus listed in cpu_vm_mask). - * - * The performance gain from "optimizing" away the cross call for threads is - * questionable (in theory the big win for threads is the massive sharing of - * address space state across processors). + * mm->cpu_vm_mask is a bit mask of which cpus an address + * space has (potentially) executed on, this is the heuristic + * we use to limit cross calls. */ /* This currently is only used by the hugetlb arch pre-fault @@ -1080,18 +1051,13 @@ void smp_fetch_global_pmu(void) void smp_flush_tlb_mm(struct mm_struct *mm) { u32 ctx = CTX_HWBITS(mm->context); - int cpu = get_cpu(); - if (atomic_read(&mm->mm_users) == 1) { - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - goto local_flush_and_out; - } + get_cpu(); smp_cross_call_masked(&xcall_flush_tlb_mm, ctx, 0, 0, mm_cpumask(mm)); -local_flush_and_out: __flush_tlb_mm(ctx, SECONDARY_CONTEXT); put_cpu(); @@ -1114,17 +1080,15 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long { u32 ctx = CTX_HWBITS(mm->context); struct tlb_pending_info info; - int cpu = get_cpu(); + + get_cpu(); info.ctx = ctx; info.nr = nr; info.vaddrs = vaddrs; - if (mm == current->mm && atomic_read(&mm->mm_users) == 1) - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - else - smp_call_function_many(mm_cpumask(mm), tlb_pending_func, - &info, 1); + smp_call_function_many(mm_cpumask(mm), tlb_pending_func, + &info, 1); __flush_tlb_pending(ctx, nr, vaddrs); @@ -1134,14 +1098,13 @@ void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr) { unsigned long context = CTX_HWBITS(mm->context); - int cpu = get_cpu(); - if (mm == current->mm && atomic_read(&mm->mm_users) == 1) - cpumask_copy(mm_cpumask(mm), cpumask_of(cpu)); - else - smp_cross_call_masked(&xcall_flush_tlb_page, - context, vaddr, 0, - mm_cpumask(mm)); + get_cpu(); + + smp_cross_call_masked(&xcall_flush_tlb_page, + context, vaddr, 0, + mm_cpumask(mm)); + __flush_tlb_page(context, vaddr); put_cpu(); diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 27778b65a965e..f2b22c496fb97 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -275,14 +275,13 @@ bool is_no_fault_exception(struct pt_regs *regs) asi = (regs->tstate >> 24); /* saved %asi */ else asi = (insn >> 5); /* immediate asi */ - if ((asi & 0xf2) == ASI_PNF) { - if (insn & 0x1000000) { /* op3[5:4]=3 */ - handle_ldf_stq(insn, regs); - return true; - } else if (insn & 0x200000) { /* op3[2], stores */ + if ((asi & 0xf6) == ASI_PNF) { + if (insn & 0x200000) /* op3[2], stores */ return false; - } - handle_ld_nf(insn, regs); + if (insn & 0x1000000) /* op3[5:4]=3 (fp) */ + handle_ldf_stq(insn, regs); + else + handle_ld_nf(insn, regs); return true; } } diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 61afd787bd0c7..59b6df13ddead 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -172,12 +172,14 @@ SECTIONS } PERCPU_SECTION(SMP_CACHE_BYTES) -#ifdef CONFIG_JUMP_LABEL . = ALIGN(PAGE_SIZE); .exit.text : { EXIT_TEXT } -#endif + + .exit.data : { + EXIT_DATA + } . = ALIGN(PAGE_SIZE); __init_end = .; diff --git a/arch/sparc/lib/iomap.c b/arch/sparc/lib/iomap.c index c9da9f139694d..f3a8cd491ce0d 100644 --- a/arch/sparc/lib/iomap.c +++ b/arch/sparc/lib/iomap.c @@ -19,8 +19,10 @@ void ioport_unmap(void __iomem *addr) EXPORT_SYMBOL(ioport_map); EXPORT_SYMBOL(ioport_unmap); +#ifdef CONFIG_PCI void pci_iounmap(struct pci_dev *dev, void __iomem * addr) { /* nothing to do */ } EXPORT_SYMBOL(pci_iounmap); +#endif diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S index b89d42b29e344..f427f34b8b79b 100644 --- a/arch/sparc/lib/memset.S +++ b/arch/sparc/lib/memset.S @@ -142,6 +142,7 @@ __bzero: ZERO_LAST_BLOCKS(%o0, 0x48, %g2) ZERO_LAST_BLOCKS(%o0, 0x08, %g2) 13: + EXT(12b, 13b, 21f) be 8f andcc %o1, 4, %g0 diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 906eda1158b4d..40dd6cb4a4133 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -197,6 +197,9 @@ unsigned long __init bootmem_init(unsigned long *pages_avail) size = memblock_phys_mem_size() - memblock_reserved_size(); *pages_avail = (size >> PAGE_SHIFT) - high_pages; + /* Only allow low memory to be allocated via memblock allocation */ + memblock_set_current_limit(max_low_pfn << PAGE_SHIFT); + return max_pfn; } diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index e6d91819da921..28b9ffd85db0b 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2904,7 +2904,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm) if (!page) return NULL; if (!pgtable_pte_page_ctor(page)) { - free_unref_page(page); + __free_page(page); return NULL; } return (pte_t *) page_address(page); diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index cc3ad64479ac3..9e256d4d1f4cf 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -379,7 +379,6 @@ pgtable_t pte_alloc_one(struct mm_struct *mm) return NULL; page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT); if (!pgtable_pte_page_ctor(page)) { - __free_page(page); return NULL; } return page; diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 3364e2a009899..fef734473c0f3 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1287,6 +1287,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) return 1; break; } + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: diff --git a/arch/um/Kconfig b/arch/um/Kconfig index fec6b4ca2b6e5..c56d3526a3bd7 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -14,6 +14,7 @@ config UML select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_BUGVERBOSE + select HAVE_COPY_THREAD_TLS select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS diff --git a/arch/um/Kconfig.debug b/arch/um/Kconfig.debug index 85726eeec3451..e4a0f12f20d97 100644 --- a/arch/um/Kconfig.debug +++ b/arch/um/Kconfig.debug @@ -17,6 +17,7 @@ config GCOV bool "Enable gcov support" depends on DEBUG_INFO depends on !KCOV + depends on !MODULES help This option allows developers to retrieve coverage data from a UML session. diff --git a/arch/um/Makefile b/arch/um/Makefile index d2daa206872da..275f5ffdf6f0a 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -140,6 +140,7 @@ export CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS) $(LD_FLAGS_CMDLINE) # When cleaning we don't include .config, so we don't include # TT or skas makefiles and don't clean skas_ptregs.h. CLEAN_FILES += linux x.i gmon.out +MRPROPER_DIRS += arch/$(SUBARCH)/include/generated archclean: @find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig index fea5a0d522dcf..388096fb45a25 100644 --- a/arch/um/drivers/Kconfig +++ b/arch/um/drivers/Kconfig @@ -337,7 +337,7 @@ config UML_NET_SLIRP endmenu config VIRTIO_UML - tristate "UML driver for virtio devices" + bool "UML driver for virtio devices" select VIRTIO help This driver provides support for virtio based paravirtual device diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c index 4d80526a4236e..25727ed648b72 100644 --- a/arch/um/drivers/chan_user.c +++ b/arch/um/drivers/chan_user.c @@ -26,10 +26,10 @@ int generic_read(int fd, char *c_out, void *unused) n = read(fd, c_out, sizeof(*c_out)); if (n > 0) return n; - else if (errno == EAGAIN) - return 0; else if (n == 0) return -EIO; + else if (errno == EAGAIN) + return 0; return -errno; } @@ -220,7 +220,7 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out, unsigned long *stack_out) { struct winch_data data; - int fds[2], n, err; + int fds[2], n, err, pid; char c; err = os_pipe(fds, 1, 1); @@ -238,8 +238,9 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out, * problem with /dev/net/tun, which if held open by this * thread, prevents the TUN/TAP device from being reused. */ - err = run_helper_thread(winch_thread, &data, CLONE_FILES, stack_out); - if (err < 0) { + pid = run_helper_thread(winch_thread, &data, CLONE_FILES, stack_out); + if (pid < 0) { + err = pid; printk(UM_KERN_ERR "fork of winch_thread failed - errno = %d\n", -err); goto out_close; @@ -256,13 +257,14 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out, goto out_close; } - if (os_set_fd_block(*fd_out, 0)) { + err = os_set_fd_block(*fd_out, 0); + if (err) { printk(UM_KERN_ERR "winch_tramp: failed to set thread_fd " "non-blocking.\n"); goto out_close; } - return err; + return pid; out_close: close(fds[1]); diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 0117489e9b30a..750acc1344328 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -217,7 +217,7 @@ void mconsole_go(struct mc_request *req) void mconsole_stop(struct mc_request *req) { - deactivate_fd(req->originating_fd, MCONSOLE_IRQ); + block_signals(); os_set_fd_block(req->originating_fd, 1); mconsole_reply(req, "stopped", 0, 0); for (;;) { @@ -240,6 +240,7 @@ void mconsole_stop(struct mc_request *req) } os_set_fd_block(req->originating_fd, 0); mconsole_reply(req, "", 0, 0); + unblock_signals(); } static DEFINE_SPINLOCK(mc_devices_lock); diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c index 8016d32b68092..8d736eb629614 100644 --- a/arch/um/drivers/slip_user.c +++ b/arch/um/drivers/slip_user.c @@ -145,7 +145,8 @@ static int slip_open(void *data) } sfd = err; - if (set_up_tty(sfd)) + err = set_up_tty(sfd); + if (err) goto out_close2; pri->slave = sfd; diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 6627d7c30f370..4e59ab817d3e7 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -47,18 +47,25 @@ /* Max request size is determined by sector mask - 32K */ #define UBD_MAX_REQUEST (8 * sizeof(long)) +struct io_desc { + char *buffer; + unsigned long length; + unsigned long sector_mask; + unsigned long long cow_offset; + unsigned long bitmap_words[2]; +}; + struct io_thread_req { struct request *req; int fds[2]; unsigned long offsets[2]; unsigned long long offset; - unsigned long length; - char *buffer; int sectorsize; - unsigned long sector_mask; - unsigned long long cow_offset; - unsigned long bitmap_words[2]; int error; + + int desc_cnt; + /* io_desc has to be the last element of the struct */ + struct io_desc io_desc[]; }; @@ -524,12 +531,7 @@ static void ubd_handler(void) blk_queue_max_write_zeroes_sectors(io_req->req->q, 0); blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q); } - if ((io_req->error) || (io_req->buffer == NULL)) - blk_mq_end_request(io_req->req, io_req->error); - else { - if (!blk_update_request(io_req->req, io_req->error, io_req->length)) - __blk_mq_end_request(io_req->req, io_req->error); - } + blk_mq_end_request(io_req->req, io_req->error); kfree(io_req); } } @@ -945,6 +947,7 @@ static int ubd_add(int n, char **error_out) blk_queue_write_cache(ubd_dev->queue, true, false); blk_queue_max_segments(ubd_dev->queue, MAX_SG); + blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1); err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]); if(err){ *error_out = "Failed to register device"; @@ -1288,37 +1291,74 @@ static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, *cow_offset += bitmap_offset; } -static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, +static void cowify_req(struct io_thread_req *req, struct io_desc *segment, + unsigned long offset, unsigned long *bitmap, __u64 bitmap_offset, __u64 bitmap_len) { - __u64 sector = req->offset >> SECTOR_SHIFT; + __u64 sector = offset >> SECTOR_SHIFT; int i; - if (req->length > (sizeof(req->sector_mask) * 8) << SECTOR_SHIFT) + if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT) panic("Operation too long"); if (req_op(req->req) == REQ_OP_READ) { - for (i = 0; i < req->length >> SECTOR_SHIFT; i++) { + for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) { if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) ubd_set_bit(i, (unsigned char *) - &req->sector_mask); + &segment->sector_mask); + } + } else { + cowify_bitmap(offset, segment->length, &segment->sector_mask, + &segment->cow_offset, bitmap, bitmap_offset, + segment->bitmap_words, bitmap_len); + } +} + +static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req, + struct request *req) +{ + struct bio_vec bvec; + struct req_iterator iter; + int i = 0; + unsigned long byte_offset = io_req->offset; + int op = req_op(req); + + if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) { + io_req->io_desc[0].buffer = NULL; + io_req->io_desc[0].length = blk_rq_bytes(req); + } else { + rq_for_each_segment(bvec, req, iter) { + BUG_ON(i >= io_req->desc_cnt); + + io_req->io_desc[i].buffer = + page_address(bvec.bv_page) + bvec.bv_offset; + io_req->io_desc[i].length = bvec.bv_len; + i++; + } + } + + if (dev->cow.file) { + for (i = 0; i < io_req->desc_cnt; i++) { + cowify_req(io_req, &io_req->io_desc[i], byte_offset, + dev->cow.bitmap, dev->cow.bitmap_offset, + dev->cow.bitmap_len); + byte_offset += io_req->io_desc[i].length; } + } - else cowify_bitmap(req->offset, req->length, &req->sector_mask, - &req->cow_offset, bitmap, bitmap_offset, - req->bitmap_words, bitmap_len); } -static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, - u64 off, struct bio_vec *bvec) +static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req, + int desc_cnt) { - struct ubd *dev = hctx->queue->queuedata; struct io_thread_req *io_req; - int ret; + int i; - io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC); + io_req = kmalloc(sizeof(*io_req) + + (desc_cnt * sizeof(struct io_desc)), + GFP_ATOMIC); if (!io_req) - return -ENOMEM; + return NULL; io_req->req = req; if (dev->cow.file) @@ -1326,26 +1366,41 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, else io_req->fds[0] = dev->fd; io_req->error = 0; - - if (bvec != NULL) { - io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset; - io_req->length = bvec->bv_len; - } else { - io_req->buffer = NULL; - io_req->length = blk_rq_bytes(req); - } - io_req->sectorsize = SECTOR_SIZE; io_req->fds[1] = dev->fd; - io_req->cow_offset = -1; - io_req->offset = off; - io_req->sector_mask = 0; + io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT; io_req->offsets[0] = 0; io_req->offsets[1] = dev->cow.data_offset; - if (dev->cow.file) - cowify_req(io_req, dev->cow.bitmap, - dev->cow.bitmap_offset, dev->cow.bitmap_len); + for (i = 0 ; i < desc_cnt; i++) { + io_req->io_desc[i].sector_mask = 0; + io_req->io_desc[i].cow_offset = -1; + } + + return io_req; +} + +static int ubd_submit_request(struct ubd *dev, struct request *req) +{ + int segs = 0; + struct io_thread_req *io_req; + int ret; + int op = req_op(req); + + if (op == REQ_OP_FLUSH) + segs = 0; + else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) + segs = 1; + else + segs = blk_rq_nr_phys_segments(req); + + io_req = ubd_alloc_req(dev, req, segs); + if (!io_req) + return -ENOMEM; + + io_req->desc_cnt = segs; + if (segs) + ubd_map_req(dev, io_req, req); ret = os_write_file(thread_fd, &io_req, sizeof(io_req)); if (ret != sizeof(io_req)) { @@ -1356,22 +1411,6 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req, return ret; } -static int queue_rw_req(struct blk_mq_hw_ctx *hctx, struct request *req) -{ - struct req_iterator iter; - struct bio_vec bvec; - int ret; - u64 off = (u64)blk_rq_pos(req) << SECTOR_SHIFT; - - rq_for_each_segment(bvec, req, iter) { - ret = ubd_queue_one_vec(hctx, req, off, &bvec); - if (ret < 0) - return ret; - off += bvec.bv_len; - } - return 0; -} - static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -1384,17 +1423,12 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, spin_lock_irq(&ubd_dev->lock); switch (req_op(req)) { - /* operations with no lentgth/offset arguments */ case REQ_OP_FLUSH: - ret = ubd_queue_one_vec(hctx, req, 0, NULL); - break; case REQ_OP_READ: case REQ_OP_WRITE: - ret = queue_rw_req(hctx, req); - break; case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES: - ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL); + ret = ubd_submit_request(ubd_dev, req); break; default: WARN_ON_ONCE(1); @@ -1482,22 +1516,22 @@ static int map_error(int error_code) * will result in unpredictable behaviour and/or crashes. */ -static int update_bitmap(struct io_thread_req *req) +static int update_bitmap(struct io_thread_req *req, struct io_desc *segment) { int n; - if(req->cow_offset == -1) + if (segment->cow_offset == -1) return map_error(0); - n = os_pwrite_file(req->fds[1], &req->bitmap_words, - sizeof(req->bitmap_words), req->cow_offset); - if (n != sizeof(req->bitmap_words)) + n = os_pwrite_file(req->fds[1], &segment->bitmap_words, + sizeof(segment->bitmap_words), segment->cow_offset); + if (n != sizeof(segment->bitmap_words)) return map_error(-n); return map_error(0); } -static void do_io(struct io_thread_req *req) +static void do_io(struct io_thread_req *req, struct io_desc *desc) { char *buf = NULL; unsigned long len; @@ -1512,21 +1546,20 @@ static void do_io(struct io_thread_req *req) return; } - nsectors = req->length / req->sectorsize; + nsectors = desc->length / req->sectorsize; start = 0; do { - bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask); + bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask); end = start; while((end < nsectors) && - (ubd_test_bit(end, (unsigned char *) - &req->sector_mask) == bit)) + (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit)) end++; off = req->offset + req->offsets[bit] + start * req->sectorsize; len = (end - start) * req->sectorsize; - if (req->buffer != NULL) - buf = &req->buffer[start * req->sectorsize]; + if (desc->buffer != NULL) + buf = &desc->buffer[start * req->sectorsize]; switch (req_op(req->req)) { case REQ_OP_READ: @@ -1566,7 +1599,8 @@ static void do_io(struct io_thread_req *req) start = end; } while(start < nsectors); - req->error = update_bitmap(req); + req->offset += len; + req->error = update_bitmap(req, desc); } /* Changed in start_io_thread, which is serialized by being called only @@ -1599,14 +1633,21 @@ int io_thread(void *arg) } for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { + struct io_thread_req *req = (*io_req_buffer)[count]; + int i; + io_count++; - do_io((*io_req_buffer)[count]); + for (i = 0; !req->error && i < req->desc_cnt; i++) + do_io(req, &(req->io_desc[i])); + } written = 0; do { - res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n); + res = os_write_file(kernel_fd, + ((char *) io_req_buffer) + written, + n - written); if (res >= 0) { written += res; } diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index fc8c52cff5aa8..936e17eab3360 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -4,12 +4,12 @@ * * Copyright(c) 2019 Intel Corporation * - * This module allows virtio devices to be used over a vhost-user socket. + * This driver allows virtio devices to be used over a vhost-user socket. * * Guest devices can be instantiated by kernel module or command line * parameters. One device will be created for each parameter. Syntax: * - * [virtio_uml.]device=:[:] + * virtio_uml.device=:[:] * where: * := vhost-user socket path to connect * := virtio device id (as in virtio_ids.h) @@ -83,7 +83,7 @@ static int full_sendmsg_fds(int fd, const void *buf, unsigned int len, return 0; } -static int full_read(int fd, void *buf, int len) +static int full_read(int fd, void *buf, int len, bool abortable) { int rc; @@ -93,7 +93,7 @@ static int full_read(int fd, void *buf, int len) buf += rc; len -= rc; } - } while (len && (rc > 0 || rc == -EINTR)); + } while (len && (rc > 0 || rc == -EINTR || (!abortable && rc == -EAGAIN))); if (rc < 0) return rc; @@ -104,7 +104,7 @@ static int full_read(int fd, void *buf, int len) static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg) { - return full_read(fd, msg, sizeof(msg->header)); + return full_read(fd, msg, sizeof(msg->header), true); } static int vhost_user_recv(int fd, struct vhost_user_msg *msg, @@ -118,7 +118,7 @@ static int vhost_user_recv(int fd, struct vhost_user_msg *msg, size = msg->header.size; if (size > max_payload_size) return -EPROTO; - return full_read(fd, &msg->payload, size); + return full_read(fd, &msg->payload, size, false); } static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev, @@ -959,6 +959,7 @@ static void virtio_uml_release_dev(struct device *d) } os_close_file(vu_dev->sock); + kfree(vu_dev); } /* Platform device */ @@ -977,7 +978,7 @@ static int virtio_uml_probe(struct platform_device *pdev) if (!pdata) return -EINVAL; - vu_dev = devm_kzalloc(&pdev->dev, sizeof(*vu_dev), GFP_KERNEL); + vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL); if (!vu_dev) return -ENOMEM; @@ -993,7 +994,7 @@ static int virtio_uml_probe(struct platform_device *pdev) rc = os_connect_socket(pdata->socket_path); } while (rc == -EINTR); if (rc < 0) - return rc; + goto error_free; vu_dev->sock = rc; rc = vhost_user_init(vu_dev); @@ -1009,6 +1010,8 @@ static int virtio_uml_probe(struct platform_device *pdev) error_init: os_close_file(vu_dev->sock); +error_free: + kfree(vu_dev); return rc; } diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c index fc7f1e7467032..87ca4a47cd66e 100644 --- a/arch/um/drivers/xterm.c +++ b/arch/um/drivers/xterm.c @@ -18,6 +18,7 @@ struct xterm_chan { int pid; int helper_pid; + int chan_fd; char *title; int device; int raw; @@ -33,6 +34,7 @@ static void *xterm_init(char *str, int device, const struct chan_opts *opts) return NULL; *data = ((struct xterm_chan) { .pid = -1, .helper_pid = -1, + .chan_fd = -1, .device = device, .title = opts->xterm_title, .raw = opts->raw } ); @@ -149,6 +151,7 @@ static int xterm_open(int input, int output, int primary, void *d, goto out_kill; } + data->chan_fd = fd; new = xterm_fd(fd, &data->helper_pid); if (new < 0) { err = new; @@ -206,6 +209,8 @@ static void xterm_close(int fd, void *d) os_kill_process(data->helper_pid, 0); data->helper_pid = -1; + if (data->chan_fd != -1) + os_close_file(data->chan_fd); os_close_file(fd); } diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S index d7086b985f278..4049f2c463876 100644 --- a/arch/um/include/asm/common.lds.S +++ b/arch/um/include/asm/common.lds.S @@ -83,8 +83,8 @@ __preinit_array_end = .; } .init_array : { - /* dummy - we call this ourselves */ __init_array_start = .; + *(.init_array) __init_array_end = .; } .fini_array : { diff --git a/arch/um/include/asm/ptrace-generic.h b/arch/um/include/asm/ptrace-generic.h index 81c647ef9c6c8..adf91ef553ae9 100644 --- a/arch/um/include/asm/ptrace-generic.h +++ b/arch/um/include/asm/ptrace-generic.h @@ -36,7 +36,7 @@ extern long subarch_ptrace(struct task_struct *child, long request, extern unsigned long getreg(struct task_struct *child, int regno); extern int putreg(struct task_struct *child, int regno, unsigned long value); -extern int arch_copy_tls(struct task_struct *new); +extern int arch_set_tls(struct task_struct *new, unsigned long tls); extern void clear_flushed_tls(struct task_struct *task); extern int syscall_trace_enter(struct pt_regs *regs); extern void syscall_trace_leave(struct pt_regs *regs); diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h index 4c19ce4c49f18..66ab6a07330b2 100644 --- a/arch/um/include/asm/thread_info.h +++ b/arch/um/include/asm/thread_info.h @@ -63,6 +63,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTORE_SIGMASK 7 #define TIF_NOTIFY_RESUME 8 #define TIF_SECCOMP 9 /* secure computing */ +#define TIF_SINGLESTEP 10 /* single stepping userspace */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) @@ -70,5 +71,6 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_MEMDIE (1 << TIF_MEMDIE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #endif diff --git a/arch/um/include/asm/timex.h b/arch/um/include/asm/timex.h index e392a9a5bc9bd..9f27176adb26d 100644 --- a/arch/um/include/asm/timex.h +++ b/arch/um/include/asm/timex.h @@ -2,13 +2,8 @@ #ifndef __UM_TIMEX_H #define __UM_TIMEX_H -typedef unsigned long cycles_t; - -static inline cycles_t get_cycles (void) -{ - return 0; -} - #define CLOCK_TICK_RATE (HZ) +#include + #endif diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h index 0c50fa6e8a55b..fbb709a222839 100644 --- a/arch/um/include/shared/registers.h +++ b/arch/um/include/shared/registers.h @@ -16,8 +16,8 @@ extern int restore_fp_registers(int pid, unsigned long *fp_regs); extern int save_fpx_registers(int pid, unsigned long *fp_regs); extern int restore_fpx_registers(int pid, unsigned long *fp_regs); extern int save_registers(int pid, struct uml_pt_regs *regs); -extern int restore_registers(int pid, struct uml_pt_regs *regs); -extern int init_registers(int pid); +extern int restore_pid_registers(int pid, struct uml_pt_regs *regs); +extern int init_pid_registers(int pid); extern void get_safe_registers(unsigned long *regs, unsigned long *fp_regs); extern unsigned long get_thread_reg(int reg, jmp_buf *buf); extern int get_fp_registers(int pid, unsigned long *regs); diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index 5aa882011e041..e698e0c7dbdca 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -21,7 +21,6 @@ obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \ obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o obj-$(CONFIG_GPROF) += gprof_syms.o -obj-$(CONFIG_GCOV) += gmon_syms.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index c69d69ee96beb..a82ec01133212 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -6,6 +6,12 @@ OUTPUT_ARCH(ELF_ARCH) ENTRY(_start) jiffies = jiffies_64; +VERSION { + { + local: *; + }; +} + SECTIONS { PROVIDE (__executable_start = START); @@ -103,6 +109,7 @@ SECTIONS be empty, which isn't pretty. */ . = ALIGN(32 / 8); .preinit_array : { *(.preinit_array) } + .init_array : { *(.init_array) } .fini_array : { *(.fini_array) } .data : { INIT_TASK_DATA(KERNEL_STACK_SIZE) diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index e8fd5d540b05d..7f7a74c82abb6 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -44,7 +44,7 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) { PT_REGS_IP(regs) = eip; PT_REGS_SP(regs) = esp; - current->ptrace &= ~PT_DTRACE; + clear_thread_flag(TIF_SINGLESTEP); #ifdef SUBARCH_EXECVE1 SUBARCH_EXECVE1(regs->regs); #endif diff --git a/arch/um/kernel/gmon_syms.c b/arch/um/kernel/gmon_syms.c deleted file mode 100644 index 9361a8eb9bf1a..0000000000000 --- a/arch/um/kernel/gmon_syms.c +++ /dev/null @@ -1,16 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#include - -extern void __bb_init_func(void *) __attribute__((weak)); -EXPORT_SYMBOL(__bb_init_func); - -extern void __gcov_init(void *) __attribute__((weak)); -EXPORT_SYMBOL(__gcov_init); -extern void __gcov_merge_add(void *, unsigned int) __attribute__((weak)); -EXPORT_SYMBOL(__gcov_merge_add); -extern void __gcov_exit(void) __attribute__((weak)); -EXPORT_SYMBOL(__gcov_exit); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 263a8f0691334..d71dd7725bef1 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -153,8 +153,8 @@ void fork_handler(void) userspace(¤t->thread.regs.regs, current_thread_info()->aux_fp_regs); } -int copy_thread(unsigned long clone_flags, unsigned long sp, - unsigned long arg, struct task_struct * p) +int copy_thread_tls(unsigned long clone_flags, unsigned long sp, + unsigned long arg, struct task_struct * p, unsigned long tls) { void (*handler)(void); int kthread = current->flags & PF_KTHREAD; @@ -188,7 +188,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, * Set a new TLS for the child thread? */ if (clone_flags & CLONE_SETTLS) - ret = arch_copy_tls(p); + ret = arch_set_tls(p, tls); } return ret; @@ -380,7 +380,7 @@ int singlestepping(void * t) { struct task_struct *task = t ? t : current; - if (!(task->ptrace & PT_DTRACE)) + if (!test_thread_flag(TIF_SINGLESTEP)) return 0; if (task->thread.singlestep_syscall) diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index b425f47bddbb3..d37802ced5636 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -12,7 +12,7 @@ void user_enable_single_step(struct task_struct *child) { - child->ptrace |= PT_DTRACE; + set_tsk_thread_flag(child, TIF_SINGLESTEP); child->thread.singlestep_syscall = 0; #ifdef SUBARCH_SET_SINGLESTEPPING @@ -22,7 +22,7 @@ void user_enable_single_step(struct task_struct *child) void user_disable_single_step(struct task_struct *child) { - child->ptrace &= ~PT_DTRACE; + clear_tsk_thread_flag(child, TIF_SINGLESTEP); child->thread.singlestep_syscall = 0; #ifdef SUBARCH_SET_SINGLESTEPPING @@ -121,7 +121,7 @@ static void send_sigtrap(struct uml_pt_regs *regs, int error_code) } /* - * XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and + * XXX Check TIF_SINGLESTEP for singlestepping check and * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check */ int syscall_trace_enter(struct pt_regs *regs) @@ -145,7 +145,7 @@ void syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit(regs); /* Fake a debug trap */ - if (ptraced & PT_DTRACE) + if (test_thread_flag(TIF_SINGLESTEP)) send_sigtrap(®s->regs, 0); if (!test_thread_flag(TIF_SYSCALL_TRACE)) diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c index 10c99e058fcae..d1cffc2a7f212 100644 --- a/arch/um/kernel/sigio.c +++ b/arch/um/kernel/sigio.c @@ -35,14 +35,14 @@ int write_sigio_irq(int fd) } /* These are called from os-Linux/sigio.c to protect its pollfds arrays. */ -static DEFINE_SPINLOCK(sigio_spinlock); +static DEFINE_MUTEX(sigio_mutex); void sigio_lock(void) { - spin_lock(&sigio_spinlock); + mutex_lock(&sigio_mutex); } void sigio_unlock(void) { - spin_unlock(&sigio_spinlock); + mutex_unlock(&sigio_mutex); } diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 3d57c71c532e4..01628195ae520 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -53,7 +53,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) unsigned long sp; int err; - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) + if (test_thread_flag(TIF_SINGLESTEP) && (current->ptrace & PT_PTRACED)) singlestep = 1; /* Did we come from a system call? */ @@ -128,7 +128,7 @@ void do_signal(struct pt_regs *regs) * on the host. The tracing thread will check this flag and * PTRACE_SYSCALL if necessary. */ - if (current->ptrace & PT_DTRACE) + if (test_thread_flag(TIF_SINGLESTEP)) current->thread.singlestep_syscall = is_syscall(PT_REGS_IP(¤t->thread.regs)); diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index b7eaf655635cd..11499136720d8 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -126,6 +126,9 @@ static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, struct host_vm_op *last; int fd = -1, ret = 0; + if (virt + len > STUB_START && virt < STUB_END) + return -EINVAL; + if (hvc->userspace) fd = phys_mapping(phys, &offset); else @@ -163,7 +166,7 @@ static int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *last; int ret = 0; - if ((addr >= STUB_START) && (addr < STUB_END)) + if (addr + len > STUB_START && addr < STUB_END) return -EINVAL; if (hvc->index != 0) { @@ -193,6 +196,9 @@ static int add_mprotect(unsigned long addr, unsigned long len, struct host_vm_op *last; int ret = 0; + if (addr + len > STUB_START && addr < STUB_END) + return -EINVAL; + if (hvc->index != 0) { last = &hvc->ops[hvc->index - 1]; if ((last->type == MPROTECT) && @@ -433,6 +439,10 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) struct mm_id *mm_id; address &= PAGE_MASK; + + if (address >= STUB_START && address < STUB_END) + goto kill; + pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) goto kill; diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 9f21443be2c9e..85b404d068f4b 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -7,6 +7,12 @@ OUTPUT_ARCH(ELF_ARCH) ENTRY(_start) jiffies = jiffies_64; +VERSION { + { + local: *; + }; +} + SECTIONS { /* This must contain the right address - not quite the default ELF one.*/ diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index 5133e3afb96f7..3996937e2c0dd 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c index d508310ee5e1e..f1732c308c615 100644 --- a/arch/um/os-Linux/irq.c +++ b/arch/um/os-Linux/irq.c @@ -48,7 +48,7 @@ int os_epoll_triggered(int index, int events) int os_event_mask(int irq_type) { if (irq_type == IRQ_READ) - return EPOLLIN | EPOLLPRI; + return EPOLLIN | EPOLLPRI | EPOLLERR | EPOLLHUP | EPOLLRDHUP; if (irq_type == IRQ_WRITE) return EPOLLOUT; return 0; diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c index 8014dfac644dc..c8a42ecbd7a2d 100644 --- a/arch/um/os-Linux/main.c +++ b/arch/um/os-Linux/main.c @@ -170,7 +170,7 @@ int __init main(int argc, char **argv, char **envp) * that they won't be delivered after the exec, when * they are definitely not expected. */ - unblock_signals_trace(); + unblock_signals(); os_info("\n"); /* Reboot */ diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c index 2d9270508e156..b123955be7acc 100644 --- a/arch/um/os-Linux/registers.c +++ b/arch/um/os-Linux/registers.c @@ -21,7 +21,7 @@ int save_registers(int pid, struct uml_pt_regs *regs) return 0; } -int restore_registers(int pid, struct uml_pt_regs *regs) +int restore_pid_registers(int pid, struct uml_pt_regs *regs) { int err; @@ -36,7 +36,7 @@ int restore_registers(int pid, struct uml_pt_regs *regs) static unsigned long exec_regs[MAX_REG_NR]; static unsigned long exec_fp_regs[FP_SIZE]; -int init_registers(int pid) +int init_pid_registers(int pid) { int err; diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 4fb877b99dded..0571cc0a30fcc 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -5,6 +5,7 @@ */ #include +#include #include #include #include @@ -641,10 +642,24 @@ void halt_skas(void) UML_LONGJMP(&initial_jmpbuf, INIT_JMP_HALT); } +static bool noreboot; + +static int __init noreboot_cmd_param(char *str, int *add) +{ + noreboot = true; + return 0; +} + +__uml_setup("noreboot", noreboot_cmd_param, +"noreboot\n" +" Rather than rebooting, exit always, akin to QEMU's -no-reboot option.\n" +" This is useful if you're using CONFIG_PANIC_TIMEOUT in order to catch\n" +" crashes in CI\n"); + void reboot_skas(void) { block_signals_trace(); - UML_LONGJMP(&initial_jmpbuf, INIT_JMP_REBOOT); + UML_LONGJMP(&initial_jmpbuf, noreboot ? INIT_JMP_HALT : INIT_JMP_REBOOT); } void __switch_mm(struct mm_id *mm_idp) diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index f79dc338279e6..b28373a2b8d2d 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -336,7 +336,7 @@ void __init os_early_checks(void) check_tmpexec(); pid = start_ptraced_child(); - if (init_registers(pid)) + if (init_pid_registers(pid)) fatal("Failed to initialize default registers"); stop_ptraced_child(pid, 1, 1); } diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c index 44def53a11cd6..ea5c60f4393e9 100644 --- a/arch/um/os-Linux/umid.c +++ b/arch/um/os-Linux/umid.c @@ -137,20 +137,13 @@ static inline int is_umdir_used(char *dir) { char pid[sizeof("nnnnn\0")], *end, *file; int dead, fd, p, n, err; - size_t filelen; + size_t filelen = strlen(dir) + sizeof("/pid") + 1; - err = asprintf(&file, "%s/pid", dir); - if (err < 0) - return 0; - - filelen = strlen(file); + file = malloc(filelen); + if (!file) + return -ENOMEM; - n = snprintf(file, filelen, "%s/pid", dir); - if (n >= filelen) { - printk(UM_KERN_ERR "is_umdir_used - pid filename too long\n"); - err = -E2BIG; - goto out; - } + snprintf(file, filelen, "%s/pid", dir); dead = 0; fd = open(file, O_RDONLY); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8ef85139553f5..6002252692af4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -73,7 +73,6 @@ config X86 select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_PTE_DEVMAP if X86_64 select ARCH_HAS_PTE_SPECIAL - select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE select ARCH_HAS_SET_MEMORY @@ -550,6 +549,7 @@ config X86_UV depends on X86_EXTENDED_PLATFORM depends on NUMA depends on EFI + depends on KEXEC_CORE depends on X86_X2APIC depends on PCI ---help--- @@ -1424,7 +1424,7 @@ config HIGHMEM4G config HIGHMEM64G bool "64GB" - depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6 + depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !MWINCHIP3D && !MK6 select X86_PAE ---help--- Select this if you have a 32-bit processor and more than 4 @@ -1540,7 +1540,6 @@ config AMD_MEM_ENCRYPT config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT bool "Activate AMD Secure Memory Encryption (SME) by default" - default y depends on AMD_MEM_ENCRYPT ---help--- Say yes to have system memory encrypted by default if running on @@ -1990,6 +1989,7 @@ config EFI depends on ACPI select UCS2_STRING select EFI_RUNTIME_WRAPPERS + select ARCH_USE_MEMREMAP_PROT ---help--- This enables the kernel to use EFI runtime services that are available (such as the EFI variable services). diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 94df0868804bc..69f0cb01c6662 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -34,12 +34,13 @@ M16_CFLAGS := $(call cc-option, -m16, $(CODE16GCC_CFLAGS)) REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -DDISABLE_BRANCH_PROFILING \ -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ - -mno-mmx -mno-sse + -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none) REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -ffreestanding) REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -fno-stack-protector) REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), -Wno-address-of-packed-member) REALMODE_CFLAGS += $(call __cc-option, $(CC), $(REALMODE_CFLAGS), $(cc_stack_align4)) +REALMODE_CFLAGS += $(CLANG_FLAGS) export REALMODE_CFLAGS # BITS is used as extension for files which are available in a 32 bit @@ -61,6 +62,9 @@ endif KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow KBUILD_CFLAGS += $(call cc-option,-mno-avx,) +# Intel CET isn't enabled in the kernel +KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) + ifeq ($(CONFIG_X86_32),y) BITS := 32 UTS_MACHINE := i386 diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index e2839b5c246c2..82500962f1b3d 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -87,7 +87,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) -sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ @@ -100,7 +100,7 @@ $(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE AFLAGS_header.o += -I$(objtree)/$(obj) $(obj)/header.o: $(obj)/zoffset.h -LDFLAGS_setup.elf := -m elf_i386 -T +LDFLAGS_setup.elf := -m elf_i386 -z noexecstack -T $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE $(call if_changed,ld) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 6b84afdd75382..edfb1a7185109 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -38,6 +38,8 @@ KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) KBUILD_CFLAGS += -Wno-pointer-sign +# Disable relocation relaxation in case the link is not PIE. +KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no) KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n @@ -55,6 +57,10 @@ else KBUILD_LDFLAGS += $(shell $(LD) --help 2>&1 | grep -q "\-z noreloc-overflow" \ && echo "-z noreloc-overflow -pie --no-dynamic-linker") endif + +KBUILD_LDFLAGS += -z noexecstack +KBUILD_LDFLAGS += $(call ld-option,--no-warn-rwx-segments) + LDFLAGS_vmlinux := -T hostprogs-y := mkpiggy @@ -102,7 +108,7 @@ vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o quiet_cmd_check_data_rel = DATAREL $@ define cmd_check_data_rel for obj in $(filter %.o,$^); do \ - ${CROSS_COMPILE}readelf -S $$obj | grep -qF .rel.local && { \ + $(READELF) -S $$obj | grep -qF .rel.local && { \ echo "error: $$obj has data relocations!" >&2; \ exit 1; \ } || true; \ diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 25019d42ae937..ef2ad7253cd5e 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -393,7 +393,13 @@ int count_immovable_mem_regions(void) table = table_addr + sizeof(struct acpi_table_srat); while (table + sizeof(struct acpi_subtable_header) < table_end) { + sub_table = (struct acpi_subtable_header *)table; + if (!sub_table->length) { + debug_putstr("Invalid zero length SRAT subtable.\n"); + return 0; + } + if (sub_table->type == ACPI_SRAT_TYPE_MEMORY_AFFINITY) { struct acpi_srat_mem_affinity *ma; diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 5e30eaaf8576f..d7c0fcc1dbf9e 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -49,16 +49,17 @@ * Position Independent Executable (PIE) so that linker won't optimize * R_386_GOT32X relocation to its fixed symbol address. Older * linkers generate R_386_32 relocations against locally defined symbols, - * _bss, _ebss, _got and _egot, in PIE. It isn't wrong, just less + * _bss, _ebss, _got, _egot and _end, in PIE. It isn't wrong, just less * optimal than R_386_RELATIVE. But the x86 kernel fails to properly handle * R_386_32 relocations when relocating the kernel. To generate - * R_386_RELATIVE relocations, we mark _bss, _ebss, _got and _egot as + * R_386_RELATIVE relocations, we mark _bss, _ebss, _got, _egot and _end as * hidden: */ .hidden _bss .hidden _ebss .hidden _got .hidden _egot + .hidden _end __HEAD ENTRY(startup_32) @@ -106,7 +107,7 @@ ENTRY(startup_32) notl %eax andl %eax, %ebx cmpl $LOAD_PHYSICAL_ADDR, %ebx - jge 1f + jae 1f #endif movl $LOAD_PHYSICAL_ADDR, %ebx 1: diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index d98cd483377eb..50c9eeb36f0d8 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -42,6 +42,7 @@ .hidden _ebss .hidden _got .hidden _egot + .hidden _end __HEAD .code32 @@ -106,7 +107,7 @@ ENTRY(startup_32) notl %eax andl %eax, %ebx cmpl $LOAD_PHYSICAL_ADDR, %ebx - jge 1f + jae 1f #endif movl $LOAD_PHYSICAL_ADDR, %ebx 1: @@ -244,6 +245,11 @@ ENTRY(efi32_stub_entry) leal efi32_config(%ebp), %eax movl %eax, efi_config(%ebp) + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + jmp startup_32 ENDPROC(efi32_stub_entry) #endif @@ -292,7 +298,7 @@ ENTRY(startup_64) notq %rax andq %rax, %rbp cmpq $LOAD_PHYSICAL_ADDR, %rbp - jge 1f + jae 1f #endif movq $LOAD_PHYSICAL_ADDR, %rbp 1: diff --git a/arch/x86/boot/compressed/kaslr_64.c b/arch/x86/boot/compressed/kaslr_64.c index 748456c365f46..9557c5a15b91e 100644 --- a/arch/x86/boot/compressed/kaslr_64.c +++ b/arch/x86/boot/compressed/kaslr_64.c @@ -29,9 +29,6 @@ #define __PAGE_OFFSET __PAGE_OFFSET_BASE #include "../../mm/ident_map.c" -/* Used by pgtable.h asm code to force instruction serialization. */ -unsigned long __force_order; - /* Used to track our page table allocation area. */ struct alloc_pgt_data { unsigned char *pgt_buf; diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index c8862696a47b9..7d0394f4ebf97 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -5,15 +5,6 @@ #include "pgtable.h" #include "../string.h" -/* - * __force_order is used by special_insns.h asm code to force instruction - * serialization. - * - * It is not referenced from the code, but GCC < 5 with -fPIE would fail - * due to an undefined symbol. Define it to make these ancient GCCs work. - */ -unsigned long __force_order; - #define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */ #define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */ diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index 0149e41d42c27..5fac3de579445 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld @@ -20,7 +20,7 @@ SECTIONS .initdata : { *(.initdata) } __end_init = .; - .text : { *(.text) } + .text : { *(.text .text.*) } .text32 : { *(.text32) } . = ALIGN(16); diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index 4816cb9cf9968..03dd57043326d 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c @@ -188,7 +188,7 @@ static void vga_set_80x60(void) vga_set_vertical_end(60*8); } -static int vga_set_mode(struct mode_info *mode) +static int __attribute__((optimize("no-jump-tables"))) vga_set_mode(struct mode_info *mode) { /* Set the basic mode */ vga_set_basic_mode(); diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 59ce9ed584306..088709089e9b8 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -137,7 +137,6 @@ CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SPI_ATTRS=y @@ -205,7 +204,6 @@ CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y CONFIG_FB_EFI=y # CONFIG_LCD_CLASS_DEVICE is not set -CONFIG_VGACON_SOFT_SCROLLBACK=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index d0a5ffeae8dfd..8092d7baf8b50 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -136,7 +136,6 @@ CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SPI_ATTRS=y @@ -201,7 +200,6 @@ CONFIG_FB_MODE_HELPERS=y CONFIG_FB_TILEBLITTING=y CONFIG_FB_EFI=y # CONFIG_LCD_CLASS_DEVICE is not set -CONFIG_VGACON_SOFT_SCROLLBACK=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S index 5f6a5af9c489b..77043a82da510 100644 --- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S +++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S @@ -127,10 +127,6 @@ ddq_add_8: /* generate a unique variable for ddq_add_x */ -.macro setddq n - var_ddq_add = ddq_add_\n -.endm - /* generate a unique variable for xmm register */ .macro setxdata n var_xdata = %xmm\n @@ -140,9 +136,7 @@ ddq_add_8: .macro club name, id .altmacro - .if \name == DDQ_DATA - setddq %\id - .elseif \name == XDATA + .if \name == XDATA setxdata %\id .endif .noaltmacro @@ -165,9 +159,8 @@ ddq_add_8: .set i, 1 .rept (by - 1) - club DDQ_DATA, i club XDATA, i - vpaddq var_ddq_add(%rip), xcounter, var_xdata + vpaddq (ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata vptest ddq_low_msk(%rip), var_xdata jnz 1f vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata @@ -180,8 +173,7 @@ ddq_add_8: vmovdqa 1*16(p_keys), xkeyA vpxor xkey0, xdata0, xdata0 - club DDQ_DATA, by - vpaddq var_ddq_add(%rip), xcounter, xcounter + vpaddq (ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter vptest ddq_low_msk(%rip), xcounter jnz 1f vpaddq ddq_high_add_1(%rip), xcounter, xcounter diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index e40bdf024ba76..dd954d8db629b 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -266,7 +266,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff PSHUFB_XMM %xmm2, %xmm0 movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv - PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7 movdqu HashKey(%arg2), %xmm13 CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \ @@ -319,7 +319,7 @@ _initial_blocks_\@: # Main loop - Encrypt/Decrypt remaining blocks - cmp $0, %r13 + test %r13, %r13 je _zero_cipher_left_\@ sub $64, %r13 je _four_cipher_left_\@ @@ -438,7 +438,7 @@ _multiple_of_16_bytes_\@: mov PBlockLen(%arg2), %r12 - cmp $0, %r12 + test %r12, %r12 je _partial_done\@ GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 @@ -475,7 +475,7 @@ _T_8_\@: add $8, %r10 sub $8, %r11 psrldq $8, %xmm0 - cmp $0, %r11 + test %r11, %r11 je _return_T_done_\@ _T_4_\@: movd %xmm0, %eax @@ -483,7 +483,7 @@ _T_4_\@: add $4, %r10 sub $4, %r11 psrldq $4, %xmm0 - cmp $0, %r11 + test %r11, %r11 je _return_T_done_\@ _T_123_\@: movd %xmm0, %eax @@ -620,7 +620,7 @@ _get_AAD_blocks\@: /* read the last <16B of AAD */ _get_AAD_rest\@: - cmp $0, %r11 + test %r11, %r11 je _get_AAD_done\@ READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7 @@ -641,7 +641,7 @@ _get_AAD_done\@: .macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ AAD_HASH operation mov PBlockLen(%arg2), %r13 - cmp $0, %r13 + test %r13, %r13 je _partial_block_done_\@ # Leave Macro if no partial blocks # Read in input data without over reading cmp $16, \PLAIN_CYPH_LEN @@ -693,7 +693,7 @@ _no_extra_mask_1_\@: PSHUFB_XMM %xmm2, %xmm3 pxor %xmm3, \AAD_HASH - cmp $0, %r10 + test %r10, %r10 jl _partial_incomplete_1_\@ # GHASH computation for the last <16 Byte block @@ -728,7 +728,7 @@ _no_extra_mask_2_\@: PSHUFB_XMM %xmm2, %xmm9 pxor %xmm9, \AAD_HASH - cmp $0, %r10 + test %r10, %r10 jl _partial_incomplete_2_\@ # GHASH computation for the last <16 Byte block @@ -748,7 +748,7 @@ _encode_done_\@: PSHUFB_XMM %xmm2, %xmm9 .endif # output encrypted Bytes - cmp $0, %r10 + test %r10, %r10 jl _partial_fill_\@ mov %r13, %r12 mov $16, %r13 @@ -978,7 +978,7 @@ _initial_blocks_done\@: * arg1, %arg3, %arg4 are used as pointers only, not modified * %r11 is the data offset value */ -.macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \ +.macro GHASH_4_ENCRYPT_4_PARALLEL_enc TMP1 TMP2 TMP3 TMP4 TMP5 \ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM1, \XMM5 @@ -1186,7 +1186,7 @@ aes_loop_par_enc_done\@: * arg1, %arg3, %arg4 are used as pointers only, not modified * %r11 is the data offset value */ -.macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \ +.macro GHASH_4_ENCRYPT_4_PARALLEL_dec TMP1 TMP2 TMP3 TMP4 TMP5 \ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM1, \XMM5 @@ -1946,7 +1946,7 @@ ENTRY(aesni_set_key) ENDPROC(aesni_set_key) /* - * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) + * void aesni_enc(const void *ctx, u8 *dst, const u8 *src) */ ENTRY(aesni_enc) FRAME_BEGIN @@ -2137,7 +2137,7 @@ _aesni_enc4: ENDPROC(_aesni_enc4) /* - * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) + * void aesni_dec (const void *ctx, u8 *dst, const u8 *src) */ ENTRY(aesni_dec) FRAME_BEGIN @@ -2726,25 +2726,18 @@ ENDPROC(aesni_ctr_enc) pxor CTR, IV; /* - * void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, - * bool enc, u8 *iv) + * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst, + * const u8 *src, unsigned int len, le128 *iv) */ -ENTRY(aesni_xts_crypt8) +ENTRY(aesni_xts_encrypt) FRAME_BEGIN - cmpb $0, %cl - movl $0, %ecx - movl $240, %r10d - leaq _aesni_enc4, %r11 - leaq _aesni_dec4, %rax - cmovel %r10d, %ecx - cmoveq %rax, %r11 movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK movups (IVP), IV mov 480(KEYP), KLEN - addq %rcx, KEYP +.Lxts_enc_loop4: movdqa IV, STATE1 movdqu 0x00(INP), INC pxor INC, STATE1 @@ -2768,71 +2761,103 @@ ENTRY(aesni_xts_crypt8) pxor INC, STATE4 movdqu IV, 0x30(OUTP) - CALL_NOSPEC %r11 + call _aesni_enc4 movdqu 0x00(OUTP), INC pxor INC, STATE1 movdqu STATE1, 0x00(OUTP) - _aesni_gf128mul_x_ble() - movdqa IV, STATE1 - movdqu 0x40(INP), INC - pxor INC, STATE1 - movdqu IV, 0x40(OUTP) - movdqu 0x10(OUTP), INC pxor INC, STATE2 movdqu STATE2, 0x10(OUTP) - _aesni_gf128mul_x_ble() - movdqa IV, STATE2 - movdqu 0x50(INP), INC - pxor INC, STATE2 - movdqu IV, 0x50(OUTP) - movdqu 0x20(OUTP), INC pxor INC, STATE3 movdqu STATE3, 0x20(OUTP) - _aesni_gf128mul_x_ble() - movdqa IV, STATE3 - movdqu 0x60(INP), INC - pxor INC, STATE3 - movdqu IV, 0x60(OUTP) - movdqu 0x30(OUTP), INC pxor INC, STATE4 movdqu STATE4, 0x30(OUTP) _aesni_gf128mul_x_ble() - movdqa IV, STATE4 - movdqu 0x70(INP), INC - pxor INC, STATE4 - movdqu IV, 0x70(OUTP) - _aesni_gf128mul_x_ble() + add $64, INP + add $64, OUTP + sub $64, LEN + ja .Lxts_enc_loop4 + movups IV, (IVP) - CALL_NOSPEC %r11 + FRAME_END + ret +ENDPROC(aesni_xts_encrypt) + +/* + * void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst, + * const u8 *src, unsigned int len, le128 *iv) + */ +ENTRY(aesni_xts_decrypt) + FRAME_BEGIN + + movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK + movups (IVP), IV + + mov 480(KEYP), KLEN + add $240, KEYP - movdqu 0x40(OUTP), INC +.Lxts_dec_loop4: + movdqa IV, STATE1 + movdqu 0x00(INP), INC pxor INC, STATE1 - movdqu STATE1, 0x40(OUTP) + movdqu IV, 0x00(OUTP) - movdqu 0x50(OUTP), INC + _aesni_gf128mul_x_ble() + movdqa IV, STATE2 + movdqu 0x10(INP), INC + pxor INC, STATE2 + movdqu IV, 0x10(OUTP) + + _aesni_gf128mul_x_ble() + movdqa IV, STATE3 + movdqu 0x20(INP), INC + pxor INC, STATE3 + movdqu IV, 0x20(OUTP) + + _aesni_gf128mul_x_ble() + movdqa IV, STATE4 + movdqu 0x30(INP), INC + pxor INC, STATE4 + movdqu IV, 0x30(OUTP) + + call _aesni_dec4 + + movdqu 0x00(OUTP), INC + pxor INC, STATE1 + movdqu STATE1, 0x00(OUTP) + + movdqu 0x10(OUTP), INC pxor INC, STATE2 - movdqu STATE2, 0x50(OUTP) + movdqu STATE2, 0x10(OUTP) - movdqu 0x60(OUTP), INC + movdqu 0x20(OUTP), INC pxor INC, STATE3 - movdqu STATE3, 0x60(OUTP) + movdqu STATE3, 0x20(OUTP) - movdqu 0x70(OUTP), INC + movdqu 0x30(OUTP), INC pxor INC, STATE4 - movdqu STATE4, 0x70(OUTP) + movdqu STATE4, 0x30(OUTP) + + _aesni_gf128mul_x_ble() + + add $64, INP + add $64, OUTP + sub $64, LEN + ja .Lxts_dec_loop4 + + movups IV, (IVP) FRAME_END ret -ENDPROC(aesni_xts_crypt8) +ENDPROC(aesni_xts_decrypt) #endif diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index 91c039ab56999..4e4d34956170b 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -370,7 +370,7 @@ _initial_num_blocks_is_0\@: _initial_blocks_encrypted\@: - cmp $0, %r13 + test %r13, %r13 je _zero_cipher_left\@ sub $128, %r13 @@ -529,7 +529,7 @@ _multiple_of_16_bytes\@: vmovdqu HashKey(arg2), %xmm13 mov PBlockLen(arg2), %r12 - cmp $0, %r12 + test %r12, %r12 je _partial_done\@ #GHASH computation for the last <16 Byte block @@ -574,7 +574,7 @@ _T_8\@: add $8, %r10 sub $8, %r11 vpsrldq $8, %xmm9, %xmm9 - cmp $0, %r11 + test %r11, %r11 je _return_T_done\@ _T_4\@: vmovd %xmm9, %eax @@ -582,7 +582,7 @@ _T_4\@: add $4, %r10 sub $4, %r11 vpsrldq $4, %xmm9, %xmm9 - cmp $0, %r11 + test %r11, %r11 je _return_T_done\@ _T_123\@: vmovd %xmm9, %eax @@ -626,7 +626,7 @@ _get_AAD_blocks\@: cmp $16, %r11 jge _get_AAD_blocks\@ vmovdqu \T8, \T7 - cmp $0, %r11 + test %r11, %r11 je _get_AAD_done\@ vpxor \T7, \T7, \T7 @@ -645,7 +645,7 @@ _get_AAD_rest8\@: vpxor \T1, \T7, \T7 jmp _get_AAD_rest8\@ _get_AAD_rest4\@: - cmp $0, %r11 + test %r11, %r11 jle _get_AAD_rest0\@ mov (%r10), %eax movq %rax, \T1 @@ -750,7 +750,7 @@ _done_read_partial_block_\@: .macro PARTIAL_BLOCK GHASH_MUL CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ AAD_HASH ENC_DEC mov PBlockLen(arg2), %r13 - cmp $0, %r13 + test %r13, %r13 je _partial_block_done_\@ # Leave Macro if no partial blocks # Read in input data without over reading cmp $16, \PLAIN_CYPH_LEN @@ -802,7 +802,7 @@ _no_extra_mask_1_\@: vpshufb %xmm2, %xmm3, %xmm3 vpxor %xmm3, \AAD_HASH, \AAD_HASH - cmp $0, %r10 + test %r10, %r10 jl _partial_incomplete_1_\@ # GHASH computation for the last <16 Byte block @@ -837,7 +837,7 @@ _no_extra_mask_2_\@: vpshufb %xmm2, %xmm9, %xmm9 vpxor %xmm9, \AAD_HASH, \AAD_HASH - cmp $0, %r10 + test %r10, %r10 jl _partial_incomplete_2_\@ # GHASH computation for the last <16 Byte block @@ -857,7 +857,7 @@ _encode_done_\@: vpshufb %xmm2, %xmm9, %xmm9 .endif # output encrypted Bytes - cmp $0, %r10 + test %r10, %r10 jl _partial_fill_\@ mov %r13, %r12 mov $16, %r13 diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 3e707e81afdb4..18cfb76daa232 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -83,10 +83,8 @@ struct gcm_context_data { asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, unsigned int key_len); -asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in); -asmlinkage void aesni_dec(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in); +asmlinkage void aesni_enc(const void *ctx, u8 *out, const u8 *in); +asmlinkage void aesni_dec(const void *ctx, u8 *out, const u8 *in); asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len); asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out, @@ -99,6 +97,12 @@ asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, #define AVX_GEN2_OPTSIZE 640 #define AVX_GEN4_OPTSIZE 4096 +asmlinkage void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); + +asmlinkage void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); + #ifdef CONFIG_X86_64 static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out, @@ -106,9 +110,6 @@ static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out, asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); -asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, bool enc, u8 *iv); - /* asmlinkage void aesni_gcm_enc() * void *ctx, AES Key schedule. Starts on a 16 byte boundary. * struct gcm_context_data. May be uninitialized. @@ -550,29 +551,24 @@ static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key, } -static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in) -{ - aesni_enc(ctx, out, in); -} - -static void aesni_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void aesni_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_enc)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_enc); } -static void aesni_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void aesni_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_dec)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_dec); } -static void aesni_xts_enc8(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void aesni_xts_enc32(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, true, (u8 *)iv); + aesni_xts_encrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv); } -static void aesni_xts_dec8(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void aesni_xts_dec32(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - aesni_xts_crypt8(ctx, (u8 *)dst, (const u8 *)src, false, (u8 *)iv); + aesni_xts_decrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv); } static const struct common_glue_ctx aesni_enc_xts = { @@ -580,11 +576,11 @@ static const struct common_glue_ctx aesni_enc_xts = { .fpu_blocks_limit = 1, .funcs = { { - .num_blocks = 8, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc8) } + .num_blocks = 32, + .fn_u = { .xts = aesni_xts_enc32 } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_enc) } + .fn_u = { .xts = aesni_xts_enc } } } }; @@ -593,11 +589,11 @@ static const struct common_glue_ctx aesni_dec_xts = { .fpu_blocks_limit = 1, .funcs = { { - .num_blocks = 8, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec8) } + .num_blocks = 32, + .fn_u = { .xts = aesni_xts_dec32 } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(aesni_xts_dec) } + .fn_u = { .xts = aesni_xts_dec } } } }; @@ -606,8 +602,7 @@ static int xts_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&aesni_enc_xts, req, - XTS_TWEAK_CAST(aesni_xts_tweak), + return glue_xts_req_128bit(&aesni_enc_xts, req, aesni_enc, aes_ctx(ctx->raw_tweak_ctx), aes_ctx(ctx->raw_crypt_ctx), false); @@ -618,8 +613,7 @@ static int xts_decrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&aesni_dec_xts, req, - XTS_TWEAK_CAST(aesni_xts_tweak), + return glue_xts_req_128bit(&aesni_dec_xts, req, aesni_enc, aes_ctx(ctx->raw_tweak_ctx), aes_ctx(ctx->raw_crypt_ctx), true); @@ -707,7 +701,8 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, struct crypto_aead *tfm = crypto_aead_reqtfm(req); unsigned long auth_tag_len = crypto_aead_authsize(tfm); const struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm; - struct gcm_context_data data AESNI_ALIGN_ATTR; + u8 databuf[sizeof(struct gcm_context_data) + (AESNI_ALIGN - 8)] __aligned(8); + struct gcm_context_data *data = PTR_ALIGN((void *)databuf, AESNI_ALIGN); struct scatter_walk dst_sg_walk = {}; unsigned long left = req->cryptlen; unsigned long len, srclen, dstlen; @@ -760,8 +755,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, } kernel_fpu_begin(); - gcm_tfm->init(aes_ctx, &data, iv, - hash_subkey, assoc, assoclen); + gcm_tfm->init(aes_ctx, data, iv, hash_subkey, assoc, assoclen); if (req->src != req->dst) { while (left) { src = scatterwalk_map(&src_sg_walk); @@ -771,10 +765,10 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, len = min(srclen, dstlen); if (len) { if (enc) - gcm_tfm->enc_update(aes_ctx, &data, + gcm_tfm->enc_update(aes_ctx, data, dst, src, len); else - gcm_tfm->dec_update(aes_ctx, &data, + gcm_tfm->dec_update(aes_ctx, data, dst, src, len); } left -= len; @@ -792,10 +786,10 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, len = scatterwalk_clamp(&src_sg_walk, left); if (len) { if (enc) - gcm_tfm->enc_update(aes_ctx, &data, + gcm_tfm->enc_update(aes_ctx, data, src, src, len); else - gcm_tfm->dec_update(aes_ctx, &data, + gcm_tfm->dec_update(aes_ctx, data, src, src, len); } left -= len; @@ -804,7 +798,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, scatterwalk_done(&src_sg_walk, 1, left); } } - gcm_tfm->finalize(aes_ctx, &data, authTag, auth_tag_len); + gcm_tfm->finalize(aes_ctx, data, authTag, auth_tag_len); kernel_fpu_end(); if (!assocmem) @@ -853,7 +847,8 @@ static int helper_rfc4106_encrypt(struct aead_request *req) struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); void *aes_ctx = &(ctx->aes_key_expanded); - u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); + u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); + u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); unsigned int i; __be32 counter = cpu_to_be32(1); @@ -880,7 +875,8 @@ static int helper_rfc4106_decrypt(struct aead_request *req) struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); void *aes_ctx = &(ctx->aes_key_expanded); - u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); + u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); + u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); unsigned int i; if (unlikely(req->assoclen != 16 && req->assoclen != 20)) @@ -1010,7 +1006,8 @@ static int generic_gcmaes_encrypt(struct aead_request *req) struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm); void *aes_ctx = &(ctx->aes_key_expanded); - u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); + u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); + u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); __be32 counter = cpu_to_be32(1); memcpy(iv, req->iv, 12); @@ -1026,7 +1023,8 @@ static int generic_gcmaes_decrypt(struct aead_request *req) struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm); void *aes_ctx = &(ctx->aes_key_expanded); - u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); + u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); + u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); memcpy(iv, req->iv, 12); *((__be32 *)(iv+12)) = counter; diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index a4f00128ea552..a8cc2c83fe1bb 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -19,20 +19,17 @@ #define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32 /* 32-way AVX2/AES-NI parallel cipher functions */ -asmlinkage void camellia_ecb_enc_32way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void camellia_ecb_dec_32way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void camellia_ecb_enc_32way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void camellia_ecb_dec_32way(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void camellia_cbc_dec_32way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void camellia_ctr_32way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void camellia_cbc_dec_32way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void camellia_ctr_32way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); -asmlinkage void camellia_xts_enc_32way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void camellia_xts_dec_32way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void camellia_xts_enc_32way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); +asmlinkage void camellia_xts_dec_32way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); static const struct common_glue_ctx camellia_enc = { .num_funcs = 4, @@ -40,16 +37,16 @@ static const struct common_glue_ctx camellia_enc = { .funcs = { { .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_32way) } + .fn_u = { .ecb = camellia_ecb_enc_32way } }, { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) } + .fn_u = { .ecb = camellia_ecb_enc_16way } }, { .num_blocks = 2, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) } + .fn_u = { .ecb = camellia_enc_blk_2way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) } + .fn_u = { .ecb = camellia_enc_blk } } } }; @@ -59,16 +56,16 @@ static const struct common_glue_ctx camellia_ctr = { .funcs = { { .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_32way) } + .fn_u = { .ctr = camellia_ctr_32way } }, { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) } + .fn_u = { .ctr = camellia_ctr_16way } }, { .num_blocks = 2, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) } + .fn_u = { .ctr = camellia_crypt_ctr_2way } }, { .num_blocks = 1, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) } + .fn_u = { .ctr = camellia_crypt_ctr } } } }; @@ -78,13 +75,13 @@ static const struct common_glue_ctx camellia_enc_xts = { .funcs = { { .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_32way) } + .fn_u = { .xts = camellia_xts_enc_32way } }, { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) } + .fn_u = { .xts = camellia_xts_enc_16way } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) } + .fn_u = { .xts = camellia_xts_enc } } } }; @@ -94,16 +91,16 @@ static const struct common_glue_ctx camellia_dec = { .funcs = { { .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_32way) } + .fn_u = { .ecb = camellia_ecb_dec_32way } }, { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) } + .fn_u = { .ecb = camellia_ecb_dec_16way } }, { .num_blocks = 2, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) } + .fn_u = { .ecb = camellia_dec_blk_2way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) } + .fn_u = { .ecb = camellia_dec_blk } } } }; @@ -113,16 +110,16 @@ static const struct common_glue_ctx camellia_dec_cbc = { .funcs = { { .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_32way) } + .fn_u = { .cbc = camellia_cbc_dec_32way } }, { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) } + .fn_u = { .cbc = camellia_cbc_dec_16way } }, { .num_blocks = 2, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) } + .fn_u = { .cbc = camellia_decrypt_cbc_2way } }, { .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) } + .fn_u = { .cbc = camellia_dec_blk } } } }; @@ -132,13 +129,13 @@ static const struct common_glue_ctx camellia_dec_xts = { .funcs = { { .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_32way) } + .fn_u = { .xts = camellia_xts_dec_32way } }, { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) } + .fn_u = { .xts = camellia_xts_dec_16way } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) } + .fn_u = { .xts = camellia_xts_dec } } } }; @@ -161,8 +158,7 @@ static int ecb_decrypt(struct skcipher_request *req) static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk), - req); + return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req); } static int cbc_decrypt(struct skcipher_request *req) @@ -180,8 +176,7 @@ static int xts_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&camellia_enc_xts, req, - XTS_TWEAK_CAST(camellia_enc_blk), + return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk, &ctx->tweak_ctx, &ctx->crypt_ctx, false); } @@ -190,8 +185,7 @@ static int xts_decrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&camellia_dec_xts, req, - XTS_TWEAK_CAST(camellia_enc_blk), + return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk, &ctx->tweak_ctx, &ctx->crypt_ctx, true); } diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index f28d282779b87..31a82a79f4ac9 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -18,41 +18,36 @@ #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 /* 16-way parallel cipher functions (avx/aes-ni) */ -asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void camellia_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(camellia_ecb_enc_16way); -asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void camellia_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(camellia_ecb_dec_16way); -asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(camellia_cbc_dec_16way); -asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); EXPORT_SYMBOL_GPL(camellia_ctr_16way); -asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); EXPORT_SYMBOL_GPL(camellia_xts_enc_16way); -asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); EXPORT_SYMBOL_GPL(camellia_xts_dec_16way); -void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) +void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, - GLUE_FUNC_CAST(camellia_enc_blk)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_enc_blk); } EXPORT_SYMBOL_GPL(camellia_xts_enc); -void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) +void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, - GLUE_FUNC_CAST(camellia_dec_blk)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_dec_blk); } EXPORT_SYMBOL_GPL(camellia_xts_dec); @@ -62,13 +57,13 @@ static const struct common_glue_ctx camellia_enc = { .funcs = { { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) } + .fn_u = { .ecb = camellia_ecb_enc_16way } }, { .num_blocks = 2, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) } + .fn_u = { .ecb = camellia_enc_blk_2way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) } + .fn_u = { .ecb = camellia_enc_blk } } } }; @@ -78,13 +73,13 @@ static const struct common_glue_ctx camellia_ctr = { .funcs = { { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) } + .fn_u = { .ctr = camellia_ctr_16way } }, { .num_blocks = 2, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) } + .fn_u = { .ctr = camellia_crypt_ctr_2way } }, { .num_blocks = 1, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) } + .fn_u = { .ctr = camellia_crypt_ctr } } } }; @@ -94,10 +89,10 @@ static const struct common_glue_ctx camellia_enc_xts = { .funcs = { { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc_16way) } + .fn_u = { .xts = camellia_xts_enc_16way } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_enc) } + .fn_u = { .xts = camellia_xts_enc } } } }; @@ -107,13 +102,13 @@ static const struct common_glue_ctx camellia_dec = { .funcs = { { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) } + .fn_u = { .ecb = camellia_ecb_dec_16way } }, { .num_blocks = 2, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) } + .fn_u = { .ecb = camellia_dec_blk_2way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) } + .fn_u = { .ecb = camellia_dec_blk } } } }; @@ -123,13 +118,13 @@ static const struct common_glue_ctx camellia_dec_cbc = { .funcs = { { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) } + .fn_u = { .cbc = camellia_cbc_dec_16way } }, { .num_blocks = 2, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) } + .fn_u = { .cbc = camellia_decrypt_cbc_2way } }, { .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) } + .fn_u = { .cbc = camellia_dec_blk } } } }; @@ -139,10 +134,10 @@ static const struct common_glue_ctx camellia_dec_xts = { .funcs = { { .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec_16way) } + .fn_u = { .xts = camellia_xts_dec_16way } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(camellia_xts_dec) } + .fn_u = { .xts = camellia_xts_dec } } } }; @@ -165,8 +160,7 @@ static int ecb_decrypt(struct skcipher_request *req) static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk), - req); + return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req); } static int cbc_decrypt(struct skcipher_request *req) @@ -206,8 +200,7 @@ static int xts_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&camellia_enc_xts, req, - XTS_TWEAK_CAST(camellia_enc_blk), + return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk, &ctx->tweak_ctx, &ctx->crypt_ctx, false); } @@ -216,8 +209,7 @@ static int xts_decrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&camellia_dec_xts, req, - XTS_TWEAK_CAST(camellia_enc_blk), + return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk, &ctx->tweak_ctx, &ctx->crypt_ctx, true); } diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index 7c62db56ffe1b..5f3ed5af68d70 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -18,19 +18,17 @@ #include /* regular block cipher functions */ -asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, bool xor); +asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src, + bool xor); EXPORT_SYMBOL_GPL(__camellia_enc_blk); -asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(camellia_dec_blk); /* 2-way parallel cipher functions */ -asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, bool xor); +asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src, + bool xor); EXPORT_SYMBOL_GPL(__camellia_enc_blk_2way); -asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(camellia_dec_blk_2way); static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) @@ -1267,8 +1265,10 @@ static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, return camellia_setkey(&tfm->base, key, key_len); } -void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src) +void camellia_decrypt_cbc_2way(const void *ctx, u8 *d, const u8 *s) { + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; u128 iv = *src; camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src); @@ -1277,9 +1277,11 @@ void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src) } EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way); -void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) +void camellia_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) { be128 ctrblk; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; if (dst != src) *dst = *src; @@ -1291,9 +1293,11 @@ void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) } EXPORT_SYMBOL_GPL(camellia_crypt_ctr); -void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, le128 *iv) +void camellia_crypt_ctr_2way(const void *ctx, u8 *d, const u8 *s, le128 *iv) { be128 ctrblks[2]; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; if (dst != src) { dst[0] = src[0]; @@ -1315,10 +1319,10 @@ static const struct common_glue_ctx camellia_enc = { .funcs = { { .num_blocks = 2, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) } + .fn_u = { .ecb = camellia_enc_blk_2way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) } + .fn_u = { .ecb = camellia_enc_blk } } } }; @@ -1328,10 +1332,10 @@ static const struct common_glue_ctx camellia_ctr = { .funcs = { { .num_blocks = 2, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) } + .fn_u = { .ctr = camellia_crypt_ctr_2way } }, { .num_blocks = 1, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) } + .fn_u = { .ctr = camellia_crypt_ctr } } } }; @@ -1341,10 +1345,10 @@ static const struct common_glue_ctx camellia_dec = { .funcs = { { .num_blocks = 2, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) } + .fn_u = { .ecb = camellia_dec_blk_2way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) } + .fn_u = { .ecb = camellia_dec_blk } } } }; @@ -1354,10 +1358,10 @@ static const struct common_glue_ctx camellia_dec_cbc = { .funcs = { { .num_blocks = 2, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) } + .fn_u = { .cbc = camellia_decrypt_cbc_2way } }, { .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) } + .fn_u = { .cbc = camellia_dec_blk } } } }; @@ -1373,8 +1377,7 @@ static int ecb_decrypt(struct skcipher_request *req) static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk), - req); + return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req); } static int cbc_decrypt(struct skcipher_request *req) diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index a8a38fffb4a98..da5297475f9ec 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -20,20 +20,17 @@ #define CAST6_PARALLEL_BLOCKS 8 -asmlinkage void cast6_ecb_enc_8way(struct cast6_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void cast6_ecb_dec_8way(struct cast6_ctx *ctx, u8 *dst, - const u8 *src); - -asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src, +asmlinkage void cast6_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void cast6_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src); + +asmlinkage void cast6_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void cast6_ctr_8way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void cast6_xts_enc_8way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); +asmlinkage void cast6_xts_dec_8way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); static int cast6_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) @@ -41,21 +38,21 @@ static int cast6_setkey_skcipher(struct crypto_skcipher *tfm, return cast6_setkey(&tfm->base, key, keylen); } -static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void cast6_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, - GLUE_FUNC_CAST(__cast6_encrypt)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, __cast6_encrypt); } -static void cast6_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void cast6_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, - GLUE_FUNC_CAST(__cast6_decrypt)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, __cast6_decrypt); } -static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void cast6_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) { be128 ctrblk; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; le128_to_be128(&ctrblk, iv); le128_inc(iv); @@ -70,10 +67,10 @@ static const struct common_glue_ctx cast6_enc = { .funcs = { { .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_enc_8way) } + .fn_u = { .ecb = cast6_ecb_enc_8way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) } + .fn_u = { .ecb = __cast6_encrypt } } } }; @@ -83,10 +80,10 @@ static const struct common_glue_ctx cast6_ctr = { .funcs = { { .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_ctr_8way) } + .fn_u = { .ctr = cast6_ctr_8way } }, { .num_blocks = 1, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) } + .fn_u = { .ctr = cast6_crypt_ctr } } } }; @@ -96,10 +93,10 @@ static const struct common_glue_ctx cast6_enc_xts = { .funcs = { { .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc_8way) } + .fn_u = { .xts = cast6_xts_enc_8way } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_enc) } + .fn_u = { .xts = cast6_xts_enc } } } }; @@ -109,10 +106,10 @@ static const struct common_glue_ctx cast6_dec = { .funcs = { { .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_dec_8way) } + .fn_u = { .ecb = cast6_ecb_dec_8way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) } + .fn_u = { .ecb = __cast6_decrypt } } } }; @@ -122,10 +119,10 @@ static const struct common_glue_ctx cast6_dec_cbc = { .funcs = { { .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_cbc_dec_8way) } + .fn_u = { .cbc = cast6_cbc_dec_8way } }, { .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) } + .fn_u = { .cbc = __cast6_decrypt } } } }; @@ -135,10 +132,10 @@ static const struct common_glue_ctx cast6_dec_xts = { .funcs = { { .num_blocks = CAST6_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec_8way) } + .fn_u = { .xts = cast6_xts_dec_8way } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(cast6_xts_dec) } + .fn_u = { .xts = cast6_xts_dec } } } }; @@ -154,8 +151,7 @@ static int ecb_decrypt(struct skcipher_request *req) static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__cast6_encrypt), - req); + return glue_cbc_encrypt_req_128bit(__cast6_encrypt, req); } static int cbc_decrypt(struct skcipher_request *req) @@ -199,8 +195,7 @@ static int xts_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&cast6_enc_xts, req, - XTS_TWEAK_CAST(__cast6_encrypt), + return glue_xts_req_128bit(&cast6_enc_xts, req, __cast6_encrypt, &ctx->tweak_ctx, &ctx->crypt_ctx, false); } @@ -209,8 +204,7 @@ static int xts_decrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&cast6_dec_xts, req, - XTS_TWEAK_CAST(__cast6_encrypt), + return glue_xts_req_128bit(&cast6_dec_xts, req, __cast6_encrypt, &ctx->tweak_ctx, &ctx->crypt_ctx, true); } diff --git a/arch/x86/crypto/chacha-avx512vl-x86_64.S b/arch/x86/crypto/chacha-avx512vl-x86_64.S index 848f9c75fd4f5..596f91e3a774e 100644 --- a/arch/x86/crypto/chacha-avx512vl-x86_64.S +++ b/arch/x86/crypto/chacha-avx512vl-x86_64.S @@ -172,7 +172,7 @@ ENTRY(chacha_2block_xor_avx512vl) # xor remaining bytes from partial register into output mov %rcx,%rax and $0xf,%rcx - jz .Ldone8 + jz .Ldone2 mov %rax,%r9 and $~0xf,%r9 @@ -438,7 +438,7 @@ ENTRY(chacha_4block_xor_avx512vl) # xor remaining bytes from partial register into output mov %rcx,%rax and $0xf,%rcx - jz .Ldone8 + jz .Ldone4 mov %rax,%r9 and $~0xf,%r9 diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index d9b734d0c8cc7..3c6e01520a973 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -170,7 +170,7 @@ continue_block: ## branch into array lea jump_table(%rip), bufp - movzxw (bufp, %rax, 2), len + movzwq (bufp, %rax, 2), len lea crc_array(%rip), bufp lea (bufp, len, 1), bufp JMP_NOSPEC bufp diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index d15b99397480b..d3d91a0abf88f 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -134,7 +134,8 @@ int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, src -= num_blocks - 1; dst -= num_blocks - 1; - gctx->funcs[i].fn_u.cbc(ctx, dst, src); + gctx->funcs[i].fn_u.cbc(ctx, (u8 *)dst, + (const u8 *)src); nbytes -= func_bytes; if (nbytes < bsize) @@ -188,7 +189,9 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, /* Process multi-block batch */ do { - gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk); + gctx->funcs[i].fn_u.ctr(ctx, (u8 *)dst, + (const u8 *)src, + &ctrblk); src += num_blocks; dst += num_blocks; nbytes -= func_bytes; @@ -210,7 +213,8 @@ int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, be128_to_le128(&ctrblk, (be128 *)walk.iv); memcpy(&tmp, walk.src.virt.addr, nbytes); - gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, &tmp, &tmp, + gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, (u8 *)&tmp, + (const u8 *)&tmp, &ctrblk); memcpy(walk.dst.virt.addr, &tmp, nbytes); le128_to_be128((be128 *)walk.iv, &ctrblk); @@ -240,7 +244,8 @@ static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx, if (nbytes >= func_bytes) { do { - gctx->funcs[i].fn_u.xts(ctx, dst, src, + gctx->funcs[i].fn_u.xts(ctx, (u8 *)dst, + (const u8 *)src, walk->iv); src += num_blocks; @@ -354,8 +359,8 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx, } EXPORT_SYMBOL_GPL(glue_xts_req_128bit); -void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv, - common_glue_func_t fn) +void glue_xts_crypt_128bit_one(const void *ctx, u8 *dst, const u8 *src, + le128 *iv, common_glue_func_t fn) { le128 ivblk = *iv; @@ -363,13 +368,13 @@ void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv, gf128mul_x_ble(iv, &ivblk); /* CC <- T xor C */ - u128_xor(dst, src, (u128 *)&ivblk); + u128_xor((u128 *)dst, (const u128 *)src, (u128 *)&ivblk); /* PP <- D(Key2,CC) */ - fn(ctx, (u8 *)dst, (u8 *)dst); + fn(ctx, dst, dst); /* P <- T xor PP */ - u128_xor(dst, dst, (u128 *)&ivblk); + u128_xor((u128 *)dst, (u128 *)dst, (u128 *)&ivblk); } EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit_one); diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c index f7567cbd35b69..80fcb85736e1d 100644 --- a/arch/x86/crypto/nhpoly1305-avx2-glue.c +++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c @@ -29,7 +29,7 @@ static int nhpoly1305_avx2_update(struct shash_desc *desc, return crypto_nhpoly1305_update(desc, src, srclen); do { - unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); + unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_fpu_begin(); crypto_nhpoly1305_update_helper(desc, src, n, _nh_avx2); diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c index a661ede3b5cfa..cc6b7c1a2705d 100644 --- a/arch/x86/crypto/nhpoly1305-sse2-glue.c +++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c @@ -29,7 +29,7 @@ static int nhpoly1305_sse2_update(struct shash_desc *desc, return crypto_nhpoly1305_update(desc, src, srclen); do { - unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE); + unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_fpu_begin(); crypto_nhpoly1305_update_helper(desc, src, n, _nh_sse2); diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 13fd8d3d2da00..f973ace44ad35 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -19,18 +19,16 @@ #define SERPENT_AVX2_PARALLEL_BLOCKS 16 /* 16-way AVX2 parallel cipher functions */ -asmlinkage void serpent_ecb_enc_16way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void serpent_ecb_dec_16way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void serpent_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src); +asmlinkage void serpent_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void serpent_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void serpent_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void serpent_ctr_16way(void *ctx, u128 *dst, const u128 *src, +asmlinkage void serpent_ctr_16way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -asmlinkage void serpent_xts_enc_16way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void serpent_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); +asmlinkage void serpent_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) @@ -44,13 +42,13 @@ static const struct common_glue_ctx serpent_enc = { .funcs = { { .num_blocks = 16, - .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_16way) } + .fn_u = { .ecb = serpent_ecb_enc_16way } }, { .num_blocks = 8, - .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) } + .fn_u = { .ecb = serpent_ecb_enc_8way_avx } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } + .fn_u = { .ecb = __serpent_encrypt } } } }; @@ -60,13 +58,13 @@ static const struct common_glue_ctx serpent_ctr = { .funcs = { { .num_blocks = 16, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_16way) } + .fn_u = { .ctr = serpent_ctr_16way } }, { .num_blocks = 8, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) } + .fn_u = { .ctr = serpent_ctr_8way_avx } }, { .num_blocks = 1, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) } + .fn_u = { .ctr = __serpent_crypt_ctr } } } }; @@ -76,13 +74,13 @@ static const struct common_glue_ctx serpent_enc_xts = { .funcs = { { .num_blocks = 16, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_16way) } + .fn_u = { .xts = serpent_xts_enc_16way } }, { .num_blocks = 8, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) } + .fn_u = { .xts = serpent_xts_enc_8way_avx } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) } + .fn_u = { .xts = serpent_xts_enc } } } }; @@ -92,13 +90,13 @@ static const struct common_glue_ctx serpent_dec = { .funcs = { { .num_blocks = 16, - .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_16way) } + .fn_u = { .ecb = serpent_ecb_dec_16way } }, { .num_blocks = 8, - .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) } + .fn_u = { .ecb = serpent_ecb_dec_8way_avx } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } + .fn_u = { .ecb = __serpent_decrypt } } } }; @@ -108,13 +106,13 @@ static const struct common_glue_ctx serpent_dec_cbc = { .funcs = { { .num_blocks = 16, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_16way) } + .fn_u = { .cbc = serpent_cbc_dec_16way } }, { .num_blocks = 8, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) } + .fn_u = { .cbc = serpent_cbc_dec_8way_avx } }, { .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } + .fn_u = { .cbc = __serpent_decrypt } } } }; @@ -124,13 +122,13 @@ static const struct common_glue_ctx serpent_dec_xts = { .funcs = { { .num_blocks = 16, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_16way) } + .fn_u = { .xts = serpent_xts_dec_16way } }, { .num_blocks = 8, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) } + .fn_u = { .xts = serpent_xts_dec_8way_avx } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) } + .fn_u = { .xts = serpent_xts_dec } } } }; @@ -146,8 +144,7 @@ static int ecb_decrypt(struct skcipher_request *req) static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt), - req); + return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req); } static int cbc_decrypt(struct skcipher_request *req) @@ -166,8 +163,8 @@ static int xts_encrypt(struct skcipher_request *req) struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); return glue_xts_req_128bit(&serpent_enc_xts, req, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx, false); + __serpent_encrypt, &ctx->tweak_ctx, + &ctx->crypt_ctx, false); } static int xts_decrypt(struct skcipher_request *req) @@ -176,8 +173,8 @@ static int xts_decrypt(struct skcipher_request *req) struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); return glue_xts_req_128bit(&serpent_dec_xts, req, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx, true); + __serpent_encrypt, &ctx->tweak_ctx, + &ctx->crypt_ctx, true); } static struct skcipher_alg serpent_algs[] = { diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 7d3dca38a5a2e..7806d1cbe8541 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -20,33 +20,35 @@ #include /* 8-way parallel cipher functions */ -asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(serpent_ecb_enc_8way_avx); -asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(serpent_ecb_dec_8way_avx); -asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(serpent_cbc_dec_8way_avx); -asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); EXPORT_SYMBOL_GPL(serpent_ctr_8way_avx); -asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_xts_enc_8way_avx(const void *ctx, u8 *dst, const u8 *src, le128 *iv); EXPORT_SYMBOL_GPL(serpent_xts_enc_8way_avx); -asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_xts_dec_8way_avx(const void *ctx, u8 *dst, const u8 *src, le128 *iv); EXPORT_SYMBOL_GPL(serpent_xts_dec_8way_avx); -void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) +void __serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) { be128 ctrblk; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; le128_to_be128(&ctrblk, iv); le128_inc(iv); @@ -56,17 +58,15 @@ void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) } EXPORT_SYMBOL_GPL(__serpent_crypt_ctr); -void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) +void serpent_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, - GLUE_FUNC_CAST(__serpent_encrypt)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, __serpent_encrypt); } EXPORT_SYMBOL_GPL(serpent_xts_enc); -void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) +void serpent_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, - GLUE_FUNC_CAST(__serpent_decrypt)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, __serpent_decrypt); } EXPORT_SYMBOL_GPL(serpent_xts_dec); @@ -102,10 +102,10 @@ static const struct common_glue_ctx serpent_enc = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) } + .fn_u = { .ecb = serpent_ecb_enc_8way_avx } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } + .fn_u = { .ecb = __serpent_encrypt } } } }; @@ -115,10 +115,10 @@ static const struct common_glue_ctx serpent_ctr = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) } + .fn_u = { .ctr = serpent_ctr_8way_avx } }, { .num_blocks = 1, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(__serpent_crypt_ctr) } + .fn_u = { .ctr = __serpent_crypt_ctr } } } }; @@ -128,10 +128,10 @@ static const struct common_glue_ctx serpent_enc_xts = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc_8way_avx) } + .fn_u = { .xts = serpent_xts_enc_8way_avx } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_enc) } + .fn_u = { .xts = serpent_xts_enc } } } }; @@ -141,10 +141,10 @@ static const struct common_glue_ctx serpent_dec = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) } + .fn_u = { .ecb = serpent_ecb_dec_8way_avx } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } + .fn_u = { .ecb = __serpent_decrypt } } } }; @@ -154,10 +154,10 @@ static const struct common_glue_ctx serpent_dec_cbc = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) } + .fn_u = { .cbc = serpent_cbc_dec_8way_avx } }, { .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } + .fn_u = { .cbc = __serpent_decrypt } } } }; @@ -167,10 +167,10 @@ static const struct common_glue_ctx serpent_dec_xts = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec_8way_avx) } + .fn_u = { .xts = serpent_xts_dec_8way_avx } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(serpent_xts_dec) } + .fn_u = { .xts = serpent_xts_dec } } } }; @@ -186,8 +186,7 @@ static int ecb_decrypt(struct skcipher_request *req) static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt), - req); + return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req); } static int cbc_decrypt(struct skcipher_request *req) @@ -206,8 +205,8 @@ static int xts_encrypt(struct skcipher_request *req) struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); return glue_xts_req_128bit(&serpent_enc_xts, req, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx, false); + __serpent_encrypt, &ctx->tweak_ctx, + &ctx->crypt_ctx, false); } static int xts_decrypt(struct skcipher_request *req) @@ -216,8 +215,8 @@ static int xts_decrypt(struct skcipher_request *req) struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); return glue_xts_req_128bit(&serpent_dec_xts, req, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx, true); + __serpent_encrypt, &ctx->tweak_ctx, + &ctx->crypt_ctx, true); } static struct skcipher_alg serpent_algs[] = { diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index 5fdf1931d0690..4fed8d26b91a4 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -31,9 +31,11 @@ static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); } -static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) +static void serpent_decrypt_cbc_xway(const void *ctx, u8 *d, const u8 *s) { u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; unsigned int j; for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++) @@ -45,9 +47,11 @@ static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); } -static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) { be128 ctrblk; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; le128_to_be128(&ctrblk, iv); le128_inc(iv); @@ -56,10 +60,12 @@ static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) u128_xor(dst, src, (u128 *)&ctrblk); } -static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, +static void serpent_crypt_ctr_xway(const void *ctx, u8 *d, const u8 *s, le128 *iv) { be128 ctrblks[SERPENT_PARALLEL_BLOCKS]; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; unsigned int i; for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) { @@ -79,10 +85,10 @@ static const struct common_glue_ctx serpent_enc = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) } + .fn_u = { .ecb = serpent_enc_blk_xway } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) } + .fn_u = { .ecb = __serpent_encrypt } } } }; @@ -92,10 +98,10 @@ static const struct common_glue_ctx serpent_ctr = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) } + .fn_u = { .ctr = serpent_crypt_ctr_xway } }, { .num_blocks = 1, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) } + .fn_u = { .ctr = serpent_crypt_ctr } } } }; @@ -105,10 +111,10 @@ static const struct common_glue_ctx serpent_dec = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) } + .fn_u = { .ecb = serpent_dec_blk_xway } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) } + .fn_u = { .ecb = __serpent_decrypt } } } }; @@ -118,10 +124,10 @@ static const struct common_glue_ctx serpent_dec_cbc = { .funcs = { { .num_blocks = SERPENT_PARALLEL_BLOCKS, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) } + .fn_u = { .cbc = serpent_decrypt_cbc_xway } }, { .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) } + .fn_u = { .cbc = __serpent_decrypt } } } }; @@ -137,7 +143,7 @@ static int ecb_decrypt(struct skcipher_request *req) static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt), + return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req); } diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index d561c821788b7..3b36e97ec7abb 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -22,20 +22,17 @@ #define TWOFISH_PARALLEL_BLOCKS 8 /* 8-way parallel cipher functions */ -asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void twofish_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void twofish_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void twofish_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void twofish_ctr_8way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); -asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void twofish_xts_enc_8way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); +asmlinkage void twofish_xts_dec_8way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) @@ -43,22 +40,19 @@ static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, return twofish_setkey(&tfm->base, key, keylen); } -static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src) +static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) { __twofish_enc_blk_3way(ctx, dst, src, false); } -static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void twofish_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, - GLUE_FUNC_CAST(twofish_enc_blk)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, twofish_enc_blk); } -static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) +static void twofish_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - glue_xts_crypt_128bit_one(ctx, dst, src, iv, - GLUE_FUNC_CAST(twofish_dec_blk)); + glue_xts_crypt_128bit_one(ctx, dst, src, iv, twofish_dec_blk); } struct twofish_xts_ctx { @@ -93,13 +87,13 @@ static const struct common_glue_ctx twofish_enc = { .funcs = { { .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) } + .fn_u = { .ecb = twofish_ecb_enc_8way } }, { .num_blocks = 3, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } + .fn_u = { .ecb = twofish_enc_blk_3way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } + .fn_u = { .ecb = twofish_enc_blk } } } }; @@ -109,13 +103,13 @@ static const struct common_glue_ctx twofish_ctr = { .funcs = { { .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) } + .fn_u = { .ctr = twofish_ctr_8way } }, { .num_blocks = 3, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) } + .fn_u = { .ctr = twofish_enc_blk_ctr_3way } }, { .num_blocks = 1, - .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) } + .fn_u = { .ctr = twofish_enc_blk_ctr } } } }; @@ -125,10 +119,10 @@ static const struct common_glue_ctx twofish_enc_xts = { .funcs = { { .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) } + .fn_u = { .xts = twofish_xts_enc_8way } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) } + .fn_u = { .xts = twofish_xts_enc } } } }; @@ -138,13 +132,13 @@ static const struct common_glue_ctx twofish_dec = { .funcs = { { .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) } + .fn_u = { .ecb = twofish_ecb_dec_8way } }, { .num_blocks = 3, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } + .fn_u = { .ecb = twofish_dec_blk_3way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } + .fn_u = { .ecb = twofish_dec_blk } } } }; @@ -154,13 +148,13 @@ static const struct common_glue_ctx twofish_dec_cbc = { .funcs = { { .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) } + .fn_u = { .cbc = twofish_cbc_dec_8way } }, { .num_blocks = 3, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } + .fn_u = { .cbc = twofish_dec_blk_cbc_3way } }, { .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } + .fn_u = { .cbc = twofish_dec_blk } } } }; @@ -170,10 +164,10 @@ static const struct common_glue_ctx twofish_dec_xts = { .funcs = { { .num_blocks = TWOFISH_PARALLEL_BLOCKS, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) } + .fn_u = { .xts = twofish_xts_dec_8way } }, { .num_blocks = 1, - .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) } + .fn_u = { .xts = twofish_xts_dec } } } }; @@ -189,8 +183,7 @@ static int ecb_decrypt(struct skcipher_request *req) static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk), - req); + return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req); } static int cbc_decrypt(struct skcipher_request *req) @@ -208,8 +201,7 @@ static int xts_encrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&twofish_enc_xts, req, - XTS_TWEAK_CAST(twofish_enc_blk), + return glue_xts_req_128bit(&twofish_enc_xts, req, twofish_enc_blk, &ctx->tweak_ctx, &ctx->crypt_ctx, false); } @@ -218,8 +210,7 @@ static int xts_decrypt(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_req_128bit(&twofish_dec_xts, req, - XTS_TWEAK_CAST(twofish_enc_blk), + return glue_xts_req_128bit(&twofish_dec_xts, req, twofish_enc_blk, &ctx->tweak_ctx, &ctx->crypt_ctx, true); } diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 1dc9e29f221e8..768af6075479c 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -25,21 +25,22 @@ static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, return twofish_setkey(&tfm->base, key, keylen); } -static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src) +static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) { __twofish_enc_blk_3way(ctx, dst, src, false); } -static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst, +static inline void twofish_enc_blk_xor_3way(const void *ctx, u8 *dst, const u8 *src) { __twofish_enc_blk_3way(ctx, dst, src, true); } -void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src) +void twofish_dec_blk_cbc_3way(const void *ctx, u8 *d, const u8 *s) { u128 ivs[2]; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; ivs[0] = src[0]; ivs[1] = src[1]; @@ -51,9 +52,11 @@ void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src) } EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); -void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) +void twofish_enc_blk_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) { be128 ctrblk; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; if (dst != src) *dst = *src; @@ -66,10 +69,11 @@ void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) } EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr); -void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, - le128 *iv) +void twofish_enc_blk_ctr_3way(const void *ctx, u8 *d, const u8 *s, le128 *iv) { be128 ctrblks[3]; + u128 *dst = (u128 *)d; + const u128 *src = (const u128 *)s; if (dst != src) { dst[0] = src[0]; @@ -94,10 +98,10 @@ static const struct common_glue_ctx twofish_enc = { .funcs = { { .num_blocks = 3, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } + .fn_u = { .ecb = twofish_enc_blk_3way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } + .fn_u = { .ecb = twofish_enc_blk } } } }; @@ -107,10 +111,10 @@ static const struct common_glue_ctx twofish_ctr = { .funcs = { { .num_blocks = 3, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) } + .fn_u = { .ctr = twofish_enc_blk_ctr_3way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) } + .fn_u = { .ctr = twofish_enc_blk_ctr } } } }; @@ -120,10 +124,10 @@ static const struct common_glue_ctx twofish_dec = { .funcs = { { .num_blocks = 3, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } + .fn_u = { .ecb = twofish_dec_blk_3way } }, { .num_blocks = 1, - .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } + .fn_u = { .ecb = twofish_dec_blk } } } }; @@ -133,10 +137,10 @@ static const struct common_glue_ctx twofish_dec_cbc = { .funcs = { { .num_blocks = 3, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } + .fn_u = { .cbc = twofish_dec_blk_cbc_3way } }, { .num_blocks = 1, - .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } + .fn_u = { .cbc = twofish_dec_blk } } } }; @@ -152,8 +156,7 @@ static int ecb_decrypt(struct skcipher_request *req) static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk), - req); + return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req); } static int cbc_decrypt(struct skcipher_request *req) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 515c0ceeb4a36..29e5675c6d4f2 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -6,6 +6,8 @@ #include #include #include +#include +#include /* @@ -98,13 +100,6 @@ For 32-bit we have the following conventions - kernel is built with #define SIZEOF_PTREGS 21*8 .macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0 - /* - * Push registers and sanitize registers of values that a - * speculation attack might otherwise want to exploit. The - * lower registers are likely clobbered well before they - * could be put to use in a speculative execution gadget. - * Interleave XOR with PUSH for better uop scheduling: - */ .if \save_ret pushq %rsi /* pt_regs->si */ movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */ @@ -114,57 +109,58 @@ For 32-bit we have the following conventions - kernel is built with pushq %rsi /* pt_regs->si */ .endif pushq \rdx /* pt_regs->dx */ - xorl %edx, %edx /* nospec dx */ pushq %rcx /* pt_regs->cx */ - xorl %ecx, %ecx /* nospec cx */ pushq \rax /* pt_regs->ax */ pushq %r8 /* pt_regs->r8 */ - xorl %r8d, %r8d /* nospec r8 */ pushq %r9 /* pt_regs->r9 */ - xorl %r9d, %r9d /* nospec r9 */ pushq %r10 /* pt_regs->r10 */ - xorl %r10d, %r10d /* nospec r10 */ pushq %r11 /* pt_regs->r11 */ - xorl %r11d, %r11d /* nospec r11*/ pushq %rbx /* pt_regs->rbx */ - xorl %ebx, %ebx /* nospec rbx*/ pushq %rbp /* pt_regs->rbp */ - xorl %ebp, %ebp /* nospec rbp*/ pushq %r12 /* pt_regs->r12 */ - xorl %r12d, %r12d /* nospec r12*/ pushq %r13 /* pt_regs->r13 */ - xorl %r13d, %r13d /* nospec r13*/ pushq %r14 /* pt_regs->r14 */ - xorl %r14d, %r14d /* nospec r14*/ pushq %r15 /* pt_regs->r15 */ - xorl %r15d, %r15d /* nospec r15*/ UNWIND_HINT_REGS + .if \save_ret pushq %rsi /* return address on top of stack */ .endif + + /* + * Sanitize registers of values that a speculation attack might + * otherwise want to exploit. The lower registers are likely clobbered + * well before they could be put to use in a speculative execution + * gadget. + */ + xorl %edx, %edx /* nospec dx */ + xorl %ecx, %ecx /* nospec cx */ + xorl %r8d, %r8d /* nospec r8 */ + xorl %r9d, %r9d /* nospec r9 */ + xorl %r10d, %r10d /* nospec r10 */ + xorl %r11d, %r11d /* nospec r11 */ + xorl %ebx, %ebx /* nospec rbx */ + xorl %ebp, %ebp /* nospec rbp */ + xorl %r12d, %r12d /* nospec r12 */ + xorl %r13d, %r13d /* nospec r13 */ + xorl %r14d, %r14d /* nospec r14 */ + xorl %r15d, %r15d /* nospec r15 */ + .endm -.macro POP_REGS pop_rdi=1 skip_r11rcx=0 +.macro POP_REGS pop_rdi=1 popq %r15 popq %r14 popq %r13 popq %r12 popq %rbp popq %rbx - .if \skip_r11rcx - popq %rsi - .else popq %r11 - .endif popq %r10 popq %r9 popq %r8 popq %rax - .if \skip_r11rcx - popq %rsi - .else popq %rcx - .endif popq %rdx popq %rsi .if \pop_rdi @@ -314,6 +310,62 @@ For 32-bit we have the following conventions - kernel is built with #endif +/* + * IBRS kernel mitigation for Spectre_v2. + * + * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers + * the regs it uses (AX, CX, DX). Must be called before the first RET + * instruction (NOTE! UNTRAIN_RET includes a RET instruction) + * + * The optional argument is used to save/restore the current value, + * which is used on the paranoid paths. + * + * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. + */ +.macro IBRS_ENTER save_reg + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + rdmsr + shl $32, %rdx + or %rdx, %rax + mov %rax, \save_reg + test $SPEC_CTRL_IBRS, %eax + jz .Ldo_wrmsr_\@ + lfence + jmp .Lend_\@ +.Ldo_wrmsr_\@: +.endif + + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +.endm + +/* + * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX) + * regs. Must be called after the last RET. + */ +.macro IBRS_EXIT save_reg + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +.ifnb \save_reg + mov \save_reg, %rdx +.else + movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx + andl $(~SPEC_CTRL_IBRS), %edx +.endif + + movl %edx, %eax + shr $32, %rdx + wrmsr +.Lend_\@: +.endm + /* * Mitigate Spectre v1 for conditional swapgs code paths. * diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index f83ca5aa8b779..2d837fb54c31b 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -172,7 +172,7 @@ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI .if \no_user_check == 0 /* coming from usermode? */ - testl $SEGMENT_RPL_MASK, PT_CS(%esp) + testl $USER_SEGMENT_RPL_MASK, PT_CS(%esp) jz .Lend_\@ .endif /* On user-cr3? */ @@ -205,64 +205,76 @@ #define CS_FROM_ENTRY_STACK (1 << 31) #define CS_FROM_USER_CR3 (1 << 30) #define CS_FROM_KERNEL (1 << 29) +#define CS_FROM_ESPFIX (1 << 28) .macro FIXUP_FRAME /* * The high bits of the CS dword (__csh) are used for CS_FROM_*. * Clear them in case hardware didn't do this for us. */ - andl $0x0000ffff, 3*4(%esp) + andl $0x0000ffff, 4*4(%esp) #ifdef CONFIG_VM86 - testl $X86_EFLAGS_VM, 4*4(%esp) + testl $X86_EFLAGS_VM, 5*4(%esp) jnz .Lfrom_usermode_no_fixup_\@ #endif - testl $SEGMENT_RPL_MASK, 3*4(%esp) + testl $USER_SEGMENT_RPL_MASK, 4*4(%esp) jnz .Lfrom_usermode_no_fixup_\@ - orl $CS_FROM_KERNEL, 3*4(%esp) + orl $CS_FROM_KERNEL, 4*4(%esp) /* * When we're here from kernel mode; the (exception) stack looks like: * - * 5*4(%esp) - - * 4*4(%esp) - flags - * 3*4(%esp) - cs - * 2*4(%esp) - ip - * 1*4(%esp) - orig_eax - * 0*4(%esp) - gs / function + * 6*4(%esp) - + * 5*4(%esp) - flags + * 4*4(%esp) - cs + * 3*4(%esp) - ip + * 2*4(%esp) - orig_eax + * 1*4(%esp) - gs / function + * 0*4(%esp) - fs * * Lets build a 5 entry IRET frame after that, such that struct pt_regs * is complete and in particular regs->sp is correct. This gives us - * the original 5 enties as gap: + * the original 6 enties as gap: * - * 12*4(%esp) - - * 11*4(%esp) - gap / flags - * 10*4(%esp) - gap / cs - * 9*4(%esp) - gap / ip - * 8*4(%esp) - gap / orig_eax - * 7*4(%esp) - gap / gs / function - * 6*4(%esp) - ss - * 5*4(%esp) - sp - * 4*4(%esp) - flags - * 3*4(%esp) - cs - * 2*4(%esp) - ip - * 1*4(%esp) - orig_eax - * 0*4(%esp) - gs / function + * 14*4(%esp) - + * 13*4(%esp) - gap / flags + * 12*4(%esp) - gap / cs + * 11*4(%esp) - gap / ip + * 10*4(%esp) - gap / orig_eax + * 9*4(%esp) - gap / gs / function + * 8*4(%esp) - gap / fs + * 7*4(%esp) - ss + * 6*4(%esp) - sp + * 5*4(%esp) - flags + * 4*4(%esp) - cs + * 3*4(%esp) - ip + * 2*4(%esp) - orig_eax + * 1*4(%esp) - gs / function + * 0*4(%esp) - fs */ pushl %ss # ss pushl %esp # sp (points at ss) - addl $6*4, (%esp) # point sp back at the previous context - pushl 6*4(%esp) # flags - pushl 6*4(%esp) # cs - pushl 6*4(%esp) # ip - pushl 6*4(%esp) # orig_eax - pushl 6*4(%esp) # gs / function + addl $7*4, (%esp) # point sp back at the previous context + pushl 7*4(%esp) # flags + pushl 7*4(%esp) # cs + pushl 7*4(%esp) # ip + pushl 7*4(%esp) # orig_eax + pushl 7*4(%esp) # gs / function + pushl 7*4(%esp) # fs .Lfrom_usermode_no_fixup_\@: .endm .macro IRET_FRAME + /* + * We're called with %ds, %es, %fs, and %gs from the interrupted + * frame, so we shouldn't use them. Also, we may be in ESPFIX + * mode and therefore have a nonzero SS base and an offset ESP, + * so any attempt to access the stack needs to use SS. (except for + * accesses through %esp, which automatically use SS.) + */ testl $CS_FROM_KERNEL, 1*4(%esp) jz .Lfinished_frame_\@ @@ -276,31 +288,40 @@ movl 5*4(%esp), %eax # (modified) regs->sp movl 4*4(%esp), %ecx # flags - movl %ecx, -4(%eax) + movl %ecx, %ss:-1*4(%eax) movl 3*4(%esp), %ecx # cs andl $0x0000ffff, %ecx - movl %ecx, -8(%eax) + movl %ecx, %ss:-2*4(%eax) movl 2*4(%esp), %ecx # ip - movl %ecx, -12(%eax) + movl %ecx, %ss:-3*4(%eax) movl 1*4(%esp), %ecx # eax - movl %ecx, -16(%eax) + movl %ecx, %ss:-4*4(%eax) popl %ecx - lea -16(%eax), %esp + lea -4*4(%eax), %esp popl %eax .Lfinished_frame_\@: .endm -.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 +.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 unwind_espfix=0 cld .if \skip_gs == 0 PUSH_GS .endif - FIXUP_FRAME pushl %fs + + pushl %eax + movl $(__KERNEL_PERCPU), %eax + movl %eax, %fs +.if \unwind_espfix > 0 + UNWIND_ESPFIX_STACK +.endif + popl %eax + + FIXUP_FRAME pushl %es pushl %ds pushl \pt_regs_ax @@ -313,8 +334,6 @@ movl $(__USER_DS), %edx movl %edx, %ds movl %edx, %es - movl $(__KERNEL_PERCPU), %edx - movl %edx, %fs .if \skip_gs == 0 SET_KERNEL_GS %edx .endif @@ -324,8 +343,8 @@ .endif .endm -.macro SAVE_ALL_NMI cr3_reg:req - SAVE_ALL +.macro SAVE_ALL_NMI cr3_reg:req unwind_espfix=0 + SAVE_ALL unwind_espfix=\unwind_espfix BUG_IF_WRONG_CR3 @@ -357,6 +376,7 @@ 2: popl %es 3: popl %fs POP_GS \pop + IRET_FRAME .pushsection .fixup, "ax" 4: movl $0, (%esp) jmp 1b @@ -395,7 +415,8 @@ .macro CHECK_AND_APPLY_ESPFIX #ifdef CONFIG_X86_ESPFIX32 -#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8) +#define GDT_ESPFIX_OFFSET (GDT_ENTRY_ESPFIX_SS * 8) +#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + GDT_ESPFIX_OFFSET ALTERNATIVE "jmp .Lend_\@", "", X86_BUG_ESPFIX @@ -729,7 +750,6 @@ ENTRY(__switch_to_asm) movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset #endif -#ifdef CONFIG_RETPOLINE /* * When switching from a shallower to a deeper call stack * the RSB may either underflow or use entries populated @@ -738,7 +758,6 @@ ENTRY(__switch_to_asm) * speculative execution to prevent attack. */ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -#endif /* restore callee-saved registers */ popfl @@ -848,9 +867,10 @@ GLOBAL(__begin_SYSENTER_singlestep_region) * Xen doesn't set %esp to be precisely what the normal SYSENTER * entry point expects, so fix it up before using the normal path. */ -ENTRY(xen_sysenter_target) +SYM_CODE_START(xen_sysenter_target) addl $5*4, %esp /* remove xen-provided frame */ jmp .Lsysenter_past_esp +SYM_CODE_END(xen_sysenter_target) #endif /* @@ -1075,7 +1095,6 @@ restore_all: /* Restore user state */ RESTORE_REGS pop=4 # skip orig_eax/error_code .Lirq_return: - IRET_FRAME /* * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization * when returning from IPI handler and when returning from @@ -1128,30 +1147,43 @@ ENDPROC(entry_INT80_32) * We can't call C functions using the ESPFIX stack. This code reads * the high word of the segment base from the GDT and swiches to the * normal stack and adjusts ESP with the matching offset. + * + * We might be on user CR3 here, so percpu data is not mapped and we can't + * access the GDT through the percpu segment. Instead, use SGDT to find + * the cpu_entry_area alias of the GDT. */ #ifdef CONFIG_X86_ESPFIX32 /* fixup the stack */ - mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ - mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ + pushl %ecx + subl $2*4, %esp + sgdt (%esp) + movl 2(%esp), %ecx /* GDT address */ + /* + * Careful: ECX is a linear pointer, so we need to force base + * zero. %cs is the only known-linear segment we have right now. + */ + mov %cs:GDT_ESPFIX_OFFSET + 4(%ecx), %al /* bits 16..23 */ + mov %cs:GDT_ESPFIX_OFFSET + 7(%ecx), %ah /* bits 24..31 */ shl $16, %eax + addl $2*4, %esp + popl %ecx addl %esp, %eax /* the adjusted stack pointer */ pushl $__KERNEL_DS pushl %eax lss (%esp), %esp /* switch to the normal stack segment */ #endif .endm + .macro UNWIND_ESPFIX_STACK + /* It's safe to clobber %eax, all other regs need to be preserved */ #ifdef CONFIG_X86_ESPFIX32 movl %ss, %eax /* see if on espfix stack */ cmpw $__ESPFIX_SS, %ax - jne 27f - movl $__KERNEL_DS, %eax - movl %eax, %ds - movl %eax, %es + jne .Lno_fixup_\@ /* switch to normal stack */ FIXUP_ESPFIX_STACK -27: +.Lno_fixup_\@: #endif .endm @@ -1341,11 +1373,6 @@ END(spurious_interrupt_bug) #ifdef CONFIG_XEN_PV ENTRY(xen_hypervisor_callback) - pushl $-1 /* orig_ax = -1 => not a system call */ - SAVE_ALL - ENCODE_FRAME_POINTER - TRACE_IRQS_OFF - /* * Check to see if we got the event in the critical * region in xen_iret_direct, after we've reenabled @@ -1353,16 +1380,17 @@ ENTRY(xen_hypervisor_callback) * iret instruction's behaviour where it delivers a * pending interrupt when enabling interrupts: */ - movl PT_EIP(%esp), %eax - cmpl $xen_iret_start_crit, %eax + cmpl $xen_iret_start_crit, (%esp) jb 1f - cmpl $xen_iret_end_crit, %eax + cmpl $xen_iret_end_crit, (%esp) jae 1f - - jmp xen_iret_crit_fixup - -ENTRY(xen_do_upcall) -1: mov %esp, %eax + call xen_iret_crit_fixup +1: + pushl $-1 /* orig_ax = -1 => not a system call */ + SAVE_ALL + ENCODE_FRAME_POINTER + TRACE_IRQS_OFF + mov %esp, %eax call xen_evtchn_do_upcall #ifndef CONFIG_PREEMPTION call xen_maybe_preempt_hcall @@ -1449,10 +1477,9 @@ END(page_fault) common_exception_read_cr2: /* the function address is in %gs's slot on the stack */ - SAVE_ALL switch_stacks=1 skip_gs=1 + SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 ENCODE_FRAME_POINTER - UNWIND_ESPFIX_STACK /* fixup %gs */ GS_TO_REG %ecx @@ -1474,9 +1501,8 @@ END(common_exception_read_cr2) common_exception: /* the function address is in %gs's slot on the stack */ - SAVE_ALL switch_stacks=1 skip_gs=1 + SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1 ENCODE_FRAME_POINTER - UNWIND_ESPFIX_STACK /* fixup %gs */ GS_TO_REG %ecx @@ -1515,6 +1541,10 @@ ENTRY(nmi) ASM_CLAC #ifdef CONFIG_X86_ESPFIX32 + /* + * ESPFIX_SS is only ever set on the return to user path + * after we've switched to the entry stack. + */ pushl %eax movl %ss, %eax cmpw $__ESPFIX_SS, %ax @@ -1550,6 +1580,11 @@ ENTRY(nmi) movl %ebx, %esp .Lnmi_return: +#ifdef CONFIG_X86_ESPFIX32 + testl $CS_FROM_ESPFIX, PT_CS(%esp) + jnz .Lnmi_from_espfix +#endif + CHECK_AND_APPLY_ESPFIX RESTORE_ALL_NMI cr3_reg=%edi pop=4 jmp .Lirq_return @@ -1557,23 +1592,42 @@ ENTRY(nmi) #ifdef CONFIG_X86_ESPFIX32 .Lnmi_espfix_stack: /* - * create the pointer to lss back + * Create the pointer to LSS back */ pushl %ss pushl %esp addl $4, (%esp) - /* copy the iret frame of 12 bytes */ - .rept 3 - pushl 16(%esp) - .endr - pushl %eax - SAVE_ALL_NMI cr3_reg=%edi + + /* Copy the (short) IRET frame */ + pushl 4*4(%esp) # flags + pushl 4*4(%esp) # cs + pushl 4*4(%esp) # ip + + pushl %eax # orig_ax + + SAVE_ALL_NMI cr3_reg=%edi unwind_espfix=1 ENCODE_FRAME_POINTER - FIXUP_ESPFIX_STACK # %eax == %esp + + /* clear CS_FROM_KERNEL, set CS_FROM_ESPFIX */ + xorl $(CS_FROM_ESPFIX | CS_FROM_KERNEL), PT_CS(%esp) + xorl %edx, %edx # zero error code - call do_nmi + movl %esp, %eax # pt_regs pointer + jmp .Lnmi_from_sysenter_stack + +.Lnmi_from_espfix: RESTORE_ALL_NMI cr3_reg=%edi - lss 12+4(%esp), %esp # back to espfix stack + /* + * Because we cleared CS_FROM_KERNEL, IRET_FRAME 'forgot' to + * fix up the gap and long frame: + * + * 3 - original frame (exception) + * 2 - ESPFIX block (above) + * 6 - gap (FIXUP_FRAME) + * 5 - long frame (FIXUP_FRAME) + * 1 - orig_ax + */ + lss (1+5+6)*4(%esp), %esp # back to espfix stack jmp .Lirq_return #endif END(nmi) @@ -1592,6 +1646,7 @@ ENTRY(int3) END(int3) ENTRY(general_protection) + ASM_CLAC pushl $do_general_protection jmp common_exception END(general_protection) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index b7c3ea4cb19d4..c82136030d58f 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -172,6 +172,10 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) /* IRQs are off. */ movq %rax, %rdi movq %rsp, %rsi + + /* clobbers %rax, make sure it is after saving the syscall nr */ + IBRS_ENTER + call do_syscall_64 /* returns with IRQs disabled */ TRACE_IRQS_IRETQ /* we're about to change IF */ @@ -248,9 +252,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) * perf profiles. Nothing jumps here. */ syscall_return_via_sysret: - /* rcx and r11 are already restored (see code above) */ - UNWIND_HINT_EMPTY - POP_REGS pop_rdi=0 skip_r11rcx=1 + IBRS_EXIT + POP_REGS pop_rdi=0 /* * Now all regs are restored except RSP and RDI. @@ -258,6 +261,7 @@ syscall_return_via_sysret: */ movq %rsp, %rdi movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp + UNWIND_HINT_EMPTY pushq RSP-RDI(%rdi) /* RSP */ pushq (%rdi) /* RDI */ @@ -301,7 +305,6 @@ ENTRY(__switch_to_asm) movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset #endif -#ifdef CONFIG_RETPOLINE /* * When switching from a shallower to a deeper call stack * the RSB may either underflow or use entries populated @@ -310,7 +313,6 @@ ENTRY(__switch_to_asm) * speculative execution to prevent attack. */ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW -#endif /* restore callee-saved registers */ popq %r15 @@ -512,7 +514,7 @@ END(spurious_entries_start) * +----------------------------------------------------+ */ ENTRY(interrupt_entry) - UNWIND_HINT_FUNC + UNWIND_HINT_IRET_REGS offset=16 ASM_CLAC cld @@ -544,9 +546,9 @@ ENTRY(interrupt_entry) pushq 5*8(%rdi) /* regs->eflags */ pushq 4*8(%rdi) /* regs->cs */ pushq 3*8(%rdi) /* regs->ip */ + UNWIND_HINT_IRET_REGS pushq 2*8(%rdi) /* regs->orig_ax */ pushq 8(%rdi) /* return address */ - UNWIND_HINT_FUNC movq (%rdi), %rdi jmp 2f @@ -622,6 +624,7 @@ GLOBAL(retint_user) TRACE_IRQS_IRETQ GLOBAL(swapgs_restore_regs_and_return_to_usermode) + IBRS_EXIT #ifdef CONFIG_DEBUG_ENTRY /* Assert that pt_regs indicates user mode. */ testb $3, CS(%rsp) @@ -637,6 +640,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode) */ movq %rsp, %rdi movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp + UNWIND_HINT_EMPTY /* Copy the IRET frame to the trampoline stack. */ pushq 6*8(%rdi) /* SS */ @@ -1247,7 +1251,13 @@ ENTRY(paranoid_entry) */ FENCE_SWAPGS_KERNEL_ENTRY - ret + /* + * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like + * CR3 above, keep the old value in a callee saved register. + */ + IBRS_ENTER save_reg=%r15 + + RET END(paranoid_entry) /* @@ -1275,12 +1285,20 @@ ENTRY(paranoid_exit) jmp .Lparanoid_exit_restore .Lparanoid_exit_no_swapgs: TRACE_IRQS_IRETQ_DEBUG + + /* + * Must restore IBRS state before both CR3 and %GS since we need access + * to the per-CPU x86_spec_ctrl_shadow variable. + */ + IBRS_EXIT save_reg=%r15 + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%rbx save_reg=%r14 .Lparanoid_exit_restore: jmp restore_regs_and_return_to_kernel END(paranoid_exit) + /* * Save all registers in pt_regs, and switch GS if needed. */ @@ -1300,6 +1318,7 @@ ENTRY(error_entry) FENCE_SWAPGS_USER_ENTRY /* We have user CR3. Change to kernel CR3. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + IBRS_ENTER .Lerror_entry_from_usermode_after_swapgs: /* Put us onto the real thread stack. */ @@ -1355,6 +1374,7 @@ ENTRY(error_entry) SWAPGS FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rax + IBRS_ENTER /* * Pretend that the exception came from user mode: set up pt_regs @@ -1460,6 +1480,8 @@ ENTRY(nmi) PUSH_AND_CLEAR_REGS rdx=(%rdx) ENCODE_FRAME_POINTER + IBRS_ENTER + /* * At this point we no longer need to worry about stack damage * due to nesting -- we're on the normal thread stack and we're @@ -1683,6 +1705,9 @@ end_repeat_nmi: movq $-1, %rsi call do_nmi + /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */ + IBRS_EXIT save_reg=%r15 + /* Always restore stashed CR3 value (see paranoid_entry) */ RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 @@ -1739,7 +1764,7 @@ ENTRY(rewind_stack_do_exit) movq PER_CPU_VAR(cpu_current_top_of_stack), %rax leaq -PTREGS_SIZE(%rax), %rsp - UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE + UNWIND_HINT_REGS call do_exit END(rewind_stack_do_exit) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 39913770a44d5..c3c4ea4a6711a 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -4,7 +4,6 @@ * * Copyright 2000-2002 Andi Kleen, SuSE Labs. */ -#include "calling.h" #include #include #include @@ -17,6 +16,8 @@ #include #include +#include "calling.h" + .section .entry.text, "ax" /* @@ -106,6 +107,8 @@ ENTRY(entry_SYSENTER_compat) xorl %r15d, %r15d /* nospec r15 */ cld + IBRS_ENTER + /* * SYSENTER doesn't filter flags, so we need to clear NT and AC * ourselves. To save a few cycles, we can check whether @@ -253,6 +256,8 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe) */ TRACE_IRQS_OFF + IBRS_ENTER + movq %rsp, %rdi call do_fast_syscall_32 /* XEN PV guests always use IRET path */ @@ -267,6 +272,9 @@ sysret32_from_system_call: */ STACKLEAK_ERASE TRACE_IRQS_ON /* User mode traces as IRQs on. */ + + IBRS_EXIT + movq RBX(%rsp), %rbx /* pt_regs->rbx */ movq RBP(%rsp), %rbp /* pt_regs->rbp */ movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */ @@ -408,6 +416,7 @@ ENTRY(entry_INT80_compat) * gate turned them off. */ TRACE_IRQS_OFF + IBRS_ENTER movq %rsp, %rdi call do_int80_syscall_32 diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index aa3336a7cb150..7d17b3addbbb3 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -10,13 +10,11 @@ #ifdef CONFIG_IA32_EMULATION /* On X86_64, we use struct pt_regs * to pass parameters to syscalls */ #define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *); - -/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */ -extern asmlinkage long sys_ni_syscall(const struct pt_regs *); - +#define __sys_ni_syscall __ia32_sys_ni_syscall #else /* CONFIG_IA32_EMULATION */ #define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); +#define __sys_ni_syscall sys_ni_syscall #endif /* CONFIG_IA32_EMULATION */ #include @@ -29,6 +27,6 @@ __visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = * Smells like a compiler bug -- it doesn't work * when the & below is removed. */ - [0 ... __NR_syscall_compat_max] = &sys_ni_syscall, + [0 ... __NR_syscall_compat_max] = &__sys_ni_syscall, #include }; diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index b1bf31713374a..adf619a856e8d 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -4,11 +4,17 @@ #include #include #include +#include #include #include -/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */ -extern asmlinkage long sys_ni_syscall(const struct pt_regs *); +extern asmlinkage long sys_ni_syscall(void); + +SYSCALL_DEFINE0(ni_syscall) +{ + return sys_ni_syscall(); +} + #define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *); #define __SYSCALL_X32(nr, sym, qual) __SYSCALL_64(nr, sym, qual) #include @@ -23,7 +29,7 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { * Smells like a compiler bug -- it doesn't work * when the & below is removed. */ - [0 ... __NR_syscall_max] = &sys_ni_syscall, + [0 ... __NR_syscall_max] = &__x64_sys_ni_syscall, #include }; @@ -40,7 +46,7 @@ asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = { * Smells like a compiler bug -- it doesn't work * when the & below is removed. */ - [0 ... __NR_syscall_x32_max] = &sys_ni_syscall, + [0 ... __NR_syscall_x32_max] = &__x64_sys_ni_syscall, #include }; diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 3fe02546aed35..15908eb9b17e5 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -124,13 +124,13 @@ 110 i386 iopl sys_iopl __ia32_sys_iopl 111 i386 vhangup sys_vhangup __ia32_sys_vhangup 112 i386 idle -113 i386 vm86old sys_vm86old sys_ni_syscall +113 i386 vm86old sys_vm86old __ia32_sys_ni_syscall 114 i386 wait4 sys_wait4 __ia32_compat_sys_wait4 115 i386 swapoff sys_swapoff __ia32_sys_swapoff 116 i386 sysinfo sys_sysinfo __ia32_compat_sys_sysinfo 117 i386 ipc sys_ipc __ia32_compat_sys_ipc 118 i386 fsync sys_fsync __ia32_sys_fsync -119 i386 sigreturn sys_sigreturn sys32_sigreturn +119 i386 sigreturn sys_sigreturn __ia32_compat_sys_sigreturn 120 i386 clone sys_clone __ia32_compat_sys_x86_clone 121 i386 setdomainname sys_setdomainname __ia32_sys_setdomainname 122 i386 uname sys_newuname __ia32_sys_newuname @@ -177,14 +177,14 @@ 163 i386 mremap sys_mremap __ia32_sys_mremap 164 i386 setresuid sys_setresuid16 __ia32_sys_setresuid16 165 i386 getresuid sys_getresuid16 __ia32_sys_getresuid16 -166 i386 vm86 sys_vm86 sys_ni_syscall +166 i386 vm86 sys_vm86 __ia32_sys_ni_syscall 167 i386 query_module 168 i386 poll sys_poll __ia32_sys_poll 169 i386 nfsservctl 170 i386 setresgid sys_setresgid16 __ia32_sys_setresgid16 171 i386 getresgid sys_getresgid16 __ia32_sys_getresgid16 172 i386 prctl sys_prctl __ia32_sys_prctl -173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn +173 i386 rt_sigreturn sys_rt_sigreturn __ia32_compat_sys_rt_sigreturn 174 i386 rt_sigaction sys_rt_sigaction __ia32_compat_sys_rt_sigaction 175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_compat_sys_rt_sigprocmask 176 i386 rt_sigpending sys_rt_sigpending __ia32_compat_sys_rt_sigpending diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 0f2154106d016..5aae81f40bc3a 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -178,7 +178,7 @@ quiet_cmd_vdso = VDSO $@ sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' VDSO_LDFLAGS = -shared --hash-style=both --build-id \ - $(call ld-option, --eh-frame-hdr) -Bsymbolic + $(call ld-option, --eh-frame-hdr) -Bsymbolic -z noexecstack GCOV_PROFILE := n quiet_cmd_vdso_and_check = VDSO $@ diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c index 240626e7f55aa..43842fade8fa1 100644 --- a/arch/x86/entry/vdso/vdso32-setup.c +++ b/arch/x86/entry/vdso/vdso32-setup.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index f5937742b2901..3613cfb83c6dc 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -323,7 +323,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) static __init int vdso_setup(char *s) { vdso64_enabled = simple_strtoul(s, NULL, 0); - return 0; + return 1; } __setup("vdso=", vdso_setup); diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig index 9a7a1446cb3a0..4a809c6cbd2f5 100644 --- a/arch/x86/events/Kconfig +++ b/arch/x86/events/Kconfig @@ -10,11 +10,11 @@ config PERF_EVENTS_INTEL_UNCORE available on NehalemEX and more modern processors. config PERF_EVENTS_INTEL_RAPL - tristate "Intel rapl performance events" - depends on PERF_EVENTS && CPU_SUP_INTEL && PCI + tristate "Intel/AMD rapl performance events" + depends on PERF_EVENTS && (CPU_SUP_INTEL || CPU_SUP_AMD) && PCI default y ---help--- - Include support for Intel rapl performance events for power + Include support for Intel and AMD rapl performance events for power monitoring on modern processors. config PERF_EVENTS_INTEL_CSTATE diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile index 9e07f554333fb..726e83c0a31a1 100644 --- a/arch/x86/events/Makefile +++ b/arch/x86/events/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y += core.o probe.o +obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += rapl.o obj-y += amd/ obj-$(CONFIG_X86_LOCAL_APIC) += msr.o obj-$(CONFIG_CPU_SUP_INTEL) += intel/ diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 64c3e70b0556b..7198b5f11706e 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -14,6 +14,10 @@ static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp); static unsigned long perf_nmi_window; +/* AMD Event 0xFFF: Merge. Used with Large Increment per Cycle events */ +#define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL) +#define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE) + static __initconst const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -246,6 +250,7 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287, @@ -301,6 +306,25 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel) return offset; } +/* + * AMD64 events are detected based on their event codes. + */ +static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) +{ + return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff); +} + +static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc) +{ + if (!(x86_pmu.flags & PMU_FL_PAIR)) + return false; + + switch (amd_get_event_code(hwc)) { + case 0x003: return true; /* Retired SSE/AVX FLOPs */ + default: return false; + } +} + static int amd_core_hw_config(struct perf_event *event) { if (event->attr.exclude_host && event->attr.exclude_guest) @@ -316,15 +340,10 @@ static int amd_core_hw_config(struct perf_event *event) else if (event->attr.exclude_guest) event->hw.config |= AMD64_EVENTSEL_HOSTONLY; - return 0; -} + if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw)) + event->hw.flags |= PERF_X86_EVENT_PAIR; -/* - * AMD64 events are detected based on their event codes. - */ -static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) -{ - return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff); + return 0; } static inline int amd_is_nb_event(struct hw_perf_event *hwc) @@ -864,6 +883,29 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx, } } +static struct event_constraint pair_constraint; + +static struct event_constraint * +amd_get_event_constraints_f17h(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (amd_is_pair_event_code(hwc)) + return &pair_constraint; + + return &unconstrained; +} + +static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (is_counter_pair(hwc)) + --cpuc->n_pair; +} + static ssize_t amd_event_sysfs_show(char *page, u64 config) { u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) | @@ -907,33 +949,15 @@ static __initconst const struct x86_pmu amd_pmu = { static int __init amd_core_pmu_init(void) { + u64 even_ctr_mask = 0ULL; + int i; + if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) return 0; - /* Avoid calulating the value each time in the NMI handler */ + /* Avoid calculating the value each time in the NMI handler */ perf_nmi_window = msecs_to_jiffies(100); - switch (boot_cpu_data.x86) { - case 0x15: - pr_cont("Fam15h "); - x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; - break; - case 0x17: - pr_cont("Fam17h "); - /* - * In family 17h, there are no event constraints in the PMC hardware. - * We fallback to using default amd_get_event_constraints. - */ - break; - case 0x18: - pr_cont("Fam18h "); - /* Using default amd_get_event_constraints. */ - break; - default: - pr_err("core perfctr but no constraints; unknown hardware!\n"); - return -ENODEV; - } - /* * If core performance counter extensions exists, we must use * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also @@ -948,6 +972,32 @@ static int __init amd_core_pmu_init(void) */ x86_pmu.amd_nb_constraints = 0; + if (boot_cpu_data.x86 == 0x15) { + pr_cont("Fam15h "); + x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; + } + if (boot_cpu_data.x86 >= 0x17) { + pr_cont("Fam17h+ "); + /* + * Family 17h and compatibles have constraints for Large + * Increment per Cycle events: they may only be assigned an + * even numbered counter that has a consecutive adjacent odd + * numbered counter following it. + */ + for (i = 0; i < x86_pmu.num_counters - 1; i += 2) + even_ctr_mask |= 1 << i; + + pair_constraint = (struct event_constraint) + __EVENT_CONSTRAINT(0, even_ctr_mask, 0, + x86_pmu.num_counters / 2, 0, + PERF_X86_EVENT_PAIR); + + x86_pmu.get_event_constraints = amd_get_event_constraints_f17h; + x86_pmu.put_event_constraints = amd_put_event_constraints_f17h; + x86_pmu.perf_ctr_pair_en = AMD_MERGE_EVENT_ENABLE; + x86_pmu.flags |= PMU_FL_PAIR; + } + pr_cont("core perfctr, "); return 0; } diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 26c36357c4c9c..2e930d8c04d95 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -89,6 +89,8 @@ struct perf_ibs { u64 max_period; unsigned long offset_mask[1]; int offset_max; + unsigned int fetch_count_reset_broken : 1; + unsigned int fetch_ignore_if_zero_rip : 1; struct cpu_perf_ibs __percpu *pcpu; struct attribute **format_attrs; @@ -310,6 +312,16 @@ static int perf_ibs_init(struct perf_event *event) hwc->config_base = perf_ibs->msr; hwc->config = config; + /* + * rip recorded by IbsOpRip will not be consistent with rsp and rbp + * recorded as part of interrupt regs. Thus we need to use rip from + * interrupt regs while unwinding call stack. Setting _EARLY flag + * makes sure we unwind call-stack before perf sample rip is set to + * IbsOpRip. + */ + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY; + return 0; } @@ -334,11 +346,15 @@ static u64 get_ibs_op_count(u64 config) { u64 count = 0; + /* + * If the internal 27-bit counter rolled over, the count is MaxCnt + * and the lower 7 bits of CurCnt are randomized. + * Otherwise CurCnt has the full 27-bit current counter value. + */ if (config & IBS_OP_VAL) - count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */ - - if (ibs_caps & IBS_CAPS_RDWROPCNT) - count += (config & IBS_OP_CUR_CNT) >> 32; + count = (config & IBS_OP_MAX_CNT) << 4; + else if (ibs_caps & IBS_CAPS_RDWROPCNT) + count = (config & IBS_OP_CUR_CNT) >> 32; return count; } @@ -363,7 +379,12 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event, static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs, struct hw_perf_event *hwc, u64 config) { - wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask); + u64 tmp = hwc->config | config; + + if (perf_ibs->fetch_count_reset_broken) + wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask); + + wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask); } /* @@ -551,6 +572,7 @@ static struct perf_ibs perf_ibs_op = { .start = perf_ibs_start, .stop = perf_ibs_stop, .read = perf_ibs_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }, .msr = MSR_AMD64_IBSOPCTL, .config_mask = IBS_OP_CONFIG_MASK, @@ -626,18 +648,24 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) perf_ibs->offset_max, offset + 1); } while (offset < offset_max); + /* + * Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately + * depending on their availability. + * Can't add to offset_max as they are staggered + */ if (event->attr.sample_type & PERF_SAMPLE_RAW) { - /* - * Read IbsBrTarget and IbsOpData4 separately - * depending on their availability. - * Can't add to offset_max as they are staggered - */ - if (ibs_caps & IBS_CAPS_BRNTRGT) { - rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++); - size++; + if (perf_ibs == &perf_ibs_op) { + if (ibs_caps & IBS_CAPS_BRNTRGT) { + rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++); + size++; + } + if (ibs_caps & IBS_CAPS_OPDATA4) { + rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++); + size++; + } } - if (ibs_caps & IBS_CAPS_OPDATA4) { - rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++); + if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) { + rdmsrl(MSR_AMD64_ICIBSEXTDCTL, *buf++); size++; } } @@ -647,6 +675,10 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) { regs.flags &= ~PERF_EFLAGS_EXACT; } else { + /* Workaround for erratum #1197 */ + if (perf_ibs->fetch_ignore_if_zero_rip && !(ibs_data.regs[1])) + goto out; + set_linear_ip(®s, ibs_data.regs[1]); regs.flags |= PERF_EFLAGS_EXACT; } @@ -661,6 +693,14 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) data.raw = &raw; } + /* + * rip recorded by IbsOpRip will not be consistent with rsp and rbp + * recorded as part of interrupt regs. Thus we need to use rip from + * interrupt regs while unwinding call stack. + */ + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + data.callchain = perf_callchain(event, iregs); + throttle = perf_event_overflow(event, &data, ®s); out: if (throttle) { @@ -733,6 +773,16 @@ static __init void perf_event_ibs_init(void) { struct attribute **attr = ibs_op_format_attrs; + /* + * Some chips fail to reset the fetch count when it is written; instead + * they need a 0-1 transition of IbsFetchEn. + */ + if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18) + perf_ibs_fetch.fetch_count_reset_broken = 1; + + if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10) + perf_ibs_fetch.fetch_ignore_if_zero_rip = 1; + perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); if (ibs_caps & IBS_CAPS_OPCNT) { diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index fb616203ce427..2da6139b0977f 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -18,8 +18,6 @@ #include "../perf_event.h" #include "iommu.h" -#define COUNTER_SHIFT 16 - /* iommu pmu conf masks */ #define GET_CSOURCE(x) ((x)->conf & 0xFFULL) #define GET_DEVID(x) (((x)->conf >> 8) & 0xFFFFULL) @@ -81,12 +79,12 @@ static struct attribute_group amd_iommu_events_group = { }; struct amd_iommu_event_desc { - struct kobj_attribute attr; + struct device_attribute attr; const char *event; }; -static ssize_t _iommu_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +static ssize_t _iommu_event_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct amd_iommu_event_desc *event = container_of(attr, struct amd_iommu_event_desc, attr); @@ -285,22 +283,31 @@ static void perf_iommu_start(struct perf_event *event, int flags) WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); hwc->state = 0; + /* + * To account for power-gating, which prevents write to + * the counter, we need to enable the counter + * before setting up counter register. + */ + perf_iommu_enable_event(event); + if (flags & PERF_EF_RELOAD) { - u64 prev_raw_count = local64_read(&hwc->prev_count); + u64 count = 0; struct amd_iommu *iommu = perf_event_2_iommu(event); + /* + * Since the IOMMU PMU only support counting mode, + * the counter always start with value zero. + */ amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr, - IOMMU_PC_COUNTER_REG, &prev_raw_count); + IOMMU_PC_COUNTER_REG, &count); } - perf_iommu_enable_event(event); perf_event_update_userpage(event); - } static void perf_iommu_read(struct perf_event *event) { - u64 count, prev, delta; + u64 count; struct hw_perf_event *hwc = &event->hw; struct amd_iommu *iommu = perf_event_2_iommu(event); @@ -311,14 +318,11 @@ static void perf_iommu_read(struct perf_event *event) /* IOMMU pc counter register is only 48 bits */ count &= GENMASK_ULL(47, 0); - prev = local64_read(&hwc->prev_count); - if (local64_cmpxchg(&hwc->prev_count, prev, count) != prev) - return; - - /* Handle 48-bit counter overflow */ - delta = (count << COUNTER_SHIFT) - (prev << COUNTER_SHIFT); - delta >>= COUNTER_SHIFT; - local64_add(delta, &event->count); + /* + * Since the counter always start with value zero, + * simply just accumulate the count for the event. + */ + local64_add(count, &event->count); } static void perf_iommu_stop(struct perf_event *event, int flags) @@ -328,15 +332,16 @@ static void perf_iommu_stop(struct perf_event *event, int flags) if (hwc->state & PERF_HES_UPTODATE) return; + /* + * To account for power-gating, in which reading the counter would + * return zero, we need to read the register before disabling. + */ + perf_iommu_read(event); + hwc->state |= PERF_HES_UPTODATE; + perf_iommu_disable_event(event); WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); hwc->state |= PERF_HES_STOPPED; - - if (hwc->state & PERF_HES_UPTODATE) - return; - - perf_iommu_read(event); - hwc->state |= PERF_HES_UPTODATE; } static int perf_iommu_add(struct perf_event *event, int flags) @@ -379,7 +384,7 @@ static __init int _init_events_attrs(void) while (amd_iommu_v2_event_descs[i].attr.attr.name) i++; - attrs = kcalloc(i + 1, sizeof(struct attribute **), GFP_KERNEL); + attrs = kcalloc(i + 1, sizeof(*attrs), GFP_KERNEL); if (!attrs) return -ENOMEM; diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c index abef51320e3a5..c4892b7d0c36b 100644 --- a/arch/x86/events/amd/power.c +++ b/arch/x86/events/amd/power.c @@ -217,6 +217,7 @@ static struct pmu pmu_class = { .stop = pmu_event_stop, .read = pmu_event_read, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .module = THIS_MODULE, }; static int power_cpu_exit(unsigned int cpu) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index a6ea07f2aa848..4d867a752f0ec 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -190,15 +190,12 @@ static int amd_uncore_event_init(struct perf_event *event) /* * NB and Last level cache counters (MSRs) are shared across all cores - * that share the same NB / Last level cache. Interrupts can be directed - * to a single target core, however, event counts generated by processes - * running on other cores cannot be masked out. So we do not support - * sampling and per-thread events. + * that share the same NB / Last level cache. On family 16h and below, + * Interrupts can be directed to a single target core, however, event + * counts generated by processes running on other cores cannot be masked + * out. So we do not support sampling and per-thread events via + * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts: */ - if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) - return -EINVAL; - - /* and we do not enable counter overflow interrupts */ hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; hwc->idx = -1; @@ -306,7 +303,7 @@ static struct pmu amd_nb_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, }; static struct pmu amd_llc_pmu = { @@ -317,7 +314,7 @@ static struct pmu amd_llc_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, }; static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 7b21455d7504c..09a689ca5b586 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -375,7 +375,7 @@ int x86_add_exclusive(unsigned int what) * LBR and BTS are still mutually exclusive. */ if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) - return 0; + goto out; if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) { mutex_lock(&pmc_reserve_mutex); @@ -387,6 +387,7 @@ int x86_add_exclusive(unsigned int what) mutex_unlock(&pmc_reserve_mutex); } +out: atomic_inc(&active_events); return 0; @@ -397,11 +398,15 @@ int x86_add_exclusive(unsigned int what) void x86_del_exclusive(unsigned int what) { + atomic_dec(&active_events); + + /* + * See the comment in x86_add_exclusive(). + */ if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) return; atomic_dec(&x86_pmu.lbr_exclusive[what]); - atomic_dec(&active_events); } int x86_setup_perfctr(struct perf_event *event) @@ -612,6 +617,7 @@ void x86_pmu_disable_all(void) int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { + struct hw_perf_event *hwc = &cpuc->events[idx]->hw; u64 val; if (!test_bit(idx, cpuc->active_mask)) @@ -621,6 +627,8 @@ void x86_pmu_disable_all(void) continue; val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; wrmsrl(x86_pmu_config_addr(idx), val); + if (is_counter_pair(hwc)) + wrmsrl(x86_pmu_config_addr(idx + 1), 0); } } @@ -693,7 +701,7 @@ struct sched_state { int counter; /* counter index */ int unassigned; /* number of events to be assigned left */ int nr_gp; /* number of GP counters used */ - unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + u64 used; }; /* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */ @@ -750,8 +758,12 @@ static bool perf_sched_restore_state(struct perf_sched *sched) sched->saved_states--; sched->state = sched->saved[sched->saved_states]; - /* continue with next counter: */ - clear_bit(sched->state.counter++, sched->state.used); + /* this assignment didn't work out */ + /* XXX broken vs EVENT_PAIR */ + sched->state.used &= ~BIT_ULL(sched->state.counter); + + /* try the next one */ + sched->state.counter++; return true; } @@ -776,20 +788,32 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { idx = INTEL_PMC_IDX_FIXED; for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) { - if (!__test_and_set_bit(idx, sched->state.used)) - goto done; + u64 mask = BIT_ULL(idx); + + if (sched->state.used & mask) + continue; + + sched->state.used |= mask; + goto done; } } /* Grab the first unused counter starting with idx */ idx = sched->state.counter; for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) { - if (!__test_and_set_bit(idx, sched->state.used)) { - if (sched->state.nr_gp++ >= sched->max_gp) - return false; + u64 mask = BIT_ULL(idx); - goto done; - } + if (c->flags & PERF_X86_EVENT_PAIR) + mask |= mask << 1; + + if (sched->state.used & mask) + continue; + + if (sched->state.nr_gp++ >= sched->max_gp) + return false; + + sched->state.used |= mask; + goto done; } return false; @@ -866,12 +890,10 @@ EXPORT_SYMBOL_GPL(perf_assign_events); int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) { struct event_constraint *c; - unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; struct perf_event *e; int n0, i, wmin, wmax, unsched = 0; struct hw_perf_event *hwc; - - bitmap_zero(used_mask, X86_PMC_IDX_MAX); + u64 used_mask = 0; /* * Compute the number of events already present; see x86_pmu_add(), @@ -914,6 +936,8 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) * fastpath, try to reuse previous register */ for (i = 0; i < n; i++) { + u64 mask; + hwc = &cpuc->event_list[i]->hw; c = cpuc->event_constraint[i]; @@ -925,11 +949,16 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (!test_bit(hwc->idx, c->idxmsk)) break; + mask = BIT_ULL(hwc->idx); + if (is_counter_pair(hwc)) + mask |= mask << 1; + /* not already used */ - if (test_bit(hwc->idx, used_mask)) + if (used_mask & mask) break; - __set_bit(hwc->idx, used_mask); + used_mask |= mask; + if (assign) assign[i] = hwc->idx; } @@ -952,6 +981,15 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) READ_ONCE(cpuc->excl_cntrs->exclusive_present)) gpmax /= 2; + /* + * Reduce the amount of available counters to allow fitting + * the extra Merge events needed by large increment events. + */ + if (x86_pmu.flags & PMU_FL_PAIR) { + gpmax = x86_pmu.num_counters - cpuc->n_pair; + WARN_ON(gpmax <= 0); + } + unsched = perf_assign_events(cpuc->event_constraint, n, wmin, wmax, gpmax, assign); } @@ -1032,6 +1070,8 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, return -EINVAL; cpuc->event_list[n] = leader; n++; + if (is_counter_pair(&leader->hw)) + cpuc->n_pair++; } if (!dogrp) return n; @@ -1046,6 +1086,8 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, cpuc->event_list[n] = event; n++; + if (is_counter_pair(&event->hw)) + cpuc->n_pair++; } return n; } @@ -1231,6 +1273,13 @@ int x86_perf_event_set_period(struct perf_event *event) wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); + /* + * Clear the Merge event counter's upper 16 bits since + * we currently declare a 48-bit counter width + */ + if (is_counter_pair(hwc)) + wrmsrl(x86_pmu_event_addr(idx + 1), 0); + /* * Due to erratum on certan cpu we need * a second write to be sure the register @@ -1641,9 +1690,12 @@ static struct attribute_group x86_pmu_format_group __ro_after_init = { ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) { - struct perf_pmu_events_attr *pmu_attr = \ + struct perf_pmu_events_attr *pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); - u64 config = x86_pmu.event_map(pmu_attr->id); + u64 config = 0; + + if (pmu_attr->id < x86_pmu.max_events) + config = x86_pmu.event_map(pmu_attr->id); /* string trumps id */ if (pmu_attr->event_str) @@ -1712,6 +1764,9 @@ is_visible(struct kobject *kobj, struct attribute *attr, int idx) { struct perf_pmu_events_attr *pmu_attr; + if (idx >= x86_pmu.max_events) + return 0; + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr); /* str trumps id */ return pmu_attr->event_str || x86_pmu.event_map(idx) ? attr->mode : 0; @@ -2097,6 +2152,7 @@ static int x86_pmu_event_init(struct perf_event *event) if (err) { if (event->destroy) event->destroy(event); + event->destroy = NULL; } if (READ_ONCE(x86_pmu.attr_rdpmc) && @@ -2354,10 +2410,11 @@ static bool perf_hw_regs(struct pt_regs *regs) void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct unwind_state state; unsigned long addr; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* TODO: We don't support guest os callchain now */ return; } @@ -2463,10 +2520,11 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct stack_frame frame; const unsigned long __user *fp; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* TODO: We don't support guest os callchain now */ return; } @@ -2550,18 +2608,21 @@ static unsigned long code_segment_base(struct pt_regs *regs) unsigned long perf_instruction_pointer(struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + + if (guest_cbs && guest_cbs->is_in_guest()) + return guest_cbs->get_guest_ip(); return regs->ip + code_segment_base(regs); } unsigned long perf_misc_flags(struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); int misc = 0; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) + if (guest_cbs && guest_cbs->is_in_guest()) { + if (guest_cbs->is_user_mode()) misc |= PERF_RECORD_MISC_GUEST_USER; else misc |= PERF_RECORD_MISC_GUEST_KERNEL; diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile index 3468b0c1dc7c9..e67a5886336c1 100644 --- a/arch/x86/events/intel/Makefile +++ b/arch/x86/events/intel/Makefile @@ -2,8 +2,6 @@ obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o -obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl-perf.o -intel-rapl-perf-objs := rapl.o obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 5ee3fed881d3a..741540d849f3c 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -63,9 +63,17 @@ struct bts_buffer { static struct pmu bts_pmu; +static int buf_nr_pages(struct page *page) +{ + if (!PagePrivate(page)) + return 1; + + return 1 << page_private(page); +} + static size_t buf_size(struct page *page) { - return 1 << (PAGE_SHIFT + page_private(page)); + return buf_nr_pages(page) * PAGE_SIZE; } static void * @@ -83,9 +91,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages, /* count all the high order buffers */ for (pg = 0, nbuf = 0; pg < nr_pages;) { page = virt_to_page(pages[pg]); - if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1)) - return NULL; - pg += 1 << page_private(page); + pg += buf_nr_pages(page); nbuf++; } @@ -109,7 +115,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages, unsigned int __nr_pages; page = virt_to_page(pages[pg]); - __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1; + __nr_pages = buf_nr_pages(page); buf->buf[nbuf].page = page; buf->buf[nbuf].offset = offset; buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index fcef678c34230..f2976204e8b5d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -243,21 +243,23 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = { static struct event_constraint intel_icl_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ - INTEL_UEVENT_CONSTRAINT(0x1c0, 0), /* INST_RETIRED.PREC_DIST */ + FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* INST_RETIRED.PREC_DIST */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf), INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */ - INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x56, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf), INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */ - INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.STALLS_MEM_ANY */ + INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.CYCLES_MEM_ANY */ + INTEL_UEVENT_CONSTRAINT(0x14a3, 0xff), /* CYCLE_ACTIVITY.STALLS_MEM_ANY */ INTEL_EVENT_CONSTRAINT(0xa3, 0xf), /* CYCLE_ACTIVITY.* */ INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf), + INTEL_EVENT_CONSTRAINT(0xef, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf), EVENT_CONSTRAINT_END }; @@ -1892,8 +1894,8 @@ static __initconst const u64 tnt_hw_cache_extra_regs static struct extra_reg intel_tnt_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ - INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffffff9fffull, RSP_0), - INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xffffff9fffull, RSP_1), + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff0ffffff9fffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xff0ffffff9fffull, RSP_1), EVENT_EXTRA_END }; @@ -2331,6 +2333,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) { struct perf_sample_data data; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_guest_info_callbacks *guest_cbs; int bit; int handled = 0; @@ -2384,9 +2387,11 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) */ if (__test_and_clear_bit(55, (unsigned long *)&status)) { handled++; - if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() && - perf_guest_cbs->handle_intel_pt_intr)) - perf_guest_cbs->handle_intel_pt_intr(); + + guest_cbs = perf_get_guest_cbs(); + if (unlikely(guest_cbs && guest_cbs->is_in_guest() && + guest_cbs->handle_intel_pt_intr)) + guest_cbs->handle_intel_pt_intr(); else intel_pt_interrupt(); } @@ -3998,9 +4003,12 @@ static const struct x86_cpu_desc isolation_ucodes[] = { INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 3, 0x07000009), INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 4, 0x0f000009), INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_D, 5, 0x0e000002), - INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 2, 0x0b000014), + INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 1, 0x0b000014), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 6, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 7, 0x00000000), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c), INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e), @@ -4746,6 +4754,7 @@ __init int intel_pmu_init(void) break; case INTEL_FAM6_ATOM_TREMONT_D: + case INTEL_FAM6_ATOM_TREMONT: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -5056,7 +5065,7 @@ __init int intel_pmu_init(void) extra_skl_attr = skl_format_attr; mem_attr = icl_events_attrs; tsx_attr = icl_tsx_events_attrs; - x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02); + x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); x86_pmu.lbr_pt_coexist = true; intel_pmu_pebs_data_source_skl(pmem); pr_cont("Icelake events, "); diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index e1daf4151e116..0b50119ea12cc 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -40,17 +40,18 @@ * Model specific counters: * MSR_CORE_C1_RES: CORE C1 Residency Counter * perf code: 0x00 - * Available model: SLM,AMT,GLM,CNL + * Available model: SLM,AMT,GLM,CNL,TNT * Scope: Core (each processor core has a MSR) * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM, - * CNL,KBL,CML + * CNL,KBL,CML,TNT * Scope: Core * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, - * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL + * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL, + * TNT * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 @@ -60,17 +61,18 @@ * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, - * KBL,CML,ICL,TGL + * KBL,CML,ICL,TGL,TNT * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, - * GLM,CNL,KBL,CML,ICL,TGL + * GLM,CNL,KBL,CML,ICL,TGL,TNT * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 - * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW - * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, + * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL, + * TNT * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 @@ -87,7 +89,8 @@ * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 - * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL + * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL, + * TNT * Scope: Package (physical package) * */ @@ -104,14 +107,14 @@ MODULE_LICENSE("GPL"); #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __cstate_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ +static ssize_t __cstate_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ char *page) \ { \ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ return sprintf(page, _format "\n"); \ } \ -static struct kobj_attribute format_attr_##_var = \ +static struct device_attribute format_attr_##_var = \ __ATTR(_name, 0444, __cstate_##_var##_show, NULL) static ssize_t cstate_get_attr_cpumask(struct device *dev, @@ -640,8 +643,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates), X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_D, glm_cstates), - X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT_D, glm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT, glm_cstates), X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, icl_cstates), X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, icl_cstates), diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index ce83950036c56..5965d341350ca 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -669,9 +669,7 @@ int intel_pmu_drain_bts_buffer(void) static inline void intel_pmu_drain_pebs_buffer(void) { - struct pt_regs regs; - - x86_pmu.drain_pebs(®s); + x86_pmu.drain_pebs(NULL); } /* @@ -1713,6 +1711,8 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count) old = ((s64)(prev_raw_count << shift) >> shift); local64_add(new - old + count * period, &event->count); + local64_set(&hwc->period_left, -new); + perf_event_update_userpage(event); return 0; @@ -1734,6 +1734,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, struct x86_perf_regs perf_regs; struct pt_regs *regs = &perf_regs.regs; void *at = get_next_pebs_record_by_bit(base, top, bit); + struct pt_regs dummy_iregs; if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { /* @@ -1746,6 +1747,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event, } else if (!intel_pmu_save_and_restart(event)) return; + if (!iregs) + iregs = &dummy_iregs; + while (count > 1) { setup_sample(event, iregs, at, &data, regs); perf_event_output(event, &data, regs); @@ -1755,16 +1759,22 @@ static void __intel_pmu_pebs_event(struct perf_event *event, } setup_sample(event, iregs, at, &data, regs); - - /* - * All but the last records are processed. - * The last one is left to be able to call the overflow handler. - */ - if (perf_event_overflow(event, &data, regs)) { - x86_pmu_stop(event, 0); - return; + if (iregs == &dummy_iregs) { + /* + * The PEBS records may be drained in the non-overflow context, + * e.g., large PEBS + context switch. Perf should treat the + * last record the same as other PEBS records, and doesn't + * invoke the generic overflow handler. + */ + perf_event_output(event, &data, regs); + } else { + /* + * All but the last records are processed. + * The last one is left to be able to call the overflow handler. + */ + if (perf_event_overflow(event, &data, regs)) + x86_pmu_stop(event, 0); } - } static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) @@ -1880,7 +1890,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) */ if (!pebs_status && cpuc->pebs_enabled && !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1))) - pebs_status = cpuc->pebs_enabled; + pebs_status = p->status = cpuc->pebs_enabled; bit = find_first_bit((unsigned long *)&pebs_status, x86_pmu.max_pebs_events); @@ -1902,7 +1912,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) * that caused the PEBS record. It's called collision. * If collision happened, the record will be dropped. */ - if (p->status != (1ULL << bit)) { + if (pebs_status != (1ULL << bit)) { for_each_set_bit(i, (unsigned long *)&pebs_status, size) error[i]++; continue; diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 05e43d0f430bc..f4d2322f4c629 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -62,7 +62,7 @@ static struct pt_cap_desc { PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)), PT_CAP(output_subsys, 0, CPUID_ECX, BIT(3)), PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)), - PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x3), + PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x7), PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000), PT_CAP(cycle_thresholds, 1, CPUID_EBX, 0xffff), PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000), @@ -460,7 +460,7 @@ static u64 pt_config_filters(struct perf_event *event) pt->filters.filter[range].msr_b = filter->msr_b; } - rtit_ctl |= filter->config << pt_address_ranges[range].reg_off; + rtit_ctl |= (u64)filter->config << pt_address_ranges[range].reg_off; } return rtit_ctl; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 86467f85c3831..a335be03aeef1 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -92,8 +92,8 @@ struct pci2phy_map *__find_pci2phy_map(int segment) return map; } -ssize_t uncore_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +ssize_t uncore_event_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct uncore_event_desc *event = container_of(attr, struct uncore_event_desc, attr); diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index bbfdaa720b456..7b964c63e993c 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -144,7 +144,7 @@ struct intel_uncore_box { #define UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS 2 struct uncore_event_desc { - struct kobj_attribute attr; + struct device_attribute attr; const char *config; }; @@ -165,8 +165,8 @@ struct pci2phy_map { struct pci2phy_map *__find_pci2phy_map(int segment); int uncore_pcibus_to_physid(struct pci_bus *bus); -ssize_t uncore_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf); +ssize_t uncore_event_show(struct device *dev, + struct device_attribute *attr, char *buf); #define INTEL_UNCORE_EVENT_DESC(_name, _config) \ { \ @@ -175,14 +175,14 @@ ssize_t uncore_event_show(struct kobject *kobj, } #define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __uncore_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ +static ssize_t __uncore_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ char *page) \ { \ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ return sprintf(page, _format "\n"); \ } \ -static struct kobj_attribute format_attr_##_var = \ +static struct device_attribute format_attr_##_var = \ __ATTR(_name, 0444, __uncore_##_var##_show, NULL) static inline bool uncore_pmc_fixed(int idx) diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index dbaa1b088a30e..0258e0065771a 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -15,6 +15,7 @@ #define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 #define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f #define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_SKL_E3_IMC 0x1918 #define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c #define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 #define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 @@ -109,6 +110,10 @@ #define ICL_UNC_CBO_0_PER_CTR0 0x702 #define ICL_UNC_CBO_MSR_OFFSET 0x8 +/* ICL ARB register */ +#define ICL_UNC_ARB_PER_CTR 0x3b1 +#define ICL_UNC_ARB_PERFEVTSEL 0x3b3 + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); @@ -296,15 +301,21 @@ void skl_uncore_cpu_init(void) snb_uncore_arb.ops = &skl_uncore_msr_ops; } +static struct intel_uncore_ops icl_uncore_msr_ops = { + .disable_event = snb_uncore_msr_disable_event, + .enable_event = snb_uncore_msr_enable_event, + .read_counter = uncore_msr_read_counter, +}; + static struct intel_uncore_type icl_uncore_cbox = { .name = "cbox", - .num_counters = 4, + .num_counters = 2, .perf_ctr_bits = 44, .perf_ctr = ICL_UNC_CBO_0_PER_CTR0, .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0, .event_mask = SNB_UNC_RAW_EVENT_MASK, .msr_offset = ICL_UNC_CBO_MSR_OFFSET, - .ops = &skl_uncore_msr_ops, + .ops = &icl_uncore_msr_ops, .format_group = &snb_uncore_format_group, }; @@ -333,13 +344,25 @@ static struct intel_uncore_type icl_uncore_clockbox = { .single_fixed = 1, .event_mask = SNB_UNC_CTL_EV_SEL_MASK, .format_group = &icl_uncore_clock_format_group, - .ops = &skl_uncore_msr_ops, + .ops = &icl_uncore_msr_ops, .event_descs = icl_uncore_events, }; +static struct intel_uncore_type icl_uncore_arb = { + .name = "arb", + .num_counters = 1, + .num_boxes = 1, + .perf_ctr_bits = 44, + .perf_ctr = ICL_UNC_ARB_PER_CTR, + .event_ctl = ICL_UNC_ARB_PERFEVTSEL, + .event_mask = SNB_UNC_RAW_EVENT_MASK, + .ops = &icl_uncore_msr_ops, + .format_group = &snb_uncore_format_group, +}; + static struct intel_uncore_type *icl_msr_uncores[] = { &icl_uncore_cbox, - &snb_uncore_arb, + &icl_uncore_arb, &icl_uncore_clockbox, NULL, }; @@ -357,7 +380,6 @@ void icl_uncore_cpu_init(void) { uncore_msr_uncores = icl_msr_uncores; icl_uncore_cbox.num_boxes = icl_get_cbox_num(); - snb_uncore_arb.ops = &skl_uncore_msr_ops; } enum { @@ -553,6 +575,22 @@ int snb_pci2phy_map_init(int devid) return 0; } +static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* + * SNB IMC counters are 32-bit and are laid out back to back + * in MMIO space. Therefore we must use a 32-bit accessor function + * using readq() from uncore_mmio_read_counter() causes problems + * because it is reading 64-bit at a time. This is okay for the + * uncore_perf_event_update() function because it drops the upper + * 32-bits but not okay for plain uncore_read_counter() as invoked + * in uncore_pmu_event_start(). + */ + return (u64)readl(box->io_addr + hwc->event_base); +} + static struct pmu snb_uncore_imc_pmu = { .task_ctx_nr = perf_invalid_context, .event_init = snb_uncore_imc_event_init, @@ -572,7 +610,7 @@ static struct intel_uncore_ops snb_uncore_imc_ops = { .disable_event = snb_uncore_imc_disable_event, .enable_event = snb_uncore_imc_enable_event, .hw_config = snb_uncore_imc_hw_config, - .read_counter = uncore_mmio_read_counter, + .read_counter = snb_uncore_imc_read_counter, }; static struct intel_uncore_type snb_uncore_imc = { @@ -657,6 +695,10 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_E3_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* IMC */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), @@ -826,6 +868,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */ IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */ IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */ + IMC_DEV(SKL_E3_IMC, &skl_uncore_pci_driver), /* Xeon E3 V5 Gen Core processor */ IMC_DEV(KBL_Y_IMC, &skl_uncore_pci_driver), /* 7th Gen Core Y */ IMC_DEV(KBL_U_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U */ IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */ diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index b10a5ec79e48a..0f61f46e6086f 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -369,11 +369,6 @@ #define SNR_M2M_PCI_PMON_BOX_CTL 0x438 #define SNR_M2M_PCI_PMON_UMASK_EXT 0xff -/* SNR PCIE3 */ -#define SNR_PCIE3_PCI_PMON_CTL0 0x508 -#define SNR_PCIE3_PCI_PMON_CTR0 0x4e8 -#define SNR_PCIE3_PCI_PMON_BOX_CTL 0x4e4 - /* SNR IMC */ #define SNR_IMC_MMIO_PMON_FIXED_CTL 0x54 #define SNR_IMC_MMIO_PMON_FIXED_CTR 0x38 @@ -1098,7 +1093,6 @@ enum { SNBEP_PCI_QPI_PORT0_FILTER, SNBEP_PCI_QPI_PORT1_FILTER, BDX_PCI_QPI_PORT2_FILTER, - HSWEP_PCI_PCU_3, }; static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event) @@ -2755,22 +2749,33 @@ static struct intel_uncore_type *hswep_msr_uncores[] = { NULL, }; -void hswep_uncore_cpu_init(void) +#define HSWEP_PCU_DID 0x2fc0 +#define HSWEP_PCU_CAPID4_OFFET 0x94 +#define hswep_get_chop(_cap) (((_cap) >> 6) & 0x3) + +static bool hswep_has_limit_sbox(unsigned int device) { - int pkg = boot_cpu_data.logical_proc_id; + struct pci_dev *dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, NULL); + u32 capid4; + + if (!dev) + return false; + + pci_read_config_dword(dev, HSWEP_PCU_CAPID4_OFFET, &capid4); + if (!hswep_get_chop(capid4)) + return true; + + return false; +} +void hswep_uncore_cpu_init(void) +{ if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; /* Detect 6-8 core systems with only two SBOXes */ - if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) { - u32 capid4; - - pci_read_config_dword(uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3], - 0x94, &capid4); - if (((capid4 >> 6) & 0x3) == 0) - hswep_uncore_sbox.num_boxes = 2; - } + if (hswep_has_limit_sbox(HSWEP_PCU_DID)) + hswep_uncore_sbox.num_boxes = 2; uncore_msr_uncores = hswep_msr_uncores; } @@ -3033,11 +3038,6 @@ static const struct pci_device_id hswep_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, SNBEP_PCI_QPI_PORT1_FILTER), }, - { /* PCU.3 (for Capability registers) */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, - HSWEP_PCI_PCU_3), - }, { /* end: all zeroes */ } }; @@ -3129,27 +3129,18 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = { EVENT_CONSTRAINT_END }; +#define BDX_PCU_DID 0x6fc0 + void bdx_uncore_cpu_init(void) { - int pkg = topology_phys_to_logical_pkg(boot_cpu_data.phys_proc_id); - if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; uncore_msr_uncores = bdx_msr_uncores; - /* BDX-DE doesn't have SBOX */ - if (boot_cpu_data.x86_model == 86) { - uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; /* Detect systems with no SBOXes */ - } else if (uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]) { - struct pci_dev *pdev; - u32 capid4; - - pdev = uncore_extra_pci_dev[pkg].dev[HSWEP_PCI_PCU_3]; - pci_read_config_dword(pdev, 0x94, &capid4); - if (((capid4 >> 6) & 0x3) == 0) - bdx_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; - } + if ((boot_cpu_data.x86_model == 86) || hswep_has_limit_sbox(BDX_PCU_DID)) + uncore_msr_uncores[BDX_MSR_UNCORE_SBOX] = NULL; + hswep_uncore_pcu.constraints = bdx_uncore_pcu_constraints; } @@ -3370,11 +3361,6 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, BDX_PCI_QPI_PORT2_FILTER), }, - { /* PCU.3 (for Capability registers) */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fc0), - .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, - HSWEP_PCI_PCU_3), - }, { /* end: all zeroes */ } }; @@ -3493,6 +3479,9 @@ static int skx_cha_hw_config(struct intel_uncore_box *box, struct perf_event *ev struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; struct extra_reg *er; int idx = 0; + /* Any of the CHA events may be filtered by Thread/Core-ID.*/ + if (event->hw.config & SNBEP_CBO_PMON_CTL_TID_EN) + idx = SKX_CHA_MSR_PMON_BOX_FILTER_TID; for (er = skx_uncore_cha_extra_regs; er->msr; er++) { if (er->event != (event->hw.config & er->config_mask)) @@ -3560,6 +3549,7 @@ static struct event_constraint skx_uncore_iio_constraints[] = { UNCORE_EVENT_CONSTRAINT(0xc0, 0xc), UNCORE_EVENT_CONSTRAINT(0xc5, 0xc), UNCORE_EVENT_CONSTRAINT(0xd4, 0xc), + UNCORE_EVENT_CONSTRAINT(0xd5, 0xc), EVENT_CONSTRAINT_END }; @@ -4328,27 +4318,12 @@ static struct intel_uncore_type snr_uncore_m2m = { .format_group = &snr_m2m_uncore_format_group, }; -static struct intel_uncore_type snr_uncore_pcie3 = { - .name = "pcie3", - .num_counters = 4, - .num_boxes = 1, - .perf_ctr_bits = 48, - .perf_ctr = SNR_PCIE3_PCI_PMON_CTR0, - .event_ctl = SNR_PCIE3_PCI_PMON_CTL0, - .event_mask = SNBEP_PMON_RAW_EVENT_MASK, - .box_ctl = SNR_PCIE3_PCI_PMON_BOX_CTL, - .ops = &ivbep_uncore_pci_ops, - .format_group = &ivbep_uncore_format_group, -}; - enum { SNR_PCI_UNCORE_M2M, - SNR_PCI_UNCORE_PCIE3, }; static struct intel_uncore_type *snr_pci_uncores[] = { [SNR_PCI_UNCORE_M2M] = &snr_uncore_m2m, - [SNR_PCI_UNCORE_PCIE3] = &snr_uncore_pcie3, NULL, }; @@ -4357,10 +4332,6 @@ static const struct pci_device_id snr_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 0, SNR_PCI_UNCORE_M2M, 0), }, - { /* PCIe3 */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x334a), - .driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 0, SNR_PCI_UNCORE_PCIE3, 0), - }, { /* end: all zeroes */ } }; @@ -4415,7 +4386,7 @@ static void snr_uncore_mmio_init_box(struct intel_uncore_box *box) return; pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword); - addr = (pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; + addr = ((resource_size_t)pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; pci_read_config_dword(pdev, SNR_IMC_MMIO_MEM0_OFFSET, &pci_dword); addr |= (pci_dword & SNR_IMC_MMIO_MEM0_MASK) << 12; @@ -4536,6 +4507,7 @@ static struct uncore_event_desc snr_uncore_imc_freerunning_events[] = { INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"), INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"), INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"), + { /* end: all zeroes */ }, }; static struct intel_uncore_ops snr_uncore_imc_freerunning_ops = { diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 6f86650b3f77d..a949f6f55991d 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -75,8 +75,9 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_ATOM_GOLDMONT: case INTEL_FAM6_ATOM_GOLDMONT_D: - case INTEL_FAM6_ATOM_GOLDMONT_PLUS: + case INTEL_FAM6_ATOM_TREMONT_D: + case INTEL_FAM6_ATOM_TREMONT: case INTEL_FAM6_XEON_PHI_KNL: case INTEL_FAM6_XEON_PHI_KNM: diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index ecacfbf4ebc12..8a30ec8b38557 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -77,6 +77,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode) #define PERF_X86_EVENT_AUTO_RELOAD 0x0200 /* use PEBS auto-reload */ #define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */ #define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */ +#define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */ struct amd_nb { int nb_id; /* NorthBridge id */ @@ -272,6 +273,7 @@ struct cpu_hw_events { struct amd_nb *amd_nb; /* Inverted mask of bits to clear in the perf_ctr ctrl registers */ u64 perf_ctr_virt_mask; + int n_pair; /* Large increment events */ void *kfree_on_online[X86_PERF_KFREE_MAX]; }; @@ -686,6 +688,7 @@ struct x86_pmu { * AMD bits */ unsigned int amd_nb_constraints : 1; + u64 perf_ctr_pair_en; /* * Extra registers for events @@ -735,6 +738,7 @@ do { \ #define PMU_FL_EXCL_ENABLED 0x8 /* exclusive counter active */ #define PMU_FL_PEBS_ALL 0x10 /* all events are valid PEBS events */ #define PMU_FL_TFA 0x20 /* deal with TSX force abort */ +#define PMU_FL_PAIR 0x40 /* merge counters for large incr. events */ #define EVENT_VAR(_id) event_attr_##_id #define EVENT_PTR(_id) &event_attr_##_id.attr.attr @@ -830,6 +834,11 @@ int x86_pmu_hw_config(struct perf_event *event); void x86_pmu_disable_all(void); +static inline bool is_counter_pair(struct hw_perf_event *hwc) +{ + return hwc->flags & PERF_X86_EVENT_PAIR; +} + static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, u64 enable_mask) { @@ -837,6 +846,14 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, if (hwc->extra_reg.reg) wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); + + /* + * Add enabled Merge event on next counter + * if large increment event being enabled on this counter + */ + if (is_counter_pair(hwc)) + wrmsrl(x86_pmu_config_addr(hwc->idx + 1), x86_pmu.perf_ctr_pair_en); + wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask); } @@ -850,9 +867,13 @@ void x86_pmu_stop(struct perf_event *event, int flags); static inline void x86_pmu_disable_event(struct perf_event *event) { + u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask); struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base, hwc->config); + wrmsrl(hwc->config_base, hwc->config & ~disable_mask); + + if (is_counter_pair(hwc)) + wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0); } void x86_pmu_enable_event(struct perf_event *event); diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/rapl.c similarity index 97% rename from arch/x86/events/intel/rapl.c rename to arch/x86/events/rapl.c index 5053a403e4ae0..9050d7b8abc5a 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/rapl.c @@ -1,11 +1,14 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Support Intel RAPL energy consumption counters + * Support Intel/AMD RAPL energy consumption counters * Copyright (C) 2013 Google, Inc., Stephane Eranian * * Intel RAPL interface is specified in the IA-32 Manual Vol3b * section 14.7.1 (September 2013) * + * AMD RAPL interface for Fam17h is described in the public PPR: + * https://bugzilla.kernel.org/show_bug.cgi?id=206537 + * * RAPL provides more controls than just reporting energy consumption * however here we only expose the 3 energy consumption free running * counters (pp0, pkg, dram). @@ -58,8 +61,8 @@ #include #include #include -#include "../perf_event.h" -#include "../probe.h" +#include "perf_event.h" +#include "probe.h" MODULE_LICENSE("GPL"); @@ -90,18 +93,6 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { * any other bit is reserved */ #define RAPL_EVENT_MASK 0xFFULL - -#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __rapl_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ - char *page) \ -{ \ - BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ - return sprintf(page, _format "\n"); \ -} \ -static struct kobj_attribute format_attr_##_var = \ - __ATTR(_name, 0444, __rapl_##_var##_show, NULL) - #define RAPL_CNTR_WIDTH 32 #define RAPL_EVENT_ATTR_STR(_name, v, str) \ @@ -430,7 +421,7 @@ static struct attribute_group rapl_pmu_events_group = { .attrs = attrs_empty, }; -DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); +PMU_FORMAT_ATTR(event, "config:0-7"); static struct attribute *rapl_formats_attr[] = { &format_attr_event.attr, NULL, @@ -639,7 +630,7 @@ static const struct attribute_group *rapl_attr_update[] = { &rapl_events_pkg_group, &rapl_events_ram_group, &rapl_events_gpu_group, - &rapl_events_gpu_group, + &rapl_events_psys_group, NULL, }; diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 2db3972c0e0ff..7797bdaf7c33f 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -19,9 +19,18 @@ #include #include #include +#include #include #include +#ifndef PKG_ABI +/* + * Preserve the ability to 'make deb-pkg' since PKG_ABI is provided + * by the Ubuntu build rules. + */ +#define PKG_ABI 0 +#endif + void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); @@ -154,7 +163,6 @@ void set_hv_tscchange_cb(void (*cb)(void)) struct hv_reenlightenment_control re_ctrl = { .vector = HYPERV_REENLIGHTENMENT_VECTOR, .enabled = 1, - .target_vp = hv_vp_index[smp_processor_id()] }; struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1}; @@ -163,13 +171,20 @@ void set_hv_tscchange_cb(void (*cb)(void)) return; } + if (!hv_vp_index) + return; + hv_reenlightenment_cb = cb; /* Make sure callback is registered before we write to MSRs */ wmb(); + re_ctrl.target_vp = hv_vp_index[get_cpu()]; + wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); wrmsrl(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl)); + + put_cpu(); } EXPORT_SYMBOL_GPL(set_hv_tscchange_cb); @@ -297,7 +312,7 @@ void __init hyperv_init(void) * 1. Register the guest ID * 2. Enable the hypercall and register the hypercall page */ - guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); + guest_id = generate_guest_id(0x80 /*Canonical*/, LINUX_VERSION_CODE, PKG_ABI); wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX); @@ -354,11 +369,14 @@ void hyperv_cleanup(void) } EXPORT_SYMBOL_GPL(hyperv_cleanup); -void hyperv_report_panic(struct pt_regs *regs, long err) +void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) { static bool panic_reported; u64 guest_id; + if (in_die && !panic_on_oops) + return; + /* * We prefer to report panic on 'die' chain as we have proper * registers to report, but if we miss it (e.g. on BUG()) we need diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 5208ba49c89a9..2c87350c1fb09 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -66,11 +66,17 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, if (!hv_hypercall_pg) goto do_native; - if (cpumask_empty(cpus)) - return; - local_irq_save(flags); + /* + * Only check the mask _after_ interrupt has been disabled to avoid the + * mask changing under our feet. + */ + if (cpumask_empty(cpus)) { + local_irq_restore(flags); + return; + } + flush_pcpu = (struct hv_tlb_flush **) this_cpu_ptr(hyperv_pcpu_input_arg); diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 1cee10091b9fb..30416d7f19d4f 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -118,7 +119,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, return err; } -asmlinkage long sys32_sigreturn(void) +COMPAT_SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = current_pt_regs(); struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); @@ -144,7 +145,7 @@ asmlinkage long sys32_sigreturn(void) return 0; } -asmlinkage long sys32_rt_sigreturn(void) +COMPAT_SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe_ia32 __user *frame; diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h index 9aff97f0de7fd..d937c55e717e6 100644 --- a/arch/x86/include/asm/acenv.h +++ b/arch/x86/include/asm/acenv.h @@ -13,7 +13,19 @@ /* Asm macros */ -#define ACPI_FLUSH_CPU_CACHE() wbinvd() +/* + * ACPI_FLUSH_CPU_CACHE() flushes caches on entering sleep states. + * It is required to prevent data loss. + * + * While running inside virtual machine, the kernel can bypass cache flushing. + * Changing sleep state in a virtual machine doesn't affect the host system + * sleep state and cannot lead to data loss. + */ +#define ACPI_FLUSH_CPU_CACHE() \ +do { \ + if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) \ + wbinvd(); \ +} while (0) int __acpi_acquire_global_lock(unsigned int *lock); int __acpi_release_global_lock(unsigned int *lock); diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 2ebc17d9c72cd..a49b1aeb2147b 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -140,6 +140,7 @@ extern void apic_soft_disable(void); extern void lapic_shutdown(void); extern void sync_Arb_IDs(void); extern void init_bsp_APIC(void); +extern void apic_intr_mode_select(void); extern void apic_intr_mode_init(void); extern void init_apic_mappings(void); void register_lapic_address(unsigned long address); @@ -173,6 +174,7 @@ static inline int apic_is_clustered_box(void) extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask); extern void lapic_assign_system_vectors(void); extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); +extern void lapic_update_legacy_vectors(void); extern void lapic_online(void); extern void lapic_offline(void); extern bool apic_needs_pit(void); @@ -188,6 +190,7 @@ static inline void disable_local_APIC(void) { } # define setup_secondary_APIC_clock x86_init_noop static inline void lapic_update_tsc_freq(void) { } static inline void init_bsp_APIC(void) { } +static inline void apic_intr_mode_select(void) { } static inline void apic_intr_mode_init(void) { } static inline void lapic_assign_system_vectors(void) { } static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } @@ -195,16 +198,6 @@ static inline bool apic_needs_pit(void) { return true; } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_X2APIC -/* - * Make previous memory operations globally visible before - * sending the IPI through x2apic wrmsr. We need a serializing instruction or - * mfence for this. - */ -static inline void x2apic_wrmsr_fence(void) -{ - asm volatile("mfence" : : : "memory"); -} - static inline void native_apic_msr_write(u32 reg, u32 v) { if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || @@ -257,6 +250,7 @@ static inline u64 native_x2apic_icr_read(void) extern int x2apic_mode; extern int x2apic_phys; +extern void __init x2apic_set_max_apicid(u32 apicid); extern void __init check_x2apic(void); extern void x2apic_setup(void); static inline int x2apic_enabled(void) @@ -452,6 +446,14 @@ static inline void ack_APIC_irq(void) apic_eoi(); } + +static inline bool lapic_vector_set_in_irr(unsigned int vector) +{ + u32 irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); + + return !!(irr & (1U << (vector % 32))); +} + static inline unsigned default_get_apic_id(unsigned long x) { unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index 4d4015ddcf263..a7f2a7f57635a 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -35,6 +35,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" + ANNOTATE_RETPOLINE_SAFE /* FRBS */ "lcall *%%cs:apm_bios_entry\n\t" "setc %%al\n\t" "popl %%ebp\n\t" @@ -59,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" + ANNOTATE_RETPOLINE_SAFE /* FRBS */ "lcall *%%cs:apm_bios_entry\n\t" "setc %%bl\n\t" "popl %%ebp\n\t" diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index af45e1452f097..feb59461046c6 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -73,10 +73,6 @@ static inline bool rdseed_int(unsigned int *v) return ok; } -/* Conditional execution based on CPU type */ -#define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND) -#define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED) - /* * These are the generic interfaces; they must not be declared if the * stubs in are to be invoked, @@ -86,22 +82,22 @@ static inline bool rdseed_int(unsigned int *v) static inline bool arch_get_random_long(unsigned long *v) { - return arch_has_random() ? rdrand_long(v) : false; + return static_cpu_has(X86_FEATURE_RDRAND) ? rdrand_long(v) : false; } static inline bool arch_get_random_int(unsigned int *v) { - return arch_has_random() ? rdrand_int(v) : false; + return static_cpu_has(X86_FEATURE_RDRAND) ? rdrand_int(v) : false; } static inline bool arch_get_random_seed_long(unsigned long *v) { - return arch_has_random_seed() ? rdseed_long(v) : false; + return static_cpu_has(X86_FEATURE_RDSEED) ? rdseed_long(v) : false; } static inline bool arch_get_random_seed_int(unsigned int *v) { - return arch_has_random_seed() ? rdseed_int(v) : false; + return static_cpu_has(X86_FEATURE_RDSEED) ? rdseed_int(v) : false; } extern void x86_init_rdrand(struct cpuinfo_x86 *c); diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 3ff577c0b1024..cd339b88d5d46 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -7,9 +7,11 @@ # define __ASM_FORM_RAW(x) x # define __ASM_FORM_COMMA(x) x, #else -# define __ASM_FORM(x) " " #x " " -# define __ASM_FORM_RAW(x) #x -# define __ASM_FORM_COMMA(x) " " #x "," +#include + +# define __ASM_FORM(x) " " __stringify(x) " " +# define __ASM_FORM_RAW(x) __stringify(x) +# define __ASM_FORM_COMMA(x) " " __stringify(x) "," #endif #ifndef __x86_64__ @@ -139,9 +141,6 @@ # define _ASM_EXTABLE_EX(from, to) \ _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) -# define _ASM_EXTABLE_REFCOUNT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) - # define _ASM_NOKPROBE(entry) \ .pushsection "_kprobe_blacklist","aw" ; \ _ASM_ALIGN ; \ @@ -170,9 +169,6 @@ # define _ASM_EXTABLE_EX(from, to) \ _ASM_EXTABLE_HANDLE(from, to, ex_handler_ext) -# define _ASM_EXTABLE_REFCOUNT(from, to) \ - _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount) - /* For C file, we already have NOKPROBE_SYMBOL macro */ #endif diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 7f828fe497978..4819d5e5a3353 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -84,4 +84,22 @@ do { \ #include +/* + * Make previous memory operations globally visible before + * a WRMSR. + * + * MFENCE makes writes visible, but only affects load/store + * instructions. WRMSR is unfortunately not a load/store + * instruction and is unaffected by MFENCE. The LFENCE ensures + * that the WRMSR is not reordered. + * + * Most WRMSRs are full serializing instructions themselves and + * do not require this barrier. This is only required for the + * IA32_TSC_DEADLINE and X2APIC MSRs. + */ +static inline void weak_wrmsr_fence(void) +{ + asm volatile("mfence; lfence" : : : "memory"); +} + #endif /* _ASM_X86_BARRIER_H */ diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h index 86b63c7feab75..86b2e0dcc4bfe 100644 --- a/arch/x86/include/asm/cacheinfo.h +++ b/arch/x86/include/asm/cacheinfo.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_CACHEINFO_H #define _ASM_X86_CACHEINFO_H -void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id); -void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id); +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu); +void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu); #endif /* _ASM_X86_CACHEINFO_H */ diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 22c4dfe659923..b4dd6ab0fdfce 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -31,15 +31,13 @@ typedef s64 __attribute__((aligned(4))) compat_s64; typedef u64 __attribute__((aligned(4))) compat_u64; struct compat_stat { - compat_dev_t st_dev; - u16 __pad1; + u32 st_dev; compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; __compat_uid_t st_uid; __compat_gid_t st_gid; - compat_dev_t st_rdev; - u16 __pad2; + u32 st_rdev; u32 st_size; u32 st_blksize; u32 st_blocks; diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index 31c379c1da41c..cdf39decf7340 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -5,9 +5,163 @@ /* * Declare drivers belonging to specific x86 CPUs * Similar in spirit to pci_device_id and related PCI functions + * + * The wildcard initializers are in mod_devicetable.h because + * file2alias needs them. Sigh. */ - #include +/* Get the INTEL_FAM* model defines */ +#include +/* And the X86_VENDOR_* ones */ +#include + +/* Centaur FAM6 models */ +#define X86_CENTAUR_FAM6_C7_A 0xa +#define X86_CENTAUR_FAM6_C7_D 0xd +#define X86_CENTAUR_FAM6_NANO 0xf + +#define X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins) +/** + * X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE - Base macro for CPU matching + * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@_vendor + * @_family: The family number or X86_FAMILY_ANY + * @_model: The model number, model constant or X86_MODEL_ANY + * @_steppings: Bitmask for steppings, stepping constant or X86_STEPPING_ANY + * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY + * @_data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * Use only if you need all selectors. Otherwise use one of the shorter + * macros of the X86_MATCH_* family. If there is no matching shorthand + * macro, consider to add one. If you really need to wrap one of the macros + * into another macro at the usage site for good reasons, then please + * start this local macro with X86_MATCH to allow easy grepping. + */ +#define X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \ + _steppings, _feature, _data) { \ + .vendor = X86_VENDOR_##_vendor, \ + .family = _family, \ + .model = _model, \ + .steppings = _steppings, \ + .feature = _feature, \ + .driver_data = (unsigned long) _data \ +} + +/** + * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Macro for CPU matching + * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@_vendor + * @_family: The family number or X86_FAMILY_ANY + * @_model: The model number, model constant or X86_MODEL_ANY + * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY + * @_data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * The steppings arguments of X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE() is + * set to wildcards. + */ +#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, feature, data) \ + X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(vendor, family, model, \ + X86_STEPPING_ANY, feature, data) + +/** + * X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @family: The family number or X86_FAMILY_ANY + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set to wildcards. + */ +#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \ + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, \ + X86_MODEL_ANY, feature, data) + +/** + * X86_MATCH_VENDOR_FEATURE - Macro for matching vendor and CPU feature + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set to wildcards. + */ +#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \ + X86_MATCH_VENDOR_FAM_FEATURE(vendor, X86_FAMILY_ANY, feature, data) + +/** + * X86_MATCH_FEATURE - Macro for matching a CPU feature + * @feature: A X86_FEATURE bit + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set to wildcards. + */ +#define X86_MATCH_FEATURE(feature, data) \ + X86_MATCH_VENDOR_FEATURE(ANY, feature, data) + +/* Transitional to keep the existing code working */ +#define X86_FEATURE_MATCH(feature) X86_MATCH_FEATURE(feature, NULL) + +/** + * X86_MATCH_VENDOR_FAM_MODEL - Match vendor, family and model + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @family: The family number or X86_FAMILY_ANY + * @model: The model number, model constant or X86_MODEL_ANY + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set to wildcards. + */ +#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \ + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, \ + X86_FEATURE_ANY, data) + +/** + * X86_MATCH_VENDOR_FAM - Match vendor and family + * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@vendor + * @family: The family number or X86_FAMILY_ANY + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * All other missing arguments to X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are + * set of wildcards. + */ +#define X86_MATCH_VENDOR_FAM(vendor, family, data) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, X86_MODEL_ANY, data) + +/** + * X86_MATCH_INTEL_FAM6_MODEL - Match vendor INTEL, family 6 and model + * @model: The model name without the INTEL_FAM6_ prefix or ANY + * The model name is expanded to INTEL_FAM6_@model internally + * @data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * The vendor is set to INTEL, the family to 6 and all other missing + * arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are set to wildcards. + * + * See X86_MATCH_VENDOR_FAM_MODEL_FEATURE() for further information. + */ +#define X86_MATCH_INTEL_FAM6_MODEL(model, data) \ + X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, INTEL_FAM6_##model, data) /* * Match specific microcode revisions. diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 8348f7d69fd58..ea866c7bf31d3 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -78,8 +78,12 @@ struct cpu_entry_area { /* * The GDT is just below entry_stack and thus serves (on x86_64) as - * a a read-only guard page. + * a read-only guard page. On 32-bit the GDT must be writeable, so + * it needs an extra guard page. */ +#ifdef CONFIG_X86_32 + char guard_entry_stack[PAGE_SIZE]; +#endif struct entry_stack_page entry_stack_page; /* @@ -94,7 +98,6 @@ struct cpu_entry_area { */ struct cea_exception_stacks estacks; #endif -#ifdef CONFIG_CPU_SUP_INTEL /* * Per CPU debug store for Intel performance monitoring. Wastes a * full page at the moment. @@ -105,11 +108,13 @@ struct cpu_entry_area { * Reserve enough fixmap PTEs. */ struct debug_store_buffers cpu_debug_buffers; -#endif }; -#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) -#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) +#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) +#define CPU_ENTRY_AREA_ARRAY_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) + +/* Total size includes the readonly IDT mapping page as well: */ +#define CPU_ENTRY_AREA_TOTAL_SIZE (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE) DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); @@ -117,13 +122,14 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); extern void setup_cpu_entry_areas(void); extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); +/* Single page reserved for the readonly IDT mapping: */ #define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE #define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE) #define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) #define CPU_ENTRY_AREA_MAP_SIZE \ - (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE) + (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) extern struct cpu_entry_area *get_cpu_entry_area(int cpu); diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 59bf91c57aa85..619c1f80a2abe 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -49,7 +49,7 @@ extern const char * const x86_power_flags[32]; extern const char * const x86_bug_flags[NBUGINTS*32]; #define test_cpu_cap(c, bit) \ - test_bit(bit, (unsigned long *)((c)->x86_capability)) + arch_test_bit(bit, (unsigned long *)((c)->x86_capability)) /* * There are 32 bits/features in each mask word. The high bits diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c4fbe379cc0b2..2ec85d7bfdff2 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -96,6 +96,7 @@ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ +#define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ @@ -107,6 +108,7 @@ #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ +/* free ( 3*32+29) */ #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ #define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ @@ -201,8 +203,8 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ @@ -284,6 +286,10 @@ #define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ #define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ @@ -300,6 +306,7 @@ #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ +#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -357,6 +364,7 @@ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ +#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ @@ -401,5 +409,10 @@ #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ +#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ +#define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_MMIO_UNKNOWN X86_BUG(28) /* CPU is too old and its MMIO Stale Data status is unknown */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h index 0acf5ee45a21a..88eadd08ad708 100644 --- a/arch/x86/include/asm/crash.h +++ b/arch/x86/include/asm/crash.h @@ -2,10 +2,18 @@ #ifndef _ASM_X86_CRASH_H #define _ASM_X86_CRASH_H +struct kimage; + int crash_load_segments(struct kimage *image); int crash_copy_backup_region(struct kimage *image); int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params); void crash_smp_send_stop(void); +#ifdef CONFIG_KEXEC_CORE +void __init crash_reserve_low_1M(void); +#else +static inline void __init crash_reserve_low_1M(void) { } +#endif + #endif /* _ASM_X86_CRASH_H */ diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h index a5d86fc0593f2..f1592619dd651 100644 --- a/arch/x86/include/asm/crypto/camellia.h +++ b/arch/x86/include/asm/crypto/camellia.h @@ -32,65 +32,60 @@ extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); /* regular block cipher functions */ -asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src, + bool xor); +asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src); /* 2-way parallel cipher functions */ -asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src, + bool xor); +asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src); /* 16-way parallel cipher functions (avx/aes-ni) */ -asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); - -asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); - -asmlinkage void camellia_xts_enc_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); -asmlinkage void camellia_xts_dec_16way(struct camellia_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); - -static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) +asmlinkage void camellia_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void camellia_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src); + +asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); + +asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); +asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); + +static inline void camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src) { __camellia_enc_blk(ctx, dst, src, false); } -static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst, - const u8 *src) +static inline void camellia_enc_blk_xor(const void *ctx, u8 *dst, const u8 *src) { __camellia_enc_blk(ctx, dst, src, true); } -static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst, +static inline void camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src) { __camellia_enc_blk_2way(ctx, dst, src, false); } -static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst, +static inline void camellia_enc_blk_xor_2way(const void *ctx, u8 *dst, const u8 *src) { __camellia_enc_blk_2way(ctx, dst, src, true); } /* glue helpers */ -extern void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src); -extern void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, +extern void camellia_decrypt_cbc_2way(const void *ctx, u8 *dst, const u8 *src); +extern void camellia_crypt_ctr(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, +extern void camellia_crypt_ctr_2way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -extern void camellia_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern void camellia_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); +extern void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); +extern void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); #endif /* ASM_X86_CAMELLIA_H */ diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index 8d4a8e1226ee3..777c0f63418c8 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -11,18 +11,13 @@ #include #include -typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); -typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src); -typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src, +typedef void (*common_glue_func_t)(const void *ctx, u8 *dst, const u8 *src); +typedef void (*common_glue_cbc_func_t)(const void *ctx, u8 *dst, const u8 *src); +typedef void (*common_glue_ctr_func_t)(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -typedef void (*common_glue_xts_func_t)(void *ctx, u128 *dst, const u128 *src, +typedef void (*common_glue_xts_func_t)(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn)) -#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn)) -#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn)) -#define GLUE_XTS_FUNC_CAST(fn) ((common_glue_xts_func_t)(fn)) - struct common_glue_func_entry { unsigned int num_blocks; /* number of blocks that @fn will process */ union { @@ -116,7 +111,8 @@ extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx, common_glue_func_t tweak_fn, void *tweak_ctx, void *crypt_ctx, bool decrypt); -extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, - le128 *iv, common_glue_func_t fn); +extern void glue_xts_crypt_128bit_one(const void *ctx, u8 *dst, + const u8 *src, le128 *iv, + common_glue_func_t fn); #endif /* _CRYPTO_GLUE_HELPER_H */ diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h index db7c9cc322342..251c2c89d7cfe 100644 --- a/arch/x86/include/asm/crypto/serpent-avx.h +++ b/arch/x86/include/asm/crypto/serpent-avx.h @@ -15,26 +15,26 @@ struct serpent_xts_ctx { struct serpent_ctx crypt_ctx; }; -asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst, const u8 *src); -asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst, - const u8 *src, le128 *iv); +asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src, + le128 *iv); -asmlinkage void serpent_xts_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_xts_enc_8way_avx(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -asmlinkage void serpent_xts_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_xts_dec_8way_avx(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, +extern void __serpent_crypt_ctr(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); +extern void serpent_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv); +extern void serpent_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv); extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); diff --git a/arch/x86/include/asm/crypto/serpent-sse2.h b/arch/x86/include/asm/crypto/serpent-sse2.h index 1a345e8a7496c..860ca248914b1 100644 --- a/arch/x86/include/asm/crypto/serpent-sse2.h +++ b/arch/x86/include/asm/crypto/serpent-sse2.h @@ -9,25 +9,23 @@ #define SERPENT_PARALLEL_BLOCKS 4 -asmlinkage void __serpent_enc_blk_4way(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void __serpent_enc_blk_4way(const struct serpent_ctx *ctx, u8 *dst, const u8 *src, bool xor); -asmlinkage void serpent_dec_blk_4way(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_dec_blk_4way(const struct serpent_ctx *ctx, u8 *dst, const u8 *src); -static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) +static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src) { __serpent_enc_blk_4way(ctx, dst, src, false); } -static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) +static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx, + u8 *dst, const u8 *src) { __serpent_enc_blk_4way(ctx, dst, src, true); } -static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) +static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src) { serpent_dec_blk_4way(ctx, dst, src); } @@ -36,25 +34,23 @@ static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, #define SERPENT_PARALLEL_BLOCKS 8 -asmlinkage void __serpent_enc_blk_8way(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void __serpent_enc_blk_8way(const struct serpent_ctx *ctx, u8 *dst, const u8 *src, bool xor); -asmlinkage void serpent_dec_blk_8way(struct serpent_ctx *ctx, u8 *dst, +asmlinkage void serpent_dec_blk_8way(const struct serpent_ctx *ctx, u8 *dst, const u8 *src); -static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) +static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src) { __serpent_enc_blk_8way(ctx, dst, src, false); } -static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) +static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx, + u8 *dst, const u8 *src) { __serpent_enc_blk_8way(ctx, dst, src, true); } -static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst, - const u8 *src) +static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src) { serpent_dec_blk_8way(ctx, dst, src); } diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h index f618bf272b900..2c377a8042e17 100644 --- a/arch/x86/include/asm/crypto/twofish.h +++ b/arch/x86/include/asm/crypto/twofish.h @@ -7,22 +7,19 @@ #include /* regular block cipher functions from twofish_x86_64 module */ -asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); -asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void twofish_enc_blk(const void *ctx, u8 *dst, const u8 *src); +asmlinkage void twofish_dec_blk(const void *ctx, u8 *dst, const u8 *src); /* 3-way parallel cipher functions */ -asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src, bool xor); -asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, - const u8 *src); +asmlinkage void __twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src, + bool xor); +asmlinkage void twofish_dec_blk_3way(const void *ctx, u8 *dst, const u8 *src); /* helpers from twofish_x86_64-3way module */ -extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src); -extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, +extern void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src); +extern void twofish_enc_blk_ctr(const void *ctx, u8 *dst, const u8 *src, le128 *iv); -extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, +extern void twofish_enc_blk_ctr_3way(const void *ctx, u8 *dst, const u8 *src, le128 *iv); #endif /* ASM_X86_TWOFISH_H */ diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h index 00f7cf45e6999..8e95aa4b0d172 100644 --- a/arch/x86/include/asm/dma.h +++ b/arch/x86/include/asm/dma.h @@ -74,7 +74,7 @@ #define MAX_DMA_PFN ((16UL * 1024 * 1024) >> PAGE_SHIFT) /* 4GB broken PCI/AGP hardware bus master zone */ -#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT) +#define MAX_DMA32_PFN (1UL << (32 - PAGE_SHIFT)) #ifdef CONFIG_X86_32 /* The maximum address that we can perform a DMA transfer to on this platform */ diff --git a/arch/x86/include/asm/emulate_prefix.h b/arch/x86/include/asm/emulate_prefix.h new file mode 100644 index 0000000000000..70f5b98a52869 --- /dev/null +++ b/arch/x86/include/asm/emulate_prefix.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_EMULATE_PREFIX_H +#define _ASM_X86_EMULATE_PREFIX_H + +/* + * Virt escape sequences to trigger instruction emulation; + * ideally these would decode to 'whole' instruction and not destroy + * the instruction stream; sadly this is not true for the 'kvm' one :/ + */ + +#define __XEN_EMULATE_PREFIX 0x0f,0x0b,0x78,0x65,0x6e /* ud2 ; .ascii "xen" */ +#define __KVM_EMULATE_PREFIX 0x0f,0x0b,0x6b,0x76,0x6d /* ud2 ; .ascii "kvm" */ + +#endif diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 0c47aa82e2e22..28183ee3cc42a 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -156,7 +156,7 @@ extern pte_t *kmap_pte; extern pte_t *pkmap_page_table; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); -void native_set_fixmap(enum fixed_addresses idx, +void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags); #ifndef CONFIG_PARAVIRT_XXL diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index b774c52e5411f..06e767bca0c14 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -16,14 +16,25 @@ * Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It * disables preemption so be careful if you intend to use it for long periods * of time. - * If you intend to use the FPU in softirq you need to check first with + * If you intend to use the FPU in irq/softirq you need to check first with * irq_fpu_usable() if it is possible. */ -extern void kernel_fpu_begin(void); + +/* Kernel FPU states to initialize in kernel_fpu_begin_mask() */ +#define KFPU_387 _BITUL(0) /* 387 state will be initialized */ +#define KFPU_MXCSR _BITUL(1) /* MXCSR will be initialized */ + +extern void kernel_fpu_begin_mask(unsigned int kfpu_mask); extern void kernel_fpu_end(void); extern bool irq_fpu_usable(void); extern void fpregs_mark_activate(void); +/* Code that is unaware of kernel_fpu_begin_mask() can use this */ +static inline void kernel_fpu_begin(void) +{ + kernel_fpu_begin_mask(KFPU_387 | KFPU_MXCSR); +} + /* * Use fpregs_lock() while editing CPU's FPU registers or fpu->state. * A context switch will (and softirq might) save CPU's FPU registers to diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 4c95c365058aa..5ed702e2c55f4 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -102,6 +102,7 @@ static inline void fpstate_init_fxstate(struct fxregs_state *fx) } extern void fpstate_sanitize_xstate(struct fpu *fpu); +/* Returns 0 or the negated trap number, which results in -EFAULT for #PF */ #define user_insn(insn, output, input...) \ ({ \ int err; \ @@ -109,14 +110,14 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu); might_fault(); \ \ asm volatile(ASM_STAC "\n" \ - "1:" #insn "\n\t" \ + "1: " #insn "\n" \ "2: " ASM_CLAC "\n" \ ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ + "3: negl %%eax\n" \ " jmp 2b\n" \ ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err), output \ + _ASM_EXTABLE_FAULT(1b, 3b) \ + : [err] "=a" (err), output \ : "0"(0), input); \ err; \ }) @@ -203,6 +204,14 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); } +static inline void fxsave(struct fxregs_state *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx)); + else + asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx)); +} + /* These macros all use (%edi)/(%rdi) as the single memory argument. */ #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" @@ -210,16 +219,20 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) #define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" #define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" +/* + * After this @err contains 0 on success or the negated trap number when + * the operation raises an exception. For faults this results in -EFAULT. + */ #define XSTATE_OP(op, st, lmask, hmask, err) \ asm volatile("1:" op "\n\t" \ "xor %[err], %[err]\n" \ "2:\n\t" \ ".pushsection .fixup,\"ax\"\n\t" \ - "3: movl $-2,%[err]\n\t" \ + "3: negl %%eax\n\t" \ "jmp 2b\n\t" \ ".popsection\n\t" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err) \ + _ASM_EXTABLE_FAULT(1b, 3b) \ + : [err] "=a" (err) \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") @@ -267,28 +280,6 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. - */ -static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) -{ - u64 mask = -1; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - XSTATE_OP(XSAVES, xstate, lmask, hmask, err); - else - XSTATE_OP(XSAVE, xstate, lmask, hmask, err); - - /* We should never fault when copying to a kernel buffer: */ - WARN_ON_FPU(err); -} - /* * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. @@ -509,7 +500,7 @@ static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu) static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu) { - return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; + return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; } /* @@ -569,9 +560,11 @@ static inline void __fpregs_load_activate(void) * The FPU context is only stored/restored for a user task and * PF_KTHREAD is used to distinguish between kernel and user threads. */ -static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) +static inline void switch_fpu_prepare(struct task_struct *prev, int cpu) { - if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) { + struct fpu *old_fpu = &prev->thread.fpu; + + if (static_cpu_has(X86_FEATURE_FPU) && !(prev->flags & PF_KTHREAD)) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else @@ -590,10 +583,11 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) * Load PKRU from the FPU context if available. Delay loading of the * complete FPU state until the return to userland. */ -static inline void switch_fpu_finish(struct fpu *new_fpu) +static inline void switch_fpu_finish(struct task_struct *next) { u32 pkru_val = init_pkru_value; struct pkru_state *pk; + struct fpu *next_fpu = &next->thread.fpu; if (!static_cpu_has(X86_FEATURE_FPU)) return; @@ -607,10 +601,17 @@ static inline void switch_fpu_finish(struct fpu *new_fpu) * PKRU state is switched eagerly because it needs to be valid before we * return to userland e.g. for a copy_to_user() operation. */ - if (current->mm) { - pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU); - if (pk) - pkru_val = pk->pkru; + if (!(next->flags & PF_KTHREAD)) { + /* + * If the PKRU bit in xsave.header.xfeatures is not set, + * then the PKRU component was in init state, which means + * XRSTOR will set PKRU to 0. If the bit is not set then + * get_xsave_addr() will return NULL because the PKRU value + * in memory is not valid. This means pkru_val has to be + * set to 0 and not to init_pkru_value. + */ + pk = get_xsave_addr(&next_fpu->state.xsave, XFEATURE_PKRU); + pkru_val = pk ? pk->pkru : 0; } __write_pkru(pkru_val); } @@ -619,6 +620,11 @@ static inline void switch_fpu_finish(struct fpu *new_fpu) * MXCSR and XCR definitions: */ +static inline void ldmxcsr(u32 mxcsr) +{ + asm volatile("ldmxcsr %0" :: "m" (mxcsr)); +} + extern unsigned int mxcsr_feature_mask; #define XCR_XFEATURE_ENABLED_MASK 0x00000000 diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 154f27be8bfcb..a8c3d284fa46c 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -45,6 +45,7 @@ struct insn { struct insn_field immediate2; /* for 64bit imm or seg16 */ }; + int emulate_prefix_size; insn_attr_t attr; unsigned char opnd_bytes; unsigned char addr_bytes; @@ -128,6 +129,11 @@ static inline int insn_is_evex(struct insn *insn) return (insn->vex_prefix.nbytes == 4); } +static inline int insn_has_emulate_prefix(struct insn *insn) +{ + return !!insn->emulate_prefix_size; +} + /* Ensure this instruction is decoded completely */ static inline int insn_complete(struct insn *insn) { @@ -195,6 +201,21 @@ static inline int insn_offset_immediate(struct insn *insn) return insn_offset_displacement(insn) + insn->displacement.nbytes; } +/** + * for_each_insn_prefix() -- Iterate prefixes in the instruction + * @insn: Pointer to struct insn. + * @idx: Index storage. + * @prefix: Prefix byte. + * + * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix + * and the index is stored in @idx (note that this @idx is just for a cursor, + * do not change it.) + * Since prefixes.nbytes can be bigger than 4 if some prefixes + * are repeated, it cannot be used for looping over the prefixes. + */ +#define for_each_insn_prefix(insn, idx, prefix) \ + for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++) + #define POP_SS_OPCODE 0x1f #define MOV_SREG_OPCODE 0x8e diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index c606c0b707382..c1d6d8bbb7dad 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -35,6 +35,9 @@ * The #define line may optionally include a comment including platform names. */ +/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */ +#define INTEL_FAM6_ANY X86_MODEL_ANY + #define INTEL_FAM6_CORE_YONAH 0x0E #define INTEL_FAM6_CORE2_MEROM 0x0F @@ -86,6 +89,14 @@ #define INTEL_FAM6_COMETLAKE 0xA5 #define INTEL_FAM6_COMETLAKE_L 0xA6 +#define INTEL_FAM6_ROCKETLAKE 0xA7 + +/* Hybrid Core/Atom Processors */ + +#define INTEL_FAM6_LAKEFIELD 0x8A +#define INTEL_FAM6_ALDERLAKE 0x97 +#define INTEL_FAM6_ALDERLAKE_L 0x9A + /* "Small Core" Processors (Atom) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ @@ -111,12 +122,16 @@ #define INTEL_FAM6_ATOM_TREMONT_D 0x86 /* Jacobsville */ #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ +#define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ /* Xeon Phi */ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ #define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */ +/* Family 5 */ +#define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */ + /* Useful macros */ #define INTEL_CPU_FAM_ANY(_family, _model, _driver_data) \ { \ diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 5e7d6b46de97d..367da081f7d9d 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -201,6 +202,14 @@ extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages); #define arch_kexec_pre_free_pages arch_kexec_pre_free_pages +#ifdef CONFIG_KEXEC_FILE +struct purgatory_info; +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab); +#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add +#endif #endif typedef void crash_vmclear_fn(void); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4fc61483919aa..4bc476d7fa6c4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -380,19 +380,17 @@ struct kvm_mmu { void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); - int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, + int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err, bool prefault); void (*inject_page_fault)(struct kvm_vcpu *vcpu, struct x86_exception *fault); - gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, - struct x86_exception *exception); + gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa, + u32 access, struct x86_exception *exception); gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, struct x86_exception *exception); int (*sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa); - void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - u64 *spte, const void *pte); hpa_t root_hpa; gpa_t root_cr3; union kvm_mmu_role mmu_role; @@ -550,6 +548,7 @@ struct kvm_vcpu_arch { unsigned long cr4; unsigned long cr4_guest_owned_bits; unsigned long cr8; + u32 host_pkru; u32 pkru; u32 hflags; u64 efer; @@ -667,10 +666,10 @@ struct kvm_vcpu_arch { bool pvclock_set_guest_stopped_request; struct { + u8 preempted; u64 msr_val; u64 last_steal; - struct gfn_to_hva_cache stime; - struct kvm_steal_time steal; + struct gfn_to_pfn_cache cache; } st; u64 tsc_offset; @@ -943,7 +942,6 @@ struct kvm_arch { struct kvm_vm_stat { ulong mmu_shadow_zapped; ulong mmu_pte_write; - ulong mmu_pte_updated; ulong mmu_pde_zapped; ulong mmu_flooded; ulong mmu_recycled; @@ -1098,7 +1096,7 @@ struct kvm_x86_ops { void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); - void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); + int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr); @@ -1128,8 +1126,9 @@ struct kvm_x86_ops { bool (*xsaves_supported)(void); bool (*umip_emulated)(void); bool (*pt_supported)(void); + bool (*pku_supported)(void); - int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); + int (*check_nested_events)(struct kvm_vcpu *vcpu); void (*request_immediate_exit)(struct kvm_vcpu *vcpu); void (*sched_in)(struct kvm_vcpu *kvm, int cpu); @@ -1158,7 +1157,7 @@ struct kvm_x86_ops { void (*enable_log_dirty_pt_masked)(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t offset, unsigned long mask); - int (*write_log_dirty)(struct kvm_vcpu *vcpu); + int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa); /* pmu operations of sub-arch */ const struct kvm_pmu_ops *pmu_ops; @@ -1450,7 +1449,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code, +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); @@ -1551,12 +1550,14 @@ asmlinkage void kvm_spurious_fault(void); _ASM_EXTABLE(666b, 667b) #define KVM_ARCH_WANT_MMU_NOTIFIER -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, + unsigned flags); int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); +int kvm_cpu_has_extint(struct kvm_vcpu *v); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); @@ -1607,8 +1608,8 @@ void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) { /* We can only post Fixed and LowPrio IRQs */ - return (irq->delivery_mode == dest_Fixed || - irq->delivery_mode == dest_LowestPrio); + return (irq->delivery_mode == APIC_DM_FIXED || + irq->delivery_mode == APIC_DM_LOWEST); } static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 9b4df6eaa11a6..7ff8ad490a782 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -6,8 +6,6 @@ #include #include -extern void kvmclock_init(void); - #ifdef CONFIG_KVM_GUEST bool kvm_check_and_clear_guest_paused(void); #else @@ -85,13 +83,14 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, } #ifdef CONFIG_KVM_GUEST +void kvmclock_init(void); +void kvmclock_disable(void); bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); unsigned int kvm_arch_para_hints(void); void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_pf_reason(void); -extern void kvm_disable_steal_time(void); void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); #ifdef CONFIG_PARAVIRT_SPINLOCKS @@ -126,10 +125,6 @@ static inline u32 kvm_read_and_reset_pf_reason(void) return 0; } -static inline void kvm_disable_steal_time(void) -{ - return; -} #endif #endif /* _ASM_X86_KVM_PARA_H */ diff --git a/arch/x86/include/asm/kvmclock.h b/arch/x86/include/asm/kvmclock.h index eceea92990974..6c57651921028 100644 --- a/arch/x86/include/asm/kvmclock.h +++ b/arch/x86/include/asm/kvmclock.h @@ -2,6 +2,20 @@ #ifndef _ASM_X86_KVM_CLOCK_H #define _ASM_X86_KVM_CLOCK_H +#include + extern struct clocksource kvm_clock; +DECLARE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); + +static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) +{ + return &this_cpu_read(hv_clock_per_cpu)->pvti; +} + +static inline struct pvclock_vsyscall_time_info *this_cpu_hvclock(void) +{ + return this_cpu_read(hv_clock_per_cpu); +} + #endif /* _ASM_X86_KVM_CLOCK_H */ diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 14caa9d9fb7ff..e07188e8d7632 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -13,9 +13,13 @@ #ifdef __ASSEMBLY__ -#define GLOBAL(name) \ - .globl name; \ - name: +/* + * GLOBAL is DEPRECATED + * + * use SYM_DATA_START, SYM_FUNC_START, SYM_INNER_LABEL, SYM_CODE_START, or + * similar + */ +#define GLOBAL(name) SYM_ENTRY(name, SYM_L_GLOBAL, SYM_A_NONE) #if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16) #define __ALIGN .p2align 4, 0x90 diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index dc2d4b206ab7e..0f84c20c54297 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -290,6 +290,7 @@ extern void apei_mce_report_mem_error(int corrected, /* These may be used by multiple smca_hwid_mcatypes */ enum smca_bank_types { SMCA_LS = 0, /* Load Store */ + SMCA_LS_V2, /* Load Store */ SMCA_IF, /* Instruction Fetch */ SMCA_L2_CACHE, /* L2 Cache */ SMCA_DE, /* Decoder Unit */ diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 2b7cc5397f80d..91a06cef50c1b 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -133,11 +133,13 @@ extern void load_ucode_ap(void); void reload_early_microcode(void); extern bool get_builtin_firmware(struct cpio_data *cd, const char *name); extern bool initrd_gone; +void microcode_bsp_resume(void); #else static inline int __init microcode_init(void) { return 0; }; static inline void __init load_ucode_bsp(void) { } static inline void load_ucode_ap(void) { } static inline void reload_early_microcode(void) { } +static inline void microcode_bsp_resume(void) { } static inline bool get_builtin_firmware(struct cpio_data *cd, const char *name) { return false; } #endif diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index 209492849566c..5c524d4f71cd7 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -41,7 +41,7 @@ struct microcode_amd { unsigned int mpb[0]; }; -#define PATCH_MAX_SIZE PAGE_SIZE +#define PATCH_MAX_SIZE (3 * PAGE_SIZE) #ifdef CONFIG_MICROCODE_AMD extern void __init load_ucode_amd_bsp(unsigned int family); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 6a3124664289a..713886d5493a8 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -47,6 +47,8 @@ #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ +#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ @@ -82,6 +84,7 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */ #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ #define ARCH_CAP_SSB_NO BIT(4) /* * Not susceptible to Speculative Store Bypass @@ -105,6 +108,41 @@ * Not susceptible to * TSX Async Abort (TAA) vulnerabilities. */ +#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /* + * Not susceptible to SBDR and SSDP + * variants of Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FBSDP_NO BIT(14) /* + * Not susceptible to FBSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_PSDP_NO BIT(15) /* + * Not susceptible to PSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FB_CLEAR BIT(17) /* + * VERW clears CPU fill buffer + * even on MDS_NO CPUs. + */ +#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /* + * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS] + * bit available to control VERW + * behavior. + */ +#define ARCH_CAP_RRSBA BIT(19) /* + * Indicates RET may use predictors + * other than the RSB. With eIBRS + * enabled predictions in kernel mode + * are restricted to targets in + * kernel. + */ +#define ARCH_CAP_PBRSB_NO BIT(24) /* + * Not susceptible to Post-Barrier + * Return Stack Buffer Predictions. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* @@ -119,6 +157,11 @@ #define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ #define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ +/* SRBDS support */ +#define MSR_IA32_MCU_OPT_CTRL 0x00000123 +#define RNGDS_MITG_DIS BIT(0) +#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ + #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 #define MSR_IA32_SYSENTER_EIP 0x00000176 @@ -428,6 +471,7 @@ #define MSR_AMD64_IBSOP_REG_MASK ((1UL< +#include #include #include #include #include +#include +#include /* * This should be used immediately before a retpoline alternative. It tells @@ -59,9 +62,11 @@ lfence; \ jmp 775b; \ 774: \ + add $(BITS_PER_LONG/8) * 2, sp; \ dec reg; \ jnz 771b; \ - add $(BITS_PER_LONG/8) * nr, sp; + /* barrier for jnz misprediction */ \ + lfence; #ifdef __ASSEMBLY__ @@ -115,7 +120,7 @@ ANNOTATE_NOSPEC_ALTERNATIVE ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \ __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_LFENCE #else jmp *\reg #endif @@ -126,24 +131,34 @@ ANNOTATE_NOSPEC_ALTERNATIVE ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \ __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_LFENCE #else call *\reg #endif .endm +.macro ISSUE_UNBALANCED_RET_GUARD + call .Lunbalanced_ret_guard_\@ + int3 +.Lunbalanced_ret_guard_\@: + add $(BITS_PER_LONG/8), %_ASM_SP + lfence +.endm + /* * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP * monstrosity above, manually. */ -.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req -#ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE "jmp .Lskip_rsb_\@", \ - __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ - \ftr +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2 +.ifb \ftr2 + ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr +.else + ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2 +.endif + __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) +.Lunbalanced_\@: + ISSUE_UNBALANCED_RET_GUARD .Lskip_rsb_\@: -#endif .endm #else /* __ASSEMBLY__ */ @@ -171,7 +186,7 @@ "lfence;\n" \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_AMD) + X86_FEATURE_RETPOLINE_LFENCE) # define THUNK_TARGET(addr) [thunk_target] "r" (addr) #else /* CONFIG_X86_32 */ @@ -201,7 +216,7 @@ "lfence;\n" \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_AMD) + X86_FEATURE_RETPOLINE_LFENCE) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif @@ -213,9 +228,12 @@ /* The Spectre V2 mitigation variants */ enum spectre_v2_mitigation { SPECTRE_V2_NONE, - SPECTRE_V2_RETPOLINE_GENERIC, - SPECTRE_V2_RETPOLINE_AMD, - SPECTRE_V2_IBRS_ENHANCED, + SPECTRE_V2_RETPOLINE, + SPECTRE_V2_LFENCE, + SPECTRE_V2_EIBRS, + SPECTRE_V2_EIBRS_RETPOLINE, + SPECTRE_V2_EIBRS_LFENCE, + SPECTRE_V2_IBRS, }; /* The indirect branch speculation control variants */ @@ -279,6 +297,9 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; +DECLARE_PER_CPU(u64, x86_spec_ctrl_current); +extern void write_spec_ctrl_current(u64 val, bool force); +extern u64 spec_ctrl_current(void); /* * With retpoline, we must use IBRS to restrict branch prediction @@ -288,18 +309,16 @@ extern u64 x86_spec_ctrl_base; */ #define firmware_restrict_branch_speculation_start() \ do { \ - u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ - \ preempt_disable(); \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current() | SPEC_CTRL_IBRS, \ X86_FEATURE_USE_IBRS_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ do { \ - u64 val = x86_spec_ctrl_base; \ - \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, \ + spec_ctrl_current(), \ X86_FEATURE_USE_IBRS_FW); \ preempt_enable(); \ } while (0) @@ -311,6 +330,8 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DECLARE_STATIC_KEY_FALSE(mds_user_clear); DECLARE_STATIC_KEY_FALSE(mds_idle_clear); +DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); + #include /** @@ -320,7 +341,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear); * combination with microcode which triggers a CPU buffer flush when the * instruction is executed. */ -static inline void mds_clear_cpu_buffers(void) +static __always_inline void mds_clear_cpu_buffers(void) { static const u16 ds = __KERNEL_DS; @@ -341,7 +362,7 @@ static inline void mds_clear_cpu_buffers(void) * * Clear CPU buffers if the corresponding static key is enabled */ -static inline void mds_user_clear_cpu_buffers(void) +static __always_inline void mds_user_clear_cpu_buffers(void) { if (static_branch_likely(&mds_user_clear)) mds_clear_cpu_buffers(); diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 288b065955b72..9d0b479452720 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -15,7 +15,7 @@ #define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER) #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) -#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER) +#define EXCEPTION_STACK_ORDER (1 + KASAN_STACK_ORDER) #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) #define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER) diff --git a/arch/x86/include/asm/pci-direct.h b/arch/x86/include/asm/pci-direct.h index 94597a3cf3d0f..ebb3db2eee41b 100644 --- a/arch/x86/include/asm/pci-direct.h +++ b/arch/x86/include/asm/pci-direct.h @@ -10,9 +10,11 @@ extern u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset); extern u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset); extern u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset); +extern u32 pci_early_find_cap(int bus, int slot, int func, int cap); extern void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val); extern void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val); extern void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val); +extern unsigned int pci_early_clear_msi; extern int early_pci_allowed(void); #endif /* _ASM_X86_PCI_DIRECT_H */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 0bc530c4eb134..1ae7c20f54691 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -253,6 +253,7 @@ static inline int pmd_large(pmd_t pte) } #ifdef CONFIG_TRANSPARENT_HUGEPAGE +/* NOTE: when predicate huge page, consider also pmd_devmap, or use pmd_large */ static inline int pmd_trans_huge(pmd_t pmd) { return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; @@ -624,12 +625,15 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) return __pmd(val); } -/* mprotect needs to preserve PAT bits when updating vm_page_prot */ +/* + * mprotect needs to preserve PAT and encryption bits when updating + * vm_page_prot + */ #define pgprot_modify pgprot_modify static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) { pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK; - pgprotval_t addbits = pgprot_val(newprot); + pgprotval_t addbits = pgprot_val(newprot) & ~_PAGE_CHG_MASK; return __pgprot(preservebits | addbits); } @@ -1371,8 +1375,8 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) #endif #endif -#define PKRU_AD_BIT 0x1 -#define PKRU_WD_BIT 0x2 +#define PKRU_AD_BIT 0x1u +#define PKRU_WD_BIT 0x2u #define PKRU_BITS_PER_PKEY 2 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index b0bc0fff5f1f9..1636eb8e5a5ba 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -44,11 +44,11 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c * to avoid include recursion hell */ -#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 39) -#define CPU_ENTRY_AREA_BASE \ - ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \ - & PMD_MASK) +/* The +1 is for the readonly IDT page: */ +#define CPU_ENTRY_AREA_BASE \ + ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK) #define LDT_BASE_ADDR \ ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b5e49e6bac635..ff77f561e11f0 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -123,7 +123,7 @@ */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ - _PAGE_SOFT_DIRTY | _PAGE_DEVMAP) + _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC) #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) /* @@ -147,6 +147,7 @@ enum page_cache_mode { #endif #define _PAGE_CACHE_MASK (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT) +#define _PAGE_LARGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT_LARGE) #define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) #define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP)) diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 19b137f1b3beb..2ff9b98812b76 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -4,6 +4,11 @@ #define ARCH_DEFAULT_PKEY 0 +/* + * If more than 16 keys are ever supported, a thorough audit + * will be necessary to ensure that the types that store key + * numbers and masks have sufficient capacity. + */ #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 54f5d54280f60..537c0dd4c3d4a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -334,7 +334,7 @@ struct x86_hw_tss { #define INVALID_IO_BITMAP_OFFSET 0x8000 struct entry_stack { - unsigned long words[64]; + char stack[PAGE_SIZE]; }; struct entry_stack_page { @@ -506,15 +506,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, *size = fpu_kernel_xstate_size; } -/* - * Thread-synchronous status. - * - * This is different from the flags in that nobody else - * ever touches our thread-synchronous status, so we don't - * have to worry about atomic accesses. - */ -#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ - /* * Set IOPL bits in EFLAGS from given mask */ diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 6e81788a30c12..0eaca7a130c9f 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -4,6 +4,8 @@ #include +struct task_struct; + /* misc architecture specific prototypes */ void syscall_init(void); diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 332eb35258676..902be2e6e96cf 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -309,8 +309,8 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, static const unsigned int argument_offs[] = { #ifdef __i386__ offsetof(struct pt_regs, ax), - offsetof(struct pt_regs, cx), offsetof(struct pt_regs, dx), + offsetof(struct pt_regs, cx), #define NR_REG_ARGUMENTS 3 #else offsetof(struct pt_regs, di), diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index 09ecc32f65248..52d7512ea91ab 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -82,6 +82,7 @@ static inline void set_real_mode_mem(phys_addr_t mem) } void reserve_real_mode(void); +void load_trampoline_pgtable(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h deleted file mode 100644 index 232f856e0db06..0000000000000 --- a/arch/x86/include/asm/refcount.h +++ /dev/null @@ -1,126 +0,0 @@ -#ifndef __ASM_X86_REFCOUNT_H -#define __ASM_X86_REFCOUNT_H -/* - * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from - * PaX/grsecurity. - */ -#include -#include - -/* - * This is the first portion of the refcount error handling, which lives in - * .text.unlikely, and is jumped to from the CPU flag check (in the - * following macros). This saves the refcount value location into CX for - * the exception handler to use (in mm/extable.c), and then triggers the - * central refcount exception. The fixup address for the exception points - * back to the regular execution flow in .text. - */ -#define _REFCOUNT_EXCEPTION \ - ".pushsection .text..refcount\n" \ - "111:\tlea %[var], %%" _ASM_CX "\n" \ - "112:\t" ASM_UD2 "\n" \ - ASM_UNREACHABLE \ - ".popsection\n" \ - "113:\n" \ - _ASM_EXTABLE_REFCOUNT(112b, 113b) - -/* Trigger refcount exception if refcount result is negative. */ -#define REFCOUNT_CHECK_LT_ZERO \ - "js 111f\n\t" \ - _REFCOUNT_EXCEPTION - -/* Trigger refcount exception if refcount result is zero or negative. */ -#define REFCOUNT_CHECK_LE_ZERO \ - "jz 111f\n\t" \ - REFCOUNT_CHECK_LT_ZERO - -/* Trigger refcount exception unconditionally. */ -#define REFCOUNT_ERROR \ - "jmp 111f\n\t" \ - _REFCOUNT_EXCEPTION - -static __always_inline void refcount_add(unsigned int i, refcount_t *r) -{ - asm volatile(LOCK_PREFIX "addl %1,%0\n\t" - REFCOUNT_CHECK_LT_ZERO - : [var] "+m" (r->refs.counter) - : "ir" (i) - : "cc", "cx"); -} - -static __always_inline void refcount_inc(refcount_t *r) -{ - asm volatile(LOCK_PREFIX "incl %0\n\t" - REFCOUNT_CHECK_LT_ZERO - : [var] "+m" (r->refs.counter) - : : "cc", "cx"); -} - -static __always_inline void refcount_dec(refcount_t *r) -{ - asm volatile(LOCK_PREFIX "decl %0\n\t" - REFCOUNT_CHECK_LE_ZERO - : [var] "+m" (r->refs.counter) - : : "cc", "cx"); -} - -static __always_inline __must_check -bool refcount_sub_and_test(unsigned int i, refcount_t *r) -{ - bool ret = GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", - REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, e, "er", i, "cx"); - - if (ret) { - smp_acquire__after_ctrl_dep(); - return true; - } - - return false; -} - -static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r) -{ - bool ret = GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", - REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, e, "cx"); - - if (ret) { - smp_acquire__after_ctrl_dep(); - return true; - } - - return false; -} - -static __always_inline __must_check -bool refcount_add_not_zero(unsigned int i, refcount_t *r) -{ - int c, result; - - c = atomic_read(&(r->refs)); - do { - if (unlikely(c == 0)) - return false; - - result = c + i; - - /* Did we try to increment from/to an undesirable state? */ - if (unlikely(c < 0 || c == INT_MAX || result < c)) { - asm volatile(REFCOUNT_ERROR - : : [var] "m" (r->refs.counter) - : "cc", "cx"); - break; - } - - } while (!atomic_try_cmpxchg(&(r->refs), &c, result)); - - return c != 0; -} - -static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r) -{ - return refcount_add_not_zero(1, r); -} - -#endif diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index ac38929204194..6669164abadcb 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -31,6 +31,18 @@ */ #define SEGMENT_RPL_MASK 0x3 +/* + * When running on Xen PV, the actual privilege level of the kernel is 1, + * not 0. Testing the Requested Privilege Level in a segment selector to + * determine whether the context is user mode or kernel mode with + * SEGMENT_RPL_MASK is wrong because the PV kernel's privilege level + * matches the 0x3 mask. + * + * Testing with USER_SEGMENT_RPL_MASK is valid for both native and Xen PV + * kernels because privilege level 2 is never used. + */ +#define USER_SEGMENT_RPL_MASK 0x2 + /* User mode is privilege level 3: */ #define USER_RPL 0x3 diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 2ee8e469dcf5c..162128cdfbf29 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -85,28 +85,35 @@ void set_kernel_text_rw(void); void set_kernel_text_ro(void); #ifdef CONFIG_X86_64 -static inline int set_mce_nospec(unsigned long pfn) +/* + * Prevent speculative access to the page by either unmapping + * it (if we do not require access to any part of the page) or + * marking it uncacheable (if we want to try to retrieve data + * from non-poisoned lines in the page). + */ +static inline int set_mce_nospec(unsigned long pfn, bool unmap) { unsigned long decoy_addr; int rc; /* - * Mark the linear address as UC to make sure we don't log more - * errors because of speculative access to the page. * We would like to just call: - * set_memory_uc((unsigned long)pfn_to_kaddr(pfn), 1); + * set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1); * but doing that would radically increase the odds of a * speculative access to the poison page because we'd have * the virtual address of the kernel 1:1 mapping sitting * around in registers. * Instead we get tricky. We create a non-canonical address * that looks just like the one we want, but has bit 63 flipped. - * This relies on set_memory_uc() properly sanitizing any __pa() + * This relies on set_memory_XX() properly sanitizing any __pa() * results with __PHYSICAL_MASK or PTE_PFN_MASK. */ decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); - rc = set_memory_uc(decoy_addr, 1); + if (unmap) + rc = set_memory_np(decoy_addr, 1); + else + rc = set_memory_uc(decoy_addr, 1); if (rc) pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); return rc; diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 27c47d183f4b3..8b58d6975d5d4 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -57,8 +57,10 @@ static __always_inline unsigned long smap_save(void) { unsigned long flags; - asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC, - X86_FEATURE_SMAP) + asm volatile ("# smap_save\n\t" + ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP) + "pushf; pop %0; " __ASM_CLAC "\n\t" + "1:" : "=rm" (flags) : : "memory", "cc"); return flags; @@ -66,7 +68,10 @@ static __always_inline unsigned long smap_save(void) static __always_inline void smap_restore(unsigned long flags) { - asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP) + asm volatile ("# smap_restore\n\t" + ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP) + "push %0; popf\n\t" + "1:" : : "g" (flags) : "memory", "cc"); } diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 6d37b8fcfc778..2e0cdc64cb50d 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -10,45 +10,47 @@ #include /* - * Volatile isn't enough to prevent the compiler from reordering the - * read/write functions for the control registers and messing everything up. - * A memory clobber would solve the problem, but would prevent reordering of - * all loads stores around it, which can hurt performance. Solution is to - * use a variable and mimic reads and writes to it to enforce serialization + * The compiler should not reorder volatile asm statements with respect to each + * other: they should execute in program order. However GCC 4.9.x and 5.x have + * a bug (which was fixed in 8.1, 7.3 and 6.5) where they might reorder + * volatile asm. The write functions are not affected since they have memory + * clobbers preventing reordering. To prevent reads from being reordered with + * respect to writes, use a dummy memory operand. */ -extern unsigned long __force_order; + +#define __FORCE_ORDER "m"(*(unsigned int *)0x1000UL) void native_write_cr0(unsigned long val); static inline unsigned long native_read_cr0(void) { unsigned long val; - asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order)); + asm volatile("mov %%cr0,%0\n\t" : "=r" (val) : __FORCE_ORDER); return val; } static inline unsigned long native_read_cr2(void) { unsigned long val; - asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order)); + asm volatile("mov %%cr2,%0\n\t" : "=r" (val) : __FORCE_ORDER); return val; } static inline void native_write_cr2(unsigned long val) { - asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order)); + asm volatile("mov %0,%%cr2": : "r" (val) : "memory"); } static inline unsigned long __native_read_cr3(void) { unsigned long val; - asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order)); + asm volatile("mov %%cr3,%0\n\t" : "=r" (val) : __FORCE_ORDER); return val; } static inline void native_write_cr3(unsigned long val) { - asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order)); + asm volatile("mov %0,%%cr3": : "r" (val) : "memory"); } static inline unsigned long native_read_cr4(void) @@ -63,10 +65,10 @@ static inline unsigned long native_read_cr4(void) asm volatile("1: mov %%cr4, %0\n" "2:\n" _ASM_EXTABLE(1b, 2b) - : "=r" (val), "=m" (__force_order) : "0" (0)); + : "=r" (val) : "0" (0), __FORCE_ORDER); #else /* CR4 always exists on x86_64. */ - asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order)); + asm volatile("mov %%cr4,%0\n\t" : "=r" (val) : __FORCE_ORDER); #endif return val; } diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 91e29b6a86a5e..9804a7957f4e9 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -55,8 +55,13 @@ /* * Initialize the stackprotector canary value. * - * NOTE: this must only be called from functions that never return, + * NOTE: this must only be called from functions that never return * and it must always be inlined. + * + * In addition, it should be called from a compilation unit for which + * stack protector is disabled. Alternatively, the caller should not end + * with a function call which gets tail-call optimized as that would + * lead to checking a modified canary value. */ static __always_inline void boot_init_stack_canary(void) { diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index fdbd9d7b7bca1..3b97aa9215430 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -21,7 +21,6 @@ struct saved_context { #endif unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; - bool misc_enable_saved; struct saved_msrs saved_msrs; struct desc_ptr gdt_desc; struct desc_ptr idt; @@ -30,6 +29,7 @@ struct saved_context { unsigned long tr; unsigned long safety; unsigned long return_address; + bool misc_enable_saved; } __attribute__((packed)); /* routines for saving/restoring kernel state */ diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index 35bb35d28733e..54df06687d834 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -14,9 +14,13 @@ * Image of the saved processor state, used by the low level ACPI suspend to * RAM code and by the low level hibernation code. * - * If you modify it, fix arch/x86/kernel/acpi/wakeup_64.S and make sure that - * __save/__restore_processor_state(), defined in arch/x86/kernel/suspend_64.c, - * still work as required. + * If you modify it, check how it is used in arch/x86/kernel/acpi/wakeup_64.S + * and make sure that __save/__restore_processor_state(), defined in + * arch/x86/power/cpu.c, still work as required. + * + * Because the structure is packed, make sure to avoid unaligned members. For + * optimisation purposes but also because tools like kmemleak only search for + * pointers that are aligned. */ struct saved_context { struct pt_regs regs; @@ -36,7 +40,6 @@ struct saved_context { unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; - bool misc_enable_saved; struct saved_msrs saved_msrs; unsigned long efer; u16 gdt_pad; /* Unused */ @@ -48,6 +51,7 @@ struct saved_context { unsigned long tr; unsigned long safety; unsigned long return_address; + bool misc_enable_saved; } __attribute__((packed)); #define loaddebug(thread,register) \ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 6ece8561ba661..c29d8fb0ffbe2 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -119,6 +119,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_IGN_TPR_SHIFT 20 #define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) +#define V_IRQ_INJECTION_BITS_MASK (V_IRQ_MASK | V_INTR_PRIO_MASK | V_IGN_TPR_MASK) + #define V_INTR_MASKING_SHIFT 24 #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h index c67caafd33817..43b5e02a7b4b9 100644 --- a/arch/x86/include/asm/sync_core.h +++ b/arch/x86/include/asm/sync_core.h @@ -16,12 +16,13 @@ static inline void sync_core_before_usermode(void) /* With PTI, we unconditionally serialize before running user code. */ if (static_cpu_has(X86_FEATURE_PTI)) return; + /* - * Return from interrupt and NMI is done through iret, which is core - * serializing. + * Even if we're in an interrupt, we might reschedule before returning, + * in which case we could switch to a different thread in the same mm + * and return using SYSRET or SYSEXIT. Instead of trying to keep + * track of our need to sync the core, just sync right away. */ - if (in_irq() || in_nmi()) - return; sync_core(); } diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index e046a405743d8..e2389ce9bf58a 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -6,6 +6,8 @@ #ifndef _ASM_X86_SYSCALL_WRAPPER_H #define _ASM_X86_SYSCALL_WRAPPER_H +struct pt_regs; + /* Mapping of registers to parameters for syscalls on x86-64 and x32 */ #define SC_X86_64_REGS_TO_ARGS(x, ...) \ __MAP(x,__SC_ARGS \ @@ -28,13 +30,21 @@ * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this * case as well. */ +#define __IA32_COMPAT_SYS_STUB0(x, name) \ + asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs);\ + ALLOW_ERROR_INJECTION(__ia32_compat_sys_##name, ERRNO); \ + asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs)\ + { \ + return __se_compat_sys_##name(); \ + } + #define __IA32_COMPAT_SYS_STUBx(x, name, ...) \ asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs);\ ALLOW_ERROR_INJECTION(__ia32_compat_sys##name, ERRNO); \ asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs)\ { \ return __se_compat_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\ - } \ + } #define __IA32_SYS_STUBx(x, name, ...) \ asmlinkage long __ia32_sys##name(const struct pt_regs *regs); \ @@ -48,16 +58,23 @@ * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias * named __ia32_sys_*() */ -#define SYSCALL_DEFINE0(sname) \ - SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __x64_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ - SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \ - asmlinkage long __x64_sys_##sname(void) -#define COND_SYSCALL(name) \ - cond_syscall(__x64_sys_##name); \ - cond_syscall(__ia32_sys_##name) +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\ + ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ + SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \ + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused) + +#define COND_SYSCALL(name) \ + asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused) \ + { \ + return sys_ni_syscall(); \ + } \ + asmlinkage __weak long __ia32_sys_##name(const struct pt_regs *__unused)\ + { \ + return sys_ni_syscall(); \ + } #define SYS_NI(name) \ SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers); \ @@ -75,15 +92,24 @@ * of the x86-64-style parameter ordering of x32 syscalls. The syscalls common * with x86_64 obviously do not need such care. */ +#define __X32_COMPAT_SYS_STUB0(x, name, ...) \ + asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs);\ + ALLOW_ERROR_INJECTION(__x32_compat_sys_##name, ERRNO); \ + asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs)\ + { \ + return __se_compat_sys_##name();\ + } + #define __X32_COMPAT_SYS_STUBx(x, name, ...) \ asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs);\ ALLOW_ERROR_INJECTION(__x32_compat_sys##name, ERRNO); \ asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs)\ { \ return __se_compat_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ - } \ + } #else /* CONFIG_X86_X32 */ +#define __X32_COMPAT_SYS_STUB0(x, name) #define __X32_COMPAT_SYS_STUBx(x, name, ...) #endif /* CONFIG_X86_X32 */ @@ -94,6 +120,17 @@ * mapping of registers to parameters, we need to generate stubs for each * of them. */ +#define COMPAT_SYSCALL_DEFINE0(name) \ + static long __se_compat_sys_##name(void); \ + static inline long __do_compat_sys_##name(void); \ + __IA32_COMPAT_SYS_STUB0(x, name) \ + __X32_COMPAT_SYS_STUB0(x, name) \ + static long __se_compat_sys_##name(void) \ + { \ + return __do_compat_sys_##name(); \ + } \ + static inline long __do_compat_sys_##name(void) + #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ @@ -181,15 +218,19 @@ * macros to work correctly. */ #ifndef SYSCALL_DEFINE0 -#define SYSCALL_DEFINE0(sname) \ - SYSCALL_METADATA(_##sname, 0); \ - asmlinkage long __x64_sys_##sname(void); \ - ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ - asmlinkage long __x64_sys_##sname(void) +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\ + ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ + asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused) #endif #ifndef COND_SYSCALL -#define COND_SYSCALL(name) cond_syscall(__x64_sys_##name) +#define COND_SYSCALL(name) \ + asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused) \ + { \ + return sys_ni_syscall(); \ + } #endif #ifndef SYS_NI @@ -201,7 +242,6 @@ * For VSYSCALLS, we need to declare these three syscalls with the new * pt_regs-based calling convention for in-kernel use. */ -struct pt_regs; asmlinkage long __x64_sys_getcpu(const struct pt_regs *regs); asmlinkage long __x64_sys_gettimeofday(const struct pt_regs *regs); asmlinkage long __x64_sys_time(const struct pt_regs *regs); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index f9453536f9bbc..a4de7aa7500fb 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -221,10 +221,31 @@ static inline int arch_within_stack_frames(const void * const stack, #endif +/* + * Thread-synchronous status. + * + * This is different from the flags in that nobody else + * ever touches our thread-synchronous status, so we don't + * have to worry about atomic accesses. + */ +#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ + +#ifndef __ASSEMBLY__ #ifdef CONFIG_COMPAT #define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ +#define TS_COMPAT_RESTART 0x0008 + +#define arch_set_restart_data arch_set_restart_data + +static inline void arch_set_restart_data(struct restart_block *restart) +{ + struct thread_info *ti = current_thread_info(); + if (ti->status & TS_COMPAT) + ti->status |= TS_COMPAT_RESTART; + else + ti->status &= ~TS_COMPAT_RESTART; +} #endif -#ifndef __ASSEMBLY__ #ifdef CONFIG_X86_32 #define in_ia32_syscall() true diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h index a4a8b1b16c0c1..956e4145311b1 100644 --- a/arch/x86/include/asm/timex.h +++ b/arch/x86/include/asm/timex.h @@ -5,6 +5,15 @@ #include #include +static inline unsigned long random_get_entropy(void) +{ + if (!IS_ENABLED(CONFIG_X86_TSC) && + !cpu_feature_enabled(X86_FEATURE_TSC)) + return random_get_entropy_fallback(); + return rdtsc(); +} +#define random_get_entropy random_get_entropy + /* Assume we use the PIT time source for the clock tick */ #define CLOCK_TICK_RATE PIT_TICK_RATE diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 4b14d2318251e..f3459df117aa8 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -110,6 +110,8 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); #define topology_die_id(cpu) (cpu_data(cpu).cpu_die_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) +extern unsigned int __max_die_per_package; + #ifdef CONFIG_SMP #define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu)) #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) @@ -118,8 +120,6 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); extern unsigned int __max_logical_packages; #define topology_max_packages() (__max_logical_packages) -extern unsigned int __max_die_per_package; - static inline int topology_max_die_per_package(void) { return __max_die_per_package; diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 8a0c25c6bf099..f1ea8f0c8b129 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -22,13 +22,12 @@ extern void disable_TSC(void); static inline cycles_t get_cycles(void) { -#ifndef CONFIG_X86_TSC - if (!boot_cpu_has(X86_FEATURE_TSC)) + if (!IS_ENABLED(CONFIG_X86_TSC) && + !cpu_feature_enabled(X86_FEATURE_TSC)) return 0; -#endif - return rdtsc(); } +#define get_cycles get_cycles extern struct system_counterval_t convert_art_to_tsc(u64 art); extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 61d93f062a36e..d6a0e57ecc073 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -378,18 +378,6 @@ do { \ : "=r" (err), ltype(x) \ : "m" (__m(addr)), "i" (errret), "0" (err)) -#define __get_user_asm_nozero(x, addr, err, itype, rtype, ltype, errret) \ - asm volatile("\n" \ - "1: mov"itype" %2,%"rtype"1\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: mov %3,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE_UA(1b, 3b) \ - : "=r" (err), ltype(x) \ - : "m" (__m(addr)), "i" (errret), "0" (err)) - /* * This doesn't do __uaccess_begin/end - the exception handling * around it must do that. @@ -453,6 +441,103 @@ __pu_label: \ __builtin_expect(__gu_err, 0); \ }) +#ifdef CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT +#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm_volatile_goto("\n" \ + "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ + _ASM_EXTABLE_UA(1b, %l[label]) \ + : CC_OUT(z) (success), \ + [ptr] "+m" (*_ptr), \ + [old] "+a" (__old) \ + : [new] ltype (__new) \ + : "memory" \ + : label); \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) + +#ifdef CONFIG_X86_32 +#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm_volatile_goto("\n" \ + "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ + _ASM_EXTABLE_UA(1b, %l[label]) \ + : CC_OUT(z) (success), \ + "+A" (__old), \ + [ptr] "+m" (*_ptr) \ + : "b" ((u32)__new), \ + "c" ((u32)((u64)__new >> 32)) \ + : "memory" \ + : label); \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) +#endif // CONFIG_X86_32 +#else // !CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT +#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ + int __err = 0; \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm volatile("\n" \ + "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ + CC_SET(z) \ + "2:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \ + %[errout]) \ + : CC_OUT(z) (success), \ + [errout] "+r" (__err), \ + [ptr] "+m" (*_ptr), \ + [old] "+a" (__old) \ + : [new] ltype (__new) \ + : "memory"); \ + if (unlikely(__err)) \ + goto label; \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) + +#ifdef CONFIG_X86_32 +/* + * Unlike the normal CMPXCHG, hardcode ECX for both success/fail and error. + * There are only six GPRs available and four (EAX, EBX, ECX, and EDX) are + * hardcoded by CMPXCHG8B, leaving only ESI and EDI. If the compiler uses + * both ESI and EDI for the memory operand, compilation will fail if the error + * is an input+output as there will be no register available for input. + */ +#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ + int __result; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm volatile("\n" \ + "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ + "mov $0, %%ecx\n\t" \ + "setz %%cl\n" \ + "2:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %%ecx) \ + : [result]"=c" (__result), \ + "+A" (__old), \ + [ptr] "+m" (*_ptr) \ + : "b" ((u32)__new), \ + "c" ((u32)((u64)__new >> 32)) \ + : "memory", "cc"); \ + if (unlikely(__result < 0)) \ + goto label; \ + if (unlikely(!__result)) \ + *_old = __old; \ + likely(__result); }) +#endif // CONFIG_X86_32 +#endif // CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT + /* FIXME: this hack is definitely wrong -AK */ struct __large_struct { unsigned long buf[100]; }; #define __m(x) (*(struct __large_struct __user *)(x)) @@ -734,6 +819,51 @@ do { \ if (unlikely(__gu_err)) goto err_label; \ } while (0) +extern void __try_cmpxchg_user_wrong_size(void); + +#ifndef CONFIG_X86_32 +#define __try_cmpxchg64_user_asm(_ptr, _oldp, _nval, _label) \ + __try_cmpxchg_user_asm("q", "r", (_ptr), (_oldp), (_nval), _label) +#endif + +/* + * Force the pointer to u to match the size expected by the asm helper. + * clang/LLVM compiles all cases and only discards the unused paths after + * processing errors, which breaks i386 if the pointer is an 8-byte value. + */ +#define unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ + bool __ret; \ + __chk_user_ptr(_ptr); \ + switch (sizeof(*(_ptr))) { \ + case 1: __ret = __try_cmpxchg_user_asm("b", "q", \ + (__force u8 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 2: __ret = __try_cmpxchg_user_asm("w", "r", \ + (__force u16 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 4: __ret = __try_cmpxchg_user_asm("l", "r", \ + (__force u32 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 8: __ret = __try_cmpxchg64_user_asm((__force u64 *)(_ptr), (_oldp),\ + (_nval), _label); \ + break; \ + default: __try_cmpxchg_user_wrong_size(); \ + } \ + __ret; }) + +/* "Returns" 0 on success, 1 on failure, -EFAULT if the access faults. */ +#define __try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ + int __ret = -EFAULT; \ + __uaccess_begin_nospec(); \ + __ret = !unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label); \ +_label: \ + __uaccess_end(); \ + __ret; \ + }) + /* * We want the unsafe accessors to always be inlined and use * the error labels - thus the macro games. diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index ba2dc19306303..388a40660c7b5 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -23,33 +23,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) static __always_inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { - if (__builtin_constant_p(n)) { - unsigned long ret; - - switch (n) { - case 1: - ret = 0; - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u8 *)to, from, ret, - "b", "b", "=q", 1); - __uaccess_end(); - return ret; - case 2: - ret = 0; - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u16 *)to, from, ret, - "w", "w", "=r", 2); - __uaccess_end(); - return ret; - case 4: - ret = 0; - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u32 *)to, from, ret, - "l", "k", "=r", 4); - __uaccess_end(); - return ret; - } - } return __copy_user_ll(to, (__force const void *)from, n); } diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 5cd1caa8bc653..bc10e3dc64fed 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -65,117 +65,13 @@ copy_to_user_mcsafe(void *to, const void *from, unsigned len) static __always_inline __must_check unsigned long raw_copy_from_user(void *dst, const void __user *src, unsigned long size) { - int ret = 0; - - if (!__builtin_constant_p(size)) - return copy_user_generic(dst, (__force void *)src, size); - switch (size) { - case 1: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src, - ret, "b", "b", "=q", 1); - __uaccess_end(); - return ret; - case 2: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src, - ret, "w", "w", "=r", 2); - __uaccess_end(); - return ret; - case 4: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src, - ret, "l", "k", "=r", 4); - __uaccess_end(); - return ret; - case 8: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 8); - __uaccess_end(); - return ret; - case 10: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 10); - if (likely(!ret)) - __get_user_asm_nozero(*(u16 *)(8 + (char *)dst), - (u16 __user *)(8 + (char __user *)src), - ret, "w", "w", "=r", 2); - __uaccess_end(); - return ret; - case 16: - __uaccess_begin_nospec(); - __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 16); - if (likely(!ret)) - __get_user_asm_nozero(*(u64 *)(8 + (char *)dst), - (u64 __user *)(8 + (char __user *)src), - ret, "q", "", "=r", 8); - __uaccess_end(); - return ret; - default: - return copy_user_generic(dst, (__force void *)src, size); - } + return copy_user_generic(dst, (__force void *)src, size); } static __always_inline __must_check unsigned long raw_copy_to_user(void __user *dst, const void *src, unsigned long size) { - int ret = 0; - - if (!__builtin_constant_p(size)) - return copy_user_generic((__force void *)dst, src, size); - switch (size) { - case 1: - __uaccess_begin(); - __put_user_asm(*(u8 *)src, (u8 __user *)dst, - ret, "b", "b", "iq", 1); - __uaccess_end(); - return ret; - case 2: - __uaccess_begin(); - __put_user_asm(*(u16 *)src, (u16 __user *)dst, - ret, "w", "w", "ir", 2); - __uaccess_end(); - return ret; - case 4: - __uaccess_begin(); - __put_user_asm(*(u32 *)src, (u32 __user *)dst, - ret, "l", "k", "ir", 4); - __uaccess_end(); - return ret; - case 8: - __uaccess_begin(); - __put_user_asm(*(u64 *)src, (u64 __user *)dst, - ret, "q", "", "er", 8); - __uaccess_end(); - return ret; - case 10: - __uaccess_begin(); - __put_user_asm(*(u64 *)src, (u64 __user *)dst, - ret, "q", "", "er", 10); - if (likely(!ret)) { - asm("":::"memory"); - __put_user_asm(4[(u16 *)src], 4 + (u16 __user *)dst, - ret, "w", "w", "ir", 2); - } - __uaccess_end(); - return ret; - case 16: - __uaccess_begin(); - __put_user_asm(*(u64 *)src, (u64 __user *)dst, - ret, "q", "", "er", 16); - if (likely(!ret)) { - asm("":::"memory"); - __put_user_asm(1[(u64 *)src], 1 + (u64 __user *)dst, - ret, "q", "", "er", 8); - } - __uaccess_end(); - return ret; - default: - return copy_user_generic((__force void *)dst, src, size); - } + return copy_user_generic((__force void *)dst, src, size); } static __always_inline __must_check diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index 499578f7e6d7b..70fc159ebe695 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -19,7 +19,7 @@ struct unwind_state { #if defined(CONFIG_UNWINDER_ORC) bool signal, full_regs; unsigned long sp, bp, ip; - struct pt_regs *regs; + struct pt_regs *regs, *prev_regs; #elif defined(CONFIG_UNWINDER_FRAME_POINTER) bool got_irq; unsigned long *bp, *orig_sp, ip; diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index 9aad0e0876fba..fda3e7747c223 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -30,15 +30,22 @@ static inline int cpu_has_vmx(void) } -/** Disable VMX on the current CPU +/** + * cpu_vmxoff() - Disable VMX on the current CPU * - * vmxoff causes a undefined-opcode exception if vmxon was not run - * on the CPU previously. Only call this function if you know VMX - * is enabled. + * Disable VMX and clear CR4.VMXE (even if VMXOFF faults) + * + * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to + * atomically track post-VMXON state, e.g. this may be called in NMI context. + * Eat all faults as all other faults on VMXOFF faults are mode related, i.e. + * faults are guaranteed to be due to the !post-VMXON check unless the CPU is + * magically in RM, VM86, compat mode, or at CPL>0. */ static inline void cpu_vmxoff(void) { - asm volatile ("vmxoff"); + asm_volatile_goto("1: vmxoff\n\t" + _ASM_EXTABLE(1b, %l[fault]) :::: fault); +fault: cr4_clear_bits(X86_CR4_VMXE); } diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 19435858df5f1..96d9cd2086104 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -51,12 +51,14 @@ struct x86_init_resources { * are set up. * @intr_init: interrupt init code * @trap_init: platform specific trap setup + * @intr_mode_select: interrupt delivery mode selection * @intr_mode_init: interrupt delivery mode setup */ struct x86_init_irqs { void (*pre_vector_init)(void); void (*intr_init)(void); void (*trap_init)(void); + void (*intr_mode_select)(void); void (*intr_mode_init)(void); }; diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index 62ca03ef5c657..9139b3e863161 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -379,12 +379,9 @@ struct xen_pmu_arch { * Prefix forces emulation of some non-trapping instructions. * Currently only CPUID. */ -#ifdef __ASSEMBLY__ -#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ; -#define XEN_CPUID XEN_EMULATE_PREFIX cpuid -#else -#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; " -#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" -#endif +#include + +#define XEN_EMULATE_PREFIX __ASM_FORM(.byte __XEN_EMULATE_PREFIX ;) +#define XEN_CPUID XEN_EMULATE_PREFIX __ASM_FORM(cpuid) #endif /* _ASM_X86_XEN_INTERFACE_H */ diff --git a/arch/x86/include/uapi/asm/unistd.h b/arch/x86/include/uapi/asm/unistd.h index 196fdd02b8b1b..be5e2e747f507 100644 --- a/arch/x86/include/uapi/asm/unistd.h +++ b/arch/x86/include/uapi/asm/unistd.h @@ -2,8 +2,15 @@ #ifndef _UAPI_ASM_X86_UNISTD_H #define _UAPI_ASM_X86_UNISTD_H -/* x32 syscall flag bit */ -#define __X32_SYSCALL_BIT 0x40000000UL +/* + * x32 syscall flag bit. Some user programs expect syscall NR macros + * and __X32_SYSCALL_BIT to have type int, even though syscall numbers + * are, for practical purposes, unsigned long. + * + * Fortunately, expressions like (nr & ~__X32_SYSCALL_BIT) do the right + * thing regardless. + */ +#define __X32_SYSCALL_BIT 0x40000000 #ifndef __KERNEL__ # ifdef __i386__ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 04205ce127a12..7b75658b7e9ac 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1339,6 +1339,17 @@ static int __init disable_acpi_pci(const struct dmi_system_id *d) return 0; } +static int __init disable_acpi_xsdt(const struct dmi_system_id *d) +{ + if (!acpi_force) { + pr_notice("%s detected: force use of acpi=rsdt\n", d->ident); + acpi_gbl_do_not_use_xsdt = TRUE; + } else { + pr_notice("Warning: DMI blacklist says broken, but acpi XSDT forced\n"); + } + return 0; +} + static int __init dmi_disable_acpi(const struct dmi_system_id *d) { if (!acpi_force) { @@ -1463,6 +1474,19 @@ static const struct dmi_system_id acpi_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), }, }, + /* + * Boxes that need ACPI XSDT use disabled due to corrupted tables + */ + { + .callback = disable_acpi_xsdt, + .ident = "Advantech DAC-BJ01", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "NEC"), + DMI_MATCH(DMI_PRODUCT_NAME, "Bearlake CRB Board"), + DMI_MATCH(DMI_BIOS_VERSION, "V1.12"), + DMI_MATCH(DMI_BIOS_DATE, "02/01/2011"), + }, + }, {} }; @@ -1553,10 +1577,18 @@ void __init acpi_boot_table_init(void) /* * Initialize the ACPI boot-time table parser. */ - if (acpi_table_init()) { + if (acpi_locate_initial_tables()) disable_acpi(); - return; - } + else + acpi_reserve_initial_tables(); +} + +int __init early_acpi_boot_init(void) +{ + if (acpi_disabled) + return 1; + + acpi_table_init_complete(); acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); @@ -1569,18 +1601,9 @@ void __init acpi_boot_table_init(void) } else { printk(KERN_WARNING PREFIX "Disabling ACPI support\n"); disable_acpi(); - return; + return 1; } } -} - -int __init early_acpi_boot_init(void) -{ - /* - * If acpi_disabled, bail out - */ - if (acpi_disabled) - return 1; /* * Process the Multiple APIC Description Table (MADT), if present @@ -1740,7 +1763,7 @@ int __acpi_acquire_global_lock(unsigned int *lock) new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1)); val = cmpxchg(lock, old, new); } while (unlikely (val != old)); - return (new < 3) ? -1 : 0; + return ((new & 0x3) < 3) ? -1 : 0; } int __acpi_release_global_lock(unsigned int *lock) diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index caf2edccbad2e..49ae4e1ac9cd8 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -161,7 +161,8 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, /* Make sure we are running on right CPU */ - retval = work_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx); + retval = call_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx, + false); if (retval == 0) { /* Use the hint in CST */ percpu_entry->states[cx->index].eax = cx->address; diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index ca13851f05701..0487d173f28fa 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "../../realmode/rm/wakeup.h" @@ -128,8 +129,10 @@ static int __init acpi_sleep_setup(char *str) if (strncmp(str, "s3_beep", 7) == 0) acpi_realmode_flags |= 4; #ifdef CONFIG_HIBERNATION + if (strncmp(str, "s4_hwsig", 8) == 0) + acpi_check_s4_hw_signature = 1; if (strncmp(str, "s4_nohwsig", 10) == 0) - acpi_no_s4_hw_signature(); + acpi_check_s4_hw_signature = 0; #endif if (strncmp(str, "nonvs", 5) == 0) acpi_nvs_nosave(); @@ -147,3 +150,21 @@ static int __init acpi_sleep_setup(char *str) } __setup("acpi_sleep=", acpi_sleep_setup); + +#if defined(CONFIG_HIBERNATION) && defined(CONFIG_HYPERVISOR_GUEST) +static int __init init_s4_sigcheck(void) +{ + /* + * If running on a hypervisor, honour the ACPI specification + * by default and trigger a clean reboot when the hardware + * signature in FACS is changed after hibernation. + */ + if (acpi_check_s4_hw_signature == -1 && + !hypervisor_is_type(X86_HYPER_NATIVE)) + acpi_check_s4_hw_signature = 1; + + return 0; +} +/* This must happen before acpi_init() which is a subsys initcall */ +arch_initcall(init_s4_sigcheck); +#endif diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index e95e95960156b..d682fd6f31078 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -3,14 +3,14 @@ #include #include #include +#include # Copyright 2003, 2008 Pavel Machek #include #include +#include # Copyright 2003 Pavel Machek = 0x40; bytes++) { - u8 id; - - pos &= ~3; - id = read_pci_config_byte(bus, slot, func, pos+PCI_CAP_LIST_ID); - if (id == 0xff) - break; - if (id == cap) - return pos; - pos = read_pci_config_byte(bus, slot, func, - pos+PCI_CAP_LIST_NEXT); - } - return 0; -} - /* Read a standard AGPv3 bridge header */ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order) { @@ -240,8 +214,8 @@ static u32 __init search_agp_bridge(u32 *order, int *valid_agp) case PCI_CLASS_BRIDGE_HOST: case PCI_CLASS_BRIDGE_OTHER: /* needed? */ /* AGP bridge? */ - cap = find_cap(bus, slot, func, - PCI_CAP_ID_AGP); + cap = pci_early_find_cap(bus, slot, + func, PCI_CAP_ID_AGP); if (!cap) break; *valid_agp = 1; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 2b0faf86da1b7..68c7340325233 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -167,7 +168,7 @@ static __init int setup_apicpmtimer(char *s) { apic_calibrate_pmtmr = 1; notsc_setup(NULL); - return 0; + return 1; } __setup("apicpmtimer", setup_apicpmtimer); #endif @@ -352,8 +353,6 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) * According to Intel, MFENCE can do the serialization here. */ asm volatile("mfence" : : : "memory"); - - printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); return; } @@ -474,6 +473,9 @@ static int lapic_next_deadline(unsigned long delta, { u64 tsc; + /* This MSR is special and need a special fence: */ + weak_wrmsr_fence(); + tsc = rdtsc(); wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR)); return 0; @@ -552,7 +554,7 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events); #define DEADLINE_MODEL_MATCH_REV(model, rev) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)rev } -static u32 hsx_deadline_rev(void) +static __init u32 hsx_deadline_rev(void) { switch (boot_cpu_data.x86_stepping) { case 0x02: return 0x3a; /* EP */ @@ -562,7 +564,7 @@ static u32 hsx_deadline_rev(void) return ~0U; } -static u32 bdx_deadline_rev(void) +static __init u32 bdx_deadline_rev(void) { switch (boot_cpu_data.x86_stepping) { case 0x02: return 0x00000011; @@ -574,7 +576,7 @@ static u32 bdx_deadline_rev(void) return ~0U; } -static u32 skx_deadline_rev(void) +static __init u32 skx_deadline_rev(void) { switch (boot_cpu_data.x86_stepping) { case 0x03: return 0x01000136; @@ -587,7 +589,7 @@ static u32 skx_deadline_rev(void) return ~0U; } -static const struct x86_cpu_id deadline_match[] = { +static const struct x86_cpu_id deadline_match[] __initconst = { DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev), DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020), DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_D, bdx_deadline_rev), @@ -609,18 +611,19 @@ static const struct x86_cpu_id deadline_match[] = { {}, }; -static void apic_check_deadline_errata(void) +static __init bool apic_validate_deadline_timer(void) { const struct x86_cpu_id *m; u32 rev; - if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) || - boot_cpu_has(X86_FEATURE_HYPERVISOR)) - return; + if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) + return false; + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return true; m = x86_match_cpu(deadline_match); if (!m) - return; + return true; /* * Function pointers will have the MSB set due to address layout, @@ -632,11 +635,12 @@ static void apic_check_deadline_errata(void) rev = (u32)m->driver_data; if (boot_cpu_data.microcode >= rev) - return; + return true; setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; " "please update microcode to version: 0x%x (or later)\n", rev); + return false; } /* @@ -830,8 +834,17 @@ bool __init apic_needs_pit(void) if (!tsc_khz || !cpu_khz) return true; - /* Is there an APIC at all? */ - if (!boot_cpu_has(X86_FEATURE_APIC)) + /* Is there an APIC at all or is it disabled? */ + if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic) + return true; + + /* + * If interrupt delivery mode is legacy PIC or virtual wire without + * configuration, the local APIC timer wont be set up. Make sure + * that the PIT is initialized. + */ + if (apic_intr_mode == APIC_PIC || + apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG) return true; /* Virt guests may lack ARAT, but still have DEADLINE */ @@ -1322,7 +1335,7 @@ void __init sync_Arb_IDs(void) enum apic_intr_mode_id apic_intr_mode __ro_after_init; -static int __init apic_intr_mode_select(void) +static int __init __apic_intr_mode_select(void) { /* Check kernel option */ if (disable_apic) { @@ -1384,6 +1397,12 @@ static int __init apic_intr_mode_select(void) return APIC_SYMMETRIC_IO; } +/* Select the interrupt delivery mode for the BSP */ +void __init apic_intr_mode_select(void) +{ + apic_intr_mode = __apic_intr_mode_select(); +} + /* * An initial setup of the virtual wire mode. */ @@ -1440,8 +1459,6 @@ void __init apic_intr_mode_init(void) { bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT); - apic_intr_mode = apic_intr_mode_select(); - switch (apic_intr_mode) { case APIC_PIC: pr_info("APIC: Keep in PIC mode(8259)\n"); @@ -1873,20 +1890,22 @@ static __init void try_to_enable_x2apic(int remap_mode) return; if (remap_mode != IRQ_REMAP_X2APIC_MODE) { - /* IR is required if there is APIC ID > 255 even when running - * under KVM + /* + * Using X2APIC without IR is not architecturally supported + * on bare metal but may be supported in guests. */ - if (max_physical_apicid > 255 || - !x86_init.hyper.x2apic_available()) { + if (!x86_init.hyper.x2apic_available()) { pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n"); x2apic_disable(); return; } /* - * without IR all CPUs can be addressed by IOAPIC/MSI - * only in physical mode + * Without IR, all CPUs can be addressed by IOAPIC/MSI only + * in physical mode, and CPUs with an APIC ID that cannnot + * be addressed must not be brought online. */ + x2apic_set_max_apicid(255); x2apic_phys = 1; } x2apic_enable(); @@ -2085,7 +2104,8 @@ void __init init_apic_mappings(void) { unsigned int new_apicid; - apic_check_deadline_errata(); + if (apic_validate_deadline_timer()) + pr_info("TSC deadline timer available\n"); if (x2apic_mode) { boot_cpu_physical_apicid = read_apic_id(); @@ -2334,6 +2354,11 @@ static int cpuid_to_apicid[] = { [0 ... NR_CPUS - 1] = -1, }; +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return phys_id == cpuid_to_apicid[cpu]; +} + #ifdef CONFIG_SMP /** * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread @@ -2554,6 +2579,7 @@ static void __init apic_bsp_setup(bool upmode) end_local_APIC_setup(); irq_remap_enable_fault_handling(); setup_IO_APIC(); + lapic_update_legacy_vectors(); } #ifdef CONFIG_UP_LATE_INIT diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d6af97fd170a9..c9301e9ad69de 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1046,6 +1046,16 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) { irq = mp_irqs[idx].srcbusirq; legacy = mp_is_legacy_irq(irq); + /* + * IRQ2 is unusable for historical reasons on systems which + * have a legacy PIC. See the comment vs. IRQ2 further down. + * + * If this gets removed at some point then the related code + * in lapic_assign_system_vectors() needs to be adjusted as + * well. + */ + if (legacy && irq == PIC_CASCADE_IR) + return -EINVAL; } mutex_lock(&ioapic_mutex); @@ -1574,14 +1584,20 @@ void __init setup_ioapic_ids_from_mpc(void) } #endif -int no_timer_check __initdata; +int no_timer_check __initdata = 1; static int __init notimercheck(char *s) { - no_timer_check = 1; - return 1; + int ret = 0; + + if (s) + ret = kstrtoint(s, 0, &no_timer_check); + else + no_timer_check = 1; + + return ret; } -__setup("no_timer_check", notimercheck); +early_param("no_timer_check", notimercheck); static void __init delay_with_tsc(void) { @@ -1727,9 +1743,10 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data) static inline bool ioapic_irqd_mask(struct irq_data *data) { - /* If we are moving the irq we need to mask it */ + /* If we are moving the IRQ we need to mask it */ if (unlikely(irqd_is_setaffinity_pending(data))) { - mask_ioapic_irq(data); + if (!irqd_irq_masked(data)) + mask_ioapic_irq(data); return true; } return false; @@ -1766,7 +1783,9 @@ static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) */ if (!io_apic_level_ack_pending(data->chip_data)) irq_move_masked_irq(data); - unmask_ioapic_irq(data); + /* If the IRQ is masked in the core, leave it: */ + if (!irqd_irq_masked(data)) + unmask_ioapic_irq(data); } } #else @@ -1948,7 +1967,8 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_set_affinity = ioapic_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_get_irqchip_state = ioapic_irq_get_chip_state, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static struct irq_chip ioapic_ir_chip __read_mostly = { @@ -1961,7 +1981,8 @@ static struct irq_chip ioapic_ir_chip __read_mostly = { .irq_set_affinity = ioapic_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_get_irqchip_state = ioapic_irq_get_chip_state, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static inline void init_IO_APIC_traps(void) @@ -2253,6 +2274,7 @@ static inline void __init check_timer(void) legacy_pic->init(0); legacy_pic->make_irq(0); apic_write(APIC_LVT0, APIC_DM_EXTINT); + legacy_pic->unmask(0); unlock_ExtINT_logic(); @@ -2326,12 +2348,12 @@ static int mp_irqdomain_create(int ioapic) ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops, (void *)(long)ioapic); - /* Release fw handle if it was allocated above */ - if (!cfg->dev) - irq_domain_free_fwnode(fn); - - if (!ip->irqdomain) + if (!ip->irqdomain) { + /* Release fw handle if it was allocated above */ + if (!cfg->dev) + irq_domain_free_fwnode(fn); return -ENOMEM; + } ip->irqdomain->parent = parent; @@ -2345,8 +2367,13 @@ static int mp_irqdomain_create(int ioapic) static void ioapic_destroy_irqdomain(int idx) { + struct ioapic_domain_cfg *cfg = &ioapics[idx].irqdomain_cfg; + struct fwnode_handle *fn = ioapics[idx].irqdomain->fwnode; + if (ioapics[idx].irqdomain) { irq_domain_remove(ioapics[idx].irqdomain); + if (!cfg->dev) + irq_domain_free_fwnode(fn); ioapics[idx].irqdomain = NULL; } } diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 7f7533462474a..f86e10b1d99ce 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -23,10 +23,8 @@ static struct irq_domain *msi_default_domain; -static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) +static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg) { - struct irq_cfg *cfg = irqd_cfg(data); - msg->address_hi = MSI_ADDR_BASE_HI; if (x2apic_enabled()) @@ -47,6 +45,129 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) MSI_DATA_VECTOR(cfg->vector); } +static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) +{ + __irq_msi_compose_msg(irqd_cfg(data), msg); +} + +static void irq_msi_update_msg(struct irq_data *irqd, struct irq_cfg *cfg) +{ + struct msi_msg msg[2] = { [1] = { }, }; + + __irq_msi_compose_msg(cfg, msg); + irq_data_get_irq_chip(irqd)->irq_write_msi_msg(irqd, msg); +} + +static int +msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force) +{ + struct irq_cfg old_cfg, *cfg = irqd_cfg(irqd); + struct irq_data *parent = irqd->parent_data; + unsigned int cpu; + int ret; + + /* Save the current configuration */ + cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd)); + old_cfg = *cfg; + + /* Allocate a new target vector */ + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) + return ret; + + /* + * For non-maskable and non-remapped MSI interrupts the migration + * to a different destination CPU and a different vector has to be + * done careful to handle the possible stray interrupt which can be + * caused by the non-atomic update of the address/data pair. + * + * Direct update is possible when: + * - The MSI is maskable (remapped MSI does not use this code path)). + * The quirk bit is not set in this case. + * - The new vector is the same as the old vector + * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up) + * - The interrupt is not yet started up + * - The new destination CPU is the same as the old destination CPU + */ + if (!irqd_msi_nomask_quirk(irqd) || + cfg->vector == old_cfg.vector || + old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR || + !irqd_is_started(irqd) || + cfg->dest_apicid == old_cfg.dest_apicid) { + irq_msi_update_msg(irqd, cfg); + return ret; + } + + /* + * Paranoia: Validate that the interrupt target is the local + * CPU. + */ + if (WARN_ON_ONCE(cpu != smp_processor_id())) { + irq_msi_update_msg(irqd, cfg); + return ret; + } + + /* + * Redirect the interrupt to the new vector on the current CPU + * first. This might cause a spurious interrupt on this vector if + * the device raises an interrupt right between this update and the + * update to the final destination CPU. + * + * If the vector is in use then the installed device handler will + * denote it as spurious which is no harm as this is a rare event + * and interrupt handlers have to cope with spurious interrupts + * anyway. If the vector is unused, then it is marked so it won't + * trigger the 'No irq handler for vector' warning in do_IRQ(). + * + * This requires to hold vector lock to prevent concurrent updates to + * the affected vector. + */ + lock_vector_lock(); + + /* + * Mark the new target vector on the local CPU if it is currently + * unused. Reuse the VECTOR_RETRIGGERED state which is also used in + * the CPU hotplug path for a similar purpose. This cannot be + * undone here as the current CPU has interrupts disabled and + * cannot handle the interrupt before the whole set_affinity() + * section is done. In the CPU unplug case, the current CPU is + * about to vanish and will not handle any interrupts anymore. The + * vector is cleaned up when the CPU comes online again. + */ + if (IS_ERR_OR_NULL(this_cpu_read(vector_irq[cfg->vector]))) + this_cpu_write(vector_irq[cfg->vector], VECTOR_RETRIGGERED); + + /* Redirect it to the new vector on the local CPU temporarily */ + old_cfg.vector = cfg->vector; + irq_msi_update_msg(irqd, &old_cfg); + + /* Now transition it to the target CPU */ + irq_msi_update_msg(irqd, cfg); + + /* + * All interrupts after this point are now targeted at the new + * vector/CPU. + * + * Drop vector lock before testing whether the temporary assignment + * to the local CPU was hit by an interrupt raised in the device, + * because the retrigger function acquires vector lock again. + */ + unlock_vector_lock(); + + /* + * Check whether the transition raced with a device interrupt and + * is pending in the local APICs IRR. It is safe to do this outside + * of vector lock as the irq_desc::lock of this interrupt is still + * held and interrupts are disabled: The check is not accessing the + * underlying vector store. It's just checking the local APIC's + * IRR. + */ + if (lapic_vector_set_in_irr(cfg->vector)) + irq_data_get_irq_chip(irqd)->irq_retrigger(irqd); + + return ret; +} + /* * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, * which implement the MSI or MSI-X Capability Structure. @@ -58,7 +179,9 @@ static struct irq_chip pci_msi_controller = { .irq_ack = irq_chip_ack_parent, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_compose_msi_msg = irq_msi_compose_msg, - .flags = IRQCHIP_SKIP_SET_WAKE, + .irq_set_affinity = msi_set_affinity, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) @@ -142,10 +265,13 @@ void __init arch_init_msi_domain(struct irq_domain *parent) msi_default_domain = pci_msi_create_irq_domain(fn, &pci_msi_domain_info, parent); - irq_domain_free_fwnode(fn); } - if (!msi_default_domain) + if (!msi_default_domain) { + irq_domain_free_fwnode(fn); pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n"); + } else { + msi_default_domain->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK; + } } #ifdef CONFIG_IRQ_REMAP @@ -156,7 +282,8 @@ static struct irq_chip pci_msi_ir_controller = { .irq_ack = irq_chip_ack_parent, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_set_vcpu_affinity = irq_chip_set_vcpu_affinity_parent, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static struct msi_domain_info pci_msi_ir_domain_info = { @@ -178,7 +305,8 @@ struct irq_domain *arch_create_remap_msi_irq_domain(struct irq_domain *parent, if (!fn) return NULL; d = pci_msi_create_irq_domain(fn, &pci_msi_ir_domain_info, parent); - irq_domain_free_fwnode(fn); + if (!d) + irq_domain_free_fwnode(fn); return d; } #endif @@ -198,7 +326,8 @@ static struct irq_chip dmar_msi_controller = { .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_compose_msi_msg = irq_msi_compose_msg, .irq_write_msi_msg = dmar_msi_write_msg, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static irq_hw_number_t dmar_msi_get_hwirq(struct msi_domain_info *info, @@ -241,7 +370,8 @@ static struct irq_domain *dmar_get_irq_domain(void) if (fn) { dmar_domain = msi_create_irq_domain(fn, &dmar_msi_domain_info, x86_vector_domain); - irq_domain_free_fwnode(fn); + if (!dmar_domain) + irq_domain_free_fwnode(fn); } out: mutex_unlock(&dmar_lock); @@ -295,7 +425,7 @@ static struct irq_chip hpet_msi_controller __ro_after_init = { .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_compose_msi_msg = irq_msi_compose_msg, .irq_write_msi_msg = hpet_msi_write_msg, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_AFFINITY_PRE_STARTUP, }; static irq_hw_number_t hpet_msi_get_hwirq(struct msi_domain_info *info, @@ -366,7 +496,10 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id) } d = msi_create_irq_domain(fn, domain_info, parent); - irq_domain_free_fwnode(fn); + if (!d) { + irq_domain_free_fwnode(fn); + kfree(domain_info); + } return d; } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 2c5676b0a6e7f..6b8b6bf6c5d1c 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -272,20 +272,24 @@ static int assign_irq_vector_any_locked(struct irq_data *irqd) const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); int node = irq_data_get_node(irqd); - if (node == NUMA_NO_NODE) - goto all; - /* Try the intersection of @affmsk and node mask */ - cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk); - if (!assign_vector_locked(irqd, vector_searchmask)) - return 0; - /* Try the node mask */ - if (!assign_vector_locked(irqd, cpumask_of_node(node))) - return 0; -all: + if (node != NUMA_NO_NODE) { + /* Try the intersection of @affmsk and node mask */ + cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk); + if (!assign_vector_locked(irqd, vector_searchmask)) + return 0; + } + /* Try the full affinity mask */ cpumask_and(vector_searchmask, affmsk, cpu_online_mask); if (!assign_vector_locked(irqd, vector_searchmask)) return 0; + + if (node != NUMA_NO_NODE) { + /* Try the node mask */ + if (!assign_vector_locked(irqd, cpumask_of_node(node))) + return 0; + } + /* Try the full online mask */ return assign_vector_locked(irqd, cpu_online_mask); } @@ -446,12 +450,10 @@ static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, trace_vector_activate(irqd->irq, apicd->is_managed, apicd->can_reserve, reserve); - /* Nothing to do for fixed assigned vectors */ - if (!apicd->can_reserve && !apicd->is_managed) - return 0; - raw_spin_lock_irqsave(&vector_lock, flags); - if (reserve || irqd_is_managed_and_shutdown(irqd)) + if (!apicd->can_reserve && !apicd->is_managed) + assign_irq_vector_any_locked(irqd); + else if (reserve || irqd_is_managed_and_shutdown(irqd)) vector_assign_managed_shutdown(irqd); else if (apicd->is_managed) ret = activate_managed(irqd); @@ -556,6 +558,10 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, irqd->chip_data = apicd; irqd->hwirq = virq + i; irqd_set_single_target(irqd); + + /* Don't invoke affinity setter on deactivated interrupts */ + irqd_set_affinity_on_activate(irqd); + /* * Legacy vectors are already assigned when the IOAPIC * takes them over. They stay on the same vector. This is @@ -674,6 +680,26 @@ void lapic_assign_legacy_vector(unsigned int irq, bool replace) irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace); } +void __init lapic_update_legacy_vectors(void) +{ + unsigned int i; + + if (IS_ENABLED(CONFIG_X86_IO_APIC) && nr_ioapics > 0) + return; + + /* + * If the IO/APIC is disabled via config, kernel command line or + * lack of enumeration then all legacy interrupts are routed + * through the PIC. Make sure that they are marked as legacy + * vectors. PIC_CASCADE_IRQ has already been marked in + * lapic_assign_system_vectors(). + */ + for (i = 0; i < nr_legacy_irqs(); i++) { + if (i != PIC_CASCADE_IR) + lapic_assign_legacy_vector(i, true); + } +} + void __init lapic_assign_system_vectors(void) { unsigned int i, vector = 0; @@ -703,7 +729,6 @@ int __init arch_early_irq_init(void) x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops, NULL); BUG_ON(x86_vector_domain == NULL); - irq_domain_free_fwnode(fn); irq_set_default_host(x86_vector_domain); arch_init_msi_domain(x86_vector_domain); @@ -769,20 +794,10 @@ void lapic_offline(void) static int apic_set_affinity(struct irq_data *irqd, const struct cpumask *dest, bool force) { - struct apic_chip_data *apicd = apic_chip_data(irqd); int err; - /* - * Core code can call here for inactive interrupts. For inactive - * interrupts which use managed or reservation mode there is no - * point in going through the vector assignment right now as the - * activation will assign a vector which fits the destination - * cpumask. Let the core code store the destination mask and be - * done with it. - */ - if (!irqd_is_activated(irqd) && - (apicd->is_managed || apicd->can_reserve)) - return IRQ_SET_MASK_OK; + if (WARN_ON_ONCE(!irqd_is_activated(irqd))) + return -EIO; raw_spin_lock(&vector_lock); cpumask_and(vector_searchmask, dest, cpu_online_mask); diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index b0889c48a2ac5..7eec3c154fa24 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -29,7 +29,8 @@ static void x2apic_send_IPI(int cpu, int vector) { u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu); - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL); } @@ -41,7 +42,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) unsigned long flags; u32 dest; - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); local_irq_save(flags); tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask); diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index bc9693841353c..032a00e5d9fa6 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -8,6 +8,12 @@ int x2apic_phys; static struct apic apic_x2apic_phys; +static u32 x2apic_max_apicid __ro_after_init; + +void __init x2apic_set_max_apicid(u32 apicid) +{ + x2apic_max_apicid = apicid; +} static int __init set_x2apic_phys_mode(char *arg) { @@ -37,7 +43,8 @@ static void x2apic_send_IPI(int cpu, int vector) { u32 dest = per_cpu(x86_cpu_to_apicid, cpu); - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); __x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL); } @@ -48,7 +55,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) unsigned long this_cpu; unsigned long flags; - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); local_irq_save(flags); @@ -98,6 +106,9 @@ static int x2apic_phys_probe(void) /* Common x2apic functions, also used by x2apic_cluster */ int x2apic_apic_id_valid(u32 apicid) { + if (x2apic_max_apicid && apicid > x2apic_max_apicid) + return 0; + return 1; } @@ -116,7 +127,8 @@ void __x2apic_send_IPI_shorthand(int vector, u32 which) { unsigned long cfg = __prepare_ICR(which, vector, 0); - x2apic_wrmsr_fence(); + /* x2apic MSRs are special and need a special fence: */ + weak_wrmsr_fence(); native_x2apic_icr_write(cfg, 0); } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 90f75e5158769..5571b28d35b60 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -28,6 +28,7 @@ static const int amd_erratum_383[]; static const int amd_erratum_400[]; +static const int amd_erratum_1054[]; static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum); /* @@ -334,7 +335,6 @@ static void amd_get_topology_early(struct cpuinfo_x86 *c) */ static void amd_get_topology(struct cpuinfo_x86 *c) { - u8 node_id; int cpu = smp_processor_id(); /* get information required for multi-node processors */ @@ -344,7 +344,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - node_id = ecx & 0xff; + c->cpu_die_id = ecx & 0xff; if (c->x86 == 0x15) c->cu_id = ebx & 0xff; @@ -364,15 +364,15 @@ static void amd_get_topology(struct cpuinfo_x86 *c) if (!err) c->x86_coreid_bits = get_count_order(c->x86_max_cores); - cacheinfo_amd_init_llc_id(c, cpu, node_id); + cacheinfo_amd_init_llc_id(c, cpu); } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; rdmsrl(MSR_FAM10H_NODE_ID, value); - node_id = value & 7; + c->cpu_die_id = value & 7; - per_cpu(cpu_llc_id, cpu) = node_id; + per_cpu(cpu_llc_id, cpu) = c->cpu_die_id; } else return; @@ -397,7 +397,7 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) /* Convert the initial APIC ID into the socket ID */ c->phys_proc_id = c->initial_apicid >> bits; /* use socket ID also for last level cache */ - per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; + per_cpu(cpu_llc_id, cpu) = c->cpu_die_id = c->phys_proc_id; } u16 amd_get_nb_id(int cpu) @@ -545,12 +545,12 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) u32 ecx; ecx = cpuid_ecx(0x8000001e); - nodes_per_socket = ((ecx >> 8) & 7) + 1; + __max_die_per_package = nodes_per_socket = ((ecx >> 8) & 7) + 1; } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) { u64 value; rdmsrl(MSR_FAM10H_NODE_ID, value); - nodes_per_socket = ((value >> 3) & 7) + 1; + __max_die_per_package = nodes_per_socket = ((value >> 3) & 7) + 1; } if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && @@ -615,9 +615,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) return; clear_all: - clear_cpu_cap(c, X86_FEATURE_SME); + setup_clear_cpu_cap(X86_FEATURE_SME); clear_sev: - clear_cpu_cap(c, X86_FEATURE_SEV); + setup_clear_cpu_cap(X86_FEATURE_SEV); } } @@ -894,12 +894,21 @@ static void init_amd_zn(struct cpuinfo_x86 *c) node_reclaim_distance = 32; #endif - /* - * Fix erratum 1076: CPB feature bit not being set in CPUID. - * Always set it, except when running under a hypervisor. - */ - if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB)) - set_cpu_cap(c, X86_FEATURE_CPB); + /* Fix up CPUID bits, but only if not virtualised. */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { + + /* Erratum 1076: CPB feature bit not being set in CPUID. */ + if (!cpu_has(c, X86_FEATURE_CPB)) + set_cpu_cap(c, X86_FEATURE_CPB); + + /* + * Zen3 (Fam19 model < 0x10) parts are not susceptible to + * Branch Type Confusion, but predate the allocation of the + * BTC_NO bit. + */ + if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO)) + set_cpu_cap(c, X86_FEATURE_BTC_NO); + } } static void init_amd(struct cpuinfo_x86 *c) @@ -978,6 +987,17 @@ static void init_amd(struct cpuinfo_x86 *c) /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ if (!cpu_has(c, X86_FEATURE_XENPV)) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + + /* + * Turn on the Instructions Retired free counter on machines not + * susceptible to erratum #1054 "Instructions Retired Performance + * Counter May Be Inaccurate". + */ + if (cpu_has(c, X86_FEATURE_IRPERF) && + !cpu_has_amd_erratum(c, amd_erratum_1054)) + msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); + + check_null_seg_clears_base(c); } #ifdef CONFIG_X86_32 @@ -1105,6 +1125,9 @@ static const int amd_erratum_400[] = static const int amd_erratum_383[] = AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf)); +/* #1054: Instructions Retired Performance Counter May Be Inaccurate */ +static const int amd_erratum_1054[] = + AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf)); static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) { diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4c7b0fa15a194..9f09c4d395104 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -31,26 +31,57 @@ #include #include #include +#include #include "cpu.h" static void __init spectre_v1_select_mitigation(void); static void __init spectre_v2_select_mitigation(void); +static void __init retbleed_select_mitigation(void); +static void __init spectre_v2_user_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); +static void __init md_clear_update_mitigation(void); +static void __init md_clear_select_mitigation(void); static void __init taa_select_mitigation(void); +static void __init mmio_select_mitigation(void); +static void __init srbds_select_mitigation(void); -/* The base value of the SPEC_CTRL MSR that always has to be preserved. */ +/* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); + +/* The current value of the SPEC_CTRL MSR with task-specific bits set */ +DEFINE_PER_CPU(u64, x86_spec_ctrl_current); +EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); + static DEFINE_MUTEX(spec_ctrl_mutex); /* - * The vendor and possibly platform specific bits which can be modified in - * x86_spec_ctrl_base. + * Keep track of the SPEC_CTRL MSR value for the current task, which may differ + * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). */ -static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; +void write_spec_ctrl_current(u64 val, bool force) +{ + if (this_cpu_read(x86_spec_ctrl_current) == val) + return; + + this_cpu_write(x86_spec_ctrl_current, val); + + /* + * When KERNEL_IBRS this MSR is written on return-to-user, unless + * forced the update can be delayed until that time. + */ + if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) + wrmsrl(MSR_IA32_SPEC_CTRL, val); +} + +u64 spec_ctrl_current(void) +{ + return this_cpu_read(x86_spec_ctrl_current); +} +EXPORT_SYMBOL_GPL(spec_ctrl_current); /* * AMD specific MSR info for Speculative Store Bypass control. @@ -73,6 +104,10 @@ EXPORT_SYMBOL_GPL(mds_user_clear); DEFINE_STATIC_KEY_FALSE(mds_idle_clear); EXPORT_SYMBOL_GPL(mds_idle_clear); +/* Controls CPU Fill buffer clear before KVM guest MMIO accesses */ +DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear); +EXPORT_SYMBOL_GPL(mmio_stale_data_clear); + void __init check_bugs(void) { identify_boot_cpu(); @@ -96,17 +131,25 @@ void __init check_bugs(void) if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - /* Allow STIBP in MSR_SPEC_CTRL if supported */ - if (boot_cpu_has(X86_FEATURE_STIBP)) - x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; - /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); + /* + * retbleed_select_mitigation() relies on the state set by + * spectre_v2_select_mitigation(); specifically it wants to know about + * spectre_v2=ibrs. + */ + retbleed_select_mitigation(); + /* + * spectre_v2_user_select_mitigation() relies on the state set by + * retbleed_select_mitigation(); specifically the STIBP selection is + * forced for UNRET. + */ + spectre_v2_user_select_mitigation(); ssb_select_mitigation(); l1tf_select_mitigation(); - mds_select_mitigation(); - taa_select_mitigation(); + md_clear_select_mitigation(); + srbds_select_mitigation(); arch_smt_update(); @@ -142,31 +185,17 @@ void __init check_bugs(void) #endif } +/* + * NOTE: For VMX, this function is not called in the vmexit path. + * It uses vmx_spec_ctrl_restore_host() instead. + */ void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { - u64 msrval, guestval, hostval = x86_spec_ctrl_base; + u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current(); struct thread_info *ti = current_thread_info(); - /* Is MSR_SPEC_CTRL implemented ? */ if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { - /* - * Restrict guest_spec_ctrl to supported values. Clear the - * modifiable bits in the host base value and or the - * modifiable bits from the guest value. - */ - guestval = hostval & ~x86_spec_ctrl_mask; - guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; - - /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) - hostval |= ssbd_tif_to_spec_ctrl(ti->flags); - - /* Conditional STIBP enabled? */ - if (static_branch_unlikely(&switch_to_cond_stibp)) - hostval |= stibp_tif_to_spec_ctrl(ti->flags); - if (hostval != guestval) { msrval = setguest ? guestval : hostval; wrmsrl(MSR_IA32_SPEC_CTRL, msrval); @@ -245,8 +274,6 @@ static void __init mds_select_mitigation(void) (mds_nosmt || cpu_mitigations_auto_nosmt())) cpu_smt_disable(false); } - - pr_info("%s\n", mds_strings[mds_mitigation]); } static int __init mds_cmdline(char *str) @@ -296,7 +323,7 @@ static void __init taa_select_mitigation(void) /* TSX previously disabled by tsx=off */ if (!boot_cpu_has(X86_FEATURE_RTM)) { taa_mitigation = TAA_MITIGATION_TSX_DISABLED; - goto out; + return; } if (cpu_mitigations_off()) { @@ -304,9 +331,13 @@ static void __init taa_select_mitigation(void) return; } - /* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */ - if (taa_mitigation == TAA_MITIGATION_OFF) - goto out; + /* + * TAA mitigation via VERW is turned off if both + * tsx_async_abort=off and mds=off are specified. + */ + if (taa_mitigation == TAA_MITIGATION_OFF && + mds_mitigation == MDS_MITIGATION_OFF) + return; if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) taa_mitigation = TAA_MITIGATION_VERW; @@ -338,9 +369,6 @@ static void __init taa_select_mitigation(void) if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); - -out: - pr_info("%s\n", taa_strings[taa_mitigation]); } static int __init tsx_async_abort_parse_cmdline(char *str) @@ -364,6 +392,247 @@ static int __init tsx_async_abort_parse_cmdline(char *str) } early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "MMIO Stale Data: " fmt + +enum mmio_mitigations { + MMIO_MITIGATION_OFF, + MMIO_MITIGATION_UCODE_NEEDED, + MMIO_MITIGATION_VERW, +}; + +/* Default mitigation for Processor MMIO Stale Data vulnerabilities */ +static enum mmio_mitigations mmio_mitigation __ro_after_init = MMIO_MITIGATION_VERW; +static bool mmio_nosmt __ro_after_init = false; + +static const char * const mmio_strings[] = { + [MMIO_MITIGATION_OFF] = "Vulnerable", + [MMIO_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode", + [MMIO_MITIGATION_VERW] = "Mitigation: Clear CPU buffers", +}; + +static void __init mmio_select_mitigation(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || + boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) || + cpu_mitigations_off()) { + mmio_mitigation = MMIO_MITIGATION_OFF; + return; + } + + if (mmio_mitigation == MMIO_MITIGATION_OFF) + return; + + ia32_cap = x86_read_arch_cap_msr(); + + /* + * Enable CPU buffer clear mitigation for host and VMM, if also affected + * by MDS or TAA. Otherwise, enable mitigation for VMM only. + */ + if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && + boot_cpu_has(X86_FEATURE_RTM))) + static_branch_enable(&mds_user_clear); + else + static_branch_enable(&mmio_stale_data_clear); + + /* + * If Processor-MMIO-Stale-Data bug is present and Fill Buffer data can + * be propagated to uncore buffers, clearing the Fill buffers on idle + * is required irrespective of SMT state. + */ + if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) + static_branch_enable(&mds_idle_clear); + + /* + * Check if the system has the right microcode. + * + * CPU Fill buffer clear mitigation is enumerated by either an explicit + * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS + * affected systems. + */ + if ((ia32_cap & ARCH_CAP_FB_CLEAR) || + (boot_cpu_has(X86_FEATURE_MD_CLEAR) && + boot_cpu_has(X86_FEATURE_FLUSH_L1D) && + !(ia32_cap & ARCH_CAP_MDS_NO))) + mmio_mitigation = MMIO_MITIGATION_VERW; + else + mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED; + + if (mmio_nosmt || cpu_mitigations_auto_nosmt()) + cpu_smt_disable(false); +} + +static int __init mmio_stale_data_parse_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) + return 0; + + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) { + mmio_mitigation = MMIO_MITIGATION_OFF; + } else if (!strcmp(str, "full")) { + mmio_mitigation = MMIO_MITIGATION_VERW; + } else if (!strcmp(str, "full,nosmt")) { + mmio_mitigation = MMIO_MITIGATION_VERW; + mmio_nosmt = true; + } + + return 0; +} +early_param("mmio_stale_data", mmio_stale_data_parse_cmdline); + +#undef pr_fmt +#define pr_fmt(fmt) "" fmt + +static void __init md_clear_update_mitigation(void) +{ + if (cpu_mitigations_off()) + return; + + if (!static_key_enabled(&mds_user_clear)) + goto out; + + /* + * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data + * mitigation, if necessary. + */ + if (mds_mitigation == MDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MDS)) { + mds_mitigation = MDS_MITIGATION_FULL; + mds_select_mitigation(); + } + if (taa_mitigation == TAA_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_TAA)) { + taa_mitigation = TAA_MITIGATION_VERW; + taa_select_mitigation(); + } + if (mmio_mitigation == MMIO_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { + mmio_mitigation = MMIO_MITIGATION_VERW; + mmio_select_mitigation(); + } +out: + if (boot_cpu_has_bug(X86_BUG_MDS)) + pr_info("MDS: %s\n", mds_strings[mds_mitigation]); + if (boot_cpu_has_bug(X86_BUG_TAA)) + pr_info("TAA: %s\n", taa_strings[taa_mitigation]); + if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) + pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); + else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) + pr_info("MMIO Stale Data: Unknown: No mitigations\n"); +} + +static void __init md_clear_select_mitigation(void) +{ + mds_select_mitigation(); + taa_select_mitigation(); + mmio_select_mitigation(); + + /* + * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update + * and print their mitigation after MDS, TAA and MMIO Stale Data + * mitigation selection is done. + */ + md_clear_update_mitigation(); +} + +#undef pr_fmt +#define pr_fmt(fmt) "SRBDS: " fmt + +enum srbds_mitigations { + SRBDS_MITIGATION_OFF, + SRBDS_MITIGATION_UCODE_NEEDED, + SRBDS_MITIGATION_FULL, + SRBDS_MITIGATION_TSX_OFF, + SRBDS_MITIGATION_HYPERVISOR, +}; + +static enum srbds_mitigations srbds_mitigation __ro_after_init = SRBDS_MITIGATION_FULL; + +static const char * const srbds_strings[] = { + [SRBDS_MITIGATION_OFF] = "Vulnerable", + [SRBDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", + [SRBDS_MITIGATION_FULL] = "Mitigation: Microcode", + [SRBDS_MITIGATION_TSX_OFF] = "Mitigation: TSX disabled", + [SRBDS_MITIGATION_HYPERVISOR] = "Unknown: Dependent on hypervisor status", +}; + +static bool srbds_off; + +void update_srbds_msr(void) +{ + u64 mcu_ctrl; + + if (!boot_cpu_has_bug(X86_BUG_SRBDS)) + return; + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return; + + if (!boot_cpu_has(X86_FEATURE_SRBDS_CTRL)) + return; + + rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl); + + switch (srbds_mitigation) { + case SRBDS_MITIGATION_OFF: + case SRBDS_MITIGATION_TSX_OFF: + mcu_ctrl |= RNGDS_MITG_DIS; + break; + case SRBDS_MITIGATION_FULL: + mcu_ctrl &= ~RNGDS_MITG_DIS; + break; + default: + break; + } + + wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl); +} + +static void __init srbds_select_mitigation(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has_bug(X86_BUG_SRBDS)) + return; + + /* + * Check to see if this is one of the MDS_NO systems supporting TSX that + * are only exposed to SRBDS when TSX is enabled or when CPU is affected + * by Processor MMIO Stale Data vulnerability. + */ + ia32_cap = x86_read_arch_cap_msr(); + if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && + !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) + srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; + else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR; + else if (!boot_cpu_has(X86_FEATURE_SRBDS_CTRL)) + srbds_mitigation = SRBDS_MITIGATION_UCODE_NEEDED; + else if (cpu_mitigations_off() || srbds_off) + srbds_mitigation = SRBDS_MITIGATION_OFF; + + update_srbds_msr(); + pr_info("%s\n", srbds_strings[srbds_mitigation]); +} + +static int __init srbds_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!boot_cpu_has_bug(X86_BUG_SRBDS)) + return 0; + + srbds_off = !strcmp(str, "off"); + return 0; +} +early_param("srbds", srbds_parse_cmdline); + #undef pr_fmt #define pr_fmt(fmt) "Spectre V1 : " fmt @@ -456,13 +725,106 @@ static int __init nospectre_v1_cmdline(char *str) } early_param("nospectre_v1", nospectre_v1_cmdline); -#undef pr_fmt -#define pr_fmt(fmt) "Spectre V2 : " fmt - static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; -static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init = +#undef pr_fmt +#define pr_fmt(fmt) "RETBleed: " fmt + +enum retbleed_mitigation { + RETBLEED_MITIGATION_NONE, + RETBLEED_MITIGATION_IBRS, + RETBLEED_MITIGATION_EIBRS, +}; + +enum retbleed_mitigation_cmd { + RETBLEED_CMD_OFF, + RETBLEED_CMD_AUTO, +}; + +const char * const retbleed_strings[] = { + [RETBLEED_MITIGATION_NONE] = "Vulnerable", + [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", + [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", +}; + +static enum retbleed_mitigation retbleed_mitigation __ro_after_init = + RETBLEED_MITIGATION_NONE; +static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init = + RETBLEED_CMD_AUTO; + +static int __init retbleed_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + retbleed_cmd = RETBLEED_CMD_OFF; + else if (!strcmp(str, "auto")) + retbleed_cmd = RETBLEED_CMD_AUTO; + else + pr_err("Unknown retbleed option (%s). Defaulting to 'auto'\n", str); + + return 0; +} +early_param("retbleed", retbleed_parse_cmdline); + +#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" +#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n" +#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" + +static void __init retbleed_select_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) + return; + + switch (retbleed_cmd) { + case RETBLEED_CMD_OFF: + return; + + case RETBLEED_CMD_AUTO: + default: + /* + * The Intel mitigation (IBRS) was already selected in + * spectre_v2_select_mitigation(). + */ + + break; + } + + switch (retbleed_mitigation) { + default: + break; + } + + /* + * Let IBRS trump all on Intel without affecting the effects of the + * retbleed= cmdline option. + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + switch (spectre_v2_enabled) { + case SPECTRE_V2_IBRS: + retbleed_mitigation = RETBLEED_MITIGATION_IBRS; + break; + case SPECTRE_V2_EIBRS: + case SPECTRE_V2_EIBRS_RETPOLINE: + case SPECTRE_V2_EIBRS_LFENCE: + retbleed_mitigation = RETBLEED_MITIGATION_EIBRS; + break; + default: + pr_err(RETBLEED_INTEL_MSG); + } + } + + pr_info("%s\n", retbleed_strings[retbleed_mitigation]); +} + +#undef pr_fmt +#define pr_fmt(fmt) "Spectre V2 : " fmt + +static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = + SPECTRE_V2_USER_NONE; +static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = SPECTRE_V2_USER_NONE; #ifdef CONFIG_RETPOLINE @@ -486,6 +848,33 @@ static inline const char *spectre_v2_module_string(void) static inline const char *spectre_v2_module_string(void) { return ""; } #endif +#define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n" +#define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n" +#define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n" +#define SPECTRE_V2_IBRS_PERF_MSG "WARNING: IBRS mitigation selected on Enhanced IBRS CPU, this may cause unnecessary performance loss\n" + +#ifdef CONFIG_BPF_SYSCALL +void unpriv_ebpf_notify(int new_state) +{ + if (new_state) + return; + + /* Unprivileged eBPF is enabled */ + + switch (spectre_v2_enabled) { + case SPECTRE_V2_EIBRS: + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + break; + case SPECTRE_V2_EIBRS_LFENCE: + if (sched_smt_active()) + pr_err(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); + break; + default: + break; + } +} +#endif + static inline bool match_option(const char *arg, int arglen, const char *opt) { int len = strlen(opt); @@ -500,7 +889,11 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_FORCE, SPECTRE_V2_CMD_RETPOLINE, SPECTRE_V2_CMD_RETPOLINE_GENERIC, - SPECTRE_V2_CMD_RETPOLINE_AMD, + SPECTRE_V2_CMD_RETPOLINE_LFENCE, + SPECTRE_V2_CMD_EIBRS, + SPECTRE_V2_CMD_EIBRS_RETPOLINE, + SPECTRE_V2_CMD_EIBRS_LFENCE, + SPECTRE_V2_CMD_IBRS, }; enum spectre_v2_user_cmd { @@ -541,13 +934,15 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure) pr_info("spectre_v2_user=%s forced on command line.\n", reason); } +static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd; + static enum spectre_v2_user_cmd __init -spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) +spectre_v2_parse_user_cmdline(void) { char arg[20]; int ret, i; - switch (v2_cmd) { + switch (spectre_v2_cmd) { case SPECTRE_V2_CMD_NONE: return SPECTRE_V2_USER_CMD_NONE; case SPECTRE_V2_CMD_FORCE: @@ -573,8 +968,16 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) return SPECTRE_V2_USER_CMD_AUTO; } +static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) +{ + return mode == SPECTRE_V2_IBRS || + mode == SPECTRE_V2_EIBRS || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_EIBRS_LFENCE; +} + static void __init -spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) +spectre_v2_user_select_mitigation(void) { enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; bool smt_possible = IS_ENABLED(CONFIG_SMP); @@ -587,7 +990,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) cpu_smt_control == CPU_SMT_NOT_SUPPORTED) smt_possible = false; - cmd = spectre_v2_parse_user_cmdline(v2_cmd); + cmd = spectre_v2_parse_user_cmdline(); switch (cmd) { case SPECTRE_V2_USER_CMD_NONE: goto set_mode; @@ -608,24 +1011,17 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) break; } - /* - * At this point, an STIBP mode other than "off" has been set. - * If STIBP support is not being forced, check if STIBP always-on - * is preferred. - */ - if (mode != SPECTRE_V2_USER_STRICT && - boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) - mode = SPECTRE_V2_USER_STRICT_PREFERRED; - /* Initialize Indirect Branch Prediction Barrier */ if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); + spectre_v2_user_ibpb = mode; switch (cmd) { case SPECTRE_V2_USER_CMD_FORCE: case SPECTRE_V2_USER_CMD_PRCTL_IBPB: case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: static_branch_enable(&switch_mm_always_ibpb); + spectre_v2_user_ibpb = SPECTRE_V2_USER_STRICT; break; case SPECTRE_V2_USER_CMD_PRCTL: case SPECTRE_V2_USER_CMD_AUTO: @@ -641,28 +1037,38 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) "always-on" : "conditional"); } - /* If enhanced IBRS is enabled no STIBP required */ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + /* + * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible, + * STIBP is not required. + */ + if (!boot_cpu_has(X86_FEATURE_STIBP) || + !smt_possible || + spectre_v2_in_ibrs_mode(spectre_v2_enabled)) return; /* - * If SMT is not possible or STIBP is not available clear the STIBP - * mode. + * At this point, an STIBP mode other than "off" has been set. + * If STIBP support is not being forced, check if STIBP always-on + * is preferred. */ - if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP)) - mode = SPECTRE_V2_USER_NONE; + if (mode != SPECTRE_V2_USER_STRICT && + boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) + mode = SPECTRE_V2_USER_STRICT_PREFERRED; + + spectre_v2_user_stibp = mode; + set_mode: - spectre_v2_user = mode; - /* Only print the STIBP mode when SMT possible */ - if (smt_possible) - pr_info("%s\n", spectre_v2_user_strings[mode]); + pr_info("%s\n", spectre_v2_user_strings[mode]); } static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", - [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", - [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", - [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", + [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines", + [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE", + [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", + [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", + [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", + [SPECTRE_V2_IBRS] = "Mitigation: IBRS", }; static const struct { @@ -673,9 +1079,14 @@ static const struct { { "off", SPECTRE_V2_CMD_NONE, false }, { "on", SPECTRE_V2_CMD_FORCE, true }, { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, - { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false }, + { "retpoline,lfence", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false }, { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "eibrs", SPECTRE_V2_CMD_EIBRS, false }, + { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false }, + { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false }, { "auto", SPECTRE_V2_CMD_AUTO, false }, + { "ibrs", SPECTRE_V2_CMD_IBRS, false }, }; static void __init spec_v2_print_cond(const char *reason, bool secure) @@ -711,17 +1122,48 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) } if ((cmd == SPECTRE_V2_CMD_RETPOLINE || - cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || - cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && + cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE || + cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC || + cmd == SPECTRE_V2_CMD_EIBRS_LFENCE || + cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) && !IS_ENABLED(CONFIG_RETPOLINE)) { - pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option); + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if ((cmd == SPECTRE_V2_CMD_EIBRS || + cmd == SPECTRE_V2_CMD_EIBRS_LFENCE || + cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { + pr_err("%s selected but CPU doesn't have eIBRS. Switching to AUTO select\n", + mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } - if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && - boot_cpu_data.x86_vendor != X86_VENDOR_HYGON && - boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { - pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); + if ((cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE || + cmd == SPECTRE_V2_CMD_EIBRS_LFENCE) && + !boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { + pr_err("%s selected, but CPU doesn't have a serializing LFENCE. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { + pr_err("%s selected but not Intel CPU. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) { + pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) { + pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n", + mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } @@ -730,6 +1172,79 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } +static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) +{ + if (!IS_ENABLED(CONFIG_RETPOLINE)) { + pr_err("Kernel not compiled with retpoline; no mitigation available!"); + return SPECTRE_V2_NONE; + } + + return SPECTRE_V2_RETPOLINE; +} + +/* Disable in-kernel use of non-RSB RET predictors */ +static void __init spec_ctrl_disable_kernel_rrsba(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + return; + + ia32_cap = x86_read_arch_cap_msr(); + + if (ia32_cap & ARCH_CAP_RRSBA) { + x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; + write_spec_ctrl_current(x86_spec_ctrl_base, true); + } +} + +static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) +{ + /* + * Similar to context switches, there are two types of RSB attacks + * after VM exit: + * + * 1) RSB underflow + * + * 2) Poisoned RSB entry + * + * When retpoline is enabled, both are mitigated by filling/clearing + * the RSB. + * + * When IBRS is enabled, while #1 would be mitigated by the IBRS branch + * prediction isolation protections, RSB still needs to be cleared + * because of #2. Note that SMEP provides no protection here, unlike + * user-space-poisoned RSB entries. + * + * eIBRS should protect against RSB poisoning, but if the EIBRS_PBRSB + * bug is present then a LITE version of RSB protection is required, + * just a single call needs to retire before a RET is executed. + */ + switch (mode) { + case SPECTRE_V2_NONE: + return; + + case SPECTRE_V2_EIBRS_LFENCE: + case SPECTRE_V2_EIBRS: + if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT_LITE); + pr_info("Spectre v2 / PBRSB-eIBRS: Retire a single CALL on VMEXIT\n"); + } + return; + + case SPECTRE_V2_EIBRS_RETPOLINE: + case SPECTRE_V2_RETPOLINE: + case SPECTRE_V2_LFENCE: + case SPECTRE_V2_IBRS: + setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); + pr_info("Spectre v2 / SpectreRSB : Filling RSB on VMEXIT\n"); + return; + } + + pr_warn_once("Unknown Spectre v2 mode, disabling RSB mitigation at VM exit"); + dump_stack(); +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -750,86 +1265,161 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_CMD_FORCE: case SPECTRE_V2_CMD_AUTO: if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { - mode = SPECTRE_V2_IBRS_ENHANCED; - /* Force it so VMEXIT will restore correctly */ - x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - goto specv2_set_mode; + mode = SPECTRE_V2_EIBRS; + break; + } + + if (boot_cpu_has_bug(X86_BUG_RETBLEED) && + retbleed_cmd != RETBLEED_CMD_OFF && + boot_cpu_has(X86_FEATURE_IBRS) && + boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + mode = SPECTRE_V2_IBRS; + break; } - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_auto; + + mode = spectre_v2_select_retpoline(); break; - case SPECTRE_V2_CMD_RETPOLINE_AMD: - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_amd; + + case SPECTRE_V2_CMD_RETPOLINE_LFENCE: + pr_err(SPECTRE_V2_LFENCE_MSG); + mode = SPECTRE_V2_LFENCE; break; + case SPECTRE_V2_CMD_RETPOLINE_GENERIC: - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_generic; + mode = SPECTRE_V2_RETPOLINE; break; + case SPECTRE_V2_CMD_RETPOLINE: - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_auto; + mode = spectre_v2_select_retpoline(); + break; + + case SPECTRE_V2_CMD_IBRS: + mode = SPECTRE_V2_IBRS; + break; + + case SPECTRE_V2_CMD_EIBRS: + mode = SPECTRE_V2_EIBRS; + break; + + case SPECTRE_V2_CMD_EIBRS_LFENCE: + mode = SPECTRE_V2_EIBRS_LFENCE; + break; + + case SPECTRE_V2_CMD_EIBRS_RETPOLINE: + mode = SPECTRE_V2_EIBRS_RETPOLINE; break; } - pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!"); - return; -retpoline_auto: - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { - retpoline_amd: - if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { - pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); - goto retpoline_generic; - } - mode = SPECTRE_V2_RETPOLINE_AMD; - setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); - setup_force_cpu_cap(X86_FEATURE_RETPOLINE); - } else { - retpoline_generic: - mode = SPECTRE_V2_RETPOLINE_GENERIC; + if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + + if (spectre_v2_in_ibrs_mode(mode)) { + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; + write_spec_ctrl_current(x86_spec_ctrl_base, true); + } + + switch (mode) { + case SPECTRE_V2_NONE: + case SPECTRE_V2_EIBRS: + break; + + case SPECTRE_V2_IBRS: + setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS); + if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) + pr_warn(SPECTRE_V2_IBRS_PERF_MSG); + break; + + case SPECTRE_V2_LFENCE: + case SPECTRE_V2_EIBRS_LFENCE: + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE); + fallthrough; + + case SPECTRE_V2_RETPOLINE: + case SPECTRE_V2_EIBRS_RETPOLINE: setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + break; } -specv2_set_mode: + /* + * Disable alternate RSB predictions in kernel when indirect CALLs and + * JMPs gets protection against BHI and Intramode-BTI, but RET + * prediction from a non-RSB predictor is still a risk. + */ + if (mode == SPECTRE_V2_EIBRS_LFENCE || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_RETPOLINE) + spec_ctrl_disable_kernel_rrsba(); + spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); /* - * If spectre v2 protection has been enabled, unconditionally fill - * RSB during a context switch; this protects against two independent - * issues: + * If Spectre v2 protection has been enabled, fill the RSB during a + * context switch. In general there are two types of RSB attacks + * across context switches, for which the CALLs/RETs may be unbalanced. + * + * 1) RSB underflow + * + * Some Intel parts have "bottomless RSB". When the RSB is empty, + * speculated return targets may come from the branch predictor, + * which could have a user-poisoned BTB or BHB entry. * - * - RSB underflow (and switch to BTB) on Skylake+ - * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs + * AMD has it even worse: *all* returns are speculated from the BTB, + * regardless of the state of the RSB. + * + * When IBRS or eIBRS is enabled, the "user -> kernel" attack + * scenario is mitigated by the IBRS branch prediction isolation + * properties, so the RSB buffer filling wouldn't be necessary to + * protect against this type of attack. + * + * The "user -> user" attack scenario is mitigated by RSB filling. + * + * 2) Poisoned RSB entry + * + * If the 'next' in-kernel return stack is shorter than 'prev', + * 'next' could be tricked into speculating with a user-poisoned RSB + * entry. + * + * The "user -> kernel" attack scenario is mitigated by SMEP and + * eIBRS. + * + * The "user -> user" scenario, also known as SpectreBHB, requires + * RSB clearing. + * + * So to mitigate all cases, unconditionally fill RSB on context + * switches. + * + * FIXME: Is this pointless for retbleed-affected AMD? */ setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); + spectre_v2_determine_rsb_fill_type_at_vmexit(mode); + /* - * Retpoline means the kernel is safe because it has no indirect - * branches. Enhanced IBRS protects firmware too, so, enable restricted - * speculation around firmware calls only when Enhanced IBRS isn't - * supported. + * Retpoline protects the kernel, but doesn't protect firmware. IBRS + * and Enhanced IBRS protect firmware too, so enable IBRS around + * firmware calls only when IBRS / Enhanced IBRS aren't otherwise + * enabled. * * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because * the user might select retpoline on the kernel command line and if * the CPU supports Enhanced IBRS, kernel might un-intentionally not * enable IBRS around firmware calls. */ - if (boot_cpu_has(X86_FEATURE_IBRS) && mode != SPECTRE_V2_IBRS_ENHANCED) { + if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) { setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } /* Set up IBPB and STIBP depending on the general spectre V2 command */ - spectre_v2_user_select_mitigation(cmd); + spectre_v2_cmd = cmd; } static void update_stibp_msr(void * __unused) { - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); + write_spec_ctrl_current(val, true); } /* Update x86_spec_ctrl_base in case SMT state changed. */ @@ -864,6 +1454,8 @@ static void update_indir_branch_cond(void) /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { + u64 ia32_cap = x86_read_arch_cap_msr(); + /* * Enable the idle clearing if SMT is active on CPUs which are * affected only by MSBDS and not any other MDS variant. @@ -875,20 +1467,27 @@ static void update_mds_branch_idle(void) if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) return; - if (sched_smt_active()) + if (sched_smt_active()) { static_branch_enable(&mds_idle_clear); - else + } else if (mmio_mitigation == MMIO_MITIGATION_OFF || + (ia32_cap & ARCH_CAP_FBSDP_NO)) { static_branch_disable(&mds_idle_clear); + } } #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" #define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" +#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" void cpu_bugs_smt_update(void) { mutex_lock(&spec_ctrl_mutex); - switch (spectre_v2_user) { + if (sched_smt_active() && unprivileged_ebpf_enabled() && + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) + pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); + + switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: break; case SPECTRE_V2_USER_STRICT: @@ -923,6 +1522,16 @@ void cpu_bugs_smt_update(void) break; } + switch (mmio_mitigation) { + case MMIO_MITIGATION_VERW: + case MMIO_MITIGATION_UCODE_NEEDED: + if (sched_smt_active()) + pr_warn_once(MMIO_MSG_SMT); + break; + case MMIO_MITIGATION_OFF: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -1026,16 +1635,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) break; } - /* - * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper - * bit in the mask to allow guests to use the mitigation even in the - * case where the host does not enable it. - */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) { - x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; - } - /* * We have three CPU feature flags that are in play here: * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. @@ -1053,7 +1652,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); } } @@ -1127,19 +1726,40 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) return 0; } +static bool is_spec_ib_user_controlled(void) +{ + return spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || + spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP; +} + static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) { switch (ctrl) { case PR_SPEC_ENABLE: - if (spectre_v2_user == SPECTRE_V2_USER_NONE) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return 0; /* - * Indirect branch speculation is always disabled in strict - * mode. + * With strict mode for both IBPB and STIBP, the instruction + * code paths avoid checking this task flag and instead, + * unconditionally run the instruction. However, STIBP and IBPB + * are independent and either can be set to conditionally + * enabled regardless of the mode of the other. + * + * If either is set to conditional, allow the task flag to be + * updated, unless it was force-disabled by a previous prctl + * call. Currently, this is possible on an AMD CPU which has the + * feature X86_FEATURE_AMD_STIBP_ALWAYS_ON. In this case, if the + * kernel is booted with 'spectre_v2_user=seccomp', then + * spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP and + * spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED. */ - if (spectre_v2_user == SPECTRE_V2_USER_STRICT || - spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED) + if (!is_spec_ib_user_controlled() || + task_spec_ib_force_disable(task)) return -EPERM; + task_clear_spec_ib_disable(task); task_update_spec_tif(task); break; @@ -1149,11 +1769,13 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) * Indirect branch speculation is always allowed when * mitigation is force disabled. */ - if (spectre_v2_user == SPECTRE_V2_USER_NONE) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return -EPERM; - if (spectre_v2_user == SPECTRE_V2_USER_STRICT || - spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED) + + if (!is_spec_ib_user_controlled()) return 0; + task_set_spec_ib_disable(task); if (ctrl == PR_SPEC_FORCE_DISABLE) task_set_spec_ib_force_disable(task); @@ -1183,7 +1805,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task) { if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); - if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP) + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || + spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) ib_prctl_set(task, PR_SPEC_FORCE_DISABLE); } #endif @@ -1214,22 +1837,21 @@ static int ib_prctl_get(struct task_struct *task) if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return PR_SPEC_NOT_AFFECTED; - switch (spectre_v2_user) { - case SPECTRE_V2_USER_NONE: + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return PR_SPEC_ENABLE; - case SPECTRE_V2_USER_PRCTL: - case SPECTRE_V2_USER_SECCOMP: + else if (is_spec_ib_user_controlled()) { if (task_spec_ib_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; if (task_spec_ib_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; - case SPECTRE_V2_USER_STRICT: - case SPECTRE_V2_USER_STRICT_PREFERRED: + } else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) return PR_SPEC_DISABLE; - default: + else return PR_SPEC_NOT_AFFECTED; - } } int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) @@ -1247,7 +1869,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + write_spec_ctrl_current(x86_spec_ctrl_base, true); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); @@ -1463,12 +2085,29 @@ static ssize_t tsx_async_abort_show_state(char *buf) sched_smt_active() ? "vulnerable" : "disabled"); } +static ssize_t mmio_stale_data_show_state(char *buf) +{ + if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) + return sysfs_emit(buf, "Unknown: No mitigations\n"); + + if (mmio_mitigation == MMIO_MITIGATION_OFF) + return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]); + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + return sysfs_emit(buf, "%s; SMT Host state unknown\n", + mmio_strings[mmio_mitigation]); + } + + return sysfs_emit(buf, "%s; SMT %s\n", mmio_strings[mmio_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); +} + static char *stibp_state(void) { - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + if (spectre_v2_in_ibrs_mode(spectre_v2_enabled)) return ""; - switch (spectre_v2_user) { + switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: return ", STIBP: disabled"; case SPECTRE_V2_USER_STRICT: @@ -1495,6 +2134,51 @@ static char *ibpb_state(void) return ""; } +static char *pbrsb_eibrs_state(void) +{ + if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { + if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) || + boot_cpu_has(X86_FEATURE_RSB_VMEXIT)) + return ", PBRSB-eIBRS: SW sequence"; + else + return ", PBRSB-eIBRS: Vulnerable"; + } else { + return ", PBRSB-eIBRS: Not affected"; + } +} + +static ssize_t spectre_v2_show_state(char *buf) +{ + if (spectre_v2_enabled == SPECTRE_V2_LFENCE) + return sprintf(buf, "Vulnerable: LFENCE\n"); + + if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) + return sprintf(buf, "Vulnerable: eIBRS with unprivileged eBPF\n"); + + if (sched_smt_active() && unprivileged_ebpf_enabled() && + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) + return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); + + return sprintf(buf, "%s%s%s%s%s%s%s\n", + spectre_v2_strings[spectre_v2_enabled], + ibpb_state(), + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + stibp_state(), + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + pbrsb_eibrs_state(), + spectre_v2_module_string()); +} + +static ssize_t srbds_show_state(char *buf) +{ + return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); +} + +static ssize_t retbleed_show_state(char *buf) +{ + return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -1515,12 +2199,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]); case X86_BUG_SPECTRE_V2: - return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - ibpb_state(), - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - stibp_state(), - boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", - spectre_v2_module_string()); + return spectre_v2_show_state(buf); case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); @@ -1539,6 +2218,16 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_ITLB_MULTIHIT: return itlb_multihit_show_state(buf); + case X86_BUG_SRBDS: + return srbds_show_state(buf); + + case X86_BUG_MMIO_STALE_DATA: + case X86_BUG_MMIO_UNKNOWN: + return mmio_stale_data_show_state(buf); + + case X86_BUG_RETBLEED: + return retbleed_show_state(buf); + default: break; } @@ -1585,4 +2274,22 @@ ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr { return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT); } + +ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS); +} + +ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_UNKNOWN); + else + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); +} + +ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED); +} #endif diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index c7503be92f359..cae566567e15e 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -646,7 +646,7 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c) return i; } -void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu) { /* * We may have multiple LLCs if L3 caches exist, so check if we @@ -657,7 +657,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) if (c->x86 < 0x17) { /* LLC is at the node level. */ - per_cpu(cpu_llc_id, cpu) = node_id; + per_cpu(cpu_llc_id, cpu) = c->cpu_die_id; } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) { /* * LLC is at the core complex level. @@ -684,7 +684,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) } } -void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) +void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu) { /* * We may have multiple LLCs if L3 caches exist, so check if we @@ -985,7 +985,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, this_leaf->priv = base->nb; } -static int __init_cache_level(unsigned int cpu) +int init_cache_level(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -1014,7 +1014,7 @@ static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs) id4_regs->id = c->apicid >> index_msb; } -static int __populate_cache_leaves(unsigned int cpu) +int populate_cache_leaves(unsigned int cpu) { unsigned int idx, ret; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -1033,6 +1033,3 @@ static int __populate_cache_leaves(unsigned int cpu) return 0; } - -DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) -DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fffe219453749..5e1e32f1086ba 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -366,6 +366,9 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c) cr4_clear_bits(X86_CR4_UMIP); } +/* These bits should not change their value after CPU init is finished. */ +static const unsigned long cr4_pinned_mask = + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE; static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); static unsigned long cr4_pinned_bits __ro_after_init; @@ -374,7 +377,7 @@ void native_write_cr0(unsigned long val) unsigned long bits_missing = 0; set_register: - asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order)); + asm volatile("mov %0,%%cr0": "+r" (val) : : "memory"); if (static_branch_likely(&cr_pinning)) { if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) { @@ -390,20 +393,20 @@ EXPORT_SYMBOL(native_write_cr0); void native_write_cr4(unsigned long val) { - unsigned long bits_missing = 0; + unsigned long bits_changed = 0; set_register: - asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits)); + asm volatile("mov %0,%%cr4": "+r" (val) : : "memory"); if (static_branch_likely(&cr_pinning)) { - if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) { - bits_missing = ~val & cr4_pinned_bits; - val |= bits_missing; + if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) { + bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits; + val = (val & ~cr4_pinned_mask) | cr4_pinned_bits; goto set_register; } - /* Warn after we've set the missing bits. */ - WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n", - bits_missing); + /* Warn after we've corrected the changed bits. */ + WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n", + bits_changed); } } EXPORT_SYMBOL(native_write_cr4); @@ -415,7 +418,7 @@ void cr4_init(void) if (boot_cpu_has(X86_FEATURE_PCID)) cr4 |= X86_CR4_PCIDE; if (static_branch_likely(&cr_pinning)) - cr4 |= cr4_pinned_bits; + cr4 = (cr4 & ~cr4_pinned_mask) | cr4_pinned_bits; __write_cr4(cr4); @@ -430,10 +433,7 @@ void cr4_init(void) */ static void __init setup_cr_pinning(void) { - unsigned long mask; - - mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP); - cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask; + cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & cr4_pinned_mask; static_key_enable(&cr_pinning.key); } @@ -464,7 +464,7 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c) * cpuid bit to be set. We need to ensure that we * update that bit in this CPU's "cpu_info". */ - get_cpu_cap(c); + set_cpu_cap(c, X86_FEATURE_OSPKE); } #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS @@ -1024,6 +1024,9 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define MSBDS_ONLY BIT(5) #define NO_SWAPGS BIT(6) #define NO_ITLB_MULTIHIT BIT(7) +#define NO_SPECTRE_V2 BIT(8) +#define NO_EIBRS_PBRSB BIT(9) +#define NO_MMIO BIT(10) #define VULNWL(_vendor, _family, _model, _whitelist) \ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } @@ -1044,6 +1047,11 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), /* Intel Family 6 */ + VULNWL_INTEL(TIGERLAKE, NO_MMIO), + VULNWL_INTEL(TIGERLAKE_L, NO_MMIO), + VULNWL_INTEL(ALDERLAKE, NO_MMIO), + VULNWL_INTEL(ALDERLAKE_L, NO_MMIO), + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), @@ -1062,9 +1070,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), /* * Technically, swapgs isn't serializing on AMD (despite it previously @@ -1074,23 +1082,86 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { * good enough for our purposes. */ - VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_TREMONT, NO_EIBRS_PBRSB), + VULNWL_INTEL(ATOM_TREMONT_L, NO_EIBRS_PBRSB), + VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + + /* Zhaoxin Family 7 */ + VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_MMIO), + VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_MMIO), + {} +}; + +#define VULNBL(vendor, family, model, blacklist) \ + X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist) + +#define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ + X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ + INTEL_FAM6_##model, steppings, \ + X86_FEATURE_ANY, issues) + +#define VULNBL_AMD(family, blacklist) \ + VULNBL(AMD, family, X86_MODEL_ANY, blacklist) + +#define VULNBL_HYGON(family, blacklist) \ + VULNBL(HYGON, family, X86_MODEL_ANY, blacklist) + +#define SRBDS BIT(0) +/* CPU is affected by X86_BUG_MMIO_STALE_DATA */ +#define MMIO BIT(1) +/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */ +#define MMIO_SBDS BIT(2) +/* CPU is affected by RETbleed, speculating where you would not expect it */ +#define RETBLEED BIT(3) + +static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { + VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + + VULNBL_AMD(0x15, RETBLEED), + VULNBL_AMD(0x16, RETBLEED), + VULNBL_AMD(0x17, RETBLEED), + VULNBL_HYGON(0x18, RETBLEED), {} }; -static bool __init cpu_matches(unsigned long which) +static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long which) { - const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist); + const struct x86_cpu_id *m = x86_match_cpu(table); return m && !!(m->driver_data & which); } @@ -1105,34 +1176,46 @@ u64 x86_read_arch_cap_msr(void) return ia32_cap; } +static bool arch_cap_mmio_immune(u64 ia32_cap) +{ + return (ia32_cap & ARCH_CAP_FBSDP_NO && + ia32_cap & ARCH_CAP_PSDP_NO && + ia32_cap & ARCH_CAP_SBDR_SSDP_NO); +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = x86_read_arch_cap_msr(); /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ - if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) + if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) && + !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); - if (cpu_matches(NO_SPECULATION)) + if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION)) return; setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); - if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && + if (!cpu_matches(cpu_vuln_whitelist, NO_SPECTRE_V2)) + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + + if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) && + !(ia32_cap & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (ia32_cap & ARCH_CAP_IBRS_ALL) setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); - if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) { + if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) && + !(ia32_cap & ARCH_CAP_MDS_NO)) { setup_force_cpu_bug(X86_BUG_MDS); - if (cpu_matches(MSBDS_ONLY)) + if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY)) setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); } - if (!cpu_matches(NO_SWAPGS)) + if (!cpu_matches(cpu_vuln_whitelist, NO_SWAPGS)) setup_force_cpu_bug(X86_BUG_SWAPGS); /* @@ -1150,7 +1233,47 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) setup_force_cpu_bug(X86_BUG_TAA); - if (cpu_matches(NO_MELTDOWN)) + /* + * SRBDS affects CPUs which support RDRAND or RDSEED and are listed + * in the vulnerability blacklist. + * + * Some of the implications and mitigation of Shared Buffers Data + * Sampling (SBDS) are similar to SRBDS. Give SBDS same treatment as + * SRBDS. + */ + if ((cpu_has(c, X86_FEATURE_RDRAND) || + cpu_has(c, X86_FEATURE_RDSEED)) && + cpu_matches(cpu_vuln_blacklist, SRBDS | MMIO_SBDS)) + setup_force_cpu_bug(X86_BUG_SRBDS); + + /* + * Processor MMIO Stale Data bug enumeration + * + * Affected CPU list is generally enough to enumerate the vulnerability, + * but for virtualization case check for ARCH_CAP MSR bits also, VMM may + * not want the guest to enumerate the bug. + * + * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, + * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. + */ + if (!arch_cap_mmio_immune(ia32_cap)) { + if (cpu_matches(cpu_vuln_blacklist, MMIO)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) + setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN); + } + + if (!cpu_has(c, X86_FEATURE_BTC_NO)) { + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) + setup_force_cpu_bug(X86_BUG_RETBLEED); + } + + if (cpu_has(c, X86_FEATURE_IBRS_ENHANCED) && + !cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && + !(ia32_cap & ARCH_CAP_PBRSB_NO)) + setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); + + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; /* Rogue Data Cache Load? No! */ @@ -1159,7 +1282,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - if (cpu_matches(NO_L1TF)) + if (cpu_matches(cpu_vuln_whitelist, NO_L1TF)) return; setup_force_cpu_bug(X86_BUG_L1TF); @@ -1296,9 +1419,8 @@ void __init early_cpu_init(void) early_identify_cpu(&boot_cpu_data); } -static void detect_null_seg_behavior(struct cpuinfo_x86 *c) +static bool detect_null_seg_behavior(void) { -#ifdef CONFIG_X86_64 /* * Empirically, writing zero to a segment selector on AMD does * not clear the base, whereas writing zero to a segment @@ -1319,10 +1441,43 @@ static void detect_null_seg_behavior(struct cpuinfo_x86 *c) wrmsrl(MSR_FS_BASE, 1); loadsegment(fs, 0); rdmsrl(MSR_FS_BASE, tmp); - if (tmp != 0) - set_cpu_bug(c, X86_BUG_NULL_SEG); wrmsrl(MSR_FS_BASE, old_base); -#endif + return tmp == 0; +} + +void check_null_seg_clears_base(struct cpuinfo_x86 *c) +{ + /* BUG_NULL_SEG is only relevant with 64bit userspace */ + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + /* Zen3 CPUs advertise Null Selector Clears Base in CPUID. */ + if (c->extended_cpuid_level >= 0x80000021 && + cpuid_eax(0x80000021) & BIT(6)) + return; + + /* + * CPUID bit above wasn't set. If this kernel is still running + * as a HV guest, then the HV has decided not to advertize + * that CPUID bit for whatever reason. For example, one + * member of the migration pool might be vulnerable. Which + * means, the bug is present: set the BUG flag and return. + */ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) { + set_cpu_bug(c, X86_BUG_NULL_SEG); + return; + } + + /* + * Zen2 CPUs also have this behaviour, but no CPUID bit. + * 0x18 is the respective family for Hygon. + */ + if ((c->x86 == 0x17 || c->x86 == 0x18) && + detect_null_seg_behavior()) + return; + + /* All the remaining ones are affected */ + set_cpu_bug(c, X86_BUG_NULL_SEG); } static void generic_identify(struct cpuinfo_x86 *c) @@ -1358,8 +1513,6 @@ static void generic_identify(struct cpuinfo_x86 *c) get_model_name(c); /* Default name */ - detect_null_seg_behavior(c); - /* * ESPFIX is a strange bug. All real CPUs have it. Paravirt * systems that run Linux at CPL > 0 may or may not have the @@ -1597,6 +1750,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) mtrr_ap_init(); validate_apic_and_package_id(c); x86_spec_ctrl_setup_ap(); + update_srbds_msr(); } static __init int setup_noclflush(char *arg) @@ -1785,7 +1939,7 @@ static void setup_getcpu(int cpu) unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu)); struct desc_struct d = { }; - if (boot_cpu_has(X86_FEATURE_RDTSCP)) + if (boot_cpu_has(X86_FEATURE_RDTSCP) || boot_cpu_has(X86_FEATURE_RDPID)) write_rdtscp_aux(cpudata); /* Store CPU and node number in limit. */ diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 38ab6e115eac8..4d04c127c4a79 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -73,10 +73,12 @@ extern int detect_extended_topology_early(struct cpuinfo_x86 *c); extern int detect_extended_topology(struct cpuinfo_x86 *c); extern int detect_ht_early(struct cpuinfo_x86 *c); extern void detect_ht(struct cpuinfo_x86 *c); +extern void check_null_seg_clears_base(struct cpuinfo_x86 *c); unsigned int aperfmperf_get_khz(int cpu); extern void x86_spec_ctrl_setup_ap(void); +extern void update_srbds_msr(void); extern u64 x86_read_arch_cap_msr(void); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 4e28c1fc87499..b232bd0be78d6 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -64,7 +64,6 @@ static void hygon_get_topology_early(struct cpuinfo_x86 *c) */ static void hygon_get_topology(struct cpuinfo_x86 *c) { - u8 node_id; int cpu = smp_processor_id(); /* get information required for multi-node processors */ @@ -74,7 +73,7 @@ static void hygon_get_topology(struct cpuinfo_x86 *c) cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - node_id = ecx & 0xff; + c->cpu_die_id = ecx & 0xff; c->cpu_core_id = ebx & 0xff; @@ -92,14 +91,14 @@ static void hygon_get_topology(struct cpuinfo_x86 *c) /* Socket ID is ApicId[6] for these processors. */ c->phys_proc_id = c->apicid >> APICID_SOCKET_ID_BIT; - cacheinfo_hygon_init_llc_id(c, cpu, node_id); + cacheinfo_hygon_init_llc_id(c, cpu); } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; rdmsrl(MSR_FAM10H_NODE_ID, value); - node_id = value & 7; + c->cpu_die_id = value & 7; - per_cpu(cpu_llc_id, cpu) = node_id; + per_cpu(cpu_llc_id, cpu) = c->cpu_die_id; } else return; @@ -122,7 +121,7 @@ static void hygon_detect_cmp(struct cpuinfo_x86 *c) /* Convert the initial APIC ID into the socket ID */ c->phys_proc_id = c->initial_apicid >> bits; /* use socket ID also for last level cache */ - per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; + per_cpu(cpu_llc_id, cpu) = c->cpu_die_id = c->phys_proc_id; } static void srat_detect_node(struct cpuinfo_x86 *c) @@ -351,6 +350,8 @@ static void init_hygon(struct cpuinfo_x86 *c) /* Hygon CPUs don't reset SS attributes on SYSRET, Xen does. */ if (!cpu_has(c, X86_FEATURE_XENPV)) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + + check_null_seg_clears_base(c); } static void cpu_detect_tlb_hygon(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 11d5c5950e2d3..44688917d51fd 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -97,7 +97,7 @@ static bool ring3mwait_disabled __read_mostly; static int __init ring3mwait_disable(char *__unused) { ring3mwait_disabled = true; - return 0; + return 1; } __setup("ring3mwait=disable", ring3mwait_disable); diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index 6dd78d8235e44..ad6776081e60d 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -16,12 +16,17 @@ * respective wildcard entries. * * A typical table entry would be to match a specific CPU - * { X86_VENDOR_INTEL, 6, 0x12 } - * or to match a specific CPU feature - * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) } + * + * X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_BROADWELL, + * X86_FEATURE_ANY, NULL); * * Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY, - * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor) + * %X86_MODEL_ANY, %X86_FEATURE_ANY (except for vendor) + * + * asm/cpu_device_id.h contains a set of useful macros which are shortcuts + * for various common selections. The above can be shortened to: + * + * X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, NULL); * * Arrays used to match for this should also be declared using * MODULE_DEVICE_TABLE(x86cpu, ...) @@ -34,13 +39,18 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match) const struct x86_cpu_id *m; struct cpuinfo_x86 *c = &boot_cpu_data; - for (m = match; m->vendor | m->family | m->model | m->feature; m++) { + for (m = match; + m->vendor | m->family | m->model | m->steppings | m->feature; + m++) { if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor) continue; if (m->family != X86_FAMILY_ANY && c->x86 != m->family) continue; if (m->model != X86_MODEL_ANY && c->x86_model != m->model) continue; + if (m->steppings != X86_STEPPING_ANY && + !(BIT(c->x86_stepping) & m->steppings)) + continue; if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature)) continue; return m; diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 6ea7fdc82f3c7..7d76f41591daa 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -78,6 +78,7 @@ struct smca_bank_name { static struct smca_bank_name smca_names[] = { [SMCA_LS] = { "load_store", "Load Store Unit" }, + [SMCA_LS_V2] = { "load_store", "Load Store Unit" }, [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, [SMCA_DE] = { "decode_unit", "Decode Unit" }, @@ -138,6 +139,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = { /* ZN Core (HWID=0xB0) MCA types */ { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF }, + { SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10), 0xFFFFFF }, { SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF }, { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF }, { SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF }, @@ -266,10 +268,10 @@ static void smca_configure(unsigned int bank, unsigned int cpu) smca_set_misc_banks_map(bank, cpu); /* Return early if this bank was already initialized. */ - if (smca_banks[bank].hwid) + if (smca_banks[bank].hwid && smca_banks[bank].hwid->hwid_mcatype != 0) return; - if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { + if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { pr_warn("Failed to read MCA_IPID for bank %d\n", bank); return; } @@ -1161,9 +1163,12 @@ static const struct sysfs_ops threshold_ops = { .store = store, }; +static void threshold_block_release(struct kobject *kobj); + static struct kobj_type threshold_ktype = { .sysfs_ops = &threshold_ops, .default_attrs = default_attrs, + .release = threshold_block_release, }; static const char *get_name(unsigned int bank, struct threshold_block *b) @@ -1196,8 +1201,9 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return buf_mcatype; } -static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, - unsigned int block, u32 address) +static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb, + unsigned int bank, unsigned int block, + u32 address) { struct threshold_block *b = NULL; u32 low, high; @@ -1241,16 +1247,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, INIT_LIST_HEAD(&b->miscj); - if (per_cpu(threshold_banks, cpu)[bank]->blocks) { - list_add(&b->miscj, - &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); - } else { - per_cpu(threshold_banks, cpu)[bank]->blocks = b; - } + if (tb->blocks) + list_add(&b->miscj, &tb->blocks->miscj); + else + tb->blocks = b; - err = kobject_init_and_add(&b->kobj, &threshold_ktype, - per_cpu(threshold_banks, cpu)[bank]->kobj, - get_name(bank, b)); + err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b)); if (err) goto out_free; recurse: @@ -1258,7 +1260,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, if (!address) return 0; - err = allocate_threshold_blocks(cpu, bank, block, address); + err = allocate_threshold_blocks(cpu, tb, bank, block, address); if (err) goto out_free; @@ -1343,8 +1345,6 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) goto out_free; } - per_cpu(threshold_banks, cpu)[bank] = b; - if (is_shared_bank(bank)) { refcount_set(&b->cpus, 1); @@ -1355,9 +1355,13 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) } } - err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank)); - if (!err) - goto out; + err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank)); + if (err) + goto out_free; + + per_cpu(threshold_banks, cpu)[bank] = b; + + return 0; out_free: kfree(b); @@ -1366,8 +1370,12 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) return err; } -static void deallocate_threshold_block(unsigned int cpu, - unsigned int bank) +static void threshold_block_release(struct kobject *kobj) +{ + kfree(to_block(kobj)); +} + +static void deallocate_threshold_block(unsigned int cpu, unsigned int bank) { struct threshold_block *pos = NULL; struct threshold_block *tmp = NULL; @@ -1377,13 +1385,11 @@ static void deallocate_threshold_block(unsigned int cpu, return; list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) { - kobject_put(&pos->kobj); list_del(&pos->miscj); - kfree(pos); + kobject_put(&pos->kobj); } - kfree(per_cpu(threshold_banks, cpu)[bank]->blocks); - per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; + kobject_put(&head->blocks->kobj); } static void __threshold_remove_blocks(struct threshold_bank *b) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 743370ee49835..9b98a7d8ac604 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -310,11 +310,17 @@ static void wait_for_panic(void) panic("Panicing machine check CPU died"); } -static void mce_panic(const char *msg, struct mce *final, char *exp) +static noinstr void mce_panic(const char *msg, struct mce *final, char *exp) { - int apei_err = 0; struct llist_node *pending; struct mce_evt_llist *l; + int apei_err = 0; + + /* + * Allow instrumentation around external facilities usage. Not that it + * matters a whole lot since the machine is going to panic anyway. + */ + instrumentation_begin(); if (!fake_panic) { /* @@ -329,7 +335,7 @@ static void mce_panic(const char *msg, struct mce *final, char *exp) } else { /* Don't log too much for fake panic */ if (atomic_inc_return(&mce_fake_panicked) > 1) - return; + goto out; } pending = mce_gen_pool_prepare_records(); /* First print corrected ones that are still unlogged */ @@ -367,6 +373,9 @@ static void mce_panic(const char *msg, struct mce *final, char *exp) panic(msg); } else pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); + +out: + instrumentation_end(); } /* Support code for software error injection */ @@ -388,10 +397,38 @@ static int msr_to_offset(u32 msr) return -1; } +static void ex_handler_msr_mce(struct pt_regs *regs, bool wrmsr) +{ + if (wrmsr) { + pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", + (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax, + regs->ip, (void *)regs->ip); + } else { + pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", + (unsigned int)regs->cx, regs->ip, (void *)regs->ip); + } + + show_stack_regs(regs); + + panic("MCA architectural violation!\n"); + + while (true) + cpu_relax(); +} + +__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr, + unsigned long error_code, + unsigned long fault_addr) +{ + ex_handler_msr_mce(regs, false); + return true; +} + /* MSR access wrappers used for error injection */ static u64 mce_rdmsrl(u32 msr) { - u64 v; + DECLARE_ARGS(val, low, high); if (__this_cpu_read(injectm.finished)) { int offset = msr_to_offset(msr); @@ -401,21 +438,33 @@ static u64 mce_rdmsrl(u32 msr) return *(u64 *)((char *)this_cpu_ptr(&injectm) + offset); } - if (rdmsrl_safe(msr, &v)) { - WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr); - /* - * Return zero in case the access faulted. This should - * not happen normally but can happen if the CPU does - * something weird, or if the code is buggy. - */ - v = 0; - } + /* + * RDMSR on MCA MSRs should not fault. If they do, this is very much an + * architectural violation and needs to be reported to hw vendor. Panic + * the box to not allow any further progress. + */ + asm volatile("1: rdmsr\n" + "2:\n" + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_fault) + : EAX_EDX_RET(val, low, high) : "c" (msr)); + + + return EAX_EDX_VAL(val, low, high); +} - return v; +__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr, + unsigned long error_code, + unsigned long fault_addr) +{ + ex_handler_msr_mce(regs, true); + return true; } static void mce_wrmsrl(u32 msr, u64 v) { + u32 low, high; + if (__this_cpu_read(injectm.finished)) { int offset = msr_to_offset(msr); @@ -423,7 +472,15 @@ static void mce_wrmsrl(u32 msr, u64 v) *(u64 *)((char *)this_cpu_ptr(&injectm) + offset) = v; return; } - wrmsrl(msr, v); + + low = (u32)v; + high = (u32)(v >> 32); + + /* See comment in mce_rdmsrl() */ + asm volatile("1: wrmsr\n" + "2:\n" + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_fault) + : : "c" (msr), "a"(low), "d" (high) : "memory"); } /* @@ -533,6 +590,13 @@ bool mce_is_memory_error(struct mce *m) } EXPORT_SYMBOL_GPL(mce_is_memory_error); +static bool whole_page(struct mce *m) +{ + if (!mca_cfg.ser || !(m->status & MCI_STATUS_MISCV)) + return true; + return MCI_MISC_ADDR_LSB(m->misc) >= PAGE_SHIFT; +} + bool mce_is_correctable(struct mce *m) { if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED) @@ -601,7 +665,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) { pfn = mce->addr >> PAGE_SHIFT; if (!memory_failure(pfn, 0)) - set_mce_nospec(pfn); + set_mce_nospec(pfn, whole_page(mce)); } return NOTIFY_OK; @@ -636,7 +700,7 @@ static struct notifier_block mce_default_nb = { /* * Read ADDR and MISC registers. */ -static void mce_read_aux(struct mce *m, int i) +static noinstr void mce_read_aux(struct mce *m, int i) { if (m->status & MCI_STATUS_MISCV) m->misc = mce_rdmsrl(msr_ops.misc(i)); @@ -814,8 +878,8 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, if (quirk_no_way_out) quirk_no_way_out(i, m, regs); + m->bank = i; if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { - m->bank = i; mce_read_aux(m, i); *msg = tmp; return 1; @@ -1016,10 +1080,13 @@ static int mce_start(int *no_way_out) * Synchronize between CPUs after main scanning loop. * This invokes the bulk of the Monarch processing. */ -static int mce_end(int order) +static noinstr int mce_end(int order) { - int ret = -1; u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC; + int ret = -1; + + /* Allow instrumentation around external facilities. */ + instrumentation_begin(); if (!timeout) goto reset; @@ -1063,7 +1130,8 @@ static int mce_end(int order) /* * Don't reset anything. That's done by the Monarch. */ - return 0; + ret = 0; + goto out; } /* @@ -1078,6 +1146,10 @@ static int mce_end(int order) * Let others run again. */ atomic_set(&mce_executing, 0); + +out: + instrumentation_end(); + return ret; } @@ -1103,7 +1175,7 @@ static int do_memory_failure(struct mce *m) if (ret) pr_err("Memory error not recovered"); else - set_mce_nospec(m->addr >> PAGE_SHIFT); + set_mce_nospec(m->addr >> PAGE_SHIFT, whole_page(m)); return ret; } @@ -1306,8 +1378,10 @@ void do_machine_check(struct pt_regs *regs, long error_code) * When there's any problem use only local no_way_out state. */ if (!lmce) { - if (mce_end(order) < 0) - no_way_out = worst >= MCE_PANIC_SEVERITY; + if (mce_end(order) < 0) { + if (!no_way_out) + no_way_out = worst >= MCE_PANIC_SEVERITY; + } } else { /* * If there was a fatal machine check we should have diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 1f30117b24ba7..e1fda5b19b6f6 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -347,7 +347,7 @@ static ssize_t flags_write(struct file *filp, const char __user *ubuf, char buf[MAX_FLAG_OPT_SIZE], *__buf; int err; - if (cnt > MAX_FLAG_OPT_SIZE) + if (!cnt || cnt > MAX_FLAG_OPT_SIZE) return -EINVAL; if (copy_from_user(&buf, ubuf, cnt)) @@ -511,7 +511,7 @@ static void do_inject(void) */ if (inj_type == DFR_INT_INJ) { i_mce.status |= MCI_STATUS_DEFERRED; - i_mce.status |= (i_mce.status & ~MCI_STATUS_UC); + i_mce.status &= ~MCI_STATUS_UC; } /* diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index 88cd9598fa57c..f2350967a898a 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -489,17 +489,18 @@ static void intel_ppin_init(struct cpuinfo_x86 *c) return; if ((val & 3UL) == 1UL) { - /* PPIN available but disabled: */ + /* PPIN locked in disabled mode */ return; } - /* If PPIN is disabled, but not locked, try to enable: */ - if (!(val & 3UL)) { + /* If PPIN is disabled, try to enable */ + if (!(val & 2UL)) { wrmsrl_safe(MSR_PPIN_CTL, val | 2UL); rdmsrl_safe(MSR_PPIN_CTL, &val); } - if ((val & 3UL) == 2UL) + /* Is the enable bit set? */ + if (val & 2UL) set_cpu_cap(c, X86_FEATURE_INTEL_PPIN); } } diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 43031db429d24..231954fe5b4e6 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -172,4 +172,14 @@ extern bool amd_filter_mce(struct mce *m); static inline bool amd_filter_mce(struct mce *m) { return false; }; #endif +__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr, + unsigned long error_code, + unsigned long fault_addr); + +__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup, + struct pt_regs *regs, int trapnr, + unsigned long error_code, + unsigned long fault_addr); + #endif /* __X86_MCE_INTERNAL_H__ */ diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index 87bcdc6dc2f0c..0d09eb13743b4 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -9,9 +9,11 @@ #include #include #include -#include #include +#include +#include + #include "internal.h" /* @@ -40,9 +42,14 @@ static struct severity { unsigned char context; unsigned char excp; unsigned char covered; + unsigned char cpu_model; + unsigned char cpu_minstepping; + unsigned char bank_lo, bank_hi; char *msg; } severities[] = { #define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c } +#define BANK_RANGE(l, h) .bank_lo = l, .bank_hi = h +#define MODEL_STEPPING(m, s) .cpu_model = m, .cpu_minstepping = s #define KERNEL .context = IN_KERNEL #define USER .context = IN_USER #define KERNEL_RECOV .context = IN_KERNEL_RECOV @@ -97,7 +104,6 @@ static struct severity { KEEP, "Corrected error", NOSER, BITCLR(MCI_STATUS_UC) ), - /* * known AO MCACODs reported via MCE or CMC: * @@ -113,6 +119,18 @@ static struct severity { AO, "Action optional: last level cache writeback error", SER, MASK(MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB) ), + /* + * Quirk for Skylake/Cascade Lake. Patrol scrubber may be configured + * to report uncorrected errors using CMCI with a special signature. + * UC=0, MSCOD=0x0010, MCACOD=binary(000X 0000 1100 XXXX) reported + * in one of the memory controller banks. + * Set severity to "AO" for same action as normal patrol scrub error. + */ + MCESEV( + AO, "Uncorrected Patrol Scrub Error", + SER, MASK(MCI_STATUS_UC|MCI_ADDR|0xffffeff0, MCI_ADDR|0x001000c0), + MODEL_STEPPING(INTEL_FAM6_SKYLAKE_X, 4), BANK_RANGE(13, 18) + ), /* ignore OVER for UCNA */ MCESEV( @@ -320,6 +338,12 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e continue; if (s->excp && excp != s->excp) continue; + if (s->cpu_model && boot_cpu_data.x86_model != s->cpu_model) + continue; + if (s->cpu_minstepping && boot_cpu_data.x86_stepping < s->cpu_minstepping) + continue; + if (s->bank_lo && (m->bank < s->bank_lo || m->bank > s->bank_hi)) + continue; if (msg) *msg = s->msg; s->covered = 1; diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c index 6e2becf547c5e..bc441d68d0602 100644 --- a/arch/x86/kernel/cpu/mce/therm_throt.c +++ b/arch/x86/kernel/cpu/mce/therm_throt.c @@ -188,7 +188,7 @@ static void therm_throt_process(bool new_event, int event, int level) /* if we just entered the thermal event */ if (new_event) { if (event == THERMAL_THROTTLING_EVENT) - pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", + pr_warn("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, level == CORE_LEVEL ? "Core" : "Package", state->count); diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index cb0fdcaf14157..c95a27513a30c 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -626,16 +626,16 @@ static ssize_t reload_store(struct device *dev, if (val != 1) return size; - tmp_ret = microcode_ops->request_microcode_fw(bsp, µcode_pdev->dev, true); - if (tmp_ret != UCODE_NEW) - return size; - get_online_cpus(); ret = check_online_cpus(); if (ret) goto put; + tmp_ret = microcode_ops->request_microcode_fw(bsp, µcode_pdev->dev, true); + if (tmp_ret != UCODE_NEW) + goto put; + mutex_lock(µcode_mutex); ret = microcode_reload_late(); mutex_unlock(µcode_mutex); @@ -772,9 +772,9 @@ static struct subsys_interface mc_cpu_interface = { }; /** - * mc_bp_resume - Update boot CPU microcode during resume. + * microcode_bsp_resume - Update boot CPU microcode during resume. */ -static void mc_bp_resume(void) +void microcode_bsp_resume(void) { int cpu = smp_processor_id(); struct ucode_cpu_info *uci = ucode_cpu_info + cpu; @@ -786,7 +786,7 @@ static void mc_bp_resume(void) } static struct syscore_ops mc_syscore_ops = { - .resume = mc_bp_resume, + .resume = microcode_bsp_resume, }; static int mc_cpu_starting(unsigned int cpu) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index ce799cfe9434e..b224d4dae2ffb 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -100,53 +100,6 @@ static int has_newer_microcode(void *mc, unsigned int csig, int cpf, int new_rev return find_matching_signature(mc, csig, cpf); } -/* - * Given CPU signature and a microcode patch, this function finds if the - * microcode patch has matching family and model with the CPU. - * - * %true - if there's a match - * %false - otherwise - */ -static bool microcode_matches(struct microcode_header_intel *mc_header, - unsigned long sig) -{ - unsigned long total_size = get_totalsize(mc_header); - unsigned long data_size = get_datasize(mc_header); - struct extended_sigtable *ext_header; - unsigned int fam_ucode, model_ucode; - struct extended_signature *ext_sig; - unsigned int fam, model; - int ext_sigcount, i; - - fam = x86_family(sig); - model = x86_model(sig); - - fam_ucode = x86_family(mc_header->sig); - model_ucode = x86_model(mc_header->sig); - - if (fam == fam_ucode && model == model_ucode) - return true; - - /* Look for ext. headers: */ - if (total_size <= data_size + MC_HEADER_SIZE) - return false; - - ext_header = (void *) mc_header + data_size + MC_HEADER_SIZE; - ext_sig = (void *)ext_header + EXT_HEADER_SIZE; - ext_sigcount = ext_header->count; - - for (i = 0; i < ext_sigcount; i++) { - fam_ucode = x86_family(ext_sig->sig); - model_ucode = x86_model(ext_sig->sig); - - if (fam == fam_ucode && model == model_ucode) - return true; - - ext_sig++; - } - return false; -} - static struct ucode_patch *memdup_patch(void *data, unsigned int size) { struct ucode_patch *p; @@ -164,7 +117,7 @@ static struct ucode_patch *memdup_patch(void *data, unsigned int size) return p; } -static void save_microcode_patch(void *data, unsigned int size) +static void save_microcode_patch(struct ucode_cpu_info *uci, void *data, unsigned int size) { struct microcode_header_intel *mc_hdr, *mc_saved_hdr; struct ucode_patch *iter, *tmp, *p = NULL; @@ -210,6 +163,9 @@ static void save_microcode_patch(void *data, unsigned int size) if (!p) return; + if (!find_matching_signature(p->data, uci->cpu_sig.sig, uci->cpu_sig.pf)) + return; + /* * Save for early loading. On 32-bit, that needs to be a physical * address as the APs are running from physical addresses, before @@ -344,13 +300,14 @@ scan_microcode(void *data, size_t size, struct ucode_cpu_info *uci, bool save) size -= mc_size; - if (!microcode_matches(mc_header, uci->cpu_sig.sig)) { + if (!find_matching_signature(data, uci->cpu_sig.sig, + uci->cpu_sig.pf)) { data += mc_size; continue; } if (save) { - save_microcode_patch(data, mc_size); + save_microcode_patch(uci, data, mc_size); goto next; } @@ -483,14 +440,14 @@ static void show_saved_mc(void) * Save this microcode patch. It will be loaded early when a CPU is * hot-added or resumes. */ -static void save_mc_for_early(u8 *mc, unsigned int size) +static void save_mc_for_early(struct ucode_cpu_info *uci, u8 *mc, unsigned int size) { /* Synchronization during CPU hotplug. */ static DEFINE_MUTEX(x86_cpu_microcode_mutex); mutex_lock(&x86_cpu_microcode_mutex); - save_microcode_patch(mc, size); + save_microcode_patch(uci, mc, size); show_saved_mc(); mutex_unlock(&x86_cpu_microcode_mutex); @@ -934,7 +891,7 @@ static enum ucode_state generic_load_microcode(int cpu, struct iov_iter *iter) * permanent memory. So it will be loaded early when a CPU is hot added * or resumes. */ - save_mc_for_early(new_mc, new_mc_size); + save_mc_for_early(uci, new_mc, new_mc_size); pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index c656d92cd708f..51d95c4b692c3 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -82,7 +82,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs) inc_irq_stat(hyperv_stimer0_count); if (hv_stimer0_handler) hv_stimer0_handler(); - add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0); + add_interrupt_randomness(HYPERV_STIMER0_VECTOR); ack_APIC_irq(); exiting_irq(); @@ -227,8 +227,8 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); - pr_info("Hyper-V: features 0x%x, hints 0x%x\n", - ms_hyperv.features, ms_hyperv.hints); + pr_info("Hyper-V: features 0x%x, hints 0x%x, misc 0x%x\n", + ms_hyperv.features, ms_hyperv.hints, ms_hyperv.misc_features); ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS); ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS); @@ -263,6 +263,16 @@ static void __init ms_hyperv_init_platform(void) cpuid_eax(HYPERV_CPUID_NESTED_FEATURES); } + /* + * Hyper-V expects to get crash register data or kmsg when + * crash enlightment is available and system crashes. Set + * crash_kexec_post_notifiers to be true to make sure that + * calling crash enlightment interface before running kdump + * kernel. + */ + if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) + crash_kexec_post_notifiers = true; + #ifdef CONFIG_X86_LOCAL_APIC if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS && ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index aa5c064a6a227..4ea906fe1c351 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -167,9 +167,6 @@ static u8 mtrr_type_lookup_variable(u64 start, u64 end, u64 *partial_end, *repeat = 0; *uniform = 1; - /* Make end inclusive instead of exclusive */ - end--; - prev_match = MTRR_TYPE_INVALID; for (i = 0; i < num_var_ranges; ++i) { unsigned short start_state, end_state, inclusive; @@ -261,6 +258,9 @@ u8 mtrr_type_lookup(u64 start, u64 end, u8 *uniform) int repeat; u64 partial_end; + /* Make end inclusive instead of exclusive */ + end--; + if (!mtrr_state_set) return MTRR_TYPE_INVALID; diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 03eb90d00af0f..130c3c7f56d4b 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -260,6 +260,7 @@ static bool __get_mem_config_intel(struct rdt_resource *r) r->num_closid = edx.split.cos_max + 1; r->membw.max_delay = eax.split.max_delay + 1; r->default_ctrl = MAX_MBA_BW; + r->membw.mbm_width = MBM_CNTR_WIDTH; if (ecx & MBA_IS_LINEAR) { r->membw.delay_linear = true; r->membw.min_bw = MAX_MBA_BW - r->membw.max_delay; @@ -289,6 +290,7 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r) /* AMD does not use delay */ r->membw.delay_linear = false; + r->membw.mbm_width = MBM_CNTR_WIDTH_AMD; r->membw.min_bw = 0; r->membw.bw_gran = 1; /* Max value is 2048, Data width should be 4 in decimal */ @@ -578,12 +580,16 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) d->id = id; cpumask_set_cpu(cpu, &d->cpu_mask); + rdt_domain_reconfigure_cdp(r); + if (r->alloc_capable && domain_setup_ctrlval(r, d)) { kfree(d); return; } if (r->mon_capable && domain_setup_mon_state(r, d)) { + kfree(d->ctrl_val); + kfree(d->mbps_val); kfree(d); return; } @@ -618,7 +624,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) if (static_branch_unlikely(&rdt_mon_enable_key)) rmdir_mondata_subdir_allrdtgrp(r, d->id); list_del(&d->list); - if (is_mbm_enabled()) + if (r->mon_capable && is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) { /* diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index e49b77283924a..499cb2e727a09 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -32,6 +32,7 @@ #define CQM_LIMBOCHECK_INTERVAL 1000 #define MBM_CNTR_WIDTH 24 +#define MBM_CNTR_WIDTH_AMD 44 #define MBM_OVERFLOW_INTERVAL 1000 #define MAX_MBA_BW 100u #define MBA_IS_LINEAR 0x4 @@ -57,6 +58,7 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc) } DECLARE_STATIC_KEY_FALSE(rdt_enable_key); +DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key); /** * struct mon_evt - Entry in the event list of a resource @@ -274,7 +276,6 @@ struct rftype { * struct mbm_state - status for each MBM counter in each domain * @chunks: Total data moved (multiply by rdt_group.mon_scale to get bytes) * @prev_msr Value of IA32_QM_CTR for this RMID last time we read it - * @chunks_bw Total local data moved. Used for bandwidth calculation * @prev_bw_msr:Value of previous IA32_QM_CTR for bandwidth counting * @prev_bw The most recent bandwidth in MBps * @delta_bw Difference between the current and previous bandwidth @@ -283,7 +284,6 @@ struct rftype { struct mbm_state { u64 chunks; u64 prev_msr; - u64 chunks_bw; u64 prev_bw_msr; u32 prev_bw; u32 delta_bw; @@ -367,6 +367,7 @@ struct rdt_cache { * @min_bw: Minimum memory bandwidth percentage user can request * @bw_gran: Granularity at which the memory bandwidth is allocated * @delay_linear: True if memory B/W delay is in linear scale + * @mbm_width: memory B/W monitor counter width * @mba_sc: True if MBA software controller(mba_sc) is enabled * @mb_map: Mapping of memory B/W percentage to memory B/W delay */ @@ -375,6 +376,7 @@ struct rdt_membw { u32 min_bw; u32 bw_gran; u32 delay_linear; + u32 mbm_width; bool mba_sc; u32 *mb_map; }; @@ -600,5 +602,6 @@ bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); void __check_limbo(struct rdt_domain *d, bool force_free); bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r); bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r); +void rdt_domain_reconfigure_cdp(struct rdt_resource *r); #endif /* _ASM_X86_RESCTRL_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 397206f23d14f..008bcb15fe96a 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -216,21 +216,21 @@ void free_rmid(u32 rmid) static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr) { - u64 shift = 64 - MBM_CNTR_WIDTH, chunks; + u64 shift, chunks; + shift = 64 - rdt_resources_all[RDT_RESOURCE_MBA].membw.mbm_width; chunks = (cur_msr << shift) - (prev_msr << shift); return chunks >>= shift; } -static int __mon_event_count(u32 rmid, struct rmid_read *rr) +static u64 __mon_event_count(u32 rmid, struct rmid_read *rr) { struct mbm_state *m; u64 chunks, tval; tval = __rmid_read(rmid, rr->evtid); if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) { - rr->val = tval; - return -EINVAL; + return tval; } switch (rr->evtid) { case QOS_L3_OCCUP_EVENT_ID: @@ -244,10 +244,10 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr) break; default: /* - * Code would never reach here because - * an invalid event id would fail the __rmid_read. + * Code would never reach here because an invalid + * event id would fail the __rmid_read. */ - return -EINVAL; + return RMID_VAL_ERROR; } if (rr->first) { @@ -279,8 +279,6 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr) return; chunks = mbm_overflow_count(m->prev_bw_msr, tval); - m->chunks_bw += chunks; - m->chunks = m->chunks_bw; cur_bw = (chunks * r->mon_scale) >> 20; if (m->delta_comp) @@ -299,23 +297,29 @@ void mon_event_count(void *info) struct rdtgroup *rdtgrp, *entry; struct rmid_read *rr = info; struct list_head *head; + u64 ret_val; rdtgrp = rr->rgrp; - if (__mon_event_count(rdtgrp->mon.rmid, rr)) - return; + ret_val = __mon_event_count(rdtgrp->mon.rmid, rr); /* - * For Ctrl groups read data from child monitor groups. + * For Ctrl groups read data from child monitor groups and + * add them together. Count events which are read successfully. + * Discard the rmid_read's reporting errors. */ head = &rdtgrp->mon.crdtgrp_list; if (rdtgrp->type == RDTCTRL_GROUP) { list_for_each_entry(entry, head, mon.crdtgrp_list) { - if (__mon_event_count(entry->mon.rmid, rr)) - return; + if (__mon_event_count(entry->mon.rmid, rr) == 0) + ret_val = 0; } } + + /* Report error if none of rmid_reads are successful */ + if (ret_val) + rr->val = ret_val; } /* @@ -450,15 +454,14 @@ static void mbm_update(struct rdt_domain *d, int rmid) } if (is_mbm_local_enabled()) { rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID; + __mon_event_count(rmid, &rr); /* * Call the MBA software controller only for the * control groups and when user has enabled * the software controller explicitly. */ - if (!is_mba_sc(NULL)) - __mon_event_count(rmid, &rr); - else + if (is_mba_sc(NULL)) mbm_bw_count(rmid, &rr); } } @@ -514,7 +517,7 @@ void mbm_handle_overflow(struct work_struct *work) mutex_lock(&rdtgroup_mutex); - if (!static_branch_likely(&rdt_enable_key)) + if (!static_branch_likely(&rdt_mon_enable_key)) goto out_unlock; d = get_domain_from_cpu(cpu, &rdt_resources_all[RDT_RESOURCE_L3]); @@ -543,7 +546,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms) unsigned long delay = msecs_to_jiffies(delay_ms); int cpu; - if (!static_branch_likely(&rdt_enable_key)) + if (!static_branch_likely(&rdt_mon_enable_key)) return; cpu = cpumask_any(&dom->cpu_mask); dom->mbm_work_cpu = cpu; diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 2e3b06d6bbc6d..28f786289fce4 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -507,85 +507,88 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, return ret ?: nbytes; } -struct task_move_callback { - struct callback_head work; - struct rdtgroup *rdtgrp; -}; - -static void move_myself(struct callback_head *head) +/** + * rdtgroup_remove - the helper to remove resource group safely + * @rdtgrp: resource group to remove + * + * On resource group creation via a mkdir, an extra kernfs_node reference is + * taken to ensure that the rdtgroup structure remains accessible for the + * rdtgroup_kn_unlock() calls where it is removed. + * + * Drop the extra reference here, then free the rdtgroup structure. + * + * Return: void + */ +static void rdtgroup_remove(struct rdtgroup *rdtgrp) { - struct task_move_callback *callback; - struct rdtgroup *rdtgrp; - - callback = container_of(head, struct task_move_callback, work); - rdtgrp = callback->rdtgrp; + kernfs_put(rdtgrp->kn); + kfree(rdtgrp); +} +static void _update_task_closid_rmid(void *task) +{ /* - * If resource group was deleted before this task work callback - * was invoked, then assign the task to root group and free the - * resource group. + * If the task is still current on this CPU, update PQR_ASSOC MSR. + * Otherwise, the MSR is updated when the task is scheduled in. */ - if (atomic_dec_and_test(&rdtgrp->waitcount) && - (rdtgrp->flags & RDT_DELETED)) { - current->closid = 0; - current->rmid = 0; - kfree(rdtgrp); - } - - preempt_disable(); - /* update PQR_ASSOC MSR to make resource group go into effect */ - resctrl_sched_in(); - preempt_enable(); + if (task == current) + resctrl_sched_in(); +} - kfree(callback); +static void update_task_closid_rmid(struct task_struct *t) +{ + if (IS_ENABLED(CONFIG_SMP) && task_curr(t)) + smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1); + else + _update_task_closid_rmid(t); } static int __rdtgroup_move_task(struct task_struct *tsk, struct rdtgroup *rdtgrp) { - struct task_move_callback *callback; - int ret; - - callback = kzalloc(sizeof(*callback), GFP_KERNEL); - if (!callback) - return -ENOMEM; - callback->work.func = move_myself; - callback->rdtgrp = rdtgrp; + /* If the task is already in rdtgrp, no need to move the task. */ + if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid && + tsk->rmid == rdtgrp->mon.rmid) || + (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid && + tsk->closid == rdtgrp->mon.parent->closid)) + return 0; /* - * Take a refcount, so rdtgrp cannot be freed before the - * callback has been invoked. + * Set the task's closid/rmid before the PQR_ASSOC MSR can be + * updated by them. + * + * For ctrl_mon groups, move both closid and rmid. + * For monitor groups, can move the tasks only from + * their parent CTRL group. */ - atomic_inc(&rdtgrp->waitcount); - ret = task_work_add(tsk, &callback->work, true); - if (ret) { - /* - * Task is exiting. Drop the refcount and free the callback. - * No need to check the refcount as the group cannot be - * deleted before the write function unlocks rdtgroup_mutex. - */ - atomic_dec(&rdtgrp->waitcount); - kfree(callback); - rdt_last_cmd_puts("Task exited\n"); - } else { - /* - * For ctrl_mon groups move both closid and rmid. - * For monitor groups, can move the tasks only from - * their parent CTRL group. - */ - if (rdtgrp->type == RDTCTRL_GROUP) { - tsk->closid = rdtgrp->closid; + + if (rdtgrp->type == RDTCTRL_GROUP) { + tsk->closid = rdtgrp->closid; + tsk->rmid = rdtgrp->mon.rmid; + } else if (rdtgrp->type == RDTMON_GROUP) { + if (rdtgrp->mon.parent->closid == tsk->closid) { tsk->rmid = rdtgrp->mon.rmid; - } else if (rdtgrp->type == RDTMON_GROUP) { - if (rdtgrp->mon.parent->closid == tsk->closid) { - tsk->rmid = rdtgrp->mon.rmid; - } else { - rdt_last_cmd_puts("Can't move task to different control group\n"); - ret = -EINVAL; - } + } else { + rdt_last_cmd_puts("Can't move task to different control group\n"); + return -EINVAL; } } - return ret; + + /* + * Ensure the task's closid and rmid are written before determining if + * the task is current that will decide if it will be interrupted. + */ + barrier(); + + /* + * By now, the task's closid and rmid are set. If the task is current + * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource + * group go into effect. If the task is not current, the MSR will be + * updated when the task is scheduled in. + */ + update_task_closid_rmid(tsk); + + return 0; } /** @@ -1027,6 +1030,7 @@ static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d, _d_cdp = rdt_find_domain(_r_cdp, d->id, NULL); if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) { _r_cdp = NULL; + _d_cdp = NULL; ret = -EINVAL; } @@ -1617,7 +1621,6 @@ static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name, if (IS_ERR(kn_subdir)) return PTR_ERR(kn_subdir); - kernfs_get(kn_subdir); ret = rdtgroup_kn_set_ugid(kn_subdir); if (ret) return ret; @@ -1640,7 +1643,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL); if (IS_ERR(kn_info)) return PTR_ERR(kn_info); - kernfs_get(kn_info); ret = rdtgroup_add_files(kn_info, RF_TOP_INFO); if (ret) @@ -1661,12 +1663,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) goto out_destroy; } - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that @rdtgrp->kn is always accessible. - */ - kernfs_get(kn_info); - ret = rdtgroup_kn_set_ugid(kn_info); if (ret) goto out_destroy; @@ -1695,12 +1691,6 @@ mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, if (dest_kn) *dest_kn = kn; - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that @rdtgrp->kn is always accessible. - */ - kernfs_get(kn); - ret = rdtgroup_kn_set_ugid(kn); if (ret) goto out_destroy; @@ -1741,9 +1731,6 @@ static int set_cache_qos_cfg(int level, bool enable) struct rdt_domain *d; int cpu; - if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) - return -ENOMEM; - if (level == RDT_RESOURCE_L3) update = l3_qos_cfg_update; else if (level == RDT_RESOURCE_L2) @@ -1751,6 +1738,9 @@ static int set_cache_qos_cfg(int level, bool enable) else return -EINVAL; + if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) + return -ENOMEM; + r_l = &rdt_resources_all[level]; list_for_each_entry(d, &r_l->domains, list) { /* Pick one CPU from each domain instance to update MSR */ @@ -1769,6 +1759,19 @@ static int set_cache_qos_cfg(int level, bool enable) return 0; } +/* Restore the qos cfg state when a domain comes online */ +void rdt_domain_reconfigure_cdp(struct rdt_resource *r) +{ + if (!r->alloc_capable) + return; + + if (r == &rdt_resources_all[RDT_RESOURCE_L2DATA]) + l2_qos_cfg_update(&r->alloc_enabled); + + if (r == &rdt_resources_all[RDT_RESOURCE_L3DATA]) + l3_qos_cfg_update(&r->alloc_enabled); +} + /* * Enable or disable the MBA software controller * which helps user specify bandwidth in MBps. @@ -1914,8 +1917,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn) rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) rdtgroup_pseudo_lock_remove(rdtgrp); kernfs_unbreak_active_protection(kn); - kernfs_put(rdtgrp->kn); - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } else { kernfs_unbreak_active_protection(kn); } @@ -1970,17 +1972,15 @@ static int rdt_get_tree(struct fs_context *fc) if (rdt_mon_capable) { ret = mongroup_create_dir(rdtgroup_default.kn, - NULL, "mon_groups", + &rdtgroup_default, "mon_groups", &kn_mongrp); if (ret < 0) goto out_info; - kernfs_get(kn_mongrp); ret = mkdir_mondata_all(rdtgroup_default.kn, &rdtgroup_default, &kn_mondata); if (ret < 0) goto out_mongrp; - kernfs_get(kn_mondata); rdtgroup_default.mon.mon_data_kn = kn_mondata; } @@ -2205,7 +2205,11 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) { free_rmid(sentry->mon.rmid); list_del(&sentry->mon.crdtgrp_list); - kfree(sentry); + + if (atomic_read(&sentry->waitcount) != 0) + sentry->flags = RDT_DELETED; + else + rdtgroup_remove(sentry); } } @@ -2243,7 +2247,11 @@ static void rmdir_all_sub(void) kernfs_remove(rdtgrp->kn); list_del(&rdtgrp->rdtgroup_list); - kfree(rdtgrp); + + if (atomic_read(&rdtgrp->waitcount) != 0) + rdtgrp->flags = RDT_DELETED; + else + rdtgroup_remove(rdtgrp); } /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ update_closid_rmid(cpu_online_mask, &rdtgroup_default); @@ -2343,11 +2351,6 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, if (IS_ERR(kn)) return PTR_ERR(kn); - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that kn is always accessible. - */ - kernfs_get(kn); ret = rdtgroup_kn_set_ugid(kn); if (ret) goto out_destroy; @@ -2446,7 +2449,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, /* * Create the mon_data directory first. */ - ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn); + ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn); if (ret) return ret; @@ -2645,7 +2648,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, uint files = 0; int ret; - prdtgrp = rdtgroup_kn_lock_live(prgrp_kn); + prdtgrp = rdtgroup_kn_lock_live(parent_kn); if (!prdtgrp) { ret = -ENODEV; goto out_unlock; @@ -2683,8 +2686,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, /* * kernfs_remove() will drop the reference count on "kn" which * will free it. But we still need it to stick around for the - * rdtgroup_kn_unlock(kn} call below. Take one extra reference - * here, which will be dropped inside rdtgroup_kn_unlock(). + * rdtgroup_kn_unlock(kn) call. Take one extra reference here, + * which will be dropped by kernfs_put() in rdtgroup_remove(). */ kernfs_get(kn); @@ -2718,18 +2721,19 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, kernfs_activate(kn); /* - * The caller unlocks the prgrp_kn upon success. + * The caller unlocks the parent_kn upon success. */ return 0; out_idfree: free_rmid(rdtgrp->mon.rmid); out_destroy: + kernfs_put(rdtgrp->kn); kernfs_remove(rdtgrp->kn); out_free_rgrp: kfree(rdtgrp); out_unlock: - rdtgroup_kn_unlock(prgrp_kn); + rdtgroup_kn_unlock(parent_kn); return ret; } @@ -2737,7 +2741,7 @@ static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp) { kernfs_remove(rgrp->kn); free_rmid(rgrp->mon.rmid); - kfree(rgrp); + rdtgroup_remove(rgrp); } /* @@ -2767,7 +2771,7 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, */ list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list); - rdtgroup_kn_unlock(prgrp_kn); + rdtgroup_kn_unlock(parent_kn); return ret; } @@ -2810,7 +2814,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, * Create an empty mon_groups directory to hold the subset * of tasks and cpus to monitor. */ - ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL); + ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL); if (ret) { rdt_last_cmd_puts("kernfs subdir error\n"); goto out_del_list; @@ -2826,7 +2830,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, out_common_fail: mkdir_rdt_prepare_clean(rdtgrp); out_unlock: - rdtgroup_kn_unlock(prgrp_kn); + rdtgroup_kn_unlock(parent_kn); return ret; } @@ -2899,11 +2903,6 @@ static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp, WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); list_del(&rdtgrp->mon.crdtgrp_list); - /* - * one extra hold on this, will drop when we kfree(rdtgrp) - * in rdtgroup_kn_unlock() - */ - kernfs_get(kn); kernfs_remove(rdtgrp->kn); return 0; @@ -2915,11 +2914,6 @@ static int rdtgroup_ctrl_remove(struct kernfs_node *kn, rdtgrp->flags = RDT_DELETED; list_del(&rdtgrp->rdtgroup_list); - /* - * one extra hold on this, will drop when we kfree(rdtgrp) - * in rdtgroup_kn_unlock() - */ - kernfs_get(kn); kernfs_remove(rdtgrp->kn); return 0; } @@ -2952,13 +2946,13 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp, closid_free(rdtgrp->closid); free_rmid(rdtgrp->mon.rmid); + rdtgroup_ctrl_remove(kn, rdtgrp); + /* * Free all the child monitor group rmids. */ free_all_child_rdtgrp(rdtgrp); - rdtgroup_ctrl_remove(kn, rdtgrp); - return 0; } @@ -2985,7 +2979,8 @@ static int rdtgroup_rmdir(struct kernfs_node *kn) * If the rdtgroup is a mon group and parent directory * is a valid "mon_groups" directory, remove the mon group. */ - if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn) { + if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn && + rdtgrp != &rdtgroup_default) { if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { ret = rdtgroup_ctrl_remove(kn, rdtgrp); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index adf9b71386eff..a03e309a0ac5f 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -26,6 +26,7 @@ struct cpuid_bit { static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, @@ -41,6 +42,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, { X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 }, { X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 }, + { X86_FEATURE_SME_COHERENT, CPUID_EAX, 10, 0x8000001f, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index ee48c3fc8a65e..24da5ee4f0220 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -25,10 +25,10 @@ #define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) #define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) -#ifdef CONFIG_SMP unsigned int __max_die_per_package __read_mostly = 1; EXPORT_SYMBOL(__max_die_per_package); +#ifdef CONFIG_SMP /* * Check if given CPUID extended toplogy "leaf" is implemented */ diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c index 3e20d322bc98b..032509adf9de9 100644 --- a/arch/x86/kernel/cpu/tsx.c +++ b/arch/x86/kernel/cpu/tsx.c @@ -115,11 +115,12 @@ void __init tsx_init(void) tsx_disable(); /* - * tsx_disable() will change the state of the - * RTM CPUID bit. Clear it here since it is now - * expected to be not set. + * tsx_disable() will change the state of the RTM and HLE CPUID + * bits. Clear them here since they are now expected to be not + * set. */ setup_clear_cpu_cap(X86_FEATURE_RTM); + setup_clear_cpu_cap(X86_FEATURE_HLE); } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) { /* @@ -131,10 +132,10 @@ void __init tsx_init(void) tsx_enable(); /* - * tsx_enable() will change the state of the - * RTM CPUID bit. Force it here since it is now - * expected to be set. + * tsx_enable() will change the state of the RTM and HLE CPUID + * bits. Force them here since they are now expected to be set. */ setup_force_cpu_cap(X86_FEATURE_RTM); + setup_force_cpu_cap(X86_FEATURE_HLE); } } diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c index c222f283b4560..32b4dc9030aa9 100644 --- a/arch/x86/kernel/cpu/umwait.c +++ b/arch/x86/kernel/cpu/umwait.c @@ -17,12 +17,6 @@ */ static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); -u32 get_umwait_control_msr(void) -{ - return umwait_control_cached; -} -EXPORT_SYMBOL_GPL(get_umwait_control_msr); - /* * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by * hardware or BIOS before kernel boot. diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index eb651fbde92ac..0c319d09378dd 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -39,6 +40,7 @@ #include #include #include +#include /* Used while preparing memory map entries for second kernel */ struct crash_memmap_data { @@ -68,6 +70,19 @@ static inline void cpu_crash_vmclear_loaded_vmcss(void) rcu_read_unlock(); } +/* + * When the crashkernel option is specified, only use the low + * 1M for the real mode trampoline. + */ +void __init crash_reserve_low_1M(void) +{ + if (cmdline_find_option(boot_command_line, "crashkernel", NULL, 0) < 0) + return; + + memblock_reserve(0, 1<<20); + pr_info("Reserving the low 1M of memory for crashkernel\n"); +} + #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) static void kdump_nmi_callback(int cpu, struct pt_regs *regs) @@ -349,7 +364,7 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) struct crash_memmap_data cmd; struct crash_mem *cmem; - cmem = vzalloc(sizeof(struct crash_mem)); + cmem = vzalloc(struct_size(cmem, ranges, 1)); if (!cmem) return -ENOMEM; diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c index 0b8cedb20d6d9..d5c9b13bafdf7 100644 --- a/arch/x86/kernel/doublefault.c +++ b/arch/x86/kernel/doublefault.c @@ -65,6 +65,9 @@ struct x86_hw_tss doublefault_tss __cacheline_aligned = { .ss = __KERNEL_DS, .ds = __USER_DS, .fs = __KERNEL_PERCPU, +#ifndef CONFIG_X86_32_LAZY_GS + .gs = __KERNEL_STACK_CANARY, +#endif .__cr3 = __pa_nodebug(swapper_pg_dir), }; diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 4cba91ec80492..f2ed027d447f8 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -28,6 +28,37 @@ #include #include +static void __init early_pci_clear_msi(int bus, int slot, int func) +{ + int pos; + u16 ctrl; + + if (likely(!pci_early_clear_msi)) + return; + + pr_info_once("Clearing MSI/MSI-X enable bits early in boot (quirk)\n"); + + pos = pci_early_find_cap(bus, slot, func, PCI_CAP_ID_MSI); + if (pos) { + ctrl = read_pci_config_16(bus, slot, func, pos + PCI_MSI_FLAGS); + ctrl &= ~PCI_MSI_FLAGS_ENABLE; + write_pci_config_16(bus, slot, func, pos + PCI_MSI_FLAGS, ctrl); + + /* Read again to flush previous write */ + ctrl = read_pci_config_16(bus, slot, func, pos + PCI_MSI_FLAGS); + } + + pos = pci_early_find_cap(bus, slot, func, PCI_CAP_ID_MSIX); + if (pos) { + ctrl = read_pci_config_16(bus, slot, func, pos + PCI_MSIX_FLAGS); + ctrl &= ~PCI_MSIX_FLAGS_ENABLE; + write_pci_config_16(bus, slot, func, pos + PCI_MSIX_FLAGS, ctrl); + + /* Read again to flush previous write */ + ctrl = read_pci_config_16(bus, slot, func, pos + PCI_MSIX_FLAGS); + } +} + static void __init fix_hypertransport_config(int num, int slot, int func) { u32 htcfg; @@ -515,6 +546,7 @@ static const struct intel_early_ops gen11_early_ops __initconst = { .stolen_size = gen9_stolen_size, }; +/* Intel integrated GPUs for which we need to reserve "stolen memory" */ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_I830_IDS(&i830_early_ops), INTEL_I845G_IDS(&i845_early_ops), @@ -587,6 +619,13 @@ static void __init intel_graphics_quirks(int num, int slot, int func) u16 device; int i; + /* + * Reserve "stolen memory" for an integrated GPU. If we've already + * found one, there's nothing to do for other (discrete) GPUs. + */ + if (resource_size(&intel_graphics_stolen_res)) + return; + device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID); for (i = 0; i < ARRAY_SIZE(intel_early_ids); i++) { @@ -699,7 +738,7 @@ static struct chipset early_qrk[] __initdata = { { PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST, PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID, - QFLAG_APPLY_ONCE, intel_graphics_quirks }, + 0, intel_graphics_quirks }, /* * HPET on the current version of the Baytrail platform has accuracy * problems: it will halt in deep idle state - so we disable it. @@ -710,10 +749,9 @@ static struct chipset early_qrk[] __initdata = { */ { PCI_VENDOR_ID_INTEL, 0x0f00, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, - { PCI_VENDOR_ID_INTEL, 0x3ec4, - PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, { PCI_VENDOR_ID_BROADCOM, 0x4331, PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset}, + { PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, early_pci_clear_msi}, {} }; @@ -766,6 +804,10 @@ static int __init check_dev_quirk(int num, int slot, int func) PCI_HEADER_TYPE); if ((type & 0x7f) == PCI_HEADER_TYPE_BRIDGE) { + /* pci_early_clear_msi scans the buses differently. */ + if (pci_early_clear_msi) + return -1; + sec = read_pci_config_byte(num, slot, func, PCI_SECONDARY_BUS); if (sec > num) early_pci_scan_bus(sec); @@ -792,8 +834,13 @@ static void __init early_pci_scan_bus(int bus) void __init early_quirks(void) { + int bus; + if (!early_pci_allowed()) return; early_pci_scan_bus(0); + /* pci_early_clear_msi scans more buses. */ + for (bus = 1; pci_early_clear_msi && bus < 256; bus++) + early_pci_scan_bus(bus); } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 12c70840980e4..8c9b202f3e6db 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -82,7 +82,7 @@ bool irq_fpu_usable(void) } EXPORT_SYMBOL(irq_fpu_usable); -void kernel_fpu_begin(void) +void kernel_fpu_begin_mask(unsigned int kfpu_mask) { preempt_disable(); @@ -101,8 +101,15 @@ void kernel_fpu_begin(void) copy_fpregs_to_fpstate(¤t->thread.fpu); } __cpu_invalidate_fpregs_state(); + + /* Put sane initial values into the control registers. */ + if (likely(kfpu_mask & KFPU_MXCSR) && boot_cpu_has(X86_FEATURE_XMM)) + ldmxcsr(MXCSR_DEFAULT); + + if (unlikely(kfpu_mask & KFPU_387) && boot_cpu_has(X86_FEATURE_FPU)) + asm volatile ("fninit"); } -EXPORT_SYMBOL_GPL(kernel_fpu_begin); +EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask); void kernel_fpu_end(void) { diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 6ce7e0a23268f..b271da0fa2193 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -242,9 +242,9 @@ static void __init fpu__init_system_ctx_switch(void) */ static void __init fpu__init_parse_early_param(void) { - char arg[32]; + char arg[128]; char *argptr = arg; - int bit; + int arglen, res, bit; #ifdef CONFIG_X86_32 if (cmdline_find_option_bool(boot_command_line, "no387")) @@ -267,12 +267,26 @@ static void __init fpu__init_parse_early_param(void) if (cmdline_find_option_bool(boot_command_line, "noxsaves")) setup_clear_cpu_cap(X86_FEATURE_XSAVES); - if (cmdline_find_option(boot_command_line, "clearcpuid", arg, - sizeof(arg)) && - get_option(&argptr, &bit) && - bit >= 0 && - bit < NCAPINTS * 32) - setup_clear_cpu_cap(bit); + arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg)); + if (arglen <= 0) + return; + + pr_info("Clearing CPUID bits:"); + do { + res = get_option(&argptr, &bit); + if (res == 0 || res == 3) + break; + + /* If the argument was too long, the last bit may be cut off */ + if (res == 1 && arglen >= sizeof(arg)) + break; + + if (bit >= 0 && bit < NCAPINTS * 32) { + pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit)); + setup_clear_cpu_cap(bit); + } + } while (res == 2); + pr_cont("\n"); } /* diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index d652b939ccfb5..68e1fb66e701b 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -124,7 +124,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, /* * A whole standard-format XSAVE buffer is needed: */ - if ((pos != 0) || (count < fpu_user_xstate_size)) + if (pos != 0 || count != fpu_user_xstate_size) return -EFAULT; xsave = &fpu->state.xsave; diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 0071b794ed193..ab2f9c2f0683a 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -289,13 +289,17 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) return 0; } - if (!access_ok(buf, size)) - return -EACCES; + if (!access_ok(buf, size)) { + ret = -EACCES; + goto out; + } - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_set(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) != 0; + if (!static_cpu_has(X86_FEATURE_FPU)) { + ret = fpregs_soft_set(current, NULL, 0, + sizeof(struct user_i387_ia32_struct), + NULL, buf); + goto out; + } if (use_xsave()) { struct _fpx_sw_bytes fx_sw_user; @@ -333,7 +337,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) if (ia32_fxstate) { ret = __copy_from_user(&env, buf, sizeof(env)); if (ret) - goto err_out; + goto out; envp = &env; } else { /* @@ -352,6 +356,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) fpregs_unlock(); return 0; } + fpregs_deactivate(fpu); fpregs_unlock(); } @@ -368,7 +373,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) ret = validate_xstate_header(&fpu->state.xsave.header); } if (ret) - goto err_out; + goto out; sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only); @@ -381,7 +386,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size); if (ret) { ret = -EFAULT; - goto err_out; + goto out; } sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only); @@ -396,16 +401,18 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) } else { ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size); if (ret) - goto err_out; + goto out; fpregs_lock(); ret = copy_kernel_to_fregs_err(&fpu->state.fsave); } if (!ret) fpregs_mark_activate(); + else + fpregs_deactivate(fpu); fpregs_unlock(); -err_out: +out: if (ret) fpu__clear(fpu); return ret; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index e5cb67d67c030..046782df37a6d 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -398,6 +398,24 @@ static void __init print_xstate_offset_size(void) } } +/* + * All supported features have either init state all zeros or are + * handled in setup_init_fpu() individually. This is an explicit + * feature list and does not use XFEATURE_MASK*SUPPORTED to catch + * newly added supported features at build time and make people + * actually look at the init state for the new feature. + */ +#define XFEATURES_INIT_FPSTATE_HANDLED \ + (XFEATURE_MASK_FP | \ + XFEATURE_MASK_SSE | \ + XFEATURE_MASK_YMM | \ + XFEATURE_MASK_OPMASK | \ + XFEATURE_MASK_ZMM_Hi256 | \ + XFEATURE_MASK_Hi16_ZMM | \ + XFEATURE_MASK_PKRU | \ + XFEATURE_MASK_BNDREGS | \ + XFEATURE_MASK_BNDCSR) + /* * setup the xstate image representing the init state */ @@ -405,6 +423,8 @@ static void __init setup_init_fpu_buf(void) { static int on_boot_cpu __initdata = 1; + BUILD_BUG_ON(XCNTXT_MASK != XFEATURES_INIT_FPSTATE_HANDLED); + WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; @@ -423,10 +443,22 @@ static void __init setup_init_fpu_buf(void) copy_kernel_to_xregs_booting(&init_fpstate.xsave); /* - * Dump the init state again. This is to identify the init state - * of any feature which is not represented by all zero's. + * All components are now in init state. Read the state back so + * that init_fpstate contains all non-zero init state. This only + * works with XSAVE, but not with XSAVEOPT and XSAVES because + * those use the init optimization which skips writing data for + * components in init state. + * + * XSAVE could be used, but that would require to reshuffle the + * data when XSAVES is available because XSAVES uses xstate + * compaction. But doing so is a pointless exercise because most + * components have an all zeros init state except for the legacy + * ones (FP and SSE). Those can be saved with FXSAVE into the + * legacy area. Adding new features requires to ensure that init + * state is all zeroes or if not to add the necessary handling + * here. */ - copy_xregs_to_kernel_booting(&init_fpstate.xsave); + fxsave(&init_fpstate.fxsave); } static int xfeature_uncompacted_offset(int xfeature_nr) @@ -895,8 +927,6 @@ const void *get_xsave_field_ptr(int xfeature_nr) #ifdef CONFIG_ARCH_HAS_PKEYS -#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2) -#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1) /* * This will go out and modify PKRU register to set the access * rights for @pkey to @init_val. @@ -915,6 +945,13 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, if (!boot_cpu_has(X86_FEATURE_OSPKE)) return -EINVAL; + /* + * This code should only be called with valid 'pkey' + * values originating from in-kernel users. Complain + * if a bad value is observed. + */ + WARN_ON_ONCE(pkey >= arch_max_pkey()); + /* Set the bits we need in PKRU: */ if (init_val & PKEY_DISABLE_ACCESS) new_pkru_bits |= PKRU_AD_BIT; @@ -952,18 +989,31 @@ static inline bool xfeatures_mxcsr_quirk(u64 xfeatures) return true; } -/* - * This is similar to user_regset_copyout(), but will not add offset to - * the source data pointer or increment pos, count, kbuf, and ubuf. - */ -static inline void -__copy_xstate_to_kernel(void *kbuf, const void *data, - unsigned int offset, unsigned int size, unsigned int size_total) +static void fill_gap(unsigned to, void **kbuf, unsigned *pos, unsigned *count) { - if (offset < size_total) { - unsigned int copy = min(size, size_total - offset); + if (*pos < to) { + unsigned size = to - *pos; + + if (size > *count) + size = *count; + memcpy(*kbuf, (void *)&init_fpstate.xsave + *pos, size); + *kbuf += size; + *pos += size; + *count -= size; + } +} - memcpy(kbuf + offset, data, copy); +static void copy_part(unsigned offset, unsigned size, void *from, + void **kbuf, unsigned *pos, unsigned *count) +{ + fill_gap(offset, kbuf, pos, count); + if (size > *count) + size = *count; + if (size) { + memcpy(*kbuf, from, size); + *kbuf += size; + *pos += size; + *count -= size; } } @@ -976,8 +1026,9 @@ __copy_xstate_to_kernel(void *kbuf, const void *data, */ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total) { - unsigned int offset, size; struct xstate_header header; + const unsigned off_mxcsr = offsetof(struct fxregs_state, mxcsr); + unsigned count = size_total; int i; /* @@ -993,46 +1044,42 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of header.xfeatures = xsave->header.xfeatures; header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR; + if (header.xfeatures & XFEATURE_MASK_FP) + copy_part(0, off_mxcsr, + &xsave->i387, &kbuf, &offset_start, &count); + if (header.xfeatures & (XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)) + copy_part(off_mxcsr, MXCSR_AND_FLAGS_SIZE, + &xsave->i387.mxcsr, &kbuf, &offset_start, &count); + if (header.xfeatures & XFEATURE_MASK_FP) + copy_part(offsetof(struct fxregs_state, st_space), 128, + &xsave->i387.st_space, &kbuf, &offset_start, &count); + if (header.xfeatures & XFEATURE_MASK_SSE) + copy_part(xstate_offsets[XFEATURE_SSE], 256, + &xsave->i387.xmm_space, &kbuf, &offset_start, &count); + /* + * Fill xsave->i387.sw_reserved value for ptrace frame: + */ + copy_part(offsetof(struct fxregs_state, sw_reserved), 48, + xstate_fx_sw_bytes, &kbuf, &offset_start, &count); /* * Copy xregs_state->header: */ - offset = offsetof(struct xregs_state, header); - size = sizeof(header); - - __copy_xstate_to_kernel(kbuf, &header, offset, size, size_total); + copy_part(offsetof(struct xregs_state, header), sizeof(header), + &header, &kbuf, &offset_start, &count); - for (i = 0; i < XFEATURE_MAX; i++) { + for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { /* * Copy only in-use xstates: */ if ((header.xfeatures >> i) & 1) { void *src = __raw_xsave_addr(xsave, i); - offset = xstate_offsets[i]; - size = xstate_sizes[i]; - - /* The next component has to fit fully into the output buffer: */ - if (offset + size > size_total) - break; - - __copy_xstate_to_kernel(kbuf, src, offset, size, size_total); + copy_part(xstate_offsets[i], xstate_sizes[i], + src, &kbuf, &offset_start, &count); } } - - if (xfeatures_mxcsr_quirk(header.xfeatures)) { - offset = offsetof(struct fxregs_state, mxcsr); - size = MXCSR_AND_FLAGS_SIZE; - __copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total); - } - - /* - * Fill xsave->i387.sw_reserved value for ptrace frame: - */ - offset = offsetof(struct fxregs_state, sw_reserved); - size = sizeof(xstate_fx_sw_bytes); - - __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total); + fill_gap(size_total, &kbuf, &offset_start, &count); return 0; } diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S index 073aab525d800..2cc0303522c99 100644 --- a/arch/x86/kernel/ftrace_32.S +++ b/arch/x86/kernel/ftrace_32.S @@ -89,7 +89,7 @@ WEAK(ftrace_stub) ret END(ftrace_caller) -ENTRY(ftrace_regs_caller) +SYM_CODE_START(ftrace_regs_caller) /* * We're here from an mcount/fentry CALL, and the stack frame looks like: * @@ -163,6 +163,7 @@ GLOBAL(ftrace_regs_call) popl %eax jmp .Lftrace_ret +SYM_CODE_END(ftrace_regs_caller) #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 206a4b6144c2e..950286016f63c 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -383,6 +383,8 @@ static void __init clear_bss(void) { memset(__bss_start, 0, (unsigned long) __bss_stop - (unsigned long) __bss_start); + memset(__brk_base, 0, + (unsigned long) __brk_limit - (unsigned long) __brk_base); } static unsigned long get_cmd_line_ptr(void) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 30f9cb2c0b551..b7eb33db030bc 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -26,6 +26,7 @@ #include #include #include +#include /* Physical address */ #define pa(X) ((X) - __PAGE_OFFSET) @@ -64,7 +65,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE) * can. */ __HEAD -ENTRY(startup_32) +SYM_CODE_START(startup_32) movl pa(initial_stack),%ecx /* test KEEP_SEGMENTS flag to see if the bootloader is asking @@ -153,6 +154,7 @@ ENTRY(startup_32) movl pa(subarch_entries)(,%eax,4), %eax subl $__PAGE_OFFSET, %eax + ANNOTATE_RETPOLINE_SAFE jmp *%eax .Lbad_subarch: @@ -172,6 +174,7 @@ num_subarch_entries = (. - subarch_entries) / 4 #else jmp .Ldefault_entry #endif /* CONFIG_PARAVIRT */ +SYM_CODE_END(startup_32) #ifdef CONFIG_HOTPLUG_CPU /* @@ -302,6 +305,7 @@ ENTRY(startup_32_smp) movl setup_once_ref,%eax andl %eax,%eax jz 1f # Did we do this already? + ANNOTATE_RETPOLINE_SAFE call *%eax 1: @@ -571,6 +575,16 @@ ENTRY(initial_page_table) # error "Kernel PMDs should be 1, 2 or 3" # endif .align PAGE_SIZE /* needs to be page-sized too */ + +#ifdef CONFIG_PAGE_TABLE_ISOLATION + /* + * PTI needs another page so sync_initial_pagetable() works correctly + * and does not scribble over the data which is placed behind the + * actual initial_page_table. See clone_pgd_range(). + */ + .fill 1024, 4, 0 +#endif + #endif .data diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index c6f791bc481eb..9834d221e390f 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -9,6 +9,7 @@ #include #include +#include #undef pr_fmt #define pr_fmt(fmt) "hpet: " fmt @@ -806,6 +807,83 @@ static bool __init hpet_counting(void) return false; } +static bool __init mwait_pc10_supported(void) +{ + unsigned int eax, ebx, ecx, mwait_substates; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + if (!cpu_feature_enabled(X86_FEATURE_MWAIT)) + return false; + + if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) + return false; + + cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); + + return (ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) && + (ecx & CPUID5_ECX_INTERRUPT_BREAK) && + (mwait_substates & (0xF << 28)); +} + +/* + * Check whether the system supports PC10. If so force disable HPET as that + * stops counting in PC10. This check is overbroad as it does not take any + * of the following into account: + * + * - ACPI tables + * - Enablement of intel_idle + * - Command line arguments which limit intel_idle C-state support + * + * That's perfectly fine. HPET is a piece of hardware designed by committee + * and the only reasons why it is still in use on modern systems is the + * fact that it is impossible to reliably query TSC and CPU frequency via + * CPUID or firmware. + * + * If HPET is functional it is useful for calibrating TSC, but this can be + * done via PMTIMER as well which seems to be the last remaining timer on + * X86/INTEL platforms that has not been completely wreckaged by feature + * creep. + * + * In theory HPET support should be removed altogether, but there are older + * systems out there which depend on it because TSC and APIC timer are + * dysfunctional in deeper C-states. + * + * It's only 20 years now that hardware people have been asked to provide + * reliable and discoverable facilities which can be used for timekeeping + * and per CPU timer interrupts. + * + * The probability that this problem is going to be solved in the + * forseeable future is close to zero, so the kernel has to be cluttered + * with heuristics to keep up with the ever growing amount of hardware and + * firmware trainwrecks. Hopefully some day hardware people will understand + * that the approach of "This can be fixed in software" is not sustainable. + * Hope dies last... + */ +static bool __init hpet_is_pc10_damaged(void) +{ + unsigned long long pcfg; + + /* Check whether PC10 substates are supported */ + if (!mwait_pc10_supported()) + return false; + + /* Check whether PC10 is enabled in PKG C-state limit */ + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, pcfg); + if ((pcfg & 0xF) < 8) + return false; + + if (hpet_force_user) { + pr_warn("HPET force enabled via command line, but dysfunctional in PC10.\n"); + return false; + } + + pr_info("HPET dysfunctional in PC10. Force disabled.\n"); + boot_hpet_disable = true; + return true; +} + /** * hpet_enable - Try to setup the HPET timer. Returns 1 on success. */ @@ -819,6 +897,9 @@ int __init hpet_enable(void) if (!is_hpet_capable()) return 0; + if (hpet_is_pc10_damaged()) + return 0; + hpet_set_mapping(); if (!hpet_virt_address) return 0; diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 519649ddf1001..fe522691ac717 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -207,7 +207,7 @@ static void mask_and_ack_8259A(struct irq_data *data) * lets ACK and report it. [once per IRQ] */ if (!(spurious_irq_mask & irqmask)) { - printk(KERN_DEBUG + printk_deferred(KERN_DEBUG "spurious 8259A interrupt: IRQ%d.\n", irq); spurious_irq_mask |= irqmask; } diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 87ef69a72c52e..7bb4c3cbf4dcd 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -318,7 +318,11 @@ void __init idt_setup_apic_and_irq_gates(void) #ifdef CONFIG_X86_LOCAL_APIC for_each_clear_bit_from(i, system_vectors, NR_VECTORS) { - set_bit(i, system_vectors); + /* + * Don't set the non assigned system vectors in the + * system_vectors bitmap. Otherwise they show up in + * /proc/interrupts. + */ entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR); set_intr_gate(i, entry); } diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c index 4d4f5d9faac31..23054909c8ddf 100644 --- a/arch/x86/kernel/ima_arch.c +++ b/arch/x86/kernel/ima_arch.c @@ -10,8 +10,6 @@ extern struct boot_params boot_params; static enum efi_secureboot_mode get_sb_mode(void) { - efi_char16_t efi_SecureBoot_name[] = L"SecureBoot"; - efi_char16_t efi_SetupMode_name[] = L"SecureBoot"; efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID; efi_status_t status; unsigned long size; @@ -25,7 +23,7 @@ static enum efi_secureboot_mode get_sb_mode(void) } /* Get variable contents into buffer */ - status = efi.get_variable(efi_SecureBoot_name, &efi_variable_guid, + status = efi.get_variable(L"SecureBoot", &efi_variable_guid, NULL, &size, &secboot); if (status == EFI_NOT_FOUND) { pr_info("ima: secureboot mode disabled\n"); @@ -38,7 +36,7 @@ static enum efi_secureboot_mode get_sb_mode(void) } size = sizeof(setupmode); - status = efi.get_variable(efi_SetupMode_name, &efi_variable_guid, + status = efi.get_variable(L"SetupMode", &efi_variable_guid, NULL, &size, &setupmode); if (status != EFI_SUCCESS) /* ignore unknown SetupMode */ diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 21efee32e2b12..7dfd0185767cc 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -295,8 +295,10 @@ void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)) { if (handler) kvm_posted_intr_wakeup_handler = handler; - else + else { kvm_posted_intr_wakeup_handler = dummy_handler; + synchronize_rcu(); + } } EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 12df3a4abfdd8..6b32ab009c197 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -43,7 +43,7 @@ static int map_irq_stack(unsigned int cpu) pages[i] = pfn_to_page(pa >> PAGE_SHIFT); } - va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); + va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, VM_MAP, PAGE_KERNEL); if (!va) return -ENOMEM; diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index d2f4e706a428c..b8b3b84308edc 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -210,8 +210,7 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params, params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch; /* Copying screen_info will do? */ - memcpy(¶ms->screen_info, &boot_params.screen_info, - sizeof(struct screen_info)); + memcpy(¶ms->screen_info, &screen_info, sizeof(struct screen_info)); /* Fill in memsize later */ params->screen_info.ext_mem_k = 0; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 43fc13c831af0..3700dc94847c6 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -157,6 +157,8 @@ NOKPROBE_SYMBOL(skip_prefixes); int can_boost(struct insn *insn, void *addr) { kprobe_opcode_t opcode; + insn_byte_t prefix; + int i; if (search_exception_tables((unsigned long)addr)) return 0; /* Page fault may occur on this address. */ @@ -169,9 +171,14 @@ int can_boost(struct insn *insn, void *addr) if (insn->opcode.nbytes != 1) return 0; - /* Can't boost Address-size override prefix */ - if (unlikely(inat_is_address_size_prefix(insn->attr))) - return 0; + for_each_insn_prefix(insn, i, prefix) { + insn_attr_t attr; + + attr = inat_get_opcode_attribute(prefix); + /* Can't boost Address-size override prefix and CS override prefix */ + if (prefix == 0x2e || inat_is_address_size_prefix(attr)) + return 0; + } opcode = insn->opcode.bytes[0]; @@ -196,8 +203,8 @@ int can_boost(struct insn *insn, void *addr) /* clear and set flags are boostable */ return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); default: - /* CS override prefix and call are not boostable */ - return (opcode != 0x2e && opcode != 0x9a); + /* call is not boostable */ + return opcode != 0x9a; } } @@ -351,6 +358,10 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) kernel_insn_init(insn, dest, MAX_INSN_SIZE); insn_get_length(insn); + /* We can not probe force emulate prefixed instruction */ + if (insn_has_emulate_prefix(insn)) + return 0; + /* Another subsystem puts a breakpoint, failed to recover */ if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION) return 0; @@ -746,16 +757,11 @@ asm( NOKPROBE_SYMBOL(kretprobe_trampoline); STACK_FRAME_NON_STANDARD(kretprobe_trampoline); -static struct kprobe kretprobe_kprobe = { - .addr = (void *)kretprobe_trampoline, -}; - /* * Called from kretprobe_trampoline */ __used __visible void *trampoline_handler(struct pt_regs *regs) { - struct kprobe_ctlblk *kcb; struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; struct hlist_node *tmp; @@ -765,16 +771,12 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) void *frame_pointer; bool skipped = false; - preempt_disable(); - /* * Set a dummy kprobe for avoiding kretprobe recursion. * Since kretprobe never run in kprobe handler, kprobe must not * be running at this point. */ - kcb = get_kprobe_ctlblk(); - __this_cpu_write(current_kprobe, &kretprobe_kprobe); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; + kprobe_busy_begin(); INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); @@ -850,7 +852,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) __this_cpu_write(current_kprobe, &ri->rp->kp); ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); - __this_cpu_write(current_kprobe, &kretprobe_kprobe); + __this_cpu_write(current_kprobe, &kprobe_busy); } recycle_rp_inst(ri, &empty_rp); @@ -866,8 +868,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); - __this_cpu_write(current_kprobe, NULL); - preempt_enable(); + kprobe_busy_end(); hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); @@ -1029,6 +1030,11 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) * So clear it by resetting the current kprobe: */ regs->flags &= ~X86_EFLAGS_TF; + /* + * Since the single step (trap) has been cancelled, + * we need to restore BTF here. + */ + restore_btf(); /* * If the TF flag was set before the kprobe hit, diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e820568ed4d5c..46c20b60de020 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -33,6 +34,7 @@ #include #include #include +#include static int kvmapf = 1; @@ -57,6 +59,7 @@ static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __align DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible; static int has_steal_clock = 0; +static int has_guest_poll = 0; /* * No need for any "IO delay" on KVM */ @@ -321,8 +324,7 @@ static void kvm_guest_cpu_init(void) wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); __this_cpu_write(apf_reason.enabled, 1); - printk(KERN_INFO"KVM setup async PF for cpu %d\n", - smp_processor_id()); + pr_info("setup async PF for cpu %d\n", smp_processor_id()); } if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) { @@ -347,35 +349,17 @@ static void kvm_pv_disable_apf(void) wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); __this_cpu_write(apf_reason.enabled, 0); - printk(KERN_INFO"Unregister pv shared memory for cpu %d\n", - smp_processor_id()); + pr_info("disable async PF for cpu %d\n", smp_processor_id()); } -static void kvm_pv_guest_cpu_reboot(void *unused) +static void kvm_disable_steal_time(void) { - /* - * We disable PV EOI before we load a new kernel by kexec, - * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory. - * New kernel can re-enable when it boots. - */ - if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) - wrmsrl(MSR_KVM_PV_EOI_EN, 0); - kvm_pv_disable_apf(); - kvm_disable_steal_time(); -} + if (!has_steal_clock) + return; -static int kvm_pv_reboot_notify(struct notifier_block *nb, - unsigned long code, void *unused) -{ - if (code == SYS_RESTART) - on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1); - return NOTIFY_DONE; + wrmsr(MSR_KVM_STEAL_TIME, 0, 0); } -static struct notifier_block kvm_pv_reboot_nb = { - .notifier_call = kvm_pv_reboot_notify, -}; - static u64 kvm_steal_clock(int cpu) { u64 steal; @@ -393,14 +377,6 @@ static u64 kvm_steal_clock(int cpu) return steal; } -void kvm_disable_steal_time(void) -{ - if (!has_steal_clock) - return; - - wrmsr(MSR_KVM_STEAL_TIME, 0, 0); -} - static inline void __set_percpu_decrypted(void *ptr, unsigned long size) { early_set_memory_decrypted((unsigned long) ptr, size); @@ -428,6 +404,27 @@ static void __init sev_map_percpu_data(void) } } +static void kvm_guest_cpu_offline(bool shutdown) +{ + kvm_disable_steal_time(); + if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) + wrmsrl(MSR_KVM_PV_EOI_EN, 0); + kvm_pv_disable_apf(); + if (!shutdown) + apf_task_wake_all(); + kvmclock_disable(); +} + +static int kvm_cpu_online(unsigned int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + kvm_guest_cpu_init(); + local_irq_restore(flags); + return 0; +} + #ifdef CONFIG_SMP #define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG) @@ -464,7 +461,7 @@ static void __send_ipi_mask(const struct cpumask *mask, int vector) } else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) { ipi_bitmap <<= min - apic_id; min = apic_id; - } else if (apic_id < min + KVM_IPI_CLUSTER_SIZE) { + } else if (apic_id > min && apic_id < min + KVM_IPI_CLUSTER_SIZE) { max = apic_id < max ? max : apic_id; } else { ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap, @@ -547,31 +544,46 @@ static void __init kvm_smp_prepare_boot_cpu(void) kvm_spinlock_init(); } -static void kvm_guest_cpu_offline(void) +static int kvm_cpu_down_prepare(unsigned int cpu) { - kvm_disable_steal_time(); - if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) - wrmsrl(MSR_KVM_PV_EOI_EN, 0); - kvm_pv_disable_apf(); - apf_task_wake_all(); -} + unsigned long flags; -static int kvm_cpu_online(unsigned int cpu) -{ - local_irq_disable(); - kvm_guest_cpu_init(); - local_irq_enable(); + local_irq_save(flags); + kvm_guest_cpu_offline(false); + local_irq_restore(flags); return 0; } -static int kvm_cpu_down_prepare(unsigned int cpu) +#endif + +static int kvm_suspend(void) { - local_irq_disable(); - kvm_guest_cpu_offline(); - local_irq_enable(); + u64 val = 0; + + kvm_guest_cpu_offline(false); + +#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL + if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) + rdmsrl(MSR_KVM_POLL_CONTROL, val); + has_guest_poll = !(val & 1); +#endif return 0; } + +static void kvm_resume(void) +{ + kvm_cpu_online(raw_smp_processor_id()); + +#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL + if (kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL) && has_guest_poll) + wrmsrl(MSR_KVM_POLL_CONTROL, 0); #endif +} + +static struct syscore_ops kvm_syscore_ops = { + .suspend = kvm_suspend, + .resume = kvm_resume, +}; static void __init kvm_apf_trap_init(void) { @@ -606,6 +618,37 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask, native_flush_tlb_others(flushmask, info); } +static void kvm_pv_guest_cpu_reboot(void *unused) +{ + kvm_guest_cpu_offline(true); +} + +static int kvm_pv_reboot_notify(struct notifier_block *nb, + unsigned long code, void *unused) +{ + if (code == SYS_RESTART) + on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1); + return NOTIFY_DONE; +} + +static struct notifier_block kvm_pv_reboot_nb = { + .notifier_call = kvm_pv_reboot_notify, +}; + +/* + * After a PV feature is registered, the host will keep writing to the + * registered memory location. If the guest happens to shutdown, this memory + * won't be valid. In cases like kexec, in which you install a new kernel, this + * means a random memory location will be kept being written. + */ +#ifdef CONFIG_KEXEC_CORE +static void kvm_crash_shutdown(struct pt_regs *regs) +{ + kvm_guest_cpu_offline(true); + native_machine_crash_shutdown(regs); +} +#endif + static void __init kvm_guest_init(void) { int i; @@ -649,6 +692,12 @@ static void __init kvm_guest_init(void) kvm_guest_cpu_init(); #endif +#ifdef CONFIG_KEXEC_CORE + machine_ops.crash_shutdown = kvm_crash_shutdown; +#endif + + register_syscore_ops(&kvm_syscore_ops); + /* * Hard lockup detection is enabled by default. Disable it, as guests * can get false positives too easily, for example if the host is diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 904494b924c13..d81e34e614e00 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -20,7 +20,6 @@ #include #include #include -#include #include static int kvmclock __initdata = 1; @@ -51,18 +50,9 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall); static struct pvclock_vsyscall_time_info hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE); static struct pvclock_wall_clock wall_clock __bss_decrypted; -static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); static struct pvclock_vsyscall_time_info *hvclock_mem; - -static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) -{ - return &this_cpu_read(hv_clock_per_cpu)->pvti; -} - -static inline struct pvclock_vsyscall_time_info *this_cpu_hvclock(void) -{ - return this_cpu_read(hv_clock_per_cpu); -} +DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); +EXPORT_PER_CPU_SYMBOL_GPL(hv_clock_per_cpu); /* * The wallclock is the time of day when we booted. Since then, some time may @@ -197,28 +187,9 @@ static void kvm_setup_secondary_clock(void) } #endif -/* - * After the clock is registered, the host will keep writing to the - * registered memory location. If the guest happens to shutdown, this memory - * won't be valid. In cases like kexec, in which you install a new kernel, this - * means a random memory location will be kept being written. So before any - * kind of shutdown from our side, we unregister the clock by writing anything - * that does not have the 'enable' bit set in the msr - */ -#ifdef CONFIG_KEXEC_CORE -static void kvm_crash_shutdown(struct pt_regs *regs) -{ - native_write_msr(msr_kvm_system_time, 0, 0); - kvm_disable_steal_time(); - native_machine_crash_shutdown(regs); -} -#endif - -static void kvm_shutdown(void) +void kvmclock_disable(void) { native_write_msr(msr_kvm_system_time, 0, 0); - kvm_disable_steal_time(); - native_machine_shutdown(); } static void __init kvmclock_init_mem(void) @@ -346,10 +317,6 @@ void __init kvmclock_init(void) #endif x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; - machine_ops.shutdown = kvm_shutdown; -#ifdef CONFIG_KEXEC_CORE - machine_ops.crash_shutdown = kvm_crash_shutdown; -#endif kvm_get_preset_lpj(); /* diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index d5c72cb877b31..77dabedaa9d12 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -114,6 +114,7 @@ int apply_relocate(Elf32_Shdr *sechdrs, *location += sym->st_value; break; case R_386_PC32: + case R_386_PLT32: /* Add the value, subtract its position */ *location += sym->st_value - (uint32_t)location; break; diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index e676a9916c498..5bb001c0c771a 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -104,18 +104,21 @@ static int __init nmi_warning_debugfs(void) } fs_initcall(nmi_warning_debugfs); -static void nmi_max_handler(struct irq_work *w) +static void nmi_check_duration(struct nmiaction *action, u64 duration) { - struct nmiaction *a = container_of(w, struct nmiaction, irq_work); int remainder_ns, decimal_msecs; - u64 whole_msecs = READ_ONCE(a->max_duration); - remainder_ns = do_div(whole_msecs, (1000 * 1000)); + if (duration < nmi_longest_ns || duration < action->max_duration) + return; + + action->max_duration = duration; + + remainder_ns = do_div(duration, (1000 * 1000)); decimal_msecs = remainder_ns / 1000; printk_ratelimited(KERN_INFO "INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n", - a->handler, whole_msecs, decimal_msecs); + action->handler, duration, decimal_msecs); } static int nmi_handle(unsigned int type, struct pt_regs *regs) @@ -142,11 +145,7 @@ static int nmi_handle(unsigned int type, struct pt_regs *regs) delta = sched_clock() - delta; trace_nmi_handler(a->handler, (int)delta, thishandled); - if (delta < nmi_longest_ns || delta < a->max_duration) - continue; - - a->max_duration = delta; - irq_work_queue(&a->irq_work); + nmi_check_duration(a, delta); } rcu_read_unlock(); @@ -164,8 +163,6 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) if (!action->handler) return -EINVAL; - init_irq_work(&action->irq_work, nmi_max_handler); - raw_spin_lock_irqsave(&desc->lock, flags); /* diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c index 6b07faaa15798..23154d24b1173 100644 --- a/arch/x86/kernel/pmem.c +++ b/arch/x86/kernel/pmem.c @@ -27,6 +27,11 @@ static __init int register_e820_pmem(void) * simply here to trigger the module to load on demand. */ pdev = platform_device_alloc("e820_pmem", -1); - return platform_device_add(pdev); + + rc = platform_device_add(pdev); + if (rc) + platform_device_put(pdev); + + return rc; } device_initcall(register_e820_pmem); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5e94c4354d4e4..87cfd2ee9ca0d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -428,28 +428,20 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, lockdep_assert_irqs_disabled(); - /* - * If TIF_SSBD is different, select the proper mitigation - * method. Note that if SSBD mitigation is disabled or permanentely - * enabled this branch can't be taken because nothing can set - * TIF_SSBD. - */ - if (tif_diff & _TIF_SSBD) { - if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + /* Handle change of TIF_SSBD depending on the mitigation method. */ + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + if (tif_diff & _TIF_SSBD) amd_set_ssb_virt_state(tifn); - } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + if (tif_diff & _TIF_SSBD) amd_set_core_ssb_state(tifn); - } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || - static_cpu_has(X86_FEATURE_AMD_SSBD)) { - msr |= ssbd_tif_to_spec_ctrl(tifn); - updmsr = true; - } + } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + updmsr |= !!(tif_diff & _TIF_SSBD); + msr |= ssbd_tif_to_spec_ctrl(tifn); } - /* - * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled, - * otherwise avoid the MSR write. - */ + /* Only evaluate TIF_SPEC_IB if conditional STIBP is enabled. */ if (IS_ENABLED(CONFIG_SMP) && static_branch_unlikely(&switch_to_cond_stibp)) { updmsr |= !!(tif_diff & _TIF_SPEC_IB); @@ -457,7 +449,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } if (updmsr) - wrmsrl(MSR_IA32_SPEC_CTRL, msr); + write_spec_ctrl_current(msr, false); } static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) @@ -667,6 +659,10 @@ static void amd_e400_idle(void) */ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) { + /* User has disallowed the use of MWAIT. Fallback to HALT */ + if (boot_option_idle_override == IDLE_NOMWAIT) + return 0; + if (c->x86_vendor != X86_VENDOR_INTEL) return 0; @@ -777,9 +773,8 @@ static int __init idle_setup(char *str) } else if (!strcmp(str, "nomwait")) { /* * If the boot option of "idle=nomwait" is added, - * it means that mwait will be disabled for CPU C2/C3 - * states. In such case it won't touch the variable - * of boot_option_idle_override. + * it means that mwait will be disabled for CPU C1/C2/C3 + * states. */ boot_option_idle_override = IDLE_NOMWAIT; } else diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index b8ceec4974fe3..352f876950ab3 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -229,14 +229,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; - struct fpu *prev_fpu = &prev->fpu; - struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ if (!test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_prepare(prev_fpu, cpu); + switch_fpu_prepare(prev_p, cpu); /* * Save away %gs. No need to save %fs, as it was saved on the @@ -292,7 +290,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) this_cpu_write(current_task, next_p); - switch_fpu_finish(next_fpu); + switch_fpu_finish(next_p); /* Load the Intel cache allocation PQR MSR. */ resctrl_sched_in(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index af64519b26957..633788362906a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -316,7 +316,7 @@ static unsigned long x86_fsgsbase_read_task(struct task_struct *task, */ mutex_lock(&task->mm->context.lock); ldt = task->mm->context.ldt; - if (unlikely(idx >= ldt->nr_entries)) + if (unlikely(!ldt || idx >= ldt->nr_entries)) base = 0; else base = get_desc_base(ldt->entries + idx); @@ -505,15 +505,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread; struct thread_struct *next = &next_p->thread; - struct fpu *prev_fpu = &prev->fpu; - struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(irq_count) != -1); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_prepare(prev_fpu, cpu); + switch_fpu_prepare(prev_p, cpu); /* We must save %fs and %gs before load_TLS() because * %fs and %gs may be cleared by load_TLS(). @@ -565,7 +563,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) this_cpu_write(current_task, next_p); this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); - switch_fpu_finish(next_fpu); + switch_fpu_finish(next_p); /* Reload sp0. */ update_task_stack(next_p); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 0cc7c0b106bbf..bf35056205b28 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -32,6 +32,7 @@ #include #include #include +#include /* * Power off function, if any @@ -113,25 +114,17 @@ void __noreturn machine_real_restart(unsigned int type) spin_unlock(&rtc_lock); /* - * Switch back to the initial page table. + * Switch to the trampoline page table. */ -#ifdef CONFIG_X86_32 - load_cr3(initial_page_table); -#else - write_cr3(real_mode_header->trampoline_pgd); - - /* Exiting long mode will fail if CR4.PCIDE is set. */ - if (boot_cpu_has(X86_FEATURE_PCID)) - cr4_clear_bits(X86_CR4_PCIDE); -#endif + load_trampoline_pgtable(); /* Jump to the identity-mapped low memory code */ #ifdef CONFIG_X86_32 - asm volatile("jmpl *%0" : : + asm volatile(ANNOTATE_RETPOLINE_SAFE "jmpl *%0" : : "rm" (real_mode_header->machine_real_restart_asm), "a" (type)); #else - asm volatile("ljmpl *%0" : : + asm volatile(ANNOTATE_RETPOLINE_SAFE "ljmpl *%0" : : "m" (real_mode_header->machine_real_restart_asm), "D" (type)); #endif @@ -197,6 +190,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"), }, }, + { /* Handle problems with rebooting on Apple MacBook6,1 */ + .callback = set_pci_reboot, + .ident = "Apple MacBook6,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBook6,1"), + }, + }, { /* Handle problems with rebooting on Apple MacBookPro5 */ .callback = set_pci_reboot, .ident = "Apple MacBookPro5", @@ -380,10 +381,11 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { }, { /* Handle problems with rebooting on the OptiPlex 990. */ .callback = set_pci_reboot, - .ident = "Dell OptiPlex 990", + .ident = "Dell OptiPlex 990 BIOS A0x", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"), + DMI_MATCH(DMI_BIOS_VERSION, "A0"), }, }, { /* Handle problems with rebooting on Dell 300's */ @@ -469,6 +471,15 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { }, }, + { /* PCIe Wifi card isn't detected after reboot otherwise */ + .callback = set_pci_reboot, + .ident = "Zotac ZBOX CI327 nano", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "NA"), + DMI_MATCH(DMI_PRODUCT_NAME, "ZBOX-CI327NANO-GS-01"), + }, + }, + /* Sony */ { /* Handle problems with rebooting on Sony VGN-Z540N */ .callback = set_bios_reboot, @@ -478,7 +489,46 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"), }, }, - + { /* Handle problems with rebooting on the Latitude E6520. */ + .callback = set_pci_reboot, + .ident = "Dell Latitude E6520", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6520"), + }, + }, + { /* Handle problems with rebooting on the OptiPlex 790. */ + .callback = set_pci_reboot, + .ident = "Dell OptiPlex 790", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 790"), + }, + }, + { /* Handle problems with rebooting on the OptiPlex 990. */ + .callback = set_pci_reboot, + .ident = "Dell OptiPlex 990", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"), + }, + }, + { /* Handle problems with rebooting on the Latitude E6220. */ + .callback = set_pci_reboot, + .ident = "Dell Latitude E6220", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude E6220"), + }, + }, + { /* Handle problems with rebooting on the OptiPlex 390. */ + .callback = set_pci_reboot, + .ident = "Dell OptiPlex 390", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 390"), + }, + }, { } }; @@ -530,29 +580,20 @@ static void emergency_vmx_disable_all(void) local_irq_disable(); /* - * We need to disable VMX on all CPUs before rebooting, otherwise - * we risk hanging up the machine, because the CPU ignore INIT - * signals when VMX is enabled. - * - * We can't take any locks and we may be on an inconsistent - * state, so we use NMIs as IPIs to tell the other CPUs to disable - * VMX and halt. + * Disable VMX on all CPUs before rebooting, otherwise we risk hanging + * the machine, because the CPU blocks INIT when it's in VMX root. * - * For safety, we will avoid running the nmi_shootdown_cpus() - * stuff unnecessarily, but we don't have a way to check - * if other CPUs have VMX enabled. So we will call it only if the - * CPU we are running on has VMX enabled. + * We can't take any locks and we may be on an inconsistent state, so + * use NMIs as IPIs to tell the other CPUs to exit VMX root and halt. * - * We will miss cases where VMX is not enabled on all CPUs. This - * shouldn't do much harm because KVM always enable VMX on all - * CPUs anyway. But we can miss it on the small window where KVM - * is still enabling VMX. + * Do the NMI shootdown even if VMX if off on _this_ CPU, as that + * doesn't prevent a different CPU from being in VMX root operation. */ - if (cpu_has_vmx() && cpu_vmx_enabled()) { - /* Disable VMX on this CPU. */ - cpu_vmxoff(); + if (cpu_has_vmx()) { + /* Safely force _this_ CPU out of VMX root operation. */ + __cpu_emergency_vmxoff(); - /* Halt and disable VMX on the other CPUs */ + /* Halt and exit VMX root operation on the other CPUs. */ nmi_shootdown_cpus(vmxoff_nmi); } diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index ee26df08002e6..ebee74ee888b5 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -8,6 +8,7 @@ #include #include #include +#include /* * Must be relocatable PIC code callable as a C function @@ -165,6 +166,7 @@ identity_mapped: movl CP_PA_SWAP_PAGE(%edi), %esp addl $PAGE_SIZE, %esp 2: + ANNOTATE_RETPOLINE_SAFE call *%edx /* get the re-entry point of the peer system */ diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index c51ccff5cd01f..8439f36abdd20 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -9,6 +9,7 @@ #include #include #include +#include /* * Must be relocatable PIC code callable as a C function @@ -192,6 +193,7 @@ identity_mapped: 1: popq %rdx leaq PAGE_SIZE(%r10), %rsp + ANNOTATE_RETPOLINE_SAFE call *%rdx /* get the re-entry point of the peer system */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 77ea96b794bd1..d4e38044ecb9f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include