in sync with the values current in the channel subsystem).
Note: This is an I/O-subchannel specific attribute.
Users: s390-tools, HAL
+
+What: /sys/bus/css/devices/.../driver_override
+Date: June 2019
+Contact: Cornelia Huck <cohuck@redhat.com>
+ linux-s390@vger.kernel.org
+Description: This file allows the driver for a device to be specified. When
+ specified, only a driver with a name matching the value written
+ to driver_override will have an opportunity to bind to the
+ device. The override is specified by writing a string to the
+ driver_override file (echo vfio-ccw > driver_override) and
+ may be cleared with an empty string (echo > driver_override).
+ This returns the device to standard matching rules binding.
+ Writing to driver_override does not automatically unbind the
+ device from its current driver or make any attempt to
+ automatically load the specified driver. If no driver with a
+ matching name is currently loaded in the kernel, the device
+ will not bind to any driver. This also allows devices to
+ opt-out of driver binding using a driver_override name such as
+ "none". Only a single driver may be specified in the override,
+ there is no support for parsing delimiters.
+ Note that unlike the mechanism of the same name for pci, this
+ file does not allow to override basic matching rules. I.e.,
+ the driver must still match the subchannel type of the device.
others).
ccw_timeout_log [S390]
- See Documentation/s390/CommonIO for details.
+ See Documentation/s390/common_io.rst for details.
cgroup_disable= [KNL] Disable a particular controller
Format: {name of the controller(s) to disable}
/selinux/checkreqprot.
cio_ignore= [S390]
- See Documentation/s390/CommonIO for details.
+ See Documentation/s390/common_io.rst for details.
clk_ignore_unused
[CLK]
Prevents the clock framework from automatically gating
Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001.
+HWCAP2_FLAGM2
+
+ Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010.
+
HWCAP_SSBS
Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010.
ID_AA64ISAR1_EL1.GPI == 0b0001, as described by
Documentation/arm64/pointer-authentication.txt.
+HWCAP2_FRINT
+
+ Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001.
+
4. Unused AT_HWCAP bits
-----------------------
.. c:function:: u64 ktime_get_ns( void )
u64 ktime_get_boottime_ns( void )
u64 ktime_get_real_ns( void )
- u64 ktime_get_tai_ns( void )
+ u64 ktime_get_clocktai_ns( void )
u64 ktime_get_raw_ns( void )
Same as the plain ktime_get functions, but returning a u64 number
Some additional variants exist for more specialized cases:
-.. c:function:: ktime_t ktime_get_coarse_boottime( void )
+.. c:function:: ktime_t ktime_get_coarse( void )
+ ktime_t ktime_get_coarse_boottime( void )
ktime_t ktime_get_coarse_real( void )
ktime_t ktime_get_coarse_clocktai( void )
- ktime_t ktime_get_coarse_raw( void )
+
+.. c:function:: u64 ktime_get_coarse_ns( void )
+ u64 ktime_get_coarse_boottime_ns( void )
+ u64 ktime_get_coarse_real_ns( void )
+ u64 ktime_get_coarse_clocktai_ns( void )
.. c:function:: void ktime_get_coarse_ts64( struct timespec64 * )
void ktime_get_coarse_boottime_ts64( struct timespec64 * )
void ktime_get_coarse_real_ts64( struct timespec64 * )
void ktime_get_coarse_clocktai_ts64( struct timespec64 * )
- void ktime_get_coarse_raw_ts64( struct timespec64 * )
These are quicker than the non-coarse versions, but less accurate,
corresponding to CLOCK_MONONOTNIC_COARSE and CLOCK_REALTIME_COARSE
--- /dev/null
+Amazon's Annapurna Labs Fabric Interrupt Controller
+
+Required properties:
+
+- compatible: should be "amazon,al-fic"
+- reg: physical base address and size of the registers
+- interrupt-controller: identifies the node as an interrupt controller
+- #interrupt-cells: must be 2.
+ First cell defines the index of the interrupt within the controller.
+ Second cell is used to specify the trigger type and must be one of the
+ following:
+ - bits[3:0] trigger type and level flags
+ 1 = low-to-high edge triggered
+ 4 = active high level-sensitive
+- interrupt-parent: specifies the parent interrupt controller.
+- interrupts: describes which input line in the interrupt parent, this
+ fic's output is connected to. This field property depends on the parent's
+ binding
+
+Example:
+
+amazon_fic: interrupt-controller@0xfd8a8500 {
+ compatible = "amazon,al-fic";
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ reg = <0x0 0xfd8a8500 0x0 0x1000>;
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_SPI 0x0 IRQ_TYPE_LEVEL_HIGH>;
+};
"amlogic,meson-gxbb-gpio-intc" for GXBB SoCs (S905) or
"amlogic,meson-gxl-gpio-intc" for GXL SoCs (S905X, S912)
"amlogic,meson-axg-gpio-intc" for AXG SoCs (A113D, A113X)
+ "amlogic,meson-g12a-gpio-intc" for G12A SoCs (S905D2, S905X2, S905Y2)
- reg : Specifies base physical address and size of the registers.
- interrupt-controller : Identifies the node as an interrupt controller.
- #interrupt-cells : Specifies the number of cells needed to encode an
SMP soc, and it also could be used in non-SMP system.
Interrupt number definition:
-
0-15 : software irq, and we use 15 as our IPI_IRQ.
16-31 : private irq, and we use 16 as the co-processor timer.
31-1024: common irq for soc ip.
+Interrupt triger mode: (Defined in dt-bindings/interrupt-controller/irq.h)
+ IRQ_TYPE_LEVEL_HIGH (default)
+ IRQ_TYPE_LEVEL_LOW
+ IRQ_TYPE_EDGE_RISING
+ IRQ_TYPE_EDGE_FALLING
+
=============================
intc node bindings definition
=============================
- #interrupt-cells
Usage: required
Value type: <u32>
- Definition: must be <1>
+ Definition: <2>
- interrupt-controller:
Usage: required
-Examples:
+Examples: ("interrupts = <irq_num IRQ_TYPE_XXX>")
---------
+#include <dt-bindings/interrupt-controller/irq.h>
intc: interrupt-controller {
compatible = "csky,mpintc";
- #interrupt-cells = <1>;
+ #interrupt-cells = <2>;
interrupt-controller;
};
+
+ device: device-example {
+ ...
+ interrupts = <34 IRQ_TYPE_EDGE_RISING>;
+ interrupt-parent = <&intc>;
+ };
--- /dev/null
+DT bindings for the Renesas RZ/A1 Interrupt Controller
+
+The RZ/A1 Interrupt Controller is a front-end for the GIC found on Renesas
+RZ/A1 and RZ/A2 SoCs:
+ - IRQ sense select for 8 external interrupts, 1:1-mapped to 8 GIC SPI
+ interrupts,
+ - NMI edge select.
+
+Required properties:
+ - compatible: Must be "renesas,<soctype>-irqc", and "renesas,rza1-irqc" as
+ fallback.
+ Examples with soctypes are:
+ - "renesas,r7s72100-irqc" (RZ/A1H)
+ - "renesas,r7s9210-irqc" (RZ/A2M)
+ - #interrupt-cells: Must be 2 (an interrupt index and flags, as defined
+ in interrupts.txt in this directory)
+ - #address-cells: Must be zero
+ - interrupt-controller: Marks the device as an interrupt controller
+ - reg: Base address and length of the memory resource used by the interrupt
+ controller
+ - interrupt-map: Specifies the mapping from external interrupts to GIC
+ interrupts
+ - interrupt-map-mask: Must be <7 0>
+
+Example:
+
+ irqc: interrupt-controller@fcfef800 {
+ compatible = "renesas,r7s72100-irqc", "renesas,rza1-irqc";
+ #interrupt-cells = <2>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0xfcfef800 0x6>;
+ interrupt-map =
+ <0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+ <1 0 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+ <2 0 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>,
+ <3 0 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>,
+ <4 0 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>,
+ <5 0 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>,
+ <6 0 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>,
+ <7 0 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-map-mask = <7 0>;
+ };
--- /dev/null
+* Freescale(NXP) IMX8 DDR performance monitor
+
+Required properties:
+
+- compatible: should be one of:
+ "fsl,imx8-ddr-pmu"
+ "fsl,imx8m-ddr-pmu"
+
+- reg: physical address and size
+
+- interrupts: single interrupt
+ generated by the control block
+
+Example:
+
+ ddr-pmu@5c020000 {
+ compatible = "fsl,imx8-ddr-pmu";
+ reg = <0x5c020000 0x10000>;
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>;
+ };
- |
// Example 2: Spike ISA Simulator with 1 Hart
cpus {
- cpu@0 {
- device_type = "cpu";
- reg = <0>;
- compatible = "riscv";
- riscv,isa = "rv64imafdc";
- mmu-type = "riscv,sv48";
- interrupt-controller {
- #interrupt-cells = <1>;
- interrupt-controller;
- compatible = "riscv,cpu-intc";
- };
- };
+ #address-cells = <1>;
+ #size-cells = <0>;
+ cpu@0 {
+ device_type = "cpu";
+ reg = <0>;
+ compatible = "riscv";
+ riscv,isa = "rv64imafdc";
+ mmu-type = "riscv,sv48";
+ interrupt-controller {
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ compatible = "riscv,cpu-intc";
+ };
+ };
};
...
--- /dev/null
+NXP System Counter Module(sys_ctr)
+
+The system counter(sys_ctr) is a programmable system counter which provides
+a shared time base to Cortex A15, A7, A53, A73, etc. it is intended for use in
+applications where the counter is always powered and support multiple,
+unrelated clocks. The compare frame inside can be used for timer purpose.
+
+Required properties:
+
+- compatible : should be "nxp,sysctr-timer"
+- reg : Specifies the base physical address and size of the comapre
+ frame and the counter control, read & compare.
+- interrupts : should be the first compare frames' interrupt
+- clocks : Specifies the counter clock.
+- clock-names: Specifies the clock's name of this module
+
+Example:
+
+ system_counter: timer@306a0000 {
+ compatible = "nxp,sysctr-timer";
+ reg = <0x306a0000 0x20000>;/* system-counter-rd & compare */
+ clocks = <&clk_8m>;
+ clock-names = "per";
+ interrupts = <GIC_SPI 47 IRQ_TYPE_LEVEL_HIGH>;
+ };
although they are not the focus of this document.
Some additional information can also be found in the kernel source under
-Documentation/s390/driver-model.txt.
+Documentation/s390/driver-model.rst.
The css bus
===========
* Standard I/O subchannels, for use by the system. They have a child
device on the ccw bus and are described below.
* I/O subchannels bound to the vfio-ccw driver. See
- Documentation/s390/vfio-ccw.txt.
+ Documentation/s390/vfio-ccw.rst.
* Message subchannels. No Linux driver currently exists.
* CHSC subchannels (at most one). The chsc subchannel driver can be used
to send asynchronous chsc commands.
+===============================
IBM 3270 Display System support
+===============================
This file describes the driver that supports local channel attachment
of IBM 3270 devices. It consists of three sections:
+
* Introduction
* Installation
* Operation
-INTRODUCTION.
+Introduction
+============
This paper describes installing and operating 3270 devices under
Linux/390. A 3270 device is a block-mode rows-and-columns terminal of
You may have 3270s in-house and not know it. If you're using the
VM-ESA operating system, define a 3270 to your virtual machine by using
the command "DEF GRAF <hex-address>" This paper presumes you will be
-defining four 3270s with the CP/CMS commands
+defining four 3270s with the CP/CMS commands:
- DEF GRAF 620
- DEF GRAF 621
- DEF GRAF 622
- DEF GRAF 623
+ - DEF GRAF 620
+ - DEF GRAF 621
+ - DEF GRAF 622
+ - DEF GRAF 623
Your network connection from VM-ESA allows you to use x3270, tn3270, or
another 3270 emulator, started from an xterm window on your PC or
dialed-in x3270.
-INSTALLATION.
+Installation
+============
You install the driver by installing a patch, doing a kernel build, and
running the configuration script (config3270.sh, in this directory).
at boot time to a 3270 if it is a 3215.
In brief, these are the steps:
+
1. Install the tub3270 patch
- 2. (If a module) add a line to a file in /etc/modprobe.d/*.conf
+ 2. (If a module) add a line to a file in `/etc/modprobe.d/*.conf`
3. (If VM) define devices with DEF GRAF
4. Reboot
5. Configure
To test that everything works, assuming VM and x3270,
+
1. Bring up an x3270 window.
2. Use the DIAL command in that window.
3. You should immediately see a Linux login screen.
1. The 3270 driver is a part of the official Linux kernel
source. Build a tree with the kernel source and any necessary
- patches. Then do
+ patches. Then do::
+
make oldconfig
(If you wish to disable 3215 console support, edit
.config; change CONFIG_TN3215's value to "n";
make modules_install
2. (Perform this step only if you have configured tub3270 as a
- module.) Add a line to a file /etc/modprobe.d/*.conf to automatically
+ module.) Add a line to a file `/etc/modprobe.d/*.conf` to automatically
load the driver when it's needed. With this line added, you will see
login prompts appear on your 3270s as soon as boot is complete (or
with emulated 3270s, as soon as you dial into your vm guest using the
command "DIAL <vmguestname>"). Since the line-mode major number is
- 227, the line to add should be:
+ 227, the line to add should be::
+
alias char-major-227 tub3270
3. Define graphic devices to your vm guest machine, if you
haven't already. Define them before you reboot (reipl):
- DEFINE GRAF 620
- DEFINE GRAF 621
- DEFINE GRAF 622
- DEFINE GRAF 623
+
+ - DEFINE GRAF 620
+ - DEFINE GRAF 621
+ - DEFINE GRAF 622
+ - DEFINE GRAF 623
4. Reboot. The reboot process scans hardware devices, including
3270s, and this enables the tub3270 driver once loaded to respond
5. Run the 3270 configuration script config3270. It is
distributed in this same directory, Documentation/s390, as
- config3270.sh. Inspect the output script it produces,
+ config3270.sh. Inspect the output script it produces,
/tmp/mkdev3270, and then run that script. This will create the
necessary character special device files and make the necessary
changes to /etc/inittab.
Then notify /sbin/init that /etc/inittab has changed, by issuing
- the telinit command with the q operand:
+ the telinit command with the q operand::
+
cd Documentation/s390
sh config3270.sh
sh /tmp/mkdev3270
telinit q
- This should be sufficient for your first time. If your 3270
+ This should be sufficient for your first time. If your 3270
configuration has changed and you're reusing config3270, you
- should follow these steps:
+ should follow these steps::
+
Change 3270 configuration
Reboot
Run config3270 and /tmp/mkdev3270
1. Bring up an x3270 window, or use an actual hardware 3278 or
3279, or use the 3270 emulator of your choice. You would be
running the emulator on your PC or workstation. You would use
- the command, for example,
+ the command, for example::
+
x3270 vm-esa-domain-name &
+
if you wanted a 3278 Model 4 with 43 rows of 80 columns, the
default model number. The driver does not take advantage of
extended attributes.
2. Use the DIAL command instead of the LOGIN command to connect
to one of the virtual 3270s you defined with the DEF GRAF
- commands:
+ commands::
+
dial my-vm-guest-name
3. You should immediately see a login prompt from your
Wrong major number? Wrong minor number? There's your
problem!
- D. Do you get the message
+ D. Do you get the message::
+
"HCPDIA047E my-vm-guest-name 0620 does not exist"?
+
If so, you must issue the command "DEF GRAF 620" from your VM
3215 console and then reboot the system.
OPERATION.
+==========
The driver defines three areas on the 3270 screen: the log area, the
input area, and the status area.
Running" and nothing typed, the application receives a newline.)
You may change the scrolling timeout value. For example, the following
-command line:
+command line::
+
echo scrolltime=60 > /proc/tty/driver/tty3270
+
changes the scrolling timeout value to 60 sec. Set scrolltime to 0 if
you wish to prevent scrolling entirely.
No PF key is preassigned to cause a job suspension, but you may cause a
job suspension by typing "^Z" and hitting ENTER. You may wish to
assign this function to a PF key. To make PF7 cause job suspension,
-execute the command:
+execute the command::
+
echo pf7=^z > /proc/tty/driver/tty3270
If the input you type does not end with the two characters "^n", the
invisible (such as for password entry) and it is not identical to the
current top entry. PF10 rotates backward through the command stack;
PF11 rotates forward. You may assign the backward function to any PF
-key (or PA key, for that matter), say, PA3, with the command:
+key (or PA key, for that matter), say, PA3, with the command::
+
echo -e pa3=\\033k > /proc/tty/driver/tty3270
+
This assigns the string ESC-k to PA3. Similarly, the string ESC-j
performs the forward function. (Rationale: In bash with vi-mode line
editing, ESC-k and ESC-j retrieve backward and forward history.
Is a stack size of twenty commands not to your liking? Change it on
the fly. To change to saving the last 100 commands, execute the
-command:
+command::
+
echo recallsize=100 > /proc/tty/driver/tty3270
Have a command you issue frequently? Assign it to a PF or PA key! Use
-the command
- echo pf24="mkdir foobar; cd foobar" > /proc/tty/driver/tty3270
+the command::
+
+ echo pf24="mkdir foobar; cd foobar" > /proc/tty/driver/tty3270
+
to execute the commands mkdir foobar and cd foobar immediately when you
hit PF24. Want to see the command line first, before you execute it?
-Use the -n option of the echo command:
+Use the -n option of the echo command::
+
echo -n pf24="mkdir foo; cd foo" > /proc/tty/driver/tty3270
+++ /dev/null
-
- Debugging on Linux for s/390 & z/Architecture
- by
- Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
- Copyright (C) 2000-2001 IBM Deutschland Entwicklung GmbH, IBM Corporation
- Best viewed with fixed width fonts
-
-Overview of Document:
-=====================
-This document is intended to give a good overview of how to debug Linux for
-s/390 and z/Architecture. It is not intended as a complete reference and not a
-tutorial on the fundamentals of C & assembly. It doesn't go into
-390 IO in any detail. It is intended to complement the documents in the
-reference section below & any other worthwhile references you get.
-
-It is intended like the Enterprise Systems Architecture/390 Reference Summary
-to be printed out & used as a quick cheat sheet self help style reference when
-problems occur.
-
-Contents
-========
-Register Set
-Address Spaces on Intel Linux
-Address Spaces on Linux for s/390 & z/Architecture
-The Linux for s/390 & z/Architecture Kernel Task Structure
-Register Usage & Stackframes on Linux for s/390 & z/Architecture
-A sample program with comments
-Compiling programs for debugging on Linux for s/390 & z/Architecture
-Debugging under VM
-s/390 & z/Architecture IO Overview
-Debugging IO on s/390 & z/Architecture under VM
-GDB on s/390 & z/Architecture
-Stack chaining in gdb by hand
-Examining core dumps
-ldd
-Debugging modules
-The proc file system
-SysRq
-References
-Special Thanks
-
-Register Set
-============
-The current architectures have the following registers.
-
-16 General propose registers, 32 bit on s/390 and 64 bit on z/Architecture,
-r0-r15 (or gpr0-gpr15), used for arithmetic and addressing.
-
-16 Control registers, 32 bit on s/390 and 64 bit on z/Architecture, cr0-cr15,
-kernel usage only, used for memory management, interrupt control, debugging
-control etc.
-
-16 Access registers (ar0-ar15), 32 bit on both s/390 and z/Architecture,
-normally not used by normal programs but potentially could be used as
-temporary storage. These registers have a 1:1 association with general
-purpose registers and are designed to be used in the so-called access
-register mode to select different address spaces.
-Access register 0 (and access register 1 on z/Architecture, which needs a
-64 bit pointer) is currently used by the pthread library as a pointer to
-the current running threads private area.
-
-16 64 bit floating point registers (fp0-fp15 ) IEEE & HFP floating
-point format compliant on G5 upwards & a Floating point control reg (FPC)
-4 64 bit registers (fp0,fp2,fp4 & fp6) HFP only on older machines.
-Note:
-Linux (currently) always uses IEEE & emulates G5 IEEE format on older machines,
-( provided the kernel is configured for this ).
-
-
-The PSW is the most important register on the machine it
-is 64 bit on s/390 & 128 bit on z/Architecture & serves the roles of
-a program counter (pc), condition code register,memory space designator.
-In IBM standard notation I am counting bit 0 as the MSB.
-It has several advantages over a normal program counter
-in that you can change address translation & program counter
-in a single instruction. To change address translation,
-e.g. switching address translation off requires that you
-have a logical=physical mapping for the address you are
-currently running at.
-
- Bit Value
-s/390 z/Architecture
-0 0 Reserved ( must be 0 ) otherwise specification exception occurs.
-
-1 1 Program Event Recording 1 PER enabled,
- PER is used to facilitate debugging e.g. single stepping.
-
-2-4 2-4 Reserved ( must be 0 ).
-
-5 5 Dynamic address translation 1=DAT on.
-
-6 6 Input/Output interrupt Mask
-
-7 7 External interrupt Mask used primarily for interprocessor
- signalling and clock interrupts.
-
-8-11 8-11 PSW Key used for complex memory protection mechanism
- (not used under linux)
-
-12 12 1 on s/390 0 on z/Architecture
-
-13 13 Machine Check Mask 1=enable machine check interrupts
-
-14 14 Wait State. Set this to 1 to stop the processor except for
- interrupts and give time to other LPARS. Used in CPU idle in
- the kernel to increase overall usage of processor resources.
-
-15 15 Problem state ( if set to 1 certain instructions are disabled )
- all linux user programs run with this bit 1
- ( useful info for debugging under VM ).
-
-16-17 16-17 Address Space Control
-
- 00 Primary Space Mode:
- The register CR1 contains the primary address-space control ele-
- ment (PASCE), which points to the primary space region/segment
- table origin.
-
- 01 Access register mode
-
- 10 Secondary Space Mode:
- The register CR7 contains the secondary address-space control
- element (SASCE), which points to the secondary space region or
- segment table origin.
-
- 11 Home Space Mode:
- The register CR13 contains the home space address-space control
- element (HASCE), which points to the home space region/segment
- table origin.
-
- See "Address Spaces on Linux for s/390 & z/Architecture" below
- for more information about address space usage in Linux.
-
-18-19 18-19 Condition codes (CC)
-
-20 20 Fixed point overflow mask if 1=FPU exceptions for this event
- occur ( normally 0 )
-
-21 21 Decimal overflow mask if 1=FPU exceptions for this event occur
- ( normally 0 )
-
-22 22 Exponent underflow mask if 1=FPU exceptions for this event occur
- ( normally 0 )
-
-23 23 Significance Mask if 1=FPU exceptions for this event occur
- ( normally 0 )
-
-24-31 24-30 Reserved Must be 0.
-
- 31 Extended Addressing Mode
- 32 Basic Addressing Mode
- Used to set addressing mode
- PSW 31 PSW 32
- 0 0 24 bit
- 0 1 31 bit
- 1 1 64 bit
-
-32 1=31 bit addressing mode 0=24 bit addressing mode (for backward
- compatibility), linux always runs with this bit set to 1
-
-33-64 Instruction address.
- 33-63 Reserved must be 0
- 64-127 Address
- In 24 bits mode bits 64-103=0 bits 104-127 Address
- In 31 bits mode bits 64-96=0 bits 97-127 Address
- Note: unlike 31 bit mode on s/390 bit 96 must be zero
- when loading the address with LPSWE otherwise a
- specification exception occurs, LPSW is fully backward
- compatible.
-
-
-Prefix Page(s)
---------------
-This per cpu memory area is too intimately tied to the processor not to mention.
-It exists between the real addresses 0-4096 on s/390 and between 0-8192 on
-z/Architecture and is exchanged with one page on s/390 or two pages on
-z/Architecture in absolute storage by the set prefix instruction during Linux
-startup.
-This page is mapped to a different prefix for each processor in an SMP
-configuration (assuming the OS designer is sane of course).
-Bytes 0-512 (200 hex) on s/390 and 0-512, 4096-4544, 4604-5119 currently on
-z/Architecture are used by the processor itself for holding such information
-as exception indications and entry points for exceptions.
-Bytes after 0xc00 hex are used by linux for per processor globals on s/390 and
-z/Architecture (there is a gap on z/Architecture currently between 0xc00 and
-0x1000, too, which is used by Linux).
-The closest thing to this on traditional architectures is the interrupt
-vector table. This is a good thing & does simplify some of the kernel coding
-however it means that we now cannot catch stray NULL pointers in the
-kernel without hard coded checks.
-
-
-
-Address Spaces on Intel Linux
-=============================
-
-The traditional Intel Linux is approximately mapped as follows forgive
-the ascii art.
-0xFFFFFFFF 4GB Himem *****************
- * *
- * Kernel Space *
- * *
- ***************** ****************
-User Space Himem * User Stack * * *
-(typically 0xC0000000 3GB ) ***************** * *
- * Shared Libs * * Next Process *
- ***************** * to *
- * * <== * Run * <==
- * User Program * * *
- * Data BSS * * *
- * Text * * *
- * Sections * * *
-0x00000000 ***************** ****************
-
-Now it is easy to see that on Intel it is quite easy to recognise a kernel
-address as being one greater than user space himem (in this case 0xC0000000),
-and addresses of less than this are the ones in the current running program on
-this processor (if an smp box).
-If using the virtual machine ( VM ) as a debugger it is quite difficult to
-know which user process is running as the address space you are looking at
-could be from any process in the run queue.
-
-The limitation of Intels addressing technique is that the linux
-kernel uses a very simple real address to virtual addressing technique
-of Real Address=Virtual Address-User Space Himem.
-This means that on Intel the kernel linux can typically only address
-Himem=0xFFFFFFFF-0xC0000000=1GB & this is all the RAM these machines
-can typically use.
-They can lower User Himem to 2GB or lower & thus be
-able to use 2GB of RAM however this shrinks the maximum size
-of User Space from 3GB to 2GB they have a no win limit of 4GB unless
-they go to 64 Bit.
-
-
-On 390 our limitations & strengths make us slightly different.
-For backward compatibility we are only allowed use 31 bits (2GB)
-of our 32 bit addresses, however, we use entirely separate address
-spaces for the user & kernel.
-
-This means we can support 2GB of non Extended RAM on s/390, & more
-with the Extended memory management swap device &
-currently 4TB of physical memory currently on z/Architecture.
-
-
-Address Spaces on Linux for s/390 & z/Architecture
-==================================================
-
-Our addressing scheme is basically as follows:
-
- Primary Space Home Space
-Himem 0x7fffffff 2GB on s/390 ***************** ****************
-currently 0x3ffffffffff (2^42)-1 * User Stack * * *
-on z/Architecture. ***************** * *
- * Shared Libs * * *
- ***************** * *
- * * * Kernel *
- * User Program * * *
- * Data BSS * * *
- * Text * * *
- * Sections * * *
-0x00000000 ***************** ****************
-
-This also means that we need to look at the PSW problem state bit and the
-addressing mode to decide whether we are looking at user or kernel space.
-
-User space runs in primary address mode (or access register mode within
-the vdso code).
-
-The kernel usually also runs in home space mode, however when accessing
-user space the kernel switches to primary or secondary address mode if
-the mvcos instruction is not available or if a compare-and-swap (futex)
-instruction on a user space address is performed.
-
-When also looking at the ASCE control registers, this means:
-
-User space:
-- runs in primary or access register mode
-- cr1 contains the user asce
-- cr7 contains the user asce
-- cr13 contains the kernel asce
-
-Kernel space:
-- runs in home space mode
-- cr1 contains the user or kernel asce
- -> the kernel asce is loaded when a uaccess requires primary or
- secondary address mode
-- cr7 contains the user or kernel asce, (changed with set_fs())
-- cr13 contains the kernel asce
-
-In case of uaccess the kernel changes to:
-- primary space mode in case of a uaccess (copy_to_user) and uses
- e.g. the mvcp instruction to access user space. However the kernel
- will stay in home space mode if the mvcos instruction is available
-- secondary space mode in case of futex atomic operations, so that the
- instructions come from primary address space and data from secondary
- space
-
-In case of KVM, the kernel runs in home space mode, but cr1 gets switched
-to contain the gmap asce before the SIE instruction gets executed. When
-the SIE instruction is finished, cr1 will be switched back to contain the
-user asce.
-
-
-Virtual Addresses on s/390 & z/Architecture
-===========================================
-
-A virtual address on s/390 is made up of 3 parts
-The SX (segment index, roughly corresponding to the PGD & PMD in Linux
-terminology) being bits 1-11.
-The PX (page index, corresponding to the page table entry (pte) in Linux
-terminology) being bits 12-19.
-The remaining bits BX (the byte index are the offset in the page )
-i.e. bits 20 to 31.
-
-On z/Architecture in linux we currently make up an address from 4 parts.
-The region index bits (RX) 0-32 we currently use bits 22-32
-The segment index (SX) being bits 33-43
-The page index (PX) being bits 44-51
-The byte index (BX) being bits 52-63
-
-Notes:
-1) s/390 has no PMD so the PMD is really the PGD also.
-A lot of this stuff is defined in pgtable.h.
-
-2) Also seeing as s/390's page indexes are only 1k in size
-(bits 12-19 x 4 bytes per pte ) we use 1 ( page 4k )
-to make the best use of memory by updating 4 segment indices
-entries each time we mess with a PMD & use offsets
-0,1024,2048 & 3072 in this page as for our segment indexes.
-On z/Architecture our page indexes are now 2k in size
-( bits 12-19 x 8 bytes per pte ) we do a similar trick
-but only mess with 2 segment indices each time we mess with
-a PMD.
-
-3) As z/Architecture supports up to a massive 5-level page table lookup we
-can only use 3 currently on Linux ( as this is all the generic kernel
-currently supports ) however this may change in future
-this allows us to access ( according to my sums )
-4TB of virtual storage per process i.e.
-4096*512(PTES)*1024(PMDS)*2048(PGD) = 4398046511104 bytes,
-enough for another 2 or 3 of years I think :-).
-to do this we use a region-third-table designation type in
-our address space control registers.
-
-
-The Linux for s/390 & z/Architecture Kernel Task Structure
-==========================================================
-Each process/thread under Linux for S390 has its own kernel task_struct
-defined in linux/include/linux/sched.h
-The S390 on initialisation & resuming of a process on a cpu sets
-the __LC_KERNEL_STACK variable in the spare prefix area for this cpu
-(which we use for per-processor globals).
-
-The kernel stack pointer is intimately tied with the task structure for
-each processor as follows.
-
- s/390
- ************************
- * 1 page kernel stack *
- * ( 4K ) *
- ************************
- * 1 page task_struct *
- * ( 4K ) *
-8K aligned ************************
-
- z/Architecture
- ************************
- * 2 page kernel stack *
- * ( 8K ) *
- ************************
- * 2 page task_struct *
- * ( 8K ) *
-16K aligned ************************
-
-What this means is that we don't need to dedicate any register or global
-variable to point to the current running process & can retrieve it with the
-following very simple construct for s/390 & one very similar for z/Architecture.
-
-static inline struct task_struct * get_current(void)
-{
- struct task_struct *current;
- __asm__("lhi %0,-8192\n\t"
- "nr %0,15"
- : "=r" (current) );
- return current;
-}
-
-i.e. just anding the current kernel stack pointer with the mask -8192.
-Thankfully because Linux doesn't have support for nested IO interrupts
-& our devices have large buffers can survive interrupts being shut for
-short amounts of time we don't need a separate stack for interrupts.
-
-
-
-
-Register Usage & Stackframes on Linux for s/390 & z/Architecture
-=================================================================
-Overview:
----------
-This is the code that gcc produces at the top & the bottom of
-each function. It usually is fairly consistent & similar from
-function to function & if you know its layout you can probably
-make some headway in finding the ultimate cause of a problem
-after a crash without a source level debugger.
-
-Note: To follow stackframes requires a knowledge of C or Pascal &
-limited knowledge of one assembly language.
-
-It should be noted that there are some differences between the
-s/390 and z/Architecture stack layouts as the z/Architecture stack layout
-didn't have to maintain compatibility with older linkage formats.
-
-Glossary:
----------
-alloca:
-This is a built in compiler function for runtime allocation
-of extra space on the callers stack which is obviously freed
-up on function exit ( e.g. the caller may choose to allocate nothing
-of a buffer of 4k if required for temporary purposes ), it generates
-very efficient code ( a few cycles ) when compared to alternatives
-like malloc.
-
-automatics: These are local variables on the stack,
-i.e they aren't in registers & they aren't static.
-
-back-chain:
-This is a pointer to the stack pointer before entering a
-framed functions ( see frameless function ) prologue got by
-dereferencing the address of the current stack pointer,
- i.e. got by accessing the 32 bit value at the stack pointers
-current location.
-
-base-pointer:
-This is a pointer to the back of the literal pool which
-is an area just behind each procedure used to store constants
-in each function.
-
-call-clobbered: The caller probably needs to save these registers if there
-is something of value in them, on the stack or elsewhere before making a
-call to another procedure so that it can restore it later.
-
-epilogue:
-The code generated by the compiler to return to the caller.
-
-frameless-function
-A frameless function in Linux for s390 & z/Architecture is one which doesn't
-need more than the register save area (96 bytes on s/390, 160 on z/Architecture)
-given to it by the caller.
-A frameless function never:
-1) Sets up a back chain.
-2) Calls alloca.
-3) Calls other normal functions
-4) Has automatics.
-
-GOT-pointer:
-This is a pointer to the global-offset-table in ELF
-( Executable Linkable Format, Linux'es most common executable format ),
-all globals & shared library objects are found using this pointer.
-
-lazy-binding
-ELF shared libraries are typically only loaded when routines in the shared
-library are actually first called at runtime. This is lazy binding.
-
-procedure-linkage-table
-This is a table found from the GOT which contains pointers to routines
-in other shared libraries which can't be called to by easier means.
-
-prologue:
-The code generated by the compiler to set up the stack frame.
-
-outgoing-args:
-This is extra area allocated on the stack of the calling function if the
-parameters for the callee's cannot all be put in registers, the same
-area can be reused by each function the caller calls.
-
-routine-descriptor:
-A COFF executable format based concept of a procedure reference
-actually being 8 bytes or more as opposed to a simple pointer to the routine.
-This is typically defined as follows
-Routine Descriptor offset 0=Pointer to Function
-Routine Descriptor offset 4=Pointer to Table of Contents
-The table of contents/TOC is roughly equivalent to a GOT pointer.
-& it means that shared libraries etc. can be shared between several
-environments each with their own TOC.
-
-
-static-chain: This is used in nested functions a concept adopted from pascal
-by gcc not used in ansi C or C++ ( although quite useful ), basically it
-is a pointer used to reference local variables of enclosing functions.
-You might come across this stuff once or twice in your lifetime.
-
-e.g.
-The function below should return 11 though gcc may get upset & toss warnings
-about unused variables.
-int FunctionA(int a)
-{
- int b;
- FunctionC(int c)
- {
- b=c+1;
- }
- FunctionC(10);
- return(b);
-}
-
-
-s/390 & z/Architecture Register usage
-=====================================
-r0 used by syscalls/assembly call-clobbered
-r1 used by syscalls/assembly call-clobbered
-r2 argument 0 / return value 0 call-clobbered
-r3 argument 1 / return value 1 (if long long) call-clobbered
-r4 argument 2 call-clobbered
-r5 argument 3 call-clobbered
-r6 argument 4 saved
-r7 pointer-to arguments 5 to ... saved
-r8 this & that saved
-r9 this & that saved
-r10 static-chain ( if nested function ) saved
-r11 frame-pointer ( if function used alloca ) saved
-r12 got-pointer saved
-r13 base-pointer saved
-r14 return-address saved
-r15 stack-pointer saved
-
-f0 argument 0 / return value ( float/double ) call-clobbered
-f2 argument 1 call-clobbered
-f4 z/Architecture argument 2 saved
-f6 z/Architecture argument 3 saved
-The remaining floating points
-f1,f3,f5 f7-f15 are call-clobbered.
-
-Notes:
-------
-1) The only requirement is that registers which are used
-by the callee are saved, e.g. the compiler is perfectly
-capable of using r11 for purposes other than a frame a
-frame pointer if a frame pointer is not needed.
-2) In functions with variable arguments e.g. printf the calling procedure
-is identical to one without variable arguments & the same number of
-parameters. However, the prologue of this function is somewhat more
-hairy owing to it having to move these parameters to the stack to
-get va_start, va_arg & va_end to work.
-3) Access registers are currently unused by gcc but are used in
-the kernel. Possibilities exist to use them at the moment for
-temporary storage but it isn't recommended.
-4) Only 4 of the floating point registers are used for
-parameter passing as older machines such as G3 only have only 4
-& it keeps the stack frame compatible with other compilers.
-However with IEEE floating point emulation under linux on the
-older machines you are free to use the other 12.
-5) A long long or double parameter cannot be have the
-first 4 bytes in a register & the second four bytes in the
-outgoing args area. It must be purely in the outgoing args
-area if crossing this boundary.
-6) Floating point parameters are mixed with outgoing args
-on the outgoing args area in the order the are passed in as parameters.
-7) Floating point arguments 2 & 3 are saved in the outgoing args area for
-z/Architecture
-
-
-Stack Frame Layout
-------------------
-s/390 z/Architecture
-0 0 back chain ( a 0 here signifies end of back chain )
-4 8 eos ( end of stack, not used on Linux for S390 used in other linkage formats )
-8 16 glue used in other s/390 linkage formats for saved routine descriptors etc.
-12 24 glue used in other s/390 linkage formats for saved routine descriptors etc.
-16 32 scratch area
-20 40 scratch area
-24 48 saved r6 of caller function
-28 56 saved r7 of caller function
-32 64 saved r8 of caller function
-36 72 saved r9 of caller function
-40 80 saved r10 of caller function
-44 88 saved r11 of caller function
-48 96 saved r12 of caller function
-52 104 saved r13 of caller function
-56 112 saved r14 of caller function
-60 120 saved r15 of caller function
-64 128 saved f4 of caller function
-72 132 saved f6 of caller function
-80 undefined
-96 160 outgoing args passed from caller to callee
-96+x 160+x possible stack alignment ( 8 bytes desirable )
-96+x+y 160+x+y alloca space of caller ( if used )
-96+x+y+z 160+x+y+z automatics of caller ( if used )
-0 back-chain
-
-A sample program with comments.
-===============================
-
-Comments on the function test
------------------------------
-1) It didn't need to set up a pointer to the constant pool gpr13 as it is not
-used ( :-( ).
-2) This is a frameless function & no stack is bought.
-3) The compiler was clever enough to recognise that it could return the
-value in r2 as well as use it for the passed in parameter ( :-) ).
-4) The basr ( branch relative & save ) trick works as follows the instruction
-has a special case with r0,r0 with some instruction operands is understood as
-the literal value 0, some risc architectures also do this ). So now
-we are branching to the next address & the address new program counter is
-in r13,so now we subtract the size of the function prologue we have executed
-+ the size of the literal pool to get to the top of the literal pool
-0040037c int test(int b)
-{ # Function prologue below
- 40037c: 90 de f0 34 stm %r13,%r14,52(%r15) # Save registers r13 & r14
- 400380: 0d d0 basr %r13,%r0 # Set up pointer to constant pool using
- 400382: a7 da ff fa ahi %r13,-6 # basr trick
- return(5+b);
- # Huge main program
- 400386: a7 2a 00 05 ahi %r2,5 # add 5 to r2
-
- # Function epilogue below
- 40038a: 98 de f0 34 lm %r13,%r14,52(%r15) # restore registers r13 & 14
- 40038e: 07 fe br %r14 # return
-}
-
-Comments on the function main
------------------------------
-1) The compiler did this function optimally ( 8-) )
-
-Literal pool for main.
-400390: ff ff ff ec .long 0xffffffec
-main(int argc,char *argv[])
-{ # Function prologue below
- 400394: 90 bf f0 2c stm %r11,%r15,44(%r15) # Save necessary registers
- 400398: 18 0f lr %r0,%r15 # copy stack pointer to r0
- 40039a: a7 fa ff a0 ahi %r15,-96 # Make area for callee saving
- 40039e: 0d d0 basr %r13,%r0 # Set up r13 to point to
- 4003a0: a7 da ff f0 ahi %r13,-16 # literal pool
- 4003a4: 50 00 f0 00 st %r0,0(%r15) # Save backchain
-
- return(test(5)); # Main Program Below
- 4003a8: 58 e0 d0 00 l %r14,0(%r13) # load relative address of test from
- # literal pool
- 4003ac: a7 28 00 05 lhi %r2,5 # Set first parameter to 5
- 4003b0: 4d ee d0 00 bas %r14,0(%r14,%r13) # jump to test setting r14 as return
- # address using branch & save instruction.
-
- # Function Epilogue below
- 4003b4: 98 bf f0 8c lm %r11,%r15,140(%r15)# Restore necessary registers.
- 4003b8: 07 fe br %r14 # return to do program exit
-}
-
-
-Compiler updates
-----------------
-
-main(int argc,char *argv[])
-{
- 4004fc: 90 7f f0 1c stm %r7,%r15,28(%r15)
- 400500: a7 d5 00 04 bras %r13,400508 <main+0xc>
- 400504: 00 40 04 f4 .long 0x004004f4
- # compiler now puts constant pool in code to so it saves an instruction
- 400508: 18 0f lr %r0,%r15
- 40050a: a7 fa ff a0 ahi %r15,-96
- 40050e: 50 00 f0 00 st %r0,0(%r15)
- return(test(5));
- 400512: 58 10 d0 00 l %r1,0(%r13)
- 400516: a7 28 00 05 lhi %r2,5
- 40051a: 0d e1 basr %r14,%r1
- # compiler adds 1 extra instruction to epilogue this is done to
- # avoid processor pipeline stalls owing to data dependencies on g5 &
- # above as register 14 in the old code was needed directly after being loaded
- # by the lm %r11,%r15,140(%r15) for the br %14.
- 40051c: 58 40 f0 98 l %r4,152(%r15)
- 400520: 98 7f f0 7c lm %r7,%r15,124(%r15)
- 400524: 07 f4 br %r4
-}
-
-
-Hartmut ( our compiler developer ) also has been threatening to take out the
-stack backchain in optimised code as this also causes pipeline stalls, you
-have been warned.
-
-64 bit z/Architecture code disassembly
---------------------------------------
-
-If you understand the stuff above you'll understand the stuff
-below too so I'll avoid repeating myself & just say that
-some of the instructions have g's on the end of them to indicate
-they are 64 bit & the stack offsets are a bigger,
-the only other difference you'll find between 32 & 64 bit is that
-we now use f4 & f6 for floating point arguments on 64 bit.
-00000000800005b0 <test>:
-int test(int b)
-{
- return(5+b);
- 800005b0: a7 2a 00 05 ahi %r2,5
- 800005b4: b9 14 00 22 lgfr %r2,%r2 # downcast to integer
- 800005b8: 07 fe br %r14
- 800005ba: 07 07 bcr 0,%r7
-
-
-}
-
-00000000800005bc <main>:
-main(int argc,char *argv[])
-{
- 800005bc: eb bf f0 58 00 24 stmg %r11,%r15,88(%r15)
- 800005c2: b9 04 00 1f lgr %r1,%r15
- 800005c6: a7 fb ff 60 aghi %r15,-160
- 800005ca: e3 10 f0 00 00 24 stg %r1,0(%r15)
- return(test(5));
- 800005d0: a7 29 00 05 lghi %r2,5
- # brasl allows jumps > 64k & is overkill here bras would do fune
- 800005d4: c0 e5 ff ff ff ee brasl %r14,800005b0 <test>
- 800005da: e3 40 f1 10 00 04 lg %r4,272(%r15)
- 800005e0: eb bf f0 f8 00 04 lmg %r11,%r15,248(%r15)
- 800005e6: 07 f4 br %r4
-}
-
-
-
-Compiling programs for debugging on Linux for s/390 & z/Architecture
-====================================================================
--gdwarf-2 now works it should be considered the default debugging
-format for s/390 & z/Architecture as it is more reliable for debugging
-shared libraries, normal -g debugging works much better now
-Thanks to the IBM java compiler developers bug reports.
-
-This is typically done adding/appending the flags -g or -gdwarf-2 to the
-CFLAGS & LDFLAGS variables Makefile of the program concerned.
-
-If using gdb & you would like accurate displays of registers &
- stack traces compile without optimisation i.e make sure
-that there is no -O2 or similar on the CFLAGS line of the Makefile &
-the emitted gcc commands, obviously this will produce worse code
-( not advisable for shipment ) but it is an aid to the debugging process.
-
-This aids debugging because the compiler will copy parameters passed in
-in registers onto the stack so backtracing & looking at passed in
-parameters will work, however some larger programs which use inline functions
-will not compile without optimisation.
-
-Debugging with optimisation has since much improved after fixing
-some bugs, please make sure you are using gdb-5.0 or later developed
-after Nov'2000.
-
-
-
-Debugging under VM
-==================
-
-Notes
------
-Addresses & values in the VM debugger are always hex never decimal
-Address ranges are of the format <HexValue1>-<HexValue2> or
-<HexValue1>.<HexValue2>
-For example, the address range 0x2000 to 0x3000 can be described as 2000-3000
-or 2000.1000
-
-The VM Debugger is case insensitive.
-
-VM's strengths are usually other debuggers weaknesses you can get at any
-resource no matter how sensitive e.g. memory management resources, change
-address translation in the PSW. For kernel hacking you will reap dividends if
-you get good at it.
-
-The VM Debugger displays operators but not operands, and also the debugger
-displays useful information on the same line as the author of the code probably
-felt that it was a good idea not to go over the 80 columns on the screen.
-This isn't as unintuitive as it may seem as the s/390 instructions are easy to
-decode mentally and you can make a good guess at a lot of them as all the
-operands are nibble (half byte aligned).
-So if you have an objdump listing by hand, it is quite easy to follow, and if
-you don't have an objdump listing keep a copy of the s/390 Reference Summary
-or alternatively the s/390 principles of operation next to you.
-e.g. even I can guess that
-0001AFF8' LR 180F CC 0
-is a ( load register ) lr r0,r15
-
-Also it is very easy to tell the length of a 390 instruction from the 2 most
-significant bits in the instruction (not that this info is really useful except
-if you are trying to make sense of a hexdump of code).
-Here is a table
-Bits Instruction Length
-------------------------------------------
-00 2 Bytes
-01 4 Bytes
-10 4 Bytes
-11 6 Bytes
-
-The debugger also displays other useful info on the same line such as the
-addresses being operated on destination addresses of branches & condition codes.
-e.g.
-00019736' AHI A7DAFF0E CC 1
-000198BA' BRC A7840004 -> 000198C2' CC 0
-000198CE' STM 900EF068 >> 0FA95E78 CC 2
-
-
-
-Useful VM debugger commands
----------------------------
-
-I suppose I'd better mention this before I start
-to list the current active traces do
-Q TR
-there can be a maximum of 255 of these per set
-( more about trace sets later ).
-To stop traces issue a
-TR END.
-To delete a particular breakpoint issue
-TR DEL <breakpoint number>
-
-The PA1 key drops to CP mode so you can issue debugger commands,
-Doing alt c (on my 3270 console at least ) clears the screen.
-hitting b <enter> comes back to the running operating system
-from cp mode ( in our case linux ).
-It is typically useful to add shortcuts to your profile.exec file
-if you have one ( this is roughly equivalent to autoexec.bat in DOS ).
-file here are a few from mine.
-/* this gives me command history on issuing f12 */
-set pf12 retrieve
-/* this continues */
-set pf8 imm b
-/* goes to trace set a */
-set pf1 imm tr goto a
-/* goes to trace set b */
-set pf2 imm tr goto b
-/* goes to trace set c */
-set pf3 imm tr goto c
-
-
-
-Instruction Tracing
--------------------
-Setting a simple breakpoint
-TR I PSWA <address>
-To debug a particular function try
-TR I R <function address range>
-TR I on its own will single step.
-TR I DATA <MNEMONIC> <OPTIONAL RANGE> will trace for particular mnemonics
-e.g.
-TR I DATA 4D R 0197BC.4000
-will trace for BAS'es ( opcode 4D ) in the range 0197BC.4000
-if you were inclined you could add traces for all branch instructions &
-suffix them with the run prefix so you would have a backtrace on screen
-when a program crashes.
-TR BR <INTO OR FROM> will trace branches into or out of an address.
-e.g.
-TR BR INTO 0 is often quite useful if a program is getting awkward & deciding
-to branch to 0 & crashing as this will stop at the address before in jumps to 0.
-TR I R <address range> RUN cmd d g
-single steps a range of addresses but stays running &
-displays the gprs on each step.
-
-
-
-Displaying & modifying Registers
---------------------------------
-D G will display all the gprs
-Adding a extra G to all the commands is necessary to access the full 64 bit
-content in VM on z/Architecture. Obviously this isn't required for access
-registers as these are still 32 bit.
-e.g. DGG instead of DG
-D X will display all the control registers
-D AR will display all the access registers
-D AR4-7 will display access registers 4 to 7
-CPU ALL D G will display the GRPS of all CPUS in the configuration
-D PSW will display the current PSW
-st PSW 2000 will put the value 2000 into the PSW &
-cause crash your machine.
-D PREFIX displays the prefix offset
-
-
-Displaying Memory
------------------
-To display memory mapped using the current PSW's mapping try
-D <range>
-To make VM display a message each time it hits a particular address and
-continue try
-D I<range> will disassemble/display a range of instructions.
-ST addr 32 bit word will store a 32 bit aligned address
-D T<range> will display the EBCDIC in an address (if you are that way inclined)
-D R<range> will display real addresses ( without DAT ) but with prefixing.
-There are other complex options to display if you need to get at say home space
-but are in primary space the easiest thing to do is to temporarily
-modify the PSW to the other addressing mode, display the stuff & then
-restore it.
-
-
-
-Hints
------
-If you want to issue a debugger command without halting your virtual machine
-with the PA1 key try prefixing the command with #CP e.g.
-#cp tr i pswa 2000
-also suffixing most debugger commands with RUN will cause them not
-to stop just display the mnemonic at the current instruction on the console.
-If you have several breakpoints you want to put into your program &
-you get fed up of cross referencing with System.map
-you can do the following trick for several symbols.
-grep do_signal System.map
-which emits the following among other things
-0001f4e0 T do_signal
-now you can do
-
-TR I PSWA 0001f4e0 cmd msg * do_signal
-This sends a message to your own console each time do_signal is entered.
-( As an aside I wrote a perl script once which automatically generated a REXX
-script with breakpoints on every kernel procedure, this isn't a good idea
-because there are thousands of these routines & VM can only set 255 breakpoints
-at a time so you nearly had to spend as long pruning the file down as you would
-entering the msgs by hand), however, the trick might be useful for a single
-object file. In the 3270 terminal emulator x3270 there is a very useful option
-in the file menu called "Save Screen In File" - this is very good for keeping a
-copy of traces.
-
-From CMS help <command name> will give you online help on a particular command.
-e.g.
-HELP DISPLAY
-
-Also CP has a file called profile.exec which automatically gets called
-on startup of CMS ( like autoexec.bat ), keeping on a DOS analogy session
-CP has a feature similar to doskey, it may be useful for you to
-use profile.exec to define some keystrokes.
-e.g.
-SET PF9 IMM B
-This does a single step in VM on pressing F8.
-SET PF10 ^
-This sets up the ^ key.
-which can be used for ^c (ctrl-c),^z (ctrl-z) which can't be typed directly
-into some 3270 consoles.
-SET PF11 ^-
-This types the starting keystrokes for a sysrq see SysRq below.
-SET PF12 RETRIEVE
-This retrieves command history on pressing F12.
-
-
-Sometimes in VM the display is set up to scroll automatically this
-can be very annoying if there are messages you wish to look at
-to stop this do
-TERM MORE 255 255
-This will nearly stop automatic screen updates, however it will
-cause a denial of service if lots of messages go to the 3270 console,
-so it would be foolish to use this as the default on a production machine.
-
-
-Tracing particular processes
-----------------------------
-The kernel's text segment is intentionally at an address in memory that it will
-very seldom collide with text segments of user programs ( thanks Martin ),
-this simplifies debugging the kernel.
-However it is quite common for user processes to have addresses which collide
-this can make debugging a particular process under VM painful under normal
-circumstances as the process may change when doing a
-TR I R <address range>.
-Thankfully after reading VM's online help I figured out how to debug
-I particular process.
-
-Your first problem is to find the STD ( segment table designation )
-of the program you wish to debug.
-There are several ways you can do this here are a few
-1) objdump --syms <program to be debugged> | grep main
-To get the address of main in the program.
-tr i pswa <address of main>
-Start the program, if VM drops to CP on what looks like the entry
-point of the main function this is most likely the process you wish to debug.
-Now do a D X13 or D XG13 on z/Architecture.
-On 31 bit the STD is bits 1-19 ( the STO segment table origin )
-& 25-31 ( the STL segment table length ) of CR13.
-now type
-TR I R STD <CR13's value> 0.7fffffff
-e.g.
-TR I R STD 8F32E1FF 0.7fffffff
-Another very useful variation is
-TR STORE INTO STD <CR13's value> <address range>
-for finding out when a particular variable changes.
-
-An alternative way of finding the STD of a currently running process
-is to do the following, ( this method is more complex but
-could be quite convenient if you aren't updating the kernel much &
-so your kernel structures will stay constant for a reasonable period of
-time ).
-
-grep task /proc/<pid>/status
-from this you should see something like
-task: 0f160000 ksp: 0f161de8 pt_regs: 0f161f68
-This now gives you a pointer to the task structure.
-Now make CC:="s390-gcc -g" kernel/sched.s
-To get the task_struct stabinfo.
-( task_struct is defined in include/linux/sched.h ).
-Now we want to look at
-task->active_mm->pgd
-on my machine the active_mm in the task structure stab is
-active_mm:(4,12),672,32
-its offset is 672/8=84=0x54
-the pgd member in the mm_struct stab is
-pgd:(4,6)=*(29,5),96,32
-so its offset is 96/8=12=0xc
-
-so we'll
-hexdump -s 0xf160054 /dev/mem | more
-i.e. task_struct+active_mm offset
-to look at the active_mm member
-f160054 0fee cc60 0019 e334 0000 0000 0000 0011
-hexdump -s 0x0feecc6c /dev/mem | more
-i.e. active_mm+pgd offset
-feecc6c 0f2c 0000 0000 0001 0000 0001 0000 0010
-we get something like
-now do
-TR I R STD <pgd|0x7f> 0.7fffffff
-i.e. the 0x7f is added because the pgd only
-gives the page table origin & we need to set the low bits
-to the maximum possible segment table length.
-TR I R STD 0f2c007f 0.7fffffff
-on z/Architecture you'll probably need to do
-TR I R STD <pgd|0x7> 0.ffffffffffffffff
-to set the TableType to 0x1 & the Table length to 3.
-
-
-
-Tracing Program Exceptions
---------------------------
-If you get a crash which says something like
-illegal operation or specification exception followed by a register dump
-You can restart linux & trace these using the tr prog <range or value> trace
-option.
-
-
-The most common ones you will normally be tracing for is
-1=operation exception
-2=privileged operation exception
-4=protection exception
-5=addressing exception
-6=specification exception
-10=segment translation exception
-11=page translation exception
-
-The full list of these is on page 22 of the current s/390 Reference Summary.
-e.g.
-tr prog 10 will trace segment translation exceptions.
-tr prog on its own will trace all program interruption codes.
-
-Trace Sets
-----------
-On starting VM you are initially in the INITIAL trace set.
-You can do a Q TR to verify this.
-If you have a complex tracing situation where you wish to wait for instance
-till a driver is open before you start tracing IO, but know in your
-heart that you are going to have to make several runs through the code till you
-have a clue whats going on.
-
-What you can do is
-TR I PSWA <Driver open address>
-hit b to continue till breakpoint
-reach the breakpoint
-now do your
-TR GOTO B
-TR IO 7c08-7c09 inst int run
-or whatever the IO channels you wish to trace are & hit b
-
-To got back to the initial trace set do
-TR GOTO INITIAL
-& the TR I PSWA <Driver open address> will be the only active breakpoint again.
-
-
-Tracing linux syscalls under VM
--------------------------------
-Syscalls are implemented on Linux for S390 by the Supervisor call instruction
-(SVC). There 256 possibilities of these as the instruction is made up of a 0xA
-opcode and the second byte being the syscall number. They are traced using the
-simple command:
-TR SVC <Optional value or range>
-the syscalls are defined in linux/arch/s390/include/asm/unistd.h
-e.g. to trace all file opens just do
-TR SVC 5 ( as this is the syscall number of open )
-
-
-SMP Specific commands
----------------------
-To find out how many cpus you have
-Q CPUS displays all the CPU's available to your virtual machine
-To find the cpu that the current cpu VM debugger commands are being directed at
-do Q CPU to change the current cpu VM debugger commands are being directed at do
-CPU <desired cpu no>
-
-On a SMP guest issue a command to all CPUs try prefixing the command with cpu
-all. To issue a command to a particular cpu try cpu <cpu number> e.g.
-CPU 01 TR I R 2000.3000
-If you are running on a guest with several cpus & you have a IO related problem
-& cannot follow the flow of code but you know it isn't smp related.
-from the bash prompt issue
-shutdown -h now or halt.
-do a Q CPUS to find out how many cpus you have
-detach each one of them from cp except cpu 0
-by issuing a
-DETACH CPU 01-(number of cpus in configuration)
-& boot linux again.
-TR SIGP will trace inter processor signal processor instructions.
-DEFINE CPU 01-(number in configuration)
-will get your guests cpus back.
-
-
-Help for displaying ascii textstrings
--------------------------------------
-On the very latest VM Nucleus'es VM can now display ascii
-( thanks Neale for the hint ) by doing
-D TX<lowaddr>.<len>
-e.g.
-D TX0.100
-
-Alternatively
-=============
-Under older VM debuggers (I love EBDIC too) you can use following little
-program which converts a command line of hex digits to ascii text. It can be
-compiled under linux and you can copy the hex digits from your x3270 terminal
-to your xterm if you are debugging from a linuxbox.
-
-This is quite useful when looking at a parameter passed in as a text string
-under VM ( unless you are good at decoding ASCII in your head ).
-
-e.g. consider tracing an open syscall
-TR SVC 5
-We have stopped at a breakpoint
-000151B0' SVC 0A05 -> 0001909A' CC 0
-
-D 20.8 to check the SVC old psw in the prefix area and see was it from userspace
-(for the layout of the prefix area consult the "Fixed Storage Locations"
-chapter of the s/390 Reference Summary if you have it available).
-V00000020 070C2000 800151B2
-The problem state bit wasn't set & it's also too early in the boot sequence
-for it to be a userspace SVC if it was we would have to temporarily switch the
-psw to user space addressing so we could get at the first parameter of the open
-in gpr2.
-Next do a
-D G2
-GPR 2 = 00014CB4
-Now display what gpr2 is pointing to
-D 00014CB4.20
-V00014CB4 2F646576 2F636F6E 736F6C65 00001BF5
-V00014CC4 FC00014C B4001001 E0001000 B8070707
-Now copy the text till the first 00 hex ( which is the end of the string
-to an xterm & do hex2ascii on it.
-hex2ascii 2F646576 2F636F6E 736F6C65 00
-outputs
-Decoded Hex:=/ d e v / c o n s o l e 0x00
-We were opening the console device,
-
-You can compile the code below yourself for practice :-),
-/*
- * hex2ascii.c
- * a useful little tool for converting a hexadecimal command line to ascii
- *
- * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
- * (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation.
- */
-#include <stdio.h>
-
-int main(int argc,char *argv[])
-{
- int cnt1,cnt2,len,toggle=0;
- int startcnt=1;
- unsigned char c,hex;
-
- if(argc>1&&(strcmp(argv[1],"-a")==0))
- startcnt=2;
- printf("Decoded Hex:=");
- for(cnt1=startcnt;cnt1<argc;cnt1++)
- {
- len=strlen(argv[cnt1]);
- for(cnt2=0;cnt2<len;cnt2++)
- {
- c=argv[cnt1][cnt2];
- if(c>='0'&&c<='9')
- c=c-'0';
- if(c>='A'&&c<='F')
- c=c-'A'+10;
- if(c>='a'&&c<='f')
- c=c-'a'+10;
- switch(toggle)
- {
- case 0:
- hex=c<<4;
- toggle=1;
- break;
- case 1:
- hex+=c;
- if(hex<32||hex>127)
- {
- if(startcnt==1)
- printf("0x%02X ",(int)hex);
- else
- printf(".");
- }
- else
- {
- printf("%c",hex);
- if(startcnt==1)
- printf(" ");
- }
- toggle=0;
- break;
- }
- }
- }
- printf("\n");
-}
-
-
-
-
-Stack tracing under VM
-----------------------
-A basic backtrace
------------------
-
-Here are the tricks I use 9 out of 10 times it works pretty well,
-
-When your backchain reaches a dead end
---------------------------------------
-This can happen when an exception happens in the kernel and the kernel is
-entered twice. If you reach the NULL pointer at the end of the back chain you
-should be able to sniff further back if you follow the following tricks.
-1) A kernel address should be easy to recognise since it is in
-primary space & the problem state bit isn't set & also
-The Hi bit of the address is set.
-2) Another backchain should also be easy to recognise since it is an
-address pointing to another address approximately 100 bytes or 0x70 hex
-behind the current stackpointer.
-
-
-Here is some practice.
-boot the kernel & hit PA1 at some random time
-d g to display the gprs, this should display something like
-GPR 0 = 00000001 00156018 0014359C 00000000
-GPR 4 = 00000001 001B8888 000003E0 00000000
-GPR 8 = 00100080 00100084 00000000 000FE000
-GPR 12 = 00010400 8001B2DC 8001B36A 000FFED8
-Note that GPR14 is a return address but as we are real men we are going to
-trace the stack.
-display 0x40 bytes after the stack pointer.
-
-V000FFED8 000FFF38 8001B838 80014C8E 000FFF38
-V000FFEE8 00000000 00000000 000003E0 00000000
-V000FFEF8 00100080 00100084 00000000 000FE000
-V000FFF08 00010400 8001B2DC 8001B36A 000FFED8
-
-
-Ah now look at whats in sp+56 (sp+0x38) this is 8001B36A our saved r14 if
-you look above at our stackframe & also agrees with GPR14.
-
-now backchain
-d 000FFF38.40
-we now are taking the contents of SP to get our first backchain.
-
-V000FFF38 000FFFA0 00000000 00014995 00147094
-V000FFF48 00147090 001470A0 000003E0 00000000
-V000FFF58 00100080 00100084 00000000 001BF1D0
-V000FFF68 00010400 800149BA 80014CA6 000FFF38
-
-This displays a 2nd return address of 80014CA6
-
-now do d 000FFFA0.40 for our 3rd backchain
-
-V000FFFA0 04B52002 0001107F 00000000 00000000
-V000FFFB0 00000000 00000000 FF000000 0001107F
-V000FFFC0 00000000 00000000 00000000 00000000
-V000FFFD0 00010400 80010802 8001085A 000FFFA0
-
-
-our 3rd return address is 8001085A
-
-as the 04B52002 looks suspiciously like rubbish it is fair to assume that the
-kernel entry routines for the sake of optimisation don't set up a backchain.
-
-now look at System.map to see if the addresses make any sense.
-
-grep -i 0001b3 System.map
-outputs among other things
-0001b304 T cpu_idle
-so 8001B36A
-is cpu_idle+0x66 ( quiet the cpu is asleep, don't wake it )
-
-
-grep -i 00014 System.map
-produces among other things
-00014a78 T start_kernel
-so 0014CA6 is start_kernel+some hex number I can't add in my head.
-
-grep -i 00108 System.map
-this produces
-00010800 T _stext
-so 8001085A is _stext+0x5a
-
-Congrats you've done your first backchain.
-
-
-
-s/390 & z/Architecture IO Overview
-==================================
-
-I am not going to give a course in 390 IO architecture as this would take me
-quite a while and I'm no expert. Instead I'll give a 390 IO architecture
-summary for Dummies. If you have the s/390 principles of operation available
-read this instead. If nothing else you may find a few useful keywords in here
-and be able to use them on a web search engine to find more useful information.
-
-Unlike other bus architectures modern 390 systems do their IO using mostly
-fibre optics and devices such as tapes and disks can be shared between several
-mainframes. Also S390 can support up to 65536 devices while a high end PC based
-system might be choking with around 64.
-
-Here is some of the common IO terminology:
-
-Subchannel:
-This is the logical number most IO commands use to talk to an IO device. There
-can be up to 0x10000 (65536) of these in a configuration, typically there are a
-few hundred. Under VM for simplicity they are allocated contiguously, however
-on the native hardware they are not. They typically stay consistent between
-boots provided no new hardware is inserted or removed.
-Under Linux for s390 we use these as IRQ's and also when issuing an IO command
-(CLEAR SUBCHANNEL, HALT SUBCHANNEL, MODIFY SUBCHANNEL, RESUME SUBCHANNEL,
-START SUBCHANNEL, STORE SUBCHANNEL and TEST SUBCHANNEL). We use this as the ID
-of the device we wish to talk to. The most important of these instructions are
-START SUBCHANNEL (to start IO), TEST SUBCHANNEL (to check whether the IO
-completed successfully) and HALT SUBCHANNEL (to kill IO). A subchannel can have
-up to 8 channel paths to a device, this offers redundancy if one is not
-available.
-
-Device Number:
-This number remains static and is closely tied to the hardware. There are 65536
-of these, made up of a CHPID (Channel Path ID, the most significant 8 bits) and
-another lsb 8 bits. These remain static even if more devices are inserted or
-removed from the hardware. There is a 1 to 1 mapping between subchannels and
-device numbers, provided devices aren't inserted or removed.
-
-Channel Control Words:
-CCWs are linked lists of instructions initially pointed to by an operation
-request block (ORB), which is initially given to Start Subchannel (SSCH)
-command along with the subchannel number for the IO subsystem to process
-while the CPU continues executing normal code.
-CCWs come in two flavours, Format 0 (24 bit for backward compatibility) and
-Format 1 (31 bit). These are typically used to issue read and write (and many
-other) instructions. They consist of a length field and an absolute address
-field.
-Each IO typically gets 1 or 2 interrupts, one for channel end (primary status)
-when the channel is idle, and the second for device end (secondary status).
-Sometimes you get both concurrently. You check how the IO went on by issuing a
-TEST SUBCHANNEL at each interrupt, from which you receive an Interruption
-response block (IRB). If you get channel and device end status in the IRB
-without channel checks etc. your IO probably went okay. If you didn't you
-probably need to examine the IRB, extended status word etc.
-If an error occurs, more sophisticated control units have a facility known as
-concurrent sense. This means that if an error occurs Extended sense information
-will be presented in the Extended status word in the IRB. If not you have to
-issue a subsequent SENSE CCW command after the test subchannel.
-
-
-TPI (Test pending interrupt) can also be used for polled IO, but in
-multitasking multiprocessor systems it isn't recommended except for
-checking special cases (i.e. non looping checks for pending IO etc.).
-
-Store Subchannel and Modify Subchannel can be used to examine and modify
-operating characteristics of a subchannel (e.g. channel paths).
-
-Other IO related Terms:
-Sysplex: S390's Clustering Technology
-QDIO: S390's new high speed IO architecture to support devices such as gigabit
-ethernet, this architecture is also designed to be forward compatible with
-upcoming 64 bit machines.
-
-
-General Concepts
-
-Input Output Processors (IOP's) are responsible for communicating between
-the mainframe CPU's & the channel & relieve the mainframe CPU's from the
-burden of communicating with IO devices directly, this allows the CPU's to
-concentrate on data processing.
-
-IOP's can use one or more links ( known as channel paths ) to talk to each
-IO device. It first checks for path availability & chooses an available one,
-then starts ( & sometimes terminates IO ).
-There are two types of channel path: ESCON & the Parallel IO interface.
-
-IO devices are attached to control units, control units provide the
-logic to interface the channel paths & channel path IO protocols to
-the IO devices, they can be integrated with the devices or housed separately
-& often talk to several similar devices ( typical examples would be raid
-controllers or a control unit which connects to 1000 3270 terminals ).
-
-
- +---------------------------------------------------------------+
- | +-----+ +-----+ +-----+ +-----+ +----------+ +----------+ |
- | | CPU | | CPU | | CPU | | CPU | | Main | | Expanded | |
- | | | | | | | | | | Memory | | Storage | |
- | +-----+ +-----+ +-----+ +-----+ +----------+ +----------+ |
- |---------------------------------------------------------------+
- | IOP | IOP | IOP |
- |---------------------------------------------------------------
- | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C |
- ----------------------------------------------------------------
- || ||
- || Bus & Tag Channel Path || ESCON
- || ====================== || Channel
- || || || || Path
- +----------+ +----------+ +----------+
- | | | | | |
- | CU | | CU | | CU |
- | | | | | |
- +----------+ +----------+ +----------+
- | | | | |
-+----------+ +----------+ +----------+ +----------+ +----------+
-|I/O Device| |I/O Device| |I/O Device| |I/O Device| |I/O Device|
-+----------+ +----------+ +----------+ +----------+ +----------+
- CPU = Central Processing Unit
- C = Channel
- IOP = IP Processor
- CU = Control Unit
-
-The 390 IO systems come in 2 flavours the current 390 machines support both
-
-The Older 360 & 370 Interface,sometimes called the Parallel I/O interface,
-sometimes called Bus-and Tag & sometimes Original Equipment Manufacturers
-Interface (OEMI).
-
-This byte wide Parallel channel path/bus has parity & data on the "Bus" cable
-and control lines on the "Tag" cable. These can operate in byte multiplex mode
-for sharing between several slow devices or burst mode and monopolize the
-channel for the whole burst. Up to 256 devices can be addressed on one of these
-cables. These cables are about one inch in diameter. The maximum unextended
-length supported by these cables is 125 Meters but this can be extended up to
-2km with a fibre optic channel extended such as a 3044. The maximum burst speed
-supported is 4.5 megabytes per second. However, some really old processors
-support only transfer rates of 3.0, 2.0 & 1.0 MB/sec.
-One of these paths can be daisy chained to up to 8 control units.
-
-
-ESCON if fibre optic it is also called FICON
-Was introduced by IBM in 1990. Has 2 fibre optic cables and uses either leds or
-lasers for communication at a signaling rate of up to 200 megabits/sec. As
-10bits are transferred for every 8 bits info this drops to 160 megabits/sec
-and to 18.6 Megabytes/sec once control info and CRC are added. ESCON only
-operates in burst mode.
-
-ESCONs typical max cable length is 3km for the led version and 20km for the
-laser version known as XDF (extended distance facility). This can be further
-extended by using an ESCON director which triples the above mentioned ranges.
-Unlike Bus & Tag as ESCON is serial it uses a packet switching architecture,
-the standard Bus & Tag control protocol is however present within the packets.
-Up to 256 devices can be attached to each control unit that uses one of these
-interfaces.
-
-Common 390 Devices include:
-Network adapters typically OSA2,3172's,2116's & OSA-E gigabit ethernet adapters,
-Consoles 3270 & 3215 (a teletype emulated under linux for a line mode console).
-DASD's direct access storage devices ( otherwise known as hard disks ).
-Tape Drives.
-CTC ( Channel to Channel Adapters ),
-ESCON or Parallel Cables used as a very high speed serial link
-between 2 machines.
-
-
-Debugging IO on s/390 & z/Architecture under VM
-===============================================
-
-Now we are ready to go on with IO tracing commands under VM
-
-A few self explanatory queries:
-Q OSA
-Q CTC
-Q DISK ( This command is CMS specific )
-Q DASD
-
-
-
-
-
-
-Q OSA on my machine returns
-OSA 7C08 ON OSA 7C08 SUBCHANNEL = 0000
-OSA 7C09 ON OSA 7C09 SUBCHANNEL = 0001
-OSA 7C14 ON OSA 7C14 SUBCHANNEL = 0002
-OSA 7C15 ON OSA 7C15 SUBCHANNEL = 0003
-
-If you have a guest with certain privileges you may be able to see devices
-which don't belong to you. To avoid this, add the option V.
-e.g.
-Q V OSA
-
-Now using the device numbers returned by this command we will
-Trace the io starting up on the first device 7c08 & 7c09
-In our simplest case we can trace the
-start subchannels
-like TR SSCH 7C08-7C09
-or the halt subchannels
-or TR HSCH 7C08-7C09
-MSCH's ,STSCH's I think you can guess the rest
-
-A good trick is tracing all the IO's and CCWS and spooling them into the reader
-of another VM guest so he can ftp the logfile back to his own machine. I'll do
-a small bit of this and give you a look at the output.
-
-1) Spool stdout to VM reader
-SP PRT TO (another vm guest ) or * for the local vm guest
-2) Fill the reader with the trace
-TR IO 7c08-7c09 INST INT CCW PRT RUN
-3) Start up linux
-i 00c
-4) Finish the trace
-TR END
-5) close the reader
-C PRT
-6) list reader contents
-RDRLIST
-7) copy it to linux4's minidisk
-RECEIVE / LOG TXT A1 ( replace
-8)
-filel & press F11 to look at it
-You should see something like:
-
-00020942' SSCH B2334000 0048813C CC 0 SCH 0000 DEV 7C08
- CPA 000FFDF0 PARM 00E2C9C4 KEY 0 FPI C0 LPM 80
- CCW 000FFDF0 E4200100 00487FE8 0000 E4240100 ........
- IDAL 43D8AFE8
- IDAL 0FB76000
-00020B0A' I/O DEV 7C08 -> 000197BC' SCH 0000 PARM 00E2C9C4
-00021628' TSCH B2354000 >> 00488164 CC 0 SCH 0000 DEV 7C08
- CCWA 000FFDF8 DEV STS 0C SCH STS 00 CNT 00EC
- KEY 0 FPI C0 CC 0 CTLS 4007
-00022238' STSCH B2344000 >> 00488108 CC 0 SCH 0000 DEV 7C08
-
-If you don't like messing up your readed ( because you possibly booted from it )
-you can alternatively spool it to another readers guest.
-
-
-Other common VM device related commands
----------------------------------------------
-These commands are listed only because they have
-been of use to me in the past & may be of use to
-you too. For more complete info on each of the commands
-use type HELP <command> from CMS.
-detaching devices
-DET <devno range>
-ATT <devno range> <guest>
-attach a device to guest * for your own guest
-READY <devno> cause VM to issue a fake interrupt.
-
-The VARY command is normally only available to VM administrators.
-VARY ON PATH <path> TO <devno range>
-VARY OFF PATH <PATH> FROM <devno range>
-This is used to switch on or off channel paths to devices.
-
-Q CHPID <channel path ID>
-This displays state of devices using this channel path
-D SCHIB <subchannel>
-This displays the subchannel information SCHIB block for the device.
-this I believe is also only available to administrators.
-DEFINE CTC <devno>
-defines a virtual CTC channel to channel connection
-2 need to be defined on each guest for the CTC driver to use.
-COUPLE devno userid remote devno
-Joins a local virtual device to a remote virtual device
-( commonly used for the CTC driver ).
-
-Building a VM ramdisk under CMS which linux can use
-def vfb-<blocksize> <subchannel> <number blocks>
-blocksize is commonly 4096 for linux.
-Formatting it
-format <subchannel> <driver letter e.g. x> (blksize <blocksize>
-
-Sharing a disk between multiple guests
-LINK userid devno1 devno2 mode password
-
-
-
-GDB on S390
-===========
-N.B. if compiling for debugging gdb works better without optimisation
-( see Compiling programs for debugging )
-
-invocation
-----------
-gdb <victim program> <optional corefile>
-
-Online help
------------
-help: gives help on commands
-e.g.
-help
-help display
-Note gdb's online help is very good use it.
-
-
-Assembly
---------
-info registers: displays registers other than floating point.
-info all-registers: displays floating points as well.
-disassemble: disassembles
-e.g.
-disassemble without parameters will disassemble the current function
-disassemble $pc $pc+10
-
-Viewing & modifying variables
------------------------------
-print or p: displays variable or register
-e.g. p/x $sp will display the stack pointer
-
-display: prints variable or register each time program stops
-e.g.
-display/x $pc will display the program counter
-display argc
-
-undisplay : undo's display's
-
-info breakpoints: shows all current breakpoints
-
-info stack: shows stack back trace (if this doesn't work too well, I'll show
-you the stacktrace by hand below).
-
-info locals: displays local variables.
-
-info args: display current procedure arguments.
-
-set args: will set argc & argv each time the victim program is invoked.
-
-set <variable>=value
-set argc=100
-set $pc=0
-
-
-
-Modifying execution
--------------------
-step: steps n lines of sourcecode
-step steps 1 line.
-step 100 steps 100 lines of code.
-
-next: like step except this will not step into subroutines
-
-stepi: steps a single machine code instruction.
-e.g. stepi 100
-
-nexti: steps a single machine code instruction but will not step into
-subroutines.
-
-finish: will run until exit of the current routine
-
-run: (re)starts a program
-
-cont: continues a program
-
-quit: exits gdb.
-
-
-breakpoints
-------------
-
-break
-sets a breakpoint
-e.g.
-
-break main
-
-break *$pc
-
-break *0x400618
-
-Here's a really useful one for large programs
-rbr
-Set a breakpoint for all functions matching REGEXP
-e.g.
-rbr 390
-will set a breakpoint with all functions with 390 in their name.
-
-info breakpoints
-lists all breakpoints
-
-delete: delete breakpoint by number or delete them all
-e.g.
-delete 1 will delete the first breakpoint
-delete will delete them all
-
-watch: This will set a watchpoint ( usually hardware assisted ),
-This will watch a variable till it changes
-e.g.
-watch cnt, will watch the variable cnt till it changes.
-As an aside unfortunately gdb's, architecture independent watchpoint code
-is inconsistent & not very good, watchpoints usually work but not always.
-
-info watchpoints: Display currently active watchpoints
-
-condition: ( another useful one )
-Specify breakpoint number N to break only if COND is true.
-Usage is `condition N COND', where N is an integer and COND is an
-expression to be evaluated whenever breakpoint N is reached.
-
-
-
-User defined functions/macros
------------------------------
-define: ( Note this is very very useful,simple & powerful )
-usage define <name> <list of commands> end
-
-examples which you should consider putting into .gdbinit in your home directory
-define d
-stepi
-disassemble $pc $pc+10
-end
-
-define e
-nexti
-disassemble $pc $pc+10
-end
-
-
-Other hard to classify stuff
-----------------------------
-signal n:
-sends the victim program a signal.
-e.g. signal 3 will send a SIGQUIT.
-
-info signals:
-what gdb does when the victim receives certain signals.
-
-list:
-e.g.
-list lists current function source
-list 1,10 list first 10 lines of current file.
-list test.c:1,10
-
-
-directory:
-Adds directories to be searched for source if gdb cannot find the source.
-(note it is a bit sensitive about slashes)
-e.g. To add the root of the filesystem to the searchpath do
-directory //
-
-
-call <function>
-This calls a function in the victim program, this is pretty powerful
-e.g.
-(gdb) call printf("hello world")
-outputs:
-$1 = 11
-
-You might now be thinking that the line above didn't work, something extra had
-to be done.
-(gdb) call fflush(stdout)
-hello world$2 = 0
-As an aside the debugger also calls malloc & free under the hood
-to make space for the "hello world" string.
-
-
-
-hints
------
-1) command completion works just like bash
-( if you are a bad typist like me this really helps )
-e.g. hit br <TAB> & cursor up & down :-).
-
-2) if you have a debugging problem that takes a few steps to recreate
-put the steps into a file called .gdbinit in your current working directory
-if you have defined a few extra useful user defined commands put these in
-your home directory & they will be read each time gdb is launched.
-
-A typical .gdbinit file might be.
-break main
-run
-break runtime_exception
-cont
-
-
-stack chaining in gdb by hand
------------------------------
-This is done using a the same trick described for VM
-p/x (*($sp+56))&0x7fffffff get the first backchain.
-
-For z/Architecture
-Replace 56 with 112 & ignore the &0x7fffffff
-in the macros below & do nasty casts to longs like the following
-as gdb unfortunately deals with printed arguments as ints which
-messes up everything.
-i.e. here is a 3rd backchain dereference
-p/x *(long *)(***(long ***)$sp+112)
-
-
-this outputs
-$5 = 0x528f18
-on my machine.
-Now you can use
-info symbol (*($sp+56))&0x7fffffff
-you might see something like.
-rl_getc + 36 in section .text telling you what is located at address 0x528f18
-Now do.
-p/x (*(*$sp+56))&0x7fffffff
-This outputs
-$6 = 0x528ed0
-Now do.
-info symbol (*(*$sp+56))&0x7fffffff
-rl_read_key + 180 in section .text
-now do
-p/x (*(**$sp+56))&0x7fffffff
-& so on.
-
-Disassembling instructions without debug info
----------------------------------------------
-gdb typically complains if there is a lack of debugging
-symbols in the disassemble command with
-"No function contains specified address." To get around
-this do
-x/<number lines to disassemble>xi <address>
-e.g.
-x/20xi 0x400730
-
-
-
-Note: Remember gdb has history just like bash you don't need to retype the
-whole line just use the up & down arrows.
-
-
-
-For more info
--------------
-From your linuxbox do
-man gdb or info gdb.
-
-core dumps
-----------
-What a core dump ?,
-A core dump is a file generated by the kernel (if allowed) which contains the
-registers and all active pages of the program which has crashed.
-From this file gdb will allow you to look at the registers, stack trace and
-memory of the program as if it just crashed on your system. It is usually
-called core and created in the current working directory.
-This is very useful in that a customer can mail a core dump to a technical
-support department and the technical support department can reconstruct what
-happened. Provided they have an identical copy of this program with debugging
-symbols compiled in and the source base of this build is available.
-In short it is far more useful than something like a crash log could ever hope
-to be.
-
-Why have I never seen one ?.
-Probably because you haven't used the command
-ulimit -c unlimited in bash
-to allow core dumps, now do
-ulimit -a
-to verify that the limit was accepted.
-
-A sample core dump
-To create this I'm going to do
-ulimit -c unlimited
-gdb
-to launch gdb (my victim app. ) now be bad & do the following from another
-telnet/xterm session to the same machine
-ps -aux | grep gdb
-kill -SIGSEGV <gdb's pid>
-or alternatively use killall -SIGSEGV gdb if you have the killall command.
-Now look at the core dump.
-./gdb core
-Displays the following
-GNU gdb 4.18
-Copyright 1998 Free Software Foundation, Inc.
-GDB is free software, covered by the GNU General Public License, and you are
-welcome to change it and/or distribute copies of it under certain conditions.
-Type "show copying" to see the conditions.
-There is absolutely no warranty for GDB. Type "show warranty" for details.
-This GDB was configured as "s390-ibm-linux"...
-Core was generated by `./gdb'.
-Program terminated with signal 11, Segmentation fault.
-Reading symbols from /usr/lib/libncurses.so.4...done.
-Reading symbols from /lib/libm.so.6...done.
-Reading symbols from /lib/libc.so.6...done.
-Reading symbols from /lib/ld-linux.so.2...done.
-#0 0x40126d1a in read () from /lib/libc.so.6
-Setting up the environment for debugging gdb.
-Breakpoint 1 at 0x4dc6f8: file utils.c, line 471.
-Breakpoint 2 at 0x4d87a4: file top.c, line 2609.
-(top-gdb) info stack
-#0 0x40126d1a in read () from /lib/libc.so.6
-#1 0x528f26 in rl_getc (stream=0x7ffffde8) at input.c:402
-#2 0x528ed0 in rl_read_key () at input.c:381
-#3 0x5167e6 in readline_internal_char () at readline.c:454
-#4 0x5168ee in readline_internal_charloop () at readline.c:507
-#5 0x51692c in readline_internal () at readline.c:521
-#6 0x5164fe in readline (prompt=0x7ffff810)
- at readline.c:349
-#7 0x4d7a8a in command_line_input (prompt=0x564420 "(gdb) ", repeat=1,
- annotation_suffix=0x4d6b44 "prompt") at top.c:2091
-#8 0x4d6cf0 in command_loop () at top.c:1345
-#9 0x4e25bc in main (argc=1, argv=0x7ffffdf4) at main.c:635
-
-
-LDD
-===
-This is a program which lists the shared libraries which a library needs,
-Note you also get the relocations of the shared library text segments which
-help when using objdump --source.
-e.g.
- ldd ./gdb
-outputs
-libncurses.so.4 => /usr/lib/libncurses.so.4 (0x40018000)
-libm.so.6 => /lib/libm.so.6 (0x4005e000)
-libc.so.6 => /lib/libc.so.6 (0x40084000)
-/lib/ld-linux.so.2 => /lib/ld-linux.so.2 (0x40000000)
-
-
-Debugging shared libraries
-==========================
-Most programs use shared libraries, however it can be very painful
-when you single step instruction into a function like printf for the
-first time & you end up in functions like _dl_runtime_resolve this is
-the ld.so doing lazy binding, lazy binding is a concept in ELF where
-shared library functions are not loaded into memory unless they are
-actually used, great for saving memory but a pain to debug.
-To get around this either relink the program -static or exit gdb type
-export LD_BIND_NOW=true this will stop lazy binding & restart the gdb'ing
-the program in question.
-
-
-
-Debugging modules
-=================
-As modules are dynamically loaded into the kernel their address can be
-anywhere to get around this use the -m option with insmod to emit a load
-map which can be piped into a file if required.
-
-The proc file system
-====================
-What is it ?.
-It is a filesystem created by the kernel with files which are created on demand
-by the kernel if read, or can be used to modify kernel parameters,
-it is a powerful concept.
-
-e.g.
-
-cat /proc/sys/net/ipv4/ip_forward
-On my machine outputs
-0
-telling me ip_forwarding is not on to switch it on I can do
-echo 1 > /proc/sys/net/ipv4/ip_forward
-cat it again
-cat /proc/sys/net/ipv4/ip_forward
-On my machine now outputs
-1
-IP forwarding is on.
-There is a lot of useful info in here best found by going in and having a look
-around, so I'll take you through some entries I consider important.
-
-All the processes running on the machine have their own entry defined by
-/proc/<pid>
-So lets have a look at the init process
-cd /proc/1
-
-cat cmdline
-emits
-init [2]
-
-cd /proc/1/fd
-This contains numerical entries of all the open files,
-some of these you can cat e.g. stdout (2)
-
-cat /proc/29/maps
-on my machine emits
-
-00400000-00478000 r-xp 00000000 5f:00 4103 /bin/bash
-00478000-0047e000 rw-p 00077000 5f:00 4103 /bin/bash
-0047e000-00492000 rwxp 00000000 00:00 0
-40000000-40015000 r-xp 00000000 5f:00 14382 /lib/ld-2.1.2.so
-40015000-40016000 rw-p 00014000 5f:00 14382 /lib/ld-2.1.2.so
-40016000-40017000 rwxp 00000000 00:00 0
-40017000-40018000 rw-p 00000000 00:00 0
-40018000-4001b000 r-xp 00000000 5f:00 14435 /lib/libtermcap.so.2.0.8
-4001b000-4001c000 rw-p 00002000 5f:00 14435 /lib/libtermcap.so.2.0.8
-4001c000-4010d000 r-xp 00000000 5f:00 14387 /lib/libc-2.1.2.so
-4010d000-40111000 rw-p 000f0000 5f:00 14387 /lib/libc-2.1.2.so
-40111000-40114000 rw-p 00000000 00:00 0
-40114000-4011e000 r-xp 00000000 5f:00 14408 /lib/libnss_files-2.1.2.so
-4011e000-4011f000 rw-p 00009000 5f:00 14408 /lib/libnss_files-2.1.2.so
-7fffd000-80000000 rwxp ffffe000 00:00 0
-
-
-Showing us the shared libraries init uses where they are in memory
-& memory access permissions for each virtual memory area.
-
-/proc/1/cwd is a softlink to the current working directory.
-/proc/1/root is the root of the filesystem for this process.
-
-/proc/1/mem is the current running processes memory which you
-can read & write to like a file.
-strace uses this sometimes as it is a bit faster than the
-rather inefficient ptrace interface for peeking at DATA.
-
-
-cat status
-
-Name: init
-State: S (sleeping)
-Pid: 1
-PPid: 0
-Uid: 0 0 0 0
-Gid: 0 0 0 0
-Groups:
-VmSize: 408 kB
-VmLck: 0 kB
-VmRSS: 208 kB
-VmData: 24 kB
-VmStk: 8 kB
-VmExe: 368 kB
-VmLib: 0 kB
-SigPnd: 0000000000000000
-SigBlk: 0000000000000000
-SigIgn: 7fffffffd7f0d8fc
-SigCgt: 00000000280b2603
-CapInh: 00000000fffffeff
-CapPrm: 00000000ffffffff
-CapEff: 00000000fffffeff
-
-User PSW: 070de000 80414146
-task: 004b6000 tss: 004b62d8 ksp: 004b7ca8 pt_regs: 004b7f68
-User GPRS:
-00000400 00000000 0000000b 7ffffa90
-00000000 00000000 00000000 0045d9f4
-0045cafc 7ffffa90 7fffff18 0045cb08
-00010400 804039e8 80403af8 7ffff8b0
-User ACRS:
-00000000 00000000 00000000 00000000
-00000001 00000000 00000000 00000000
-00000000 00000000 00000000 00000000
-00000000 00000000 00000000 00000000
-Kernel BackChain CallChain BackChain CallChain
- 004b7ca8 8002bd0c 004b7d18 8002b92c
- 004b7db8 8005cd50 004b7e38 8005d12a
- 004b7f08 80019114
-Showing among other things memory usage & status of some signals &
-the processes'es registers from the kernel task_structure
-as well as a backchain which may be useful if a process crashes
-in the kernel for some unknown reason.
-
-Some driver debugging techniques
-================================
-debug feature
--------------
-Some of our drivers now support a "debug feature" in
-/proc/s390dbf see s390dbf.txt in the linux/Documentation directory
-for more info.
-e.g.
-to switch on the lcs "debug feature"
-echo 5 > /proc/s390dbf/lcs/level
-& then after the error occurred.
-cat /proc/s390dbf/lcs/sprintf >/logfile
-the logfile now contains some information which may help
-tech support resolve a problem in the field.
-
-
-
-high level debugging network drivers
-------------------------------------
-ifconfig is a quite useful command
-it gives the current state of network drivers.
-
-If you suspect your network device driver is dead
-one way to check is type
-ifconfig <network device>
-e.g. tr0
-You should see something like
-tr0 Link encap:16/4 Mbps Token Ring (New) HWaddr 00:04:AC:20:8E:48
- inet addr:9.164.185.132 Bcast:9.164.191.255 Mask:255.255.224.0
- UP BROADCAST RUNNING MULTICAST MTU:2000 Metric:1
- RX packets:246134 errors:0 dropped:0 overruns:0 frame:0
- TX packets:5 errors:0 dropped:0 overruns:0 carrier:0
- collisions:0 txqueuelen:100
-
-if the device doesn't say up
-try
-/etc/rc.d/init.d/network start
-( this starts the network stack & hopefully calls ifconfig tr0 up ).
-ifconfig looks at the output of /proc/net/dev and presents it in a more
-presentable form.
-Now ping the device from a machine in the same subnet.
-if the RX packets count & TX packets counts don't increment you probably
-have problems.
-next
-cat /proc/net/arp
-Do you see any hardware addresses in the cache if not you may have problems.
-Next try
-ping -c 5 <broadcast_addr> i.e. the Bcast field above in the output of
-ifconfig. Do you see any replies from machines other than the local machine
-if not you may have problems. also if the TX packets count in ifconfig
-hasn't incremented either you have serious problems in your driver
-(e.g. the txbusy field of the network device being stuck on )
-or you may have multiple network devices connected.
-
-
-chandev
--------
-There is a new device layer for channel devices, some
-drivers e.g. lcs are registered with this layer.
-If the device uses the channel device layer you'll be
-able to find what interrupts it uses & the current state
-of the device.
-See the manpage chandev.8 &type cat /proc/chandev for more info.
-
-
-SysRq
-=====
-This is now supported by linux for s/390 & z/Architecture.
-To enable it do compile the kernel with
-Kernel Hacking -> Magic SysRq Key Enabled
-echo "1" > /proc/sys/kernel/sysrq
-also type
-echo "8" >/proc/sys/kernel/printk
-To make printk output go to console.
-On 390 all commands are prefixed with
-^-
-e.g.
-^-t will show tasks.
-^-? or some unknown command will display help.
-The sysrq key reading is very picky ( I have to type the keys in an
- xterm session & paste them into the x3270 console )
-& it may be wise to predefine the keys as described in the VM hints above
-
-This is particularly useful for syncing disks unmounting & rebooting
-if the machine gets partially hung.
-
-Read Documentation/admin-guide/sysrq.rst for more info
-
-References:
-===========
-Enterprise Systems Architecture Reference Summary
-Enterprise Systems Architecture Principles of Operation
-Hartmut Penners s390 stack frame sheet.
-IBM Mainframe Channel Attachment a technology brief from a CISCO webpage
-Various bits of man & info pages of Linux.
-Linux & GDB source.
-Various info & man pages.
-CMS Help on tracing commands.
-Linux for s/390 Elf Application Binary Interface
-Linux for z/Series Elf Application Binary Interface ( Both Highly Recommended )
-z/Architecture Principles of Operation SA22-7832-00
-Enterprise Systems Architecture/390 Reference Summary SA22-7209-01 & the
-Enterprise Systems Architecture/390 Principles of Operation SA22-7201-05
-
-Special Thanks
-==============
-Special thanks to Neale Ferguson who maintains a much
-prettier HTML version of this page at
-http://linuxvm.org/penguinvm/
-Bob Grainger Stefan Bader & others for reporting bugs
+===========================
Linux for S/390 and zSeries
+===========================
Common Device Support (CDS)
Device Driver I/O Support Routines
-Authors : Ingo Adlung
- Cornelia Huck
+Authors:
+ - Ingo Adlung
+ - Cornelia Huck
Copyright, IBM Corp. 1999-2002
Introduction
+============
This document describes the common device support routines for Linux/390.
Different than other hardware architectures, ESA/390 has defined a unified
In order to build common device support for ESA/390 I/O interfaces, a
functional layer was introduced that provides generic I/O access methods to
-the hardware.
+the hardware.
-The common device support layer comprises the I/O support routines defined
-below. Some of them implement common Linux device driver interfaces, while
+The common device support layer comprises the I/O support routines defined
+below. Some of them implement common Linux device driver interfaces, while
some of them are ESA/390 platform specific.
Note:
-In order to write a driver for S/390, you also need to look into the interface
-described in Documentation/s390/driver-model.txt.
+ In order to write a driver for S/390, you also need to look into the interface
+ described in Documentation/s390/driver-model.rst.
Note for porting drivers from 2.4:
+
The major changes are:
+
* The functions use a ccw_device instead of an irq (subchannel).
* All drivers must define a ccw_driver (see driver-model.txt) and the associated
functions.
ccw_device_get_ciw()
get commands from extended sense data.
-ccw_device_start()
-ccw_device_start_timeout()
-ccw_device_start_key()
-ccw_device_start_key_timeout()
+ccw_device_start(), ccw_device_start_timeout(), ccw_device_start_key(), ccw_device_start_key_timeout()
initiate an I/O request.
ccw_device_resume()
resume channel program execution.
-ccw_device_halt()
+ccw_device_halt()
terminate the current I/O request processed on the device.
-do_IRQ()
+do_IRQ()
generic interrupt routine. This function is called by the interrupt entry
routine whenever an I/O interrupt is presented to the system. The do_IRQ()
routine determines the interrupt status and calls the device specific
callable interface. Instead, the functional description of do_IO() also
describes the input to the device specific interrupt handler.
-Note: All explanations apply also to the 64 bit architecture s390x.
+Note:
+ All explanations apply also to the 64 bit architecture s390x.
Common Device Support (CDS) for Linux/390 Device Drivers
+========================================================
General Information
+-------------------
The following chapters describe the I/O related interface routines the
Linux/390 common device support (CDS) provides to allow for device specific
linux/arch/s390/include/asm/irq.h.
Overview of CDS interface concepts
+----------------------------------
Different to other hardware platforms, the ESA/390 architecture doesn't define
interrupt lines managed by a specific interrupt controller and bus systems
determine the device driver owning the device that raised the interrupt.
Up to kernel 2.4, Linux/390 used to provide interfaces via the IRQ (subchannel).
-For internal use of the common I/O layer, these are still there. However,
+For internal use of the common I/O layer, these are still there. However,
device drivers should use the new calling interface via the ccw_device only.
During its startup the Linux/390 system checks for peripheral devices. Each
channel subsystem. While the subchannel numbers are system generated, each
subchannel also takes a user defined attribute, the so called device number.
Both subchannel number and device number cannot exceed 65535. During sysfs
-initialisation, the information about control unit type and device types that
+initialisation, the information about control unit type and device types that
imply specific I/O commands (channel command words - CCWs) in order to operate
the device are gathered. Device drivers can retrieve this set of hardware
information during their initialization step to recognize the devices they
This call enables a device driver to get information about supported commands
from the extended SenseID data.
-struct ciw *
-ccw_device_get_ciw(struct ccw_device *cdev, __u32 cmd);
+::
-cdev - The ccw_device for which the command is to be retrieved.
-cmd - The command type to be retrieved.
+ struct ciw *
+ ccw_device_get_ciw(struct ccw_device *cdev, __u32 cmd);
+
+==== ========================================================
+cdev The ccw_device for which the command is to be retrieved.
+cmd The command type to be retrieved.
+==== ========================================================
ccw_device_get_ciw() returns:
-NULL - No extended data available, invalid device or command not found.
-!NULL - The command requested.
+===== ================================================================
+ NULL No extended data available, invalid device or command not found.
+!NULL The command requested.
+===== ================================================================
+
+::
-ccw_device_start() - Initiate I/O Request
+ ccw_device_start() - Initiate I/O Request
The ccw_device_start() routines is the I/O request front-end processor. All
device driver I/O requests must be issued using this routine. A device driver
driver's interrupt handler as this is related to the rules (flags) defined
with the associated I/O request when calling ccw_device_start().
-int ccw_device_start(struct ccw_device *cdev,
- struct ccw1 *cpa,
- unsigned long intparm,
- __u8 lpm,
- unsigned long flags);
-int ccw_device_start_timeout(struct ccw_device *cdev,
- struct ccw1 *cpa,
- unsigned long intparm,
- __u8 lpm,
- unsigned long flags,
- int expires);
-int ccw_device_start_key(struct ccw_device *cdev,
- struct ccw1 *cpa,
- unsigned long intparm,
- __u8 lpm,
- __u8 key,
- unsigned long flags);
-int ccw_device_start_key_timeout(struct ccw_device *cdev,
- struct ccw1 *cpa,
- unsigned long intparm,
- __u8 lpm,
- __u8 key,
- unsigned long flags,
- int expires);
-
-cdev : ccw_device the I/O is destined for
-cpa : logical start address of channel program
-user_intparm : user specific interrupt information; will be presented
- back to the device driver's interrupt handler. Allows a
- device driver to associate the interrupt with a
- particular I/O request.
-lpm : defines the channel path to be used for a specific I/O
- request. A value of 0 will make cio use the opm.
-key : the storage key to use for the I/O (useful for operating on a
- storage with a storage key != default key)
-flag : defines the action to be performed for I/O processing
-expires : timeout value in jiffies. The common I/O layer will terminate
- the running program after this and call the interrupt handler
- with ERR_PTR(-ETIMEDOUT) as irb.
-
-Possible flag values are :
-
-DOIO_ALLOW_SUSPEND - channel program may become suspended
-DOIO_DENY_PREFETCH - don't allow for CCW prefetch; usually
- this implies the channel program might
- become modified
-DOIO_SUPPRESS_INTER - don't call the handler on intermediate status
-
-The cpa parameter points to the first format 1 CCW of a channel program :
-
-struct ccw1 {
- __u8 cmd_code;/* command code */
- __u8 flags; /* flags, like IDA addressing, etc. */
- __u16 count; /* byte count */
- __u32 cda; /* data address */
-} __attribute__ ((packed,aligned(8)));
-
-with the following CCW flags values defined :
-
-CCW_FLAG_DC - data chaining
-CCW_FLAG_CC - command chaining
-CCW_FLAG_SLI - suppress incorrect length
-CCW_FLAG_SKIP - skip
-CCW_FLAG_PCI - PCI
-CCW_FLAG_IDA - indirect addressing
-CCW_FLAG_SUSPEND - suspend
+::
+
+ int ccw_device_start(struct ccw_device *cdev,
+ struct ccw1 *cpa,
+ unsigned long intparm,
+ __u8 lpm,
+ unsigned long flags);
+ int ccw_device_start_timeout(struct ccw_device *cdev,
+ struct ccw1 *cpa,
+ unsigned long intparm,
+ __u8 lpm,
+ unsigned long flags,
+ int expires);
+ int ccw_device_start_key(struct ccw_device *cdev,
+ struct ccw1 *cpa,
+ unsigned long intparm,
+ __u8 lpm,
+ __u8 key,
+ unsigned long flags);
+ int ccw_device_start_key_timeout(struct ccw_device *cdev,
+ struct ccw1 *cpa,
+ unsigned long intparm,
+ __u8 lpm,
+ __u8 key,
+ unsigned long flags,
+ int expires);
+
+============= =============================================================
+cdev ccw_device the I/O is destined for
+cpa logical start address of channel program
+user_intparm user specific interrupt information; will be presented
+ back to the device driver's interrupt handler. Allows a
+ device driver to associate the interrupt with a
+ particular I/O request.
+lpm defines the channel path to be used for a specific I/O
+ request. A value of 0 will make cio use the opm.
+key the storage key to use for the I/O (useful for operating on a
+ storage with a storage key != default key)
+flag defines the action to be performed for I/O processing
+expires timeout value in jiffies. The common I/O layer will terminate
+ the running program after this and call the interrupt handler
+ with ERR_PTR(-ETIMEDOUT) as irb.
+============= =============================================================
+
+Possible flag values are:
+
+========================= =============================================
+DOIO_ALLOW_SUSPEND channel program may become suspended
+DOIO_DENY_PREFETCH don't allow for CCW prefetch; usually
+ this implies the channel program might
+ become modified
+DOIO_SUPPRESS_INTER don't call the handler on intermediate status
+========================= =============================================
+
+The cpa parameter points to the first format 1 CCW of a channel program::
+
+ struct ccw1 {
+ __u8 cmd_code;/* command code */
+ __u8 flags; /* flags, like IDA addressing, etc. */
+ __u16 count; /* byte count */
+ __u32 cda; /* data address */
+ } __attribute__ ((packed,aligned(8)));
+
+with the following CCW flags values defined:
+
+=================== =========================
+CCW_FLAG_DC data chaining
+CCW_FLAG_CC command chaining
+CCW_FLAG_SLI suppress incorrect length
+CCW_FLAG_SKIP skip
+CCW_FLAG_PCI PCI
+CCW_FLAG_IDA indirect addressing
+CCW_FLAG_SUSPEND suspend
+=================== =========================
Via ccw_device_set_options(), the device driver may specify the following
options for the device:
-DOIO_EARLY_NOTIFICATION - allow for early interrupt notification
-DOIO_REPORT_ALL - report all interrupt conditions
+========================= ======================================
+DOIO_EARLY_NOTIFICATION allow for early interrupt notification
+DOIO_REPORT_ALL report all interrupt conditions
+========================= ======================================
-The ccw_device_start() function returns :
+The ccw_device_start() function returns:
- 0 - successful completion or request successfully initiated
--EBUSY - The device is currently processing a previous I/O request, or there is
- a status pending at the device.
--ENODEV - cdev is invalid, the device is not operational or the ccw_device is
- not online.
+======== ======================================================================
+ 0 successful completion or request successfully initiated
+ -EBUSY The device is currently processing a previous I/O request, or there is
+ a status pending at the device.
+-ENODEV cdev is invalid, the device is not operational or the ccw_device is
+ not online.
+======== ======================================================================
When the I/O request completes, the CDS first level interrupt handler will
accumulate the status in a struct irb and then call the device interrupt handler.
-The intparm field will contain the value the device driver has associated with a
-particular I/O request. If a pending device status was recognized,
+The intparm field will contain the value the device driver has associated with a
+particular I/O request. If a pending device status was recognized,
intparm will be set to 0 (zero). This may happen during I/O initiation or delayed
by an alert status notification. In any case this status is not related to the
current (last) I/O request. In case of a delayed status notification no special
The irb may contain an error value, and the device driver should check for this
first:
--ETIMEDOUT: the common I/O layer terminated the request after the specified
- timeout value
--EIO: the common I/O layer terminated the request due to an error state
+========== =================================================================
+-ETIMEDOUT the common I/O layer terminated the request after the specified
+ timeout value
+-EIO the common I/O layer terminated the request due to an error state
+========== =================================================================
If the concurrent sense flag in the extended status word (esw) in the irb is
set, the field erw.scnt in the esw describes the number of device specific
The device interrupt handler can use the following definitions to investigate
the primary unit check source coded in sense byte 0 :
+======================= ====
SNS0_CMD_REJECT 0x80
SNS0_INTERVENTION_REQ 0x40
SNS0_BUS_OUT_CHECK 0x20
SNS0_DATA_CHECK 0x08
SNS0_OVERRUN 0x04
SNS0_INCOMPL_DOMAIN 0x01
+======================= ====
Depending on the device status, multiple of those values may be set together.
Please refer to the device specific documentation for details.
The irb->scsw.cstat field provides the (accumulated) subchannel status :
-SCHN_STAT_PCI - program controlled interrupt
-SCHN_STAT_INCORR_LEN - incorrect length
-SCHN_STAT_PROG_CHECK - program check
-SCHN_STAT_PROT_CHECK - protection check
-SCHN_STAT_CHN_DATA_CHK - channel data check
-SCHN_STAT_CHN_CTRL_CHK - channel control check
-SCHN_STAT_INTF_CTRL_CHK - interface control check
-SCHN_STAT_CHAIN_CHECK - chaining check
+========================= ============================
+SCHN_STAT_PCI program controlled interrupt
+SCHN_STAT_INCORR_LEN incorrect length
+SCHN_STAT_PROG_CHECK program check
+SCHN_STAT_PROT_CHECK protection check
+SCHN_STAT_CHN_DATA_CHK channel data check
+SCHN_STAT_CHN_CTRL_CHK channel control check
+SCHN_STAT_INTF_CTRL_CHK interface control check
+SCHN_STAT_CHAIN_CHECK chaining check
+========================= ============================
The irb->scsw.dstat field provides the (accumulated) device status :
-DEV_STAT_ATTENTION - attention
-DEV_STAT_STAT_MOD - status modifier
-DEV_STAT_CU_END - control unit end
-DEV_STAT_BUSY - busy
-DEV_STAT_CHN_END - channel end
-DEV_STAT_DEV_END - device end
-DEV_STAT_UNIT_CHECK - unit check
-DEV_STAT_UNIT_EXCEP - unit exception
+===================== =================
+DEV_STAT_ATTENTION attention
+DEV_STAT_STAT_MOD status modifier
+DEV_STAT_CU_END control unit end
+DEV_STAT_BUSY busy
+DEV_STAT_CHN_END channel end
+DEV_STAT_DEV_END device end
+DEV_STAT_UNIT_CHECK unit check
+DEV_STAT_UNIT_EXCEP unit exception
+===================== =================
Please see the ESA/390 Principles of Operation manual for details on the
individual flag meanings.
-Usage Notes :
+Usage Notes:
ccw_device_start() must be called disabled and with the ccw device lock held.
successful completion for all overlapping ccw_device_start() requests that have
been issued since the last secondary (final) status.
-Channel programs that intend to set the suspend flag on a channel command word
-(CCW) must start the I/O operation with the DOIO_ALLOW_SUSPEND option or the
-suspend flag will cause a channel program check. At the time the channel program
-becomes suspended an intermediate interrupt will be generated by the channel
+Channel programs that intend to set the suspend flag on a channel command word
+(CCW) must start the I/O operation with the DOIO_ALLOW_SUSPEND option or the
+suspend flag will cause a channel program check. At the time the channel program
+becomes suspended an intermediate interrupt will be generated by the channel
subsystem.
-ccw_device_resume() - Resume Channel Program Execution
+ccw_device_resume() - Resume Channel Program Execution
-If a device driver chooses to suspend the current channel program execution by
-setting the CCW suspend flag on a particular CCW, the channel program execution
-is suspended. In order to resume channel program execution the CIO layer
-provides the ccw_device_resume() routine.
+If a device driver chooses to suspend the current channel program execution by
+setting the CCW suspend flag on a particular CCW, the channel program execution
+is suspended. In order to resume channel program execution the CIO layer
+provides the ccw_device_resume() routine.
-int ccw_device_resume(struct ccw_device *cdev);
+::
-cdev - ccw_device the resume operation is requested for
+ int ccw_device_resume(struct ccw_device *cdev);
+
+==== ================================================
+cdev ccw_device the resume operation is requested for
+==== ================================================
The ccw_device_resume() function returns:
- 0 - suspended channel program is resumed
--EBUSY - status pending
--ENODEV - cdev invalid or not-operational subchannel
--EINVAL - resume function not applicable
--ENOTCONN - there is no I/O request pending for completion
+========= ==============================================
+ 0 suspended channel program is resumed
+ -EBUSY status pending
+ -ENODEV cdev invalid or not-operational subchannel
+ -EINVAL resume function not applicable
+-ENOTCONN there is no I/O request pending for completion
+========= ==============================================
Usage Notes:
+
Please have a look at the ccw_device_start() usage notes for more details on
suspended channel programs.
ccw_device_halt() must be called disabled and with the ccw device lock held.
-int ccw_device_halt(struct ccw_device *cdev,
- unsigned long intparm);
+::
+
+ int ccw_device_halt(struct ccw_device *cdev,
+ unsigned long intparm);
-cdev : ccw_device the halt operation is requested for
-intparm : interruption parameter; value is only used if no I/O
- is outstanding, otherwise the intparm associated with
- the I/O request is returned
+======= =====================================================
+cdev ccw_device the halt operation is requested for
+intparm interruption parameter; value is only used if no I/O
+ is outstanding, otherwise the intparm associated with
+ the I/O request is returned
+======= =====================================================
-The ccw_device_halt() function returns :
+The ccw_device_halt() function returns:
- 0 - request successfully initiated
--EBUSY - the device is currently busy, or status pending.
--ENODEV - cdev invalid.
--EINVAL - The device is not operational or the ccw device is not online.
+======= ==============================================================
+ 0 request successfully initiated
+-EBUSY the device is currently busy, or status pending.
+-ENODEV cdev invalid.
+-EINVAL The device is not operational or the ccw device is not online.
+======= ==============================================================
-Usage Notes :
+Usage Notes:
A device driver may write a never-ending channel program by writing a channel
program that at its end loops back to its beginning by means of a transfer in
read to a network device (with or without PCI flag) a ccw_device_halt()
is required to end the pending operation.
-ccw_device_clear() - Terminage I/O Request Processing
+::
+
+ ccw_device_clear() - Terminage I/O Request Processing
In order to terminate all I/O processing at the subchannel, the clear subchannel
(CSCH) command is used. It can be issued via ccw_device_clear().
ccw_device_clear() must be called disabled and with the ccw device lock held.
-int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm);
+::
+
+ int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm);
-cdev: ccw_device the clear operation is requested for
-intparm: interruption parameter (see ccw_device_halt())
+======= ===============================================
+cdev ccw_device the clear operation is requested for
+intparm interruption parameter (see ccw_device_halt())
+======= ===============================================
The ccw_device_clear() function returns:
- 0 - request successfully initiated
--ENODEV - cdev invalid
--EINVAL - The device is not operational or the ccw device is not online.
+======= ==============================================================
+ 0 request successfully initiated
+-ENODEV cdev invalid
+-EINVAL The device is not operational or the ccw device is not online.
+======= ==============================================================
Miscellaneous Support Routines
+------------------------------
This chapter describes various routines to be used in a Linux/390 device
driver programming environment.
Get the address of the device specific lock. This is then used in
spin_lock() / spin_unlock() calls.
+::
-__u8 ccw_device_get_path_mask(struct ccw_device *cdev);
+ __u8 ccw_device_get_path_mask(struct ccw_device *cdev);
Get the mask of the path currently available for cdev.
-S/390 common I/O-Layer - command line parameters, procfs and debugfs entries
-============================================================================
+======================
+S/390 common I/O-Layer
+======================
+
+command line parameters, procfs and debugfs entries
+===================================================
Command line parameters
-----------------------
device := {all | [!]ipldev | [!]condev | [!]<devno> | [!]<devno>-<devno>}
The given devices will be ignored by the common I/O-layer; no detection
- and device sensing will be done on any of those devices. The subchannel to
+ and device sensing will be done on any of those devices. The subchannel to
which the device in question is attached will be treated as if no device was
attached.
keywords can be used to refer to the CCW based boot device and CCW console
device respectively (these are probably useful only when combined with the '!'
operator). The '!' operator will cause the I/O-layer to _not_ ignore a device.
- The command line is parsed from left to right.
+ The command line
+ is parsed from left to right.
+
+ For example::
- For example,
cio_ignore=0.0.0023-0.0.0042,0.0.4711
+
will ignore all devices ranging from 0.0.0023 to 0.0.0042 and the device
0.0.4711, if detected.
- As another example,
+
+ As another example::
+
cio_ignore=all,!0.0.4711,!0.0.fd00-0.0.fd02
+
will ignore all devices but 0.0.4711, 0.0.fd00, 0.0.fd01, 0.0.fd02.
By default, no devices are ignored.
Lists the ranges of devices (by bus id) which are ignored by common I/O.
- You can un-ignore certain or all devices by piping to /proc/cio_ignore.
- "free all" will un-ignore all ignored devices,
+ You can un-ignore certain or all devices by piping to /proc/cio_ignore.
+ "free all" will un-ignore all ignored devices,
"free <device range>, <device range>, ..." will un-ignore the specified
devices.
For example, if devices 0.0.0023 to 0.0.0042 and 0.0.4711 are ignored,
+
- echo free 0.0.0030-0.0.0032 > /proc/cio_ignore
will un-ignore devices 0.0.0030 to 0.0.0032 and will leave devices 0.0.0023
to 0.0.002f, 0.0.0033 to 0.0.0042 and 0.0.4711 ignored;
- echo free 0.0.0041 > /proc/cio_ignore will furthermore un-ignore device
0.0.0041;
- - echo free all > /proc/cio_ignore will un-ignore all remaining ignored
+ - echo free all > /proc/cio_ignore will un-ignore all remaining ignored
devices.
- When a device is un-ignored, device recognition and sensing is performed and
+ When a device is un-ignored, device recognition and sensing is performed and
the device driver will be notified if possible, so the device will become
available to the system. Note that un-ignoring is performed asynchronously.
- You can also add ranges of devices to be ignored by piping to
+ You can also add ranges of devices to be ignored by piping to
/proc/cio_ignore; "add <device range>, <device range>, ..." will ignore the
specified devices.
Note: While already known devices can be added to the list of devices to be
- ignored, there will be no effect on then. However, if such a device
+ ignored, there will be no effect on then. However, if such a device
disappears and then reappears, it will then be ignored. To make
known devices go away, you need the "purge" command (see below).
- For example,
+ For example::
+
"echo add 0.0.a000-0.0.accc, 0.0.af00-0.0.afff > /proc/cio_ignore"
+
will add 0.0.a000-0.0.accc and 0.0.af00-0.0.afff to the list of ignored
devices.
- You can remove already known but now ignored devices via
+ You can remove already known but now ignored devices via::
+
"echo purge > /proc/cio_ignore"
+
All devices ignored but still registered and not online (= not in use)
will be deregistered and thus removed from the system.
Various debug messages from the common I/O-layer.
- /sys/kernel/debug/s390dbf/cio_trace/hex_ascii
- Logs the calling of functions in the common I/O-layer and, if applicable,
+ Logs the calling of functions in the common I/O-layer and, if applicable,
which subchannel they were called for, as well as dumps of some data
structures (like irb in an error case).
- The level of logging can be changed to be more or less verbose by piping to
+ The level of logging can be changed to be more or less verbose by piping to
/sys/kernel/debug/s390dbf/cio_*/level a number between 0 and 6; see the
- documentation on the S/390 debug feature (Documentation/s390/s390dbf.txt)
+ documentation on the S/390 debug feature (Documentation/s390/s390dbf.rst)
for details.
+==================
DASD device driver
+==================
S/390's disk devices (DASDs) are managed by Linux via the DASD device
driver. It is valid for all types of DASDs and represents them to
If you supply kernel parameters the different instances are processed
in order of appearance and a minor number is reserved for any device
covered by the supplied range up to 64 volumes. Additional DASDs are
-ignored. If you do not supply the 'dasd=' kernel parameter at all, the
+ignored. If you do not supply the 'dasd=' kernel parameter at all, the
DASD driver registers all supported DASDs of your system to a minor
number in ascending order of the subchannel number.
The driver currently supports ECKD-devices and there are stubs for
support of the FBA and CKD architectures. For the FBA architecture
only some smart data structures are missing to make the support
-complete.
+complete.
We performed our testing on 3380 and 3390 type disks of different
sizes, under VM and on the bare hardware (LPAR), using internal disks
of the multiprise as well as a RAMAC virtual array. Disks exported by
provide support of partitions, maybe VTOC oriented or using a kind of
partition table in the label record.
-USAGE
+Usage
+=====
-Low-level format (?CKD only)
For using an ECKD-DASD as a Linux harddisk you have to low-level
format the tracks by issuing the BLKDASDFORMAT-ioctl on that
device. This will erase any data on that volume including IBM volume
-labels, VTOCs etc. The ioctl may take a 'struct format_data *' or
-'NULL' as an argument.
-typedef struct {
+labels, VTOCs etc. The ioctl may take a `struct format_data *` or
+'NULL' as an argument::
+
+ typedef struct {
int start_unit;
int stop_unit;
int blksize;
-} format_data_t;
+ } format_data_t;
+
When a NULL argument is passed to the BLKDASDFORMAT ioctl the whole
disk is formatted to a blocksize of 1024 bytes. Otherwise start_unit
and stop_unit are the first and last track to be formatted. If
1kB blocks anyway and you gain approx. 50% of capacity increasing your
blksize from 512 byte to 1kB.
--Make a filesystem
+Make a filesystem
+=================
+
Then you can mk??fs the filesystem of your choice on that volume or
partition. For reasons of sanity you should build your filesystem on
-the partition /dev/dd?1 instead of the whole volume. You only lose 3kB
+the partition /dev/dd?1 instead of the whole volume. You only lose 3kB
but may be sure that you can reuse your data after introduction of a
real partition table.
-BUGS:
+Bugs
+====
+
- Performance sometimes is rather low because we don't fully exploit clustering
-TODO-List:
+TODO-List
+=========
+
- Add IBM'S Disk layout to genhd
- Enhance driver to use more than one major number
- Enable usage as a module
--- /dev/null
+=============================================
+Debugging on Linux for s/390 & z/Architecture
+=============================================
+
+Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+
+Copyright (C) 2000-2001 IBM Deutschland Entwicklung GmbH, IBM Corporation
+
+.. Best viewed with fixed width fonts
+
+Overview of Document:
+=====================
+This document is intended to give a good overview of how to debug Linux for
+s/390 and z/Architecture. It is not intended as a complete reference and not a
+tutorial on the fundamentals of C & assembly. It doesn't go into
+390 IO in any detail. It is intended to complement the documents in the
+reference section below & any other worthwhile references you get.
+
+It is intended like the Enterprise Systems Architecture/390 Reference Summary
+to be printed out & used as a quick cheat sheet self help style reference when
+problems occur.
+
+.. Contents
+ ========
+ Register Set
+ Address Spaces on Intel Linux
+ Address Spaces on Linux for s/390 & z/Architecture
+ The Linux for s/390 & z/Architecture Kernel Task Structure
+ Register Usage & Stackframes on Linux for s/390 & z/Architecture
+ A sample program with comments
+ Compiling programs for debugging on Linux for s/390 & z/Architecture
+ Debugging under VM
+ s/390 & z/Architecture IO Overview
+ Debugging IO on s/390 & z/Architecture under VM
+ GDB on s/390 & z/Architecture
+ Stack chaining in gdb by hand
+ Examining core dumps
+ ldd
+ Debugging modules
+ The proc file system
+ SysRq
+ References
+ Special Thanks
+
+Register Set
+============
+The current architectures have the following registers.
+
+16 General propose registers, 32 bit on s/390 and 64 bit on z/Architecture,
+r0-r15 (or gpr0-gpr15), used for arithmetic and addressing.
+
+16 Control registers, 32 bit on s/390 and 64 bit on z/Architecture, cr0-cr15,
+kernel usage only, used for memory management, interrupt control, debugging
+control etc.
+
+16 Access registers (ar0-ar15), 32 bit on both s/390 and z/Architecture,
+normally not used by normal programs but potentially could be used as
+temporary storage. These registers have a 1:1 association with general
+purpose registers and are designed to be used in the so-called access
+register mode to select different address spaces.
+Access register 0 (and access register 1 on z/Architecture, which needs a
+64 bit pointer) is currently used by the pthread library as a pointer to
+the current running threads private area.
+
+16 64-bit floating point registers (fp0-fp15 ) IEEE & HFP floating
+point format compliant on G5 upwards & a Floating point control reg (FPC)
+
+4 64-bit registers (fp0,fp2,fp4 & fp6) HFP only on older machines.
+
+Note:
+ Linux (currently) always uses IEEE & emulates G5 IEEE format on older
+ machines, ( provided the kernel is configured for this ).
+
+
+The PSW is the most important register on the machine it
+is 64 bit on s/390 & 128 bit on z/Architecture & serves the roles of
+a program counter (pc), condition code register,memory space designator.
+In IBM standard notation I am counting bit 0 as the MSB.
+It has several advantages over a normal program counter
+in that you can change address translation & program counter
+in a single instruction. To change address translation,
+e.g. switching address translation off requires that you
+have a logical=physical mapping for the address you are
+currently running at.
+
++-------------------------+-------------------------------------------------+
+| Bit | |
++--------+----------------+ Value |
+| s/390 | z/Architecture | |
++========+================+=================================================+
+| 0 | 0 | Reserved (must be 0) otherwise specification |
+| | | exception occurs. |
++--------+----------------+-------------------------------------------------+
+| 1 | 1 | Program Event Recording 1 PER enabled, |
+| | | PER is used to facilitate debugging e.g. |
+| | | single stepping. |
++--------+----------------+-------------------------------------------------+
+| 2-4 | 2-4 | Reserved (must be 0). |
++--------+----------------+-------------------------------------------------+
+| 5 | 5 | Dynamic address translation 1=DAT on. |
++--------+----------------+-------------------------------------------------+
+| 6 | 6 | Input/Output interrupt Mask |
++--------+----------------+-------------------------------------------------+
+| 7 | 7 | External interrupt Mask used primarily for |
+| | | interprocessor signalling and clock interrupts. |
++--------+----------------+-------------------------------------------------+
+| 8-11 | 8-11 | PSW Key used for complex memory protection |
+| | | mechanism (not used under linux) |
++--------+----------------+-------------------------------------------------+
+| 12 | 12 | 1 on s/390 0 on z/Architecture |
++--------+----------------+-------------------------------------------------+
+| 13 | 13 | Machine Check Mask 1=enable machine check |
+| | | interrupts |
++--------+----------------+-------------------------------------------------+
+| 14 | 14 | Wait State. Set this to 1 to stop the processor |
+| | | except for interrupts and give time to other |
+| | | LPARS. Used in CPU idle in the kernel to |
+| | | increase overall usage of processor resources. |
++--------+----------------+-------------------------------------------------+
+| 15 | 15 | Problem state (if set to 1 certain instructions |
+| | | are disabled). All linux user programs run with |
+| | | this bit 1 (useful info for debugging under VM).|
++--------+----------------+-------------------------------------------------+
+| 16-17 | 16-17 | Address Space Control |
+| | | |
+| | | 00 Primary Space Mode: |
+| | | |
+| | | The register CR1 contains the primary |
+| | | address-space control element (PASCE), which |
+| | | points to the primary space region/segment |
+| | | table origin. |
+| | | |
+| | | 01 Access register mode |
+| | | |
+| | | 10 Secondary Space Mode: |
+| | | |
+| | | The register CR7 contains the secondary |
+| | | address-space control element (SASCE), which |
+| | | points to the secondary space region or |
+| | | segment table origin. |
+| | | |
+| | | 11 Home Space Mode: |
+| | | |
+| | | The register CR13 contains the home space |
+| | | address-space control element (HASCE), which |
+| | | points to the home space region/segment |
+| | | table origin. |
+| | | |
+| | | See "Address Spaces on Linux for s/390 & |
+| | | z/Architecture" below for more information |
+| | | about address space usage in Linux. |
++--------+----------------+-------------------------------------------------+
+| 18-19 | 18-19 | Condition codes (CC) |
++--------+----------------+-------------------------------------------------+
+| 20 | 20 | Fixed point overflow mask if 1=FPU exceptions |
+| | | for this event occur (normally 0) |
++--------+----------------+-------------------------------------------------+
+| 21 | 21 | Decimal overflow mask if 1=FPU exceptions for |
+| | | this event occur (normally 0) |
++--------+----------------+-------------------------------------------------+
+| 22 | 22 | Exponent underflow mask if 1=FPU exceptions |
+| | | for this event occur (normally 0) |
++--------+----------------+-------------------------------------------------+
+| 23 | 23 | Significance Mask if 1=FPU exceptions for this |
+| | | event occur (normally 0) |
++--------+----------------+-------------------------------------------------+
+| 24-31 | 24-30 | Reserved Must be 0. |
+| +----------------+-------------------------------------------------+
+| | 31 | Extended Addressing Mode |
+| +----------------+-------------------------------------------------+
+| | 32 | Basic Addressing Mode |
+| | | |
+| | | Used to set addressing mode |
+| | | |
+| | | +---------+----------+----------+ |
+| | | | PSW 31 | PSW 32 | | |
+| | | +---------+----------+----------+ |
+| | | | 0 | 0 | 24 bit | |
+| | | +---------+----------+----------+ |
+| | | | 0 | 1 | 31 bit | |
+| | | +---------+----------+----------+ |
+| | | | 1 | 1 | 64 bit | |
+| | | +---------+----------+----------+ |
++--------+----------------+-------------------------------------------------+
+| 32 | | 1=31 bit addressing mode 0=24 bit addressing |
+| | | mode (for backward compatibility), linux |
+| | | always runs with this bit set to 1 |
++--------+----------------+-------------------------------------------------+
+| 33-64 | | Instruction address. |
+| +----------------+-------------------------------------------------+
+| | 33-63 | Reserved must be 0 |
+| +----------------+-------------------------------------------------+
+| | 64-127 | Address |
+| | | |
+| | | - In 24 bits mode bits 64-103=0 bits 104-127 |
+| | | Address |
+| | | - In 31 bits mode bits 64-96=0 bits 97-127 |
+| | | Address |
+| | | |
+| | | Note: |
+| | | unlike 31 bit mode on s/390 bit 96 must be |
+| | | zero when loading the address with LPSWE |
+| | | otherwise a specification exception occurs, |
+| | | LPSW is fully backward compatible. |
++--------+----------------+-------------------------------------------------+
+
+Prefix Page(s)
+--------------
+This per cpu memory area is too intimately tied to the processor not to mention.
+It exists between the real addresses 0-4096 on s/390 and between 0-8192 on
+z/Architecture and is exchanged with one page on s/390 or two pages on
+z/Architecture in absolute storage by the set prefix instruction during Linux
+startup.
+
+This page is mapped to a different prefix for each processor in an SMP
+configuration (assuming the OS designer is sane of course).
+
+Bytes 0-512 (200 hex) on s/390 and 0-512, 4096-4544, 4604-5119 currently on
+z/Architecture are used by the processor itself for holding such information
+as exception indications and entry points for exceptions.
+
+Bytes after 0xc00 hex are used by linux for per processor globals on s/390 and
+z/Architecture (there is a gap on z/Architecture currently between 0xc00 and
+0x1000, too, which is used by Linux).
+
+The closest thing to this on traditional architectures is the interrupt
+vector table. This is a good thing & does simplify some of the kernel coding
+however it means that we now cannot catch stray NULL pointers in the
+kernel without hard coded checks.
+
+
+
+Address Spaces on Intel Linux
+=============================
+
+The traditional Intel Linux is approximately mapped as follows forgive
+the ascii art::
+
+ 0xFFFFFFFF 4GB Himem *****************
+ * *
+ * Kernel Space *
+ * *
+ ***************** ****************
+ User Space Himem * User Stack * * *
+ (typically 0xC0000000 3GB ) ***************** * *
+ * Shared Libs * * Next Process *
+ ***************** * to *
+ * * <== * Run * <==
+ * User Program * * *
+ * Data BSS * * *
+ * Text * * *
+ * Sections * * *
+ 0x00000000 ***************** ****************
+
+Now it is easy to see that on Intel it is quite easy to recognise a kernel
+address as being one greater than user space himem (in this case 0xC0000000),
+and addresses of less than this are the ones in the current running program on
+this processor (if an smp box).
+
+If using the virtual machine ( VM ) as a debugger it is quite difficult to
+know which user process is running as the address space you are looking at
+could be from any process in the run queue.
+
+The limitation of Intels addressing technique is that the linux
+kernel uses a very simple real address to virtual addressing technique
+of Real Address=Virtual Address-User Space Himem.
+This means that on Intel the kernel linux can typically only address
+Himem=0xFFFFFFFF-0xC0000000=1GB & this is all the RAM these machines
+can typically use.
+
+They can lower User Himem to 2GB or lower & thus be
+able to use 2GB of RAM however this shrinks the maximum size
+of User Space from 3GB to 2GB they have a no win limit of 4GB unless
+they go to 64 Bit.
+
+
+On 390 our limitations & strengths make us slightly different.
+For backward compatibility we are only allowed use 31 bits (2GB)
+of our 32 bit addresses, however, we use entirely separate address
+spaces for the user & kernel.
+
+This means we can support 2GB of non Extended RAM on s/390, & more
+with the Extended memory management swap device &
+currently 4TB of physical memory currently on z/Architecture.
+
+
+Address Spaces on Linux for s/390 & z/Architecture
+==================================================
+
+Our addressing scheme is basically as follows::
+
+ Primary Space Home Space
+ Himem 0x7fffffff 2GB on s/390 ***************** ****************
+ currently 0x3ffffffffff (2^42)-1 * User Stack * * *
+ on z/Architecture. ***************** * *
+ * Shared Libs * * *
+ ***************** * *
+ * * * Kernel *
+ * User Program * * *
+ * Data BSS * * *
+ * Text * * *
+ * Sections * * *
+ 0x00000000 ***************** ****************
+
+This also means that we need to look at the PSW problem state bit and the
+addressing mode to decide whether we are looking at user or kernel space.
+
+User space runs in primary address mode (or access register mode within
+the vdso code).
+
+The kernel usually also runs in home space mode, however when accessing
+user space the kernel switches to primary or secondary address mode if
+the mvcos instruction is not available or if a compare-and-swap (futex)
+instruction on a user space address is performed.
+
+When also looking at the ASCE control registers, this means:
+
+User space:
+
+- runs in primary or access register mode
+- cr1 contains the user asce
+- cr7 contains the user asce
+- cr13 contains the kernel asce
+
+Kernel space:
+
+- runs in home space mode
+- cr1 contains the user or kernel asce
+
+ - the kernel asce is loaded when a uaccess requires primary or
+ secondary address mode
+
+- cr7 contains the user or kernel asce, (changed with set_fs())
+- cr13 contains the kernel asce
+
+In case of uaccess the kernel changes to:
+
+- primary space mode in case of a uaccess (copy_to_user) and uses
+ e.g. the mvcp instruction to access user space. However the kernel
+ will stay in home space mode if the mvcos instruction is available
+- secondary space mode in case of futex atomic operations, so that the
+ instructions come from primary address space and data from secondary
+ space
+
+In case of KVM, the kernel runs in home space mode, but cr1 gets switched
+to contain the gmap asce before the SIE instruction gets executed. When
+the SIE instruction is finished, cr1 will be switched back to contain the
+user asce.
+
+
+Virtual Addresses on s/390 & z/Architecture
+===========================================
+
+A virtual address on s/390 is made up of 3 parts
+The SX (segment index, roughly corresponding to the PGD & PMD in Linux
+terminology) being bits 1-11.
+
+The PX (page index, corresponding to the page table entry (pte) in Linux
+terminology) being bits 12-19.
+
+The remaining bits BX (the byte index are the offset in the page )
+i.e. bits 20 to 31.
+
+On z/Architecture in linux we currently make up an address from 4 parts.
+
+- The region index bits (RX) 0-32 we currently use bits 22-32
+- The segment index (SX) being bits 33-43
+- The page index (PX) being bits 44-51
+- The byte index (BX) being bits 52-63
+
+Notes:
+ 1) s/390 has no PMD so the PMD is really the PGD also.
+ A lot of this stuff is defined in pgtable.h.
+
+ 2) Also seeing as s/390's page indexes are only 1k in size
+ (bits 12-19 x 4 bytes per pte ) we use 1 ( page 4k )
+ to make the best use of memory by updating 4 segment indices
+ entries each time we mess with a PMD & use offsets
+ 0,1024,2048 & 3072 in this page as for our segment indexes.
+ On z/Architecture our page indexes are now 2k in size
+ ( bits 12-19 x 8 bytes per pte ) we do a similar trick
+ but only mess with 2 segment indices each time we mess with
+ a PMD.
+
+ 3) As z/Architecture supports up to a massive 5-level page table lookup we
+ can only use 3 currently on Linux ( as this is all the generic kernel
+ currently supports ) however this may change in future
+ this allows us to access ( according to my sums )
+ 4TB of virtual storage per process i.e.
+ 4096*512(PTES)*1024(PMDS)*2048(PGD) = 4398046511104 bytes,
+ enough for another 2 or 3 of years I think :-).
+ to do this we use a region-third-table designation type in
+ our address space control registers.
+
+
+The Linux for s/390 & z/Architecture Kernel Task Structure
+==========================================================
+Each process/thread under Linux for S390 has its own kernel task_struct
+defined in linux/include/linux/sched.h
+The S390 on initialisation & resuming of a process on a cpu sets
+the __LC_KERNEL_STACK variable in the spare prefix area for this cpu
+(which we use for per-processor globals).
+
+The kernel stack pointer is intimately tied with the task structure for
+each processor as follows::
+
+ s/390
+ ************************
+ * 1 page kernel stack *
+ * ( 4K ) *
+ ************************
+ * 1 page task_struct *
+ * ( 4K ) *
+ 8K aligned ************************
+
+ z/Architecture
+ ************************
+ * 2 page kernel stack *
+ * ( 8K ) *
+ ************************
+ * 2 page task_struct *
+ * ( 8K ) *
+ 16K aligned ************************
+
+What this means is that we don't need to dedicate any register or global
+variable to point to the current running process & can retrieve it with the
+following very simple construct for s/390 & one very similar for
+z/Architecture::
+
+ static inline struct task_struct * get_current(void)
+ {
+ struct task_struct *current;
+ __asm__("lhi %0,-8192\n\t"
+ "nr %0,15"
+ : "=r" (current) );
+ return current;
+ }
+
+i.e. just anding the current kernel stack pointer with the mask -8192.
+Thankfully because Linux doesn't have support for nested IO interrupts
+& our devices have large buffers can survive interrupts being shut for
+short amounts of time we don't need a separate stack for interrupts.
+
+
+
+
+Register Usage & Stackframes on Linux for s/390 & z/Architecture
+=================================================================
+Overview:
+---------
+This is the code that gcc produces at the top & the bottom of
+each function. It usually is fairly consistent & similar from
+function to function & if you know its layout you can probably
+make some headway in finding the ultimate cause of a problem
+after a crash without a source level debugger.
+
+Note: To follow stackframes requires a knowledge of C or Pascal &
+limited knowledge of one assembly language.
+
+It should be noted that there are some differences between the
+s/390 and z/Architecture stack layouts as the z/Architecture stack layout
+didn't have to maintain compatibility with older linkage formats.
+
+Glossary:
+---------
+alloca:
+ This is a built in compiler function for runtime allocation
+ of extra space on the callers stack which is obviously freed
+ up on function exit ( e.g. the caller may choose to allocate nothing
+ of a buffer of 4k if required for temporary purposes ), it generates
+ very efficient code ( a few cycles ) when compared to alternatives
+ like malloc.
+
+automatics:
+ These are local variables on the stack, i.e they aren't in registers &
+ they aren't static.
+
+back-chain:
+ This is a pointer to the stack pointer before entering a
+ framed functions ( see frameless function ) prologue got by
+ dereferencing the address of the current stack pointer,
+ i.e. got by accessing the 32 bit value at the stack pointers
+ current location.
+
+base-pointer:
+ This is a pointer to the back of the literal pool which
+ is an area just behind each procedure used to store constants
+ in each function.
+
+call-clobbered:
+ The caller probably needs to save these registers if there
+ is something of value in them, on the stack or elsewhere before making a
+ call to another procedure so that it can restore it later.
+
+epilogue:
+ The code generated by the compiler to return to the caller.
+
+frameless-function:
+ A frameless function in Linux for s390 & z/Architecture is one which doesn't
+ need more than the register save area (96 bytes on s/390, 160 on z/Architecture)
+ given to it by the caller.
+
+ A frameless function never:
+
+ 1) Sets up a back chain.
+ 2) Calls alloca.
+ 3) Calls other normal functions
+ 4) Has automatics.
+
+GOT-pointer:
+ This is a pointer to the global-offset-table in ELF
+ ( Executable Linkable Format, Linux'es most common executable format ),
+ all globals & shared library objects are found using this pointer.
+
+lazy-binding
+ ELF shared libraries are typically only loaded when routines in the shared
+ library are actually first called at runtime. This is lazy binding.
+
+procedure-linkage-table
+ This is a table found from the GOT which contains pointers to routines
+ in other shared libraries which can't be called to by easier means.
+
+prologue:
+ The code generated by the compiler to set up the stack frame.
+
+outgoing-args:
+ This is extra area allocated on the stack of the calling function if the
+ parameters for the callee's cannot all be put in registers, the same
+ area can be reused by each function the caller calls.
+
+routine-descriptor:
+ A COFF executable format based concept of a procedure reference
+ actually being 8 bytes or more as opposed to a simple pointer to the routine.
+ This is typically defined as follows:
+
+ - Routine Descriptor offset 0=Pointer to Function
+ - Routine Descriptor offset 4=Pointer to Table of Contents
+
+ The table of contents/TOC is roughly equivalent to a GOT pointer.
+ & it means that shared libraries etc. can be shared between several
+ environments each with their own TOC.
+
+static-chain:
+ This is used in nested functions a concept adopted from pascal
+ by gcc not used in ansi C or C++ ( although quite useful ), basically it
+ is a pointer used to reference local variables of enclosing functions.
+ You might come across this stuff once or twice in your lifetime.
+
+ e.g.
+
+ The function below should return 11 though gcc may get upset & toss warnings
+ about unused variables::
+
+ int FunctionA(int a)
+ {
+ int b;
+ FunctionC(int c)
+ {
+ b=c+1;
+ }
+ FunctionC(10);
+ return(b);
+ }
+
+
+s/390 & z/Architecture Register usage
+=====================================
+
+======== ========================================== ===============
+r0 used by syscalls/assembly call-clobbered
+r1 used by syscalls/assembly call-clobbered
+r2 argument 0 / return value 0 call-clobbered
+r3 argument 1 / return value 1 (if long long) call-clobbered
+r4 argument 2 call-clobbered
+r5 argument 3 call-clobbered
+r6 argument 4 saved
+r7 pointer-to arguments 5 to ... saved
+r8 this & that saved
+r9 this & that saved
+r10 static-chain ( if nested function ) saved
+r11 frame-pointer ( if function used alloca ) saved
+r12 got-pointer saved
+r13 base-pointer saved
+r14 return-address saved
+r15 stack-pointer saved
+
+f0 argument 0 / return value ( float/double ) call-clobbered
+f2 argument 1 call-clobbered
+f4 z/Architecture argument 2 saved
+f6 z/Architecture argument 3 saved
+======== ========================================== ===============
+
+The remaining floating points
+f1,f3,f5 f7-f15 are call-clobbered.
+
+Notes:
+------
+1) The only requirement is that registers which are used
+ by the callee are saved, e.g. the compiler is perfectly
+ capable of using r11 for purposes other than a frame a
+ frame pointer if a frame pointer is not needed.
+2) In functions with variable arguments e.g. printf the calling procedure
+ is identical to one without variable arguments & the same number of
+ parameters. However, the prologue of this function is somewhat more
+ hairy owing to it having to move these parameters to the stack to
+ get va_start, va_arg & va_end to work.
+3) Access registers are currently unused by gcc but are used in
+ the kernel. Possibilities exist to use them at the moment for
+ temporary storage but it isn't recommended.
+4) Only 4 of the floating point registers are used for
+ parameter passing as older machines such as G3 only have only 4
+ & it keeps the stack frame compatible with other compilers.
+ However with IEEE floating point emulation under linux on the
+ older machines you are free to use the other 12.
+5) A long long or double parameter cannot be have the
+ first 4 bytes in a register & the second four bytes in the
+ outgoing args area. It must be purely in the outgoing args
+ area if crossing this boundary.
+6) Floating point parameters are mixed with outgoing args
+ on the outgoing args area in the order the are passed in as parameters.
+7) Floating point arguments 2 & 3 are saved in the outgoing args area for
+ z/Architecture
+
+
+Stack Frame Layout
+------------------
+
+========= ============== ======================================================
+s/390 z/Architecture
+========= ============== ======================================================
+0 0 back chain ( a 0 here signifies end of back chain )
+4 8 eos ( end of stack, not used on Linux for S390 used
+ in other linkage formats )
+8 16 glue used in other s/390 linkage formats for saved
+ routine descriptors etc.
+12 24 glue used in other s/390 linkage formats for saved
+ routine descriptors etc.
+16 32 scratch area
+20 40 scratch area
+24 48 saved r6 of caller function
+28 56 saved r7 of caller function
+32 64 saved r8 of caller function
+36 72 saved r9 of caller function
+40 80 saved r10 of caller function
+44 88 saved r11 of caller function
+48 96 saved r12 of caller function
+52 104 saved r13 of caller function
+56 112 saved r14 of caller function
+60 120 saved r15 of caller function
+64 128 saved f4 of caller function
+72 132 saved f6 of caller function
+80 undefined
+96 160 outgoing args passed from caller to callee
+96+x 160+x possible stack alignment ( 8 bytes desirable )
+96+x+y 160+x+y alloca space of caller ( if used )
+96+x+y+z 160+x+y+z automatics of caller ( if used )
+0 back-chain
+========= ============== ======================================================
+
+A sample program with comments.
+===============================
+
+Comments on the function test
+-----------------------------
+1) It didn't need to set up a pointer to the constant pool gpr13 as it is not
+ used ( :-( ).
+2) This is a frameless function & no stack is bought.
+3) The compiler was clever enough to recognise that it could return the
+ value in r2 as well as use it for the passed in parameter ( :-) ).
+4) The basr ( branch relative & save ) trick works as follows the instruction
+ has a special case with r0,r0 with some instruction operands is understood as
+ the literal value 0, some risc architectures also do this ). So now
+ we are branching to the next address & the address new program counter is
+ in r13,so now we subtract the size of the function prologue we have executed
+ the size of the literal pool to get to the top of the literal pool::
+
+
+ 0040037c int test(int b)
+ { # Function prologue below
+ 40037c: 90 de f0 34 stm %r13,%r14,52(%r15) # Save registers r13 & r14
+ 400380: 0d d0 basr %r13,%r0 # Set up pointer to constant pool using
+ 400382: a7 da ff fa ahi %r13,-6 # basr trick
+ return(5+b);
+ # Huge main program
+ 400386: a7 2a 00 05 ahi %r2,5 # add 5 to r2
+
+ # Function epilogue below
+ 40038a: 98 de f0 34 lm %r13,%r14,52(%r15) # restore registers r13 & 14
+ 40038e: 07 fe br %r14 # return
+ }
+
+Comments on the function main
+-----------------------------
+1) The compiler did this function optimally ( 8-) )::
+
+ Literal pool for main.
+ 400390: ff ff ff ec .long 0xffffffec
+ main(int argc,char *argv[])
+ { # Function prologue below
+ 400394: 90 bf f0 2c stm %r11,%r15,44(%r15) # Save necessary registers
+ 400398: 18 0f lr %r0,%r15 # copy stack pointer to r0
+ 40039a: a7 fa ff a0 ahi %r15,-96 # Make area for callee saving
+ 40039e: 0d d0 basr %r13,%r0 # Set up r13 to point to
+ 4003a0: a7 da ff f0 ahi %r13,-16 # literal pool
+ 4003a4: 50 00 f0 00 st %r0,0(%r15) # Save backchain
+
+ return(test(5)); # Main Program Below
+ 4003a8: 58 e0 d0 00 l %r14,0(%r13) # load relative address of test from
+ # literal pool
+ 4003ac: a7 28 00 05 lhi %r2,5 # Set first parameter to 5
+ 4003b0: 4d ee d0 00 bas %r14,0(%r14,%r13) # jump to test setting r14 as return
+ # address using branch & save instruction.
+
+ # Function Epilogue below
+ 4003b4: 98 bf f0 8c lm %r11,%r15,140(%r15)# Restore necessary registers.
+ 4003b8: 07 fe br %r14 # return to do program exit
+ }
+
+
+Compiler updates
+----------------
+
+::
+
+ main(int argc,char *argv[])
+ {
+ 4004fc: 90 7f f0 1c stm %r7,%r15,28(%r15)
+ 400500: a7 d5 00 04 bras %r13,400508 <main+0xc>
+ 400504: 00 40 04 f4 .long 0x004004f4
+ # compiler now puts constant pool in code to so it saves an instruction
+ 400508: 18 0f lr %r0,%r15
+ 40050a: a7 fa ff a0 ahi %r15,-96
+ 40050e: 50 00 f0 00 st %r0,0(%r15)
+ return(test(5));
+ 400512: 58 10 d0 00 l %r1,0(%r13)
+ 400516: a7 28 00 05 lhi %r2,5
+ 40051a: 0d e1 basr %r14,%r1
+ # compiler adds 1 extra instruction to epilogue this is done to
+ # avoid processor pipeline stalls owing to data dependencies on g5 &
+ # above as register 14 in the old code was needed directly after being loaded
+ # by the lm %r11,%r15,140(%r15) for the br %14.
+ 40051c: 58 40 f0 98 l %r4,152(%r15)
+ 400520: 98 7f f0 7c lm %r7,%r15,124(%r15)
+ 400524: 07 f4 br %r4
+ }
+
+
+Hartmut ( our compiler developer ) also has been threatening to take out the
+stack backchain in optimised code as this also causes pipeline stalls, you
+have been warned.
+
+64 bit z/Architecture code disassembly
+--------------------------------------
+
+If you understand the stuff above you'll understand the stuff
+below too so I'll avoid repeating myself & just say that
+some of the instructions have g's on the end of them to indicate
+they are 64 bit & the stack offsets are a bigger,
+the only other difference you'll find between 32 & 64 bit is that
+we now use f4 & f6 for floating point arguments on 64 bit::
+
+ 00000000800005b0 <test>:
+ int test(int b)
+ {
+ return(5+b);
+ 800005b0: a7 2a 00 05 ahi %r2,5
+ 800005b4: b9 14 00 22 lgfr %r2,%r2 # downcast to integer
+ 800005b8: 07 fe br %r14
+ 800005ba: 07 07 bcr 0,%r7
+
+
+ }
+
+ 00000000800005bc <main>:
+ main(int argc,char *argv[])
+ {
+ 800005bc: eb bf f0 58 00 24 stmg %r11,%r15,88(%r15)
+ 800005c2: b9 04 00 1f lgr %r1,%r15
+ 800005c6: a7 fb ff 60 aghi %r15,-160
+ 800005ca: e3 10 f0 00 00 24 stg %r1,0(%r15)
+ return(test(5));
+ 800005d0: a7 29 00 05 lghi %r2,5
+ # brasl allows jumps > 64k & is overkill here bras would do fune
+ 800005d4: c0 e5 ff ff ff ee brasl %r14,800005b0 <test>
+ 800005da: e3 40 f1 10 00 04 lg %r4,272(%r15)
+ 800005e0: eb bf f0 f8 00 04 lmg %r11,%r15,248(%r15)
+ 800005e6: 07 f4 br %r4
+ }
+
+
+
+Compiling programs for debugging on Linux for s/390 & z/Architecture
+====================================================================
+-gdwarf-2 now works it should be considered the default debugging
+format for s/390 & z/Architecture as it is more reliable for debugging
+shared libraries, normal -g debugging works much better now
+Thanks to the IBM java compiler developers bug reports.
+
+This is typically done adding/appending the flags -g or -gdwarf-2 to the
+CFLAGS & LDFLAGS variables Makefile of the program concerned.
+
+If using gdb & you would like accurate displays of registers &
+stack traces compile without optimisation i.e make sure
+that there is no -O2 or similar on the CFLAGS line of the Makefile &
+the emitted gcc commands, obviously this will produce worse code
+( not advisable for shipment ) but it is an aid to the debugging process.
+
+This aids debugging because the compiler will copy parameters passed in
+in registers onto the stack so backtracing & looking at passed in
+parameters will work, however some larger programs which use inline functions
+will not compile without optimisation.
+
+Debugging with optimisation has since much improved after fixing
+some bugs, please make sure you are using gdb-5.0 or later developed
+after Nov'2000.
+
+
+
+Debugging under VM
+==================
+
+Notes
+-----
+Addresses & values in the VM debugger are always hex never decimal
+Address ranges are of the format <HexValue1>-<HexValue2> or
+<HexValue1>.<HexValue2>
+For example, the address range 0x2000 to 0x3000 can be described as 2000-3000
+or 2000.1000
+
+The VM Debugger is case insensitive.
+
+VM's strengths are usually other debuggers weaknesses you can get at any
+resource no matter how sensitive e.g. memory management resources, change
+address translation in the PSW. For kernel hacking you will reap dividends if
+you get good at it.
+
+The VM Debugger displays operators but not operands, and also the debugger
+displays useful information on the same line as the author of the code probably
+felt that it was a good idea not to go over the 80 columns on the screen.
+This isn't as unintuitive as it may seem as the s/390 instructions are easy to
+decode mentally and you can make a good guess at a lot of them as all the
+operands are nibble (half byte aligned).
+So if you have an objdump listing by hand, it is quite easy to follow, and if
+you don't have an objdump listing keep a copy of the s/390 Reference Summary
+or alternatively the s/390 principles of operation next to you.
+e.g. even I can guess that
+0001AFF8' LR 180F CC 0
+is a ( load register ) lr r0,r15
+
+Also it is very easy to tell the length of a 390 instruction from the 2 most
+significant bits in the instruction (not that this info is really useful except
+if you are trying to make sense of a hexdump of code).
+Here is a table
+
+======================= ==================
+Bits Instruction Length
+======================= ==================
+00 2 Bytes
+01 4 Bytes
+10 4 Bytes
+11 6 Bytes
+======================= ==================
+
+The debugger also displays other useful info on the same line such as the
+addresses being operated on destination addresses of branches & condition codes.
+e.g.::
+
+ 00019736' AHI A7DAFF0E CC 1
+ 000198BA' BRC A7840004 -> 000198C2' CC 0
+ 000198CE' STM 900EF068 >> 0FA95E78 CC 2
+
+
+
+Useful VM debugger commands
+---------------------------
+
+I suppose I'd better mention this before I start
+to list the current active traces do::
+
+ Q TR
+
+there can be a maximum of 255 of these per set
+( more about trace sets later ).
+
+To stop traces issue a::
+
+ TR END.
+
+To delete a particular breakpoint issue::
+
+ TR DEL <breakpoint number>
+
+The PA1 key drops to CP mode so you can issue debugger commands,
+Doing alt c (on my 3270 console at least ) clears the screen.
+
+hitting b <enter> comes back to the running operating system
+from cp mode ( in our case linux ).
+
+It is typically useful to add shortcuts to your profile.exec file
+if you have one ( this is roughly equivalent to autoexec.bat in DOS ).
+file here are a few from mine::
+
+ /* this gives me command history on issuing f12 */
+ set pf12 retrieve
+ /* this continues */
+ set pf8 imm b
+ /* goes to trace set a */
+ set pf1 imm tr goto a
+ /* goes to trace set b */
+ set pf2 imm tr goto b
+ /* goes to trace set c */
+ set pf3 imm tr goto c
+
+
+
+Instruction Tracing
+-------------------
+Setting a simple breakpoint::
+
+ TR I PSWA <address>
+
+To debug a particular function try::
+
+ TR I R <function address range>
+ TR I on its own will single step.
+ TR I DATA <MNEMONIC> <OPTIONAL RANGE> will trace for particular mnemonics
+
+e.g.::
+
+ TR I DATA 4D R 0197BC.4000
+
+will trace for BAS'es ( opcode 4D ) in the range 0197BC.4000
+
+if you were inclined you could add traces for all branch instructions &
+suffix them with the run prefix so you would have a backtrace on screen
+when a program crashes::
+
+ TR BR <INTO OR FROM> will trace branches into or out of an address.
+
+e.g.::
+
+ TR BR INTO 0
+
+is often quite useful if a program is getting awkward & deciding
+to branch to 0 & crashing as this will stop at the address before in jumps to 0.
+
+::
+
+ TR I R <address range> RUN cmd d g
+
+single steps a range of addresses but stays running &
+displays the gprs on each step.
+
+
+
+Displaying & modifying Registers
+--------------------------------
+D G
+ will display all the gprs
+
+Adding a extra G to all the commands is necessary to access the full 64 bit
+content in VM on z/Architecture. Obviously this isn't required for access
+registers as these are still 32 bit.
+
+e.g.
+
+DGG
+ instead of DG
+
+D X
+ will display all the control registers
+D AR
+ will display all the access registers
+D AR4-7
+ will display access registers 4 to 7
+CPU ALL D G
+ will display the GRPS of all CPUS in the configuration
+D PSW
+ will display the current PSW
+st PSW 2000
+ will put the value 2000 into the PSW & cause crash your machine.
+D PREFIX
+ displays the prefix offset
+
+
+Displaying Memory
+-----------------
+To display memory mapped using the current PSW's mapping try::
+
+ D <range>
+
+To make VM display a message each time it hits a particular address and
+continue try:
+
+D I<range>
+ will disassemble/display a range of instructions.
+
+ST addr 32 bit word
+ will store a 32 bit aligned address
+D T<range>
+ will display the EBCDIC in an address (if you are that way inclined)
+D R<range>
+ will display real addresses ( without DAT ) but with prefixing.
+
+There are other complex options to display if you need to get at say home space
+but are in primary space the easiest thing to do is to temporarily
+modify the PSW to the other addressing mode, display the stuff & then
+restore it.
+
+
+
+Hints
+-----
+If you want to issue a debugger command without halting your virtual machine
+with the PA1 key try prefixing the command with #CP e.g.::
+
+ #cp tr i pswa 2000
+
+also suffixing most debugger commands with RUN will cause them not
+to stop just display the mnemonic at the current instruction on the console.
+
+If you have several breakpoints you want to put into your program &
+you get fed up of cross referencing with System.map
+you can do the following trick for several symbols.
+
+::
+
+ grep do_signal System.map
+
+which emits the following among other things::
+
+ 0001f4e0 T do_signal
+
+now you can do::
+
+ TR I PSWA 0001f4e0 cmd msg * do_signal
+
+This sends a message to your own console each time do_signal is entered.
+( As an aside I wrote a perl script once which automatically generated a REXX
+script with breakpoints on every kernel procedure, this isn't a good idea
+because there are thousands of these routines & VM can only set 255 breakpoints
+at a time so you nearly had to spend as long pruning the file down as you would
+entering the msgs by hand), however, the trick might be useful for a single
+object file. In the 3270 terminal emulator x3270 there is a very useful option
+in the file menu called "Save Screen In File" - this is very good for keeping a
+copy of traces.
+
+From CMS help <command name> will give you online help on a particular command.
+e.g.::
+
+ HELP DISPLAY
+
+Also CP has a file called profile.exec which automatically gets called
+on startup of CMS ( like autoexec.bat ), keeping on a DOS analogy session
+CP has a feature similar to doskey, it may be useful for you to
+use profile.exec to define some keystrokes.
+
+SET PF9 IMM B
+ This does a single step in VM on pressing F8.
+
+SET PF10 ^
+ This sets up the ^ key.
+ which can be used for ^c (ctrl-c),^z (ctrl-z) which can't be typed
+ directly into some 3270 consoles.
+
+SET PF11 ^-
+ This types the starting keystrokes for a sysrq see SysRq below.
+SET PF12 RETRIEVE
+ This retrieves command history on pressing F12.
+
+
+Sometimes in VM the display is set up to scroll automatically this
+can be very annoying if there are messages you wish to look at
+to stop this do
+
+TERM MORE 255 255
+ This will nearly stop automatic screen updates, however it will
+ cause a denial of service if lots of messages go to the 3270 console,
+ so it would be foolish to use this as the default on a production machine.
+
+
+Tracing particular processes
+----------------------------
+The kernel's text segment is intentionally at an address in memory that it will
+very seldom collide with text segments of user programs ( thanks Martin ),
+this simplifies debugging the kernel.
+However it is quite common for user processes to have addresses which collide
+this can make debugging a particular process under VM painful under normal
+circumstances as the process may change when doing a::
+
+ TR I R <address range>.
+
+Thankfully after reading VM's online help I figured out how to debug
+I particular process.
+
+Your first problem is to find the STD ( segment table designation )
+of the program you wish to debug.
+There are several ways you can do this here are a few
+
+Run::
+
+ objdump --syms <program to be debugged> | grep main
+
+To get the address of main in the program. Then::
+
+ tr i pswa <address of main>
+
+Start the program, if VM drops to CP on what looks like the entry
+point of the main function this is most likely the process you wish to debug.
+Now do a D X13 or D XG13 on z/Architecture.
+
+On 31 bit the STD is bits 1-19 ( the STO segment table origin )
+& 25-31 ( the STL segment table length ) of CR13.
+
+now type::
+
+ TR I R STD <CR13's value> 0.7fffffff
+
+e.g.::
+
+ TR I R STD 8F32E1FF 0.7fffffff
+
+Another very useful variation is::
+
+ TR STORE INTO STD <CR13's value> <address range>
+
+for finding out when a particular variable changes.
+
+An alternative way of finding the STD of a currently running process
+is to do the following, ( this method is more complex but
+could be quite convenient if you aren't updating the kernel much &
+so your kernel structures will stay constant for a reasonable period of
+time ).
+
+::
+
+ grep task /proc/<pid>/status
+
+from this you should see something like::
+
+ task: 0f160000 ksp: 0f161de8 pt_regs: 0f161f68
+
+This now gives you a pointer to the task structure.
+
+Now make::
+
+ CC:="s390-gcc -g" kernel/sched.s
+
+To get the task_struct stabinfo.
+
+( task_struct is defined in include/linux/sched.h ).
+
+Now we want to look at
+task->active_mm->pgd
+
+on my machine the active_mm in the task structure stab is
+active_mm:(4,12),672,32
+
+its offset is 672/8=84=0x54
+
+the pgd member in the mm_struct stab is
+pgd:(4,6)=*(29,5),96,32
+so its offset is 96/8=12=0xc
+
+so we'll::
+
+ hexdump -s 0xf160054 /dev/mem | more
+
+i.e. task_struct+active_mm offset
+to look at the active_mm member::
+
+ f160054 0fee cc60 0019 e334 0000 0000 0000 0011
+
+::
+
+ hexdump -s 0x0feecc6c /dev/mem | more
+
+i.e. active_mm+pgd offset::
+
+ feecc6c 0f2c 0000 0000 0001 0000 0001 0000 0010
+
+we get something like
+now do::
+
+ TR I R STD <pgd|0x7f> 0.7fffffff
+
+i.e. the 0x7f is added because the pgd only
+gives the page table origin & we need to set the low bits
+to the maximum possible segment table length.
+
+::
+
+ TR I R STD 0f2c007f 0.7fffffff
+
+on z/Architecture you'll probably need to do::
+
+ TR I R STD <pgd|0x7> 0.ffffffffffffffff
+
+to set the TableType to 0x1 & the Table length to 3.
+
+
+
+Tracing Program Exceptions
+--------------------------
+If you get a crash which says something like
+illegal operation or specification exception followed by a register dump
+You can restart linux & trace these using the tr prog <range or value> trace
+option.
+
+
+The most common ones you will normally be tracing for is:
+
+- 1=operation exception
+- 2=privileged operation exception
+- 4=protection exception
+- 5=addressing exception
+- 6=specification exception
+- 10=segment translation exception
+- 11=page translation exception
+
+The full list of these is on page 22 of the current s/390 Reference Summary.
+e.g.
+
+tr prog 10 will trace segment translation exceptions.
+
+tr prog on its own will trace all program interruption codes.
+
+Trace Sets
+----------
+On starting VM you are initially in the INITIAL trace set.
+You can do a Q TR to verify this.
+If you have a complex tracing situation where you wish to wait for instance
+till a driver is open before you start tracing IO, but know in your
+heart that you are going to have to make several runs through the code till you
+have a clue whats going on.
+
+What you can do is::
+
+ TR I PSWA <Driver open address>
+
+hit b to continue till breakpoint
+
+reach the breakpoint
+
+now do your::
+
+ TR GOTO B
+ TR IO 7c08-7c09 inst int run
+
+or whatever the IO channels you wish to trace are & hit b
+
+To got back to the initial trace set do::
+
+ TR GOTO INITIAL
+
+& the TR I PSWA <Driver open address> will be the only active breakpoint again.
+
+
+Tracing linux syscalls under VM
+-------------------------------
+Syscalls are implemented on Linux for S390 by the Supervisor call instruction
+(SVC). There 256 possibilities of these as the instruction is made up of a 0xA
+opcode and the second byte being the syscall number. They are traced using the
+simple command::
+
+ TR SVC <Optional value or range>
+
+the syscalls are defined in linux/arch/s390/include/asm/unistd.h
+e.g. to trace all file opens just do::
+
+ TR SVC 5 ( as this is the syscall number of open )
+
+
+SMP Specific commands
+---------------------
+To find out how many cpus you have
+Q CPUS displays all the CPU's available to your virtual machine
+To find the cpu that the current cpu VM debugger commands are being directed at
+do Q CPU to change the current cpu VM debugger commands are being directed at
+do::
+
+ CPU <desired cpu no>
+
+On a SMP guest issue a command to all CPUs try prefixing the command with cpu
+all. To issue a command to a particular cpu try cpu <cpu number> e.g.::
+
+ CPU 01 TR I R 2000.3000
+
+If you are running on a guest with several cpus & you have a IO related problem
+& cannot follow the flow of code but you know it isn't smp related.
+
+from the bash prompt issue::
+
+ shutdown -h now or halt.
+
+do a::
+
+ Q CPUS
+
+to find out how many cpus you have detach each one of them from cp except
+cpu 0 by issuing a::
+
+ DETACH CPU 01-(number of cpus in configuration)
+
+& boot linux again.
+
+TR SIGP
+ will trace inter processor signal processor instructions.
+
+DEFINE CPU 01-(number in configuration)
+ will get your guests cpus back.
+
+
+Help for displaying ascii textstrings
+-------------------------------------
+On the very latest VM Nucleus'es VM can now display ascii
+( thanks Neale for the hint ) by doing::
+
+ D TX<lowaddr>.<len>
+
+e.g.::
+
+ D TX0.100
+
+Alternatively
+=============
+Under older VM debuggers (I love EBDIC too) you can use following little
+program which converts a command line of hex digits to ascii text. It can be
+compiled under linux and you can copy the hex digits from your x3270 terminal
+to your xterm if you are debugging from a linuxbox.
+
+This is quite useful when looking at a parameter passed in as a text string
+under VM ( unless you are good at decoding ASCII in your head ).
+
+e.g. consider tracing an open syscall::
+
+ TR SVC 5
+
+We have stopped at a breakpoint::
+
+ 000151B0' SVC 0A05 -> 0001909A' CC 0
+
+D 20.8 to check the SVC old psw in the prefix area and see was it from userspace
+(for the layout of the prefix area consult the "Fixed Storage Locations"
+chapter of the s/390 Reference Summary if you have it available).
+
+::
+
+ V00000020 070C2000 800151B2
+
+The problem state bit wasn't set & it's also too early in the boot sequence
+for it to be a userspace SVC if it was we would have to temporarily switch the
+psw to user space addressing so we could get at the first parameter of the open
+in gpr2.
+
+Next do a::
+
+ D G2
+ GPR 2 = 00014CB4
+
+Now display what gpr2 is pointing to::
+
+ D 00014CB4.20
+ V00014CB4 2F646576 2F636F6E 736F6C65 00001BF5
+ V00014CC4 FC00014C B4001001 E0001000 B8070707
+
+Now copy the text till the first 00 hex ( which is the end of the string
+to an xterm & do hex2ascii on it::
+
+ hex2ascii 2F646576 2F636F6E 736F6C65 00
+
+outputs::
+
+ Decoded Hex:=/ d e v / c o n s o l e 0x00
+
+We were opening the console device,
+
+You can compile the code below yourself for practice :-),
+
+::
+
+ /*
+ * hex2ascii.c
+ * a useful little tool for converting a hexadecimal command line to ascii
+ *
+ * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ * (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation.
+ */
+ #include <stdio.h>
+
+ int main(int argc,char *argv[])
+ {
+ int cnt1,cnt2,len,toggle=0;
+ int startcnt=1;
+ unsigned char c,hex;
+
+ if(argc>1&&(strcmp(argv[1],"-a")==0))
+ startcnt=2;
+ printf("Decoded Hex:=");
+ for(cnt1=startcnt;cnt1<argc;cnt1++)
+ {
+ len=strlen(argv[cnt1]);
+ for(cnt2=0;cnt2<len;cnt2++)
+ {
+ c=argv[cnt1][cnt2];
+ if(c>='0'&&c<='9')
+ c=c-'0';
+ if(c>='A'&&c<='F')
+ c=c-'A'+10;
+ if(c>='a'&&c<='f')
+ c=c-'a'+10;
+ switch(toggle)
+ {
+ case 0:
+ hex=c<<4;
+ toggle=1;
+ break;
+ case 1:
+ hex+=c;
+ if(hex<32||hex>127)
+ {
+ if(startcnt==1)
+ printf("0x%02X ",(int)hex);
+ else
+ printf(".");
+ }
+ else
+ {
+ printf("%c",hex);
+ if(startcnt==1)
+ printf(" ");
+ }
+ toggle=0;
+ break;
+ }
+ }
+ }
+ printf("\n");
+ }
+
+
+
+
+Stack tracing under VM
+----------------------
+A basic backtrace
+-----------------
+
+Here are the tricks I use 9 out of 10 times it works pretty well,
+
+When your backchain reaches a dead end
+--------------------------------------
+This can happen when an exception happens in the kernel and the kernel is
+entered twice. If you reach the NULL pointer at the end of the back chain you
+should be able to sniff further back if you follow the following tricks.
+1) A kernel address should be easy to recognise since it is in
+primary space & the problem state bit isn't set & also
+The Hi bit of the address is set.
+2) Another backchain should also be easy to recognise since it is an
+address pointing to another address approximately 100 bytes or 0x70 hex
+behind the current stackpointer.
+
+
+Here is some practice.
+
+boot the kernel & hit PA1 at some random time
+
+d g to display the gprs, this should display something like::
+
+ GPR 0 = 00000001 00156018 0014359C 00000000
+ GPR 4 = 00000001 001B8888 000003E0 00000000
+ GPR 8 = 00100080 00100084 00000000 000FE000
+ GPR 12 = 00010400 8001B2DC 8001B36A 000FFED8
+
+Note that GPR14 is a return address but as we are real men we are going to
+trace the stack.
+display 0x40 bytes after the stack pointer::
+
+ V000FFED8 000FFF38 8001B838 80014C8E 000FFF38
+ V000FFEE8 00000000 00000000 000003E0 00000000
+ V000FFEF8 00100080 00100084 00000000 000FE000
+ V000FFF08 00010400 8001B2DC 8001B36A 000FFED8
+
+
+Ah now look at whats in sp+56 (sp+0x38) this is 8001B36A our saved r14 if
+you look above at our stackframe & also agrees with GPR14.
+
+now backchain::
+
+ d 000FFF38.40
+
+we now are taking the contents of SP to get our first backchain::
+
+ V000FFF38 000FFFA0 00000000 00014995 00147094
+ V000FFF48 00147090 001470A0 000003E0 00000000
+ V000FFF58 00100080 00100084 00000000 001BF1D0
+ V000FFF68 00010400 800149BA 80014CA6 000FFF38
+
+This displays a 2nd return address of 80014CA6
+
+now do::
+
+ d 000FFFA0.40
+
+for our 3rd backchain::
+
+ V000FFFA0 04B52002 0001107F 00000000 00000000
+ V000FFFB0 00000000 00000000 FF000000 0001107F
+ V000FFFC0 00000000 00000000 00000000 00000000
+ V000FFFD0 00010400 80010802 8001085A 000FFFA0
+
+
+our 3rd return address is 8001085A
+
+as the 04B52002 looks suspiciously like rubbish it is fair to assume that the
+kernel entry routines for the sake of optimisation don't set up a backchain.
+
+now look at System.map to see if the addresses make any sense::
+
+ grep -i 0001b3 System.map
+
+outputs among other things::
+
+ 0001b304 T cpu_idle
+
+so 8001B36A
+is cpu_idle+0x66 ( quiet the cpu is asleep, don't wake it )
+
+::
+
+ grep -i 00014 System.map
+
+produces among other things::
+
+ 00014a78 T start_kernel
+
+so 0014CA6 is start_kernel+some hex number I can't add in my head.
+
+::
+
+ grep -i 00108 System.map
+
+this produces::
+
+ 00010800 T _stext
+
+so 8001085A is _stext+0x5a
+
+Congrats you've done your first backchain.
+
+
+
+s/390 & z/Architecture IO Overview
+==================================
+
+I am not going to give a course in 390 IO architecture as this would take me
+quite a while and I'm no expert. Instead I'll give a 390 IO architecture
+summary for Dummies. If you have the s/390 principles of operation available
+read this instead. If nothing else you may find a few useful keywords in here
+and be able to use them on a web search engine to find more useful information.
+
+Unlike other bus architectures modern 390 systems do their IO using mostly
+fibre optics and devices such as tapes and disks can be shared between several
+mainframes. Also S390 can support up to 65536 devices while a high end PC based
+system might be choking with around 64.
+
+Here is some of the common IO terminology:
+
+Subchannel:
+ This is the logical number most IO commands use to talk to an IO device. There
+ can be up to 0x10000 (65536) of these in a configuration, typically there are a
+ few hundred. Under VM for simplicity they are allocated contiguously, however
+ on the native hardware they are not. They typically stay consistent between
+ boots provided no new hardware is inserted or removed.
+
+ Under Linux for s390 we use these as IRQ's and also when issuing an IO command
+ (CLEAR SUBCHANNEL, HALT SUBCHANNEL, MODIFY SUBCHANNEL, RESUME SUBCHANNEL,
+ START SUBCHANNEL, STORE SUBCHANNEL and TEST SUBCHANNEL). We use this as the ID
+ of the device we wish to talk to. The most important of these instructions are
+ START SUBCHANNEL (to start IO), TEST SUBCHANNEL (to check whether the IO
+ completed successfully) and HALT SUBCHANNEL (to kill IO). A subchannel can have
+ up to 8 channel paths to a device, this offers redundancy if one is not
+ available.
+
+Device Number:
+ This number remains static and is closely tied to the hardware. There are 65536
+ of these, made up of a CHPID (Channel Path ID, the most significant 8 bits) and
+ another lsb 8 bits. These remain static even if more devices are inserted or
+ removed from the hardware. There is a 1 to 1 mapping between subchannels and
+ device numbers, provided devices aren't inserted or removed.
+
+Channel Control Words:
+ CCWs are linked lists of instructions initially pointed to by an operation
+ request block (ORB), which is initially given to Start Subchannel (SSCH)
+ command along with the subchannel number for the IO subsystem to process
+ while the CPU continues executing normal code.
+ CCWs come in two flavours, Format 0 (24 bit for backward compatibility) and
+ Format 1 (31 bit). These are typically used to issue read and write (and many
+ other) instructions. They consist of a length field and an absolute address
+ field.
+
+ Each IO typically gets 1 or 2 interrupts, one for channel end (primary status)
+ when the channel is idle, and the second for device end (secondary status).
+ Sometimes you get both concurrently. You check how the IO went on by issuing a
+ TEST SUBCHANNEL at each interrupt, from which you receive an Interruption
+ response block (IRB). If you get channel and device end status in the IRB
+ without channel checks etc. your IO probably went okay. If you didn't you
+ probably need to examine the IRB, extended status word etc.
+ If an error occurs, more sophisticated control units have a facility known as
+ concurrent sense. This means that if an error occurs Extended sense information
+ will be presented in the Extended status word in the IRB. If not you have to
+ issue a subsequent SENSE CCW command after the test subchannel.
+
+
+TPI (Test pending interrupt) can also be used for polled IO, but in
+multitasking multiprocessor systems it isn't recommended except for
+checking special cases (i.e. non looping checks for pending IO etc.).
+
+Store Subchannel and Modify Subchannel can be used to examine and modify
+operating characteristics of a subchannel (e.g. channel paths).
+
+Other IO related Terms:
+
+Sysplex:
+ S390's Clustering Technology
+QDIO:
+ S390's new high speed IO architecture to support devices such as gigabit
+ ethernet, this architecture is also designed to be forward compatible with
+ upcoming 64 bit machines.
+
+
+General Concepts
+----------------
+
+Input Output Processors (IOP's) are responsible for communicating between
+the mainframe CPU's & the channel & relieve the mainframe CPU's from the
+burden of communicating with IO devices directly, this allows the CPU's to
+concentrate on data processing.
+
+IOP's can use one or more links ( known as channel paths ) to talk to each
+IO device. It first checks for path availability & chooses an available one,
+then starts ( & sometimes terminates IO ).
+There are two types of channel path: ESCON & the Parallel IO interface.
+
+IO devices are attached to control units, control units provide the
+logic to interface the channel paths & channel path IO protocols to
+the IO devices, they can be integrated with the devices or housed separately
+& often talk to several similar devices ( typical examples would be raid
+controllers or a control unit which connects to 1000 3270 terminals )::
+
+
+ +---------------------------------------------------------------+
+ | +-----+ +-----+ +-----+ +-----+ +----------+ +----------+ |
+ | | CPU | | CPU | | CPU | | CPU | | Main | | Expanded | |
+ | | | | | | | | | | Memory | | Storage | |
+ | +-----+ +-----+ +-----+ +-----+ +----------+ +----------+ |
+ |---------------------------------------------------------------+
+ | IOP | IOP | IOP |
+ |---------------------------------------------------------------
+ | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C |
+ ----------------------------------------------------------------
+ || ||
+ || Bus & Tag Channel Path || ESCON
+ || ====================== || Channel
+ || || || || Path
+ +----------+ +----------+ +----------+
+ | | | | | |
+ | CU | | CU | | CU |
+ | | | | | |
+ +----------+ +----------+ +----------+
+ | | | | |
+ +----------+ +----------+ +----------+ +----------+ +----------+
+ |I/O Device| |I/O Device| |I/O Device| |I/O Device| |I/O Device|
+ +----------+ +----------+ +----------+ +----------+ +----------+
+ CPU = Central Processing Unit
+ C = Channel
+ IOP = IP Processor
+ CU = Control Unit
+
+The 390 IO systems come in 2 flavours the current 390 machines support both
+
+The Older 360 & 370 Interface,sometimes called the Parallel I/O interface,
+sometimes called Bus-and Tag & sometimes Original Equipment Manufacturers
+Interface (OEMI).
+
+This byte wide Parallel channel path/bus has parity & data on the "Bus" cable
+and control lines on the "Tag" cable. These can operate in byte multiplex mode
+for sharing between several slow devices or burst mode and monopolize the
+channel for the whole burst. Up to 256 devices can be addressed on one of these
+cables. These cables are about one inch in diameter. The maximum unextended
+length supported by these cables is 125 Meters but this can be extended up to
+2km with a fibre optic channel extended such as a 3044. The maximum burst speed
+supported is 4.5 megabytes per second. However, some really old processors
+support only transfer rates of 3.0, 2.0 & 1.0 MB/sec.
+One of these paths can be daisy chained to up to 8 control units.
+
+
+ESCON if fibre optic it is also called FICON
+Was introduced by IBM in 1990. Has 2 fibre optic cables and uses either leds or
+lasers for communication at a signaling rate of up to 200 megabits/sec. As
+10bits are transferred for every 8 bits info this drops to 160 megabits/sec
+and to 18.6 Megabytes/sec once control info and CRC are added. ESCON only
+operates in burst mode.
+
+ESCONs typical max cable length is 3km for the led version and 20km for the
+laser version known as XDF (extended distance facility). This can be further
+extended by using an ESCON director which triples the above mentioned ranges.
+Unlike Bus & Tag as ESCON is serial it uses a packet switching architecture,
+the standard Bus & Tag control protocol is however present within the packets.
+Up to 256 devices can be attached to each control unit that uses one of these
+interfaces.
+
+Common 390 Devices include:
+Network adapters typically OSA2,3172's,2116's & OSA-E gigabit ethernet adapters,
+Consoles 3270 & 3215 (a teletype emulated under linux for a line mode console).
+DASD's direct access storage devices ( otherwise known as hard disks ).
+Tape Drives.
+CTC ( Channel to Channel Adapters ),
+ESCON or Parallel Cables used as a very high speed serial link
+between 2 machines.
+
+
+Debugging IO on s/390 & z/Architecture under VM
+===============================================
+
+Now we are ready to go on with IO tracing commands under VM
+
+A few self explanatory queries::
+
+ Q OSA
+ Q CTC
+ Q DISK ( This command is CMS specific )
+ Q DASD
+
+Q OSA on my machine returns::
+
+ OSA 7C08 ON OSA 7C08 SUBCHANNEL = 0000
+ OSA 7C09 ON OSA 7C09 SUBCHANNEL = 0001
+ OSA 7C14 ON OSA 7C14 SUBCHANNEL = 0002
+ OSA 7C15 ON OSA 7C15 SUBCHANNEL = 0003
+
+If you have a guest with certain privileges you may be able to see devices
+which don't belong to you. To avoid this, add the option V.
+e.g.::
+
+ Q V OSA
+
+Now using the device numbers returned by this command we will
+Trace the io starting up on the first device 7c08 & 7c09
+In our simplest case we can trace the
+start subchannels
+like TR SSCH 7C08-7C09
+or the halt subchannels
+or TR HSCH 7C08-7C09
+MSCH's ,STSCH's I think you can guess the rest
+
+A good trick is tracing all the IO's and CCWS and spooling them into the reader
+of another VM guest so he can ftp the logfile back to his own machine. I'll do
+a small bit of this and give you a look at the output.
+
+1) Spool stdout to VM reader::
+
+ SP PRT TO (another vm guest ) or * for the local vm guest
+
+2) Fill the reader with the trace::
+
+ TR IO 7c08-7c09 INST INT CCW PRT RUN
+
+3) Start up linux::
+
+ i 00c
+4) Finish the trace::
+
+ TR END
+
+5) close the reader::
+
+ C PRT
+
+6) list reader contents::
+
+ RDRLIST
+
+7) copy it to linux4's minidisk::
+
+ RECEIVE / LOG TXT A1 ( replace
+
+8)
+filel & press F11 to look at it
+You should see something like::
+
+ 00020942' SSCH B2334000 0048813C CC 0 SCH 0000 DEV 7C08
+ CPA 000FFDF0 PARM 00E2C9C4 KEY 0 FPI C0 LPM 80
+ CCW 000FFDF0 E4200100 00487FE8 0000 E4240100 ........
+ IDAL 43D8AFE8
+ IDAL 0FB76000
+ 00020B0A' I/O DEV 7C08 -> 000197BC' SCH 0000 PARM 00E2C9C4
+ 00021628' TSCH B2354000 >> 00488164 CC 0 SCH 0000 DEV 7C08
+ CCWA 000FFDF8 DEV STS 0C SCH STS 00 CNT 00EC
+ KEY 0 FPI C0 CC 0 CTLS 4007
+ 00022238' STSCH B2344000 >> 00488108 CC 0 SCH 0000 DEV 7C08
+
+If you don't like messing up your readed ( because you possibly booted from it )
+you can alternatively spool it to another readers guest.
+
+
+Other common VM device related commands
+---------------------------------------------
+These commands are listed only because they have
+been of use to me in the past & may be of use to
+you too. For more complete info on each of the commands
+use type HELP <command> from CMS.
+
+detaching devices::
+
+ DET <devno range>
+ ATT <devno range> <guest>
+
+attach a device to guest * for your own guest
+
+READY <devno>
+ cause VM to issue a fake interrupt.
+
+The VARY command is normally only available to VM administrators::
+
+ VARY ON PATH <path> TO <devno range>
+ VARY OFF PATH <PATH> FROM <devno range>
+
+This is used to switch on or off channel paths to devices.
+
+Q CHPID <channel path ID>
+ This displays state of devices using this channel path
+
+D SCHIB <subchannel>
+ This displays the subchannel information SCHIB block for the device.
+ this I believe is also only available to administrators.
+
+DEFINE CTC <devno>
+ defines a virtual CTC channel to channel connection
+ 2 need to be defined on each guest for the CTC driver to use.
+
+COUPLE devno userid remote devno
+ Joins a local virtual device to a remote virtual device
+ ( commonly used for the CTC driver ).
+
+Building a VM ramdisk under CMS which linux can use::
+
+ def vfb-<blocksize> <subchannel> <number blocks>
+
+blocksize is commonly 4096 for linux.
+
+Formatting it::
+
+ format <subchannel> <driver letter e.g. x> (blksize <blocksize>
+
+Sharing a disk between multiple guests::
+
+ LINK userid devno1 devno2 mode password
+
+
+
+GDB on S390
+===========
+N.B. if compiling for debugging gdb works better without optimisation
+( see Compiling programs for debugging )
+
+invocation
+----------
+gdb <victim program> <optional corefile>
+
+Online help
+-----------
+help: gives help on commands
+
+e.g.::
+
+ help
+ help display
+
+Note gdb's online help is very good use it.
+
+
+Assembly
+--------
+info registers:
+ displays registers other than floating point.
+
+info all-registers:
+ displays floating points as well.
+
+disassemble:
+ disassembles
+
+e.g.::
+
+ disassemble without parameters will disassemble the current function
+ disassemble $pc $pc+10
+
+Viewing & modifying variables
+-----------------------------
+print or p:
+ displays variable or register
+
+e.g. p/x $sp will display the stack pointer
+
+display:
+ prints variable or register each time program stops
+
+e.g.::
+
+ display/x $pc will display the program counter
+ display argc
+
+undisplay:
+ undo's display's
+
+info breakpoints:
+ shows all current breakpoints
+
+info stack:
+ shows stack back trace (if this doesn't work too well, I'll show
+ you the stacktrace by hand below).
+
+info locals:
+ displays local variables.
+
+info args:
+ display current procedure arguments.
+
+set args:
+ will set argc & argv each time the victim program is invoked
+
+e.g.::
+
+ set <variable>=value
+ set argc=100
+ set $pc=0
+
+
+
+Modifying execution
+-------------------
+step:
+ steps n lines of sourcecode
+
+step
+ steps 1 line.
+
+step 100
+ steps 100 lines of code.
+
+next:
+ like step except this will not step into subroutines
+
+stepi:
+ steps a single machine code instruction.
+
+e.g.::
+
+ stepi 100
+
+nexti:
+ steps a single machine code instruction but will not step into
+ subroutines.
+
+finish:
+ will run until exit of the current routine
+
+run:
+ (re)starts a program
+
+cont:
+ continues a program
+
+quit:
+ exits gdb.
+
+
+breakpoints
+------------
+
+break
+ sets a breakpoint
+
+e.g.::
+
+ break main
+ break *$pc
+ break *0x400618
+
+Here's a really useful one for large programs
+
+rbr
+ Set a breakpoint for all functions matching REGEXP
+
+e.g.::
+
+ rbr 390
+
+will set a breakpoint with all functions with 390 in their name.
+
+info breakpoints
+ lists all breakpoints
+
+delete:
+ delete breakpoint by number or delete them all
+
+e.g.
+
+delete 1
+ will delete the first breakpoint
+
+
+delete
+ will delete them all
+
+watch:
+ This will set a watchpoint ( usually hardware assisted ),
+
+This will watch a variable till it changes
+
+e.g.
+
+watch cnt
+ will watch the variable cnt till it changes.
+
+As an aside unfortunately gdb's, architecture independent watchpoint code
+is inconsistent & not very good, watchpoints usually work but not always.
+
+info watchpoints:
+ Display currently active watchpoints
+
+condition: ( another useful one )
+ Specify breakpoint number N to break only if COND is true.
+
+Usage is `condition N COND`, where N is an integer and COND is an
+expression to be evaluated whenever breakpoint N is reached.
+
+
+
+User defined functions/macros
+-----------------------------
+define: ( Note this is very very useful,simple & powerful )
+
+usage define <name> <list of commands> end
+
+examples which you should consider putting into .gdbinit in your home
+directory::
+
+ define d
+ stepi
+ disassemble $pc $pc+10
+ end
+ define e
+ nexti
+ disassemble $pc $pc+10
+ end
+
+
+Other hard to classify stuff
+----------------------------
+signal n:
+ sends the victim program a signal.
+
+e.g. `signal 3` will send a SIGQUIT.
+
+info signals:
+ what gdb does when the victim receives certain signals.
+
+list:
+
+e.g.:
+
+list
+ lists current function source
+list 1,10
+ list first 10 lines of current file.
+
+list test.c:1,10
+
+
+directory:
+ Adds directories to be searched for source if gdb cannot find the source.
+ (note it is a bit sensitive about slashes)
+
+e.g. To add the root of the filesystem to the searchpath do::
+
+ directory //
+
+
+call <function>
+This calls a function in the victim program, this is pretty powerful
+e.g.
+(gdb) call printf("hello world")
+outputs:
+$1 = 11
+
+You might now be thinking that the line above didn't work, something extra had
+to be done.
+(gdb) call fflush(stdout)
+hello world$2 = 0
+As an aside the debugger also calls malloc & free under the hood
+to make space for the "hello world" string.
+
+
+
+hints
+-----
+1) command completion works just like bash
+ ( if you are a bad typist like me this really helps )
+
+e.g. hit br <TAB> & cursor up & down :-).
+
+2) if you have a debugging problem that takes a few steps to recreate
+put the steps into a file called .gdbinit in your current working directory
+if you have defined a few extra useful user defined commands put these in
+your home directory & they will be read each time gdb is launched.
+
+A typical .gdbinit file might be.::
+
+ break main
+ run
+ break runtime_exception
+ cont
+
+
+stack chaining in gdb by hand
+-----------------------------
+This is done using a the same trick described for VM::
+
+ p/x (*($sp+56))&0x7fffffff
+
+get the first backchain.
+
+For z/Architecture
+Replace 56 with 112 & ignore the &0x7fffffff
+in the macros below & do nasty casts to longs like the following
+as gdb unfortunately deals with printed arguments as ints which
+messes up everything.
+
+i.e. here is a 3rd backchain dereference::
+
+ p/x *(long *)(***(long ***)$sp+112)
+
+
+this outputs::
+
+ $5 = 0x528f18
+
+on my machine.
+
+Now you can use::
+
+ info symbol (*($sp+56))&0x7fffffff
+
+you might see something like::
+
+ rl_getc + 36 in section .text
+
+telling you what is located at address 0x528f18
+Now do::
+
+ p/x (*(*$sp+56))&0x7fffffff
+
+This outputs::
+
+ $6 = 0x528ed0
+
+Now do::
+
+ info symbol (*(*$sp+56))&0x7fffffff
+ rl_read_key + 180 in section .text
+
+now do::
+
+ p/x (*(**$sp+56))&0x7fffffff
+
+& so on.
+
+Disassembling instructions without debug info
+---------------------------------------------
+gdb typically complains if there is a lack of debugging
+symbols in the disassemble command with
+"No function contains specified address." To get around
+this do::
+
+ x/<number lines to disassemble>xi <address>
+
+e.g.::
+
+ x/20xi 0x400730
+
+
+
+Note:
+ Remember gdb has history just like bash you don't need to retype the
+ whole line just use the up & down arrows.
+
+
+
+For more info
+-------------
+From your linuxbox do::
+
+ man gdb
+
+or::
+
+ info gdb.
+
+core dumps
+----------
+
+What a core dump ?
+^^^^^^^^^^^^^^^^^^
+
+A core dump is a file generated by the kernel (if allowed) which contains the
+registers and all active pages of the program which has crashed.
+
+From this file gdb will allow you to look at the registers, stack trace and
+memory of the program as if it just crashed on your system. It is usually
+called core and created in the current working directory.
+
+This is very useful in that a customer can mail a core dump to a technical
+support department and the technical support department can reconstruct what
+happened. Provided they have an identical copy of this program with debugging
+symbols compiled in and the source base of this build is available.
+
+In short it is far more useful than something like a crash log could ever hope
+to be.
+
+Why have I never seen one ?
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Probably because you haven't used the command::
+
+ ulimit -c unlimited in bash
+
+to allow core dumps, now do::
+
+ ulimit -a
+
+to verify that the limit was accepted.
+
+A sample core dump
+ To create this I'm going to do::
+
+ ulimit -c unlimited
+ gdb
+
+to launch gdb (my victim app. ) now be bad & do the following from another
+telnet/xterm session to the same machine::
+
+ ps -aux | grep gdb
+ kill -SIGSEGV <gdb's pid>
+
+or alternatively use `killall -SIGSEGV gdb` if you have the killall command.
+
+Now look at the core dump::
+
+ ./gdb core
+
+Displays the following::
+
+ GNU gdb 4.18
+ Copyright 1998 Free Software Foundation, Inc.
+ GDB is free software, covered by the GNU General Public License, and you are
+ welcome to change it and/or distribute copies of it under certain conditions.
+ Type "show copying" to see the conditions.
+ There is absolutely no warranty for GDB. Type "show warranty" for details.
+ This GDB was configured as "s390-ibm-linux"...
+ Core was generated by `./gdb'.
+ Program terminated with signal 11, Segmentation fault.
+ Reading symbols from /usr/lib/libncurses.so.4...done.
+ Reading symbols from /lib/libm.so.6...done.
+ Reading symbols from /lib/libc.so.6...done.
+ Reading symbols from /lib/ld-linux.so.2...done.
+ #0 0x40126d1a in read () from /lib/libc.so.6
+ Setting up the environment for debugging gdb.
+ Breakpoint 1 at 0x4dc6f8: file utils.c, line 471.
+ Breakpoint 2 at 0x4d87a4: file top.c, line 2609.
+ (top-gdb) info stack
+ #0 0x40126d1a in read () from /lib/libc.so.6
+ #1 0x528f26 in rl_getc (stream=0x7ffffde8) at input.c:402
+ #2 0x528ed0 in rl_read_key () at input.c:381
+ #3 0x5167e6 in readline_internal_char () at readline.c:454
+ #4 0x5168ee in readline_internal_charloop () at readline.c:507
+ #5 0x51692c in readline_internal () at readline.c:521
+ #6 0x5164fe in readline (prompt=0x7ffff810)
+ at readline.c:349
+ #7 0x4d7a8a in command_line_input (prompt=0x564420 "(gdb) ", repeat=1,
+ annotation_suffix=0x4d6b44 "prompt") at top.c:2091
+ #8 0x4d6cf0 in command_loop () at top.c:1345
+ #9 0x4e25bc in main (argc=1, argv=0x7ffffdf4) at main.c:635
+
+
+LDD
+===
+This is a program which lists the shared libraries which a library needs,
+Note you also get the relocations of the shared library text segments which
+help when using objdump --source.
+
+e.g.::
+
+ ldd ./gdb
+
+outputs::
+
+ libncurses.so.4 => /usr/lib/libncurses.so.4 (0x40018000)
+ libm.so.6 => /lib/libm.so.6 (0x4005e000)
+ libc.so.6 => /lib/libc.so.6 (0x40084000)
+ /lib/ld-linux.so.2 => /lib/ld-linux.so.2 (0x40000000)
+
+
+Debugging shared libraries
+==========================
+Most programs use shared libraries, however it can be very painful
+when you single step instruction into a function like printf for the
+first time & you end up in functions like _dl_runtime_resolve this is
+the ld.so doing lazy binding, lazy binding is a concept in ELF where
+shared library functions are not loaded into memory unless they are
+actually used, great for saving memory but a pain to debug.
+
+To get around this either relink the program -static or exit gdb type
+export LD_BIND_NOW=true this will stop lazy binding & restart the gdb'ing
+the program in question.
+
+
+
+Debugging modules
+=================
+As modules are dynamically loaded into the kernel their address can be
+anywhere to get around this use the -m option with insmod to emit a load
+map which can be piped into a file if required.
+
+The proc file system
+====================
+What is it ?.
+It is a filesystem created by the kernel with files which are created on demand
+by the kernel if read, or can be used to modify kernel parameters,
+it is a powerful concept.
+
+e.g.::
+
+ cat /proc/sys/net/ipv4/ip_forward
+
+On my machine outputs::
+
+ 0
+
+telling me ip_forwarding is not on to switch it on I can do::
+
+ echo 1 > /proc/sys/net/ipv4/ip_forward
+
+cat it again::
+
+ cat /proc/sys/net/ipv4/ip_forward
+
+On my machine now outputs::
+
+ 1
+
+IP forwarding is on.
+
+There is a lot of useful info in here best found by going in and having a look
+around, so I'll take you through some entries I consider important.
+
+All the processes running on the machine have their own entry defined by
+/proc/<pid>
+
+So lets have a look at the init process::
+
+ cd /proc/1
+ cat cmdline
+
+emits::
+
+ init [2]
+
+::
+
+ cd /proc/1/fd
+
+This contains numerical entries of all the open files,
+some of these you can cat e.g. stdout (2)::
+
+ cat /proc/29/maps
+
+on my machine emits::
+
+ 00400000-00478000 r-xp 00000000 5f:00 4103 /bin/bash
+ 00478000-0047e000 rw-p 00077000 5f:00 4103 /bin/bash
+ 0047e000-00492000 rwxp 00000000 00:00 0
+ 40000000-40015000 r-xp 00000000 5f:00 14382 /lib/ld-2.1.2.so
+ 40015000-40016000 rw-p 00014000 5f:00 14382 /lib/ld-2.1.2.so
+ 40016000-40017000 rwxp 00000000 00:00 0
+ 40017000-40018000 rw-p 00000000 00:00 0
+ 40018000-4001b000 r-xp 00000000 5f:00 14435 /lib/libtermcap.so.2.0.8
+ 4001b000-4001c000 rw-p 00002000 5f:00 14435 /lib/libtermcap.so.2.0.8
+ 4001c000-4010d000 r-xp 00000000 5f:00 14387 /lib/libc-2.1.2.so
+ 4010d000-40111000 rw-p 000f0000 5f:00 14387 /lib/libc-2.1.2.so
+ 40111000-40114000 rw-p 00000000 00:00 0
+ 40114000-4011e000 r-xp 00000000 5f:00 14408 /lib/libnss_files-2.1.2.so
+ 4011e000-4011f000 rw-p 00009000 5f:00 14408 /lib/libnss_files-2.1.2.so
+ 7fffd000-80000000 rwxp ffffe000 00:00 0
+
+
+Showing us the shared libraries init uses where they are in memory
+& memory access permissions for each virtual memory area.
+
+/proc/1/cwd is a softlink to the current working directory.
+
+/proc/1/root is the root of the filesystem for this process.
+
+/proc/1/mem is the current running processes memory which you
+can read & write to like a file.
+
+strace uses this sometimes as it is a bit faster than the
+rather inefficient ptrace interface for peeking at DATA.
+
+::
+
+ cat status
+
+ Name: init
+ State: S (sleeping)
+ Pid: 1
+ PPid: 0
+ Uid: 0 0 0 0
+ Gid: 0 0 0 0
+ Groups:
+ VmSize: 408 kB
+ VmLck: 0 kB
+ VmRSS: 208 kB
+ VmData: 24 kB
+ VmStk: 8 kB
+ VmExe: 368 kB
+ VmLib: 0 kB
+ SigPnd: 0000000000000000
+ SigBlk: 0000000000000000
+ SigIgn: 7fffffffd7f0d8fc
+ SigCgt: 00000000280b2603
+ CapInh: 00000000fffffeff
+ CapPrm: 00000000ffffffff
+ CapEff: 00000000fffffeff
+
+ User PSW: 070de000 80414146
+ task: 004b6000 tss: 004b62d8 ksp: 004b7ca8 pt_regs: 004b7f68
+ User GPRS:
+ 00000400 00000000 0000000b 7ffffa90
+ 00000000 00000000 00000000 0045d9f4
+ 0045cafc 7ffffa90 7fffff18 0045cb08
+ 00010400 804039e8 80403af8 7ffff8b0
+ User ACRS:
+ 00000000 00000000 00000000 00000000
+ 00000001 00000000 00000000 00000000
+ 00000000 00000000 00000000 00000000
+ 00000000 00000000 00000000 00000000
+ Kernel BackChain CallChain BackChain CallChain
+ 004b7ca8 8002bd0c 004b7d18 8002b92c
+ 004b7db8 8005cd50 004b7e38 8005d12a
+ 004b7f08 80019114
+
+Showing among other things memory usage & status of some signals &
+the processes'es registers from the kernel task_structure
+as well as a backchain which may be useful if a process crashes
+in the kernel for some unknown reason.
+
+Some driver debugging techniques
+================================
+debug feature
+-------------
+Some of our drivers now support a "debug feature" in
+/proc/s390dbf see s390dbf.txt in the linux/Documentation directory
+for more info.
+
+e.g.
+to switch on the lcs "debug feature"::
+
+ echo 5 > /proc/s390dbf/lcs/level
+
+& then after the error occurred::
+
+ cat /proc/s390dbf/lcs/sprintf >/logfile
+
+the logfile now contains some information which may help
+tech support resolve a problem in the field.
+
+
+
+high level debugging network drivers
+------------------------------------
+ifconfig is a quite useful command
+it gives the current state of network drivers.
+
+If you suspect your network device driver is dead
+one way to check is type::
+
+ ifconfig <network device>
+
+e.g. tr0
+
+You should see something like::
+
+ ifconfig tr0
+ tr0 Link encap:16/4 Mbps Token Ring (New) HWaddr 00:04:AC:20:8E:48
+ inet addr:9.164.185.132 Bcast:9.164.191.255 Mask:255.255.224.0
+ UP BROADCAST RUNNING MULTICAST MTU:2000 Metric:1
+ RX packets:246134 errors:0 dropped:0 overruns:0 frame:0
+ TX packets:5 errors:0 dropped:0 overruns:0 carrier:0
+ collisions:0 txqueuelen:100
+
+if the device doesn't say up
+try::
+
+ /etc/rc.d/init.d/network start
+
+( this starts the network stack & hopefully calls ifconfig tr0 up ).
+ifconfig looks at the output of /proc/net/dev and presents it in a more
+presentable form.
+
+Now ping the device from a machine in the same subnet.
+
+if the RX packets count & TX packets counts don't increment you probably
+have problems.
+
+next::
+
+ cat /proc/net/arp
+
+Do you see any hardware addresses in the cache if not you may have problems.
+Next try::
+
+ ping -c 5 <broadcast_addr>
+
+i.e. the Bcast field above in the output of
+ifconfig. Do you see any replies from machines other than the local machine
+if not you may have problems. also if the TX packets count in ifconfig
+hasn't incremented either you have serious problems in your driver
+(e.g. the txbusy field of the network device being stuck on )
+or you may have multiple network devices connected.
+
+
+chandev
+-------
+There is a new device layer for channel devices, some
+drivers e.g. lcs are registered with this layer.
+
+If the device uses the channel device layer you'll be
+able to find what interrupts it uses & the current state
+of the device.
+
+See the manpage chandev.8 &type cat /proc/chandev for more info.
+
+
+SysRq
+=====
+This is now supported by linux for s/390 & z/Architecture.
+
+To enable it do compile the kernel with::
+
+ Kernel Hacking -> Magic SysRq Key Enabled
+
+Then::
+
+ echo "1" > /proc/sys/kernel/sysrq
+
+also type::
+
+ echo "8" >/proc/sys/kernel/printk
+
+To make printk output go to console.
+
+On 390 all commands are prefixed with::
+
+ ^-
+
+e.g.::
+
+ ^-t will show tasks.
+ ^-? or some unknown command will display help.
+
+The sysrq key reading is very picky ( I have to type the keys in an
+xterm session & paste them into the x3270 console )
+& it may be wise to predefine the keys as described in the VM hints above
+
+This is particularly useful for syncing disks unmounting & rebooting
+if the machine gets partially hung.
+
+Read Documentation/admin-guide/sysrq.rst for more info
+
+References:
+===========
+- Enterprise Systems Architecture Reference Summary
+- Enterprise Systems Architecture Principles of Operation
+- Hartmut Penners s390 stack frame sheet.
+- IBM Mainframe Channel Attachment a technology brief from a CISCO webpage
+- Various bits of man & info pages of Linux.
+- Linux & GDB source.
+- Various info & man pages.
+- CMS Help on tracing commands.
+- Linux for s/390 Elf Application Binary Interface
+- Linux for z/Series Elf Application Binary Interface ( Both Highly Recommended )
+- z/Architecture Principles of Operation SA22-7832-00
+- Enterprise Systems Architecture/390 Reference Summary SA22-7209-01 & the
+- Enterprise Systems Architecture/390 Principles of Operation SA22-7201-05
+
+Special Thanks
+==============
+Special thanks to Neale Ferguson who maintains a much
+prettier HTML version of this page at
+http://linuxvm.org/penguinvm/
+Bob Grainger Stefan Bader & others for reporting bugs
+=============================
S/390 driver model interfaces
------------------------------
+=============================
1. CCW devices
--------------
All devices which can be addressed by means of ccws are called 'CCW devices' -
even if they aren't actually driven by ccws.
-All ccw devices are accessed via a subchannel, this is reflected in the
-structures under devices/:
+All ccw devices are accessed via a subchannel, this is reflected in the
+structures under devices/::
-devices/
+ devices/
- system/
- css0/
- - 0.0.0000/0.0.0815/
+ - 0.0.0000/0.0.0815/
- 0.0.0001/0.0.4711/
- 0.0.0002/
- 0.1.0000/0.1.1234/
All ccw devices export some data via sysfs.
-cutype: The control unit type / model.
+cutype:
+ The control unit type / model.
-devtype: The device type / model, if applicable.
+devtype:
+ The device type / model, if applicable.
-availability: Can be 'good' or 'boxed'; 'no path' or 'no device' for
+availability:
+ Can be 'good' or 'boxed'; 'no path' or 'no device' for
disconnected devices.
-online: An interface to set the device online and offline.
+online:
+ An interface to set the device online and offline.
In the special case of the device being disconnected (see the
notify function under 1.2), piping 0 to online will forcibly delete
the device.
There is also some data exported on a per-subchannel basis (see under
bus/css/devices/):
-chpids: Via which chpids the device is connected.
+chpids:
+ Via which chpids the device is connected.
-pimpampom: The path installed, path available and path operational masks.
+pimpampom:
+ The path installed, path available and path operational masks.
There also might be additional data, for example for block devices.
------------------------------------
The basic struct ccw_device and struct ccw_driver data structures can be found
-under include/asm/ccwdev.h.
+under include/asm/ccwdev.h::
-struct ccw_device {
- spinlock_t *ccwlock;
- struct ccw_device_private *private;
- struct ccw_device_id id;
+ struct ccw_device {
+ spinlock_t *ccwlock;
+ struct ccw_device_private *private;
+ struct ccw_device_id id;
- struct ccw_driver *drv;
- struct device dev;
+ struct ccw_driver *drv;
+ struct device dev;
int online;
void (*handler) (struct ccw_device *dev, unsigned long intparm,
- struct irb *irb);
-};
+ struct irb *irb);
+ };
-struct ccw_driver {
- struct module *owner;
- struct ccw_device_id *ids;
- int (*probe) (struct ccw_device *);
+ struct ccw_driver {
+ struct module *owner;
+ struct ccw_device_id *ids;
+ int (*probe) (struct ccw_device *);
int (*remove) (struct ccw_device *);
int (*set_online) (struct ccw_device *);
int (*set_offline) (struct ccw_device *);
int (*notify) (struct ccw_device *, int);
struct device_driver driver;
char *name;
-};
+ };
The 'private' field contains data needed for internal i/o operation only, and
is not available to the device driver.
Each driver should declare in a MODULE_DEVICE_TABLE into which CU types/models
and/or device types/models it is interested. This information can later be found
-in the struct ccw_device_id fields:
+in the struct ccw_device_id fields::
-struct ccw_device_id {
- __u16 match_flags;
+ struct ccw_device_id {
+ __u16 match_flags;
- __u16 cu_type;
- __u16 dev_type;
- __u8 cu_model;
- __u8 dev_model;
+ __u16 cu_type;
+ __u16 dev_type;
+ __u8 cu_model;
+ __u8 dev_model;
unsigned long driver_info;
-};
+ };
The functions in ccw_driver should be used in the following way:
-probe: This function is called by the device layer for each device the driver
+
+probe:
+ This function is called by the device layer for each device the driver
is interested in. The driver should only allocate private structures
to put in dev->driver_data and create attributes (if needed). Also,
the interrupt handler (see below) should be set here.
-int (*probe) (struct ccw_device *cdev);
+::
+
+ int (*probe) (struct ccw_device *cdev);
-Parameters: cdev - the device to be probed.
+Parameters:
+ cdev
+ - the device to be probed.
-remove: This function is called by the device layer upon removal of the driver,
+remove:
+ This function is called by the device layer upon removal of the driver,
the device or the module. The driver should perform cleanups here.
-int (*remove) (struct ccw_device *cdev);
+::
-Parameters: cdev - the device to be removed.
+ int (*remove) (struct ccw_device *cdev);
+Parameters:
+ cdev
+ - the device to be removed.
-set_online: This function is called by the common I/O layer when the device is
+
+set_online:
+ This function is called by the common I/O layer when the device is
activated via the 'online' attribute. The driver should finally
setup and activate the device here.
-int (*set_online) (struct ccw_device *);
+::
+
+ int (*set_online) (struct ccw_device *);
-Parameters: cdev - the device to be activated. The common layer has
+Parameters:
+ cdev
+ - the device to be activated. The common layer has
verified that the device is not already online.
de-activated via the 'online' attribute. The driver should shut
down the device, but not de-allocate its private data.
-int (*set_offline) (struct ccw_device *);
+::
-Parameters: cdev - the device to be deactivated. The common layer has
+ int (*set_offline) (struct ccw_device *);
+
+Parameters:
+ cdev
+ - the device to be deactivated. The common layer has
verified that the device is online.
-notify: This function is called by the common I/O layer for some state changes
+notify:
+ This function is called by the common I/O layer for some state changes
of the device.
+
Signalled to the driver are:
+
* In online state, device detached (CIO_GONE) or last path gone
(CIO_NO_PATH). The driver must return !0 to keep the device; for
return code 0, the device will be deleted as usual (also when no
return code of the notify function the device driver signals if it
wants the device back: !0 for keeping, 0 to make the device being
removed and re-registered.
-
-int (*notify) (struct ccw_device *, int);
-Parameters: cdev - the device whose state changed.
- event - the event that happened. This can be one of CIO_GONE,
- CIO_NO_PATH or CIO_OPER.
+::
+
+ int (*notify) (struct ccw_device *, int);
+
+Parameters:
+ cdev
+ - the device whose state changed.
+
+ event
+ - the event that happened. This can be one of CIO_GONE,
+ CIO_NO_PATH or CIO_OPER.
The handler field of the struct ccw_device is meant to be set to the interrupt
-handler for the device. In order to accommodate drivers which use several
+handler for the device. In order to accommodate drivers which use several
distinct handlers (e.g. multi subchannel devices), this is a member of ccw_device
instead of ccw_driver.
The handler is registered with the common layer during set_online() processing
before the driver is called, and is deregistered during set_offline() after the
-driver has been called. Also, after registering / before deregistering, path
+driver has been called. Also, after registering / before deregistering, path
grouping resp. disbanding of the path group (if applicable) are performed.
-void (*handler) (struct ccw_device *dev, unsigned long intparm, struct irb *irb);
+::
-Parameters: dev - the device the handler is called for
+ void (*handler) (struct ccw_device *dev, unsigned long intparm, struct irb *irb);
+
+Parameters: dev - the device the handler is called for
intparm - the intparm which allows the device driver to identify
- the i/o the interrupt is associated with, or to recognize
- the interrupt as unsolicited.
- irb - interruption response block which contains the accumulated
- status.
+ the i/o the interrupt is associated with, or to recognize
+ the interrupt as unsolicited.
+ irb - interruption response block which contains the accumulated
+ status.
-The device driver is called from the common ccw_device layer and can retrieve
+The device driver is called from the common ccw_device layer and can retrieve
information about the interrupt from the irb parameter.
latter consistently due to lacking machine support (we don't need to be aware
of it anyway).
-status - Can be 'online' or 'offline'.
+status
+ - Can be 'online' or 'offline'.
Piping 'on' or 'off' sets the chpid logically online/offline.
Piping 'on' to an online chpid triggers path reprobing for all devices
the chpid connects to. This can be used to force the kernel to re-use
a channel path the user knows to be online, but the machine hasn't
created a machine check for.
-type - The physical type of the channel path.
+type
+ - The physical type of the channel path.
-shared - Whether the channel path is shared.
+shared
+ - Whether the channel path is shared.
-cmg - The channel measurement group.
+cmg
+ - The channel measurement group.
3. System devices
-----------------
-3.1 xpram
+3.1 xpram
---------
xpram shows up under devices/system/ as 'xpram'.
number is assigned sequentially to the connections defined via the 'connection'
attribute.
-user - shows the connection partner.
-
-buffer - maximum buffer size.
- Pipe to it to change buffer size.
-
+user
+ - shows the connection partner.
+buffer
+ - maximum buffer size. Pipe to it to change buffer size.
--- /dev/null
+:orphan:
+
+=================
+s390 Architecture
+=================
+
+.. toctree::
+ :maxdepth: 1
+
+ cds
+ 3270
+ debugging390
+ driver-model
+ monreader
+ qeth
+ s390dbf
+ vfio-ap
+ vfio-ccw
+ zfcpdump
+ dasd
+ common_io
+
+ text_files
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
+=================================================
+Linux API for read access to z/VM Monitor Records
+=================================================
Date : 2004-Nov-26
+
Author: Gerald Schaefer (geraldsc@de.ibm.com)
- Linux API for read access to z/VM Monitor Records
- =================================================
Description
===========
This item delivers a new Linux API in the form of a misc char device that is
usable from user space and allows read access to the z/VM Monitor Records
-collected by the *MONITOR System Service of z/VM.
+collected by the `*MONITOR` System Service of z/VM.
User Requirements
=================
The z/VM guest on which you want to access this API needs to be configured in
-order to allow IUCV connections to the *MONITOR service, i.e. it needs the
-IUCV *MONITOR statement in its user entry. If the monitor DCSS to be used is
+order to allow IUCV connections to the `*MONITOR` service, i.e. it needs the
+IUCV `*MONITOR` statement in its user entry. If the monitor DCSS to be used is
restricted (likely), you also need the NAMESAVE <DCSS NAME> statement.
This item will use the IUCV device driver to access the z/VM services, so you
need a kernel with IUCV support. You also need z/VM version 4.4 or 5.1.
and you have to specify the "mem=" kernel parameter in your parmfile with a
value greater than the ending address of the DCSS.
-Example: DEF STOR 140M
+Example::
+
+ DEF STOR 140M
This defines 140MB storage size for your guest, the parameter "mem=160M" is
added to the parmfile.
in the parmfile.
The default name for the DCSS is "MONDCSS" if none is specified. In case that
-there are other users already connected to the *MONITOR service (e.g.
+there are other users already connected to the `*MONITOR` service (e.g.
Performance Toolkit), the monitor DCSS is already defined and you have to use
the same DCSS. The CP command Q MONITOR (Class E privileged) shows the name
of the monitor DCSS, if already defined, and the users connected to the
-*MONITOR service.
+`*MONITOR` service.
Refer to the "z/VM Performance" book (SC24-6109-00) on how to create a monitor
DCSS if your z/VM doesn't have one already, you need Class E privileges to
define and save a DCSS.
Example:
--------
-modprobe monreader mondcss=MYDCSS
+
+::
+
+ modprobe monreader mondcss=MYDCSS
This loads the module and sets the DCSS name to "MYDCSS".
NOTE:
-----
-This API provides no interface to control the *MONITOR service, e.g. specify
+This API provides no interface to control the `*MONITOR` service, e.g. specify
which data should be collected. This can be done by the CP command MONITOR
(Class E privileged), see "CP Command and Utility Reference".
automatically and you have to create it manually after loading the module.
Therefore you need to know the major and minor numbers of the device. These
numbers can be found in /sys/class/misc/monreader/dev.
+
Typing cat /sys/class/misc/monreader/dev will give an output of the form
<major>:<minor>. The device node can be created via the mknod command, enter
mknod <name> c <major> <minor>, where <name> is the name of the device node
Example:
--------
-# modprobe monreader
-# cat /sys/class/misc/monreader/dev
-10:63
-# mknod /dev/monreader c 10 63
+
+::
+
+ # modprobe monreader
+ # cat /sys/class/misc/monreader/dev
+ 10:63
+ # mknod /dev/monreader c 10 63
This loads the module with the default monitor DCSS (MONDCSS) and creates a
device node.
correctly (domain 1, record 13), i.e. it can be used to determine the record
start offset relative to a 4K page (frame) boundary.
-See "Appendix A: *MONITOR" in the "z/VM Performance" document for a description
+See "Appendix A: `*MONITOR`" in the "z/VM Performance" document for a description
of the monitor control element layout. The layout of the monitor records can
be found here (z/VM 5.1): http://www.vm.ibm.com/pubs/mon510/index.html
-The layout of the data stream provided by the monreader device is as follows:
-...
-<0 byte read>
-<first MCE> \
-<first set of records> |
-... |- data set
-<last MCE> |
-<last set of records> /
-<0 byte read>
-...
+The layout of the data stream provided by the monreader device is as follows::
+
+ ...
+ <0 byte read>
+ <first MCE> \
+ <first set of records> |
+ ... |- data set
+ <last MCE> |
+ <last set of records> /
+ <0 byte read>
+ ...
There may be more than one combination of MCE and corresponding record set
within one data set and the end of each data set is indicated by a successful
negative value for the number of bytes read. In this case, the errno variable
indicates the error condition:
-EIO: reply failed, read data is invalid and the application
+EIO:
+ reply failed, read data is invalid and the application
should discard the data read since the last successful read with 0 size.
-EFAULT: copy_to_user failed, read data is invalid and the application should
- discard the data read since the last successful read with 0 size.
-EAGAIN: occurs on a non-blocking read if there is no data available at the
- moment. There is no data missing or corrupted, just try again or rather
- use polling for non-blocking reads.
-EOVERFLOW: message limit reached, the data read since the last successful
- read with 0 size is valid but subsequent records may be missing.
+EFAULT:
+ copy_to_user failed, read data is invalid and the application should
+ discard the data read since the last successful read with 0 size.
+EAGAIN:
+ occurs on a non-blocking read if there is no data available at the
+ moment. There is no data missing or corrupted, just try again or rather
+ use polling for non-blocking reads.
+EOVERFLOW:
+ message limit reached, the data read since the last successful
+ read with 0 size is valid but subsequent records may be missing.
In the last case (EOVERFLOW) there may be missing data, in the first two cases
(EIO, EFAULT) there will be missing data. It's up to the application if it will
-----
Only one user is allowed to open the char device. If it is already in use, the
open function will fail (return a negative value) and set errno to EBUSY.
-The open function may also fail if an IUCV connection to the *MONITOR service
+The open function may also fail if an IUCV connection to the `*MONITOR` service
cannot be established. In this case errno will be set to EIO and an error
message with an IPUSER SEVER code will be printed into syslog. The IPUSER SEVER
codes are described in the "z/VM Performance" book, Appendix A.
will account for the message limit, i.e. opening the device without reading
from it will provoke the "message limit reached" error (EOVERFLOW error code)
eventually.
-
+=============================
IBM s390 QDIO Ethernet Driver
+=============================
OSA and HiperSockets Bridge Port Support
+========================================
Uevents
+-------
To generate the events the device must be assigned a role of either
a primary or a secondary Bridge Port. For more information, see
event with ACTION=CHANGE is emitted on behalf of the corresponding
ccwgroup device. The event has the following attributes:
-BRIDGEPORT=statechange - indicates that the Bridge Port device changed
+BRIDGEPORT=statechange
+ indicates that the Bridge Port device changed
its state.
-ROLE={primary|secondary|none} - the role assigned to the port.
+ROLE={primary|secondary|none}
+ the role assigned to the port.
-STATE={active|standby|inactive} - the newly assumed state of the port.
+STATE={active|standby|inactive}
+ the newly assumed state of the port.
When run on HiperSockets Bridge Capable Port hardware with host address
notifications enabled, a udev event with ACTION=CHANGE is emitted.
or a VLAN is registered or unregistered on the network served by the device.
The event has the following attributes:
-BRIDGEDHOST={reset|register|deregister|abort} - host address
+BRIDGEDHOST={reset|register|deregister|abort}
+ host address
notifications are started afresh, a new host or VLAN is registered or
deregistered on the Bridge Port HiperSockets channel, or address
notifications are aborted.
-VLAN=numeric-vlan-id - VLAN ID on which the event occurred. Not included
+VLAN=numeric-vlan-id
+ VLAN ID on which the event occurred. Not included
if no VLAN is involved in the event.
-MAC=xx:xx:xx:xx:xx:xx - MAC address of the host that is being registered
+MAC=xx:xx:xx:xx:xx:xx
+ MAC address of the host that is being registered
or deregistered from the HiperSockets channel. Not reported if the
event reports the creation or destruction of a VLAN.
-NTOK_BUSID=x.y.zzzz - device bus ID (CSSID, SSID and device number).
+NTOK_BUSID=x.y.zzzz
+ device bus ID (CSSID, SSID and device number).
-NTOK_IID=xx - device IID.
+NTOK_IID=xx
+ device IID.
-NTOK_CHPID=xx - device CHPID.
+NTOK_CHPID=xx
+ device CHPID.
-NTOK_CHID=xxxx - device channel ID.
+NTOK_CHID=xxxx
+ device channel ID.
-Note that the NTOK_* attributes refer to devices other than the one
+Note that the `NTOK_*` attributes refer to devices other than the one
connected to the system on which the OS is running.
--- /dev/null
+==================
+S390 Debug Feature
+==================
+
+files:
+ - arch/s390/kernel/debug.c
+ - arch/s390/include/asm/debug.h
+
+Description:
+------------
+The goal of this feature is to provide a kernel debug logging API
+where log records can be stored efficiently in memory, where each component
+(e.g. device drivers) can have one separate debug log.
+One purpose of this is to inspect the debug logs after a production system crash
+in order to analyze the reason for the crash.
+
+If the system still runs but only a subcomponent which uses dbf fails,
+it is possible to look at the debug logs on a live system via the Linux
+debugfs filesystem.
+
+The debug feature may also very useful for kernel and driver development.
+
+Design:
+-------
+Kernel components (e.g. device drivers) can register themselves at the debug
+feature with the function call :c:func:`debug_register()`.
+This function initializes a
+debug log for the caller. For each debug log exists a number of debug areas
+where exactly one is active at one time. Each debug area consists of contiguous
+pages in memory. In the debug areas there are stored debug entries (log records)
+which are written by event- and exception-calls.
+
+An event-call writes the specified debug entry to the active debug
+area and updates the log pointer for the active area. If the end
+of the active debug area is reached, a wrap around is done (ring buffer)
+and the next debug entry will be written at the beginning of the active
+debug area.
+
+An exception-call writes the specified debug entry to the log and
+switches to the next debug area. This is done in order to be sure
+that the records which describe the origin of the exception are not
+overwritten when a wrap around for the current area occurs.
+
+The debug areas themselves are also ordered in form of a ring buffer.
+When an exception is thrown in the last debug area, the following debug
+entries are then written again in the very first area.
+
+There are four versions for the event- and exception-calls: One for
+logging raw data, one for text, one for numbers (unsigned int and long),
+and one for sprintf-like formatted strings.
+
+Each debug entry contains the following data:
+
+- Timestamp
+- Cpu-Number of calling task
+- Level of debug entry (0...6)
+- Return Address to caller
+- Flag, if entry is an exception or not
+
+The debug logs can be inspected in a live system through entries in
+the debugfs-filesystem. Under the toplevel directory "``s390dbf``" there is
+a directory for each registered component, which is named like the
+corresponding component. The debugfs normally should be mounted to
+``/sys/kernel/debug`` therefore the debug feature can be accessed under
+``/sys/kernel/debug/s390dbf``.
+
+The content of the directories are files which represent different views
+to the debug log. Each component can decide which views should be
+used through registering them with the function :c:func:`debug_register_view()`.
+Predefined views for hex/ascii, sprintf and raw binary data are provided.
+It is also possible to define other views. The content of
+a view can be inspected simply by reading the corresponding debugfs file.
+
+All debug logs have an actual debug level (range from 0 to 6).
+The default level is 3. Event and Exception functions have a :c:data:`level`
+parameter. Only debug entries with a level that is lower or equal
+than the actual level are written to the log. This means, when
+writing events, high priority log entries should have a low level
+value whereas low priority entries should have a high one.
+The actual debug level can be changed with the help of the debugfs-filesystem
+through writing a number string "x" to the ``level`` debugfs file which is
+provided for every debug log. Debugging can be switched off completely
+by using "-" on the ``level`` debugfs file.
+
+Example::
+
+ > echo "-" > /sys/kernel/debug/s390dbf/dasd/level
+
+It is also possible to deactivate the debug feature globally for every
+debug log. You can change the behavior using 2 sysctl parameters in
+``/proc/sys/s390dbf``:
+
+There are currently 2 possible triggers, which stop the debug feature
+globally. The first possibility is to use the ``debug_active`` sysctl. If
+set to 1 the debug feature is running. If ``debug_active`` is set to 0 the
+debug feature is turned off.
+
+The second trigger which stops the debug feature is a kernel oops.
+That prevents the debug feature from overwriting debug information that
+happened before the oops. After an oops you can reactivate the debug feature
+by piping 1 to ``/proc/sys/s390dbf/debug_active``. Nevertheless, it's not
+suggested to use an oopsed kernel in a production environment.
+
+If you want to disallow the deactivation of the debug feature, you can use
+the ``debug_stoppable`` sysctl. If you set ``debug_stoppable`` to 0 the debug
+feature cannot be stopped. If the debug feature is already stopped, it
+will stay deactivated.
+
+Kernel Interfaces:
+------------------
+
+.. kernel-doc:: arch/s390/kernel/debug.c
+.. kernel-doc:: arch/s390/include/asm/debug.h
+
+Predefined views:
+-----------------
+
+.. code-block:: c
+
+ extern struct debug_view debug_hex_ascii_view;
+
+ extern struct debug_view debug_raw_view;
+
+ extern struct debug_view debug_sprintf_view;
+
+Examples
+--------
+
+.. code-block:: c
+
+ /*
+ * hex_ascii- + raw-view Example
+ */
+
+ #include <linux/init.h>
+ #include <asm/debug.h>
+
+ static debug_info_t *debug_info;
+
+ static int init(void)
+ {
+ /* register 4 debug areas with one page each and 4 byte data field */
+
+ debug_info = debug_register("test", 1, 4, 4 );
+ debug_register_view(debug_info, &debug_hex_ascii_view);
+ debug_register_view(debug_info, &debug_raw_view);
+
+ debug_text_event(debug_info, 4 , "one ");
+ debug_int_exception(debug_info, 4, 4711);
+ debug_event(debug_info, 3, &debug_info, 4);
+
+ return 0;
+ }
+
+ static void cleanup(void)
+ {
+ debug_unregister(debug_info);
+ }
+
+ module_init(init);
+ module_exit(cleanup);
+
+.. code-block:: c
+
+ /*
+ * sprintf-view Example
+ */
+
+ #include <linux/init.h>
+ #include <asm/debug.h>
+
+ static debug_info_t *debug_info;
+
+ static int init(void)
+ {
+ /* register 4 debug areas with one page each and data field for */
+ /* format string pointer + 2 varargs (= 3 * sizeof(long)) */
+
+ debug_info = debug_register("test", 1, 4, sizeof(long) * 3);
+ debug_register_view(debug_info, &debug_sprintf_view);
+
+ debug_sprintf_event(debug_info, 2 , "first event in %s:%i\n",__FILE__,__LINE__);
+ debug_sprintf_exception(debug_info, 1, "pointer to debug info: %p\n",&debug_info);
+
+ return 0;
+ }
+
+ static void cleanup(void)
+ {
+ debug_unregister(debug_info);
+ }
+
+ module_init(init);
+ module_exit(cleanup);
+
+Debugfs Interface
+-----------------
+Views to the debug logs can be investigated through reading the corresponding
+debugfs-files:
+
+Example::
+
+ > ls /sys/kernel/debug/s390dbf/dasd
+ flush hex_ascii level pages raw
+ > cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s
+ 00 00974733272:680099 2 - 02 0006ad7e 07 ea 4a 90 | ....
+ 00 00974733272:682210 2 - 02 0006ade6 46 52 45 45 | FREE
+ 00 00974733272:682213 2 - 02 0006adf6 07 ea 4a 90 | ....
+ 00 00974733272:682281 1 * 02 0006ab08 41 4c 4c 43 | EXCP
+ 01 00974733272:682284 2 - 02 0006ab16 45 43 4b 44 | ECKD
+ 01 00974733272:682287 2 - 02 0006ab28 00 00 00 04 | ....
+ 01 00974733272:682289 2 - 02 0006ab3e 00 00 00 20 | ...
+ 01 00974733272:682297 2 - 02 0006ad7e 07 ea 4a 90 | ....
+ 01 00974733272:684384 2 - 00 0006ade6 46 52 45 45 | FREE
+ 01 00974733272:684388 2 - 00 0006adf6 07 ea 4a 90 | ....
+
+See section about predefined views for explanation of the above output!
+
+Changing the debug level
+------------------------
+
+Example::
+
+
+ > cat /sys/kernel/debug/s390dbf/dasd/level
+ 3
+ > echo "5" > /sys/kernel/debug/s390dbf/dasd/level
+ > cat /sys/kernel/debug/s390dbf/dasd/level
+ 5
+
+Flushing debug areas
+--------------------
+Debug areas can be flushed with piping the number of the desired
+area (0...n) to the debugfs file "flush". When using "-" all debug areas
+are flushed.
+
+Examples:
+
+1. Flush debug area 0::
+
+ > echo "0" > /sys/kernel/debug/s390dbf/dasd/flush
+
+2. Flush all debug areas::
+
+ > echo "-" > /sys/kernel/debug/s390dbf/dasd/flush
+
+Changing the size of debug areas
+------------------------------------
+It is possible the change the size of debug areas through piping
+the number of pages to the debugfs file "pages". The resize request will
+also flush the debug areas.
+
+Example:
+
+Define 4 pages for the debug areas of debug feature "dasd"::
+
+ > echo "4" > /sys/kernel/debug/s390dbf/dasd/pages
+
+Stopping the debug feature
+--------------------------
+Example:
+
+1. Check if stopping is allowed::
+
+ > cat /proc/sys/s390dbf/debug_stoppable
+
+2. Stop debug feature::
+
+ > echo 0 > /proc/sys/s390dbf/debug_active
+
+crash Interface
+----------------
+The ``crash`` tool since v5.1.0 has a built-in command
+``s390dbf`` to display all the debug logs or export them to the file system.
+With this tool it is possible
+to investigate the debug logs on a live system and with a memory dump after
+a system crash.
+
+Investigating raw memory
+------------------------
+One last possibility to investigate the debug logs at a live
+system and after a system crash is to look at the raw memory
+under VM or at the Service Element.
+It is possible to find the anchor of the debug-logs through
+the ``debug_area_first`` symbol in the System map. Then one has
+to follow the correct pointers of the data-structures defined
+in debug.h and find the debug-areas in memory.
+Normally modules which use the debug feature will also have
+a global variable with the pointer to the debug-logs. Following
+this pointer it will also be possible to find the debug logs in
+memory.
+
+For this method it is recommended to use '16 * x + 4' byte (x = 0..n)
+for the length of the data field in :c:func:`debug_register()` in
+order to see the debug entries well formatted.
+
+
+Predefined Views
+----------------
+
+There are three predefined views: hex_ascii, raw and sprintf.
+The hex_ascii view shows the data field in hex and ascii representation
+(e.g. ``45 43 4b 44 | ECKD``).
+The raw view returns a bytestream as the debug areas are stored in memory.
+
+The sprintf view formats the debug entries in the same way as the sprintf
+function would do. The sprintf event/exception functions write to the
+debug entry a pointer to the format string (size = sizeof(long))
+and for each vararg a long value. So e.g. for a debug entry with a format
+string plus two varargs one would need to allocate a (3 * sizeof(long))
+byte data area in the debug_register() function.
+
+IMPORTANT:
+ Using "%s" in sprintf event functions is dangerous. You can only
+ use "%s" in the sprintf event functions, if the memory for the passed string
+ is available as long as the debug feature exists. The reason behind this is
+ that due to performance considerations only a pointer to the string is stored
+ in the debug feature. If you log a string that is freed afterwards, you will
+ get an OOPS when inspecting the debug feature, because then the debug feature
+ will access the already freed memory.
+
+NOTE:
+ If using the sprintf view do NOT use other event/exception functions
+ than the sprintf-event and -exception functions.
+
+The format of the hex_ascii and sprintf view is as follows:
+
+- Number of area
+- Timestamp (formatted as seconds and microseconds since 00:00:00 Coordinated
+ Universal Time (UTC), January 1, 1970)
+- level of debug entry
+- Exception flag (* = Exception)
+- Cpu-Number of calling task
+- Return Address to caller
+- data field
+
+The format of the raw view is:
+
+- Header as described in debug.h
+- datafield
+
+A typical line of the hex_ascii view will look like the following (first line
+is only for explanation and will not be displayed when 'cating' the view)::
+
+ area time level exception cpu caller data (hex + ascii)
+ --------------------------------------------------------------------------
+ 00 00964419409:440690 1 - 00 88023fe
+
+
+Defining views
+--------------
+
+Views are specified with the 'debug_view' structure. There are defined
+callback functions which are used for reading and writing the debugfs files:
+
+.. code-block:: c
+
+ struct debug_view {
+ char name[DEBUG_MAX_PROCF_LEN];
+ debug_prolog_proc_t* prolog_proc;
+ debug_header_proc_t* header_proc;
+ debug_format_proc_t* format_proc;
+ debug_input_proc_t* input_proc;
+ void* private_data;
+ };
+
+where:
+
+.. code-block:: c
+
+ typedef int (debug_header_proc_t) (debug_info_t* id,
+ struct debug_view* view,
+ int area,
+ debug_entry_t* entry,
+ char* out_buf);
+
+ typedef int (debug_format_proc_t) (debug_info_t* id,
+ struct debug_view* view, char* out_buf,
+ const char* in_buf);
+ typedef int (debug_prolog_proc_t) (debug_info_t* id,
+ struct debug_view* view,
+ char* out_buf);
+ typedef int (debug_input_proc_t) (debug_info_t* id,
+ struct debug_view* view,
+ struct file* file, const char* user_buf,
+ size_t in_buf_size, loff_t* offset);
+
+
+The "private_data" member can be used as pointer to view specific data.
+It is not used by the debug feature itself.
+
+The output when reading a debugfs file is structured like this::
+
+ "prolog_proc output"
+
+ "header_proc output 1" "format_proc output 1"
+ "header_proc output 2" "format_proc output 2"
+ "header_proc output 3" "format_proc output 3"
+ ...
+
+When a view is read from the debugfs, the Debug Feature calls the
+'prolog_proc' once for writing the prolog.
+Then 'header_proc' and 'format_proc' are called for each
+existing debug entry.
+
+The input_proc can be used to implement functionality when it is written to
+the view (e.g. like with ``echo "0" > /sys/kernel/debug/s390dbf/dasd/level``).
+
+For header_proc there can be used the default function
+:c:func:`debug_dflt_header_fn()` which is defined in debug.h.
+and which produces the same header output as the predefined views.
+E.g::
+
+ 00 00964419409:440761 2 - 00 88023ec
+
+In order to see how to use the callback functions check the implementation
+of the default views!
+
+Example:
+
+.. code-block:: c
+
+ #include <asm/debug.h>
+
+ #define UNKNOWNSTR "data: %08x"
+
+ const char* messages[] =
+ {"This error...........\n",
+ "That error...........\n",
+ "Problem..............\n",
+ "Something went wrong.\n",
+ "Everything ok........\n",
+ NULL
+ };
+
+ static int debug_test_format_fn(
+ debug_info_t *id, struct debug_view *view,
+ char *out_buf, const char *in_buf
+ )
+ {
+ int i, rc = 0;
+
+ if (id->buf_size >= 4) {
+ int msg_nr = *((int*)in_buf);
+ if (msg_nr < sizeof(messages) / sizeof(char*) - 1)
+ rc += sprintf(out_buf, "%s", messages[msg_nr]);
+ else
+ rc += sprintf(out_buf, UNKNOWNSTR, msg_nr);
+ }
+ return rc;
+ }
+
+ struct debug_view debug_test_view = {
+ "myview", /* name of view */
+ NULL, /* no prolog */
+ &debug_dflt_header_fn, /* default header for each entry */
+ &debug_test_format_fn, /* our own format function */
+ NULL, /* no input function */
+ NULL /* no private data */
+ };
+
+test:
+=====
+
+.. code-block:: c
+
+ debug_info_t *debug_info;
+ int i;
+ ...
+ debug_info = debug_register("test", 0, 4, 4);
+ debug_register_view(debug_info, &debug_test_view);
+ for (i = 0; i < 10; i ++)
+ debug_int_event(debug_info, 1, i);
+
+::
+
+ > cat /sys/kernel/debug/s390dbf/test/myview
+ 00 00964419734:611402 1 - 00 88042ca This error...........
+ 00 00964419734:611405 1 - 00 88042ca That error...........
+ 00 00964419734:611408 1 - 00 88042ca Problem..............
+ 00 00964419734:611411 1 - 00 88042ca Something went wrong.
+ 00 00964419734:611414 1 - 00 88042ca Everything ok........
+ 00 00964419734:611417 1 - 00 88042ca data: 00000005
+ 00 00964419734:611419 1 - 00 88042ca data: 00000006
+ 00 00964419734:611422 1 - 00 88042ca data: 00000007
+ 00 00964419734:611425 1 - 00 88042ca data: 00000008
+ 00 00964419734:611428 1 - 00 88042ca data: 00000009
+++ /dev/null
-S390 Debug Feature
-==================
-
-files: arch/s390/kernel/debug.c
- arch/s390/include/asm/debug.h
-
-Description:
-------------
-The goal of this feature is to provide a kernel debug logging API
-where log records can be stored efficiently in memory, where each component
-(e.g. device drivers) can have one separate debug log.
-One purpose of this is to inspect the debug logs after a production system crash
-in order to analyze the reason for the crash.
-If the system still runs but only a subcomponent which uses dbf fails,
-it is possible to look at the debug logs on a live system via the Linux
-debugfs filesystem.
-The debug feature may also very useful for kernel and driver development.
-
-Design:
--------
-Kernel components (e.g. device drivers) can register themselves at the debug
-feature with the function call debug_register(). This function initializes a
-debug log for the caller. For each debug log exists a number of debug areas
-where exactly one is active at one time. Each debug area consists of contiguous
-pages in memory. In the debug areas there are stored debug entries (log records)
-which are written by event- and exception-calls.
-
-An event-call writes the specified debug entry to the active debug
-area and updates the log pointer for the active area. If the end
-of the active debug area is reached, a wrap around is done (ring buffer)
-and the next debug entry will be written at the beginning of the active
-debug area.
-
-An exception-call writes the specified debug entry to the log and
-switches to the next debug area. This is done in order to be sure
-that the records which describe the origin of the exception are not
-overwritten when a wrap around for the current area occurs.
-
-The debug areas themselves are also ordered in form of a ring buffer.
-When an exception is thrown in the last debug area, the following debug
-entries are then written again in the very first area.
-
-There are three versions for the event- and exception-calls: One for
-logging raw data, one for text and one for numbers.
-
-Each debug entry contains the following data:
-
-- Timestamp
-- Cpu-Number of calling task
-- Level of debug entry (0...6)
-- Return Address to caller
-- Flag, if entry is an exception or not
-
-The debug logs can be inspected in a live system through entries in
-the debugfs-filesystem. Under the toplevel directory "s390dbf" there is
-a directory for each registered component, which is named like the
-corresponding component. The debugfs normally should be mounted to
-/sys/kernel/debug therefore the debug feature can be accessed under
-/sys/kernel/debug/s390dbf.
-
-The content of the directories are files which represent different views
-to the debug log. Each component can decide which views should be
-used through registering them with the function debug_register_view().
-Predefined views for hex/ascii, sprintf and raw binary data are provided.
-It is also possible to define other views. The content of
-a view can be inspected simply by reading the corresponding debugfs file.
-
-All debug logs have an actual debug level (range from 0 to 6).
-The default level is 3. Event and Exception functions have a 'level'
-parameter. Only debug entries with a level that is lower or equal
-than the actual level are written to the log. This means, when
-writing events, high priority log entries should have a low level
-value whereas low priority entries should have a high one.
-The actual debug level can be changed with the help of the debugfs-filesystem
-through writing a number string "x" to the 'level' debugfs file which is
-provided for every debug log. Debugging can be switched off completely
-by using "-" on the 'level' debugfs file.
-
-Example:
-
-> echo "-" > /sys/kernel/debug/s390dbf/dasd/level
-
-It is also possible to deactivate the debug feature globally for every
-debug log. You can change the behavior using 2 sysctl parameters in
-/proc/sys/s390dbf:
-There are currently 2 possible triggers, which stop the debug feature
-globally. The first possibility is to use the "debug_active" sysctl. If
-set to 1 the debug feature is running. If "debug_active" is set to 0 the
-debug feature is turned off.
-The second trigger which stops the debug feature is a kernel oops.
-That prevents the debug feature from overwriting debug information that
-happened before the oops. After an oops you can reactivate the debug feature
-by piping 1 to /proc/sys/s390dbf/debug_active. Nevertheless, its not
-suggested to use an oopsed kernel in a production environment.
-If you want to disallow the deactivation of the debug feature, you can use
-the "debug_stoppable" sysctl. If you set "debug_stoppable" to 0 the debug
-feature cannot be stopped. If the debug feature is already stopped, it
-will stay deactivated.
-
-Kernel Interfaces:
-------------------
-
-----------------------------------------------------------------------------
-debug_info_t *debug_register(char *name, int pages, int nr_areas,
- int buf_size);
-
-Parameter: name: Name of debug log (e.g. used for debugfs entry)
- pages: number of pages, which will be allocated per area
- nr_areas: number of debug areas
- buf_size: size of data area in each debug entry
-
-Return Value: Handle for generated debug area
- NULL if register failed
-
-Description: Allocates memory for a debug log
- Must not be called within an interrupt handler
-
-----------------------------------------------------------------------------
-debug_info_t *debug_register_mode(char *name, int pages, int nr_areas,
- int buf_size, mode_t mode, uid_t uid,
- gid_t gid);
-
-Parameter: name: Name of debug log (e.g. used for debugfs entry)
- pages: Number of pages, which will be allocated per area
- nr_areas: Number of debug areas
- buf_size: Size of data area in each debug entry
- mode: File mode for debugfs files. E.g. S_IRWXUGO
- uid: User ID for debugfs files. Currently only 0 is
- supported.
- gid: Group ID for debugfs files. Currently only 0 is
- supported.
-
-Return Value: Handle for generated debug area
- NULL if register failed
-
-Description: Allocates memory for a debug log
- Must not be called within an interrupt handler
-
----------------------------------------------------------------------------
-void debug_unregister (debug_info_t * id);
-
-Parameter: id: handle for debug log
-
-Return Value: none
-
-Description: frees memory for a debug log and removes all registered debug
- views.
- Must not be called within an interrupt handler
-
----------------------------------------------------------------------------
-void debug_set_level (debug_info_t * id, int new_level);
-
-Parameter: id: handle for debug log
- new_level: new debug level
-
-Return Value: none
-
-Description: Sets new actual debug level if new_level is valid.
-
----------------------------------------------------------------------------
-bool debug_level_enabled (debug_info_t * id, int level);
-
-Parameter: id: handle for debug log
- level: debug level
-
-Return Value: True if level is less or equal to the current debug level.
-
-Description: Returns true if debug events for the specified level would be
- logged. Otherwise returns false.
----------------------------------------------------------------------------
-void debug_stop_all(void);
-
-Parameter: none
-
-Return Value: none
-
-Description: stops the debug feature if stopping is allowed. Currently
- used in case of a kernel oops.
-
----------------------------------------------------------------------------
-debug_entry_t* debug_event (debug_info_t* id, int level, void* data,
- int length);
-
-Parameter: id: handle for debug log
- level: debug level
- data: pointer to data for debug entry
- length: length of data in bytes
-
-Return Value: Address of written debug entry
-
-Description: writes debug entry to active debug area (if level <= actual
- debug level)
-
----------------------------------------------------------------------------
-debug_entry_t* debug_int_event (debug_info_t * id, int level,
- unsigned int data);
-debug_entry_t* debug_long_event(debug_info_t * id, int level,
- unsigned long data);
-
-Parameter: id: handle for debug log
- level: debug level
- data: integer value for debug entry
-
-Return Value: Address of written debug entry
-
-Description: writes debug entry to active debug area (if level <= actual
- debug level)
-
----------------------------------------------------------------------------
-debug_entry_t* debug_text_event (debug_info_t * id, int level,
- const char* data);
-
-Parameter: id: handle for debug log
- level: debug level
- data: string for debug entry
-
-Return Value: Address of written debug entry
-
-Description: writes debug entry in ascii format to active debug area
- (if level <= actual debug level)
-
----------------------------------------------------------------------------
-debug_entry_t* debug_sprintf_event (debug_info_t * id, int level,
- char* string,...);
-
-Parameter: id: handle for debug log
- level: debug level
- string: format string for debug entry
- ...: varargs used as in sprintf()
-
-Return Value: Address of written debug entry
-
-Description: writes debug entry with format string and varargs (longs) to
- active debug area (if level $<=$ actual debug level).
- floats and long long datatypes cannot be used as varargs.
-
----------------------------------------------------------------------------
-
-debug_entry_t* debug_exception (debug_info_t* id, int level, void* data,
- int length);
-
-Parameter: id: handle for debug log
- level: debug level
- data: pointer to data for debug entry
- length: length of data in bytes
-
-Return Value: Address of written debug entry
-
-Description: writes debug entry to active debug area (if level <= actual
- debug level) and switches to next debug area
-
----------------------------------------------------------------------------
-debug_entry_t* debug_int_exception (debug_info_t * id, int level,
- unsigned int data);
-debug_entry_t* debug_long_exception(debug_info_t * id, int level,
- unsigned long data);
-
-Parameter: id: handle for debug log
- level: debug level
- data: integer value for debug entry
-
-Return Value: Address of written debug entry
-
-Description: writes debug entry to active debug area (if level <= actual
- debug level) and switches to next debug area
-
----------------------------------------------------------------------------
-debug_entry_t* debug_text_exception (debug_info_t * id, int level,
- const char* data);
-
-Parameter: id: handle for debug log
- level: debug level
- data: string for debug entry
-
-Return Value: Address of written debug entry
-
-Description: writes debug entry in ascii format to active debug area
- (if level <= actual debug level) and switches to next debug
- area
-
----------------------------------------------------------------------------
-debug_entry_t* debug_sprintf_exception (debug_info_t * id, int level,
- char* string,...);
-
-Parameter: id: handle for debug log
- level: debug level
- string: format string for debug entry
- ...: varargs used as in sprintf()
-
-Return Value: Address of written debug entry
-
-Description: writes debug entry with format string and varargs (longs) to
- active debug area (if level $<=$ actual debug level) and
- switches to next debug area.
- floats and long long datatypes cannot be used as varargs.
-
----------------------------------------------------------------------------
-
-int debug_register_view (debug_info_t * id, struct debug_view *view);
-
-Parameter: id: handle for debug log
- view: pointer to debug view struct
-
-Return Value: 0 : ok
- < 0: Error
-
-Description: registers new debug view and creates debugfs dir entry
-
----------------------------------------------------------------------------
-int debug_unregister_view (debug_info_t * id, struct debug_view *view);
-
-Parameter: id: handle for debug log
- view: pointer to debug view struct
-
-Return Value: 0 : ok
- < 0: Error
-
-Description: unregisters debug view and removes debugfs dir entry
-
-
-
-Predefined views:
------------------
-
-extern struct debug_view debug_hex_ascii_view;
-extern struct debug_view debug_raw_view;
-extern struct debug_view debug_sprintf_view;
-
-Examples
---------
-
-/*
- * hex_ascii- + raw-view Example
- */
-
-#include <linux/init.h>
-#include <asm/debug.h>
-
-static debug_info_t* debug_info;
-
-static int init(void)
-{
- /* register 4 debug areas with one page each and 4 byte data field */
-
- debug_info = debug_register ("test", 1, 4, 4 );
- debug_register_view(debug_info,&debug_hex_ascii_view);
- debug_register_view(debug_info,&debug_raw_view);
-
- debug_text_event(debug_info, 4 , "one ");
- debug_int_exception(debug_info, 4, 4711);
- debug_event(debug_info, 3, &debug_info, 4);
-
- return 0;
-}
-
-static void cleanup(void)
-{
- debug_unregister (debug_info);
-}
-
-module_init(init);
-module_exit(cleanup);
-
----------------------------------------------------------------------------
-
-/*
- * sprintf-view Example
- */
-
-#include <linux/init.h>
-#include <asm/debug.h>
-
-static debug_info_t* debug_info;
-
-static int init(void)
-{
- /* register 4 debug areas with one page each and data field for */
- /* format string pointer + 2 varargs (= 3 * sizeof(long)) */
-
- debug_info = debug_register ("test", 1, 4, sizeof(long) * 3);
- debug_register_view(debug_info,&debug_sprintf_view);
-
- debug_sprintf_event(debug_info, 2 , "first event in %s:%i\n",__FILE__,__LINE__);
- debug_sprintf_exception(debug_info, 1, "pointer to debug info: %p\n",&debug_info);
-
- return 0;
-}
-
-static void cleanup(void)
-{
- debug_unregister (debug_info);
-}
-
-module_init(init);
-module_exit(cleanup);
-
-
-
-Debugfs Interface
-----------------
-Views to the debug logs can be investigated through reading the corresponding
-debugfs-files:
-
-Example:
-
-> ls /sys/kernel/debug/s390dbf/dasd
-flush hex_ascii level pages raw
-> cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s
-00 00974733272:680099 2 - 02 0006ad7e 07 ea 4a 90 | ....
-00 00974733272:682210 2 - 02 0006ade6 46 52 45 45 | FREE
-00 00974733272:682213 2 - 02 0006adf6 07 ea 4a 90 | ....
-00 00974733272:682281 1 * 02 0006ab08 41 4c 4c 43 | EXCP
-01 00974733272:682284 2 - 02 0006ab16 45 43 4b 44 | ECKD
-01 00974733272:682287 2 - 02 0006ab28 00 00 00 04 | ....
-01 00974733272:682289 2 - 02 0006ab3e 00 00 00 20 | ...
-01 00974733272:682297 2 - 02 0006ad7e 07 ea 4a 90 | ....
-01 00974733272:684384 2 - 00 0006ade6 46 52 45 45 | FREE
-01 00974733272:684388 2 - 00 0006adf6 07 ea 4a 90 | ....
-
-See section about predefined views for explanation of the above output!
-
-Changing the debug level
-------------------------
-
-Example:
-
-
-> cat /sys/kernel/debug/s390dbf/dasd/level
-3
-> echo "5" > /sys/kernel/debug/s390dbf/dasd/level
-> cat /sys/kernel/debug/s390dbf/dasd/level
-5
-
-Flushing debug areas
---------------------
-Debug areas can be flushed with piping the number of the desired
-area (0...n) to the debugfs file "flush". When using "-" all debug areas
-are flushed.
-
-Examples:
-
-1. Flush debug area 0:
-> echo "0" > /sys/kernel/debug/s390dbf/dasd/flush
-
-2. Flush all debug areas:
-> echo "-" > /sys/kernel/debug/s390dbf/dasd/flush
-
-Changing the size of debug areas
-------------------------------------
-It is possible the change the size of debug areas through piping
-the number of pages to the debugfs file "pages". The resize request will
-also flush the debug areas.
-
-Example:
-
-Define 4 pages for the debug areas of debug feature "dasd":
-> echo "4" > /sys/kernel/debug/s390dbf/dasd/pages
-
-Stooping the debug feature
---------------------------
-Example:
-
-1. Check if stopping is allowed
-> cat /proc/sys/s390dbf/debug_stoppable
-2. Stop debug feature
-> echo 0 > /proc/sys/s390dbf/debug_active
-
-lcrash Interface
-----------------
-It is planned that the dump analysis tool lcrash gets an additional command
-'s390dbf' to display all the debug logs. With this tool it will be possible
-to investigate the debug logs on a live system and with a memory dump after
-a system crash.
-
-Investigating raw memory
-------------------------
-One last possibility to investigate the debug logs at a live
-system and after a system crash is to look at the raw memory
-under VM or at the Service Element.
-It is possible to find the anker of the debug-logs through
-the 'debug_area_first' symbol in the System map. Then one has
-to follow the correct pointers of the data-structures defined
-in debug.h and find the debug-areas in memory.
-Normally modules which use the debug feature will also have
-a global variable with the pointer to the debug-logs. Following
-this pointer it will also be possible to find the debug logs in
-memory.
-
-For this method it is recommended to use '16 * x + 4' byte (x = 0..n)
-for the length of the data field in debug_register() in
-order to see the debug entries well formatted.
-
-
-Predefined Views
-----------------
-
-There are three predefined views: hex_ascii, raw and sprintf.
-The hex_ascii view shows the data field in hex and ascii representation
-(e.g. '45 43 4b 44 | ECKD').
-The raw view returns a bytestream as the debug areas are stored in memory.
-
-The sprintf view formats the debug entries in the same way as the sprintf
-function would do. The sprintf event/exception functions write to the
-debug entry a pointer to the format string (size = sizeof(long))
-and for each vararg a long value. So e.g. for a debug entry with a format
-string plus two varargs one would need to allocate a (3 * sizeof(long))
-byte data area in the debug_register() function.
-
-IMPORTANT: Using "%s" in sprintf event functions is dangerous. You can only
-use "%s" in the sprintf event functions, if the memory for the passed string is
-available as long as the debug feature exists. The reason behind this is that
-due to performance considerations only a pointer to the string is stored in
-the debug feature. If you log a string that is freed afterwards, you will get
-an OOPS when inspecting the debug feature, because then the debug feature will
-access the already freed memory.
-
-NOTE: If using the sprintf view do NOT use other event/exception functions
-than the sprintf-event and -exception functions.
-
-The format of the hex_ascii and sprintf view is as follows:
-- Number of area
-- Timestamp (formatted as seconds and microseconds since 00:00:00 Coordinated
- Universal Time (UTC), January 1, 1970)
-- level of debug entry
-- Exception flag (* = Exception)
-- Cpu-Number of calling task
-- Return Address to caller
-- data field
-
-The format of the raw view is:
-- Header as described in debug.h
-- datafield
-
-A typical line of the hex_ascii view will look like the following (first line
-is only for explanation and will not be displayed when 'cating' the view):
-
-area time level exception cpu caller data (hex + ascii)
---------------------------------------------------------------------------
-00 00964419409:440690 1 - 00 88023fe
-
-
-Defining views
---------------
-
-Views are specified with the 'debug_view' structure. There are defined
-callback functions which are used for reading and writing the debugfs files:
-
-struct debug_view {
- char name[DEBUG_MAX_PROCF_LEN];
- debug_prolog_proc_t* prolog_proc;
- debug_header_proc_t* header_proc;
- debug_format_proc_t* format_proc;
- debug_input_proc_t* input_proc;
- void* private_data;
-};
-
-where
-
-typedef int (debug_header_proc_t) (debug_info_t* id,
- struct debug_view* view,
- int area,
- debug_entry_t* entry,
- char* out_buf);
-
-typedef int (debug_format_proc_t) (debug_info_t* id,
- struct debug_view* view, char* out_buf,
- const char* in_buf);
-typedef int (debug_prolog_proc_t) (debug_info_t* id,
- struct debug_view* view,
- char* out_buf);
-typedef int (debug_input_proc_t) (debug_info_t* id,
- struct debug_view* view,
- struct file* file, const char* user_buf,
- size_t in_buf_size, loff_t* offset);
-
-
-The "private_data" member can be used as pointer to view specific data.
-It is not used by the debug feature itself.
-
-The output when reading a debugfs file is structured like this:
-
-"prolog_proc output"
-
-"header_proc output 1" "format_proc output 1"
-"header_proc output 2" "format_proc output 2"
-"header_proc output 3" "format_proc output 3"
-...
-
-When a view is read from the debugfs, the Debug Feature calls the
-'prolog_proc' once for writing the prolog.
-Then 'header_proc' and 'format_proc' are called for each
-existing debug entry.
-
-The input_proc can be used to implement functionality when it is written to
-the view (e.g. like with 'echo "0" > /sys/kernel/debug/s390dbf/dasd/level).
-
-For header_proc there can be used the default function
-debug_dflt_header_fn() which is defined in debug.h.
-and which produces the same header output as the predefined views.
-E.g:
-00 00964419409:440761 2 - 00 88023ec
-
-In order to see how to use the callback functions check the implementation
-of the default views!
-
-Example
-
-#include <asm/debug.h>
-
-#define UNKNOWNSTR "data: %08x"
-
-const char* messages[] =
-{"This error...........\n",
- "That error...........\n",
- "Problem..............\n",
- "Something went wrong.\n",
- "Everything ok........\n",
- NULL
-};
-
-static int debug_test_format_fn(
- debug_info_t * id, struct debug_view *view,
- char *out_buf, const char *in_buf
-)
-{
- int i, rc = 0;
-
- if(id->buf_size >= 4) {
- int msg_nr = *((int*)in_buf);
- if(msg_nr < sizeof(messages)/sizeof(char*) - 1)
- rc += sprintf(out_buf, "%s", messages[msg_nr]);
- else
- rc += sprintf(out_buf, UNKNOWNSTR, msg_nr);
- }
- out:
- return rc;
-}
-
-struct debug_view debug_test_view = {
- "myview", /* name of view */
- NULL, /* no prolog */
- &debug_dflt_header_fn, /* default header for each entry */
- &debug_test_format_fn, /* our own format function */
- NULL, /* no input function */
- NULL /* no private data */
-};
-
-=====
-test:
-=====
-debug_info_t *debug_info;
-...
-debug_info = debug_register ("test", 0, 4, 4 ));
-debug_register_view(debug_info, &debug_test_view);
-for(i = 0; i < 10; i ++) debug_int_event(debug_info, 1, i);
-
-> cat /sys/kernel/debug/s390dbf/test/myview
-00 00964419734:611402 1 - 00 88042ca This error...........
-00 00964419734:611405 1 - 00 88042ca That error...........
-00 00964419734:611408 1 - 00 88042ca Problem..............
-00 00964419734:611411 1 - 00 88042ca Something went wrong.
-00 00964419734:611414 1 - 00 88042ca Everything ok........
-00 00964419734:611417 1 - 00 88042ca data: 00000005
-00 00964419734:611419 1 - 00 88042ca data: 00000006
-00 00964419734:611422 1 - 00 88042ca data: 00000007
-00 00964419734:611425 1 - 00 88042ca data: 00000008
-00 00964419734:611428 1 - 00 88042ca data: 00000009
--- /dev/null
+ibm 3270 changelog
+------------------
+
+.. include:: 3270.ChangeLog
+ :literal:
+
+ibm 3270 config3270.sh
+----------------------
+
+.. literalinclude:: config3270.sh
+ :language: shell
-Introduction:
+===============================
+Adjunct Processor (AP) facility
+===============================
+
+
+Introduction
============
The Adjunct Processor (AP) facility is an IBM Z cryptographic facility comprised
of three AP instructions and from 1 up to 256 PCIe cryptographic adapter cards.
facilities which do most of the hard work of providing direct access to AP
devices.
-AP Architectural Overview:
+AP Architectural Overview
=========================
To facilitate the comprehension of the design, let's start with some
definitions:
in the LPAR, the AP bus detects the AP adapter cards assigned to the LPAR and
creates a sysfs device for each assigned adapter. For example, if AP adapters
4 and 10 (0x0a) are assigned to the LPAR, the AP bus will create the following
- sysfs device entries:
+ sysfs device entries::
/sys/devices/ap/card04
/sys/devices/ap/card0a
Symbolic links to these devices will also be created in the AP bus devices
- sub-directory:
+ sub-directory::
/sys/bus/ap/devices/[card04]
/sys/bus/ap/devices/[card04]
the cross product of the AP adapter and usage domain numbers detected when the
AP bus module is loaded. For example, if adapters 4 and 10 (0x0a) and usage
domains 6 and 71 (0x47) are assigned to the LPAR, the AP bus will create the
- following sysfs entries:
+ following sysfs entries::
/sys/devices/ap/card04/04.0006
/sys/devices/ap/card04/04.0047
/sys/devices/ap/card0a/0a.0047
The following symbolic links to these devices will be created in the AP bus
- devices subdirectory:
+ devices subdirectory::
/sys/bus/ap/devices/[04.0006]
/sys/bus/ap/devices/[04.0047]
domain that is not one of the usage domains, but the modified domain
must be one of the control domains.
-AP and SIE:
+AP and SIE
==========
Let's now take a look at how AP instructions executed on a guest are interpreted
by the hardware.
The APQNs can provide secure key functionality - i.e., a private key is stored
on the adapter card for each of its domains - so each APQN must be assigned to
-at most one guest or to the linux host.
+at most one guest or to the linux host::
Example 1: Valid configuration:
------------------------------
This is an invalid configuration because both guests have access to
APQN (1,6).
-The Design:
-===========
+The Design
+==========
The design introduces three new objects:
1. AP matrix device
Reserve APQNs for exclusive use of KVM guests
---------------------------------------------
The following block diagram illustrates the mechanism by which APQNs are
-reserved:
-
- +------------------+
- 7 remove | |
- +--------------------> cex4queue driver |
- | | |
- | +------------------+
- |
- |
- | +------------------+ +-----------------+
- | 5 register driver | | 3 create | |
- | +----------------> Device core +----------> matrix device |
- | | | | | |
- | | +--------^---------+ +-----------------+
- | | |
- | | +-------------------+
- | | +-----------------------------------+ |
- | | | 4 register AP driver | | 2 register device
- | | | | |
-+--------+---+-v---+ +--------+-------+-+
-| | | |
-| ap_bus +--------------------- > vfio_ap driver |
-| | 8 probe | |
-+--------^---------+ +--^--^------------+
-6 edit | | |
- apmask | +-----------------------------+ | 9 mdev create
- aqmask | | 1 modprobe |
-+--------+-----+---+ +----------------+-+ +------------------+
-| | | |8 create | mediated |
-| admin | | VFIO device core |---------> matrix |
-| + | | | device |
-+------+-+---------+ +--------^---------+ +--------^---------+
- | | | |
- | | 9 create vfio_ap-passthrough | |
- | +------------------------------+ |
- +-------------------------------------------------------------+
- 10 assign adapter/domain/control domain
+reserved::
+
+ +------------------+
+ 7 remove | |
+ +--------------------> cex4queue driver |
+ | | |
+ | +------------------+
+ |
+ |
+ | +------------------+ +----------------+
+ | 5 register driver | | 3 create | |
+ | +----------------> Device core +----------> matrix device |
+ | | | | | |
+ | | +--------^---------+ +----------------+
+ | | |
+ | | +-------------------+
+ | | +-----------------------------------+ |
+ | | | 4 register AP driver | | 2 register device
+ | | | | |
+ +--------+---+-v---+ +--------+-------+-+
+ | | | |
+ | ap_bus +--------------------- > vfio_ap driver |
+ | | 8 probe | |
+ +--------^---------+ +--^--^------------+
+ 6 edit | | |
+ apmask | +-----------------------------+ | 9 mdev create
+ aqmask | | 1 modprobe |
+ +--------+-----+---+ +----------------+-+ +----------------+
+ | | | |8 create | mediated |
+ | admin | | VFIO device core |---------> matrix |
+ | + | | | device |
+ +------+-+---------+ +--------^---------+ +--------^-------+
+ | | | |
+ | | 9 create vfio_ap-passthrough | |
+ | +------------------------------+ |
+ +-------------------------------------------------------------+
+ 10 assign adapter/domain/control domain
The process for reserving an AP queue for use by a KVM guest is:
device with the device core. This will serve as the parent device for
all mediated matrix devices used to configure an AP matrix for a guest.
3. The /sys/devices/vfio_ap/matrix device is created by the device core
-4 The vfio_ap device driver will register with the AP bus for AP queue devices
+4. The vfio_ap device driver will register with the AP bus for AP queue devices
of type 10 and higher (CEX4 and newer). The driver will provide the vfio_ap
driver's probe and remove callback interfaces. Devices older than CEX4 queues
are not supported to simplify the implementation by not needlessly
it.
9. The administrator creates a passthrough type mediated matrix device to be
used by a guest
-10 The administrator assigns the adapters, usage domains and control domains
- to be exclusively used by a guest.
+10. The administrator assigns the adapters, usage domains and control domains
+ to be exclusively used by a guest.
Set up the VFIO mediated device interfaces
------------------------------------------
The VFIO AP device driver utilizes the common interface of the VFIO mediated
device core driver to:
+
* Register an AP mediated bus driver to add a mediated matrix device to and
remove it from a VFIO group.
* Create and destroy a mediated matrix device
* Add a mediated matrix device to and remove it from an IOMMU group
The following high-level block diagram shows the main components and interfaces
-of the VFIO AP mediated matrix device driver:
-
- +-------------+
- | |
- | +---------+ | mdev_register_driver() +--------------+
- | | Mdev | +<-----------------------+ |
- | | bus | | | vfio_mdev.ko |
- | | driver | +----------------------->+ |<-> VFIO user
- | +---------+ | probe()/remove() +--------------+ APIs
- | |
- | MDEV CORE |
- | MODULE |
- | mdev.ko |
- | +---------+ | mdev_register_device() +--------------+
- | |Physical | +<-----------------------+ |
- | | device | | | vfio_ap.ko |<-> matrix
- | |interface| +----------------------->+ | device
- | +---------+ | callback +--------------+
- +-------------+
+of the VFIO AP mediated matrix device driver::
+
+ +-------------+
+ | |
+ | +---------+ | mdev_register_driver() +--------------+
+ | | Mdev | +<-----------------------+ |
+ | | bus | | | vfio_mdev.ko |
+ | | driver | +----------------------->+ |<-> VFIO user
+ | +---------+ | probe()/remove() +--------------+ APIs
+ | |
+ | MDEV CORE |
+ | MODULE |
+ | mdev.ko |
+ | +---------+ | mdev_register_device() +--------------+
+ | |Physical | +<-----------------------+ |
+ | | device | | | vfio_ap.ko |<-> matrix
+ | |interface| +----------------------->+ | device
+ | +---------+ | callback +--------------+
+ +-------------+
During initialization of the vfio_ap module, the matrix device is registered
with an 'mdev_parent_ops' structure that provides the sysfs attribute
matrix device.
* sysfs attribute structures:
- * supported_type_groups
+
+ supported_type_groups
The VFIO mediated device framework supports creation of user-defined
mediated device types. These mediated device types are specified
via the 'supported_type_groups' structure when a device is registered
The VFIO AP device driver will register one mediated device type for
passthrough devices:
+
/sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough
+
Only the read-only attributes required by the VFIO mdev framework will
- be provided:
- ... name
- ... device_api
- ... available_instances
- ... device_api
- Where:
- * name: specifies the name of the mediated device type
- * device_api: the mediated device type's API
- * available_instances: the number of mediated matrix passthrough devices
- that can be created
- * device_api: specifies the VFIO API
- * mdev_attr_groups
+ be provided::
+
+ ... name
+ ... device_api
+ ... available_instances
+ ... device_api
+
+ Where:
+
+ * name:
+ specifies the name of the mediated device type
+ * device_api:
+ the mediated device type's API
+ * available_instances:
+ the number of mediated matrix passthrough devices
+ that can be created
+ * device_api:
+ specifies the VFIO API
+ mdev_attr_groups
This attribute group identifies the user-defined sysfs attributes of the
mediated device. When a device is registered with the VFIO mediated device
framework, the sysfs attribute files identified in the 'mdev_attr_groups'
structure will be created in the mediated matrix device's directory. The
sysfs attributes for a mediated matrix device are:
- * assign_adapter:
- * unassign_adapter:
+
+ assign_adapter / unassign_adapter:
Write-only attributes for assigning/unassigning an AP adapter to/from the
mediated matrix device. To assign/unassign an adapter, the APID of the
adapter is echoed to the respective attribute file.
- * assign_domain:
- * unassign_domain:
+ assign_domain / unassign_domain:
Write-only attributes for assigning/unassigning an AP usage domain to/from
the mediated matrix device. To assign/unassign a domain, the domain
number of the the usage domain is echoed to the respective attribute
file.
- * matrix:
+ matrix:
A read-only file for displaying the APQNs derived from the cross product
of the adapter and domain numbers assigned to the mediated matrix device.
- * assign_control_domain:
- * unassign_control_domain:
+ assign_control_domain / unassign_control_domain:
Write-only attributes for assigning/unassigning an AP control domain
to/from the mediated matrix device. To assign/unassign a control domain,
the ID of the domain to be assigned/unassigned is echoed to the respective
attribute file.
- * control_domains:
+ control_domains:
A read-only file for displaying the control domain numbers assigned to the
mediated matrix device.
* functions:
- * create:
+
+ create:
allocates the ap_matrix_mdev structure used by the vfio_ap driver to:
+
* Store the reference to the KVM structure for the guest using the mdev
* Store the AP matrix configuration for the adapters, domains, and control
domains assigned via the corresponding sysfs attributes files
- * remove:
+
+ remove:
deallocates the mediated matrix device's ap_matrix_mdev structure. This will
be allowed only if a running guest is not using the mdev.
* callback interfaces
- * open:
+
+ open:
The vfio_ap driver uses this callback to register a
VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the mdev matrix
device. The open is invoked when QEMU connects the VFIO iommu group
to configure the KVM guest is provided via this callback. The KVM structure,
is used to configure the guest's access to the AP matrix defined via the
mediated matrix device's sysfs attribute files.
- * release:
+ release:
unregisters the VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the
mdev matrix device and deconfigures the guest's AP matrix.
-Configure the APM, AQM and ADM in the CRYCB:
+Configure the APM, AQM and ADM in the CRYCB
-------------------------------------------
Configuring the AP matrix for a KVM guest will be performed when the
VFIO_GROUP_NOTIFY_SET_KVM notifier callback is invoked. The notifier
function is called when QEMU connects to KVM. The guest's AP matrix is
configured via it's CRYCB by:
+
* Setting the bits in the APM corresponding to the APIDs assigned to the
mediated matrix device via its 'assign_adapter' interface.
* Setting the bits in the AQM corresponding to the domains assigned to the
Note: If the user chooses to specify a CPU model different than the 'host'
model to QEMU, the CPU model features and facilities need to be turned on
-explicitly; for example:
+explicitly; for example::
/usr/bin/qemu-system-s390x ... -cpu z13,ap=on,apqci=on,apft=on
A guest can be precluded from using AP features/facilities by turning them off
-explicitly; for example:
+explicitly; for example::
/usr/bin/qemu-system-s390x ... -cpu host,ap=off,apqci=off,apft=off
drivers will fail since only type 10 and newer devices can be configured for
guest use.
-Example:
+Example
=======
Let's now provide an example to illustrate how KVM guests may be given
access to AP facilities. For this example, we will show how to configure
Guest1
------
+=========== ===== ============
CARD.DOMAIN TYPE MODE
-------------------------------
+=========== ===== ============
05 CEX5C CCA-Coproc
05.0004 CEX5C CCA-Coproc
05.00ab CEX5C CCA-Coproc
06 CEX5A Accelerator
06.0004 CEX5A Accelerator
06.00ab CEX5C CCA-Coproc
+=========== ===== ============
Guest2
------
+=========== ===== ============
CARD.DOMAIN TYPE MODE
-------------------------------
+=========== ===== ============
05 CEX5A Accelerator
05.0047 CEX5A Accelerator
05.00ff CEX5A Accelerator
+=========== ===== ============
Guest2
------
+=========== ===== ============
CARD.DOMAIN TYPE MODE
-------------------------------
+=========== ===== ============
06 CEX5A Accelerator
06.0047 CEX5A Accelerator
06.00ff CEX5A Accelerator
+=========== ===== ============
These are the steps:
* VFIO_MDEV_DEVICE
* KVM
- If using make menuconfig select the following to build the vfio_ap module:
- -> Device Drivers
- -> IOMMU Hardware Support
- select S390 AP IOMMU Support
- -> VFIO Non-Privileged userspace driver framework
- -> Mediated device driver frramework
- -> VFIO driver for Mediated devices
- -> I/O subsystem
- -> VFIO support for AP devices
+ If using make menuconfig select the following to build the vfio_ap module::
+
+ -> Device Drivers
+ -> IOMMU Hardware Support
+ select S390 AP IOMMU Support
+ -> VFIO Non-Privileged userspace driver framework
+ -> Mediated device driver frramework
+ -> VFIO driver for Mediated devices
+ -> I/O subsystem
+ -> VFIO support for AP devices
2. Secure the AP queues to be used by the three guests so that the host can not
access them. To secure them, there are two sysfs files that specify
bitmasks marking a subset of the APQN range as 'usable by the default AP
queue device drivers' or 'not usable by the default device drivers' and thus
available for use by the vfio_ap device driver'. The location of the sysfs
- files containing the masks are:
+ files containing the masks are::
- /sys/bus/ap/apmask
- /sys/bus/ap/aqmask
+ /sys/bus/ap/apmask
+ /sys/bus/ap/aqmask
The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs
(APID). Each bit in the mask, from left to right (i.e., from most significant
queue device drivers; otherwise, the APQI is usable by the vfio_ap device
driver.
- Take, for example, the following mask:
+ Take, for example, the following mask::
0x7dffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
respective sysfs mask file in one of two formats:
* An absolute hex string starting with 0x - like "0x12345678" - sets
- the mask. If the given string is shorter than the mask, it is padded
- with 0s on the right; for example, specifying a mask value of 0x41 is
- the same as specifying:
+ the mask. If the given string is shorter than the mask, it is padded
+ with 0s on the right; for example, specifying a mask value of 0x41 is
+ the same as specifying::
- 0x4100000000000000000000000000000000000000000000000000000000000000
+ 0x4100000000000000000000000000000000000000000000000000000000000000
- Keep in mind that the mask reads from left to right (i.e., most
- significant to least significant bit in big endian order), so the mask
- above identifies device numbers 1 and 7 (01000001).
+ Keep in mind that the mask reads from left to right (i.e., most
+ significant to least significant bit in big endian order), so the mask
+ above identifies device numbers 1 and 7 (01000001).
- If the string is longer than the mask, the operation is terminated with
- an error (EINVAL).
+ If the string is longer than the mask, the operation is terminated with
+ an error (EINVAL).
* Individual bits in the mask can be switched on and off by specifying
- each bit number to be switched in a comma separated list. Each bit
- number string must be prepended with a ('+') or minus ('-') to indicate
- the corresponding bit is to be switched on ('+') or off ('-'). Some
- valid values are:
+ each bit number to be switched in a comma separated list. Each bit
+ number string must be prepended with a ('+') or minus ('-') to indicate
+ the corresponding bit is to be switched on ('+') or off ('-'). Some
+ valid values are:
- "+0" switches bit 0 on
- "-13" switches bit 13 off
- "+0x41" switches bit 65 on
- "-0xff" switches bit 255 off
+ - "+0" switches bit 0 on
+ - "-13" switches bit 13 off
+ - "+0x41" switches bit 65 on
+ - "-0xff" switches bit 255 off
- The following example:
- +0,-6,+0x47,-0xf0
+ The following example:
- Switches bits 0 and 71 (0x47) on
- Switches bits 6 and 240 (0xf0) off
+ +0,-6,+0x47,-0xf0
- Note that the bits not specified in the list remain as they were before
- the operation.
+ Switches bits 0 and 71 (0x47) on
+
+ Switches bits 6 and 240 (0xf0) off
+
+ Note that the bits not specified in the list remain as they were before
+ the operation.
2. The masks can also be changed at boot time via parameters on the kernel
command line like this:
- ap.apmask=0xffff ap.aqmask=0x40
+ ap.apmask=0xffff ap.aqmask=0x40
- This would create the following masks:
+ This would create the following masks::
- apmask:
- 0xffff000000000000000000000000000000000000000000000000000000000000
+ apmask:
+ 0xffff000000000000000000000000000000000000000000000000000000000000
- aqmask:
- 0x4000000000000000000000000000000000000000000000000000000000000000
+ aqmask:
+ 0x4000000000000000000000000000000000000000000000000000000000000000
- Resulting in these two pools:
+ Resulting in these two pools::
- default drivers pool: adapter 0-15, domain 1
- alternate drivers pool: adapter 16-255, domains 0, 2-255
+ default drivers pool: adapter 0-15, domain 1
+ alternate drivers pool: adapter 16-255, domains 0, 2-255
- Securing the APQNs for our example:
- ----------------------------------
+Securing the APQNs for our example
+----------------------------------
To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, 06.0047,
06.00ab, and 06.00ff for use by the vfio_ap device driver, the corresponding
- APQNs can either be removed from the default masks:
+ APQNs can either be removed from the default masks::
echo -5,-6 > /sys/bus/ap/apmask
echo -4,-0x47,-0xab,-0xff > /sys/bus/ap/aqmask
- Or the masks can be set as follows:
+ Or the masks can be set as follows::
echo 0xf9ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff \
> apmask
This will result in AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004,
06.0047, 06.00ab, and 06.00ff getting bound to the vfio_ap device driver. The
sysfs directory for the vfio_ap device driver will now contain symbolic links
- to the AP queue devices bound to it:
-
- /sys/bus/ap
- ... [drivers]
- ...... [vfio_ap]
- ......... [05.0004]
- ......... [05.0047]
- ......... [05.00ab]
- ......... [05.00ff]
- ......... [06.0004]
- ......... [06.0047]
- ......... [06.00ab]
- ......... [06.00ff]
+ to the AP queue devices bound to it::
+
+ /sys/bus/ap
+ ... [drivers]
+ ...... [vfio_ap]
+ ......... [05.0004]
+ ......... [05.0047]
+ ......... [05.00ab]
+ ......... [05.00ff]
+ ......... [06.0004]
+ ......... [06.0047]
+ ......... [06.00ab]
+ ......... [06.00ff]
Keep in mind that only type 10 and newer adapters (i.e., CEX4 and later)
can be bound to the vfio_ap device driver. The reason for this is to
queue device can be read from the parent card's sysfs directory. For example,
to see the hardware type of the queue 05.0004:
- cat /sys/bus/ap/devices/card05/hwtype
+ cat /sys/bus/ap/devices/card05/hwtype
The hwtype must be 10 or higher (CEX4 or newer) in order to be bound to the
vfio_ap device driver.
3. Create the mediated devices needed to configure the AP matrixes for the
three guests and to provide an interface to the vfio_ap driver for
- use by the guests:
+ use by the guests::
- /sys/devices/vfio_ap/matrix/
- --- [mdev_supported_types]
- ------ [vfio_ap-passthrough] (passthrough mediated matrix device type)
- --------- create
- --------- [devices]
+ /sys/devices/vfio_ap/matrix/
+ --- [mdev_supported_types]
+ ------ [vfio_ap-passthrough] (passthrough mediated matrix device type)
+ --------- create
+ --------- [devices]
- To create the mediated devices for the three guests:
+ To create the mediated devices for the three guests::
uuidgen > create
uuidgen > create
uuidgen > create
- or
+ or
- echo $uuid1 > create
- echo $uuid2 > create
- echo $uuid3 > create
+ echo $uuid1 > create
+ echo $uuid2 > create
+ echo $uuid3 > create
This will create three mediated devices in the [devices] subdirectory named
after the UUID written to the create attribute file. We call them $uuid1,
- $uuid2 and $uuid3 and this is the sysfs directory structure after creation:
-
- /sys/devices/vfio_ap/matrix/
- --- [mdev_supported_types]
- ------ [vfio_ap-passthrough]
- --------- [devices]
- ------------ [$uuid1]
- --------------- assign_adapter
- --------------- assign_control_domain
- --------------- assign_domain
- --------------- matrix
- --------------- unassign_adapter
- --------------- unassign_control_domain
- --------------- unassign_domain
-
- ------------ [$uuid2]
- --------------- assign_adapter
- --------------- assign_control_domain
- --------------- assign_domain
- --------------- matrix
- --------------- unassign_adapter
- ----------------unassign_control_domain
- ----------------unassign_domain
-
- ------------ [$uuid3]
- --------------- assign_adapter
- --------------- assign_control_domain
- --------------- assign_domain
- --------------- matrix
- --------------- unassign_adapter
- ----------------unassign_control_domain
- ----------------unassign_domain
+ $uuid2 and $uuid3 and this is the sysfs directory structure after creation::
+
+ /sys/devices/vfio_ap/matrix/
+ --- [mdev_supported_types]
+ ------ [vfio_ap-passthrough]
+ --------- [devices]
+ ------------ [$uuid1]
+ --------------- assign_adapter
+ --------------- assign_control_domain
+ --------------- assign_domain
+ --------------- matrix
+ --------------- unassign_adapter
+ --------------- unassign_control_domain
+ --------------- unassign_domain
+
+ ------------ [$uuid2]
+ --------------- assign_adapter
+ --------------- assign_control_domain
+ --------------- assign_domain
+ --------------- matrix
+ --------------- unassign_adapter
+ ----------------unassign_control_domain
+ ----------------unassign_domain
+
+ ------------ [$uuid3]
+ --------------- assign_adapter
+ --------------- assign_control_domain
+ --------------- assign_domain
+ --------------- matrix
+ --------------- unassign_adapter
+ ----------------unassign_control_domain
+ ----------------unassign_domain
4. The administrator now needs to configure the matrixes for the mediated
devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3).
- This is how the matrix is configured for Guest1:
+ This is how the matrix is configured for Guest1::
echo 5 > assign_adapter
echo 6 > assign_adapter
echo 4 > assign_domain
echo 0xab > assign_domain
- Control domains can similarly be assigned using the assign_control_domain
- sysfs file.
+ Control domains can similarly be assigned using the assign_control_domain
+ sysfs file.
- If a mistake is made configuring an adapter, domain or control domain,
- you can use the unassign_xxx files to unassign the adapter, domain or
- control domain.
+ If a mistake is made configuring an adapter, domain or control domain,
+ you can use the unassign_xxx files to unassign the adapter, domain or
+ control domain.
- To display the matrix configuration for Guest1:
+ To display the matrix configuration for Guest1::
- cat matrix
+ cat matrix
- This is how the matrix is configured for Guest2:
+ This is how the matrix is configured for Guest2::
echo 5 > assign_adapter
echo 0x47 > assign_domain
echo 0xff > assign_domain
- This is how the matrix is configured for Guest3:
+ This is how the matrix is configured for Guest3::
echo 6 > assign_adapter
echo 0x47 > assign_domain
configured for the system. If a control domain number higher than the maximum
is specified, the operation will terminate with an error (ENODEV).
-5. Start Guest1:
+5. Start Guest1::
- /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
- -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ...
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+ -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ...
-7. Start Guest2:
+7. Start Guest2::
- /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
- -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ...
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+ -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ...
-7. Start Guest3:
+7. Start Guest3::
- /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
- -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ...
+ /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
+ -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ...
When the guest is shut down, the mediated matrix devices may be removed.
-Using our example again, to remove the mediated matrix device $uuid1:
+Using our example again, to remove the mediated matrix device $uuid1::
/sys/devices/vfio_ap/matrix/
--- [mdev_supported_types]
------------ [$uuid1]
--------------- remove
+::
echo 1 > remove
- This will remove all of the mdev matrix device's sysfs structures including
- the mdev device itself. To recreate and reconfigure the mdev matrix device,
- all of the steps starting with step 3 will have to be performed again. Note
- that the remove will fail if a guest using the mdev is still running.
+This will remove all of the mdev matrix device's sysfs structures including
+the mdev device itself. To recreate and reconfigure the mdev matrix device,
+all of the steps starting with step 3 will have to be performed again. Note
+that the remove will fail if a guest using the mdev is still running.
- It is not necessary to remove an mdev matrix device, but one may want to
- remove it if no guest will use it during the remaining lifetime of the linux
- host. If the mdev matrix device is removed, one may want to also reconfigure
- the pool of adapters and queues reserved for use by the default drivers.
+It is not necessary to remove an mdev matrix device, but one may want to
+remove it if no guest will use it during the remaining lifetime of the linux
+host. If the mdev matrix device is removed, one may want to also reconfigure
+the pool of adapters and queues reserved for use by the default drivers.
Limitations
===========
+==================================
vfio-ccw: the basic infrastructure
==================================
Different than other hardware architectures, s390 has defined a unified
I/O access method, which is so called Channel I/O. It has its own access
patterns:
+
- Channel programs run asynchronously on a separate (co)processor.
- The channel subsystem will access any memory designated by the caller
in the channel program directly, i.e. there is no iommu involved.
+
Thus when we introduce vfio support for these devices, we realize it
with a mediated device (mdev) implementation. The vfio mdev will be
added to an iommu group, so as to make itself able to be managed by the
This document does not intend to explain the s390 I/O architecture in
every detail. More information/reference could be found here:
+
- A good start to know Channel I/O in general:
https://en.wikipedia.org/wiki/Channel_I/O
- s390 architecture:
interrupt handler in the form of interrupt response block (IRB).
Back to vfio-ccw, in short:
+
- ORBs and channel programs are built in guest kernel (with guest
physical addresses).
- ORBs and channel programs are passed to the host kernel.
Within this implementation, we have two drivers for two types of
devices:
+
- The vfio_ccw driver for the physical subchannel device.
This is an I/O subchannel driver for the real subchannel device. It
realizes a group of callbacks and registers to the mdev framework as a
vfio_pin_pages and a vfio_unpin_pages interfaces from the vfio iommu
backend for the physical devices to pin and unpin pages by demand.
-Below is a high Level block diagram.
+Below is a high Level block diagram::
+-------------+
| |
+-------------+
The process of how these work together.
+
1. vfio_ccw.ko drives the physical I/O subchannel, and registers the
physical device (with callbacks) to mdev framework.
When vfio_ccw probing the subchannel device, it registers device
An I/O region is used to accept channel program request from user
space and store I/O interrupt result for user space to retrieve. The
-definition of the region is:
-
-struct ccw_io_region {
-#define ORB_AREA_SIZE 12
- __u8 orb_area[ORB_AREA_SIZE];
-#define SCSW_AREA_SIZE 12
- __u8 scsw_area[SCSW_AREA_SIZE];
-#define IRB_AREA_SIZE 96
- __u8 irb_area[IRB_AREA_SIZE];
- __u32 ret_code;
-} __packed;
+definition of the region is::
+
+ struct ccw_io_region {
+ #define ORB_AREA_SIZE 12
+ __u8 orb_area[ORB_AREA_SIZE];
+ #define SCSW_AREA_SIZE 12
+ __u8 scsw_area[SCSW_AREA_SIZE];
+ #define IRB_AREA_SIZE 96
+ __u8 irb_area[IRB_AREA_SIZE];
+ __u32 ret_code;
+ } __packed;
While starting an I/O request, orb_area should be filled with the
guest ORB, and scsw_area should be filled with the SCSW of the Virtual
vfio-iommu-type1 as the vfio iommu backend.
* CCW translation APIs
- A group of APIs (start with 'cp_') to do CCW translation. The CCWs
+ A group of APIs (start with `cp_`) to do CCW translation. The CCWs
passed in by a user space program are organized with their guest
physical memory addresses. These APIs will copy the CCWs into kernel
space, and assemble a runnable kernel channel program by updating the
This driver utilizes the CCW translation APIs and introduces
vfio_ccw, which is the driver for the I/O subchannel devices you want
to pass through.
- vfio_ccw implements the following vfio ioctls:
+ vfio_ccw implements the following vfio ioctls::
+
VFIO_DEVICE_GET_INFO
VFIO_DEVICE_GET_IRQ_INFO
VFIO_DEVICE_GET_REGION_INFO
VFIO_DEVICE_RESET
VFIO_DEVICE_SET_IRQS
+
This provides an I/O region, so that the user space program can pass a
channel program to the kernel, to do further CCW translation before
issuing them to a real device.
handled (without error handling).
Explanation:
-Q1-Q7: QEMU side process.
-K1-K5: Kernel side process.
-Q1. Get I/O region info during initialization.
-Q2. Setup event notifier and handler to handle I/O completion.
+- Q1-Q7: QEMU side process.
+- K1-K5: Kernel side process.
+
+Q1.
+ Get I/O region info during initialization.
+
+Q2.
+ Setup event notifier and handler to handle I/O completion.
... ...
-Q3. Intercept a ssch instruction.
-Q4. Write the guest channel program and ORB to the I/O region.
- K1. Copy from guest to kernel.
- K2. Translate the guest channel program to a host kernel space
- channel program, which becomes runnable for a real device.
- K3. With the necessary information contained in the orb passed in
- by QEMU, issue the ccwchain to the device.
- K4. Return the ssch CC code.
-Q5. Return the CC code to the guest.
+Q3.
+ Intercept a ssch instruction.
+Q4.
+ Write the guest channel program and ORB to the I/O region.
+
+ K1.
+ Copy from guest to kernel.
+ K2.
+ Translate the guest channel program to a host kernel space
+ channel program, which becomes runnable for a real device.
+ K3.
+ With the necessary information contained in the orb passed in
+ by QEMU, issue the ccwchain to the device.
+ K4.
+ Return the ssch CC code.
+Q5.
+ Return the CC code to the guest.
... ...
- K5. Interrupt handler gets the I/O result and write the result to
- the I/O region.
- K6. Signal QEMU to retrieve the result.
-Q6. Get the signal and event handler reads out the result from the I/O
+ K5.
+ Interrupt handler gets the I/O result and write the result to
+ the I/O region.
+ K6.
+ Signal QEMU to retrieve the result.
+
+Q6.
+ Get the signal and event handler reads out the result from the I/O
region.
-Q7. Update the irb for the guest.
+Q7.
+ Update the irb for the guest.
Limitations
-----------
1. ESA/s390 Principles of Operation manual (IBM Form. No. SA22-7832)
2. ESA/390 Common I/O Device Commands manual (IBM Form. No. SA22-7204)
3. https://en.wikipedia.org/wiki/Channel_I/O
-4. Documentation/s390/cds.txt
+4. Documentation/s390/cds.rst
5. Documentation/vfio.txt
6. Documentation/vfio-mediated-device.txt
+==================================
The s390 SCSI dump tool (zfcpdump)
+==================================
System z machines (z900 or higher) provide hardware support for creating system
dumps on SCSI disks. The dump process is initiated by booting a dump tool, which
- auto_msgmni
- bootloader_type [ X86 only ]
- bootloader_version [ X86 only ]
-- callhome [ S390 only ]
- cap_last_cap
- core_pattern
- core_pipe_limit
==============================================================
-callhome:
-
-Controls the kernel's callhome behavior in case of a kernel panic.
-
-The s390 hardware allows an operating system to send a notification
-to a service organization (callhome) in case of an operating system panic.
-
-When the value in this file is 0 (which is the default behavior)
-nothing happens in case of a kernel panic. If this value is set to "1"
-the complete kernel oops message is send to the IBM customer service
-organization in case the mainframe the Linux operating system is running
-on has a service contract with IBM.
-
-==============================================================
-
cap_last_cap
Highest valid capability of the running kernel. Exports
F: Documentation/devicetree/bindings/interrupt-controller/arm,vic.txt
F: drivers/irqchip/irq-vic.c
+AMAZON ANNAPURNA LABS FIC DRIVER
+M: Talel Shenhar <talel@amazon.com>
+S: Maintained
+F: Documentation/devicetree/bindings/interrupt-controller/amazon,al-fic.txt
+F: drivers/irqchip/irq-al-fic.c
+
ARM SMMU DRIVERS
M: Will Deacon <will@kernel.org>
R: Robin Murphy <robin.murphy@arm.com>
BROADCOM BCM2835 ARM ARCHITECTURE
M: Eric Anholt <eric@anholt.net>
M: Stefan Wahren <wahrenst@gmx.net>
+L: bcm-kernel-feedback-list@broadcom.com
L: linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers)
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
T: git git://github.com/anholt/linux
BROADCOM BCM53573 ARM ARCHITECTURE
M: Rafał Miłecki <rafal@milecki.pl>
+L: bcm-kernel-feedback-list@broadcom.com
L: linux-arm-kernel@lists.infradead.org
S: Maintained
F: arch/arm/boot/dts/bcm53573*
S: Maintained
F: .clang-format
+CLANG/LLVM BUILD SUPPORT
+L: clang-built-linux@googlegroups.com
+W: https://clangbuiltlinux.github.io/
+B: https://github.com/ClangBuiltLinux/linux/issues
+C: irc://chat.freenode.net/clangbuiltlinux
+S: Supported
+K: \b(?i:clang|llvm)\b
+
CLEANCACHE API
M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
L: linux-kernel@vger.kernel.org
S: Maintained
F: drivers/i2c/busses/i2c-cpm.c
+FREESCALE IMX DDR PMU DRIVER
+M: Frank Li <Frank.li@nxp.com>
+L: linux-arm-kernel@lists.infradead.org
+S: Maintained
+F: drivers/perf/fsl_imx8_ddr_perf.c
+F: Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
+
FREESCALE IMX LPI2C DRIVER
M: Dong Aisheng <aisheng.dong@nxp.com>
L: linux-i2c@vger.kernel.org
S: Supported
F: drivers/uio/uio_pci_generic.c
+GENERIC VDSO LIBRARY:
+M: Andy Lutomirski <luto@kernel.org>
+M: Thomas Gleixner <tglx@linutronix.de>
+M: Vincenzo Frascino <vincenzo.frascino@arm.com>
+L: linux-kernel@vger.kernel.org
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/vdso
+S: Maintained
+F: lib/vdso/
+F: kernel/time/vsyscall.c
+F: include/vdso/
+F: include/asm-generic/vdso/vsyscall.h
+
GENWQE (IBM Generic Workqueue Card)
M: Frank Haverkamp <haver@linux.ibm.com>
S: Supported
F: arch/x86/include/asm/hyperv-tlfs.h
F: arch/x86/kernel/cpu/mshyperv.c
F: arch/x86/hyperv
+F: drivers/clocksource/hyperv_timer.c
F: drivers/hid/hid-hyperv.c
F: drivers/hv/
F: drivers/input/serio/hyperv-keyboard.c
F: drivers/video/fbdev/hyperv_fb.c
F: drivers/iommu/hyperv_iommu.c
F: net/vmw_vsock/hyperv_transport.c
+F: include/clocksource/hyperv_timer.h
F: include/linux/hyperv.h
F: include/uapi/linux/hyperv.h
F: tools/hv/
M: Harvey Hunt <harveyhuntnexus@gmail.com>
L: linux-mtd@lists.infradead.org
S: Maintained
-F: drivers/mtd/nand/raw/jz4780_*
+F: drivers/mtd/nand/raw/ingenic/
INOTIFY
M: Jan Kara <jack@suse.cz>
L: kvm@vger.kernel.org
S: Supported
F: drivers/s390/cio/vfio_ccw*
-F: Documentation/s390/vfio-ccw.txt
+F: Documentation/s390/vfio-ccw.rst
F: include/uapi/linux/vfio_ccw.h
S390 ZCRYPT DRIVER
F: drivers/s390/crypto/vfio_ap_drv.c
F: drivers/s390/crypto/vfio_ap_private.h
F: drivers/s390/crypto/vfio_ap_ops.c
-F: Documentation/s390/vfio-ap.txt
+F: Documentation/s390/vfio-ap.rst
S390 ZFCP DRIVER
M: Steffen Maier <maier@linux.ibm.com>
TEGRA I2C DRIVER
M: Laxman Dewangan <ldewangan@nvidia.com>
+R: Dmitry Osipenko <digetx@gmail.com>
S: Supported
F: drivers/i2c/busses/i2c-tegra.c
VERSION = 5
PATCHLEVEL = 2
SUBLEVEL = 0
-EXTRAVERSION = -rc6
-NAME = Golden Lions
+EXTRAVERSION =
+NAME = Bobtail Squid
# *DOCUMENTATION*
# To see a list of typical targets execute "make help"
smp_imb(void)
{
/* Must wait other processors to flush their icache before continue. */
- if (on_each_cpu(ipi_imb, NULL, 1))
- printk(KERN_CRIT "smp_imb: timed out\n");
+ on_each_cpu(ipi_imb, NULL, 1);
}
EXPORT_SYMBOL(smp_imb);
{
/* Although we don't have any data to pass, we do want to
synchronize with the other processors. */
- if (on_each_cpu(ipi_flush_tlb_all, NULL, 1)) {
- printk(KERN_CRIT "flush_tlb_all: timed out\n");
- }
+ on_each_cpu(ipi_flush_tlb_all, NULL, 1);
}
#define asn_locked() (cpu_data[smp_processor_id()].asn_lock)
}
}
- if (smp_call_function(ipi_flush_tlb_mm, mm, 1)) {
- printk(KERN_CRIT "flush_tlb_mm: timed out\n");
- }
+ smp_call_function(ipi_flush_tlb_mm, mm, 1);
preempt_enable();
}
data.mm = mm;
data.addr = addr;
- if (smp_call_function(ipi_flush_tlb_page, &data, 1)) {
- printk(KERN_CRIT "flush_tlb_page: timed out\n");
- }
+ smp_call_function(ipi_flush_tlb_page, &data, 1);
preempt_enable();
}
}
}
- if (smp_call_function(ipi_flush_icache_page, mm, 1)) {
- printk(KERN_CRIT "flush_icache_page: timed out\n");
- }
+ smp_call_function(ipi_flush_icache_page, mm, 1);
preempt_enable();
}
model->reg_setup(®, ctr, &sys);
/* Configure the registers on all cpus. */
- (void)smp_call_function(model->cpu_setup, ®, 1);
+ smp_call_function(model->cpu_setup, ®, 1);
model->cpu_setup(®);
return 0;
}
static int
op_axp_start(void)
{
- (void)smp_call_function(op_axp_cpu_start, NULL, 1);
+ smp_call_function(op_axp_cpu_start, NULL, 1);
op_axp_cpu_start(NULL);
return 0;
}
static void
op_axp_stop(void)
{
- (void)smp_call_function(op_axp_cpu_stop, NULL, 1);
+ smp_call_function(op_axp_cpu_stop, NULL, 1);
op_axp_cpu_stop(NULL);
}
KBUILD_DEFCONFIG := nsim_hs_defconfig
+ifeq ($(CROSS_COMPILE),)
+CROSS_COMPILE := $(call cc-cross-prefix, arc-linux- arceb-linux-)
+endif
+
cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__
cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7
cflags-$(CONFIG_ISA_ARCV2) += -mcpu=hs38
#define ARC_PERIPHERAL_BASE 0xf0000000
#define CREG_BASE (ARC_PERIPHERAL_BASE + 0x1000)
-#define CREG_PAE (CREG_BASE + 0x180)
-#define CREG_PAE_UPDATE (CREG_BASE + 0x194)
#define SDIO_BASE (ARC_PERIPHERAL_BASE + 0xA000)
#define SDIO_UHS_REG_EXT (SDIO_BASE + 0x108)
iowrite32(GPIO_INT_CONNECTED_MASK, (void __iomem *) GPIO_INTEN);
}
-static void __init hsdk_init_early(void)
+enum hsdk_axi_masters {
+ M_HS_CORE = 0,
+ M_HS_RTT,
+ M_AXI_TUN,
+ M_HDMI_VIDEO,
+ M_HDMI_AUDIO,
+ M_USB_HOST,
+ M_ETHERNET,
+ M_SDIO,
+ M_GPU,
+ M_DMAC_0,
+ M_DMAC_1,
+ M_DVFS
+};
+
+#define UPDATE_VAL 1
+
+/*
+ * This is modified configuration of AXI bridge. Default settings
+ * are specified in "Table 111 CREG Address Decoder register reset values".
+ *
+ * AXI_M_m_SLV{0|1} - Slave Select register for master 'm'.
+ * Possible slaves are:
+ * - 0 => no slave selected
+ * - 1 => DDR controller port #1
+ * - 2 => SRAM controller
+ * - 3 => AXI tunnel
+ * - 4 => EBI controller
+ * - 5 => ROM controller
+ * - 6 => AXI2APB bridge
+ * - 7 => DDR controller port #2
+ * - 8 => DDR controller port #3
+ * - 9 => HS38x4 IOC
+ * - 10 => HS38x4 DMI
+ * AXI_M_m_OFFSET{0|1} - Addr Offset register for master 'm'
+ *
+ * Please read ARC HS Development IC Specification, section 17.2 for more
+ * information about apertures configuration.
+ *
+ * m master AXI_M_m_SLV0 AXI_M_m_SLV1 AXI_M_m_OFFSET0 AXI_M_m_OFFSET1
+ * 0 HS (CBU) 0x11111111 0x63111111 0xFEDCBA98 0x0E543210
+ * 1 HS (RTT) 0x77777777 0x77777777 0xFEDCBA98 0x76543210
+ * 2 AXI Tunnel 0x88888888 0x88888888 0xFEDCBA98 0x76543210
+ * 3 HDMI-VIDEO 0x77777777 0x77777777 0xFEDCBA98 0x76543210
+ * 4 HDMI-ADUIO 0x77777777 0x77777777 0xFEDCBA98 0x76543210
+ * 5 USB-HOST 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98
+ * 6 ETHERNET 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98
+ * 7 SDIO 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98
+ * 8 GPU 0x77777777 0x77777777 0xFEDCBA98 0x76543210
+ * 9 DMAC (port #1) 0x77777777 0x77777777 0xFEDCBA98 0x76543210
+ * 10 DMAC (port #2) 0x77777777 0x77777777 0xFEDCBA98 0x76543210
+ * 11 DVFS 0x00000000 0x60000000 0x00000000 0x00000000
+ */
+
+#define CREG_AXI_M_SLV0(m) ((void __iomem *)(CREG_BASE + 0x20 * (m)))
+#define CREG_AXI_M_SLV1(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x04))
+#define CREG_AXI_M_OFT0(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x08))
+#define CREG_AXI_M_OFT1(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x0C))
+#define CREG_AXI_M_UPDT(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x14))
+
+#define CREG_AXI_M_HS_CORE_BOOT ((void __iomem *)(CREG_BASE + 0x010))
+
+#define CREG_PAE ((void __iomem *)(CREG_BASE + 0x180))
+#define CREG_PAE_UPDT ((void __iomem *)(CREG_BASE + 0x194))
+
+static void __init hsdk_init_memory_bridge(void)
{
+ u32 reg;
+
+ /*
+ * M_HS_CORE has one unique register - BOOT.
+ * We need to clean boot mirror (BOOT[1:0]) bits in them to avoid first
+ * aperture to be masked by 'boot mirror'.
+ */
+ reg = readl(CREG_AXI_M_HS_CORE_BOOT) & (~0x3);
+ writel(reg, CREG_AXI_M_HS_CORE_BOOT);
+ writel(0x11111111, CREG_AXI_M_SLV0(M_HS_CORE));
+ writel(0x63111111, CREG_AXI_M_SLV1(M_HS_CORE));
+ writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HS_CORE));
+ writel(0x0E543210, CREG_AXI_M_OFT1(M_HS_CORE));
+ writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HS_CORE));
+
+ writel(0x77777777, CREG_AXI_M_SLV0(M_HS_RTT));
+ writel(0x77777777, CREG_AXI_M_SLV1(M_HS_RTT));
+ writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HS_RTT));
+ writel(0x76543210, CREG_AXI_M_OFT1(M_HS_RTT));
+ writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HS_RTT));
+
+ writel(0x88888888, CREG_AXI_M_SLV0(M_AXI_TUN));
+ writel(0x88888888, CREG_AXI_M_SLV1(M_AXI_TUN));
+ writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_AXI_TUN));
+ writel(0x76543210, CREG_AXI_M_OFT1(M_AXI_TUN));
+ writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_AXI_TUN));
+
+ writel(0x77777777, CREG_AXI_M_SLV0(M_HDMI_VIDEO));
+ writel(0x77777777, CREG_AXI_M_SLV1(M_HDMI_VIDEO));
+ writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HDMI_VIDEO));
+ writel(0x76543210, CREG_AXI_M_OFT1(M_HDMI_VIDEO));
+ writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HDMI_VIDEO));
+
+ writel(0x77777777, CREG_AXI_M_SLV0(M_HDMI_AUDIO));
+ writel(0x77777777, CREG_AXI_M_SLV1(M_HDMI_AUDIO));
+ writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HDMI_AUDIO));
+ writel(0x76543210, CREG_AXI_M_OFT1(M_HDMI_AUDIO));
+ writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HDMI_AUDIO));
+
+ writel(0x77777777, CREG_AXI_M_SLV0(M_USB_HOST));
+ writel(0x77999999, CREG_AXI_M_SLV1(M_USB_HOST));
+ writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_USB_HOST));
+ writel(0x76DCBA98, CREG_AXI_M_OFT1(M_USB_HOST));
+ writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_USB_HOST));
+
+ writel(0x77777777, CREG_AXI_M_SLV0(M_ETHERNET));
+ writel(0x77999999, CREG_AXI_M_SLV1(M_ETHERNET));
+ writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_ETHERNET));
+ writel(0x76DCBA98, CREG_AXI_M_OFT1(M_ETHERNET));
+ writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_ETHERNET));
+
+ writel(0x77777777, CREG_AXI_M_SLV0(M_SDIO));
+ writel(0x77999999, CREG_AXI_M_SLV1(M_SDIO));
+ writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_SDIO));
+ writel(0x76DCBA98, CREG_AXI_M_OFT1(M_SDIO));
+ writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_SDIO));
+
+ writel(0x77777777, CREG_AXI_M_SLV0(M_GPU));
+ writel(0x77777777, CREG_AXI_M_SLV1(M_GPU));
+ writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_GPU));
+ writel(0x76543210, CREG_AXI_M_OFT1(M_GPU));
+ writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_GPU));
+
+ writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_0));
+ writel(0x77777777, CREG_AXI_M_SLV1(M_DMAC_0));
+ writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_0));
+ writel(0x76543210, CREG_AXI_M_OFT1(M_DMAC_0));
+ writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_0));
+
+ writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_1));
+ writel(0x77777777, CREG_AXI_M_SLV1(M_DMAC_1));
+ writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_1));
+ writel(0x76543210, CREG_AXI_M_OFT1(M_DMAC_1));
+ writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_1));
+
+ writel(0x00000000, CREG_AXI_M_SLV0(M_DVFS));
+ writel(0x60000000, CREG_AXI_M_SLV1(M_DVFS));
+ writel(0x00000000, CREG_AXI_M_OFT0(M_DVFS));
+ writel(0x00000000, CREG_AXI_M_OFT1(M_DVFS));
+ writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DVFS));
+
/*
* PAE remapping for DMA clients does not work due to an RTL bug, so
* CREG_PAE register must be programmed to all zeroes, otherwise it
* will cause problems with DMA to/from peripherals even if PAE40 is
* not used.
*/
+ writel(0x00000000, CREG_PAE);
+ writel(UPDATE_VAL, CREG_PAE_UPDT);
+}
- /* Default is 1, which means "PAE offset = 4GByte" */
- writel_relaxed(0, (void __iomem *) CREG_PAE);
-
- /* Really apply settings made above */
- writel(1, (void __iomem *) CREG_PAE_UPDATE);
+static void __init hsdk_init_early(void)
+{
+ hsdk_init_memory_bridge();
/*
* Switch SDIO external ciu clock divider from default div-by-8 to
status = "disabled";
};
+&uart0 {
+ compatible = "marvell,armada-38x-uart";
+};
+
+&uart1 {
+ compatible = "marvell,armada-38x-uart";
+};
+
};
chosen {
- bootargs = "console=ttyS0,19200n8 root=/dev/sda1 rw rootwait";
+ bootargs = "console=ttyS0,19200n8 root=/dev/sda1 rw rootwait consoleblank=300";
stdout-path = "uart0:19200n8";
};
/ {
model = "D-Link DNS-313 1-Bay Network Storage Enclosure";
- compatible = "dlink,dir-313", "cortina,gemini";
+ compatible = "dlink,dns-313", "cortina,gemini";
#address-cells = <1>;
#size-cells = <1>;
pwm1: pwm@2080000 {
compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
reg = <0x02080000 0x4000>;
- interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX6UL_CLK_PWM1>,
<&clks IMX6UL_CLK_PWM1>;
clock-names = "ipg", "per";
pwm2: pwm@2084000 {
compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
reg = <0x02084000 0x4000>;
- interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX6UL_CLK_PWM2>,
<&clks IMX6UL_CLK_PWM2>;
clock-names = "ipg", "per";
pwm3: pwm@2088000 {
compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
reg = <0x02088000 0x4000>;
- interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX6UL_CLK_PWM3>,
<&clks IMX6UL_CLK_PWM3>;
clock-names = "ipg", "per";
pwm4: pwm@208c000 {
compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm";
reg = <0x0208c000 0x4000>;
- interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX6UL_CLK_PWM4>,
<&clks IMX6UL_CLK_PWM4>;
clock-names = "ipg", "per";
<GIC_SPI 167 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 172 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 173 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 172 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 173 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>,
clocks = <&clkc CLKID_CLK81>, <&clkc CLKID_MALI>;
clock-names = "bus", "core";
operating-points-v2 = <&gpu_opp_table>;
- switch-delay = <0xffff>;
};
};
}; /* end of / */
opp-255000000 {
opp-hz = /bits/ 64 <255000000>;
- opp-microvolt = <1150000>;
+ opp-microvolt = <1100000>;
};
opp-364300000 {
opp-hz = /bits/ 64 <364300000>;
- opp-microvolt = <1150000>;
+ opp-microvolt = <1100000>;
};
opp-425000000 {
opp-hz = /bits/ 64 <425000000>;
- opp-microvolt = <1150000>;
+ opp-microvolt = <1100000>;
};
opp-510000000 {
opp-hz = /bits/ 64 <510000000>;
- opp-microvolt = <1150000>;
+ opp-microvolt = <1100000>;
};
opp-637500000 {
opp-hz = /bits/ 64 <637500000>;
- opp-microvolt = <1150000>;
+ opp-microvolt = <1100000>;
turbo-mode;
};
};
clocks = <&clkc CLKID_CLK81>, <&clkc CLKID_MALI>;
clock-names = "bus", "core";
operating-points-v2 = <&gpu_opp_table>;
- switch-delay = <0xffff>;
};
};
}; /* end of / */
int bL_switcher_trace_trigger(void)
{
- int ret;
-
preempt_disable();
bL_switcher_trace_trigger_cpu(NULL);
- ret = smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true);
+ smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true);
preempt_enable();
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger);
#include <asm/barrier.h>
#include <asm/errno.h>
+#include <asm/hwcap.h>
#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/types.h>
isb();
}
+static inline void arch_timer_set_evtstrm_feature(void)
+{
+ elf_hwcap |= HWCAP_EVTSTRM;
+}
+
+static inline bool arch_timer_have_evtstrm_feature(void)
+{
+ return elf_hwcap & HWCAP_EVTSTRM;
+}
#endif
#endif
static struct regulator_init_data da830_evm_usb_vbus_data = {
.consumer_supplies = da830_evm_usb_supplies,
.num_consumer_supplies = ARRAY_SIZE(da830_evm_usb_supplies),
+ .constraints = {
+ .valid_ops_mask = REGULATOR_CHANGE_STATUS,
+ },
};
static struct fixed_voltage_config da830_evm_usb_vbus = {
static struct gpiod_lookup_table da830_evm_usb_vbus_gpio_lookup = {
.dev_id = "reg-fixed-voltage.0",
.table = {
- GPIO_LOOKUP("davinci_gpio", ON_BD_USB_DRV, "vbus", 0),
+ GPIO_LOOKUP("davinci_gpio", ON_BD_USB_DRV, NULL, 0),
{ }
},
};
static struct regulator_init_data hawk_usb_vbus_data = {
.consumer_supplies = hawk_usb_supplies,
.num_consumer_supplies = ARRAY_SIZE(hawk_usb_supplies),
+ .constraints = {
+ .valid_ops_mask = REGULATOR_CHANGE_STATUS,
+ },
};
static struct fixed_voltage_config hawk_usb_vbus = {
* registers, and omap3xxx_prm_reconfigure_io_chain() must be called.
* No return value.
*/
-static void __init omap3xxx_prm_enable_io_wakeup(void)
+static void omap3xxx_prm_enable_io_wakeup(void)
{
if (prm_features & PRM_HAS_IO_WAKEUP)
omap2_prm_set_mod_reg_bits(OMAP3430_EN_IO_MASK, WKUP_MOD,
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SETUP_DMA_OPS
+ select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
+ select GENERIC_GETTIMEOFDAY
+ select GENERIC_COMPAT_VDSO if (!CPU_BIG_ENDIAN && COMPAT)
select HANDLE_DOMAIN_IRQ
select HARDIRQS_SW_RESEND
select HAVE_PCI
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_KPROBES
select HAVE_KRETPROBES
+ select HAVE_GENERIC_VDSO
select IOMMU_DMA if IOMMU_SUPPORT
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
def_bool y
config ZONE_DMA32
- def_bool y
+ bool "Support DMA32 zone" if EXPERT
+ default y
config HAVE_GENERIC_GUP
def_bool y
config PARAVIRT_TIME_ACCOUNTING
bool "Paravirtual steal time accounting"
select PARAVIRT
- default n
help
Select this option to enable fine granularity task steal time
accounting. Time spent executing other tasks in parallel with
KVM in the same kernel image.
config ARM64_MODULE_PLTS
- bool
+ bool "Use PLTs to allow module memory to spill over into vmalloc area"
+ depends on MODULES
select HAVE_MOD_ARCH_SPECIFIC
+ help
+ Allocate PLTs when loading modules so that jumps and calls whose
+ targets are too far away for their relative offsets to be encoded
+ in the instructions themselves can be bounced via veneers in the
+ module's PLT. This allows modules to be allocated in the generic
+ vmalloc area after the dedicated module memory area has been
+ exhausted.
+
+ When running with address space randomization (KASLR), the module
+ region itself may be too far away for ordinary relative jumps and
+ calls, and so in that case, module PLTs are required and cannot be
+ disabled.
+
+ Specific errata workaround(s) might also force module PLTs to be
+ enabled (ARM64_ERRATUM_843419).
config ARM64_PSEUDO_NMI
bool "Support for NMI-like interrupts"
- depends on BROKEN # 1556553607-46531-1-git-send-email-julien.thierry@arm.com
select CONFIG_ARM_GIC_V3
help
Adds support for mimicking Non-Maskable Interrupts through the use of
If unsure, say N
+if ARM64_PSEUDO_NMI
+config ARM64_DEBUG_PRIORITY_MASKING
+ bool "Debug interrupt priority masking"
+ help
+ This adds runtime checks to functions enabling/disabling
+ interrupts when using priority masking. The additional checks verify
+ the validity of ICC_PMR_EL1 when calling concerned functions.
+
+ If unsure, say N
+endif
+
config RELOCATABLE
bool
help
endif
endif
-KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst)
+ifeq ($(CONFIG_GENERIC_COMPAT_VDSO), y)
+ CROSS_COMPILE_COMPAT ?= $(CONFIG_CROSS_COMPILE_COMPAT_VDSO:"%"=%)
+
+ ifeq ($(CONFIG_CC_IS_CLANG), y)
+ $(warning CROSS_COMPILE_COMPAT is clang, the compat vDSO will not be built)
+ else ifeq ($(CROSS_COMPILE_COMPAT),)
+ $(warning CROSS_COMPILE_COMPAT not defined or empty, the compat vDSO will not be built)
+ else ifeq ($(shell which $(CROSS_COMPILE_COMPAT)gcc 2> /dev/null),)
+ $(error $(CROSS_COMPILE_COMPAT)gcc not found, check CROSS_COMPILE_COMPAT)
+ else
+ export CROSS_COMPILE_COMPAT
+ export CONFIG_COMPAT_VDSO := y
+ compat_vdso := -DCONFIG_COMPAT_VDSO=1
+ endif
+endif
+
+KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) $(compat_vdso)
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
KBUILD_CFLAGS += $(call cc-disable-warning, psabi)
-KBUILD_AFLAGS += $(lseinstr) $(brokengasinst)
+KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) $(compat_vdso)
KBUILD_CFLAGS += $(call cc-option,-mabi=lp64)
KBUILD_AFLAGS += $(call cc-option,-mabi=lp64)
prepare: vdso_prepare
vdso_prepare: prepare0
$(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h
+ $(if $(CONFIG_COMPAT_VDSO),$(Q)$(MAKE) \
+ $(build)=arch/arm64/kernel/vdso32 \
+ include/generated/vdso32-offsets.h)
endif
define archhelp
enable-method = "psci";
clocks = <&clockgen 1 0>;
next-level-cache = <&l2>;
- cpu-idle-states = <&CPU_PH20>;
+ cpu-idle-states = <&CPU_PW20>;
};
cpu1: cpu@1 {
enable-method = "psci";
clocks = <&clockgen 1 0>;
next-level-cache = <&l2>;
- cpu-idle-states = <&CPU_PH20>;
+ cpu-idle-states = <&CPU_PW20>;
};
l2: l2-cache {
*/
entry-method = "arm,psci";
- CPU_PH20: cpu-ph20 {
- compatible = "arm,idle-state";
- idle-state-name = "PH20";
- arm,psci-suspend-param = <0x00010000>;
- entry-latency-us = <1000>;
- exit-latency-us = <1000>;
- min-residency-us = <3000>;
+ CPU_PW20: cpu-pw20 {
+ compatible = "arm,idle-state";
+ idle-state-name = "PW20";
+ arm,psci-suspend-param = <0x0>;
+ entry-latency-us = <2000>;
+ exit-latency-us = <2000>;
+ min-residency-us = <6000>;
};
};
CONFIG_CRASH_DUMP=y
CONFIG_XEN=y
CONFIG_COMPAT=y
+CONFIG_RANDOMIZE_BASE=y
CONFIG_HIBERNATION=y
CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y
CONFIG_ARM_CPUIDLE=y
CONFIG_RTC_DRV_IMX_SC=m
CONFIG_RTC_DRV_XGENE=y
CONFIG_DMADEVICES=y
+CONFIG_FSL_EDMA=y
CONFIG_DMA_BCM2835=m
CONFIG_K3_DMA=y
CONFIG_MV_XOR=y
(!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \
(unsigned long)(entry) + (entry)->header.length > (end))
+#define ACPI_MADT_GICC_SPE (ACPI_OFFSET(struct acpi_madt_generic_interrupt, \
+ spe_interrupt) + sizeof(u16))
+
/* Basic configuration for ACPI */
#ifdef CONFIG_ACPI
pgprot_t __acpi_get_mem_attribute(phys_addr_t addr);
static inline void gic_pmr_mask_irqs(void)
{
- BUILD_BUG_ON(GICD_INT_DEF_PRI <= GIC_PRIO_IRQOFF);
+ BUILD_BUG_ON(GICD_INT_DEF_PRI < (GIC_PRIO_IRQOFF |
+ GIC_PRIO_PSR_I_SET));
+ BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON);
gic_write_pmr(GIC_PRIO_IRQOFF);
}
#define __ASM_ARCH_TIMER_H
#include <asm/barrier.h>
+#include <asm/hwcap.h>
#include <asm/sysreg.h>
#include <linux/bug.h>
return 0;
}
+static inline void arch_timer_set_evtstrm_feature(void)
+{
+ cpu_set_named_feature(EVTSTRM);
+#ifdef CONFIG_COMPAT
+ compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM;
+#endif
+}
+
+static inline bool arch_timer_have_evtstrm_feature(void)
+{
+ return cpu_have_named_feature(EVTSTRM);
+}
#endif
#define __read_mostly __attribute__((__section__(".data..read_mostly")))
-static inline int cache_line_size(void)
+static inline int cache_line_size_of_cpu(void)
{
u32 cwg = cache_type_cwg();
+
return cwg ? 4 << cwg : ARCH_DMA_MINALIGN;
}
+int cache_line_size(void);
+
/*
* Read the effective value of CTR_EL0.
*
int set_memory_valid(unsigned long addr, int numpages, int enable);
+int set_direct_map_invalid_noflush(struct page *page);
+int set_direct_map_default_noflush(struct page *page);
+
#endif
cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING);
}
+static inline bool system_has_prio_mask_debugging(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING) &&
+ system_uses_irq_prio_masking();
+}
+
#define ARM64_SSBD_UNKNOWN -1
#define ARM64_SSBD_FORCE_DISABLE 0
#define ARM64_SSBD_KERNEL 1
#include <linux/irqflags.h>
+#include <asm/arch_gicv3.h>
#include <asm/cpufeature.h>
#define DAIF_PROCCTX 0
/* mask/save/unmask/restore all exceptions, including interrupts. */
static inline void local_daif_mask(void)
{
+ WARN_ON(system_has_prio_mask_debugging() &&
+ (read_sysreg_s(SYS_ICC_PMR_EL1) == (GIC_PRIO_IRQOFF |
+ GIC_PRIO_PSR_I_SET)));
+
asm volatile(
"msr daifset, #0xf // local_daif_mask\n"
:
:
: "memory");
+
+ /* Don't really care for a dsb here, we don't intend to enable IRQs */
+ if (system_uses_irq_prio_masking())
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+
trace_hardirqs_off();
}
if (system_uses_irq_prio_masking()) {
/* If IRQs are masked with PMR, reflect it in the flags */
- if (read_sysreg_s(SYS_ICC_PMR_EL1) <= GIC_PRIO_IRQOFF)
+ if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON)
flags |= PSR_I_BIT;
}
{
bool irq_disabled = flags & PSR_I_BIT;
+ WARN_ON(system_has_prio_mask_debugging() &&
+ !(read_sysreg(daif) & PSR_I_BIT));
+
if (!irq_disabled) {
trace_hardirqs_on();
- if (system_uses_irq_prio_masking())
- arch_local_irq_enable();
- } else if (!(flags & PSR_A_BIT)) {
- /*
- * If interrupts are disabled but we can take
- * asynchronous errors, we can take NMIs
- */
if (system_uses_irq_prio_masking()) {
- flags &= ~PSR_I_BIT;
+ gic_write_pmr(GIC_PRIO_IRQON);
+ dsb(sy);
+ }
+ } else if (system_uses_irq_prio_masking()) {
+ u64 pmr;
+
+ if (!(flags & PSR_A_BIT)) {
/*
- * There has been concern that the write to daif
- * might be reordered before this write to PMR.
- * From the ARM ARM DDI 0487D.a, section D1.7.1
- * "Accessing PSTATE fields":
- * Writes to the PSTATE fields have side-effects on
- * various aspects of the PE operation. All of these
- * side-effects are guaranteed:
- * - Not to be visible to earlier instructions in
- * the execution stream.
- * - To be visible to later instructions in the
- * execution stream
- *
- * Also, writes to PMR are self-synchronizing, so no
- * interrupts with a lower priority than PMR is signaled
- * to the PE after the write.
- *
- * So we don't need additional synchronization here.
+ * If interrupts are disabled but we can take
+ * asynchronous errors, we can take NMIs
*/
- arch_local_irq_disable();
+ flags &= ~PSR_I_BIT;
+ pmr = GIC_PRIO_IRQOFF;
+ } else {
+ pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET;
}
+
+ /*
+ * There has been concern that the write to daif
+ * might be reordered before this write to PMR.
+ * From the ARM ARM DDI 0487D.a, section D1.7.1
+ * "Accessing PSTATE fields":
+ * Writes to the PSTATE fields have side-effects on
+ * various aspects of the PE operation. All of these
+ * side-effects are guaranteed:
+ * - Not to be visible to earlier instructions in
+ * the execution stream.
+ * - To be visible to later instructions in the
+ * execution stream
+ *
+ * Also, writes to PMR are self-synchronizing, so no
+ * interrupts with a lower priority than PMR is signaled
+ * to the PE after the write.
+ *
+ * So we don't need additional synchronization here.
+ */
+ gic_write_pmr(pmr);
}
write_sysreg(flags, daif);
({ \
set_thread_flag(TIF_32BIT); \
})
+#ifdef CONFIG_GENERIC_COMPAT_VDSO
+#define COMPAT_ARCH_DLINFO \
+do { \
+ /* \
+ * Note that we use Elf64_Off instead of elf_addr_t because \
+ * elf_addr_t in compat is defined as Elf32_Addr and casting \
+ * current->mm->context.vdso to it triggers a cast warning of \
+ * cast from pointer to integer of different size. \
+ */ \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, \
+ (Elf64_Off)current->mm->context.vdso); \
+} while (0)
+#else
#define COMPAT_ARCH_DLINFO
+#endif
extern int aarch32_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp);
#define compat_arch_setup_additional_pages \
extern void fpsimd_save_state(struct user_fpsimd_state *state);
extern void fpsimd_load_state(struct user_fpsimd_state *state);
-extern void fpsimd_save(void);
-
extern void fpsimd_thread_switch(struct task_struct *next);
extern void fpsimd_flush_thread(void);
void *sve_state, unsigned int sve_vl);
extern void fpsimd_flush_task_state(struct task_struct *target);
-extern void fpsimd_flush_cpu_state(void);
-extern void sve_flush_cpu_state(void);
+extern void fpsimd_save_and_flush_cpu_state(void);
/* Maximum VL that SVE VL-agnostic software can transparently support */
#define SVE_VL_ARCH_MAX 0x100
#define KERNEL_HWCAP_SVEBITPERM __khwcap2_feature(SVEBITPERM)
#define KERNEL_HWCAP_SVESHA3 __khwcap2_feature(SVESHA3)
#define KERNEL_HWCAP_SVESM4 __khwcap2_feature(SVESM4)
+#define KERNEL_HWCAP_FLAGM2 __khwcap2_feature(FLAGM2)
+#define KERNEL_HWCAP_FRINT __khwcap2_feature(FRINT)
/*
* This yields a mask that user programs can use to figure out what
*/
static inline void arch_local_irq_enable(void)
{
+ if (system_has_prio_mask_debugging()) {
+ u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
+
+ WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
+ }
+
asm volatile(ALTERNATIVE(
"msr daifclr, #2 // arch_local_irq_enable\n"
"nop",
static inline void arch_local_irq_disable(void)
{
+ if (system_has_prio_mask_debugging()) {
+ u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);
+
+ WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF);
+ }
+
asm volatile(ALTERNATIVE(
"msr daifset, #2 // arch_local_irq_disable",
__msr_s(SYS_ICC_PMR_EL1, "%0"),
*/
static inline unsigned long arch_local_save_flags(void)
{
- unsigned long daif_bits;
unsigned long flags;
- daif_bits = read_sysreg(daif);
-
- /*
- * The asm is logically equivalent to:
- *
- * if (system_uses_irq_prio_masking())
- * flags = (daif_bits & PSR_I_BIT) ?
- * GIC_PRIO_IRQOFF :
- * read_sysreg_s(SYS_ICC_PMR_EL1);
- * else
- * flags = daif_bits;
- */
asm volatile(ALTERNATIVE(
- "mov %0, %1\n"
- "nop\n"
- "nop",
- __mrs_s("%0", SYS_ICC_PMR_EL1)
- "ands %1, %1, " __stringify(PSR_I_BIT) "\n"
- "csel %0, %0, %2, eq",
- ARM64_HAS_IRQ_PRIO_MASKING)
- : "=&r" (flags), "+r" (daif_bits)
- : "r" ((unsigned long) GIC_PRIO_IRQOFF)
+ "mrs %0, daif",
+ __mrs_s("%0", SYS_ICC_PMR_EL1),
+ ARM64_HAS_IRQ_PRIO_MASKING)
+ : "=&r" (flags)
+ :
: "memory");
return flags;
}
+static inline int arch_irqs_disabled_flags(unsigned long flags)
+{
+ int res;
+
+ asm volatile(ALTERNATIVE(
+ "and %w0, %w1, #" __stringify(PSR_I_BIT),
+ "eor %w0, %w1, #" __stringify(GIC_PRIO_IRQON),
+ ARM64_HAS_IRQ_PRIO_MASKING)
+ : "=&r" (res)
+ : "r" ((int) flags)
+ : "memory");
+
+ return res;
+}
+
static inline unsigned long arch_local_irq_save(void)
{
unsigned long flags;
flags = arch_local_save_flags();
- arch_local_irq_disable();
+ /*
+ * There are too many states with IRQs disabled, just keep the current
+ * state if interrupts are already disabled/masked.
+ */
+ if (!arch_irqs_disabled_flags(flags))
+ arch_local_irq_disable();
return flags;
}
__msr_s(SYS_ICC_PMR_EL1, "%0")
"dsb sy",
ARM64_HAS_IRQ_PRIO_MASKING)
- : "+r" (flags)
:
+ : "r" (flags)
: "memory");
}
-static inline int arch_irqs_disabled_flags(unsigned long flags)
-{
- int res;
-
- asm volatile(ALTERNATIVE(
- "and %w0, %w1, #" __stringify(PSR_I_BIT) "\n"
- "nop",
- "cmp %w1, #" __stringify(GIC_PRIO_IRQOFF) "\n"
- "cset %w0, ls",
- ARM64_HAS_IRQ_PRIO_MASKING)
- : "=&r" (res)
- : "r" ((int) flags)
- : "memory");
-
- return res;
-}
#endif
#endif
* will not signal the CPU of interrupts of lower priority, and the
* only way to get out will be via guest exceptions.
* Naturally, we want to avoid this.
+ *
+ * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a
+ * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU.
*/
- if (system_uses_irq_prio_masking()) {
- gic_write_pmr(GIC_PRIO_IRQON);
+ if (system_uses_irq_prio_masking())
dsb(sy);
- }
}
static inline void kvm_arm_vhe_guest_exit(void)
* Level 2 descriptor (PMD).
*/
#define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0)
-#define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0)
#define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0)
#define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0)
#define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1)
/*
* Level 3 descriptor (PTE).
*/
+#define PTE_VALID (_AT(pteval_t, 1) << 0)
#define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0)
-#define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0)
#define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0)
#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1)
#define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
/*
* Software defined PTE bits definition.
*/
-#define PTE_VALID (_AT(pteval_t, 1) << 0)
#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */
#define PTE_DIRTY (_AT(pteval_t, 1) << 55)
#define PTE_SPECIAL (_AT(pteval_t, 1) << 56)
*
* PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
*/
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
+
+static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
+ pte_t pte)
{
pte_t old_pte;
- if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
- __sync_icache_dcache(pte);
+ if (!IS_ENABLED(CONFIG_DEBUG_VM))
+ return;
+
+ old_pte = READ_ONCE(*ptep);
+
+ if (!pte_valid(old_pte) || !pte_valid(pte))
+ return;
+ if (mm != current->active_mm && atomic_read(&mm->mm_users) <= 1)
+ return;
/*
- * If the existing pte is valid, check for potential race with
- * hardware updates of the pte (ptep_set_access_flags safely changes
- * valid ptes without going through an invalid entry).
+ * Check for potential race with hardware updates of the pte
+ * (ptep_set_access_flags safely changes valid ptes without going
+ * through an invalid entry).
*/
- old_pte = READ_ONCE(*ptep);
- if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(old_pte) && pte_valid(pte) &&
- (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
- VM_WARN_ONCE(!pte_young(pte),
- "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
- __func__, pte_val(old_pte), pte_val(pte));
- VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
- "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
- __func__, pte_val(old_pte), pte_val(pte));
- }
+ VM_WARN_ONCE(!pte_young(pte),
+ "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
+ __func__, pte_val(old_pte), pte_val(pte));
+ VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
+ "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
+ __func__, pte_val(old_pte), pte_val(pte));
+}
+
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
+ __sync_icache_dcache(pte);
+
+ __check_racy_pte_update(mm, ptep, pte);
set_pte(ptep, pte);
}
return __pmd(pte_val(pte));
}
-static inline pgprot_t mk_sect_prot(pgprot_t prot)
+static inline pgprot_t mk_pud_sect_prot(pgprot_t prot)
+{
+ return __pgprot((pgprot_val(prot) & ~PUD_TABLE_BIT) | PUD_TYPE_SECT);
+}
+
+static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot)
{
- return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT);
+ return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT);
}
#ifdef CONFIG_NUMA_BALANCING
* means masking more IRQs (or at least that the same IRQs remain masked).
*
* To mask interrupts, we clear the most significant bit of PMR.
+ *
+ * Some code sections either automatically switch back to PSR.I or explicitly
+ * require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included
+ * in the the priority mask, it indicates that PSR.I should be set and
+ * interrupt disabling temporarily does not rely on IRQ priorities.
*/
-#define GIC_PRIO_IRQON 0xf0
-#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80)
+#define GIC_PRIO_IRQON 0xc0
+#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80)
+#define GIC_PRIO_PSR_I_SET (1 << 4)
/* Additional SPSR bits not exposed in the UABI */
#define PSR_IL_BIT (1 << 20)
#ifdef CONFIG_COMPAT
#include <linux/compat.h>
+struct compat_sigcontext {
+ /* We always set these two fields to 0 */
+ compat_ulong_t trap_no;
+ compat_ulong_t error_code;
+
+ compat_ulong_t oldmask;
+ compat_ulong_t arm_r0;
+ compat_ulong_t arm_r1;
+ compat_ulong_t arm_r2;
+ compat_ulong_t arm_r3;
+ compat_ulong_t arm_r4;
+ compat_ulong_t arm_r5;
+ compat_ulong_t arm_r6;
+ compat_ulong_t arm_r7;
+ compat_ulong_t arm_r8;
+ compat_ulong_t arm_r9;
+ compat_ulong_t arm_r10;
+ compat_ulong_t arm_fp;
+ compat_ulong_t arm_ip;
+ compat_ulong_t arm_sp;
+ compat_ulong_t arm_lr;
+ compat_ulong_t arm_pc;
+ compat_ulong_t arm_cpsr;
+ compat_ulong_t fault_address;
+};
+
+struct compat_ucontext {
+ compat_ulong_t uc_flags;
+ compat_uptr_t uc_link;
+ compat_stack_t uc_stack;
+ struct compat_sigcontext uc_mcontext;
+ compat_sigset_t uc_sigmask;
+ int __unused[32 - (sizeof(compat_sigset_t) / sizeof(int))];
+ compat_ulong_t uc_regspace[128] __attribute__((__aligned__(8)));
+};
+
+struct compat_sigframe {
+ struct compat_ucontext uc;
+ compat_ulong_t retcode[2];
+};
+
+struct compat_rt_sigframe {
+ struct compat_siginfo info;
+ struct compat_sigframe sig;
+};
+
int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set,
struct pt_regs *regs);
int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
#include <linux/preempt.h>
#include <linux/types.h>
-#ifdef CONFIG_KERNEL_MODE_NEON
+DECLARE_PER_CPU(bool, fpsimd_context_busy);
-DECLARE_PER_CPU(bool, kernel_neon_busy);
+#ifdef CONFIG_KERNEL_MODE_NEON
/*
* may_use_simd - whether it is allowable at this time to issue SIMD
static __must_check inline bool may_use_simd(void)
{
/*
- * kernel_neon_busy is only set while preemption is disabled,
+ * fpsimd_context_busy is only set while preemption is disabled,
* and is clear whenever preemption is enabled. Since
- * this_cpu_read() is atomic w.r.t. preemption, kernel_neon_busy
+ * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy
* cannot change under our feet -- if it's set we cannot be
* migrated, and if it's clear we cannot be migrated to a CPU
* where it is set.
*/
return !in_irq() && !irqs_disabled() && !in_nmi() &&
- !this_cpu_read(kernel_neon_busy);
+ !this_cpu_read(fpsimd_context_busy);
}
#else /* ! CONFIG_KERNEL_MODE_NEON */
/* id_aa64isar1 */
#define ID_AA64ISAR1_SB_SHIFT 36
+#define ID_AA64ISAR1_FRINTTS_SHIFT 32
#define ID_AA64ISAR1_GPI_SHIFT 28
#define ID_AA64ISAR1_GPA_SHIFT 24
#define ID_AA64ISAR1_LRCPC_SHIFT 20
* TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace
* TIF_SYSCALL_AUDIT - syscall auditing
* TIF_SECCOMP - syscall secure computing
+ * TIF_SYSCALL_EMU - syscall emulation active
* TIF_SIGPENDING - signal pending
* TIF_NEED_RESCHED - rescheduling necessary
* TIF_NOTIFY_RESUME - callback before returning to user
#define TIF_SYSCALL_AUDIT 9
#define TIF_SYSCALL_TRACEPOINT 10
#define TIF_SECCOMP 11
+#define TIF_SYSCALL_EMU 12
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
#define TIF_FREEZE 19
#define TIF_RESTORE_SIGMASK 20
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
+#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_FSCHECK (1 << TIF_FSCHECK)
#define _TIF_32BIT (1 << TIF_32BIT)
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
- _TIF_NOHZ)
+ _TIF_NOHZ | _TIF_SYSCALL_EMU)
#define INIT_THREAD_INFO(tsk) \
{ \
#define __NR_compat_exit 1
#define __NR_compat_read 3
#define __NR_compat_write 4
+#define __NR_compat_gettimeofday 78
#define __NR_compat_sigreturn 119
#define __NR_compat_rt_sigreturn 173
+#define __NR_compat_clock_getres 247
+#define __NR_compat_clock_gettime 263
+#define __NR_compat_clock_gettime64 403
+#define __NR_compat_clock_getres_time64 406
/*
* The following SVCs are ARM private.
#ifndef __ASSEMBLY__
#include <generated/vdso-offsets.h>
+#ifdef CONFIG_COMPAT_VDSO
+#include <generated/vdso32-offsets.h>
+#endif
#define VDSO_SYMBOL(base, name) \
({ \
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 ARM Limited
+ */
+#ifndef __COMPAT_BARRIER_H
+#define __COMPAT_BARRIER_H
+
+#ifndef __ASSEMBLY__
+/*
+ * Warning: This code is meant to be used with
+ * ENABLE_COMPAT_VDSO only.
+ */
+#ifndef ENABLE_COMPAT_VDSO
+#error This header is meant to be used with ENABLE_COMPAT_VDSO only
+#endif
+
+#ifdef dmb
+#undef dmb
+#endif
+
+#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory")
+
+#if __LINUX_ARM_ARCH__ >= 8
+#define aarch32_smp_mb() dmb(ish)
+#define aarch32_smp_rmb() dmb(ishld)
+#define aarch32_smp_wmb() dmb(ishst)
+#else
+#define aarch32_smp_mb() dmb(ish)
+#define aarch32_smp_rmb() aarch32_smp_mb()
+#define aarch32_smp_wmb() dmb(ishst)
+#endif
+
+
+#undef smp_mb
+#undef smp_rmb
+#undef smp_wmb
+
+#define smp_mb() aarch32_smp_mb()
+#define smp_rmb() aarch32_smp_rmb()
+#define smp_wmb() aarch32_smp_wmb()
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __COMPAT_BARRIER_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 ARM Limited
+ */
+#ifndef __ASM_VDSO_GETTIMEOFDAY_H
+#define __ASM_VDSO_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/unistd.h>
+#include <uapi/linux/time.h>
+
+#include <asm/vdso/compat_barrier.h>
+
+#define __VDSO_USE_SYSCALL ULLONG_MAX
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+static __always_inline
+int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ register struct timezone *tz asm("r1") = _tz;
+ register struct __kernel_old_timeval *tv asm("r0") = _tv;
+ register long ret asm ("r0");
+ register long nr asm("r7") = __NR_compat_gettimeofday;
+
+ asm volatile(
+ " swi #0\n"
+ : "=r" (ret)
+ : "r" (tv), "r" (tz), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("r1") = _ts;
+ register clockid_t clkid asm("r0") = _clkid;
+ register long ret asm ("r0");
+ register long nr asm("r7") = __NR_compat_clock_gettime64;
+
+ asm volatile(
+ " swi #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("r1") = _ts;
+ register clockid_t clkid asm("r0") = _clkid;
+ register long ret asm ("r0");
+ register long nr asm("r7") = __NR_compat_clock_getres_time64;
+
+ /* The checks below are required for ABI consistency with arm */
+ if ((_clkid >= MAX_CLOCKS) && (_ts == NULL))
+ return -EINVAL;
+
+ asm volatile(
+ " swi #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
+{
+ u64 res;
+
+ /*
+ * clock_mode == 0 implies that vDSO are enabled otherwise
+ * fallback on syscall.
+ */
+ if (clock_mode)
+ return __VDSO_USE_SYSCALL;
+
+ /*
+ * This isb() is required to prevent that the counter value
+ * is speculated.
+ */
+ isb();
+ asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (res));
+ /*
+ * This isb() is required to prevent that the seq lock is
+ * speculated.
+ */
+ isb();
+
+ return res;
+}
+
+static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+{
+ const struct vdso_data *ret;
+
+ /*
+ * This simply puts &_vdso_data into ret. The reason why we don't use
+ * `ret = _vdso_data` is that the compiler tends to optimise this in a
+ * very suboptimal way: instead of keeping &_vdso_data in a register,
+ * it goes through a relocation almost every time _vdso_data must be
+ * accessed (even in subfunctions). This is both time and space
+ * consuming: each relocation uses a word in the code section, and it
+ * has to be loaded at runtime.
+ *
+ * This trick hides the assignment from the compiler. Since it cannot
+ * track where the pointer comes from, it will only use one relocation
+ * where __arch_get_vdso_data() is called, and then keep the result in
+ * a register.
+ */
+ asm volatile("mov %0, %1" : "=r"(ret) : "r"(_vdso_data));
+
+ return ret;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 ARM Limited
+ */
+#ifndef __ASM_VDSO_GETTIMEOFDAY_H
+#define __ASM_VDSO_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/unistd.h>
+#include <uapi/linux/time.h>
+
+#define __VDSO_USE_SYSCALL ULLONG_MAX
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+static __always_inline
+int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ register struct timezone *tz asm("x1") = _tz;
+ register struct __kernel_old_timeval *tv asm("x0") = _tv;
+ register long ret asm ("x0");
+ register long nr asm("x8") = __NR_gettimeofday;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (tv), "r" (tz), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("x1") = _ts;
+ register clockid_t clkid asm("x0") = _clkid;
+ register long ret asm ("x0");
+ register long nr asm("x8") = __NR_clock_gettime;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register struct __kernel_timespec *ts asm("x1") = _ts;
+ register clockid_t clkid asm("x0") = _clkid;
+ register long ret asm ("x0");
+ register long nr asm("x8") = __NR_clock_getres;
+
+ asm volatile(
+ " svc #0\n"
+ : "=r" (ret)
+ : "r" (clkid), "r" (ts), "r" (nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
+{
+ u64 res;
+
+ /*
+ * clock_mode == 0 implies that vDSO are enabled otherwise
+ * fallback on syscall.
+ */
+ if (clock_mode)
+ return __VDSO_USE_SYSCALL;
+
+ /*
+ * This isb() is required to prevent that the counter value
+ * is speculated.
+ */
+ isb();
+ asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory");
+ /*
+ * This isb() is required to prevent that the seq lock is
+ * speculated.#
+ */
+ isb();
+
+ return res;
+}
+
+static __always_inline
+const struct vdso_data *__arch_get_vdso_data(void)
+{
+ return _vdso_data;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+
+#define VDSO_PRECISION_MASK ~(0xFF00ULL<<48)
+
+extern struct vdso_data *vdso_data;
+
+/*
+ * Update the vDSO data page to keep in sync with kernel timekeeping.
+ */
+static __always_inline
+struct vdso_data *__arm64_get_k_vdso_data(void)
+{
+ return vdso_data;
+}
+#define __arch_get_k_vdso_data __arm64_get_k_vdso_data
+
+static __always_inline
+int __arm64_get_clock_mode(struct timekeeper *tk)
+{
+ u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
+
+ return use_syscall;
+}
+#define __arch_get_clock_mode __arm64_get_clock_mode
+
+static __always_inline
+int __arm64_use_vsyscall(struct vdso_data *vdata)
+{
+ return !vdata[CS_HRES_COARSE].clock_mode;
+}
+#define __arch_use_vsyscall __arm64_use_vsyscall
+
+static __always_inline
+void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
+{
+ vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK;
+ vdata[CS_RAW].mask = VDSO_PRECISION_MASK;
+}
+#define __arch_update_vsyscall __arm64_update_vsyscall
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
#define HWCAP2_SVEBITPERM (1 << 4)
#define HWCAP2_SVESHA3 (1 << 5)
#define HWCAP2_SVESM4 (1 << 6)
+#define HWCAP2_FLAGM2 (1 << 7)
+#define HWCAP2_FRINT (1 << 8)
#endif /* _UAPI__ASM_HWCAP_H */
#define PSR_x 0x0000ff00 /* Extension */
#define PSR_c 0x000000ff /* Control */
+/* syscall emulation path in ptrace */
+#define PTRACE_SYSEMU 31
+#define PTRACE_SYSEMU_SINGLESTEP 32
#ifndef __ASSEMBLY__
$(call if_changed,objcopy)
obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
- sigreturn32.o sys_compat.o
+ sys_compat.o
+ifneq ($(CONFIG_COMPAT_VDSO), y)
+obj-$(CONFIG_COMPAT) += sigreturn32.o
+endif
obj-$(CONFIG_KUSER_HELPERS) += kuser32.o
obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
obj-y += vdso/ probes/
+obj-$(CONFIG_COMPAT_VDSO) += vdso32/
head-y := head.o
extra-y += $(head-y) vmlinux.lds
*/
if (table->revision < 5 ||
(table->revision == 5 && fadt->minor_revision < 1)) {
- pr_err("Unsupported FADT revision %d.%d, should be 5.1+\n",
+ pr_err(FW_BUG "Unsupported FADT revision %d.%d, should be 5.1+\n",
table->revision, fadt->minor_revision);
- ret = -EINVAL;
- goto out;
+
+ if (!fadt->arm_boot_flags) {
+ ret = -EINVAL;
+ goto out;
+ }
+ pr_err("FADT has ARM boot flags set, assuming 5.1\n");
}
if (!(fadt->flags & ACPI_FADT_HW_REDUCED)) {
#include <asm/fixmap.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
+#include <asm/signal32.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
-#include <asm/vdso_datapage.h>
#include <linux/kbuild.h>
#include <linux/arm-smccc.h>
DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe));
DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
BLANK();
+#ifdef CONFIG_COMPAT
+ DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0));
+ DEFINE(COMPAT_RT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_rt_sigframe, sig.uc.uc_mcontext.arm_r0));
+ BLANK();
+#endif
DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter));
BLANK();
DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm));
BLANK();
DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
BLANK();
- DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
- DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
- DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW);
- DEFINE(CLOCK_REALTIME_RES, offsetof(struct vdso_data, hrtimer_res));
- DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
- DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE);
- DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
- DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
- BLANK();
- DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last));
- DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec));
- DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec));
- DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
- DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
- DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec));
- DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count));
- DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult));
- DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift));
- DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest));
- DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall));
- BLANK();
- DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec));
- DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec));
- BLANK();
- DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
- DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
- BLANK();
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();
#define CLIDR_CTYPE(clidr, level) \
(((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
+int cache_line_size(void)
+{
+ if (coherency_max_size != 0)
+ return coherency_max_size;
+
+ return cache_line_size_of_cpu();
+}
+EXPORT_SYMBOL_GPL(cache_line_size);
+
static inline enum cache_type get_cache_type(int level)
{
u64 clidr;
static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused)
{
static bool undef_hook_registered = false;
- static DEFINE_SPINLOCK(hook_lock);
+ static DEFINE_RAW_SPINLOCK(hook_lock);
- spin_lock(&hook_lock);
+ raw_spin_lock(&hook_lock);
if (!undef_hook_registered) {
register_undef_hook(&ssbs_emulation_hook);
undef_hook_registered = true;
}
- spin_unlock(&hook_lock);
+ raw_spin_unlock(&hook_lock);
if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS);
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
#ifdef CONFIG_ARM64_SVE
"svebitperm",
"svesha3",
"svesm4",
+ "flagm2",
+ "frint",
NULL
};
/*
* Registers that may be useful after this macro is invoked:
*
+ * x20 - ICC_PMR_EL1
* x21 - aborted SP
* x22 - aborted PC
* x23 - aborted PSTATE
irq_stack_exit
.endm
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+ /*
+ * Set res to 0 if irqs were unmasked in interrupted context.
+ * Otherwise set res to non-0 value.
+ */
+ .macro test_irqs_unmasked res:req, pmr:req
+alternative_if ARM64_HAS_IRQ_PRIO_MASKING
+ sub \res, \pmr, #GIC_PRIO_IRQON
+alternative_else
+ mov \res, xzr
+alternative_endif
+ .endm
+#endif
+
+ .macro gic_prio_kentry_setup, tmp:req
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
+ mov \tmp, #(GIC_PRIO_PSR_I_SET | GIC_PRIO_IRQON)
+ msr_s SYS_ICC_PMR_EL1, \tmp
+ alternative_else_nop_endif
+#endif
+ .endm
+
+ .macro gic_prio_irq_setup, pmr:req, tmp:req
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
+ orr \tmp, \pmr, #GIC_PRIO_PSR_I_SET
+ msr_s SYS_ICC_PMR_EL1, \tmp
+ alternative_else_nop_endif
+#endif
+ .endm
+
.text
/*
cmp x24, #ESR_ELx_EC_BRK64 // if BRK64
cinc x24, x24, eq // set bit '0'
tbz x24, #0, el1_inv // EL1 only
+ gic_prio_kentry_setup tmp=x3
mrs x0, far_el1
mov x2, sp // struct pt_regs
bl do_debug_exception
.align 6
el1_irq:
kernel_entry 1
+ gic_prio_irq_setup pmr=x20, tmp=x1
enable_da_f
-#ifdef CONFIG_TRACE_IRQFLAGS
+
#ifdef CONFIG_ARM64_PSEUDO_NMI
-alternative_if ARM64_HAS_IRQ_PRIO_MASKING
- ldr x20, [sp, #S_PMR_SAVE]
-alternative_else
- mov x20, #GIC_PRIO_IRQON
-alternative_endif
- cmp x20, #GIC_PRIO_IRQOFF
- /* Irqs were disabled, don't trace */
- b.ls 1f
+ test_irqs_unmasked res=x0, pmr=x20
+ cbz x0, 1f
+ bl asm_nmi_enter
+1:
#endif
+
+#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
-1:
#endif
irq_handler
bl preempt_schedule_irq // irq en/disable is done inside
1:
#endif
-#ifdef CONFIG_TRACE_IRQFLAGS
+
#ifdef CONFIG_ARM64_PSEUDO_NMI
/*
- * if IRQs were disabled when we received the interrupt, we have an NMI
- * and we are not re-enabling interrupt upon eret. Skip tracing.
+ * When using IRQ priority masking, we can get spurious interrupts while
+ * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a
+ * section with interrupts disabled. Skip tracing in those cases.
*/
- cmp x20, #GIC_PRIO_IRQOFF
- b.ls 1f
+ test_irqs_unmasked res=x0, pmr=x20
+ cbz x0, 1f
+ bl asm_nmi_exit
+1:
+#endif
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+ test_irqs_unmasked res=x0, pmr=x20
+ cbnz x0, 1f
#endif
bl trace_hardirqs_on
1:
* Instruction abort handling
*/
mrs x26, far_el1
+ gic_prio_kentry_setup tmp=x0
enable_da_f
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
* Stack or PC alignment exception handling
*/
mrs x26, far_el1
+ gic_prio_kentry_setup tmp=x0
enable_da_f
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
* Debug exception handling
*/
tbnz x24, #0, el0_inv // EL0 only
+ gic_prio_kentry_setup tmp=x3
mrs x0, far_el1
mov x1, x25
mov x2, sp
bl do_debug_exception
- enable_daif
+ enable_da_f
ct_user_exit
b ret_to_user
el0_inv:
el0_irq:
kernel_entry 0
el0_irq_naked:
+ gic_prio_irq_setup pmr=x20, tmp=x0
enable_da_f
+
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_off
#endif
el1_error:
kernel_entry 1
mrs x1, esr_el1
+ gic_prio_kentry_setup tmp=x2
enable_dbg
mov x0, sp
bl do_serror
kernel_entry 0
el0_error_naked:
mrs x1, esr_el1
+ gic_prio_kentry_setup tmp=x2
enable_dbg
mov x0, sp
bl do_serror
- enable_daif
+ enable_da_f
ct_user_exit
b ret_to_user
ENDPROC(el0_error)
*/
ret_to_user:
disable_daif
+ gic_prio_kentry_setup tmp=x3
ldr x1, [tsk, #TSK_TI_FLAGS]
and x2, x1, #_TIF_WORK_MASK
cbnz x2, work_pending
*/
.align 6
el0_svc:
+ gic_prio_kentry_setup tmp=x1
mov x0, sp
bl el0_svc_handler
b ret_to_user
* To prevent this from racing with the manipulation of the task's FPSIMD state
* from task context and thereby corrupting the state, it is necessary to
* protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
- * flag with local_bh_disable() unless softirqs are already masked.
+ * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to
+ * run but prevent them to use FPSIMD.
*
* For a certain task, the sequence may look something like this:
* - the task gets scheduled in; if both the task's fpsimd_cpu field
#endif /* ! CONFIG_ARM64_SVE */
+DEFINE_PER_CPU(bool, fpsimd_context_busy);
+EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
+
+static void __get_cpu_fpsimd_context(void)
+{
+ bool busy = __this_cpu_xchg(fpsimd_context_busy, true);
+
+ WARN_ON(busy);
+}
+
+/*
+ * Claim ownership of the CPU FPSIMD context for use by the calling context.
+ *
+ * The caller may freely manipulate the FPSIMD context metadata until
+ * put_cpu_fpsimd_context() is called.
+ *
+ * The double-underscore version must only be called if you know the task
+ * can't be preempted.
+ */
+static void get_cpu_fpsimd_context(void)
+{
+ preempt_disable();
+ __get_cpu_fpsimd_context();
+}
+
+static void __put_cpu_fpsimd_context(void)
+{
+ bool busy = __this_cpu_xchg(fpsimd_context_busy, false);
+
+ WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */
+}
+
+/*
+ * Release the CPU FPSIMD context.
+ *
+ * Must be called from a context in which get_cpu_fpsimd_context() was
+ * previously called, with no call to put_cpu_fpsimd_context() in the
+ * meantime.
+ */
+static void put_cpu_fpsimd_context(void)
+{
+ __put_cpu_fpsimd_context();
+ preempt_enable();
+}
+
+static bool have_cpu_fpsimd_context(void)
+{
+ return !preemptible() && __this_cpu_read(fpsimd_context_busy);
+}
+
/*
* Call __sve_free() directly only if you know task can't be scheduled
* or preempted.
* This function should be called only when the FPSIMD/SVE state in
* thread_struct is known to be up to date, when preparing to enter
* userspace.
- *
- * Softirqs (and preemption) must be disabled.
*/
static void task_fpsimd_load(void)
{
- WARN_ON(!in_softirq() && !irqs_disabled());
+ WARN_ON(!have_cpu_fpsimd_context());
if (system_supports_sve() && test_thread_flag(TIF_SVE))
sve_load_state(sve_pffr(¤t->thread),
/*
* Ensure FPSIMD/SVE storage in memory for the loaded context is up to
* date with respect to the CPU registers.
- *
- * Softirqs (and preemption) must be disabled.
*/
-void fpsimd_save(void)
+static void fpsimd_save(void)
{
struct fpsimd_last_state_struct const *last =
this_cpu_ptr(&fpsimd_last_state);
/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
- WARN_ON(!in_softirq() && !irqs_disabled());
+ WARN_ON(!have_cpu_fpsimd_context());
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
* task->thread.sve_state.
*
* Task can be a non-runnable task, or current. In the latter case,
- * softirqs (and preemption) must be disabled.
+ * the caller must have ownership of the cpu FPSIMD context before calling
+ * this function.
* task->thread.sve_state must point to at least sve_state_size(task)
* bytes of allocated kernel memory.
* task->thread.uw.fpsimd_state must be up to date before calling this
* task->thread.uw.fpsimd_state.
*
* Task can be a non-runnable task, or current. In the latter case,
- * softirqs (and preemption) must be disabled.
+ * the caller must have ownership of the cpu FPSIMD context before calling
+ * this function.
* task->thread.sve_state must point to at least sve_state_size(task)
* bytes of allocated kernel memory.
* task->thread.sve_state must be up to date before calling this function.
* non-SVE thread.
*/
if (task == current) {
- local_bh_disable();
+ get_cpu_fpsimd_context();
fpsimd_save();
}
sve_to_fpsimd(task);
if (task == current)
- local_bh_enable();
+ put_cpu_fpsimd_context();
/*
* Force reallocation of task SVE state to the correct size
sve_alloc(current);
- local_bh_disable();
+ get_cpu_fpsimd_context();
fpsimd_save();
if (test_and_set_thread_flag(TIF_SVE))
WARN_ON(1); /* SVE access shouldn't have trapped */
- local_bh_enable();
+ put_cpu_fpsimd_context();
}
/*
if (!system_supports_fpsimd())
return;
+ __get_cpu_fpsimd_context();
+
/* Save unsaved fpsimd state, if any: */
fpsimd_save();
update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
wrong_task || wrong_cpu);
+
+ __put_cpu_fpsimd_context();
}
void fpsimd_flush_thread(void)
if (!system_supports_fpsimd())
return;
- local_bh_disable();
+ get_cpu_fpsimd_context();
fpsimd_flush_task_state(current);
memset(¤t->thread.uw.fpsimd_state, 0,
current->thread.sve_vl_onexec = 0;
}
- local_bh_enable();
+ put_cpu_fpsimd_context();
}
/*
if (!system_supports_fpsimd())
return;
- local_bh_disable();
+ get_cpu_fpsimd_context();
fpsimd_save();
- local_bh_enable();
+ put_cpu_fpsimd_context();
}
/*
/*
* Associate current's FPSIMD context with this cpu
- * Preemption must be disabled when calling this function.
+ * The caller must have ownership of the cpu FPSIMD context before calling
+ * this function.
*/
void fpsimd_bind_task_to_cpu(void)
{
if (!system_supports_fpsimd())
return;
- local_bh_disable();
+ get_cpu_fpsimd_context();
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
task_fpsimd_load();
fpsimd_bind_task_to_cpu();
}
- local_bh_enable();
+ put_cpu_fpsimd_context();
}
/*
if (!system_supports_fpsimd())
return;
- local_bh_disable();
+ get_cpu_fpsimd_context();
current->thread.uw.fpsimd_state = *state;
if (system_supports_sve() && test_thread_flag(TIF_SVE))
clear_thread_flag(TIF_FOREIGN_FPSTATE);
- local_bh_enable();
+ put_cpu_fpsimd_context();
}
/*
/*
* Invalidate any task's FPSIMD state that is present on this cpu.
- * This function must be called with softirqs disabled.
+ * The FPSIMD context should be acquired with get_cpu_fpsimd_context()
+ * before calling this function.
*/
-void fpsimd_flush_cpu_state(void)
+static void fpsimd_flush_cpu_state(void)
{
__this_cpu_write(fpsimd_last_state.st, NULL);
set_thread_flag(TIF_FOREIGN_FPSTATE);
}
-#ifdef CONFIG_KERNEL_MODE_NEON
+/*
+ * Save the FPSIMD state to memory and invalidate cpu view.
+ * This function must be called with preemption disabled.
+ */
+void fpsimd_save_and_flush_cpu_state(void)
+{
+ WARN_ON(preemptible());
+ __get_cpu_fpsimd_context();
+ fpsimd_save();
+ fpsimd_flush_cpu_state();
+ __put_cpu_fpsimd_context();
+}
-DEFINE_PER_CPU(bool, kernel_neon_busy);
-EXPORT_PER_CPU_SYMBOL(kernel_neon_busy);
+#ifdef CONFIG_KERNEL_MODE_NEON
/*
* Kernel-side NEON support functions
BUG_ON(!may_use_simd());
- local_bh_disable();
-
- __this_cpu_write(kernel_neon_busy, true);
+ get_cpu_fpsimd_context();
/* Save unsaved fpsimd state, if any: */
fpsimd_save();
/* Invalidate any task state remaining in the fpsimd regs: */
fpsimd_flush_cpu_state();
-
- preempt_disable();
-
- local_bh_enable();
}
EXPORT_SYMBOL(kernel_neon_begin);
*/
void kernel_neon_end(void)
{
- bool busy;
-
if (!system_supports_fpsimd())
return;
- busy = __this_cpu_xchg(kernel_neon_busy, false);
- WARN_ON(!busy); /* No matching kernel_neon_begin()? */
-
- preempt_enable();
+ put_cpu_fpsimd_context();
}
EXPORT_SYMBOL(kernel_neon_end);
{
switch (cmd) {
case CPU_PM_ENTER:
- fpsimd_save();
- fpsimd_flush_cpu_state();
+ fpsimd_save_and_flush_cpu_state();
break;
case CPU_PM_EXIT:
break;
#ifdef CONFIG_EFI
-__efistub_stext_offset = stext - _text;
+/*
+ * Use ABSOLUTE() to avoid ld.lld treating this as a relative symbol:
+ * https://github.com/ClangBuiltLinux/linux/issues/561
+ */
+__efistub_stext_offset = ABSOLUTE(stext - _text);
/*
* The EFI stub has its own symbol namespace prefixed by __efistub_, to
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/irqchip.h>
+#include <linux/kprobes.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
+#include <asm/daifflags.h>
#include <asm/vmap_stack.h>
unsigned long irq_err_count;
irqchip_init();
if (!handle_arch_irq)
panic("No interrupt controller found.");
+
+ if (system_uses_irq_prio_masking()) {
+ /*
+ * Now that we have a stack for our IRQ handler, set
+ * the PMR/PSR pair to a consistent state.
+ */
+ WARN_ON(read_sysreg(daif) & PSR_A_BIT);
+ local_daif_restore(DAIF_PROCCTX_NOIRQ);
+ }
+}
+
+/*
+ * Stubs to make nmi_enter/exit() code callable from ASM
+ */
+asmlinkage void notrace asm_nmi_enter(void)
+{
+ nmi_enter();
+}
+NOKPROBE_SYMBOL(asm_nmi_enter);
+
+asmlinkage void notrace asm_nmi_exit(void)
+{
+ nmi_exit();
}
+NOKPROBE_SYMBOL(asm_nmi_exit);
void *module_alloc(unsigned long size)
{
+ u64 module_alloc_end = module_alloc_base + MODULES_VSIZE;
gfp_t gfp_mask = GFP_KERNEL;
void *p;
if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
gfp_mask |= __GFP_NOWARN;
+ if (IS_ENABLED(CONFIG_KASAN))
+ /* don't exceed the static module region - see below */
+ module_alloc_end = MODULES_END;
+
p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
- module_alloc_base + MODULES_VSIZE,
- gfp_mask, PAGE_KERNEL_EXEC, 0,
+ module_alloc_end, gfp_mask, PAGE_KERNEL, 0,
NUMA_NO_NODE, __builtin_return_address(0));
if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
*/
p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
module_alloc_base + SZ_2G, GFP_KERNEL,
- PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+ PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
if (p && (kasan_module_alloc(p, size) < 0)) {
void *page;
page = vmalloc_exec(PAGE_SIZE);
- if (page)
+ if (page) {
set_memory_ro((unsigned long)page, 1);
+ set_vm_flush_reset_perms(page);
+ }
return page;
}
* be raised.
*/
pmr = gic_read_pmr();
- gic_write_pmr(GIC_PRIO_IRQON);
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
__cpu_do_idle();
int syscall_trace_enter(struct pt_regs *regs)
{
- if (test_thread_flag(TIF_SYSCALL_TRACE))
+ if (test_thread_flag(TIF_SYSCALL_TRACE) ||
+ test_thread_flag(TIF_SYSCALL_EMU)) {
tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
+ if (!in_syscall(regs) || test_thread_flag(TIF_SYSCALL_EMU))
+ return -1;
+ }
/* Do the secure computing after ptrace; failures should be fast. */
if (secure_computing(NULL) == -1)
#include <asm/traps.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
-
-struct compat_sigcontext {
- /* We always set these two fields to 0 */
- compat_ulong_t trap_no;
- compat_ulong_t error_code;
-
- compat_ulong_t oldmask;
- compat_ulong_t arm_r0;
- compat_ulong_t arm_r1;
- compat_ulong_t arm_r2;
- compat_ulong_t arm_r3;
- compat_ulong_t arm_r4;
- compat_ulong_t arm_r5;
- compat_ulong_t arm_r6;
- compat_ulong_t arm_r7;
- compat_ulong_t arm_r8;
- compat_ulong_t arm_r9;
- compat_ulong_t arm_r10;
- compat_ulong_t arm_fp;
- compat_ulong_t arm_ip;
- compat_ulong_t arm_sp;
- compat_ulong_t arm_lr;
- compat_ulong_t arm_pc;
- compat_ulong_t arm_cpsr;
- compat_ulong_t fault_address;
-};
-
-struct compat_ucontext {
- compat_ulong_t uc_flags;
- compat_uptr_t uc_link;
- compat_stack_t uc_stack;
- struct compat_sigcontext uc_mcontext;
- compat_sigset_t uc_sigmask;
- int __unused[32 - (sizeof (compat_sigset_t) / sizeof (int))];
- compat_ulong_t uc_regspace[128] __attribute__((__aligned__(8)));
-};
+#include <asm/vdso.h>
struct compat_vfp_sigframe {
compat_ulong_t magic;
unsigned long end_magic;
} __attribute__((__aligned__(8)));
-struct compat_sigframe {
- struct compat_ucontext uc;
- compat_ulong_t retcode[2];
-};
-
-struct compat_rt_sigframe {
- struct compat_siginfo info;
- struct compat_sigframe sig;
-};
-
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
retcode = ptr_to_compat(ka->sa.sa_restorer);
} else {
/* Set up sigreturn pointer */
+#ifdef CONFIG_COMPAT_VDSO
+ void *vdso_base = current->mm->context.vdso;
+ void *vdso_trampoline;
+
+ if (ka->sa.sa_flags & SA_SIGINFO) {
+ if (thumb) {
+ vdso_trampoline = VDSO_SYMBOL(vdso_base,
+ compat_rt_sigreturn_thumb);
+ } else {
+ vdso_trampoline = VDSO_SYMBOL(vdso_base,
+ compat_rt_sigreturn_arm);
+ }
+ } else {
+ if (thumb) {
+ vdso_trampoline = VDSO_SYMBOL(vdso_base,
+ compat_sigreturn_thumb);
+ } else {
+ vdso_trampoline = VDSO_SYMBOL(vdso_base,
+ compat_sigreturn_arm);
+ }
+ }
+
+ retcode = ptr_to_compat(vdso_trampoline) + thumb;
+#else
unsigned int idx = thumb << 1;
if (ka->sa.sa_flags & SA_SIGINFO)
retcode = (unsigned long)current->mm->context.vdso +
(idx << 2) + thumb;
+#endif
}
regs->regs[0] = usig;
* aff0 = mpidr_masked & 0xff;
* aff1 = mpidr_masked & 0xff00;
* aff2 = mpidr_masked & 0xff0000;
- * aff2 = mpidr_masked & 0xff00000000;
+ * aff3 = mpidr_masked & 0xff00000000;
* dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3);
*}
* Input registers: rs0, rs1, rs2, rs3, mpidr, mask
WARN_ON(!(cpuflags & PSR_I_BIT));
- gic_write_pmr(GIC_PRIO_IRQOFF);
-
- /* We can only unmask PSR.I if we can take aborts */
- if (!(cpuflags & PSR_A_BIT))
- write_sysreg(cpuflags & ~PSR_I_BIT, daif);
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
}
/*
}
#endif
-/*
- * ipi_cpu_stop - handle IPI from smp_send_stop()
- */
-static void ipi_cpu_stop(unsigned int cpu)
+static void local_cpu_stop(void)
{
- set_cpu_online(cpu, false);
+ set_cpu_online(smp_processor_id(), false);
local_daif_mask();
sdei_mask_local_cpu();
+ cpu_park_loop();
+}
- while (1)
- cpu_relax();
+/*
+ * We need to implement panic_smp_self_stop() for parallel panic() calls, so
+ * that cpu_online_mask gets correctly updated and smp_send_stop() can skip
+ * CPUs that have already stopped themselves.
+ */
+void panic_smp_self_stop(void)
+{
+ local_cpu_stop();
}
#ifdef CONFIG_KEXEC_CORE
case IPI_CPU_STOP:
irq_enter();
- ipi_cpu_stop(cpu);
+ local_cpu_stop();
irq_exit();
break;
printk(" %pS\n", (void *)where);
}
-static void __dump_instr(const char *lvl, struct pt_regs *regs)
+static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
{
unsigned long addr = instruction_pointer(regs);
char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
int i;
+ if (user_mode(regs))
+ return;
+
for (i = -4; i < 1; i++) {
unsigned int val, bad;
- bad = get_user(val, &((u32 *)addr)[i]);
+ bad = aarch64_insn_read(&((u32 *)addr)[i], &val);
if (!bad)
p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
break;
}
}
- printk("%sCode: %s\n", lvl, str);
-}
-static void dump_instr(const char *lvl, struct pt_regs *regs)
-{
- if (!user_mode(regs)) {
- mm_segment_t fs = get_fs();
- set_fs(KERNEL_DS);
- __dump_instr(lvl, regs);
- set_fs(fs);
- } else {
- __dump_instr(lvl, regs);
- }
+ printk("%sCode: %s\n", lvl, str);
}
void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
print_modules();
show_regs(regs);
- if (!user_mode(regs))
- dump_instr(KERN_EMERG, regs);
+ dump_kernel_instr(KERN_EMERG, regs);
return ret;
}
#include <linux/slab.h>
#include <linux/timekeeper_internal.h>
#include <linux/vmalloc.h>
+#include <vdso/datapage.h>
+#include <vdso/helpers.h>
+#include <vdso/vsyscall.h>
#include <asm/cacheflush.h>
#include <asm/signal32.h>
#include <asm/vdso.h>
-#include <asm/vdso_datapage.h>
extern char vdso_start[], vdso_end[];
-static unsigned long vdso_pages __ro_after_init;
+#ifdef CONFIG_COMPAT_VDSO
+extern char vdso32_start[], vdso32_end[];
+#endif /* CONFIG_COMPAT_VDSO */
+
+/* vdso_lookup arch_index */
+enum arch_vdso_type {
+ ARM64_VDSO = 0,
+#ifdef CONFIG_COMPAT_VDSO
+ ARM64_VDSO32 = 1,
+#endif /* CONFIG_COMPAT_VDSO */
+};
+#ifdef CONFIG_COMPAT_VDSO
+#define VDSO_TYPES (ARM64_VDSO32 + 1)
+#else
+#define VDSO_TYPES (ARM64_VDSO + 1)
+#endif /* CONFIG_COMPAT_VDSO */
+
+struct __vdso_abi {
+ const char *name;
+ const char *vdso_code_start;
+ const char *vdso_code_end;
+ unsigned long vdso_pages;
+ /* Data Mapping */
+ struct vm_special_mapping *dm;
+ /* Code Mapping */
+ struct vm_special_mapping *cm;
+};
+
+static struct __vdso_abi vdso_lookup[VDSO_TYPES] __ro_after_init = {
+ {
+ .name = "vdso",
+ .vdso_code_start = vdso_start,
+ .vdso_code_end = vdso_end,
+ },
+#ifdef CONFIG_COMPAT_VDSO
+ {
+ .name = "vdso32",
+ .vdso_code_start = vdso32_start,
+ .vdso_code_end = vdso32_end,
+ },
+#endif /* CONFIG_COMPAT_VDSO */
+};
/*
* The vDSO data page.
*/
static union {
- struct vdso_data data;
+ struct vdso_data data[CS_BASES];
u8 page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = &vdso_data_store.data;
+struct vdso_data *vdso_data = vdso_data_store.data;
+
+static int __vdso_remap(enum arch_vdso_type arch_index,
+ const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
+{
+ unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+ unsigned long vdso_size = vdso_lookup[arch_index].vdso_code_end -
+ vdso_lookup[arch_index].vdso_code_start;
+
+ if (vdso_size != new_size)
+ return -EINVAL;
+
+ current->mm->context.vdso = (void *)new_vma->vm_start;
+
+ return 0;
+}
+
+static int __vdso_init(enum arch_vdso_type arch_index)
+{
+ int i;
+ struct page **vdso_pagelist;
+ unsigned long pfn;
+
+ if (memcmp(vdso_lookup[arch_index].vdso_code_start, "\177ELF", 4)) {
+ pr_err("vDSO is not a valid ELF object!\n");
+ return -EINVAL;
+ }
+
+ vdso_lookup[arch_index].vdso_pages = (
+ vdso_lookup[arch_index].vdso_code_end -
+ vdso_lookup[arch_index].vdso_code_start) >>
+ PAGE_SHIFT;
+
+ /* Allocate the vDSO pagelist, plus a page for the data. */
+ vdso_pagelist = kcalloc(vdso_lookup[arch_index].vdso_pages + 1,
+ sizeof(struct page *),
+ GFP_KERNEL);
+ if (vdso_pagelist == NULL)
+ return -ENOMEM;
+
+ /* Grab the vDSO data page. */
+ vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
+
+
+ /* Grab the vDSO code pages. */
+ pfn = sym_to_pfn(vdso_lookup[arch_index].vdso_code_start);
+
+ for (i = 0; i < vdso_lookup[arch_index].vdso_pages; i++)
+ vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
+
+ vdso_lookup[arch_index].dm->pages = &vdso_pagelist[0];
+ vdso_lookup[arch_index].cm->pages = &vdso_pagelist[1];
+
+ return 0;
+}
+
+static int __setup_additional_pages(enum arch_vdso_type arch_index,
+ struct mm_struct *mm,
+ struct linux_binprm *bprm,
+ int uses_interp)
+{
+ unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
+ void *ret;
+
+ vdso_text_len = vdso_lookup[arch_index].vdso_pages << PAGE_SHIFT;
+ /* Be sure to map the data page */
+ vdso_mapping_len = vdso_text_len + PAGE_SIZE;
+
+ vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
+ if (IS_ERR_VALUE(vdso_base)) {
+ ret = ERR_PTR(vdso_base);
+ goto up_fail;
+ }
+
+ ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
+ VM_READ|VM_MAYREAD,
+ vdso_lookup[arch_index].dm);
+ if (IS_ERR(ret))
+ goto up_fail;
+
+ vdso_base += PAGE_SIZE;
+ mm->context.vdso = (void *)vdso_base;
+ ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+ vdso_lookup[arch_index].cm);
+ if (IS_ERR(ret))
+ goto up_fail;
+
+ return 0;
+
+up_fail:
+ mm->context.vdso = NULL;
+ return PTR_ERR(ret);
+}
#ifdef CONFIG_COMPAT
/*
* Create and map the vectors page for AArch32 tasks.
*/
+#ifdef CONFIG_COMPAT_VDSO
+static int aarch32_vdso_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
+{
+ return __vdso_remap(ARM64_VDSO32, sm, new_vma);
+}
+#endif /* CONFIG_COMPAT_VDSO */
+
+/*
+ * aarch32_vdso_pages:
+ * 0 - kuser helpers
+ * 1 - sigreturn code
+ * or (CONFIG_COMPAT_VDSO):
+ * 0 - kuser helpers
+ * 1 - vdso data
+ * 2 - vdso code
+ */
#define C_VECTORS 0
+#ifdef CONFIG_COMPAT_VDSO
+#define C_VVAR 1
+#define C_VDSO 2
+#define C_PAGES (C_VDSO + 1)
+#else
#define C_SIGPAGE 1
#define C_PAGES (C_SIGPAGE + 1)
+#endif /* CONFIG_COMPAT_VDSO */
static struct page *aarch32_vdso_pages[C_PAGES] __ro_after_init;
-static const struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = {
+static struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = {
{
.name = "[vectors]", /* ABI */
.pages = &aarch32_vdso_pages[C_VECTORS],
},
+#ifdef CONFIG_COMPAT_VDSO
+ {
+ .name = "[vvar]",
+ },
+ {
+ .name = "[vdso]",
+ .mremap = aarch32_vdso_mremap,
+ },
+#else
{
.name = "[sigpage]", /* ABI */
.pages = &aarch32_vdso_pages[C_SIGPAGE],
},
+#endif /* CONFIG_COMPAT_VDSO */
};
static int aarch32_alloc_kuser_vdso_page(void)
return 0;
}
-static int __init aarch32_alloc_vdso_pages(void)
+#ifdef CONFIG_COMPAT_VDSO
+static int __aarch32_alloc_vdso_pages(void)
+{
+ int ret;
+
+ vdso_lookup[ARM64_VDSO32].dm = &aarch32_vdso_spec[C_VVAR];
+ vdso_lookup[ARM64_VDSO32].cm = &aarch32_vdso_spec[C_VDSO];
+
+ ret = __vdso_init(ARM64_VDSO32);
+ if (ret)
+ return ret;
+
+ ret = aarch32_alloc_kuser_vdso_page();
+ if (ret) {
+ unsigned long c_vvar =
+ (unsigned long)page_to_virt(aarch32_vdso_pages[C_VVAR]);
+ unsigned long c_vdso =
+ (unsigned long)page_to_virt(aarch32_vdso_pages[C_VDSO]);
+
+ free_page(c_vvar);
+ free_page(c_vdso);
+ }
+
+ return ret;
+}
+#else
+static int __aarch32_alloc_vdso_pages(void)
{
extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
return ret;
}
+#endif /* CONFIG_COMPAT_VDSO */
+
+static int __init aarch32_alloc_vdso_pages(void)
+{
+ return __aarch32_alloc_vdso_pages();
+}
arch_initcall(aarch32_alloc_vdso_pages);
static int aarch32_kuser_helpers_setup(struct mm_struct *mm)
return PTR_ERR_OR_ZERO(ret);
}
+#ifndef CONFIG_COMPAT_VDSO
static int aarch32_sigreturn_setup(struct mm_struct *mm)
{
unsigned long addr;
out:
return PTR_ERR_OR_ZERO(ret);
}
+#endif /* !CONFIG_COMPAT_VDSO */
int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
if (ret)
goto out;
+#ifdef CONFIG_COMPAT_VDSO
+ ret = __setup_additional_pages(ARM64_VDSO32,
+ mm,
+ bprm,
+ uses_interp);
+#else
ret = aarch32_sigreturn_setup(mm);
+#endif /* CONFIG_COMPAT_VDSO */
out:
up_write(&mm->mmap_sem);
static int vdso_mremap(const struct vm_special_mapping *sm,
struct vm_area_struct *new_vma)
{
- unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
- unsigned long vdso_size = vdso_end - vdso_start;
-
- if (vdso_size != new_size)
- return -EINVAL;
-
- current->mm->context.vdso = (void *)new_vma->vm_start;
-
- return 0;
+ return __vdso_remap(ARM64_VDSO, sm, new_vma);
}
-static struct vm_special_mapping vdso_spec[2] __ro_after_init = {
+/*
+ * aarch64_vdso_pages:
+ * 0 - vvar
+ * 1 - vdso
+ */
+#define A_VVAR 0
+#define A_VDSO 1
+#define A_PAGES (A_VDSO + 1)
+static struct vm_special_mapping vdso_spec[A_PAGES] __ro_after_init = {
{
.name = "[vvar]",
},
static int __init vdso_init(void)
{
- int i;
- struct page **vdso_pagelist;
- unsigned long pfn;
-
- if (memcmp(vdso_start, "\177ELF", 4)) {
- pr_err("vDSO is not a valid ELF object!\n");
- return -EINVAL;
- }
-
- vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
-
- /* Allocate the vDSO pagelist, plus a page for the data. */
- vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *),
- GFP_KERNEL);
- if (vdso_pagelist == NULL)
- return -ENOMEM;
-
- /* Grab the vDSO data page. */
- vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data));
-
-
- /* Grab the vDSO code pages. */
- pfn = sym_to_pfn(vdso_start);
-
- for (i = 0; i < vdso_pages; i++)
- vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
+ vdso_lookup[ARM64_VDSO].dm = &vdso_spec[A_VVAR];
+ vdso_lookup[ARM64_VDSO].cm = &vdso_spec[A_VDSO];
- vdso_spec[0].pages = &vdso_pagelist[0];
- vdso_spec[1].pages = &vdso_pagelist[1];
-
- return 0;
+ return __vdso_init(ARM64_VDSO);
}
arch_initcall(vdso_init);
int uses_interp)
{
struct mm_struct *mm = current->mm;
- unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
- void *ret;
-
- vdso_text_len = vdso_pages << PAGE_SHIFT;
- /* Be sure to map the data page */
- vdso_mapping_len = vdso_text_len + PAGE_SIZE;
+ int ret;
if (down_write_killable(&mm->mmap_sem))
return -EINTR;
- vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
- if (IS_ERR_VALUE(vdso_base)) {
- ret = ERR_PTR(vdso_base);
- goto up_fail;
- }
- ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE,
- VM_READ|VM_MAYREAD,
- &vdso_spec[0]);
- if (IS_ERR(ret))
- goto up_fail;
-
- vdso_base += PAGE_SIZE;
- mm->context.vdso = (void *)vdso_base;
- ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
- VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- &vdso_spec[1]);
- if (IS_ERR(ret))
- goto up_fail;
+ ret = __setup_additional_pages(ARM64_VDSO,
+ mm,
+ bprm,
+ uses_interp);
up_write(&mm->mmap_sem);
- return 0;
-
-up_fail:
- mm->context.vdso = NULL;
- up_write(&mm->mmap_sem);
- return PTR_ERR(ret);
-}
-/*
- * Update the vDSO data page to keep in sync with kernel timekeeping.
- */
-void update_vsyscall(struct timekeeper *tk)
-{
- u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
-
- ++vdso_data->tb_seq_count;
- smp_wmb();
-
- vdso_data->use_syscall = use_syscall;
- vdso_data->xtime_coarse_sec = tk->xtime_sec;
- vdso_data->xtime_coarse_nsec = tk->tkr_mono.xtime_nsec >>
- tk->tkr_mono.shift;
- vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec;
- vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
-
- /* Read without the seqlock held by clock_getres() */
- WRITE_ONCE(vdso_data->hrtimer_res, hrtimer_resolution);
-
- if (!use_syscall) {
- /* tkr_mono.cycle_last == tkr_raw.cycle_last */
- vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
- vdso_data->raw_time_sec = tk->raw_sec;
- vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec;
- vdso_data->xtime_clock_sec = tk->xtime_sec;
- vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
- vdso_data->cs_mono_mult = tk->tkr_mono.mult;
- vdso_data->cs_raw_mult = tk->tkr_raw.mult;
- /* tkr_mono.shift == tkr_raw.shift */
- vdso_data->cs_shift = tk->tkr_mono.shift;
- }
-
- smp_wmb();
- ++vdso_data->tb_seq_count;
-}
-
-void update_vsyscall_tz(void)
-{
- vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
- vdso_data->tz_dsttime = sys_tz.tz_dsttime;
+ return ret;
}
# Heavily based on the vDSO Makefiles for other archs.
#
-obj-vdso := gettimeofday.o note.o sigreturn.o
+# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
+# the inclusion of generic Makefile.
+ARCH_REL_TYPE_ABS := R_AARCH64_JUMP_SLOT|R_AARCH64_GLOB_DAT|R_AARCH64_ABS64
+include $(srctree)/lib/vdso/Makefile
+
+obj-vdso := vgettimeofday.o note.o sigreturn.o
# Build rules
targets := $(obj-vdso) vdso.so vdso.so.dbg
ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
--build-id -n -T
+ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
+ccflags-y += -DDISABLE_BRANCH_PROFILING
+
+VDSO_LDFLAGS := -Bsymbolic
+
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+KBUILD_CFLAGS += $(DISABLE_LTO)
+KASAN_SANITIZE := n
+UBSAN_SANITIZE := n
+OBJECT_FILES_NON_STANDARD := y
+KCOV_INSTRUMENT := n
+
+ifeq ($(c-gettimeofday-y),)
+CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny
+else
+CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y)
+endif
+
+# Clang versions less than 8 do not support -mcmodel=tiny
+ifeq ($(CONFIG_CC_IS_CLANG), y)
+ ifeq ($(shell test $(CONFIG_CLANG_VERSION) -lt 80000; echo $$?),0)
+ CFLAGS_REMOVE_vgettimeofday.o += -mcmodel=tiny
+ endif
+endif
+
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n
# Link rule for the .so file, .lds has to be first
$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
$(call if_changed,ld)
+ $(call if_changed,vdso_check)
# Strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
$(call if_changed,vdsosym)
-# Assembly rules for the .S files
-$(obj-vdso): %.o: %.S FORCE
- $(call if_changed_dep,vdsoas)
-
# Actual build commands
-quiet_cmd_vdsoas = VDSOA $@
- cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdsocc = VDSOCC $@
+ cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $<
# Install commands for the unstripped file
quiet_cmd_vdso_install = INSTALL $@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Userspace implementations of gettimeofday() and friends.
- *
- * Copyright (C) 2012 ARM Limited
- *
- * Author: Will Deacon <will.deacon@arm.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-
-#define NSEC_PER_SEC_LO16 0xca00
-#define NSEC_PER_SEC_HI16 0x3b9a
-
-vdso_data .req x6
-seqcnt .req w7
-w_tmp .req w8
-x_tmp .req x8
-
-/*
- * Conventions for macro arguments:
- * - An argument is write-only if its name starts with "res".
- * - All other arguments are read-only, unless otherwise specified.
- */
-
- .macro seqcnt_acquire
-9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT]
- tbnz seqcnt, #0, 9999b
- dmb ishld
- .endm
-
- .macro seqcnt_check fail
- dmb ishld
- ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT]
- cmp w_tmp, seqcnt
- b.ne \fail
- .endm
-
- .macro syscall_check fail
- ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL]
- cbnz w_tmp, \fail
- .endm
-
- .macro get_nsec_per_sec res
- mov \res, #NSEC_PER_SEC_LO16
- movk \res, #NSEC_PER_SEC_HI16, lsl #16
- .endm
-
- /*
- * Returns the clock delta, in nanoseconds left-shifted by the clock
- * shift.
- */
- .macro get_clock_shifted_nsec res, cycle_last, mult
- /* Read the virtual counter. */
- isb
- mrs x_tmp, cntvct_el0
- /* Calculate cycle delta and convert to ns. */
- sub \res, x_tmp, \cycle_last
- /* We can only guarantee 56 bits of precision. */
- movn x_tmp, #0xff00, lsl #48
- and \res, x_tmp, \res
- mul \res, \res, \mult
- /*
- * Fake address dependency from the value computed from the counter
- * register to subsequent data page accesses so that the sequence
- * locking also orders the read of the counter.
- */
- and x_tmp, \res, xzr
- add vdso_data, vdso_data, x_tmp
- .endm
-
- /*
- * Returns in res_{sec,nsec} the REALTIME timespec, based on the
- * "wall time" (xtime) and the clock_mono delta.
- */
- .macro get_ts_realtime res_sec, res_nsec, \
- clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec
- add \res_nsec, \clock_nsec, \xtime_nsec
- udiv x_tmp, \res_nsec, \nsec_to_sec
- add \res_sec, \xtime_sec, x_tmp
- msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec
- .endm
-
- /*
- * Returns in res_{sec,nsec} the timespec based on the clock_raw delta,
- * used for CLOCK_MONOTONIC_RAW.
- */
- .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec
- udiv \res_sec, \clock_nsec, \nsec_to_sec
- msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec
- .endm
-
- /* sec and nsec are modified in place. */
- .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec
- /* Add timespec. */
- add \sec, \sec, \ts_sec
- add \nsec, \nsec, \ts_nsec
-
- /* Normalise the new timespec. */
- cmp \nsec, \nsec_to_sec
- b.lt 9999f
- sub \nsec, \nsec, \nsec_to_sec
- add \sec, \sec, #1
-9999:
- cmp \nsec, #0
- b.ge 9998f
- add \nsec, \nsec, \nsec_to_sec
- sub \sec, \sec, #1
-9998:
- .endm
-
- .macro clock_gettime_return, shift=0
- .if \shift == 1
- lsr x11, x11, x12
- .endif
- stp x10, x11, [x1, #TSPEC_TV_SEC]
- mov x0, xzr
- ret
- .endm
-
- .macro jump_slot jumptable, index, label
- .if (. - \jumptable) != 4 * (\index)
- .error "Jump slot index mismatch"
- .endif
- b \label
- .endm
-
- .text
-
-/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */
-ENTRY(__kernel_gettimeofday)
- .cfi_startproc
- adr vdso_data, _vdso_data
- /* If tv is NULL, skip to the timezone code. */
- cbz x0, 2f
-
- /* Compute the time of day. */
-1: seqcnt_acquire
- syscall_check fail=4f
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_mono_mult, w12 = cs_shift */
- ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
-
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=1b
- get_ts_realtime res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
-
- /* Convert ns to us. */
- mov x13, #1000
- lsl x13, x13, x12
- udiv x11, x11, x13
- stp x10, x11, [x0, #TVAL_TV_SEC]
-2:
- /* If tz is NULL, return 0. */
- cbz x1, 3f
- ldp w4, w5, [vdso_data, #VDSO_TZ_MINWEST]
- stp w4, w5, [x1, #TZ_MINWEST]
-3:
- mov x0, xzr
- ret
-4:
- /* Syscall fallback. */
- mov x8, #__NR_gettimeofday
- svc #0
- ret
- .cfi_endproc
-ENDPROC(__kernel_gettimeofday)
-
-#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE
-
-/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */
-ENTRY(__kernel_clock_gettime)
- .cfi_startproc
- cmp w0, #JUMPSLOT_MAX
- b.hi syscall
- adr vdso_data, _vdso_data
- adr x_tmp, jumptable
- add x_tmp, x_tmp, w0, uxtw #2
- br x_tmp
-
- ALIGN
-jumptable:
- jump_slot jumptable, CLOCK_REALTIME, realtime
- jump_slot jumptable, CLOCK_MONOTONIC, monotonic
- b syscall
- b syscall
- jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw
- jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse
- jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse
-
- .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1)
- .error "Wrong jumptable size"
- .endif
-
- ALIGN
-realtime:
- seqcnt_acquire
- syscall_check fail=syscall
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_mono_mult, w12 = cs_shift */
- ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
-
- /* All computations are done with left-shifted nsecs. */
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=realtime
- get_ts_realtime res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
- clock_gettime_return, shift=1
-
- ALIGN
-monotonic:
- seqcnt_acquire
- syscall_check fail=syscall
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_mono_mult, w12 = cs_shift */
- ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
- ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
- ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC]
-
- /* All computations are done with left-shifted nsecs. */
- lsl x4, x4, x12
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=monotonic
- get_ts_realtime res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
-
- add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9
- clock_gettime_return, shift=1
-
- ALIGN
-monotonic_raw:
- seqcnt_acquire
- syscall_check fail=syscall
- ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
- /* w11 = cs_raw_mult, w12 = cs_shift */
- ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT]
- ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC]
-
- /* All computations are done with left-shifted nsecs. */
- get_nsec_per_sec res=x9
- lsl x9, x9, x12
-
- get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
- seqcnt_check fail=monotonic_raw
- get_ts_clock_raw res_sec=x10, res_nsec=x11, \
- clock_nsec=x15, nsec_to_sec=x9
-
- add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
- clock_gettime_return, shift=1
-
- ALIGN
-realtime_coarse:
- seqcnt_acquire
- ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
- seqcnt_check fail=realtime_coarse
- clock_gettime_return
-
- ALIGN
-monotonic_coarse:
- seqcnt_acquire
- ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
- ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
- seqcnt_check fail=monotonic_coarse
-
- /* Computations are done in (non-shifted) nsecs. */
- get_nsec_per_sec res=x9
- add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
- clock_gettime_return
-
- ALIGN
-syscall: /* Syscall fallback. */
- mov x8, #__NR_clock_gettime
- svc #0
- ret
- .cfi_endproc
-ENDPROC(__kernel_clock_gettime)
-
-/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */
-ENTRY(__kernel_clock_getres)
- .cfi_startproc
- cmp w0, #CLOCK_REALTIME
- ccmp w0, #CLOCK_MONOTONIC, #0x4, ne
- ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne
- b.ne 1f
-
- adr vdso_data, _vdso_data
- ldr w2, [vdso_data, #CLOCK_REALTIME_RES]
- b 2f
-1:
- cmp w0, #CLOCK_REALTIME_COARSE
- ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne
- b.ne 4f
- ldr x2, 5f
-2:
- cbz x1, 3f
- stp xzr, x2, [x1]
-
-3: /* res == NULL. */
- mov w0, wzr
- ret
-
-4: /* Syscall fallback. */
- mov x8, #__NR_clock_getres
- svc #0
- ret
-5:
- .quad CLOCK_COARSE_RES
- .cfi_endproc
-ENDPROC(__kernel_clock_getres)
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 userspace implementations of gettimeofday() and similar.
+ *
+ * Copyright (C) 2018 ARM Limited
+ *
+ */
+#include <linux/time.h>
+#include <linux/types.h>
+
+int __kernel_clock_gettime(clockid_t clock,
+ struct __kernel_timespec *ts)
+{
+ return __cvdso_clock_gettime(clock, ts);
+}
+
+int __kernel_gettimeofday(struct __kernel_old_timeval *tv,
+ struct timezone *tz)
+{
+ return __cvdso_gettimeofday(tv, tz);
+}
+
+int __kernel_clock_getres(clockid_t clock_id,
+ struct __kernel_timespec *res)
+{
+ return __cvdso_clock_getres(clock_id, res);
+}
--- /dev/null
+vdso.lds
+vdso.so.raw
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for vdso32
+#
+
+# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
+# the inclusion of generic Makefile.
+ARCH_REL_TYPE_ABS := R_ARM_JUMP_SLOT|R_ARM_GLOB_DAT|R_ARM_ABS32
+include $(srctree)/lib/vdso/Makefile
+
+COMPATCC := $(CROSS_COMPILE_COMPAT)gcc
+
+# Same as cc-*option, but using COMPATCC instead of CC
+cc32-option = $(call try-run,\
+ $(COMPATCC) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
+cc32-disable-warning = $(call try-run,\
+ $(COMPATCC) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
+cc32-ldoption = $(call try-run,\
+ $(COMPATCC) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2))
+
+# We cannot use the global flags to compile the vDSO files, the main reason
+# being that the 32-bit compiler may be older than the main (64-bit) compiler
+# and therefore may not understand flags set using $(cc-option ...). Besides,
+# arch-specific options should be taken from the arm Makefile instead of the
+# arm64 one.
+# As a result we set our own flags here.
+
+# From top-level Makefile
+# NOSTDINC_FLAGS
+VDSO_CPPFLAGS := -nostdinc -isystem $(shell $(COMPATCC) -print-file-name=include)
+VDSO_CPPFLAGS += $(LINUXINCLUDE)
+VDSO_CPPFLAGS += $(KBUILD_CPPFLAGS)
+
+# Common C and assembly flags
+# From top-level Makefile
+VDSO_CAFLAGS := $(VDSO_CPPFLAGS)
+VDSO_CAFLAGS += $(call cc32-option,-fno-PIE)
+ifdef CONFIG_DEBUG_INFO
+VDSO_CAFLAGS += -g
+endif
+ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(COMPATCC)), y)
+VDSO_CAFLAGS += -DCC_HAVE_ASM_GOTO
+endif
+
+# From arm Makefile
+VDSO_CAFLAGS += $(call cc32-option,-fno-dwarf2-cfi-asm)
+VDSO_CAFLAGS += -mabi=aapcs-linux -mfloat-abi=soft
+ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
+VDSO_CAFLAGS += -mbig-endian
+else
+VDSO_CAFLAGS += -mlittle-endian
+endif
+
+# From arm vDSO Makefile
+VDSO_CAFLAGS += -fPIC -fno-builtin -fno-stack-protector
+VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING
+
+# Try to compile for ARMv8. If the compiler is too old and doesn't support it,
+# fall back to v7. There is no easy way to check for what architecture the code
+# is being compiled, so define a macro specifying that (see arch/arm/Makefile).
+VDSO_CAFLAGS += $(call cc32-option,-march=armv8-a -D__LINUX_ARM_ARCH__=8,\
+ -march=armv7-a -D__LINUX_ARM_ARCH__=7)
+
+VDSO_CFLAGS := $(VDSO_CAFLAGS)
+VDSO_CFLAGS += -DENABLE_COMPAT_VDSO=1
+# KBUILD_CFLAGS from top-level Makefile
+VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
+ -fno-strict-aliasing -fno-common \
+ -Werror-implicit-function-declaration \
+ -Wno-format-security \
+ -std=gnu89
+VDSO_CFLAGS += -O2
+# Some useful compiler-dependent flags from top-level Makefile
+VDSO_CFLAGS += $(call cc32-option,-Wdeclaration-after-statement,)
+VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign)
+VDSO_CFLAGS += $(call cc32-option,-fno-strict-overflow)
+VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes)
+VDSO_CFLAGS += $(call cc32-option,-Werror=date-time)
+VDSO_CFLAGS += $(call cc32-option,-Werror=incompatible-pointer-types)
+
+# The 32-bit compiler does not provide 128-bit integers, which are used in
+# some headers that are indirectly included from the vDSO code.
+# This hack makes the compiler happy and should trigger a warning/error if
+# variables of such type are referenced.
+VDSO_CFLAGS += -D__uint128_t='void*'
+# Silence some warnings coming from headers that operate on long's
+# (on GCC 4.8 or older, there is unfortunately no way to silence this warning)
+VDSO_CFLAGS += $(call cc32-disable-warning,shift-count-overflow)
+VDSO_CFLAGS += -Wno-int-to-pointer-cast
+
+VDSO_AFLAGS := $(VDSO_CAFLAGS)
+VDSO_AFLAGS += -D__ASSEMBLY__
+
+VDSO_LDFLAGS := $(VDSO_CPPFLAGS)
+# From arm vDSO Makefile
+VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1
+VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
+VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft
+VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--hash-style=sysv)
+VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--build-id)
+VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd)
+
+
+# Borrow vdsomunge.c from the arm vDSO
+# We have to use a relative path because scripts/Makefile.host prefixes
+# $(hostprogs-y) with $(obj)
+munge := ../../../arm/vdso/vdsomunge
+hostprogs-y := $(munge)
+
+c-obj-vdso := note.o
+c-obj-vdso-gettimeofday := vgettimeofday.o
+asm-obj-vdso := sigreturn.o
+
+ifneq ($(c-gettimeofday-y),)
+VDSO_CFLAGS_gettimeofday_o += -include $(c-gettimeofday-y)
+endif
+
+VDSO_CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+
+# Build rules
+targets := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) vdso.so vdso.so.dbg vdso.so.raw
+c-obj-vdso := $(addprefix $(obj)/, $(c-obj-vdso))
+c-obj-vdso-gettimeofday := $(addprefix $(obj)/, $(c-obj-vdso-gettimeofday))
+asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso))
+obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso)
+
+obj-y += vdso.o
+extra-y += vdso.lds
+CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+
+# Force dependency (vdso.s includes vdso.so through incbin)
+$(obj)/vdso.o: $(obj)/vdso.so
+
+include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE
+ $(call if_changed,vdsosym)
+
+# Strip rule for vdso.so
+$(obj)/vdso.so: OBJCOPYFLAGS := -S
+$(obj)/vdso.so: $(obj)/vdso.so.dbg FORCE
+ $(call if_changed,objcopy)
+
+$(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE
+ $(call if_changed,vdsomunge)
+
+# Link rule for the .so file, .lds has to be first
+$(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE
+ $(call if_changed,vdsold)
+ $(call if_changed,vdso_check)
+
+# Compilation rules for the vDSO sources
+$(c-obj-vdso): %.o: %.c FORCE
+ $(call if_changed_dep,vdsocc)
+$(c-obj-vdso-gettimeofday): %.o: %.c FORCE
+ $(call if_changed_dep,vdsocc_gettimeofday)
+$(asm-obj-vdso): %.o: %.S FORCE
+ $(call if_changed_dep,vdsoas)
+
+# Actual build commands
+quiet_cmd_vdsold = VDSOL $@
+ cmd_vdsold = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \
+ -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@
+quiet_cmd_vdsocc = VDSOC $@
+ cmd_vdsocc = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $<
+quiet_cmd_vdsocc_gettimeofday = VDSOC_GTD $@
+ cmd_vdsocc_gettimeofday = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $<
+quiet_cmd_vdsoas = VDSOA $@
+ cmd_vdsoas = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $<
+
+quiet_cmd_vdsomunge = MUNGE $@
+ cmd_vdsomunge = $(obj)/$(munge) $< $@
+
+# Generate vDSO offsets using helper script (borrowed from the 64-bit vDSO)
+gen-vdsosym := $(srctree)/$(src)/../vdso/gen_vdso_offsets.sh
+quiet_cmd_vdsosym = VDSOSYM $@
+# The AArch64 nm should be able to read an AArch32 binary
+ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
+
+# Install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/vdso32.so
+
+vdso.so: $(obj)/vdso.so.dbg
+ @mkdir -p $(MODLIB)/vdso
+ $(call cmd,vdso_install)
+
+vdso_install: vdso.so
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2012-2018 ARM Limited
+ *
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+#include <linux/build-salt.h>
+
+ELFNOTE32("Linux", 0, LINUX_VERSION_CODE);
+BUILD_SALT;
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This file provides both A32 and T32 versions, in accordance with the
+ * arm sigreturn code.
+ *
+ * Copyright (C) 2018 ARM Limited
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+#define ARM_ENTRY(name) \
+ ENTRY(name)
+
+#define ARM_ENDPROC(name) \
+ .type name, %function; \
+ END(name)
+
+ .text
+
+ .arm
+ .fnstart
+ .save {r0-r15}
+ .pad #COMPAT_SIGFRAME_REGS_OFFSET
+ nop
+ARM_ENTRY(__kernel_sigreturn_arm)
+ mov r7, #__NR_compat_sigreturn
+ svc #0
+ .fnend
+ARM_ENDPROC(__kernel_sigreturn_arm)
+
+ .fnstart
+ .save {r0-r15}
+ .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET
+ nop
+ARM_ENTRY(__kernel_rt_sigreturn_arm)
+ mov r7, #__NR_compat_rt_sigreturn
+ svc #0
+ .fnend
+ARM_ENDPROC(__kernel_rt_sigreturn_arm)
+
+ .thumb
+ .fnstart
+ .save {r0-r15}
+ .pad #COMPAT_SIGFRAME_REGS_OFFSET
+ nop
+ARM_ENTRY(__kernel_sigreturn_thumb)
+ mov r7, #__NR_compat_sigreturn
+ svc #0
+ .fnend
+ARM_ENDPROC(__kernel_sigreturn_thumb)
+
+ .fnstart
+ .save {r0-r15}
+ .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET
+ nop
+ARM_ENTRY(__kernel_rt_sigreturn_thumb)
+ mov r7, #__NR_compat_rt_sigreturn
+ svc #0
+ .fnend
+ARM_ENDPROC(__kernel_rt_sigreturn_thumb)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2012 ARM Limited
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/const.h>
+#include <asm/page.h>
+
+ .globl vdso32_start, vdso32_end
+ .section .rodata
+ .balign PAGE_SIZE
+vdso32_start:
+ .incbin "arch/arm64/kernel/vdso32/vdso.so"
+ .balign PAGE_SIZE
+vdso32_end:
+
+ .previous
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Adapted from arm64 version.
+ *
+ * GNU linker script for the VDSO library.
+ * Heavily based on the vDSO linker scripts for other archs.
+ *
+ * Copyright (C) 2012-2018 ARM Limited
+ */
+
+#include <linux/const.h>
+#include <asm/page.h>
+#include <asm/vdso.h>
+
+OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm")
+OUTPUT_ARCH(arm)
+
+SECTIONS
+{
+ PROVIDE_HIDDEN(_vdso_data = . - PAGE_SIZE);
+ . = VDSO_LBASE + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ .rodata : { *(.rodata*) } :text
+
+ .text : { *(.text*) } :text =0xe7f001f2
+
+ .got : { *(.got) }
+ .rel.plt : { *(.rel.plt) }
+
+ /DISCARD/ : {
+ *(.note.GNU-stack)
+ *(.data .data.* .gnu.linkonce.d.* .sdata*)
+ *(.bss .sbss .dynbss .dynsbss)
+ }
+}
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+}
+
+VERSION
+{
+ LINUX_2.6 {
+ global:
+ __vdso_clock_gettime;
+ __vdso_gettimeofday;
+ __vdso_clock_getres;
+ __kernel_sigreturn_arm;
+ __kernel_sigreturn_thumb;
+ __kernel_rt_sigreturn_arm;
+ __kernel_rt_sigreturn_thumb;
+ __vdso_clock_gettime64;
+ local: *;
+ };
+}
+
+/*
+ * Make the sigreturn code visible to the kernel.
+ */
+VDSO_compat_sigreturn_arm = __kernel_sigreturn_arm;
+VDSO_compat_sigreturn_thumb = __kernel_sigreturn_thumb;
+VDSO_compat_rt_sigreturn_arm = __kernel_rt_sigreturn_arm;
+VDSO_compat_rt_sigreturn_thumb = __kernel_rt_sigreturn_thumb;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 compat userspace implementations of gettimeofday() and similar.
+ *
+ * Copyright (C) 2018 ARM Limited
+ *
+ */
+#include <linux/time.h>
+#include <linux/types.h>
+
+int __vdso_clock_gettime(clockid_t clock,
+ struct old_timespec32 *ts)
+{
+ /* The checks below are required for ABI consistency with arm */
+ if ((u32)ts >= TASK_SIZE_32)
+ return -EFAULT;
+
+ return __cvdso_clock_gettime32(clock, ts);
+}
+
+int __vdso_clock_gettime64(clockid_t clock,
+ struct __kernel_timespec *ts)
+{
+ /* The checks below are required for ABI consistency with arm */
+ if ((u32)ts >= TASK_SIZE_32)
+ return -EFAULT;
+
+ return __cvdso_clock_gettime(clock, ts);
+}
+
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
+ struct timezone *tz)
+{
+ return __cvdso_gettimeofday(tv, tz);
+}
+
+int __vdso_clock_getres(clockid_t clock_id,
+ struct old_timespec32 *res)
+{
+ /* The checks below are required for ABI consistency with arm */
+ if ((u32)res >= TASK_SIZE_32)
+ return -EFAULT;
+
+ return __cvdso_clock_getres_time32(clock_id, res);
+}
+
+/* Avoid unresolved references emitted by GCC */
+
+void __aeabi_unwind_cpp_pr0(void)
+{
+}
+
+void __aeabi_unwind_cpp_pr1(void)
+{
+}
+
+void __aeabi_unwind_cpp_pr2(void)
+{
+}
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
u64 *guest_zcr = &vcpu->arch.ctxt.sys_regs[ZCR_EL1];
- /* Clean guest FP state to memory and invalidate cpu view */
- fpsimd_save();
- fpsimd_flush_cpu_state();
+ fpsimd_save_and_flush_cpu_state();
if (guest_has_sve)
*guest_zcr = read_sysreg_s(SYS_ZCR_EL12);
#define vq_word(vq) (((vq) - SVE_VQ_MIN) / 64)
#define vq_mask(vq) ((u64)1 << ((vq) - SVE_VQ_MIN) % 64)
-#define vq_present(vqs, vq) ((vqs)[vq_word(vq)] & vq_mask(vq))
+#define vq_present(vqs, vq) (!!((vqs)[vq_word(vq)] & vq_mask(vq)))
static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
* Naturally, we want to avoid this.
*/
if (system_uses_irq_prio_masking()) {
- gic_write_pmr(GIC_PRIO_IRQON);
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
dsb(sy);
}
static int __init arm64_dma_init(void)
{
- WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
- TAINT_CPU_OUT_OF_SPEC,
- "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
- ARCH_DMA_MINALIGN, cache_line_size());
return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
}
arch_initcall(arm64_dma_init);
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
const struct iommu_ops *iommu, bool coherent)
{
+ int cls = cache_line_size_of_cpu();
+
+ WARN_TAINT(!coherent && cls > ARCH_DMA_MINALIGN,
+ TAINT_CPU_OUT_OF_SPEC,
+ "%s %s: ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
+ dev_driver_string(dev), dev_name(dev),
+ ARCH_DMA_MINALIGN, cls);
+
dev->dma_coherent = coherent;
__iommu_setup_dma_ops(dev, dma_base, size, iommu);
#define VM_FAULT_BADACCESS 0x020000
static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
- unsigned int mm_flags, unsigned long vm_flags,
- struct task_struct *tsk)
+ unsigned int mm_flags, unsigned long vm_flags)
{
- struct vm_area_struct *vma;
- vm_fault_t fault;
+ struct vm_area_struct *vma = find_vma(mm, addr);
- vma = find_vma(mm, addr);
- fault = VM_FAULT_BADMAP;
if (unlikely(!vma))
- goto out;
- if (unlikely(vma->vm_start > addr))
- goto check_stack;
+ return VM_FAULT_BADMAP;
/*
* Ok, we have a good vm_area for this memory access, so we can handle
* it.
*/
-good_area:
+ if (unlikely(vma->vm_start > addr)) {
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ return VM_FAULT_BADMAP;
+ if (expand_stack(vma, addr))
+ return VM_FAULT_BADMAP;
+ }
+
/*
* Check that the permissions on the VMA allow for the fault which
* occurred.
*/
- if (!(vma->vm_flags & vm_flags)) {
- fault = VM_FAULT_BADACCESS;
- goto out;
- }
-
+ if (!(vma->vm_flags & vm_flags))
+ return VM_FAULT_BADACCESS;
return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags);
-
-check_stack:
- if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
- goto good_area;
-out:
- return fault;
}
static bool is_el0_instruction_abort(unsigned int esr)
return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
}
+/*
+ * Note: not valid for EL1 DC IVAC, but we never use that such that it
+ * should fault. EL0 cannot issue DC IVAC (undef).
+ */
+static bool is_write_abort(unsigned int esr)
+{
+ return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
+}
+
static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
const struct fault_info *inf;
- struct task_struct *tsk;
- struct mm_struct *mm;
+ struct mm_struct *mm = current->mm;
vm_fault_t fault, major = 0;
unsigned long vm_flags = VM_READ | VM_WRITE;
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
if (notify_page_fault(regs, esr))
return 0;
- tsk = current;
- mm = tsk->mm;
-
/*
* If we're in an interrupt or have no user context, we must not take
* the fault.
if (is_el0_instruction_abort(esr)) {
vm_flags = VM_EXEC;
- } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) {
+ mm_flags |= FAULT_FLAG_INSTRUCTION;
+ } else if (is_write_abort(esr)) {
vm_flags = VM_WRITE;
mm_flags |= FAULT_FLAG_WRITE;
}
*/
might_sleep();
#ifdef CONFIG_DEBUG_VM
- if (!user_mode(regs) && !search_exception_tables(regs->pc))
+ if (!user_mode(regs) && !search_exception_tables(regs->pc)) {
+ up_read(&mm->mmap_sem);
goto no_context;
+ }
#endif
}
- fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
+ fault = __do_page_fault(mm, addr, mm_flags, vm_flags);
major |= fault & VM_FAULT_MAJOR;
if (fault & VM_FAULT_RETRY) {
* that point.
*/
if (major) {
- tsk->maj_flt++;
+ current->maj_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
addr);
} else {
- tsk->min_flt++;
+ current->min_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
addr);
}
if (sz == PUD_SIZE) {
ptep = (pte_t *)pudp;
- } else if (sz == (PAGE_SIZE * CONT_PTES)) {
+ } else if (sz == (CONT_PTE_SIZE)) {
pmdp = pmd_alloc(mm, pudp, addr);
WARN_ON(addr & (sz - 1));
ptep = huge_pmd_share(mm, addr, pudp);
else
ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
- } else if (sz == (PMD_SIZE * CONT_PMDS)) {
+ } else if (sz == (CONT_PMD_SIZE)) {
pmdp = pmd_alloc(mm, pudp, addr);
WARN_ON(addr & (sz - 1));
return (pte_t *)pmdp;
#ifdef CONFIG_ARM64_4K_PAGES
add_huge_page_size(PUD_SIZE);
#endif
- add_huge_page_size(PMD_SIZE * CONT_PMDS);
+ add_huge_page_size(CONT_PMD_SIZE);
add_huge_page_size(PMD_SIZE);
- add_huge_page_size(PAGE_SIZE * CONT_PTES);
+ add_huge_page_size(CONT_PTE_SIZE);
return 0;
}
#ifdef CONFIG_ARM64_4K_PAGES
case PUD_SIZE:
#endif
- case PMD_SIZE * CONT_PMDS:
+ case CONT_PMD_SIZE:
case PMD_SIZE:
- case PAGE_SIZE * CONT_PTES:
+ case CONT_PTE_SIZE:
add_huge_page_size(ps);
return 1;
}
{
unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
- if (IS_ENABLED(CONFIG_ZONE_DMA32))
- max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys());
+#ifdef CONFIG_ZONE_DMA32
+ max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys());
+#endif
max_zone_pfns[ZONE_NORMAL] = max;
free_area_init_nodes(max_zone_pfns);
return 0;
}
-#endif /* CONFIG_ARM64_64K_PAGES */
+#endif /* !ARM64_SWAPPER_USES_SECTION_MAPS */
void vmemmap_free(unsigned long start, unsigned long end,
struct vmem_altmap *altmap)
{
int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
{
- pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
- pgprot_val(mk_sect_prot(prot)));
- pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot);
+ pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot));
/* Only allow permission changes for now */
if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)),
pud_val(new_pud)))
return 0;
- BUG_ON(phys & ~PUD_MASK);
+ VM_BUG_ON(phys & ~PUD_MASK);
set_pud(pudp, new_pud);
return 1;
}
int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
{
- pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
- pgprot_val(mk_sect_prot(prot)));
- pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot);
+ pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot));
/* Only allow permission changes for now */
if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)),
pmd_val(new_pmd)))
return 0;
- BUG_ON(phys & ~PMD_MASK);
+ VM_BUG_ON(phys & ~PMD_MASK);
set_pmd(pmdp, new_pmd);
return 1;
}
__pgprot(PTE_VALID));
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
+int set_direct_map_invalid_noflush(struct page *page)
+{
+ struct page_change_data data = {
+ .set_mask = __pgprot(0),
+ .clear_mask = __pgprot(PTE_VALID),
+ };
+
+ if (!rodata_full)
+ return 0;
+
+ return apply_to_page_range(&init_mm,
+ (unsigned long)page_address(page),
+ PAGE_SIZE, change_page_range, &data);
+}
+
+int set_direct_map_default_noflush(struct page *page)
+{
+ struct page_change_data data = {
+ .set_mask = __pgprot(PTE_VALID | PTE_WRITE),
+ .clear_mask = __pgprot(PTE_RDONLY),
+ };
+
+ if (!rodata_full)
+ return 0;
+
+ return apply_to_page_range(&init_mm,
+ (unsigned long)page_address(page),
+ PAGE_SIZE, change_page_range, &data);
+}
+
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
+ if (!debug_pagealloc_enabled() && !rodata_full)
+ return;
+
set_memory_valid((unsigned long)page_address(page), numpages, enable);
}
-#ifdef CONFIG_HIBERNATION
+
/*
- * When built with CONFIG_DEBUG_PAGEALLOC and CONFIG_HIBERNATION, this function
- * is used to determine if a linear map page has been marked as not-valid by
- * CONFIG_DEBUG_PAGEALLOC. Walk the page table and check the PTE_VALID bit.
- * This is based on kern_addr_valid(), which almost does what we need.
+ * This function is used to determine if a linear map page has been marked as
+ * not-valid. Walk the page table and check the PTE_VALID bit. This is based
+ * on kern_addr_valid(), which almost does what we need.
*
* Because this is only called on the kernel linear map, p?d_sect() implies
* p?d_present(). When debug_pagealloc is enabled, sections mappings are
pte_t *ptep;
unsigned long addr = (unsigned long)page_address(page);
+ if (!debug_pagealloc_enabled() && !rodata_full)
+ return true;
+
pgdp = pgd_offset_k(addr);
if (pgd_none(READ_ONCE(*pgdp)))
return false;
ptep = pte_offset_kernel(pmdp, addr);
return pte_valid(READ_ONCE(*ptep));
}
-#endif /* CONFIG_HIBERNATION */
-#endif /* CONFIG_DEBUG_PAGEALLOC */
{
return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
BPF_JIT_REGION_END, GFP_KERNEL,
- PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+ PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
}
#endif
struct rt_sigframe {
+ /*
+ * pad[3] is compatible with the same struct defined in
+ * gcc/libgcc/config/csky/linux-unwind.h
+ */
+ int pad[3];
struct siginfo info;
struct ucontext uc;
};
}
/* save the current system wide pmu states */
- ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 1);
- if (ret) {
- DPRINT(("on_each_cpu() failed: %d\n", ret));
- goto cleanup_reserve;
- }
+ on_each_cpu(pfm_alt_save_pmu_state, NULL, 1);
/* officially change to the alternate interrupt handler */
pfm_alt_intr_handler = hdl;
pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
{
int i;
- int ret;
if (hdl == NULL) return -EINVAL;
pfm_alt_intr_handler = NULL;
- ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1);
- if (ret) {
- DPRINT(("on_each_cpu() failed: %d\n", ret));
- }
+ on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1);
for_each_online_cpu(i) {
pfm_unreserve_session(NULL, 1, i);
status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL);
if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) {
atomic_set(&uc_pool->status, 0);
- status = smp_call_function(uncached_ipi_visibility, uc_pool, 1);
- if (status || atomic_read(&uc_pool->status))
+ smp_call_function(uncached_ipi_visibility, uc_pool, 1);
+ if (atomic_read(&uc_pool->status))
goto failed;
} else if (status != PAL_VISIBILITY_OK)
goto failed;
if (status != PAL_STATUS_SUCCESS)
goto failed;
atomic_set(&uc_pool->status, 0);
- status = smp_call_function(uncached_ipi_mc_drain, uc_pool, 1);
- if (status || atomic_read(&uc_pool->status))
+ smp_call_function(uncached_ipi_mc_drain, uc_pool, 1);
+ if (atomic_read(&uc_pool->status))
goto failed;
/*
bool
default y
select ARCH_32BIT_OFF_T
+ select ARCH_HAS_DMA_MMAP_PGPROT if MMU && !COLDFIRE
+ select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA
select ARCH_MIGHT_HAVE_PC_PARPORT if ISA
select ARCH_NO_COHERENT_DMA_MMAP if !MMU
select ARCH_NO_PREEMPT if !COLDFIRE
+ select DMA_DIRECT_REMAP if HAS_DMA && MMU && !COLDFIRE
select HAVE_IDE
select HAVE_AOUT if MMU
select HAVE_DEBUG_BUGVERBOSE
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
CONFIG_FB_AMIGA_ECS=y
CONFIG_FB_AMIGA_AGA=y
CONFIG_FB_FM2=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
CONFIG_FB=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_VGA16 is not set
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
# CONFIG_HWMON is not set
CONFIG_FB=y
CONFIG_FB_ATARI=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
CONFIG_FB=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
CONFIG_FB=y
CONFIG_FB_VALKYRIE=y
CONFIG_FB_MAC=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_HID=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
CONFIG_FB_ATARI=y
CONFIG_FB_VALKYRIE=y
CONFIG_FB_MAC=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
CONFIG_PPS_CLIENT_LDISC=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_HID=m
CONFIG_HIDRAW=y
CONFIG_UHID=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
CONFIG_FB=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
CONFIG_FB=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_HID=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
CONFIG_INET_ESP=m
CONFIG_INET_ESP_OFFLOAD=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_INET_RAW_DIAG=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NFT_DUP_IPV4=m
CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_TEST_ASYNC_DRIVER_PROBE=m
CONFIG_PTP_1588_CLOCK=m
# CONFIG_HWMON is not set
CONFIG_FB=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_HID=m
CONFIG_DLM=m
CONFIG_ENCRYPTED_KEYS=m
CONFIG_HARDENED_USERCOPY=y
-CONFIG_CRYPTO_RSA=m
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_MANAGER=y
CONFIG_CRYPTO_USER=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECRDSA=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
CONFIG_CRYPTO_AEGIS128L=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3=m
-CONFIG_CRYPTO_STREEBOG=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_ASYNC_RAID6_TEST=m
CONFIG_TEST_HEXDUMP=m
CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_STRSCPY=m
CONFIG_TEST_KSTRTOX=m
CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
#include <asm/pgalloc.h>
#if defined(CONFIG_MMU) && !defined(CONFIG_COLDFIRE)
-
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
- gfp_t flag, unsigned long attrs)
+void arch_dma_prep_coherent(struct page *page, size_t size)
{
- struct page *page, **map;
- pgprot_t pgprot;
- void *addr;
- int i, order;
-
- pr_debug("dma_alloc_coherent: %d,%x\n", size, flag);
-
- size = PAGE_ALIGN(size);
- order = get_order(size);
-
- page = alloc_pages(flag | __GFP_ZERO, order);
- if (!page)
- return NULL;
-
- *handle = page_to_phys(page);
- map = kmalloc(sizeof(struct page *) << order, flag & ~__GFP_DMA);
- if (!map) {
- __free_pages(page, order);
- return NULL;
- }
- split_page(page, order);
-
- order = 1 << order;
- size >>= PAGE_SHIFT;
- map[0] = page;
- for (i = 1; i < size; i++)
- map[i] = page + i;
- for (; i < order; i++)
- __free_page(page + i);
- pgprot = __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY);
- if (CPU_IS_040_OR_060)
- pgprot_val(pgprot) |= _PAGE_GLOBAL040 | _PAGE_NOCACHE_S;
- else
- pgprot_val(pgprot) |= _PAGE_NOCACHE030;
- addr = vmap(map, size, VM_MAP, pgprot);
- kfree(map);
-
- return addr;
+ cache_push(page_to_phys(page), size);
}
-void arch_dma_free(struct device *dev, size_t size, void *addr,
- dma_addr_t handle, unsigned long attrs)
+pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
+ unsigned long attrs)
{
- pr_debug("dma_free_coherent: %p, %x\n", addr, handle);
- vfree(addr);
+ if (CPU_IS_040_OR_060) {
+ pgprot_val(prot) &= ~_PAGE_CACHE040;
+ pgprot_val(prot) |= _PAGE_GLOBAL040 | _PAGE_NOCACHE_S;
+ } else {
+ pgprot_val(prot) |= _PAGE_NOCACHE030;
+ }
+ return prot;
}
-
#else
#include <asm/cacheflush.h>
$(Q)$(MAKE) $(build)=arch/mips/boot/tools relocs
KBUILD_DEFCONFIG := 32r2el_defconfig
+KBUILD_DTBS := dtbs
#
# Select the object file format to substitute into the linker script.
vmlinux.64: vmlinux
$(call cmd,64)
-all: $(all-y)
+all: $(all-y) $(KBUILD_DTBS)
# boot
$(boot-y): $(vmlinux-32) FORCE
$(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE
$(call if_changed,objcopy)
+HOSTCFLAGS_calc_vmlinuz_load_addr.o += $(LINUXINCLUDE)
+
# Calculate the load address of the compressed kernel image
hostprogs-y := calc_vmlinuz_load_addr
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
-#include "../../../../include/linux/sizes.h"
+#include <linux/sizes.h>
int main(int argc, char *argv[])
{
#define AR933X_UART_CS_PARITY_S 0
#define AR933X_UART_CS_PARITY_M 0x3
#define AR933X_UART_CS_PARITY_NONE 0
-#define AR933X_UART_CS_PARITY_ODD 1
-#define AR933X_UART_CS_PARITY_EVEN 2
+#define AR933X_UART_CS_PARITY_ODD 2
+#define AR933X_UART_CS_PARITY_EVEN 3
#define AR933X_UART_CS_IF_MODE_S 2
#define AR933X_UART_CS_IF_MODE_M 0x3
#define AR933X_UART_CS_IF_MODE_NONE 0
}
/**
+ * mips_gic_vx_map_reg() - Return GIC_Vx_<intr>_MAP register offset
+ * @intr: A GIC local interrupt
+ *
+ * Determine the index of the GIC_VL_<intr>_MAP or GIC_VO_<intr>_MAP register
+ * within the block of GIC map registers. This is almost the same as the order
+ * of interrupts in the pending & mask registers, as used by enum
+ * mips_gic_local_interrupt, but moves the FDC interrupt & thus offsets the
+ * interrupts after it...
+ *
+ * Return: The map register index corresponding to @intr.
+ *
+ * The return value is suitable for use with the (read|write)_gic_v[lo]_map
+ * accessor functions.
+ */
+static inline unsigned int
+mips_gic_vx_map_reg(enum mips_gic_local_interrupt intr)
+{
+ /* WD, Compare & Timer are 1:1 */
+ if (intr <= GIC_LOCAL_INT_TIMER)
+ return intr;
+
+ /* FDC moves to after Timer... */
+ if (intr == GIC_LOCAL_INT_FDC)
+ return GIC_LOCAL_INT_TIMER + 1;
+
+ /* As a result everything else is offset by 1 */
+ return intr + 1;
+}
+
+/**
* gic_get_c0_compare_int() - Return cp0 count/compare interrupt virq
*
* Determine the virq number to use for the coprocessor 0 count/compare
bool __virt_addr_valid(const volatile void *kaddr)
{
- unsigned long vaddr = (unsigned long)vaddr;
+ unsigned long vaddr = (unsigned long)kaddr;
if ((vaddr < PAGE_OFFSET) || (vaddr >= MAP_BASE))
return false;
static void build_restore_work_registers(u32 **p)
{
if (scratch_reg >= 0) {
+ uasm_i_ehb(p);
UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
return;
}
uasm_i_mtc0(p, 0, C0_PAGEMASK);
uasm_il_b(p, r, lid);
}
- if (scratch_reg >= 0)
+ if (scratch_reg >= 0) {
+ uasm_i_ehb(p);
UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
- else
+ } else {
UASM_i_LW(p, 1, scratchpad_offset(0), 0);
+ }
} else {
/* Reset default page size */
if (PM_DEFAULT_MASK >> 16) {
uasm_i_jr(p, ptr);
if (mode == refill_scratch) {
- if (scratch_reg >= 0)
+ if (scratch_reg >= 0) {
+ uasm_i_ehb(p);
UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
- else
+ } else {
UASM_i_LW(p, 1, scratchpad_offset(0), 0);
+ }
} else {
uasm_i_nop(p);
}
UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */
if (c0_scratch_reg >= 0) {
+ uasm_i_ehb(p);
UASM_i_MFC0(p, scratch, c0_kscratch(), c0_scratch_reg);
build_tlb_write_entry(p, l, r, tlb_random);
uasm_l_leave(l, *p);
uasm_i_dinsm(&p, a0, 0, 29, 64 - 29);
uasm_l_tlbl_goaround1(&l, p);
UASM_i_SLL(&p, a0, a0, 11);
- uasm_i_jr(&p, 31);
UASM_i_MTC0(&p, a0, C0_CONTEXT);
+ uasm_i_jr(&p, 31);
+ uasm_i_ehb(&p);
} else {
/* PGD in c0_KScratch */
- uasm_i_jr(&p, 31);
if (cpu_has_ldpte)
UASM_i_MTC0(&p, a0, C0_PWBASE);
else
UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg);
+ uasm_i_jr(&p, 31);
+ uasm_i_ehb(&p);
}
#else
#ifdef CONFIG_SMP
UASM_i_LA_mostly(&p, a2, pgdc);
UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2);
#endif /* SMP */
- uasm_i_jr(&p, 31);
/* if pgd_reg is allocated, save PGD also to scratch register */
- if (pgd_reg != -1)
+ if (pgd_reg != -1) {
UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg);
- else
+ uasm_i_jr(&p, 31);
+ uasm_i_ehb(&p);
+ } else {
+ uasm_i_jr(&p, 31);
uasm_i_nop(&p);
+ }
#endif
if (p >= (u32 *)tlbmiss_handler_setup_pgd_end)
panic("tlbmiss_handler_setup_pgd space exceeded");
/* 32-bit PC relative address */
*loc = val - dot - 8 + addend;
break;
+ case R_PARISC_PCREL64:
+ /* 64-bit PC relative address */
+ *loc64 = val - dot - 8 + addend;
+ break;
case R_PARISC_DIR64:
/* 64-bit effective address */
*loc64 = val + addend;
#define spin_cpu_relax() barrier()
-#define spin_cpu_yield() spin_cpu_relax()
-
#define spin_end() HMT_medium()
#define spin_until_cond(cond) \
mfspr r11,SPRN_DSISR /* Save DSISR */
std r11,_DSISR(r1)
std r9,_CCR(r1) /* Save CR in stackframe */
- kuap_save_amr_and_lock r9, r10, cr1
+ /* We don't touch AMR here, we never go to virtual mode */
/* Save r9 through r13 from EXMC save area to stack frame. */
EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
mfmsr r11 /* get MSR value */
{
/* make sure the single step bit is not set. */
user_disable_single_step(child);
- clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
}
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
/* Call function on all CPUs. One of us will make the
* rtas call
*/
- if (on_each_cpu(rtas_percpu_suspend_me, &data, 0))
- atomic_set(&data.error, -EINVAL);
+ on_each_cpu(rtas_percpu_suspend_me, &data, 0);
wait_for_completion(&done);
void slb_setup_new_exec(void);
+static int realloc_context_ids(mm_context_t *ctx)
+{
+ int i, id;
+
+ /*
+ * id 0 (aka. ctx->id) is special, we always allocate a new one, even if
+ * there wasn't one allocated previously (which happens in the exec
+ * case where ctx is newly allocated).
+ *
+ * We have to be a bit careful here. We must keep the existing ids in
+ * the array, so that we can test if they're non-zero to decide if we
+ * need to allocate a new one. However in case of error we must free the
+ * ids we've allocated but *not* any of the existing ones (or risk a
+ * UAF). That's why we decrement i at the start of the error handling
+ * loop, to skip the id that we just tested but couldn't reallocate.
+ */
+ for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) {
+ if (i == 0 || ctx->extended_id[i]) {
+ id = hash__alloc_context_id();
+ if (id < 0)
+ goto error;
+
+ ctx->extended_id[i] = id;
+ }
+ }
+
+ /* The caller expects us to return id */
+ return ctx->id;
+
+error:
+ for (i--; i >= 0; i--) {
+ if (ctx->extended_id[i])
+ ida_free(&mmu_context_ida, ctx->extended_id[i]);
+ }
+
+ return id;
+}
+
static int hash__init_new_context(struct mm_struct *mm)
{
int index;
- index = hash__alloc_context_id();
- if (index < 0)
- return index;
-
mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
GFP_KERNEL);
- if (!mm->context.hash_context) {
- ida_free(&mmu_context_ida, index);
+ if (!mm->context.hash_context)
return -ENOMEM;
- }
/*
* The old code would re-promote on fork, we don't do that when using
mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
GFP_KERNEL);
if (!mm->context.hash_context->spt) {
- ida_free(&mmu_context_ida, index);
kfree(mm->context.hash_context);
return -ENOMEM;
}
}
#endif
+ }
+ index = realloc_context_ids(&mm->context);
+ if (index < 0) {
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+ kfree(mm->context.hash_context->spt);
+#endif
+ kfree(mm->context.hash_context);
+ return index;
}
pkey_mm_init(mm);
interrupt-parent = <&plic0>;
interrupts = <4>;
clocks = <&prci PRCI_CLK_TLCLK>;
+ status = "disabled";
};
uart1: serial@10011000 {
compatible = "sifive,fu540-c000-uart", "sifive,uart0";
interrupt-parent = <&plic0>;
interrupts = <5>;
clocks = <&prci PRCI_CLK_TLCLK>;
+ status = "disabled";
};
i2c0: i2c@10030000 {
compatible = "sifive,fu540-c000-i2c", "sifive,i2c0";
reg-io-width = <1>;
#address-cells = <1>;
#size-cells = <0>;
+ status = "disabled";
};
qspi0: spi@10040000 {
compatible = "sifive,fu540-c000-spi", "sifive,spi0";
clocks = <&prci PRCI_CLK_TLCLK>;
#address-cells = <1>;
#size-cells = <0>;
+ status = "disabled";
};
qspi1: spi@10041000 {
compatible = "sifive,fu540-c000-spi", "sifive,spi0";
clocks = <&prci PRCI_CLK_TLCLK>;
#address-cells = <1>;
#size-cells = <0>;
+ status = "disabled";
};
qspi2: spi@10050000 {
compatible = "sifive,fu540-c000-spi", "sifive,spi0";
clocks = <&prci PRCI_CLK_TLCLK>;
#address-cells = <1>;
#size-cells = <0>;
+ status = "disabled";
};
};
};
};
};
+&uart0 {
+ status = "okay";
+};
+
+&uart1 {
+ status = "okay";
+};
+
+&i2c0 {
+ status = "okay";
+};
+
&qspi0 {
+ status = "okay";
flash@0 {
compatible = "issi,is25wp256", "jedec,spi-nor";
reg = <0>;
CONFIG_CLK_SIFIVE=y
CONFIG_CLK_SIFIVE_FU540_PRCI=y
CONFIG_SIFIVE_PLIC=y
+CONFIG_SPI_SIFIVE=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_AUTOFS4_FS=y
CONFIG_CRYPTO_USER_API_HASH=y
CONFIG_CRYPTO_DEV_VIRTIO=y
CONFIG_PRINTK_TIME=y
+CONFIG_SPI=y
+CONFIG_MMC_SPI=y
+CONFIG_MMC=y
+CONFIG_DEVTMPFS_MOUNT=y
# CONFIG_RCU_TRACE is not set
* entries, but in RISC-V, SFENCE.VMA specifies an
* ordering constraint, not a cache flush; it is
* necessary even after writing invalid entries.
- * Relying on flush_tlb_fix_spurious_fault would
- * suffice, but the extra traps reduce
- * performance. So, eagerly SFENCE.VMA.
*/
local_flush_tlb_page(addr);
# SPDX-License-Identifier: GPL-2.0
+config ARCH_HAS_MEM_ENCRYPT
+ def_bool y
+
config MMU
def_bool y
def_bool y
config GENERIC_LOCKBREAK
- def_bool y if SMP && PREEMPT
+ def_bool y if PREEMPT
config PGSTE
def_bool y if KVM
select DYNAMIC_FTRACE if FUNCTION_TRACER
select GENERIC_CLOCKEVENTS
select GENERIC_CPU_AUTOPROBE
- select GENERIC_CPU_DEVICES if !SMP
select GENERIC_CPU_VULNERABILITIES
select GENERIC_FIND_FIRST_BIT
select GENERIC_SMP_IDLE_THREAD
select VIRT_CPU_ACCOUNTING
select ARCH_HAS_SCALED_CPUTIME
select HAVE_NMI
+ select SWIOTLB
+ select GENERIC_ALLOCATOR
config SCHED_OMIT_FRAME_POINTER
config SMP
def_bool y
- prompt "Symmetric multi-processing support"
- ---help---
- This enables support for systems with more than one CPU. If you have
- a system with only one CPU, like most personal computers, say N. If
- you have a system with more than one CPU, say Y.
-
- If you say N here, the kernel will run on uni- and multiprocessor
- machines, but will use only one CPU of a multiprocessor machine. If
- you say Y here, the kernel will run on many, but not all,
- uniprocessor machines. On a uniprocessor machine, the kernel
- will run faster if you say N here.
-
- See also the SMP-HOWTO available at
- <http://www.tldp.org/docs.html#howto>.
-
- Even if you don't know what to do here, say Y.
config NR_CPUS
int "Maximum number of CPUs (2-512)"
range 2 512
- depends on SMP
default "64"
help
This allows you to specify the maximum number of CPUs which this
config HOTPLUG_CPU
def_bool y
- prompt "Support for hot-pluggable CPUs"
- depends on SMP
- help
- Say Y here to be able to turn CPUs off and on. CPUs
- can be controlled through /sys/devices/system/cpu/cpu#.
- Say N if you want to disable CPU hotplug.
# Some NUMA nodes have memory ranges that span
# other nodes. Even though a pfn is valid and
config NUMA
bool "NUMA support"
- depends on SMP && SCHED_TOPOLOGY
+ depends on SCHED_TOPOLOGY
default n
help
Enable NUMA support
config SCHED_TOPOLOGY
def_bool y
prompt "Topology scheduler support"
- depends on SMP
select SCHED_SMT
select SCHED_MC
select SCHED_BOOK
This allows you to specify the maximum number of PCI functions which
this kernel will support.
-endif # PCI
+endif # PCI
config HAS_IOMEM
def_bool PCI
config CRASH_DUMP
bool "kernel crash dumps"
- depends on SMP
select KEXEC
help
Generate crash dump after being started by kexec.
Crash dump kernels are loaded in the main kernel with kexec-tools
into a specially reserved region and then later executed after
a crash by kdump/kexec.
- Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
+ Refer to <file:Documentation/s390/zfcpdump.rst> for more details on this.
This option also enables s390 zfcpdump.
- See also <file:Documentation/s390/zfcpdump.txt>
+ See also <file:Documentation/s390/zfcpdump.rst>
endmenu
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_CHSC_SCH=y
CONFIG_VFIO_AP=m
+CONFIG_VFIO_CCW=m
CONFIG_CRASH_DUMP=y
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
CONFIG_S390_AP_IOMMU=y
+CONFIG_S390_CCW_IOMMU=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_USELIB=y
CONFIG_AUDIT=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
-# CONFIG_CPU_ISOLATION is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
-CONFIG_CGROUPS=y
+CONFIG_NUMA_BALANCING=y
+# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
CONFIG_BLK_CGROUP=y
-CONFIG_CGROUP_SCHED=y
+CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_CGROUP_PIDS=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_PERF=y
CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
-CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
+CONFIG_CHECKPOINT_RESTORE=y
CONFIG_BPF_SYSCALL=y
CONFIG_USERFAULTFD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
-CONFIG_LIVEPATCH=y
-CONFIG_NR_CPUS=256
-CONFIG_NUMA=y
-CONFIG_HZ_100=y
-CONFIG_KEXEC_FILE=y
-CONFIG_KEXEC_VERIFY_SIG=y
-CONFIG_CRASH_DUMP=y
-CONFIG_HIBERNATION=y
-CONFIG_PM_DEBUG=y
-CONFIG_CMM=m
-CONFIG_OPROFILE=y
+CONFIG_OPROFILE=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
-CONFIG_STATIC_KEYS_SELFTEST=y
CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_MODULE_SIG=y
+CONFIG_MODULE_SIG_SHA256=y
CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BLK_WBT=y
+CONFIG_BLK_WBT_SQ=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_CFQ_GROUP_IOSCHED=y
CONFIG_DEFAULT_DEADLINE=y
-CONFIG_BINFMT_MISC=m
+CONFIG_LIVEPATCH=y
+CONFIG_TUNE_ZEC12=y
+CONFIG_NR_CPUS=512
+CONFIG_NUMA=y
+CONFIG_HZ_100=y
+CONFIG_KEXEC_FILE=y
+CONFIG_KEXEC_VERIFY_SIG=y
+CONFIG_EXPOLINE=y
+CONFIG_EXPOLINE_AUTO=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CLEANCACHE=y
CONFIG_FRONTSWAP=y
+CONFIG_MEM_SOFT_DIRTY=y
CONFIG_ZSWAP=y
CONFIG_ZBUD=m
CONFIG_ZSMALLOC=m
CONFIG_ZSMALLOC_STAT=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_PCI=y
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_S390=y
+CONFIG_CHSC_SCH=y
+CONFIG_VFIO_AP=m
+CONFIG_VFIO_CCW=m
+CONFIG_CRASH_DUMP=y
+CONFIG_BINFMT_MISC=m
+CONFIG_HIBERNATION=y
+CONFIG_PM_DEBUG=y
CONFIG_NET=y
CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
-CONFIG_NET_KEY=y
+CONFIG_UNIX_DIAG=m
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_SMC=m
+CONFIG_SMC_DIAG=m
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_NET_IPVTI=m
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_TRANSPORT=m
+CONFIG_INET_XFRM_MODE_TUNNEL=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_INET_DIAG=m
+CONFIG_INET_UDP_DIAG=m
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_HSTCP=m
+CONFIG_TCP_CONG_HYBLA=m
+CONFIG_TCP_CONG_SCALABLE=m
+CONFIG_TCP_CONG_LP=m
+CONFIG_TCP_CONG_VENO=m
+CONFIG_TCP_CONG_YEAH=m
+CONFIG_TCP_CONG_ILLINOIS=m
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_MIP6=m
+CONFIG_INET6_XFRM_MODE_TRANSPORT=m
+CONFIG_INET6_XFRM_MODE_TUNNEL=m
+CONFIG_INET6_XFRM_MODE_BEET=m
+CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
+CONFIG_IPV6_VTI=m
+CONFIG_IPV6_SIT=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_TIMEOUT=y
+CONFIG_NF_CONNTRACK_TIMESTAMP=y
+CONFIG_NF_CONNTRACK_AMANDA=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_H323=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_SNMP=m
+CONFIG_NF_CONNTRACK_PPTP=m
+CONFIG_NF_CONNTRACK_SANE=m
+CONFIG_NF_CONNTRACK_SIP=m
+CONFIG_NF_CONNTRACK_TFTP=m
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_HASH=m
+CONFIG_NETFILTER_XT_SET=m
+CONFIG_NETFILTER_XT_TARGET_AUDIT=m
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
+CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
+CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_DSCP=m
+CONFIG_NETFILTER_XT_TARGET_HMARK=m
+CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NFLOG=m
+CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_TEE=m
+CONFIG_NETFILTER_XT_TARGET_TPROXY=m
+CONFIG_NETFILTER_XT_TARGET_TRACE=m
+CONFIG_NETFILTER_XT_TARGET_SECMARK=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
+CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
+CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_CPU=m
+CONFIG_NETFILTER_XT_MATCH_DCCP=m
+CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
+CONFIG_NETFILTER_XT_MATCH_DSCP=m
+CONFIG_NETFILTER_XT_MATCH_ESP=m
+CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
+CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MARK=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_NFACCT=m
+CONFIG_NETFILTER_XT_MATCH_OSF=m
+CONFIG_NETFILTER_XT_MATCH_OWNER=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
+CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
+CONFIG_NETFILTER_XT_MATCH_QUOTA=m
+CONFIG_NETFILTER_XT_MATCH_RATEEST=m
+CONFIG_NETFILTER_XT_MATCH_REALM=m
+CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_TIME=m
+CONFIG_NETFILTER_XT_MATCH_U32=m
+CONFIG_IP_SET=m
+CONFIG_IP_SET_BITMAP_IP=m
+CONFIG_IP_SET_BITMAP_IPMAC=m
+CONFIG_IP_SET_BITMAP_PORT=m
+CONFIG_IP_SET_HASH_IP=m
+CONFIG_IP_SET_HASH_IPPORT=m
+CONFIG_IP_SET_HASH_IPPORTIP=m
+CONFIG_IP_SET_HASH_IPPORTNET=m
+CONFIG_IP_SET_HASH_NETPORTNET=m
+CONFIG_IP_SET_HASH_NET=m
+CONFIG_IP_SET_HASH_NETNET=m
+CONFIG_IP_SET_HASH_NETPORT=m
+CONFIG_IP_SET_HASH_NETIFACE=m
+CONFIG_IP_SET_LIST_SET=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_FTP=m
+CONFIG_IP_VS_PE_SIP=m
+CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NFT_CHAIN_ROUTE_IPV4=m
+CONFIG_NF_TABLES_ARP=y
+CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_AH=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_SECURITY=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_NF_TABLES_IPV6=y
+CONFIG_NFT_CHAIN_ROUTE_IPV6=m
+CONFIG_NFT_CHAIN_NAT_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_AH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_MH=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_TARGET_HL=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_SECURITY=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
+CONFIG_NF_TABLES_BRIDGE=y
+CONFIG_RDS=m
+CONFIG_RDS_RDMA=m
+CONFIG_RDS_TCP=m
CONFIG_L2TP=m
CONFIG_L2TP_DEBUGFS=m
-CONFIG_VLAN_8021Q=y
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=m
+CONFIG_L2TP_ETH=m
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_MULTIQ=m
CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFB=m
CONFIG_NET_SCH_SFQ=m
CONFIG_NET_SCH_TEQL=m
CONFIG_NET_SCH_TBF=m
CONFIG_NET_SCH_GRED=m
CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_CHOKE=m
+CONFIG_NET_SCH_QFQ=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_ROUTE4=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
CONFIG_CLS_U32_MARK=y
CONFIG_NET_CLS_RSVP=m
CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_CLS_BPF=m
CONFIG_NET_CLS_ACT=y
-CONFIG_NET_ACT_POLICE=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_DNS_RESOLVER=y
+CONFIG_OPENVSWITCH=m
+CONFIG_VSOCKETS=m
+CONFIG_VIRTIO_VSOCKETS=m
+CONFIG_NETLINK_DIAG=m
+CONFIG_CGROUP_NET_PRIO=y
CONFIG_BPF_JIT=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_NET_PKTGEN=m
CONFIG_DEVTMPFS=y
+CONFIG_DMA_CMA=y
+CONFIG_CMA_SIZE_MBYTES=0
+CONFIG_CONNECTOR=y
+CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=32768
CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_RBD=m
+CONFIG_BLK_DEV_NVME=m
+CONFIG_ENCLOSURE_SERVICES=m
+CONFIG_GENWQE=m
+CONFIG_RAID_ATTRS=m
CONFIG_SCSI=y
-# CONFIG_SCSI_MQ_DEFAULT is not set
CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
-CONFIG_BLK_DEV_SR=y
-CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_ENCLOSURE=m
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SPI_ATTRS=m
CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_SCSI_SRP_ATTRS=m
+CONFIG_ISCSI_TCP=m
+CONFIG_SCSI_DEBUG=m
CONFIG_ZFCP=y
-CONFIG_SCSI_VIRTIO=y
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_RDAC=m
+CONFIG_SCSI_DH_HP_SW=m
+CONFIG_SCSI_DH_EMC=m
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_SCSI_OSD_INITIATOR=m
+CONFIG_SCSI_OSD_ULD=m
CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
CONFIG_MD_LINEAR=m
CONFIG_MD_MULTIPATH=m
-CONFIG_BLK_DEV_DM=y
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
CONFIG_DM_MIRROR=m
CONFIG_DM_LOG_USERSPACE=m
CONFIG_DM_RAID=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_MULTIPATH_QL=m
CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_DELAY=m
CONFIG_DM_UEVENT=y
+CONFIG_DM_FLAKEY=m
CONFIG_DM_VERITY=m
CONFIG_DM_SWITCH=m
CONFIG_NETDEVICES=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
CONFIG_EQUALIZER=m
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
CONFIG_TUN=m
-CONFIG_VIRTIO_NET=y
-# CONFIG_NET_VENDOR_ALACRITECH is not set
-# CONFIG_NET_VENDOR_AURORA is not set
-# CONFIG_NET_VENDOR_CORTINA is not set
-# CONFIG_NET_VENDOR_SOLARFLARE is not set
-# CONFIG_NET_VENDOR_SOCIONEXT is not set
-# CONFIG_NET_VENDOR_SYNOPSYS is not set
-# CONFIG_INPUT is not set
+CONFIG_VETH=m
+CONFIG_VIRTIO_NET=m
+CONFIG_NLMON=m
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_CHELSIO is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
+# CONFIG_NET_VENDOR_NATSEMI is not set
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_MPPE=m
+CONFIG_PPPOE=m
+CONFIG_PPTP=m
+CONFIG_PPPOL2TP=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_ISM=m
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_DEVKMEM=y
+CONFIG_LEGACY_PTY_COUNT=0
+CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_RAW_DRIVER=m
-CONFIG_VIRTIO_BALLOON=y
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_TN3270_FS=y
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_WATCHDOG_NOWAYOUT=y
+CONFIG_SOFT_WATCHDOG=m
+CONFIG_DIAG288_WATCHDOG=m
+CONFIG_DRM=y
+CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_MLX4_INFINIBAND=m
+CONFIG_MLX5_INFINIBAND=m
+CONFIG_VFIO=m
+CONFIG_VFIO_PCI=m
+CONFIG_VFIO_MDEV=m
+CONFIG_VFIO_MDEV_DEVICE=m
+CONFIG_VIRTIO_PCI=m
+CONFIG_VIRTIO_BALLOON=m
+CONFIG_VIRTIO_INPUT=y
+CONFIG_S390_AP_IOMMU=y
+CONFIG_S390_CCW_IOMMU=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
+CONFIG_JBD2_DEBUG=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
+CONFIG_JFS_STATISTICS=y
CONFIG_XFS_FS=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_XFS_RT=y
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_OCFS2_FS=m
CONFIG_BTRFS_FS=y
CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_NILFS2_FS=m
+CONFIG_FS_DAX=y
+CONFIG_EXPORTFS_BLOCK_OPS=y
+CONFIG_FS_ENCRYPTION=y
CONFIG_FANOTIFY=y
+CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
+CONFIG_QUOTA_NETLINK_INTERFACE=y
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_AUTOFS4_FS=m
CONFIG_FUSE_FS=y
+CONFIG_CUSE=m
+CONFIG_OVERLAY_FS=m
+CONFIG_FSCACHE=m
+CONFIG_CACHEFILES=m
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_NTFS_FS=m
+CONFIG_NTFS_RW=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
-# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_CONFIGFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_CRAMFS=m
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
+CONFIG_ROMFS_FS=m
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=m
+CONFIG_NFS_SWAP=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
+CONFIG_NFSD_V4_SECURITY_LABEL=y
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_STATS2=y
+CONFIG_CIFS_WEAK_PW_HASH=y
+CONFIG_CIFS_UPCALL=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+# CONFIG_CIFS_DEBUG is not set
+CONFIG_CIFS_DFS_UPCALL=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_UTF8=m
+CONFIG_DLM=m
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_GDB_SCRIPTS=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_FRAME_WARN=1024
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_LATENCYTOP=y
+CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_STACK_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_HIST_TRIGGERS=y
+CONFIG_LKDTM=m
+CONFIG_PERCPU_TEST=m
+CONFIG_ATOMIC64_SELFTEST=y
+CONFIG_TEST_BPF=m
+CONFIG_BUG_ON_DATA_CORRUPTION=y
+CONFIG_S390_PTDUMP=y
+CONFIG_PERSISTENT_KEYRINGS=y
+CONFIG_BIG_KEYS=y
+CONFIG_ENCRYPTED_KEYS=m
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
+CONFIG_SECURITY_SELINUX_DISABLE=y
+CONFIG_INTEGRITY_SIGNATURE=y
+CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
+CONFIG_IMA=y
+CONFIG_IMA_DEFAULT_HASH_SHA256=y
+CONFIG_IMA_WRITE_POLICY=y
+CONFIG_IMA_APPRAISE=y
+CONFIG_CRYPTO_FIPS=y
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_USER=m
+# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
+CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_AUTHENC=m
CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_CCM=m
-CONFIG_CRYPTO_GCM=m
-CONFIG_CRYPTO_CBC=y
-CONFIG_CRYPTO_CFB=m
-CONFIG_CRYPTO_CTS=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_XTS=m
-CONFIG_CRYPTO_CMAC=m
+CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
-CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_ARC4=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_SM4=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_DEFLATE=m
+CONFIG_CRYPTO_842=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
CONFIG_CRYPTO_ANSI_CPRNG=m
CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
+CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
+CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRC7=m
-# CONFIG_XZ_DEC_X86 is not set
-# CONFIG_XZ_DEC_POWERPC is not set
-# CONFIG_XZ_DEC_IA64 is not set
-# CONFIG_XZ_DEC_ARM is not set
-# CONFIG_XZ_DEC_ARMTHUMB is not set
-# CONFIG_XZ_DEC_SPARC is not set
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_INFO_DWARF4=y
-CONFIG_GDB_SCRIPTS=y
-CONFIG_UNUSED_SYMBOLS=y
-CONFIG_DEBUG_SECTION_MISMATCH=y
-CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_PAGEALLOC=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_PANIC_ON_OOPS=y
-CONFIG_PROVE_LOCKING=y
-CONFIG_LOCK_STAT=y
-CONFIG_DEBUG_LOCKDEP=y
-CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_LIST=y
-CONFIG_DEBUG_SG=y
-CONFIG_DEBUG_NOTIFIERS=y
-CONFIG_RCU_CPU_STALL_TIMEOUT=60
-CONFIG_LATENCYTOP=y
-CONFIG_SCHED_TRACER=y
-CONFIG_FTRACE_SYSCALLS=y
-CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
-CONFIG_STACK_TRACER=y
-CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_FUNCTION_PROFILER=y
-# CONFIG_RUNTIME_TESTING_MENU is not set
-CONFIG_S390_PTDUMP=y
+CONFIG_CRC8=m
+CONFIG_CORDIC=m
+CONFIG_CMM=m
+CONFIG_APPLDATA_BASE=y
+CONFIG_KVM=m
+CONFIG_KVM_S390_UCONTROL=y
+CONFIG_VHOST_NET=m
+CONFIG_VHOST_VSOCK=m
+++ /dev/null
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_AUDIT=y
-CONFIG_NO_HZ_IDLE=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_BSD_PROCESS_ACCT_V3=y
-CONFIG_TASKSTATS=y
-CONFIG_TASK_DELAY_ACCT=y
-CONFIG_TASK_XACCT=y
-CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_NUMA_BALANCING=y
-# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set
-CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
-CONFIG_BLK_CGROUP=y
-CONFIG_CFS_BANDWIDTH=y
-CONFIG_RT_GROUP_SCHED=y
-CONFIG_CGROUP_PIDS=y
-CONFIG_CGROUP_FREEZER=y
-CONFIG_CGROUP_HUGETLB=y
-CONFIG_CPUSETS=y
-CONFIG_CGROUP_DEVICE=y
-CONFIG_CGROUP_CPUACCT=y
-CONFIG_CGROUP_PERF=y
-CONFIG_NAMESPACES=y
-CONFIG_USER_NS=y
-CONFIG_SCHED_AUTOGROUP=y
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_EXPERT=y
-# CONFIG_SYSFS_SYSCALL is not set
-CONFIG_CHECKPOINT_RESTORE=y
-CONFIG_BPF_SYSCALL=y
-CONFIG_USERFAULTFD=y
-# CONFIG_COMPAT_BRK is not set
-CONFIG_PROFILING=y
-CONFIG_OPROFILE=m
-CONFIG_KPROBES=y
-CONFIG_JUMP_LABEL=y
-CONFIG_MODULES=y
-CONFIG_MODULE_FORCE_LOAD=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_MODULE_SIG=y
-CONFIG_MODULE_SIG_SHA256=y
-CONFIG_BLK_DEV_INTEGRITY=y
-CONFIG_BLK_DEV_THROTTLING=y
-CONFIG_BLK_WBT=y
-CONFIG_BLK_WBT_SQ=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_IBM_PARTITION=y
-CONFIG_BSD_DISKLABEL=y
-CONFIG_MINIX_SUBPARTITION=y
-CONFIG_SOLARIS_X86_PARTITION=y
-CONFIG_UNIXWARE_DISKLABEL=y
-CONFIG_CFQ_GROUP_IOSCHED=y
-CONFIG_DEFAULT_DEADLINE=y
-CONFIG_LIVEPATCH=y
-CONFIG_TUNE_ZEC12=y
-CONFIG_NR_CPUS=512
-CONFIG_NUMA=y
-CONFIG_HZ_100=y
-CONFIG_KEXEC_FILE=y
-CONFIG_KEXEC_VERIFY_SIG=y
-CONFIG_EXPOLINE=y
-CONFIG_EXPOLINE_AUTO=y
-CONFIG_MEMORY_HOTPLUG=y
-CONFIG_MEMORY_HOTREMOVE=y
-CONFIG_KSM=y
-CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_CLEANCACHE=y
-CONFIG_FRONTSWAP=y
-CONFIG_MEM_SOFT_DIRTY=y
-CONFIG_ZSWAP=y
-CONFIG_ZBUD=m
-CONFIG_ZSMALLOC=m
-CONFIG_ZSMALLOC_STAT=y
-CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
-CONFIG_IDLE_PAGE_TRACKING=y
-CONFIG_PCI=y
-CONFIG_HOTPLUG_PCI=y
-CONFIG_HOTPLUG_PCI_S390=y
-CONFIG_CHSC_SCH=y
-CONFIG_VFIO_AP=m
-CONFIG_CRASH_DUMP=y
-CONFIG_BINFMT_MISC=m
-CONFIG_HIBERNATION=y
-CONFIG_PM_DEBUG=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_PACKET_DIAG=m
-CONFIG_UNIX=y
-CONFIG_UNIX_DIAG=m
-CONFIG_XFRM_USER=m
-CONFIG_NET_KEY=m
-CONFIG_SMC=m
-CONFIG_SMC_DIAG=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_NET_IPIP=m
-CONFIG_NET_IPGRE_DEMUX=m
-CONFIG_NET_IPGRE=m
-CONFIG_NET_IPGRE_BROADCAST=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
-CONFIG_SYN_COOKIES=y
-CONFIG_NET_IPVTI=m
-CONFIG_INET_AH=m
-CONFIG_INET_ESP=m
-CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
-CONFIG_INET_DIAG=m
-CONFIG_INET_UDP_DIAG=m
-CONFIG_TCP_CONG_ADVANCED=y
-CONFIG_TCP_CONG_HSTCP=m
-CONFIG_TCP_CONG_HYBLA=m
-CONFIG_TCP_CONG_SCALABLE=m
-CONFIG_TCP_CONG_LP=m
-CONFIG_TCP_CONG_VENO=m
-CONFIG_TCP_CONG_YEAH=m
-CONFIG_TCP_CONG_ILLINOIS=m
-CONFIG_IPV6_ROUTER_PREF=y
-CONFIG_INET6_AH=m
-CONFIG_INET6_ESP=m
-CONFIG_INET6_IPCOMP=m
-CONFIG_IPV6_MIP6=m
-CONFIG_INET6_XFRM_MODE_TRANSPORT=m
-CONFIG_INET6_XFRM_MODE_TUNNEL=m
-CONFIG_INET6_XFRM_MODE_BEET=m
-CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
-CONFIG_IPV6_VTI=m
-CONFIG_IPV6_SIT=m
-CONFIG_IPV6_GRE=m
-CONFIG_IPV6_MULTIPLE_TABLES=y
-CONFIG_IPV6_SUBTREES=y
-CONFIG_NETFILTER=y
-CONFIG_NF_CONNTRACK=m
-CONFIG_NF_CONNTRACK_SECMARK=y
-CONFIG_NF_CONNTRACK_EVENTS=y
-CONFIG_NF_CONNTRACK_TIMEOUT=y
-CONFIG_NF_CONNTRACK_TIMESTAMP=y
-CONFIG_NF_CONNTRACK_AMANDA=m
-CONFIG_NF_CONNTRACK_FTP=m
-CONFIG_NF_CONNTRACK_H323=m
-CONFIG_NF_CONNTRACK_IRC=m
-CONFIG_NF_CONNTRACK_NETBIOS_NS=m
-CONFIG_NF_CONNTRACK_SNMP=m
-CONFIG_NF_CONNTRACK_PPTP=m
-CONFIG_NF_CONNTRACK_SANE=m
-CONFIG_NF_CONNTRACK_SIP=m
-CONFIG_NF_CONNTRACK_TFTP=m
-CONFIG_NF_CT_NETLINK=m
-CONFIG_NF_CT_NETLINK_TIMEOUT=m
-CONFIG_NF_TABLES=m
-CONFIG_NFT_CT=m
-CONFIG_NFT_COUNTER=m
-CONFIG_NFT_LOG=m
-CONFIG_NFT_LIMIT=m
-CONFIG_NFT_NAT=m
-CONFIG_NFT_COMPAT=m
-CONFIG_NFT_HASH=m
-CONFIG_NETFILTER_XT_SET=m
-CONFIG_NETFILTER_XT_TARGET_AUDIT=m
-CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
-CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
-CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
-CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
-CONFIG_NETFILTER_XT_TARGET_CT=m
-CONFIG_NETFILTER_XT_TARGET_DSCP=m
-CONFIG_NETFILTER_XT_TARGET_HMARK=m
-CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
-CONFIG_NETFILTER_XT_TARGET_LOG=m
-CONFIG_NETFILTER_XT_TARGET_MARK=m
-CONFIG_NETFILTER_XT_TARGET_NFLOG=m
-CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
-CONFIG_NETFILTER_XT_TARGET_TEE=m
-CONFIG_NETFILTER_XT_TARGET_TPROXY=m
-CONFIG_NETFILTER_XT_TARGET_TRACE=m
-CONFIG_NETFILTER_XT_TARGET_SECMARK=m
-CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
-CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
-CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
-CONFIG_NETFILTER_XT_MATCH_BPF=m
-CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
-CONFIG_NETFILTER_XT_MATCH_COMMENT=m
-CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
-CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
-CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
-CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
-CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
-CONFIG_NETFILTER_XT_MATCH_CPU=m
-CONFIG_NETFILTER_XT_MATCH_DCCP=m
-CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
-CONFIG_NETFILTER_XT_MATCH_DSCP=m
-CONFIG_NETFILTER_XT_MATCH_ESP=m
-CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
-CONFIG_NETFILTER_XT_MATCH_HELPER=m
-CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
-CONFIG_NETFILTER_XT_MATCH_IPVS=m
-CONFIG_NETFILTER_XT_MATCH_LENGTH=m
-CONFIG_NETFILTER_XT_MATCH_LIMIT=m
-CONFIG_NETFILTER_XT_MATCH_MAC=m
-CONFIG_NETFILTER_XT_MATCH_MARK=m
-CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
-CONFIG_NETFILTER_XT_MATCH_NFACCT=m
-CONFIG_NETFILTER_XT_MATCH_OSF=m
-CONFIG_NETFILTER_XT_MATCH_OWNER=m
-CONFIG_NETFILTER_XT_MATCH_POLICY=m
-CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
-CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
-CONFIG_NETFILTER_XT_MATCH_QUOTA=m
-CONFIG_NETFILTER_XT_MATCH_RATEEST=m
-CONFIG_NETFILTER_XT_MATCH_REALM=m
-CONFIG_NETFILTER_XT_MATCH_RECENT=m
-CONFIG_NETFILTER_XT_MATCH_STATE=m
-CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
-CONFIG_NETFILTER_XT_MATCH_STRING=m
-CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
-CONFIG_NETFILTER_XT_MATCH_TIME=m
-CONFIG_NETFILTER_XT_MATCH_U32=m
-CONFIG_IP_SET=m
-CONFIG_IP_SET_BITMAP_IP=m
-CONFIG_IP_SET_BITMAP_IPMAC=m
-CONFIG_IP_SET_BITMAP_PORT=m
-CONFIG_IP_SET_HASH_IP=m
-CONFIG_IP_SET_HASH_IPPORT=m
-CONFIG_IP_SET_HASH_IPPORTIP=m
-CONFIG_IP_SET_HASH_IPPORTNET=m
-CONFIG_IP_SET_HASH_NETPORTNET=m
-CONFIG_IP_SET_HASH_NET=m
-CONFIG_IP_SET_HASH_NETNET=m
-CONFIG_IP_SET_HASH_NETPORT=m
-CONFIG_IP_SET_HASH_NETIFACE=m
-CONFIG_IP_SET_LIST_SET=m
-CONFIG_IP_VS=m
-CONFIG_IP_VS_PROTO_TCP=y
-CONFIG_IP_VS_PROTO_UDP=y
-CONFIG_IP_VS_PROTO_ESP=y
-CONFIG_IP_VS_PROTO_AH=y
-CONFIG_IP_VS_RR=m
-CONFIG_IP_VS_WRR=m
-CONFIG_IP_VS_LC=m
-CONFIG_IP_VS_WLC=m
-CONFIG_IP_VS_LBLC=m
-CONFIG_IP_VS_LBLCR=m
-CONFIG_IP_VS_DH=m
-CONFIG_IP_VS_SH=m
-CONFIG_IP_VS_SED=m
-CONFIG_IP_VS_NQ=m
-CONFIG_IP_VS_FTP=m
-CONFIG_IP_VS_PE_SIP=m
-CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_NF_TABLES_IPV4=y
-CONFIG_NFT_CHAIN_ROUTE_IPV4=m
-CONFIG_NF_TABLES_ARP=y
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_MATCH_AH=m
-CONFIG_IP_NF_MATCH_ECN=m
-CONFIG_IP_NF_MATCH_RPFILTER=m
-CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_FILTER=m
-CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_NAT=m
-CONFIG_IP_NF_TARGET_MASQUERADE=m
-CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
-CONFIG_IP_NF_TARGET_ECN=m
-CONFIG_IP_NF_TARGET_TTL=m
-CONFIG_IP_NF_RAW=m
-CONFIG_IP_NF_SECURITY=m
-CONFIG_IP_NF_ARPTABLES=m
-CONFIG_IP_NF_ARPFILTER=m
-CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_NF_TABLES_IPV6=y
-CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_IP6_NF_IPTABLES=m
-CONFIG_IP6_NF_MATCH_AH=m
-CONFIG_IP6_NF_MATCH_EUI64=m
-CONFIG_IP6_NF_MATCH_FRAG=m
-CONFIG_IP6_NF_MATCH_OPTS=m
-CONFIG_IP6_NF_MATCH_HL=m
-CONFIG_IP6_NF_MATCH_IPV6HEADER=m
-CONFIG_IP6_NF_MATCH_MH=m
-CONFIG_IP6_NF_MATCH_RPFILTER=m
-CONFIG_IP6_NF_MATCH_RT=m
-CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_FILTER=m
-CONFIG_IP6_NF_TARGET_REJECT=m
-CONFIG_IP6_NF_MANGLE=m
-CONFIG_IP6_NF_RAW=m
-CONFIG_IP6_NF_SECURITY=m
-CONFIG_IP6_NF_NAT=m
-CONFIG_IP6_NF_TARGET_MASQUERADE=m
-CONFIG_NF_TABLES_BRIDGE=y
-CONFIG_RDS=m
-CONFIG_RDS_RDMA=m
-CONFIG_RDS_TCP=m
-CONFIG_L2TP=m
-CONFIG_L2TP_DEBUGFS=m
-CONFIG_L2TP_V3=y
-CONFIG_L2TP_IP=m
-CONFIG_L2TP_ETH=m
-CONFIG_BRIDGE=m
-CONFIG_VLAN_8021Q=m
-CONFIG_VLAN_8021Q_GVRP=y
-CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_CBQ=m
-CONFIG_NET_SCH_HTB=m
-CONFIG_NET_SCH_HFSC=m
-CONFIG_NET_SCH_PRIO=m
-CONFIG_NET_SCH_MULTIQ=m
-CONFIG_NET_SCH_RED=m
-CONFIG_NET_SCH_SFB=m
-CONFIG_NET_SCH_SFQ=m
-CONFIG_NET_SCH_TEQL=m
-CONFIG_NET_SCH_TBF=m
-CONFIG_NET_SCH_GRED=m
-CONFIG_NET_SCH_DSMARK=m
-CONFIG_NET_SCH_NETEM=m
-CONFIG_NET_SCH_DRR=m
-CONFIG_NET_SCH_MQPRIO=m
-CONFIG_NET_SCH_CHOKE=m
-CONFIG_NET_SCH_QFQ=m
-CONFIG_NET_SCH_CODEL=m
-CONFIG_NET_SCH_FQ_CODEL=m
-CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_SCH_PLUG=m
-CONFIG_NET_CLS_BASIC=m
-CONFIG_NET_CLS_TCINDEX=m
-CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_FW=m
-CONFIG_NET_CLS_U32=m
-CONFIG_CLS_U32_PERF=y
-CONFIG_CLS_U32_MARK=y
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_CLS_FLOW=m
-CONFIG_NET_CLS_CGROUP=y
-CONFIG_NET_CLS_BPF=m
-CONFIG_NET_CLS_ACT=y
-CONFIG_NET_ACT_POLICE=m
-CONFIG_NET_ACT_GACT=m
-CONFIG_GACT_PROB=y
-CONFIG_NET_ACT_MIRRED=m
-CONFIG_NET_ACT_IPT=m
-CONFIG_NET_ACT_NAT=m
-CONFIG_NET_ACT_PEDIT=m
-CONFIG_NET_ACT_SIMP=m
-CONFIG_NET_ACT_SKBEDIT=m
-CONFIG_NET_ACT_CSUM=m
-CONFIG_DNS_RESOLVER=y
-CONFIG_OPENVSWITCH=m
-CONFIG_VSOCKETS=m
-CONFIG_VIRTIO_VSOCKETS=m
-CONFIG_NETLINK_DIAG=m
-CONFIG_CGROUP_NET_PRIO=y
-CONFIG_BPF_JIT=y
-CONFIG_NET_PKTGEN=m
-CONFIG_DEVTMPFS=y
-CONFIG_DMA_CMA=y
-CONFIG_CMA_SIZE_MBYTES=0
-CONFIG_CONNECTOR=y
-CONFIG_ZRAM=m
-CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
-CONFIG_BLK_DEV_DRBD=m
-CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_VIRTIO_BLK=y
-CONFIG_BLK_DEV_RBD=m
-CONFIG_BLK_DEV_NVME=m
-CONFIG_ENCLOSURE_SERVICES=m
-CONFIG_GENWQE=m
-CONFIG_RAID_ATTRS=m
-CONFIG_SCSI=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=m
-CONFIG_CHR_DEV_OSST=m
-CONFIG_BLK_DEV_SR=m
-CONFIG_CHR_DEV_SG=y
-CONFIG_CHR_DEV_SCH=m
-CONFIG_SCSI_ENCLOSURE=m
-CONFIG_SCSI_CONSTANTS=y
-CONFIG_SCSI_LOGGING=y
-CONFIG_SCSI_SPI_ATTRS=m
-CONFIG_SCSI_FC_ATTRS=y
-CONFIG_SCSI_SAS_LIBSAS=m
-CONFIG_SCSI_SRP_ATTRS=m
-CONFIG_ISCSI_TCP=m
-CONFIG_SCSI_DEBUG=m
-CONFIG_ZFCP=y
-CONFIG_SCSI_VIRTIO=m
-CONFIG_SCSI_DH=y
-CONFIG_SCSI_DH_RDAC=m
-CONFIG_SCSI_DH_HP_SW=m
-CONFIG_SCSI_DH_EMC=m
-CONFIG_SCSI_DH_ALUA=m
-CONFIG_SCSI_OSD_INITIATOR=m
-CONFIG_SCSI_OSD_ULD=m
-CONFIG_MD=y
-CONFIG_BLK_DEV_MD=y
-CONFIG_MD_LINEAR=m
-CONFIG_MD_MULTIPATH=m
-CONFIG_MD_FAULTY=m
-CONFIG_BLK_DEV_DM=m
-CONFIG_DM_CRYPT=m
-CONFIG_DM_SNAPSHOT=m
-CONFIG_DM_THIN_PROVISIONING=m
-CONFIG_DM_MIRROR=m
-CONFIG_DM_LOG_USERSPACE=m
-CONFIG_DM_RAID=m
-CONFIG_DM_ZERO=m
-CONFIG_DM_MULTIPATH=m
-CONFIG_DM_MULTIPATH_QL=m
-CONFIG_DM_MULTIPATH_ST=m
-CONFIG_DM_DELAY=m
-CONFIG_DM_UEVENT=y
-CONFIG_DM_FLAKEY=m
-CONFIG_DM_VERITY=m
-CONFIG_DM_SWITCH=m
-CONFIG_NETDEVICES=y
-CONFIG_BONDING=m
-CONFIG_DUMMY=m
-CONFIG_EQUALIZER=m
-CONFIG_IFB=m
-CONFIG_MACVLAN=m
-CONFIG_MACVTAP=m
-CONFIG_VXLAN=m
-CONFIG_TUN=m
-CONFIG_VETH=m
-CONFIG_VIRTIO_NET=m
-CONFIG_NLMON=m
-# CONFIG_NET_VENDOR_ARC is not set
-# CONFIG_NET_VENDOR_CHELSIO is not set
-# CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_MARVELL is not set
-CONFIG_MLX4_EN=m
-CONFIG_MLX5_CORE=m
-CONFIG_MLX5_CORE_EN=y
-# CONFIG_NET_VENDOR_NATSEMI is not set
-CONFIG_PPP=m
-CONFIG_PPP_BSDCOMP=m
-CONFIG_PPP_DEFLATE=m
-CONFIG_PPP_MPPE=m
-CONFIG_PPPOE=m
-CONFIG_PPTP=m
-CONFIG_PPPOL2TP=m
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
-CONFIG_ISM=m
-CONFIG_INPUT_EVDEV=y
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-CONFIG_LEGACY_PTY_COUNT=0
-CONFIG_HW_RANDOM_VIRTIO=m
-CONFIG_RAW_DRIVER=m
-CONFIG_HANGCHECK_TIMER=m
-CONFIG_TN3270_FS=y
-# CONFIG_HWMON is not set
-CONFIG_WATCHDOG=y
-CONFIG_WATCHDOG_NOWAYOUT=y
-CONFIG_SOFT_WATCHDOG=m
-CONFIG_DIAG288_WATCHDOG=m
-CONFIG_DRM=y
-CONFIG_DRM_VIRTIO_GPU=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-# CONFIG_HID is not set
-# CONFIG_USB_SUPPORT is not set
-CONFIG_INFINIBAND=m
-CONFIG_INFINIBAND_USER_ACCESS=m
-CONFIG_MLX4_INFINIBAND=m
-CONFIG_MLX5_INFINIBAND=m
-CONFIG_VFIO=m
-CONFIG_VFIO_PCI=m
-CONFIG_VFIO_MDEV=m
-CONFIG_VFIO_MDEV_DEVICE=m
-CONFIG_VIRTIO_PCI=m
-CONFIG_VIRTIO_BALLOON=m
-CONFIG_VIRTIO_INPUT=y
-CONFIG_S390_AP_IOMMU=y
-CONFIG_EXT4_FS=y
-CONFIG_EXT4_FS_POSIX_ACL=y
-CONFIG_EXT4_FS_SECURITY=y
-CONFIG_JBD2_DEBUG=y
-CONFIG_JFS_FS=m
-CONFIG_JFS_POSIX_ACL=y
-CONFIG_JFS_SECURITY=y
-CONFIG_JFS_STATISTICS=y
-CONFIG_XFS_FS=y
-CONFIG_XFS_QUOTA=y
-CONFIG_XFS_POSIX_ACL=y
-CONFIG_XFS_RT=y
-CONFIG_GFS2_FS=m
-CONFIG_GFS2_FS_LOCKING_DLM=y
-CONFIG_OCFS2_FS=m
-CONFIG_BTRFS_FS=y
-CONFIG_BTRFS_FS_POSIX_ACL=y
-CONFIG_NILFS2_FS=m
-CONFIG_FS_DAX=y
-CONFIG_EXPORTFS_BLOCK_OPS=y
-CONFIG_FS_ENCRYPTION=y
-CONFIG_FANOTIFY=y
-CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
-CONFIG_QUOTA_NETLINK_INTERFACE=y
-CONFIG_QFMT_V1=m
-CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=m
-CONFIG_FUSE_FS=y
-CONFIG_CUSE=m
-CONFIG_OVERLAY_FS=m
-CONFIG_FSCACHE=m
-CONFIG_CACHEFILES=m
-CONFIG_ISO9660_FS=y
-CONFIG_JOLIET=y
-CONFIG_ZISOFS=y
-CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=m
-CONFIG_NTFS_FS=m
-CONFIG_NTFS_RW=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_TMPFS_POSIX_ACL=y
-CONFIG_HUGETLBFS=y
-CONFIG_CONFIGFS_FS=m
-CONFIG_ECRYPT_FS=m
-CONFIG_CRAMFS=m
-CONFIG_SQUASHFS=m
-CONFIG_SQUASHFS_XATTR=y
-CONFIG_SQUASHFS_LZO=y
-CONFIG_SQUASHFS_XZ=y
-CONFIG_ROMFS_FS=m
-CONFIG_NFS_FS=m
-CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=m
-CONFIG_NFS_SWAP=y
-CONFIG_NFSD=m
-CONFIG_NFSD_V3_ACL=y
-CONFIG_NFSD_V4=y
-CONFIG_NFSD_V4_SECURITY_LABEL=y
-CONFIG_CIFS=m
-CONFIG_CIFS_STATS=y
-CONFIG_CIFS_STATS2=y
-CONFIG_CIFS_WEAK_PW_HASH=y
-CONFIG_CIFS_UPCALL=y
-CONFIG_CIFS_XATTR=y
-CONFIG_CIFS_POSIX=y
-# CONFIG_CIFS_DEBUG is not set
-CONFIG_CIFS_DFS_UPCALL=y
-CONFIG_NLS_DEFAULT="utf8"
-CONFIG_NLS_CODEPAGE_437=m
-CONFIG_NLS_CODEPAGE_850=m
-CONFIG_NLS_ASCII=m
-CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_ISO8859_15=m
-CONFIG_NLS_UTF8=m
-CONFIG_DLM=m
-CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_INFO_DWARF4=y
-CONFIG_GDB_SCRIPTS=y
-# CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_FRAME_WARN=1024
-CONFIG_UNUSED_SYMBOLS=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_MEMORY_INIT=y
-CONFIG_PANIC_ON_OOPS=y
-CONFIG_RCU_TORTURE_TEST=m
-CONFIG_RCU_CPU_STALL_TIMEOUT=60
-CONFIG_LATENCYTOP=y
-CONFIG_SCHED_TRACER=y
-CONFIG_FTRACE_SYSCALLS=y
-CONFIG_STACK_TRACER=y
-CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_FUNCTION_PROFILER=y
-CONFIG_HIST_TRIGGERS=y
-CONFIG_LKDTM=m
-CONFIG_PERCPU_TEST=m
-CONFIG_ATOMIC64_SELFTEST=y
-CONFIG_TEST_BPF=m
-CONFIG_BUG_ON_DATA_CORRUPTION=y
-CONFIG_S390_PTDUMP=y
-CONFIG_PERSISTENT_KEYRINGS=y
-CONFIG_BIG_KEYS=y
-CONFIG_ENCRYPTED_KEYS=m
-CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
-CONFIG_SECURITY_SELINUX=y
-CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
-CONFIG_SECURITY_SELINUX_DISABLE=y
-CONFIG_INTEGRITY_SIGNATURE=y
-CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
-CONFIG_IMA=y
-CONFIG_IMA_DEFAULT_HASH_SHA256=y
-CONFIG_IMA_WRITE_POLICY=y
-CONFIG_IMA_APPRAISE=y
-CONFIG_CRYPTO_FIPS=y
-CONFIG_CRYPTO_DH=m
-CONFIG_CRYPTO_ECDH=m
-CONFIG_CRYPTO_USER=m
-# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
-CONFIG_CRYPTO_PCRYPT=m
-CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_CHACHA20POLY1305=m
-CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_KEYWRAP=m
-CONFIG_CRYPTO_XCBC=m
-CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD128=m
-CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_RMD256=m
-CONFIG_CRYPTO_RMD320=m
-CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_TGR192=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_AES_TI=m
-CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAMELLIA=m
-CONFIG_CRYPTO_CAST5=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_FCRYPT=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
-CONFIG_CRYPTO_SEED=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_842=m
-CONFIG_CRYPTO_LZ4=m
-CONFIG_CRYPTO_LZ4HC=m
-CONFIG_CRYPTO_ANSI_CPRNG=m
-CONFIG_CRYPTO_USER_API_HASH=m
-CONFIG_CRYPTO_USER_API_SKCIPHER=m
-CONFIG_CRYPTO_USER_API_RNG=m
-CONFIG_CRYPTO_USER_API_AEAD=m
-CONFIG_ZCRYPT=m
-CONFIG_PKEY=m
-CONFIG_CRYPTO_PAES_S390=m
-CONFIG_CRYPTO_SHA1_S390=m
-CONFIG_CRYPTO_SHA256_S390=m
-CONFIG_CRYPTO_SHA512_S390=m
-CONFIG_CRYPTO_DES_S390=m
-CONFIG_CRYPTO_AES_S390=m
-CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=y
-CONFIG_CRC7=m
-CONFIG_CRC8=m
-CONFIG_CORDIC=m
-CONFIG_CMM=m
-CONFIG_APPLDATA_BASE=y
-CONFIG_KVM=m
-CONFIG_KVM_S390_UCONTROL=y
-CONFIG_VHOST_NET=m
-CONFIG_VHOST_VSOCK=m
# CONFIG_SECCOMP is not set
CONFIG_NET=y
# CONFIG_IUCV is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_BLK_DEV_RAM=y
# CONFIG_BLK_DEV_XPRAM is not set
static int __init ghash_mod_init(void)
{
if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_GHASH))
- return -EOPNOTSUPP;
+ return -ENODEV;
return crypto_register_shash(&ghash_alg);
}
/* check if the CPU has a PRNG */
if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG))
- return -EOPNOTSUPP;
+ return -ENODEV;
/* check if TRNG subfunction is available */
if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
if (prng_mode == PRNG_MODE_SHA512) {
pr_err("The prng module cannot "
"start in SHA-512 mode\n");
- return -EOPNOTSUPP;
+ return -ENODEV;
}
prng_mode = PRNG_MODE_TDES;
} else
static int __init sha1_s390_init(void)
{
if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_1))
- return -EOPNOTSUPP;
+ return -ENODEV;
return crypto_register_shash(&alg);
}
int ret;
if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256))
- return -EOPNOTSUPP;
+ return -ENODEV;
ret = crypto_register_shash(&sha256_alg);
if (ret < 0)
goto out;
int ret;
if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512))
- return -EOPNOTSUPP;
+ return -ENODEV;
if ((ret = crypto_register_shash(&sha512_alg)) < 0)
goto out;
if ((ret = crypto_register_shash(&sha384_alg)) < 0)
#define _ASM_S390_AIRQ_H
#include <linux/bit_spinlock.h>
+#include <linux/dma-mapping.h>
struct airq_struct {
struct hlist_node list; /* Handler queueing. */
/* Adapter interrupt bit vector */
struct airq_iv {
unsigned long *vector; /* Adapter interrupt bit vector */
+ dma_addr_t vector_dma; /* Adapter interrupt bit vector dma */
unsigned long *avail; /* Allocation bit mask for the bit vector */
unsigned long *bitlock; /* Lock bit mask for the bit vector */
unsigned long *ptr; /* Pointer associated with each bit */
extern void ccw_device_wait_idle(struct ccw_device *);
extern int ccw_device_force_console(struct ccw_device *);
+extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size);
+extern void ccw_device_dma_free(struct ccw_device *cdev,
+ void *cpu_addr, size_t size);
+
int ccw_device_siosl(struct ccw_device *);
extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *);
#include <linux/spinlock.h>
#include <linux/bitops.h>
+#include <linux/genalloc.h>
#include <asm/types.h>
#define LPM_ANYPATH 0xff
#define CIW_TYPE_RNI 0x2 /* read node identifier */
/*
+ * Node Descriptor as defined in SA22-7204, "Common I/O-Device Commands"
+ */
+
+#define ND_VALIDITY_VALID 0
+#define ND_VALIDITY_OUTDATED 1
+#define ND_VALIDITY_INVALID 2
+
+struct node_descriptor {
+ /* Flags. */
+ union {
+ struct {
+ u32 validity:3;
+ u32 reserved:5;
+ } __packed;
+ u8 byte0;
+ } __packed;
+
+ /* Node parameters. */
+ u32 params:24;
+
+ /* Node ID. */
+ char type[6];
+ char model[3];
+ char manufacturer[3];
+ char plant[2];
+ char seq[12];
+ u16 tag;
+} __packed;
+
+/*
* Flags used as input parameters for do_IO()
*/
#define DOIO_ALLOW_SUSPEND 0x0001 /* allow for channel prog. suspend */
void channel_subsystem_reinit(void);
extern void css_schedule_reprobe(void);
+extern void *cio_dma_zalloc(size_t size);
+extern void cio_dma_free(void *cpu_addr, size_t size);
+extern struct device *cio_get_dma_css_dev(void);
+
+void *cio_gp_dma_zalloc(struct gen_pool *gp_dma, struct device *dma_dev,
+ size_t size);
+void cio_gp_dma_free(struct gen_pool *gp_dma, void *cpu_addr, size_t size);
+void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev);
+struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages);
+
/* Function from drivers/s390/cio/chsc.c */
int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta);
int chsc_sstpi(void *page, void *result, size_t size);
};
};
-#ifdef CONFIG_SMP
-# define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
-# define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
-#else
-# define ctl_set_bit(cr, bit) __ctl_set_bit(cr, bit)
-# define ctl_clear_bit(cr, bit) __ctl_clear_bit(cr, bit)
-#endif
+#define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
+#define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
#endif /* __ASSEMBLY__ */
#endif /* __ASM_CTL_REG_H */
void debug_set_level(debug_info_t *id, int new_level);
void debug_set_critical(void);
+
void debug_stop_all(void);
+/**
+ * debug_level_enabled() - Returns true if debug events for the specified
+ * level would be logged. Otherwise returns false.
+ *
+ * @id: handle for debug log
+ * @level: debug level
+ *
+ * Return:
+ * - %true if level is less or equal to the current debug level.
+ */
static inline bool debug_level_enabled(debug_info_t *id, int level)
{
return level <= id->level;
}
+/**
+ * debug_event() - writes binary debug entry to active debug area
+ * (if level <= actual debug level)
+ *
+ * @id: handle for debug log
+ * @level: debug level
+ * @data: pointer to data for debug entry
+ * @length: length of data in bytes
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
static inline debug_entry_t *debug_event(debug_info_t *id, int level,
void *data, int length)
{
return debug_event_common(id, level, data, length);
}
+/**
+ * debug_int_event() - writes unsigned integer debug entry to active debug area
+ * (if level <= actual debug level)
+ *
+ * @id: handle for debug log
+ * @level: debug level
+ * @tag: integer value for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
static inline debug_entry_t *debug_int_event(debug_info_t *id, int level,
unsigned int tag)
{
return debug_event_common(id, level, &t, sizeof(unsigned int));
}
+/**
+ * debug_long_event() - writes unsigned long debug entry to active debug area
+ * (if level <= actual debug level)
+ *
+ * @id: handle for debug log
+ * @level: debug level
+ * @tag: long integer value for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
static inline debug_entry_t *debug_long_event(debug_info_t *id, int level,
unsigned long tag)
{
return debug_event_common(id, level, &t, sizeof(unsigned long));
}
+/**
+ * debug_text_event() - writes string debug entry in ascii format to active
+ * debug area (if level <= actual debug level)
+ *
+ * @id: handle for debug log
+ * @level: debug level
+ * @txt: string for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
static inline debug_entry_t *debug_text_event(debug_info_t *id, int level,
const char *txt)
{
/*
* IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
- * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
+ * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details!
*/
extern debug_entry_t *
__debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
__attribute__ ((format(printf, 3, 4)));
+/**
+ * debug_sprintf_event() - writes debug entry with format string
+ * and varargs (longs) to active debug area
+ * (if level $<=$ actual debug level).
+ *
+ * @_id: handle for debug log
+ * @_level: debug level
+ * @_fmt: format string for debug entry
+ * @...: varargs used as in sprintf()
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ *
+ * floats and long long datatypes cannot be used as varargs.
+ */
#define debug_sprintf_event(_id, _level, _fmt, ...) \
({ \
debug_entry_t *__ret; \
__ret; \
})
+/**
+ * debug_exception() - writes binary debug entry to active debug area
+ * (if level <= actual debug level)
+ * and switches to next debug area
+ *
+ * @id: handle for debug log
+ * @level: debug level
+ * @data: pointer to data for debug entry
+ * @length: length of data in bytes
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
static inline debug_entry_t *debug_exception(debug_info_t *id, int level,
void *data, int length)
{
return debug_exception_common(id, level, data, length);
}
+/**
+ * debug_int_exception() - writes unsigned int debug entry to active debug area
+ * (if level <= actual debug level)
+ * and switches to next debug area
+ *
+ * @id: handle for debug log
+ * @level: debug level
+ * @tag: integer value for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
static inline debug_entry_t *debug_int_exception(debug_info_t *id, int level,
unsigned int tag)
{
return debug_exception_common(id, level, &t, sizeof(unsigned int));
}
+/**
+ * debug_long_exception() - writes long debug entry to active debug area
+ * (if level <= actual debug level)
+ * and switches to next debug area
+ *
+ * @id: handle for debug log
+ * @level: debug level
+ * @tag: long integer value for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
static inline debug_entry_t *debug_long_exception (debug_info_t *id, int level,
unsigned long tag)
{
return debug_exception_common(id, level, &t, sizeof(unsigned long));
}
+/**
+ * debug_text_exception() - writes string debug entry in ascii format to active
+ * debug area (if level <= actual debug level)
+ * and switches to next debug area
+ * area
+ *
+ * @id: handle for debug log
+ * @level: debug level
+ * @txt: string for debug entry
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ */
static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level,
const char *txt)
{
/*
* IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
- * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
+ * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details!
*/
extern debug_entry_t *
__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
__attribute__ ((format(printf, 3, 4)));
+
+/**
+ * debug_sprintf_exception() - writes debug entry with format string and
+ * varargs (longs) to active debug area
+ * (if level <= actual debug level)
+ * and switches to next debug area.
+ *
+ * @_id: handle for debug log
+ * @_level: debug level
+ * @_fmt: format string for debug entry
+ * @...: varargs used as in sprintf()
+ *
+ * Return:
+ * - Address of written debug entry
+ * - %NULL if error
+ *
+ * floats and long long datatypes cannot be used as varargs.
+ */
#define debug_sprintf_exception(_id, _level, _fmt, ...) \
({ \
debug_entry_t *__ret; \
})
int debug_register_view(debug_info_t *id, struct debug_view *view);
+
int debug_unregister_view(debug_info_t *id, struct debug_view *view);
/*
return __test_facility(nr, &S390_lowcore.stfle_fac_list);
}
+static inline unsigned long __stfle_asm(u64 *stfle_fac_list, int size)
+{
+ register unsigned long reg0 asm("0") = size - 1;
+
+ asm volatile(
+ ".insn s,0xb2b00000,0(%1)" /* stfle */
+ : "+d" (reg0)
+ : "a" (stfle_fac_list)
+ : "memory", "cc");
+ return reg0;
+}
+
/**
* stfle - Store facility list extended
* @stfle_fac_list: array where facility list can be stored
memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4);
if (S390_lowcore.stfl_fac_list & 0x01000000) {
/* More facility bits available with stfle */
- register unsigned long reg0 asm("0") = size - 1;
-
- asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */
- : "+d" (reg0)
- : "a" (stfle_fac_list)
- : "memory", "cc");
- nr = (reg0 + 1) * 8; /* # bytes stored by stfle */
+ nr = __stfle_asm(stfle_fac_list, size);
+ nr = min_t(unsigned long, (nr + 1) * 8, size * 8);
}
memset((char *) stfle_fac_list + nr, 0, size * 8 - nr);
}
nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG;
nr_chunks = (4096 << page_order) >> IDA_SIZE_LOG;
- ib = kmalloc(sizeof(struct idal_buffer) + nr_ptrs*sizeof(void *),
- GFP_DMA | GFP_KERNEL);
+ ib = kmalloc(struct_size(ib, data, nr_ptrs), GFP_DMA | GFP_KERNEL);
if (ib == NULL)
return ERR_PTR(-ENOMEM);
ib->size = size;
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/seqlock.h>
+#include <linux/module.h>
#include <asm/debug.h>
#include <asm/cpu.h>
#include <asm/fpu/api.h>
unsigned short ibc;
};
+struct kvm_s390_module_hook {
+ int (*hook)(struct kvm_vcpu *vcpu);
+ struct module *owner;
+};
+
struct kvm_s390_crypto {
struct kvm_s390_crypto_cb *crycb;
+ struct kvm_s390_module_hook *pqap_hook;
__u32 crycbd;
__u8 aes_kw;
__u8 dea_kw;
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef S390_MEM_ENCRYPT_H__
+#define S390_MEM_ENCRYPT_H__
+
+#ifndef __ASSEMBLY__
+
+#define sme_me_mask 0ULL
+
+static inline bool sme_active(void) { return false; }
+extern bool sev_active(void);
+
+int set_memory_encrypted(unsigned long addr, int numpages);
+int set_memory_decrypted(unsigned long addr, int numpages);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* S390_MEM_ENCRYPT_H__ */
void zpci_destroy_iommu(struct zpci_dev *zdev);
#ifdef CONFIG_PCI
+static inline bool zpci_use_mio(struct zpci_dev *zdev)
+{
+ return static_branch_likely(&have_mio) && zdev->mio_capable;
+}
+
/* Error handling and recovery */
void zpci_event_error(void *);
void zpci_event_availability(void *);
* per cpu area, use weak definitions to force the compiler to
* generate external references.
*/
-#if defined(CONFIG_SMP) && defined(MODULE)
+#if defined(MODULE)
#define ARCH_NEEDS_WEAK_PER_CPU
#endif
#ifndef __ASSEMBLY__
+#include <linux/cpumask.h>
#include <linux/linkage.h>
#include <linux/irqflags.h>
#include <asm/cpu.h>
return cpu_address;
}
-/*
- * Give up the time slice of the virtual PU.
- */
-#define cpu_relax_yield cpu_relax_yield
-void cpu_relax_yield(void);
-
#define cpu_relax() barrier()
#define ECAG_CACHE_ATTRIBUTE 0
#define __ASM_SMP_H
#include <asm/sigp.h>
-
-#ifdef CONFIG_SMP
-
#include <asm/lowcore.h>
#define raw_smp_processor_id() (S390_lowcore.cpu_nr)
extern void smp_fill_possible_mask(void);
extern void smp_detect_cpus(void);
-#else /* CONFIG_SMP */
-
-#define smp_cpu_mtid 0
-
-static inline void smp_call_ipl_cpu(void (*func)(void *), void *data)
-{
- func(data);
-}
-
-static inline void smp_call_online_cpu(void (*func)(void *), void *data)
-{
- func(data);
-}
-
-static inline void smp_emergency_stop(void)
-{
-}
-
-static inline int smp_find_processor_id(u16 address) { return 0; }
-static inline int smp_store_status(int cpu) { return 0; }
-static inline int smp_vcpu_scheduled(int cpu) { return 1; }
-static inline void smp_yield_cpu(int cpu) { }
-static inline void smp_fill_possible_mask(void) { }
-static inline void smp_detect_cpus(void) { }
-
-#endif /* CONFIG_SMP */
-
static inline void smp_stop_cpu(void)
{
u16 pcpu = stap();
return cpu - (cpu % (smp_cpu_mtid + 1));
}
-#ifdef CONFIG_HOTPLUG_CPU
extern int smp_rescan_cpus(void);
extern void __noreturn cpu_die(void);
extern void __cpu_die(unsigned int cpu);
extern int __cpu_disable(void);
-#else
-static inline int smp_rescan_cpus(void) { return 0; }
-static inline void cpu_die(void) { }
-#endif
#endif /* __ASM_SMP_H */
extern int spin_retry;
-#ifndef CONFIG_SMP
-static inline bool arch_vcpu_is_preempted(int cpu) { return false; }
-#else
bool arch_vcpu_is_preempted(int cpu);
-#endif
#define vcpu_is_preempted arch_vcpu_is_preempted
: : "a" (opt), "a" (asce) : "cc");
}
-#ifdef CONFIG_SMP
void smp_ptlb_all(void);
/*
else
__tlb_flush_global();
}
-#else
-#define __tlb_flush_global() __tlb_flush_local()
-
-/*
- * Flush TLB entries for a specific ASCE on all CPUs.
- */
-static inline void __tlb_flush_mm(struct mm_struct *mm)
-{
- __tlb_flush_local();
-}
-
-static inline void __tlb_flush_kernel(void)
-{
- __tlb_flush_local();
-}
-#endif
static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
{
size_t orc_ip_size, void *orc,
size_t orc_size) {}
-#ifdef CONFIG_KASAN
-/*
- * This disables KASAN checking when reading a value from another task's stack,
- * since the other task could be running on another CPU and could have poisoned
- * the stack in the meantime.
- */
-#define READ_ONCE_TASK_STACK(task, x) \
-({ \
- unsigned long val; \
- if (task == current) \
- val = READ_ONCE(x); \
- else \
- val = READ_ONCE_NOCHECK(x); \
- val; \
-})
-#else
-#define READ_ONCE_TASK_STACK(task, x) READ_ONCE(x)
-#endif
-
#endif /* _ASM_S390_UNWIND_H */
__u64 sf;
__u64 rsic;
__u64 reserved8;
-} __packed __aligned(8);
+} __attribute__((__packed__, __aligned__(8)));
static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb)
{
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
+obj-y += smp.o
extra-y += head64.o vmlinux.lds
CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o
obj-$(CONFIG_HIBERNATION) += suspend.o swsusp.o
obj-$(CONFIG_AUDIT) += audit.o
return 0; /* success */
}
-/*
- * debug_register_mode:
- * - Creates and initializes debug area for the caller
- * The mode parameter allows to specify access rights for the s390dbf files
- * - Returns handle for debug area
+/**
+ * debug_register_mode() - creates and initializes debug area.
+ *
+ * @name: Name of debug log (e.g. used for debugfs entry)
+ * @pages_per_area: Number of pages, which will be allocated per area
+ * @nr_areas: Number of debug areas
+ * @buf_size: Size of data area in each debug entry
+ * @mode: File mode for debugfs files. E.g. S_IRWXUGO
+ * @uid: User ID for debugfs files. Currently only 0 is supported.
+ * @gid: Group ID for debugfs files. Currently only 0 is supported.
+ *
+ * Return:
+ * - Handle for generated debug area
+ * - %NULL if register failed
+ *
+ * Allocates memory for a debug log.
+ * Must not be called within an interrupt handler.
*/
debug_info_t *debug_register_mode(const char *name, int pages_per_area,
int nr_areas, int buf_size, umode_t mode,
}
EXPORT_SYMBOL(debug_register_mode);
-/*
- * debug_register:
- * - creates and initializes debug area for the caller
- * - returns handle for debug area
+/**
+ * debug_register() - creates and initializes debug area with default file mode.
+ *
+ * @name: Name of debug log (e.g. used for debugfs entry)
+ * @pages_per_area: Number of pages, which will be allocated per area
+ * @nr_areas: Number of debug areas
+ * @buf_size: Size of data area in each debug entry
+ *
+ * Return:
+ * - Handle for generated debug area
+ * - %NULL if register failed
+ *
+ * Allocates memory for a debug log.
+ * The debugfs file mode access permissions are read and write for user.
+ * Must not be called within an interrupt handler.
*/
debug_info_t *debug_register(const char *name, int pages_per_area,
int nr_areas, int buf_size)
}
EXPORT_SYMBOL(debug_register);
-/*
- * debug_unregister:
- * - give back debug area
+/**
+ * debug_unregister() - give back debug area.
+ *
+ * @id: handle for debug log
+ *
+ * Return:
+ * none
*/
void debug_unregister(debug_info_t *id)
{
return rc;
}
-/*
- * debug_set_level:
- * - set actual debug level
+/**
+ * debug_set_level() - Sets new actual debug level if new_level is valid.
+ *
+ * @id: handle for debug log
+ * @new_level: new debug level
+ *
+ * Return:
+ * none
*/
void debug_set_level(debug_info_t *id, int new_level)
{
static struct ctl_table_header *s390dbf_sysctl_header;
+/**
+ * debug_stop_all() - stops the debug feature if stopping is allowed.
+ *
+ * Return:
+ * - none
+ *
+ * Currently used in case of a kernel oops.
+ */
void debug_stop_all(void)
{
if (debug_stoppable)
}
EXPORT_SYMBOL(debug_stop_all);
+/**
+ * debug_set_critical() - event/exception functions try lock instead of spin.
+ *
+ * Return:
+ * - none
+ *
+ * Currently used in case of stopping all CPUs but the current one.
+ * Once in this state, functions to write a debug entry for an
+ * event or exception no longer spin on the debug area lock,
+ * but only try to get it and fail if they do not get the lock.
+ */
void debug_set_critical(void)
{
debug_critical = 1;
}
EXPORT_SYMBOL(__debug_sprintf_exception);
-/*
- * debug_register_view:
+/**
+ * debug_register_view() - registers new debug view and creates debugfs
+ * dir entry
+ *
+ * @id: handle for debug log
+ * @view: pointer to debug view struct
+ *
+ * Return:
+ * - 0 : ok
+ * - < 0: Error
*/
int debug_register_view(debug_info_t *id, struct debug_view *view)
{
}
EXPORT_SYMBOL(debug_register_view);
-/*
- * debug_unregister_view:
+/**
+ * debug_unregister_view() - unregisters debug view and removes debugfs
+ * dir entry
+ *
+ * @id: handle for debug log
+ * @view: pointer to debug view struct
+ *
+ * Return:
+ * - 0 : ok
+ * - < 0: Error
*/
int debug_unregister_view(debug_info_t *id, struct debug_view *view)
{
[INSTR_RRF_U0FF] = { F_24, U4_16, F_28, 0, 0, 0 },
[INSTR_RRF_U0RF] = { R_24, U4_16, F_28, 0, 0, 0 },
[INSTR_RRF_U0RR] = { R_24, R_28, U4_16, 0, 0, 0 },
+ [INSTR_RRF_URR] = { R_24, R_28, U8_16, 0, 0, 0 },
[INSTR_RRF_UUFF] = { F_24, U4_16, F_28, U4_20, 0, 0 },
[INSTR_RRF_UUFR] = { F_24, U4_16, R_28, U4_20, 0, 0 },
[INSTR_RRF_UURF] = { R_24, U4_16, F_28, U4_20, 0, 0 },
[INSTR_VRI_VVV0UU2] = { V_8, V_12, V_16, U8_28, U4_24, 0 },
[INSTR_VRR_0V] = { V_12, 0, 0, 0, 0, 0 },
[INSTR_VRR_0VV0U] = { V_12, V_16, U4_24, 0, 0, 0 },
- [INSTR_VRR_RV0U] = { R_8, V_12, U4_24, 0, 0, 0 },
+ [INSTR_VRR_RV0UU] = { R_8, V_12, U4_24, U4_28, 0, 0 },
[INSTR_VRR_VRR] = { V_8, R_12, R_16, 0, 0, 0 },
[INSTR_VRR_VV] = { V_8, V_12, 0, 0, 0, 0 },
[INSTR_VRR_VV0U] = { V_8, V_12, U4_32, 0, 0, 0 },
[INSTR_VRS_RVRDU] = { R_8, V_12, D_20, B_16, U4_32, 0 },
[INSTR_VRS_VRRD] = { V_8, R_12, D_20, B_16, 0, 0 },
[INSTR_VRS_VRRDU] = { V_8, R_12, D_20, B_16, U4_32, 0 },
- [INSTR_VRS_VVRD] = { V_8, V_12, D_20, B_16, 0, 0 },
[INSTR_VRS_VVRDU] = { V_8, V_12, D_20, B_16, U4_32, 0 },
[INSTR_VRV_VVXRDU] = { V_8, D_20, VX_12, B_16, U4_32, 0 },
- [INSTR_VRX_VRRD] = { V_8, D_20, X_12, B_16, 0, 0 },
[INSTR_VRX_VRRDU] = { V_8, D_20, X_12, B_16, U4_32, 0 },
[INSTR_VRX_VV] = { V_8, V_12, 0, 0, 0, 0 },
[INSTR_VSI_URDV] = { V_32, D_20, B_16, U8_8, 0, 0 },
#ifdef CONFIG_PREEMPT
pr_cont("PREEMPT ");
#endif
-#ifdef CONFIG_SMP
pr_cont("SMP ");
-#endif
if (debug_pagealloc_enabled())
pr_cont("DEBUG_PAGEALLOC");
pr_cont("\n");
stg %r3,__SF_EMPTY(%r15)
larl %r1,.Lpsw_idle_lpsw+4
stg %r1,__SF_EMPTY+8(%r15)
-#ifdef CONFIG_SMP
larl %r1,smp_cpu_mtid
llgf %r1,0(%r1)
ltgr %r1,%r1
jz .Lpsw_idle_stcctm
.insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+16(%r15)
.Lpsw_idle_stcctm:
-#endif
oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT
BPON
STCK __CLOCK_IDLE_ENTER(%r2)
mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2)
1: # calculate idle cycles
-#ifdef CONFIG_SMP
clg %r9,BASED(.Lcleanup_idle_insn)
jl 3f
larl %r1,smp_cpu_mtid
la %r3,8(%r3)
la %r4,8(%r4)
brct %r1,2b
-#endif
3: # account system time going idle
lg %r9,__LC_STEAL_TIMER
alg %r9,__CLOCK_IDLE_ENTER(%r2)
s32 offset;
} __packed;
-struct insn_args {
- struct jump_entry *entry;
- enum jump_label_type type;
-};
-
static void jump_label_make_nop(struct jump_entry *entry, struct insn *insn)
{
- /* brcl 0,0 */
+ /* brcl 0,offset */
insn->opcode = 0xc004;
- insn->offset = 0;
+ insn->offset = (jump_entry_target(entry) - jump_entry_code(entry)) >> 1;
}
static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn)
s390_kernel_write(code, &new, sizeof(new));
}
-static int __sm_arch_jump_label_transform(void *data)
+static void __jump_label_sync(void *dummy)
{
- struct insn_args *args = data;
-
- __jump_label_transform(args->entry, args->type, 0);
- return 0;
}
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
- struct insn_args args;
-
- args.entry = entry;
- args.type = type;
-
- stop_machine_cpuslocked(__sm_arch_jump_label_transform, &args, NULL);
+ __jump_label_transform(entry, type, 0);
+ smp_call_function(__jump_label_sync, NULL, 1);
}
void arch_jump_label_transform_static(struct jump_entry *entry,
*/
store_status(__do_machine_kdump, image);
}
-#endif
static unsigned long do_start_kdump(unsigned long addr)
{
return rc;
}
+#endif /* CONFIG_CRASH_DUMP */
+
/*
* Check if kdump checksums are valid: We call purgatory with parameter "0"
*/
#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+#include <linux/stop_machine.h>
#include <linux/cpufeature.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
};
static DEFINE_PER_CPU(struct cpu_info, cpu_info);
+static DEFINE_PER_CPU(int, cpu_relax_retry);
static bool machine_has_cpu_mhz;
on_each_cpu(update_cpu_mhz, NULL, 0);
}
-void notrace cpu_relax_yield(void)
+void notrace stop_machine_yield(const struct cpumask *cpumask)
{
- if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) {
- diag_stat_inc(DIAG_STAT_X044);
- asm volatile("diag 0,0,0x44");
+ int cpu, this_cpu;
+
+ this_cpu = smp_processor_id();
+ if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) {
+ __this_cpu_write(cpu_relax_retry, 0);
+ cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false);
+ if (cpu >= nr_cpu_ids)
+ return;
+ if (arch_vcpu_is_preempted(cpu))
+ smp_yield_cpu(cpu);
}
- barrier();
}
-EXPORT_SYMBOL(cpu_relax_yield);
/*
* cpu_init - initializes state that is per-CPU.
mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);
-#ifdef CONFIG_SMP
lc->spinlock_lockval = arch_spin_lockval(0);
lc->spinlock_index = 0;
arch_spin_lock_setup(0);
-#endif
lc->br_r1_trampoline = 0x07f1; /* br %r1 */
set_prefix((u32)(unsigned long) lc);
return -ENOMEM;
}
-#ifdef CONFIG_HOTPLUG_CPU
-
static void pcpu_free_lowcore(struct pcpu *pcpu)
{
unsigned long async_stack, nodat_stack, lowcore;
free_pages(lowcore, LC_ORDER);
}
-#endif /* CONFIG_HOTPLUG_CPU */
-
static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
{
struct lowcore *lc = pcpu->lowcore;
diag_stat_inc_norecursion(DIAG_STAT_X09C);
asm volatile("diag %0,0,0x9c"
: : "d" (pcpu_devices[cpu].address));
- } else if (MACHINE_HAS_DIAG44) {
+ } else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) {
diag_stat_inc_norecursion(DIAG_STAT_X044);
asm volatile("diag 0,0,0x44");
}
}
early_param("possible_cpus", _setup_possible_cpus);
-#ifdef CONFIG_HOTPLUG_CPU
-
int __cpu_disable(void)
{
unsigned long cregs[16];
for (;;) ;
}
-#endif /* CONFIG_HOTPLUG_CPU */
-
void __init smp_fill_possible_mask(void)
{
unsigned int possible, sclp_max, cpu;
return 0;
}
-#ifdef CONFIG_HOTPLUG_CPU
static ssize_t cpu_configure_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
return rc ? rc : count;
}
static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
-#endif /* CONFIG_HOTPLUG_CPU */
static ssize_t show_cpu_address(struct device *dev,
struct device_attribute *attr, char *buf)
static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
static struct attribute *cpu_common_attrs[] = {
-#ifdef CONFIG_HOTPLUG_CPU
&dev_attr_configure.attr,
-#endif
&dev_attr_address.attr,
NULL,
};
out_topology:
sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
out_cpu:
-#ifdef CONFIG_HOTPLUG_CPU
unregister_cpu(c);
-#endif
out:
return rc;
}
-#ifdef CONFIG_HOTPLUG_CPU
-
int __ref smp_rescan_cpus(void)
{
struct sclp_core_info *info;
return rc ? rc : count;
}
static DEVICE_ATTR_WO(rescan);
-#endif /* CONFIG_HOTPLUG_CPU */
static int __init s390_smp_init(void)
{
int cpu, rc = 0;
-#ifdef CONFIG_HOTPLUG_CPU
rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
if (rc)
return rc;
-#endif
for_each_present_cpu(cpu) {
rc = smp_add_present_cpu(cpu);
if (rc)
larl %r1,__swsusp_reset_dma
lg %r1,0(%r1)
BASR_EX %r14,%r1
-#ifdef CONFIG_SMP
larl %r1,smp_cpu_mt_shift
icm %r1,15,0(%r1)
jz smt_done
brc 8,smt_done /* accepted */
brc 2,smt_loop /* busy, try again */
smt_done:
-#endif
larl %r1,.Lnew_pgm_check_psw
lpswe 0(%r1)
pgm_check_entry:
void data_exception(struct pt_regs *regs)
{
- int signal = 0;
-
save_fpu_regs();
if (current->thread.fpu.fpc & FPC_DXC_MASK)
- signal = SIGFPE;
- else
- signal = SIGILL;
- if (signal == SIGFPE)
do_fp_trap(regs, current->thread.fpu.fpc);
- else if (signal)
- do_trap(regs, signal, ILL_ILLOPN, "data exception");
+ else
+ do_trap(regs, SIGILL, ILL_ILLOPN, "data exception");
}
void space_switch_exception(struct pt_regs *regs)
regs = state->regs;
if (unlikely(regs)) {
- sp = READ_ONCE_TASK_STACK(state->task, regs->gprs[15]);
+ sp = READ_ONCE_NOCHECK(regs->gprs[15]);
if (unlikely(outside_of_stack(state, sp))) {
if (!update_stack_info(state, sp))
goto out_err;
}
sf = (struct stack_frame *) sp;
- ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]);
+ ip = READ_ONCE_NOCHECK(sf->gprs[8]);
reliable = false;
regs = NULL;
} else {
sf = (struct stack_frame *) state->sp;
- sp = READ_ONCE_TASK_STACK(state->task, sf->back_chain);
+ sp = READ_ONCE_NOCHECK(sf->back_chain);
if (likely(sp)) {
/* Non-zero back-chain points to the previous frame */
if (unlikely(outside_of_stack(state, sp))) {
goto out_err;
}
sf = (struct stack_frame *) sp;
- ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]);
+ ip = READ_ONCE_NOCHECK(sf->gprs[8]);
reliable = true;
} else {
/* No back-chain, look for a pt_regs structure */
if (!on_stack(info, sp, sizeof(struct pt_regs)))
goto out_stop;
regs = (struct pt_regs *) sp;
- if (user_mode(regs))
+ if (READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE)
goto out_stop;
- ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr);
+ ip = READ_ONCE_NOCHECK(regs->psw.addr);
reliable = true;
}
}
/* Get the instruction pointer from pt_regs or the stack frame */
if (regs) {
- ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr);
+ ip = READ_ONCE_NOCHECK(regs->psw.addr);
reliable = true;
} else {
sf = (struct stack_frame *) sp;
- ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]);
+ ip = READ_ONCE_NOCHECK(sf->gprs[8]);
reliable = false;
}
set_kvm_facility(kvm->arch.model.fac_list, 147);
}
+ if (css_general_characteristics.aiv && test_facility(65))
+ set_kvm_facility(kvm->arch.model.fac_mask, 65);
+
kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
kvm->arch.model.ibc = sclp.ibc & 0x0fff;
#include <asm/io.h>
#include <asm/ptrace.h>
#include <asm/sclp.h>
+#include <asm/ap.h>
#include "gaccess.h"
#include "kvm-s390.h"
#include "trace.h"
}
}
+/*
+ * handle_pqap: Handling pqap interception
+ * @vcpu: the vcpu having issue the pqap instruction
+ *
+ * We now support PQAP/AQIC instructions and we need to correctly
+ * answer the guest even if no dedicated driver's hook is available.
+ *
+ * The intercepting code calls a dedicated callback for this instruction
+ * if a driver did register one in the CRYPTO satellite of the
+ * SIE block.
+ *
+ * If no callback is available, the queues are not available, return this
+ * response code to the caller and set CC to 3.
+ * Else return the response code returned by the callback.
+ */
+static int handle_pqap(struct kvm_vcpu *vcpu)
+{
+ struct ap_queue_status status = {};
+ unsigned long reg0;
+ int ret;
+ uint8_t fc;
+
+ /* Verify that the AP instruction are available */
+ if (!ap_instructions_available())
+ return -EOPNOTSUPP;
+ /* Verify that the guest is allowed to use AP instructions */
+ if (!(vcpu->arch.sie_block->eca & ECA_APIE))
+ return -EOPNOTSUPP;
+ /*
+ * The only possibly intercepted functions when AP instructions are
+ * available for the guest are AQIC and TAPQ with the t bit set
+ * since we do not set IC.3 (FIII) we currently will only intercept
+ * the AQIC function code.
+ */
+ reg0 = vcpu->run->s.regs.gprs[0];
+ fc = (reg0 >> 24) & 0xff;
+ if (WARN_ON_ONCE(fc != 0x03))
+ return -EOPNOTSUPP;
+
+ /* PQAP instruction is allowed for guest kernel only */
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ /* Common PQAP instruction specification exceptions */
+ /* bits 41-47 must all be zeros */
+ if (reg0 & 0x007f0000UL)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ /* APFT not install and T bit set */
+ if (!test_kvm_facility(vcpu->kvm, 15) && (reg0 & 0x00800000UL))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ /* APXA not installed and APID greater 64 or APQI greater 16 */
+ if (!(vcpu->kvm->arch.crypto.crycbd & 0x02) && (reg0 & 0x0000c0f0UL))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ /* AQIC function code specific exception */
+ /* facility 65 not present for AQIC function code */
+ if (!test_kvm_facility(vcpu->kvm, 65))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ /*
+ * Verify that the hook callback is registered, lock the owner
+ * and call the hook.
+ */
+ if (vcpu->kvm->arch.crypto.pqap_hook) {
+ if (!try_module_get(vcpu->kvm->arch.crypto.pqap_hook->owner))
+ return -EOPNOTSUPP;
+ ret = vcpu->kvm->arch.crypto.pqap_hook->hook(vcpu);
+ module_put(vcpu->kvm->arch.crypto.pqap_hook->owner);
+ if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000)
+ kvm_s390_set_psw_cc(vcpu, 3);
+ return ret;
+ }
+ /*
+ * A vfio_driver must register a hook.
+ * No hook means no driver to enable the SIE CRYCB and no queues.
+ * We send this response to the guest.
+ */
+ status.response_code = 0x01;
+ memcpy(&vcpu->run->s.regs.gprs[1], &status, sizeof(status));
+ kvm_s390_set_psw_cc(vcpu, 3);
+ return 0;
+}
+
static int handle_stfl(struct kvm_vcpu *vcpu)
{
int rc;
return handle_sthyi(vcpu);
case 0x7d:
return handle_stsi(vcpu);
+ case 0xaf:
+ return handle_pqap(vcpu);
case 0xb1:
return handle_stfl(vcpu);
case 0xb2:
# Makefile for s390-specific library files..
#
-lib-y += delay.o string.o uaccess.o find.o
+lib-y += delay.o string.o uaccess.o find.o spinlock.o
obj-y += mem.o xor.o
-lib-$(CONFIG_SMP) += spinlock.o
lib-$(CONFIG_KPROBES) += probes.o
lib-$(CONFIG_UPROBES) += probes.o
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
+#include <linux/swiotlb.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/export.h>
#include <linux/cma.h>
#include <linux/gfp.h>
+#include <linux/dma-mapping.h>
#include <asm/processor.h>
#include <linux/uaccess.h>
#include <asm/pgtable.h>
#include <asm/sclp.h>
#include <asm/set_memory.h>
#include <asm/kasan.h>
+#include <asm/dma-mapping.h>
+#include <asm/uv.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
}
+int set_memory_encrypted(unsigned long addr, int numpages)
+{
+ int i;
+
+ /* make specified pages unshared, (swiotlb, dma_free) */
+ for (i = 0; i < numpages; ++i) {
+ uv_remove_shared(addr);
+ addr += PAGE_SIZE;
+ }
+ return 0;
+}
+
+int set_memory_decrypted(unsigned long addr, int numpages)
+{
+ int i;
+ /* make specified pages shared (swiotlb, dma_alloca) */
+ for (i = 0; i < numpages; ++i) {
+ uv_set_shared(addr);
+ addr += PAGE_SIZE;
+ }
+ return 0;
+}
+
+/* are we a protected virtualization guest? */
+bool sev_active(void)
+{
+ return is_prot_virt_guest();
+}
+
+/* protected virtualization */
+static void pv_init(void)
+{
+ if (!is_prot_virt_guest())
+ return;
+
+ /* make sure bounce buffers are shared */
+ swiotlb_init(1);
+ swiotlb_update_mem_attributes();
+ swiotlb_force = SWIOTLB_FORCE;
+}
+
void __init mem_init(void)
{
cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask);
set_max_mapnr(max_low_pfn);
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+ pv_init();
+
/* Setup guest page hinting */
cmma_init();
* Therefore we have a read-modify-write sequence: the function reads eight
* bytes from destination at an eight byte boundary, modifies the bytes
* requested and writes the result back in a loop.
- *
- * Note: this means that this function may not be called concurrently on
- * several cpus with overlapping words, since this may potentially
- * cause data corruption.
*/
+static DEFINE_SPINLOCK(s390_kernel_write_lock);
+
void notrace s390_kernel_write(void *dst, const void *src, size_t size)
{
+ unsigned long flags;
long copied;
+ spin_lock_irqsave(&s390_kernel_write_lock, flags);
while (size) {
copied = s390_kernel_write_odd(dst, src, size);
dst += copied;
src += copied;
size -= copied;
}
+ spin_unlock_irqrestore(&s390_kernel_write_lock, flags);
}
static int __memcpy_real(void *dest, void *src, size_t count)
{
if (!(current->flags & PF_RANDOMIZE))
return 0;
- if (current->personality & ADDR_NO_RANDOMIZE)
- return 0;
return STACK_RND_MASK << PAGE_SHIFT;
}
if (!len)
continue;
- if (static_branch_likely(&have_mio))
+ if (zpci_use_mio(zdev))
pdev->resource[i].start =
(resource_size_t __force) zdev->bars[i].mio_wb;
else
- pdev->resource[i].start =
- (resource_size_t __force) pci_iomap(pdev, i, 0);
+ pdev->resource[i].start = (resource_size_t __force)
+ pci_iomap_range_fh(pdev, i, 0, 0);
pdev->resource[i].end = pdev->resource[i].start + len - 1;
}
static void zpci_unmap_resources(struct pci_dev *pdev)
{
+ struct zpci_dev *zdev = to_zpci(pdev);
resource_size_t len;
int i;
- if (static_branch_likely(&have_mio))
+ if (zpci_use_mio(zdev))
return;
for (i = 0; i < PCI_BAR_COUNT; i++) {
len = pci_resource_len(pdev, i);
if (!len)
continue;
- pci_iounmap(pdev, (void __iomem __force *)
- pdev->resource[i].start);
+ pci_iounmap_fh(pdev, (void __iomem __force *)
+ pdev->resource[i].start);
}
}
if (zdev->bars[i].val & 4)
flags |= IORESOURCE_MEM_64;
- if (static_branch_likely(&have_mio))
+ if (zpci_use_mio(zdev))
addr = (unsigned long) zdev->bars[i].mio_wb;
else
addr = ZPCI_ADDR(entry);
goto out;
zdev->fh = fh;
- if (zdev->mio_capable) {
+ if (zpci_use_mio(zdev)) {
rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_MIO);
zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
if (rc)
-kexec-purgatory.c
+purgatory
+purgatory.lds
purgatory.ro
kapi := arch/$(ARCH)/include/generated/asm
kapi-hdrs-y := $(kapi)/facility-defs.h $(kapi)/dis-defs.h
-targets += $(addprefix ../../../,$(kapi-hdrs-y))
PHONY += kapi
kapi: $(kapi-hdrs-y)
hostprogs-y += gen_facilities
hostprogs-y += gen_opcode_table
-HOSTCFLAGS_gen_facilities.o += -Wall $(LINUXINCLUDE)
-HOSTCFLAGS_gen_opcode_table.o += -Wall $(LINUXINCLUDE)
-
-# Ensure output directory exists
-_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
+HOSTCFLAGS_gen_facilities.o += $(LINUXINCLUDE)
filechk_facility-defs.h = $(obj)/gen_facilities
b92f kmc RRE_RR
b930 cgfr RRE_RR
b931 clgfr RRE_RR
+b938 sortl RRE_RR
+b939 dfltcc RRF_R0RR2
+b93a kdsa RRE_RR
b93c ppno RRE_RR
b93e kimd RRE_RR
b93f klmd RRE_RR
b95b cxlftr RRF_UUFR
b960 cgrt RRF_U0RR
b961 clgrt RRF_U0RR
+b964 nngrk RRF_R0RR2
+b965 ocgrk RRF_R0RR2
+b966 nogrk RRF_R0RR2
+b967 nxgrk RRF_R0RR2
b972 crt RRF_U0RR
b973 clrt RRF_U0RR
+b974 nnrk RRF_R0RR2
+b975 ocrk RRF_R0RR2
+b976 nork RRF_R0RR2
+b977 nxrk RRF_R0RR2
b980 ngr RRE_RR
b981 ogr RRE_RR
b982 xgr RRE_RR
b9a0 clp RRF_U0RR
b9a1 tpei RRE_RR
b9a2 ptf RRE_R0
+b9a4 uvc RRF_URR
b9aa lptea RRF_RURR2
b9ab essa RRF_U0RR
b9ac irbm RRE_RR
b9bd trtre RRF_U0RR
b9be srstu RRE_RR
b9bf trte RRF_U0RR
+b9c0 selhhhr RRF_RURR
b9c8 ahhhr RRF_R0RR2
b9c9 shhhr RRF_R0RR2
b9ca alhhhr RRF_R0RR2
b9d0 pcistg RRE_RR
b9d2 pcilg RRE_RR
b9d3 rpcit RRE_RR
+b9d4 pcistgi RRE_RR
+b9d5 pciwb RRE_00
+b9d6 pcilgi RRE_RR
b9d8 ahhlr RRF_R0RR2
b9d9 shhlr RRF_R0RR2
b9da alhhlr RRF_R0RR2
b9dd chlr RRE_RR
b9df clhlr RRE_RR
b9e0 locfhr RRF_U0RR
-b9e1 popcnt RRE_RR
+b9e1 popcnt RRF_U0RR
b9e2 locgr RRF_U0RR
+b9e3 selgr RRF_RURR
b9e4 ngrk RRF_R0RR2
+b9e5 ncgrk RRF_R0RR2
b9e6 ogrk RRF_R0RR2
b9e7 xgrk RRF_R0RR2
b9e8 agrk RRF_R0RR2
b9eb slgrk RRF_R0RR2
b9ec mgrk RRF_R0RR2
b9ed msgrkc RRF_R0RR2
+b9f0 selr RRF_RURR
b9f2 locr RRF_U0RR
b9f4 nrk RRF_R0RR2
+b9f5 ncrk RRF_R0RR2
b9f6 ork RRF_R0RR2
b9f7 xrk RRF_R0RR2
b9f8 ark RRF_R0RR2
e500 lasp SSE_RDRD
e501 tprot SSE_RDRD
e502 strag SSE_RDRD
+e50a mvcrl SSE_RDRD
e50e mvcsk SSE_RDRD
e50f mvcdk SSE_RDRD
e544 mvhhi SIL_RDI
e55d clfhsi SIL_RDU
e560 tbegin SIL_RDU
e561 tbeginc SIL_RDU
+e601 vlebrh VRX_VRRDU
+e602 vlebrg VRX_VRRDU
+e603 vlebrf VRX_VRRDU
+e604 vllebrz VRX_VRRDU
+e605 vlbrrep VRX_VRRDU
+e606 vlbr VRX_VRRDU
+e607 vler VRX_VRRDU
+e609 vstebrh VRX_VRRDU
+e60a vstebrg VRX_VRRDU
+e60b vstebrf VRX_VRRDU
+e60e vstbr VRX_VRRDU
+e60f vster VRX_VRRDU
e634 vpkz VSI_URDV
e635 vlrl VSI_URDV
e637 vlrlr VRS_RRDV
e63d vstrl VSI_URDV
e63f vstrlr VRS_RRDV
e649 vlip VRI_V0UU2
-e650 vcvb VRR_RV0U
-e652 vcvbg VRR_RV0U
+e650 vcvb VRR_RV0UU
+e652 vcvbg VRR_RV0UU
e658 vcvd VRI_VR0UU
e659 vsrp VRI_VVUUU2
e65a vcvdg VRI_VR0UU
e703 vlef VRX_VRRDU
e704 vllez VRX_VRRDU
e705 vlrep VRX_VRRDU
-e706 vl VRX_VRRD
+e706 vl VRX_VRRDU
e707 vlbb VRX_VRRDU
e708 vsteb VRX_VRRDU
e709 vsteh VRX_VRRDU
e70a vsteg VRX_VRRDU
e70b vstef VRX_VRRDU
-e70e vst VRX_VRRD
+e70e vst VRX_VRRDU
e712 vgeg VRV_VVXRDU
e713 vgef VRV_VVXRDU
e71a vsceg VRV_VVXRDU
e727 lcbb RXE_RRRDU
e730 vesl VRS_VVRDU
e733 verll VRS_VVRDU
-e736 vlm VRS_VVRD
+e736 vlm VRS_VVRDU
e737 vll VRS_VRRD
e738 vesrl VRS_VVRDU
e73a vesra VRS_VVRDU
-e73e vstm VRS_VVRD
+e73e vstm VRS_VVRDU
e73f vstl VRS_VRRD
e740 vleib VRI_V0IU
e741 vleih VRI_V0IU
e782 vfae VRR_VVV0U0U
e784 vpdi VRR_VVV0U
e785 vbperm VRR_VVV
+e786 vsld VRI_VVV0U
+e787 vsrd VRI_VVV0U
e78a vstrc VRR_VVVUU0V
+e78b vstrs VRR_VVVUU0V
e78c vperm VRR_VVV0V
e78d vsel VRR_VVV0V
e78e vfms VRR_VVVU0UV
ebc0 tp RSL_R0RD
ebd0 pcistb RSY_RRRD
ebd1 sic RSY_RRRD
+ebd4 pcistbi RSY_RRRD
ebdc srak RSY_RRRD
ebdd slak RSY_RRRD
ebde srlk RSY_RRRD
select HAVE_DEBUG_STACKOVERFLOW
select MODULES_USE_ELF_REL
select OLD_SIGACTION
+ select GENERIC_VDSO_32
config X86_64
def_bool y
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
+ select GENERIC_GETTIMEOFDAY
select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
select HAVE_ACPI_APEI if ACPI
select HAVE_ACPI_APEI_NMI if ACPI
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_USER_RETURN_NOTIFIER
+ select HAVE_GENERIC_VDSO
select HOTPLUG_SMT if SMP
select IRQ_FORCED_THREADING
select NEED_SG_DMA_LENGTH
struct thread_info *ti = current_thread_info();
unsigned long ret = 0;
- bool emulated = false;
u32 work;
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
BUG_ON(regs != task_pt_regs(current));
- work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
+ work = READ_ONCE(ti->flags);
- if (unlikely(work & _TIF_SYSCALL_EMU))
- emulated = true;
-
- if ((emulated || (work & _TIF_SYSCALL_TRACE)) &&
- tracehook_report_syscall_entry(regs))
- return -1L;
-
- if (emulated)
- return -1L;
+ if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
+ ret = tracehook_report_syscall_entry(regs);
+ if (ret || (work & _TIF_SYSCALL_EMU))
+ return -1L;
+ }
#ifdef CONFIG_SECCOMP
/*
.endr
END(irq_entries_start)
+#ifdef CONFIG_X86_LOCAL_APIC
+ .align 8
+ENTRY(spurious_entries_start)
+ vector=FIRST_SYSTEM_VECTOR
+ .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+ pushl $(~vector+0x80) /* Note: always in signed byte range */
+ vector=vector+1
+ jmp common_spurious
+ .align 8
+ .endr
+END(spurious_entries_start)
+
+common_spurious:
+ ASM_CLAC
+ addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
+ SAVE_ALL switch_stacks=1
+ ENCODE_FRAME_POINTER
+ TRACE_IRQS_OFF
+ movl %esp, %eax
+ call smp_spurious_interrupt
+ jmp ret_from_intr
+ENDPROC(common_interrupt)
+#endif
+
/*
* the CPU automatically disables interrupts when executing an IRQ vector,
* so IRQ-flags tracing has to follow that:
.endr
END(irq_entries_start)
+ .align 8
+ENTRY(spurious_entries_start)
+ vector=FIRST_SYSTEM_VECTOR
+ .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+ UNWIND_HINT_IRET_REGS
+ pushq $(~vector+0x80) /* Note: always in signed byte range */
+ jmp common_spurious
+ .align 8
+ vector=vector+1
+ .endr
+END(spurious_entries_start)
+
.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
#ifdef CONFIG_DEBUG_ENTRY
pushq %rax
/* Interrupt entry/exit. */
- /*
- * The interrupt stubs push (~vector+0x80) onto the stack and
- * then jump to common_interrupt.
- */
+/*
+ * The interrupt stubs push (~vector+0x80) onto the stack and
+ * then jump to common_spurious/interrupt.
+ */
+common_spurious:
+ addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
+ call interrupt_entry
+ UNWIND_HINT_REGS indirect=1
+ call smp_spurious_interrupt /* rdi points to pt_regs */
+ jmp ret_from_intr
+END(common_spurious)
+_ASM_NOKPROBE(common_spurious)
+
+/* common_interrupt is a hotpath. Align it */
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
# Building vDSO images for x86.
#
+# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
+# the inclusion of generic Makefile.
+ARCH_REL_TYPE_ABS := R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE|
+ARCH_REL_TYPE_ABS += R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE
+include $(srctree)/lib/vdso/Makefile
+
KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
$(call if_changed,vdso)
+ $(call if_changed,vdso_check)
HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi
hostprogs-y += vdso2c
$(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
$(call if_changed,vdso)
+ $(call if_changed,vdso_check)
CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1
$(obj)/vdso32/system_call.o \
$(obj)/vdso32/sigreturn.o
$(call if_changed,vdso)
+ $(call if_changed,vdso_check)
#
# The DSO images are built using a special linker script.
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright 2006 Andi Kleen, SUSE Labs.
- *
* Fast user context implementation of clock_gettime, gettimeofday, and time.
*
+ * Copyright 2006 Andi Kleen, SUSE Labs.
+ * Copyright 2019 ARM Limited
+ *
* 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
* sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
- *
- * The code should have no internal unresolved relocations.
- * Check with readelf after changing.
*/
-
-#include <uapi/linux/time.h>
-#include <asm/vgtod.h>
-#include <asm/vvar.h>
-#include <asm/unistd.h>
-#include <asm/msr.h>
-#include <asm/pvclock.h>
-#include <asm/mshyperv.h>
-#include <linux/math64.h>
#include <linux/time.h>
#include <linux/kernel.h>
+#include <linux/types.h>
-#define gtod (&VVAR(vsyscall_gtod_data))
+#include "../../../../lib/vdso/gettimeofday.c"
-extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
-extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
+extern int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);
-#ifdef CONFIG_PARAVIRT_CLOCK
-extern u8 pvclock_page[PAGE_SIZE]
- __attribute__((visibility("hidden")));
-#endif
-
-#ifdef CONFIG_HYPERV_TSCPAGE
-extern u8 hvclock_page[PAGE_SIZE]
- __attribute__((visibility("hidden")));
-#endif
-
-#ifndef BUILD_VDSO32
-
-notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
{
- long ret;
- asm ("syscall" : "=a" (ret), "=m" (*ts) :
- "0" (__NR_clock_gettime), "D" (clock), "S" (ts) :
- "rcx", "r11");
- return ret;
+ return __cvdso_gettimeofday(tv, tz);
}
-#else
+int gettimeofday(struct __kernel_old_timeval *, struct timezone *)
+ __attribute__((weak, alias("__vdso_gettimeofday")));
-notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+time_t __vdso_time(time_t *t)
{
- long ret;
-
- asm (
- "mov %%ebx, %%edx \n"
- "mov %[clock], %%ebx \n"
- "call __kernel_vsyscall \n"
- "mov %%edx, %%ebx \n"
- : "=a" (ret), "=m" (*ts)
- : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts)
- : "edx");
- return ret;
+ return __cvdso_time(t);
}
-#endif
+time_t time(time_t *t) __attribute__((weak, alias("__vdso_time")));
-#ifdef CONFIG_PARAVIRT_CLOCK
-static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
-{
- return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
-}
-static notrace u64 vread_pvclock(void)
-{
- const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
- u32 version;
- u64 ret;
-
- /*
- * Note: The kernel and hypervisor must guarantee that cpu ID
- * number maps 1:1 to per-CPU pvclock time info.
- *
- * Because the hypervisor is entirely unaware of guest userspace
- * preemption, it cannot guarantee that per-CPU pvclock time
- * info is updated if the underlying CPU changes or that that
- * version is increased whenever underlying CPU changes.
- *
- * On KVM, we are guaranteed that pvti updates for any vCPU are
- * atomic as seen by *all* vCPUs. This is an even stronger
- * guarantee than we get with a normal seqlock.
- *
- * On Xen, we don't appear to have that guarantee, but Xen still
- * supplies a valid seqlock using the version field.
- *
- * We only do pvclock vdso timing at all if
- * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
- * mean that all vCPUs have matching pvti and that the TSC is
- * synced, so we can just look at vCPU 0's pvti.
- */
-
- do {
- version = pvclock_read_begin(pvti);
-
- if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)))
- return U64_MAX;
-
- ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
- } while (pvclock_read_retry(pvti, version));
-
- return ret;
-}
-#endif
-#ifdef CONFIG_HYPERV_TSCPAGE
-static notrace u64 vread_hvclock(void)
-{
- const struct ms_hyperv_tsc_page *tsc_pg =
- (const struct ms_hyperv_tsc_page *)&hvclock_page;
+#if defined(CONFIG_X86_64) && !defined(BUILD_VDSO32_64)
+/* both 64-bit and x32 use these */
+extern int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
+extern int __vdso_clock_getres(clockid_t clock, struct __kernel_timespec *res);
- return hv_read_tsc_page(tsc_pg);
-}
-#endif
-
-notrace static inline u64 vgetcyc(int mode)
+int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
{
- if (mode == VCLOCK_TSC)
- return (u64)rdtsc_ordered();
-
- /*
- * For any memory-mapped vclock type, we need to make sure that gcc
- * doesn't cleverly hoist a load before the mode check. Otherwise we
- * might end up touching the memory-mapped page even if the vclock in
- * question isn't enabled, which will segfault. Hence the barriers.
- */
-#ifdef CONFIG_PARAVIRT_CLOCK
- if (mode == VCLOCK_PVCLOCK) {
- barrier();
- return vread_pvclock();
- }
-#endif
-#ifdef CONFIG_HYPERV_TSCPAGE
- if (mode == VCLOCK_HVCLOCK) {
- barrier();
- return vread_hvclock();
- }
-#endif
- return U64_MAX;
+ return __cvdso_clock_gettime(clock, ts);
}
-notrace static int do_hres(clockid_t clk, struct timespec *ts)
-{
- struct vgtod_ts *base = >od->basetime[clk];
- u64 cycles, last, sec, ns;
- unsigned int seq;
-
- do {
- seq = gtod_read_begin(gtod);
- cycles = vgetcyc(gtod->vclock_mode);
- ns = base->nsec;
- last = gtod->cycle_last;
- if (unlikely((s64)cycles < 0))
- return vdso_fallback_gettime(clk, ts);
- if (cycles > last)
- ns += (cycles - last) * gtod->mult;
- ns >>= gtod->shift;
- sec = base->sec;
- } while (unlikely(gtod_read_retry(gtod, seq)));
-
- /*
- * Do this outside the loop: a race inside the loop could result
- * in __iter_div_u64_rem() being extremely slow.
- */
- ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
- ts->tv_nsec = ns;
-
- return 0;
-}
+int clock_gettime(clockid_t, struct __kernel_timespec *)
+ __attribute__((weak, alias("__vdso_clock_gettime")));
-notrace static void do_coarse(clockid_t clk, struct timespec *ts)
+int __vdso_clock_getres(clockid_t clock,
+ struct __kernel_timespec *res)
{
- struct vgtod_ts *base = >od->basetime[clk];
- unsigned int seq;
-
- do {
- seq = gtod_read_begin(gtod);
- ts->tv_sec = base->sec;
- ts->tv_nsec = base->nsec;
- } while (unlikely(gtod_read_retry(gtod, seq)));
+ return __cvdso_clock_getres(clock, res);
}
+int clock_getres(clockid_t, struct __kernel_timespec *)
+ __attribute__((weak, alias("__vdso_clock_getres")));
-notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+#else
+/* i386 only */
+extern int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts);
+extern int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res);
+
+int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts)
{
- unsigned int msk;
-
- /* Sort out negative (CPU/FD) and invalid clocks */
- if (unlikely((unsigned int) clock >= MAX_CLOCKS))
- return vdso_fallback_gettime(clock, ts);
-
- /*
- * Convert the clockid to a bitmask and use it to check which
- * clocks are handled in the VDSO directly.
- */
- msk = 1U << clock;
- if (likely(msk & VGTOD_HRES)) {
- return do_hres(clock, ts);
- } else if (msk & VGTOD_COARSE) {
- do_coarse(clock, ts);
- return 0;
- }
- return vdso_fallback_gettime(clock, ts);
+ return __cvdso_clock_gettime32(clock, ts);
}
-int clock_gettime(clockid_t, struct timespec *)
+int clock_gettime(clockid_t, struct old_timespec32 *)
__attribute__((weak, alias("__vdso_clock_gettime")));
-notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts)
{
- if (likely(tv != NULL)) {
- struct timespec *ts = (struct timespec *) tv;
-
- do_hres(CLOCK_REALTIME, ts);
- tv->tv_usec /= 1000;
- }
- if (unlikely(tz != NULL)) {
- tz->tz_minuteswest = gtod->tz_minuteswest;
- tz->tz_dsttime = gtod->tz_dsttime;
- }
-
- return 0;
+ return __cvdso_clock_gettime(clock, ts);
}
-int gettimeofday(struct timeval *, struct timezone *)
- __attribute__((weak, alias("__vdso_gettimeofday")));
-/*
- * This will break when the xtime seconds get inaccurate, but that is
- * unlikely
- */
-notrace time_t __vdso_time(time_t *t)
-{
- /* This is atomic on x86 so we don't need any locks. */
- time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec);
+int clock_gettime64(clockid_t, struct __kernel_timespec *)
+ __attribute__((weak, alias("__vdso_clock_gettime64")));
- if (t)
- *t = result;
- return result;
+int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res)
+{
+ return __cvdso_clock_getres_time32(clock, res);
}
-time_t time(time_t *t)
- __attribute__((weak, alias("__vdso_time")));
+
+int clock_getres(clockid_t, struct old_timespec32 *)
+ __attribute__((weak, alias("__vdso_clock_getres")));
+#endif
__vdso_getcpu;
time;
__vdso_time;
+ clock_getres;
+ __vdso_clock_getres;
local: *;
};
}
__vdso_clock_gettime;
__vdso_gettimeofday;
__vdso_time;
+ __vdso_clock_getres;
+ __vdso_clock_gettime64;
};
LINUX_2.5 {
__vdso_gettimeofday;
__vdso_getcpu;
__vdso_time;
+ __vdso_clock_getres;
local: *;
};
}
#include <asm/page.h>
#include <asm/desc.h>
#include <asm/cpufeature.h>
-#include <asm/mshyperv.h>
+#include <clocksource/hyperv_timer.h>
#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
#
# Makefile for the x86 low level vsyscall code
#
-obj-y := vsyscall_gtod.o
-
obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
- * Copyright 2003 Andi Kleen, SuSE Labs.
- *
- * Modified for x86 32 bit architecture by
- * Stefani Seibold <stefani@seibold.net>
- * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
- *
- * Thanks to hpa@transmeta.com for some useful hint.
- * Special thanks to Ingo Molnar for his early experience with
- * a different vsyscall implementation for Linux/IA32 and for the name.
- *
- */
-
-#include <linux/timekeeper_internal.h>
-#include <asm/vgtod.h>
-#include <asm/vvar.h>
-
-int vclocks_used __read_mostly;
-
-DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
-
-void update_vsyscall_tz(void)
-{
- vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest;
- vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime;
-}
-
-void update_vsyscall(struct timekeeper *tk)
-{
- int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
- struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
- struct vgtod_ts *base;
- u64 nsec;
-
- /* Mark the new vclock used. */
- BUILD_BUG_ON(VCLOCK_MAX >= 32);
- WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
-
- gtod_write_begin(vdata);
-
- /* copy vsyscall data */
- vdata->vclock_mode = vclock_mode;
- vdata->cycle_last = tk->tkr_mono.cycle_last;
- vdata->mask = tk->tkr_mono.mask;
- vdata->mult = tk->tkr_mono.mult;
- vdata->shift = tk->tkr_mono.shift;
-
- base = &vdata->basetime[CLOCK_REALTIME];
- base->sec = tk->xtime_sec;
- base->nsec = tk->tkr_mono.xtime_nsec;
-
- base = &vdata->basetime[CLOCK_TAI];
- base->sec = tk->xtime_sec + (s64)tk->tai_offset;
- base->nsec = tk->tkr_mono.xtime_nsec;
-
- base = &vdata->basetime[CLOCK_MONOTONIC];
- base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
- nsec = tk->tkr_mono.xtime_nsec;
- nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
- while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
- nsec -= ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
- base->sec++;
- }
- base->nsec = nsec;
-
- base = &vdata->basetime[CLOCK_REALTIME_COARSE];
- base->sec = tk->xtime_sec;
- base->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
-
- base = &vdata->basetime[CLOCK_MONOTONIC_COARSE];
- base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
- nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
- nsec += tk->wall_to_monotonic.tv_nsec;
- while (nsec >= NSEC_PER_SEC) {
- nsec -= NSEC_PER_SEC;
- base->sec++;
- }
- base->nsec = nsec;
-
- gtod_write_end(vdata);
-}
}
/* sample_regs_user never support XMM registers */
- if (unlikely(event->attr.sample_regs_user & PEBS_XMM_REGS))
+ if (unlikely(event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK))
return -EINVAL;
/*
* Besides the general purpose registers, XMM registers may
* be collected in PEBS on some platforms, e.g. Icelake
*/
- if (unlikely(event->attr.sample_regs_intr & PEBS_XMM_REGS)) {
- if (x86_pmu.pebs_no_xmm_regs)
+ if (unlikely(event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK)) {
+ if (!(event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS))
return -EINVAL;
if (!event->attr.precise_ip)
return;
}
- if (perf_hw_regs(regs)) {
- if (perf_callchain_store(entry, regs->ip))
- return;
+ if (perf_callchain_store(entry, regs->ip))
+ return;
+
+ if (perf_hw_regs(regs))
unwind_start(&state, current, regs, NULL);
- } else {
+ else
unwind_start(&state, current, NULL, (void *)regs->sp);
- }
for (; !unwind_done(&state); unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
pebs_data_cfg |= PEBS_DATACFG_GP;
if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
- (attr->sample_regs_intr & PEBS_XMM_REGS))
+ (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK))
pebs_data_cfg |= PEBS_DATACFG_XMMS;
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
- if (x86_pmu.version <= 4) {
+ if (x86_pmu.version <= 4)
x86_pmu.pebs_no_isolation = 1;
- x86_pmu.pebs_no_xmm_regs = 1;
- }
+
if (x86_pmu.pebs) {
char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
char *pebs_qual = "";
PERF_SAMPLE_TIME;
x86_pmu.flags |= PMU_FL_PEBS_ALL;
pebs_qual = "-baseline";
+ x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
} else {
/* Only basic record supported */
- x86_pmu.pebs_no_xmm_regs = 1;
x86_pmu.large_pebs_flags &=
~(PERF_SAMPLE_ADDR |
PERF_SAMPLE_TIME |
(1ULL << PERF_REG_X86_R14) | \
(1ULL << PERF_REG_X86_R15))
-#define PEBS_XMM_REGS \
- ((1ULL << PERF_REG_X86_XMM0) | \
- (1ULL << PERF_REG_X86_XMM1) | \
- (1ULL << PERF_REG_X86_XMM2) | \
- (1ULL << PERF_REG_X86_XMM3) | \
- (1ULL << PERF_REG_X86_XMM4) | \
- (1ULL << PERF_REG_X86_XMM5) | \
- (1ULL << PERF_REG_X86_XMM6) | \
- (1ULL << PERF_REG_X86_XMM7) | \
- (1ULL << PERF_REG_X86_XMM8) | \
- (1ULL << PERF_REG_X86_XMM9) | \
- (1ULL << PERF_REG_X86_XMM10) | \
- (1ULL << PERF_REG_X86_XMM11) | \
- (1ULL << PERF_REG_X86_XMM12) | \
- (1ULL << PERF_REG_X86_XMM13) | \
- (1ULL << PERF_REG_X86_XMM14) | \
- (1ULL << PERF_REG_X86_XMM15))
-
/*
* Per register state.
*/
pebs_broken :1,
pebs_prec_dist :1,
pebs_no_tlb :1,
- pebs_no_isolation :1,
- pebs_no_xmm_regs :1;
+ pebs_no_isolation :1;
int pebs_record_size;
int pebs_buffer_size;
int max_pebs_events;
#include <linux/version.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
-#include <linux/clockchips.h>
#include <linux/hyperv.h>
#include <linux/slab.h>
#include <linux/cpuhotplug.h>
-
-#ifdef CONFIG_HYPERV_TSCPAGE
-
-static struct ms_hyperv_tsc_page *tsc_pg;
-
-struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
-{
- return tsc_pg;
-}
-EXPORT_SYMBOL_GPL(hv_get_tsc_page);
-
-static u64 read_hv_clock_tsc(struct clocksource *arg)
-{
- u64 current_tick = hv_read_tsc_page(tsc_pg);
-
- if (current_tick == U64_MAX)
- rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
-
- return current_tick;
-}
-
-static struct clocksource hyperv_cs_tsc = {
- .name = "hyperv_clocksource_tsc_page",
- .rating = 400,
- .read = read_hv_clock_tsc,
- .mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-#endif
-
-static u64 read_hv_clock_msr(struct clocksource *arg)
-{
- u64 current_tick;
- /*
- * Read the partition counter to get the current tick count. This count
- * is set to 0 when the partition is created and is incremented in
- * 100 nanosecond units.
- */
- rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
- return current_tick;
-}
-
-static struct clocksource hyperv_cs_msr = {
- .name = "hyperv_clocksource_msr",
- .rating = 400,
- .read = read_hv_clock_msr,
- .mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
+#include <clocksource/hyperv_timer.h>
void *hv_hypercall_pg;
EXPORT_SYMBOL_GPL(hv_hypercall_pg);
-struct clocksource *hyperv_cs;
-EXPORT_SYMBOL_GPL(hyperv_cs);
u32 *hv_vp_index;
EXPORT_SYMBOL_GPL(hv_vp_index);
x86_init.pci.arch_init = hv_pci_init;
- /*
- * Register Hyper-V specific clocksource.
- */
-#ifdef CONFIG_HYPERV_TSCPAGE
- if (ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE) {
- union hv_x64_msr_hypercall_contents tsc_msr;
-
- tsc_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
- if (!tsc_pg)
- goto register_msr_cs;
-
- hyperv_cs = &hyperv_cs_tsc;
-
- rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
-
- tsc_msr.enable = 1;
- tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg);
-
- wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
-
- hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK;
-
- clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
- return;
- }
-register_msr_cs:
-#endif
- /*
- * For 32 bit guests just use the MSR based mechanism for reading
- * the partition counter.
- */
-
- hyperv_cs = &hyperv_cs_msr;
- if (ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE)
- clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
-
+ /* Register Hyper-V specific clocksource */
+ hv_init_clocksource();
return;
remove_cpuhp_state:
extern int local_apic_timer_c2_ok;
extern int disable_apic;
-extern unsigned int lapic_timer_frequency;
+extern unsigned int lapic_timer_period;
extern enum apic_intr_mode_id apic_intr_mode;
enum apic_intr_mode_id {
extern int apic_force_enable(unsigned long addr);
#endif
-extern void apic_bsp_setup(bool upmode);
extern void apic_ap_setup(void);
/*
extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace);
extern void lapic_online(void);
extern void lapic_offline(void);
+extern bool apic_needs_pit(void);
#else /* !CONFIG_X86_LOCAL_APIC */
static inline void lapic_shutdown(void) { }
static inline void apic_intr_mode_init(void) { }
static inline void lapic_assign_system_vectors(void) { }
static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
+static inline bool apic_needs_pit(void) { return true; }
#endif /* !CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_X2APIC
#define trace_irq_entries_start irq_entries_start
#endif
+extern char spurious_entries_start[];
+
#define VECTOR_UNUSED NULL
-#define VECTOR_RETRIGGERED ((void *)~0UL)
+#define VECTOR_SHUTDOWN ((void *)~0UL)
+#define VECTOR_RETRIGGERED ((void *)~1UL)
typedef struct irq_desc* vector_irq_t[NR_VECTORS];
DECLARE_PER_CPU(vector_irq_t, vector_irq);
#define HV_STATUS_INVALID_CONNECTION_ID 18
#define HV_STATUS_INSUFFICIENT_BUFFERS 19
+/*
+ * The Hyper-V TimeRefCount register and the TSC
+ * page provide a guest VM clock with 100ns tick rate
+ */
+#define HV_CLOCK_HZ (NSEC_PER_SEC/100)
+
typedef struct _HV_REFERENCE_TSC_PAGE {
__u32 tsc_sequence;
__u32 res1;
#define hv_get_crash_ctl(val) \
rdmsrl(HV_X64_MSR_CRASH_CTL, val)
+#define hv_get_time_ref_count(val) \
+ rdmsrl(HV_X64_MSR_TIME_REF_COUNT, val)
+
+#define hv_get_reference_tsc(val) \
+ rdmsrl(HV_X64_MSR_REFERENCE_TSC, val)
+#define hv_set_reference_tsc(val) \
+ wrmsrl(HV_X64_MSR_REFERENCE_TSC, val)
+#define hv_set_clocksource_vdso(val) \
+ ((val).archdata.vclock_mode = VCLOCK_HVCLOCK)
+#define hv_get_raw_timer() rdtsc_ordered()
+
void hyperv_callback_vector(void);
void hyperv_reenlightenment_vector(void);
#ifdef CONFIG_TRACING
#if IS_ENABLED(CONFIG_HYPERV)
-extern struct clocksource *hyperv_cs;
extern void *hv_hypercall_pg;
extern void __percpu **hyperv_pcpu_input_arg;
}
#endif /* CONFIG_HYPERV */
-#ifdef CONFIG_HYPERV_TSCPAGE
-struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
-static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
- u64 *cur_tsc)
-{
- u64 scale, offset;
- u32 sequence;
-
- /*
- * The protocol for reading Hyper-V TSC page is specified in Hypervisor
- * Top-Level Functional Specification ver. 3.0 and above. To get the
- * reference time we must do the following:
- * - READ ReferenceTscSequence
- * A special '0' value indicates the time source is unreliable and we
- * need to use something else. The currently published specification
- * versions (up to 4.0b) contain a mistake and wrongly claim '-1'
- * instead of '0' as the special value, see commit c35b82ef0294.
- * - ReferenceTime =
- * ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset
- * - READ ReferenceTscSequence again. In case its value has changed
- * since our first reading we need to discard ReferenceTime and repeat
- * the whole sequence as the hypervisor was updating the page in
- * between.
- */
- do {
- sequence = READ_ONCE(tsc_pg->tsc_sequence);
- if (!sequence)
- return U64_MAX;
- /*
- * Make sure we read sequence before we read other values from
- * TSC page.
- */
- smp_rmb();
-
- scale = READ_ONCE(tsc_pg->tsc_scale);
- offset = READ_ONCE(tsc_pg->tsc_offset);
- *cur_tsc = rdtsc_ordered();
-
- /*
- * Make sure we read sequence after we read all other values
- * from TSC page.
- */
- smp_rmb();
-
- } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
-
- return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
-}
-
-static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
-{
- u64 cur_tsc;
-
- return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc);
-}
-
-#else
-static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
-{
- return NULL;
-}
-
-static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
- u64 *cur_tsc)
-{
- BUG();
- return U64_MAX;
-}
-#endif
#endif
#ifndef _ASM_X86_PVCLOCK_H
#define _ASM_X86_PVCLOCK_H
-#include <linux/clocksource.h>
+#include <asm/clocksource.h>
#include <asm/pvclock-abi.h>
/* some helper functions for xen and kvm pv clock sources */
extern void hpet_time_init(void);
extern void time_init(void);
+extern bool pit_timer_init(void);
extern struct clock_event_device *global_clock_event;
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Fast user context implementation of clock_gettime, gettimeofday, and time.
+ *
+ * Copyright (C) 2019 ARM Limited.
+ * Copyright 2006 Andi Kleen, SUSE Labs.
+ * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
+ * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
+ */
+#ifndef __ASM_VDSO_GETTIMEOFDAY_H
+#define __ASM_VDSO_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLY__
+
+#include <uapi/linux/time.h>
+#include <asm/vgtod.h>
+#include <asm/vvar.h>
+#include <asm/unistd.h>
+#include <asm/msr.h>
+#include <asm/pvclock.h>
+#include <clocksource/hyperv_timer.h>
+
+#define __vdso_data (VVAR(_vdso_data))
+
+#define VDSO_HAS_TIME 1
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+/*
+ * Declare the memory-mapped vclock data pages. These come from hypervisors.
+ * If we ever reintroduce something like direct access to an MMIO clock like
+ * the HPET again, it will go here as well.
+ *
+ * A load from any of these pages will segfault if the clock in question is
+ * disabled, so appropriate compiler barriers and checks need to be used
+ * to prevent stray loads.
+ *
+ * These declarations MUST NOT be const. The compiler will assume that
+ * an extern const variable has genuinely constant contents, and the
+ * resulting code won't work, since the whole point is that these pages
+ * change over time, possibly while we're accessing them.
+ */
+
+#ifdef CONFIG_PARAVIRT_CLOCK
+/*
+ * This is the vCPU 0 pvclock page. We only use pvclock from the vDSO
+ * if the hypervisor tells us that all vCPUs can get valid data from the
+ * vCPU 0 page.
+ */
+extern struct pvclock_vsyscall_time_info pvclock_page
+ __attribute__((visibility("hidden")));
+#endif
+
+#ifdef CONFIG_HYPERV_TSCPAGE
+extern struct ms_hyperv_tsc_page hvclock_page
+ __attribute__((visibility("hidden")));
+#endif
+
+#ifndef BUILD_VDSO32
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm ("syscall" : "=a" (ret), "=m" (*_ts) :
+ "0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) :
+ "rcx", "r11");
+
+ return ret;
+}
+
+static __always_inline
+long gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ long ret;
+
+ asm("syscall" : "=a" (ret) :
+ "0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory");
+
+ return ret;
+}
+
+static __always_inline
+long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm ("syscall" : "=a" (ret), "=m" (*_ts) :
+ "0" (__NR_clock_getres), "D" (_clkid), "S" (_ts) :
+ "rcx", "r11");
+
+ return ret;
+}
+
+#else
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm (
+ "mov %%ebx, %%edx \n"
+ "mov %[clock], %%ebx \n"
+ "call __kernel_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret), "=m" (*_ts)
+ : "0" (__NR_clock_gettime64), [clock] "g" (_clkid), "c" (_ts)
+ : "edx");
+
+ return ret;
+}
+
+static __always_inline
+long gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ long ret;
+
+ asm(
+ "mov %%ebx, %%edx \n"
+ "mov %2, %%ebx \n"
+ "call __kernel_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret)
+ : "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz)
+ : "memory", "edx");
+
+ return ret;
+}
+
+static __always_inline long
+clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ long ret;
+
+ asm (
+ "mov %%ebx, %%edx \n"
+ "mov %[clock], %%ebx \n"
+ "call __kernel_vsyscall \n"
+ "mov %%edx, %%ebx \n"
+ : "=a" (ret), "=m" (*_ts)
+ : "0" (__NR_clock_getres_time64), [clock] "g" (_clkid), "c" (_ts)
+ : "edx");
+
+ return ret;
+}
+
+#endif
+
+#ifdef CONFIG_PARAVIRT_CLOCK
+static u64 vread_pvclock(void)
+{
+ const struct pvclock_vcpu_time_info *pvti = &pvclock_page.pvti;
+ u32 version;
+ u64 ret;
+
+ /*
+ * Note: The kernel and hypervisor must guarantee that cpu ID
+ * number maps 1:1 to per-CPU pvclock time info.
+ *
+ * Because the hypervisor is entirely unaware of guest userspace
+ * preemption, it cannot guarantee that per-CPU pvclock time
+ * info is updated if the underlying CPU changes or that that
+ * version is increased whenever underlying CPU changes.
+ *
+ * On KVM, we are guaranteed that pvti updates for any vCPU are
+ * atomic as seen by *all* vCPUs. This is an even stronger
+ * guarantee than we get with a normal seqlock.
+ *
+ * On Xen, we don't appear to have that guarantee, but Xen still
+ * supplies a valid seqlock using the version field.
+ *
+ * We only do pvclock vdso timing at all if
+ * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
+ * mean that all vCPUs have matching pvti and that the TSC is
+ * synced, so we can just look at vCPU 0's pvti.
+ */
+
+ do {
+ version = pvclock_read_begin(pvti);
+
+ if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)))
+ return U64_MAX;
+
+ ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
+ } while (pvclock_read_retry(pvti, version));
+
+ return ret;
+}
+#endif
+
+#ifdef CONFIG_HYPERV_TSCPAGE
+static u64 vread_hvclock(void)
+{
+ return hv_read_tsc_page(&hvclock_page);
+}
+#endif
+
+static inline u64 __arch_get_hw_counter(s32 clock_mode)
+{
+ if (clock_mode == VCLOCK_TSC)
+ return (u64)rdtsc_ordered();
+ /*
+ * For any memory-mapped vclock type, we need to make sure that gcc
+ * doesn't cleverly hoist a load before the mode check. Otherwise we
+ * might end up touching the memory-mapped page even if the vclock in
+ * question isn't enabled, which will segfault. Hence the barriers.
+ */
+#ifdef CONFIG_PARAVIRT_CLOCK
+ if (clock_mode == VCLOCK_PVCLOCK) {
+ barrier();
+ return vread_pvclock();
+ }
+#endif
+#ifdef CONFIG_HYPERV_TSCPAGE
+ if (clock_mode == VCLOCK_HVCLOCK) {
+ barrier();
+ return vread_hvclock();
+ }
+#endif
+ return U64_MAX;
+}
+
+static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+{
+ return __vdso_data;
+}
+
+/*
+ * x86 specific delta calculation.
+ *
+ * The regular implementation assumes that clocksource reads are globally
+ * monotonic. The TSC can be slightly off across sockets which can cause
+ * the regular delta calculation (@cycles - @last) to return a huge time
+ * jump.
+ *
+ * Therefore it needs to be verified that @cycles are greater than
+ * @last. If not then use @last, which is the base time of the current
+ * conversion period.
+ *
+ * This variant also removes the masking of the subtraction because the
+ * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX
+ * which would result in a pointless operation. The compiler cannot
+ * optimize it away as the mask comes from the vdso data and is not compile
+ * time constant.
+ */
+static __always_inline
+u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+{
+ if (cycles > last)
+ return (cycles - last) * mult;
+ return 0;
+}
+#define vdso_calc_delta vdso_calc_delta
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/hrtimer.h>
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+#include <asm/vgtod.h>
+#include <asm/vvar.h>
+
+int vclocks_used __read_mostly;
+
+DEFINE_VVAR(struct vdso_data, _vdso_data);
+/*
+ * Update the vDSO data page to keep in sync with kernel timekeeping.
+ */
+static __always_inline
+struct vdso_data *__x86_get_k_vdso_data(void)
+{
+ return _vdso_data;
+}
+#define __arch_get_k_vdso_data __x86_get_k_vdso_data
+
+static __always_inline
+int __x86_get_clock_mode(struct timekeeper *tk)
+{
+ int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
+
+ /* Mark the new vclock used. */
+ BUILD_BUG_ON(VCLOCK_MAX >= 32);
+ WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
+
+ return vclock_mode;
+}
+#define __arch_get_clock_mode __x86_get_clock_mode
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
#define _ASM_X86_VGTOD_H
#include <linux/compiler.h>
-#include <linux/clocksource.h>
+#include <asm/clocksource.h>
+#include <vdso/datapage.h>
+#include <vdso/helpers.h>
#include <uapi/linux/time.h>
typedef unsigned long gtod_long_t;
#endif
-/*
- * There is one of these objects in the vvar page for each
- * vDSO-accelerated clockid. For high-resolution clocks, this encodes
- * the time corresponding to vsyscall_gtod_data.cycle_last. For coarse
- * clocks, this encodes the actual time.
- *
- * To confuse the reader, for high-resolution clocks, nsec is left-shifted
- * by vsyscall_gtod_data.shift.
- */
-struct vgtod_ts {
- u64 sec;
- u64 nsec;
-};
-
-#define VGTOD_BASES (CLOCK_TAI + 1)
-#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC) | BIT(CLOCK_TAI))
-#define VGTOD_COARSE (BIT(CLOCK_REALTIME_COARSE) | BIT(CLOCK_MONOTONIC_COARSE))
-
-/*
- * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
- * so be carefull by modifying this structure.
- */
-struct vsyscall_gtod_data {
- unsigned int seq;
-
- int vclock_mode;
- u64 cycle_last;
- u64 mask;
- u32 mult;
- u32 shift;
-
- struct vgtod_ts basetime[VGTOD_BASES];
-
- int tz_minuteswest;
- int tz_dsttime;
-};
-extern struct vsyscall_gtod_data vsyscall_gtod_data;
-
extern int vclocks_used;
static inline bool vclock_was_used(int vclock)
{
return READ_ONCE(vclocks_used) & (1 << vclock);
}
-static inline unsigned int gtod_read_begin(const struct vsyscall_gtod_data *s)
-{
- unsigned int ret;
-
-repeat:
- ret = READ_ONCE(s->seq);
- if (unlikely(ret & 1)) {
- cpu_relax();
- goto repeat;
- }
- smp_rmb();
- return ret;
-}
-
-static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
- unsigned int start)
-{
- smp_rmb();
- return unlikely(s->seq != start);
-}
-
-static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
-{
- ++s->seq;
- smp_wmb();
-}
-
-static inline void gtod_write_end(struct vsyscall_gtod_data *s)
-{
- smp_wmb();
- ++s->seq;
-}
-
#endif /* _ASM_X86_VGTOD_H */
extern char __vvar_page;
#define DECLARE_VVAR(offset, type, name) \
- extern type vvar_ ## name __attribute__((visibility("hidden")));
+ extern type vvar_ ## name[CS_BASES] \
+ __attribute__((visibility("hidden")));
#define VVAR(name) (vvar_ ## name)
#define DEFINE_VVAR(type, name) \
- type name \
+ type name[CS_BASES] \
__attribute__((section(".vvar_" #name), aligned(16))) __visible
#endif
/* DECLARE_VVAR(offset, type, name) */
-DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
+DECLARE_VVAR(128, struct vdso_data, _vdso_data)
#undef DECLARE_VVAR
/* These include both GPRs and XMMX registers */
PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2,
};
+
+#define PERF_REG_EXTENDED_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1))
+
#endif /* _ASM_X86_PERF_REGS_H */
.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
};
-unsigned int lapic_timer_frequency = 0;
+unsigned int lapic_timer_period = 0;
static void apic_pm_activate(void);
if (evt->features & CLOCK_EVT_FEAT_DUMMY)
return 0;
- __setup_APIC_LVTT(lapic_timer_frequency, oneshot, 1);
+ __setup_APIC_LVTT(lapic_timer_period, oneshot, 1);
return 0;
}
static int __init lapic_init_clockevent(void)
{
- if (!lapic_timer_frequency)
+ if (!lapic_timer_period)
return -1;
/* Calculate the scaled math multiplication factor */
- lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
+ lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR,
TICK_NSEC, lapic_clockevent.shift);
lapic_clockevent.max_delta_ns =
clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
return 0;
}
+bool __init apic_needs_pit(void)
+{
+ /*
+ * If the frequencies are not known, PIT is required for both TSC
+ * and apic timer calibration.
+ */
+ if (!tsc_khz || !cpu_khz)
+ return true;
+
+ /* Is there an APIC at all? */
+ if (!boot_cpu_has(X86_FEATURE_APIC))
+ return true;
+
+ /* Deadline timer is based on TSC so no further PIT action required */
+ if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ return false;
+
+ /* APIC timer disabled? */
+ if (disable_apic_timer)
+ return true;
+ /*
+ * The APIC timer frequency is known already, no PIT calibration
+ * required. If unknown, let the PIT be initialized.
+ */
+ return lapic_timer_period == 0;
+}
+
static int __init calibrate_APIC_clock(void)
{
struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
*/
if (!lapic_init_clockevent()) {
apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
- lapic_timer_frequency);
+ lapic_timer_period);
/*
* Direct calibration methods must have an always running
* local APIC timer, no need for broadcast timer.
pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
&delta, &deltatsc);
- lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
+ lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
lapic_init_clockevent();
apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
- lapic_timer_frequency);
+ lapic_timer_period);
if (boot_cpu_has(X86_FEATURE_TSC)) {
apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
"%u.%04u MHz.\n",
- lapic_timer_frequency / (1000000 / HZ),
- lapic_timer_frequency % (1000000 / HZ));
+ lapic_timer_period / (1000000 / HZ),
+ lapic_timer_period % (1000000 / HZ));
/*
* Do a sanity check on the APIC calibration result
*/
- if (lapic_timer_frequency < (1000000 / HZ)) {
+ if (lapic_timer_period < (1000000 / HZ)) {
local_irq_enable();
pr_warning("APIC frequency too slow, disabling apic timer\n");
return -1;
apic_write(APIC_LVT1, value);
}
+static void __init apic_bsp_setup(bool upmode);
+
/* Init the interrupt delivery mode for the BSP */
void __init apic_intr_mode_init(void)
{
if (queued) {
if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
ntsc = rdtsc();
- max_loops = (cpu_khz << 10) - (ntsc - tsc);
+ max_loops = (long long)cpu_khz << 10;
+ max_loops -= ntsc - tsc;
} else {
max_loops--;
}
entering_irq();
trace_spurious_apic_entry(vector);
+ inc_irq_stat(irq_spurious_count);
+
+ /*
+ * If this is a spurious interrupt then do not acknowledge
+ */
+ if (vector == SPURIOUS_APIC_VECTOR) {
+ /* See SDM vol 3 */
+ pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n",
+ smp_processor_id());
+ goto out;
+ }
+
/*
- * Check if this really is a spurious interrupt and ACK it
- * if it is a vectored one. Just in case...
- * Spurious interrupts should not be ACKed.
+ * If it is a vectored one, verify it's set in the ISR. If set,
+ * acknowledge it.
*/
v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
- if (v & (1 << (vector & 0x1f)))
+ if (v & (1 << (vector & 0x1f))) {
+ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
+ vector, smp_processor_id());
ack_APIC_irq();
-
- inc_irq_stat(irq_spurious_count);
-
- /* see sw-dev-man vol 3, chapter 7.4.13.5 */
- pr_info("spurious APIC interrupt through vector %02x on CPU#%d, "
- "should never happen.\n", vector, smp_processor_id());
-
+ } else {
+ pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
+ vector, smp_processor_id());
+ }
+out:
trace_spurious_apic_exit(vector);
exiting_irq();
}
/**
* apic_bsp_setup - Setup function for local apic and io-apic
* @upmode: Force UP mode (for APIC_init_uniprocessor)
- *
- * Returns:
- * apic_id of BSP APIC
*/
-void __init apic_bsp_setup(bool upmode)
+static void __init apic_bsp_setup(bool upmode)
{
connect_bsp_APIC();
if (upmode)
int cpu = smp_processor_id();
if (cpu < BITS_PER_LONG)
- clear_bit(cpu, &mask);
+ __clear_bit(cpu, &mask);
_flat_send_IPI_mask(mask, vector);
}
unsigned long mask = cpumask_bits(cpu_online_mask)[0];
if (cpu < BITS_PER_LONG)
- clear_bit(cpu, &mask);
+ __clear_bit(cpu, &mask);
_flat_send_IPI_mask(mask, vector);
}
#include <asm/acpi.h>
#include <asm/dma.h>
#include <asm/timer.h>
+#include <asm/time.h>
#include <asm/i8259.h>
#include <asm/setup.h>
#include <asm/irq_remapping.h>
return ret;
}
+/*
+ * Interrupt shutdown masks the ioapic pin, but the interrupt might already
+ * be in flight, but not yet serviced by the target CPU. That means
+ * __synchronize_hardirq() would return and claim that everything is calmed
+ * down. So free_irq() would proceed and deactivate the interrupt and free
+ * resources.
+ *
+ * Once the target CPU comes around to service it it will find a cleared
+ * vector and complain. While the spurious interrupt is harmless, the full
+ * release of resources might prevent the interrupt from being acknowledged
+ * which keeps the hardware in a weird state.
+ *
+ * Verify that the corresponding Remote-IRR bits are clear.
+ */
+static int ioapic_irq_get_chip_state(struct irq_data *irqd,
+ enum irqchip_irq_state which,
+ bool *state)
+{
+ struct mp_chip_data *mcd = irqd->chip_data;
+ struct IO_APIC_route_entry rentry;
+ struct irq_pin_list *p;
+
+ if (which != IRQCHIP_STATE_ACTIVE)
+ return -EINVAL;
+
+ *state = false;
+ raw_spin_lock(&ioapic_lock);
+ for_each_irq_pin(p, mcd->irq_2_pin) {
+ rentry = __ioapic_read_entry(p->apic, p->pin);
+ /*
+ * The remote IRR is only valid in level trigger mode. It's
+ * meaning is undefined for edge triggered interrupts and
+ * irrelevant because the IO-APIC treats them as fire and
+ * forget.
+ */
+ if (rentry.irr && rentry.trigger) {
+ *state = true;
+ break;
+ }
+ }
+ raw_spin_unlock(&ioapic_lock);
+ return 0;
+}
+
static struct irq_chip ioapic_chip __read_mostly = {
.name = "IO-APIC",
.irq_startup = startup_ioapic_irq,
.irq_eoi = ioapic_ack_level,
.irq_set_affinity = ioapic_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_get_irqchip_state = ioapic_irq_get_chip_state,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
.irq_eoi = ioapic_ir_ack_level,
.irq_set_affinity = ioapic_set_affinity,
.irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_get_irqchip_state = ioapic_irq_get_chip_state,
.flags = IRQCHIP_SKIP_SET_WAKE,
};
unsigned long flags;
int no_pin1 = 0;
+ if (!global_clock_event)
+ return;
+
local_irq_save(flags);
/*
trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector,
apicd->prev_cpu);
- per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED;
+ per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN;
irq_matrix_free(vector_matrix, apicd->cpu, vector, managed);
apicd->vector = 0;
if (!vector)
return;
- per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED;
+ per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN;
irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed);
apicd->prev_vector = 0;
apicd->move_in_progress = 0;
cpumask_copy(tmpmsk, mask);
/* If IPI should not be sent to self, clear current CPU */
if (apic_dest != APIC_DEST_ALLINC)
- cpumask_clear_cpu(smp_processor_id(), tmpmsk);
+ __cpumask_clear_cpu(smp_processor_id(), tmpmsk);
/* Collapse cpus in a cluster so a single IPI per cluster is sent */
for_each_cpu(cpu, tmpmsk) {
}
/*
+ * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
+ * bit in the mask to allow guests to use the mitigation even in the
+ * case where the host does not enable it.
+ */
+ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+ static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+ x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
+ }
+
+ /*
* We have three CPU feature flags that are in play here:
* - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
* - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
x86_amd_ssb_disable();
} else {
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
- x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
}
}
.resume = mc_bp_resume,
};
-static int mc_cpu_online(unsigned int cpu)
+static int mc_cpu_starting(unsigned int cpu)
{
- struct device *dev;
-
- dev = get_cpu_device(cpu);
microcode_update_cpu(cpu);
pr_debug("CPU%d added\n", cpu);
+ return 0;
+}
+
+static int mc_cpu_online(unsigned int cpu)
+{
+ struct device *dev = get_cpu_device(cpu);
if (sysfs_create_group(&dev->kobj, &mc_attr_group))
pr_err("Failed to create group for CPU%d\n", cpu);
goto out_ucode_group;
register_syscore_ops(&mc_syscore_ops);
- cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online",
+ cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting",
+ mc_cpu_starting, NULL);
+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
mc_cpu_online, mc_cpu_down_prep);
pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION);
#include <linux/irq.h>
#include <linux/kexec.h>
#include <linux/i8253.h>
+#include <linux/random.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
#include <asm/hyperv-tlfs.h>
inc_irq_stat(hyperv_stimer0_count);
if (hv_stimer0_handler)
hv_stimer0_handler();
+ add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0);
ack_APIC_irq();
exiting_irq();
int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void))
{
*vector = HYPERV_STIMER0_VECTOR;
- *irq = 0; /* Unused on x86/x64 */
+ *irq = -1; /* Unused on x86/x64 */
hv_stimer0_handler = handler;
return 0;
}
rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
- lapic_timer_frequency = hv_lapic_frequency;
+ lapic_timer_period = hv_lapic_frequency;
pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n",
- lapic_timer_frequency);
+ lapic_timer_period);
}
register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST,
struct seq_file *seq, void *v)
{
struct rdt_resource *r = of->kn->parent->priv;
- u32 sw_shareable = 0, hw_shareable = 0;
- u32 exclusive = 0, pseudo_locked = 0;
+ /*
+ * Use unsigned long even though only 32 bits are used to ensure
+ * test_bit() is used safely.
+ */
+ unsigned long sw_shareable = 0, hw_shareable = 0;
+ unsigned long exclusive = 0, pseudo_locked = 0;
struct rdt_domain *dom;
int i, hwb, swb, excl, psl;
enum rdtgrp_mode mode;
}
for (i = r->cache.cbm_len - 1; i >= 0; i--) {
pseudo_locked = dom->plr ? dom->plr->cbm : 0;
- hwb = test_bit(i, (unsigned long *)&hw_shareable);
- swb = test_bit(i, (unsigned long *)&sw_shareable);
- excl = test_bit(i, (unsigned long *)&exclusive);
- psl = test_bit(i, (unsigned long *)&pseudo_locked);
+ hwb = test_bit(i, &hw_shareable);
+ swb = test_bit(i, &sw_shareable);
+ excl = test_bit(i, &exclusive);
+ psl = test_bit(i, &pseudo_locked);
if (hwb && swb)
seq_putc(seq, 'X');
else if (hwb && !swb)
*/
static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
{
- /*
- * Convert the u32 _val to an unsigned long required by all the bit
- * operations within this function. No more than 32 bits of this
- * converted value can be accessed because all bit operations are
- * additionally provided with cbm_len that is initialized during
- * hardware enumeration using five bits from the EAX register and
- * thus never can exceed 32 bits.
- */
- unsigned long *val = (unsigned long *)_val;
+ unsigned long val = *_val;
unsigned int cbm_len = r->cache.cbm_len;
unsigned long first_bit, zero_bit;
- if (*val == 0)
+ if (val == 0)
return;
- first_bit = find_first_bit(val, cbm_len);
- zero_bit = find_next_zero_bit(val, cbm_len, first_bit);
+ first_bit = find_first_bit(&val, cbm_len);
+ zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
/* Clear any remaining bits to ensure contiguous region */
- bitmap_clear(val, zero_bit, cbm_len - zero_bit);
+ bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
+ *_val = (u32)val;
}
/*
#ifdef CONFIG_X86_LOCAL_APIC
/* Skip lapic calibration since we know the bus frequency. */
- lapic_timer_frequency = ecx / HZ;
+ lapic_timer_period = ecx / HZ;
pr_info("Host bus clock speed read from hypervisor : %u Hz\n",
ecx);
#endif
#include <linux/init.h>
#include <linux/list.h>
#include <linux/module.h>
+#include <linux/memory.h>
#include <trace/syscall.h>
#ifdef CONFIG_DYNAMIC_FTRACE
int ftrace_arch_code_modify_prepare(void)
+ __acquires(&text_mutex)
{
+ /*
+ * Need to grab text_mutex to prevent a race from module loading
+ * and live kernel patching from changing the text permissions while
+ * ftrace has it set to "read/write".
+ */
+ mutex_lock(&text_mutex);
set_kernel_text_rw();
set_all_modules_text_rw();
return 0;
}
int ftrace_arch_code_modify_post_process(void)
+ __releases(&text_mutex)
{
set_all_modules_text_ro();
set_kernel_text_ro();
+ mutex_unlock(&text_mutex);
return 0;
}
pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
if (la57) {
- p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
+ p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++],
+ physaddr);
i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;
- i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D;
- p4d[i + 0] = (pgdval_t)pud + pgtable_flags;
- p4d[i + 1] = (pgdval_t)pud + pgtable_flags;
+ i = physaddr >> P4D_SHIFT;
+ p4d[(i + 0) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
+ p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags;
} else {
i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
pgd[i + 0] = (pgdval_t)pud + pgtable_flags;
pgd[i + 1] = (pgdval_t)pud + pgtable_flags;
}
- i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD;
- pud[i + 0] = (pudval_t)pmd + pgtable_flags;
- pud[i + 1] = (pudval_t)pmd + pgtable_flags;
+ i = physaddr >> PUD_SHIFT;
+ pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
+ pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
/* Filter out unsupported __PAGE_KERNEL_* bits: */
pmd_entry += physaddr;
for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
- int idx = i + (physaddr >> PMD_SHIFT) % PTRS_PER_PMD;
- pmd[idx] = pmd_entry + i * PMD_SIZE;
+ int idx = i + (physaddr >> PMD_SHIFT);
+
+ pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE;
}
/*
#include <linux/timex.h>
#include <linux/i8253.h>
+#include <asm/apic.h>
#include <asm/hpet.h>
#include <asm/time.h>
#include <asm/smp.h>
*/
struct clock_event_device *global_clock_event;
-void __init setup_pit_timer(void)
+/*
+ * Modern chipsets can disable the PIT clock which makes it unusable. It
+ * would be possible to enable the clock but the registers are chipset
+ * specific and not discoverable. Avoid the whack a mole game.
+ *
+ * These platforms have discoverable TSC/CPU frequencies but this also
+ * requires to know the local APIC timer frequency as it normally is
+ * calibrated against the PIT interrupt.
+ */
+static bool __init use_pit(void)
+{
+ if (!IS_ENABLED(CONFIG_X86_TSC) || !boot_cpu_has(X86_FEATURE_TSC))
+ return true;
+
+ /* This also returns true when APIC is disabled */
+ return apic_needs_pit();
+}
+
+bool __init pit_timer_init(void)
{
+ if (!use_pit())
+ return false;
+
clockevent_i8253_init(true);
global_clock_event = &i8253_clockevent;
+ return true;
}
#ifndef CONFIG_X86_64
#ifdef CONFIG_X86_LOCAL_APIC
for_each_clear_bit_from(i, system_vectors, NR_VECTORS) {
set_bit(i, system_vectors);
- set_intr_gate(i, spurious_interrupt);
+ entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR);
+ set_intr_gate(i, entry);
}
#endif
}
if (!handle_irq(desc, regs)) {
ack_APIC_irq();
- if (desc != VECTOR_RETRIGGERED) {
+ if (desc != VECTOR_RETRIGGERED && desc != VECTOR_SHUTDOWN) {
pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n",
__func__, smp_processor_id(),
vector);
static void __init jailhouse_timer_init(void)
{
- lapic_timer_frequency = setup_data.apic_khz * (1000 / HZ);
+ lapic_timer_period = setup_data.apic_khz * (1000 / HZ);
}
static unsigned long jailhouse_get_tsc(void)
return regs_get_register(regs, pt_regs_offset[idx]);
}
+#define PERF_REG_X86_RESERVED (((1ULL << PERF_REG_X86_XMM0) - 1) & \
+ ~((1ULL << PERF_REG_X86_MAX) - 1))
+
#ifdef CONFIG_X86_32
#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \
(1ULL << PERF_REG_X86_R9) | \
int perf_reg_validate(u64 mask)
{
- if (!mask || (mask & REG_NOSUPPORT))
+ if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED)))
return -EINVAL;
return 0;
int perf_reg_validate(u64 mask)
{
- if (!mask || (mask & REG_NOSUPPORT))
+ if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED)))
return -EINVAL;
return 0;
void ptrace_disable(struct task_struct *child)
{
user_disable_single_step(child);
-#ifdef TIF_SYSCALL_EMU
- clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
-#endif
}
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
*/
+#include <linux/clocksource.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
}
cpumask_copy(allbutself, cpu_online_mask);
- cpumask_clear_cpu(smp_processor_id(), allbutself);
+ __cpumask_clear_cpu(smp_processor_id(), allbutself);
if (cpumask_equal(mask, allbutself) &&
cpumask_equal(cpu_online_mask, cpu_callout_mask))
/* Default timer init function */
void __init hpet_time_init(void)
{
- if (!hpet_enable())
- setup_pit_timer();
+ if (!hpet_enable()) {
+ if (!pit_timer_init())
+ return;
+ }
+
setup_default_timer_irq();
}
crystal_khz = ecx_hz / 1000;
- if (crystal_khz == 0) {
- switch (boot_cpu_data.x86_model) {
- case INTEL_FAM6_SKYLAKE_MOBILE:
- case INTEL_FAM6_SKYLAKE_DESKTOP:
- case INTEL_FAM6_KABYLAKE_MOBILE:
- case INTEL_FAM6_KABYLAKE_DESKTOP:
- crystal_khz = 24000; /* 24.0 MHz */
- break;
- case INTEL_FAM6_ATOM_GOLDMONT_X:
- crystal_khz = 25000; /* 25.0 MHz */
- break;
- case INTEL_FAM6_ATOM_GOLDMONT:
- crystal_khz = 19200; /* 19.2 MHz */
- break;
- }
- }
+ /*
+ * Denverton SoCs don't report crystal clock, and also don't support
+ * CPUID.0x16 for the calculation below, so hardcode the 25MHz crystal
+ * clock.
+ */
+ if (crystal_khz == 0 &&
+ boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_X)
+ crystal_khz = 25000;
- if (crystal_khz == 0)
- return 0;
/*
- * TSC frequency determined by CPUID is a "hardware reported"
+ * TSC frequency reported directly by CPUID is a "hardware reported"
* frequency and is the most accurate one so far we have. This
* is considered a known frequency.
*/
- setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+ if (crystal_khz != 0)
+ setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+
+ /*
+ * Some Intel SoCs like Skylake and Kabylake don't report the crystal
+ * clock, but we can easily calculate it to a high degree of accuracy
+ * by considering the crystal ratio and the CPU speed.
+ */
+ if (crystal_khz == 0 && boot_cpu_data.cpuid_level >= 0x16) {
+ unsigned int eax_base_mhz, ebx, ecx, edx;
+
+ cpuid(0x16, &eax_base_mhz, &ebx, &ecx, &edx);
+ crystal_khz = eax_base_mhz * 1000 *
+ eax_denominator / ebx_numerator;
+ }
+
+ if (crystal_khz == 0)
+ return 0;
/*
* For Atom SoCs TSC is the only reliable clocksource.
if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT)
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
+#ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * The local APIC appears to be fed by the core crystal clock
+ * (which sounds entirely sensible). We can set the global
+ * lapic_timer_period here to avoid having to calibrate the APIC
+ * timer later.
+ */
+ lapic_timer_period = crystal_khz * 1000 / HZ;
+#endif
+
return crystal_khz * ebx_numerator / eax_denominator;
}
/*
* MSR-based CPU/TSC frequency discovery for certain CPUs.
*
- * Set global "lapic_timer_frequency" to bus_clock_cycles/jiffy
+ * Set global "lapic_timer_period" to bus_clock_cycles/jiffy
* Return processor base frequency in KHz, or 0 on failure.
*/
unsigned long cpu_khz_from_msr(void)
res = freq * ratio;
#ifdef CONFIG_X86_LOCAL_APIC
- lapic_timer_frequency = (freq * 1000) / HZ;
+ lapic_timer_period = (freq * 1000) / HZ;
#endif
/*
* But they are copies of the ftrace entries that are static and
* defined in ftrace_*.S, which do have orc entries.
*
- * If the undwinder comes across a ftrace trampoline, then find the
+ * If the unwinder comes across a ftrace trampoline, then find the
* ftrace function that was used to create it, and use that ftrace
- * function's orc entrie, as the placement of the return code in
+ * function's orc entry, as the placement of the return code in
* the stack will be identical.
*/
static struct orc_entry *orc_ftrace_find(unsigned long ip)
.type = ORC_TYPE_CALL
};
+/* Fake frame pointer entry -- used as a fallback for generated code */
+static struct orc_entry orc_fp_entry = {
+ .type = ORC_TYPE_CALL,
+ .sp_reg = ORC_REG_BP,
+ .sp_offset = 16,
+ .bp_reg = ORC_REG_PREV_SP,
+ .bp_offset = -16,
+ .end = 0,
+};
+
static struct orc_entry *orc_find(unsigned long ip)
{
static struct orc_entry *orc;
* calls and calls to noreturn functions.
*/
orc = orc_find(state->signal ? state->ip : state->ip - 1);
- if (!orc)
- goto err;
+ if (!orc) {
+ /*
+ * As a fallback, try to assume this code uses a frame pointer.
+ * This is useful for generated code, like BPF, which ORC
+ * doesn't know about. This is just a guess, so the rest of
+ * the unwind is no longer considered reliable.
+ */
+ orc = &orc_fp_entry;
+ state->error = true;
+ }
/* End-of-stack check for kernel threads: */
if (orc->sp_reg == ORC_REG_UNDEFINED) {
struct kvm_lapic *apic = vcpu->arch.apic;
u32 ppr;
- if (!apic_enabled(apic))
+ if (!kvm_apic_hw_enabled(apic))
return -1;
__apic_update_ppr(apic, &ppr);
ctr_val = rdtsc();
break;
case VMWARE_BACKDOOR_PMC_REAL_TIME:
- ctr_val = ktime_get_boot_ns();
+ ctr_val = ktime_get_boottime_ns();
break;
case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
- ctr_val = ktime_get_boot_ns() +
+ ctr_val = ktime_get_boottime_ns() +
vcpu->kvm->arch.kvmclock_offset;
break;
default:
vmx = to_vmx(vcpu);
vmcs12 = get_vmcs12(vcpu);
- if (nested_vmx_allowed(vcpu) && vmx->nested.enlightened_vmcs_enabled)
- kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
-
if (nested_vmx_allowed(vcpu) &&
(vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
if (vmx_has_valid_vmcs12(vcpu)) {
kvm_state.size += sizeof(user_vmx_nested_state->vmcs12);
+ if (vmx->nested.hv_evmcs)
+ kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
+
if (is_guest_mode(vcpu) &&
nested_cpu_has_shadow_vmcs(vmcs12) &&
vmcs12->vmcs_link_pointer != -1ull)
if (kvm_state->hdr.vmx.vmcs12_pa != -1ull)
return -EINVAL;
+ /*
+ * KVM_STATE_NESTED_EVMCS used to signal that KVM should
+ * enable eVMCS capability on vCPU. However, since then
+ * code was changed such that flag signals vmcs12 should
+ * be copied into eVMCS in guest memory.
+ *
+ * To preserve backwards compatability, allow user
+ * to set this flag even when there is no VMXON region.
+ */
if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
return -EINVAL;
} else {
if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
return -EINVAL;
- }
+ }
if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
* nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags
* must be zero.
*/
- if (is_smm(vcpu) ? kvm_state->flags : kvm_state->hdr.vmx.smm.flags)
+ if (is_smm(vcpu) ?
+ (kvm_state->flags &
+ (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
+ : kvm_state->hdr.vmx.smm.flags)
return -EINVAL;
if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
!(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
return -EINVAL;
- vmx_leave_nested(vcpu);
- if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
- if (!nested_vmx_allowed(vcpu))
+ if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
+ (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
return -EINVAL;
- nested_enable_evmcs(vcpu, NULL);
- }
+ vmx_leave_nested(vcpu);
if (kvm_state->hdr.vmx.vmxon_pa == -1ull)
return 0;
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
#include <asm/intel_pt.h>
+#include <clocksource/hyperv_timer.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
vcpu->arch.tsc_always_catchup = 1;
return 0;
} else {
- WARN(1, "user requested TSC rate below hardware speed\n");
+ pr_warn_ratelimited("user requested TSC rate below hardware speed\n");
return -1;
}
}
user_tsc_khz, tsc_khz);
if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
- WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
- user_tsc_khz);
+ pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
+ user_tsc_khz);
return -1;
}
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
offset = kvm_compute_tsc_offset(vcpu, data);
- ns = ktime_get_boot_ns();
+ ns = ktime_get_boottime_ns();
elapsed = ns - kvm->arch.last_tsc_nsec;
if (vcpu->arch.virtual_tsc_khz) {
spin_lock(&ka->pvclock_gtod_sync_lock);
if (!ka->use_master_clock) {
spin_unlock(&ka->pvclock_gtod_sync_lock);
- return ktime_get_boot_ns() + ka->kvmclock_offset;
+ return ktime_get_boottime_ns() + ka->kvmclock_offset;
}
hv_clock.tsc_timestamp = ka->master_cycle_now;
&hv_clock.tsc_to_system_mul);
ret = __pvclock_read_cycles(&hv_clock, rdtsc());
} else
- ret = ktime_get_boot_ns() + ka->kvmclock_offset;
+ ret = ktime_get_boottime_ns() + ka->kvmclock_offset;
put_cpu();
}
if (!use_master_clock) {
host_tsc = rdtsc();
- kernel_ns = ktime_get_boot_ns();
+ kernel_ns = ktime_get_boottime_ns();
}
tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
* before any KVM threads can be running. Unfortunately, we can't
* bring the TSCs fully up to date with real time, as we aren't yet far
* enough into CPU bringup that we know how much real time has actually
- * elapsed; our helper function, ktime_get_boot_ns() will be using boot
+ * elapsed; our helper function, ktime_get_boottime_ns() will be using boot
* variables that haven't been updated yet.
*
* So we simply find the maximum observed TSC above, then record the
mutex_init(&kvm->arch.apic_map_lock);
spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
- kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
+ kvm->arch.kvmclock_offset = -ktime_get_boottime_ns();
pvclock_update_vm_gtod_copy(kvm);
kvm->arch.guest_can_read_msr_platform_info = true;
int wbinvd_on_all_cpus(void)
{
- return on_each_cpu(__wbinvd, NULL, 1);
+ on_each_cpu(__wbinvd, NULL, 1);
+ return 0;
}
EXPORT_SYMBOL(wbinvd_on_all_cpus);
phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
unsigned long page_size_mask, bool init)
{
- unsigned long paddr_next, paddr_last = paddr_end;
- unsigned long vaddr = (unsigned long)__va(paddr);
- int i = p4d_index(vaddr);
+ unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last;
+
+ paddr_last = paddr_end;
+ vaddr = (unsigned long)__va(paddr);
+ vaddr_end = (unsigned long)__va(paddr_end);
if (!pgtable_l5_enabled())
return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
page_size_mask, init);
- for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
- p4d_t *p4d;
+ for (; vaddr < vaddr_end; vaddr = vaddr_next) {
+ p4d_t *p4d = p4d_page + p4d_index(vaddr);
pud_t *pud;
- vaddr = (unsigned long)__va(paddr);
- p4d = p4d_page + p4d_index(vaddr);
- paddr_next = (paddr & P4D_MASK) + P4D_SIZE;
+ vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE;
+ paddr = __pa(vaddr);
if (paddr >= paddr_end) {
+ paddr_next = __pa(vaddr_next);
if (!after_bootmem &&
!e820__mapped_any(paddr & P4D_MASK, paddr_next,
E820_TYPE_RAM) &&
if (!p4d_none(*p4d)) {
pud = pud_offset(p4d, 0);
- paddr_last = phys_pud_init(pud, paddr, paddr_end,
- page_size_mask, init);
+ paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
+ page_size_mask, init);
continue;
}
pud = alloc_low_page();
- paddr_last = phys_pud_init(pud, paddr, paddr_end,
+ paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
page_size_mask, init);
spin_lock(&init_mm.page_table_lock);
* Address range 0x0000 - 0x0fff is always mapped in the efi_pgd, so
* page faulting on these addresses isn't expected.
*/
- if (phys_addr >= 0x0000 && phys_addr <= 0x0fff)
+ if (phys_addr <= 0x0fff)
return;
/*
* containing only random (seeky) I/O are prevented from being tagged
* as soft real-time.
*/
-#define BFQQ_TOTALLY_SEEKY(bfqq) (bfqq->seek_history & -1)
+#define BFQQ_TOTALLY_SEEKY(bfqq) (bfqq->seek_history == -1)
/* Min number of samples required to perform peak-rate update */
#define BFQ_RATE_MIN_SAMPLES 32
{
struct elevator_type *e = q->elevator->type;
+ /*
+ * If the parent directory has not been created yet, return, we will be
+ * called again later on and the directory/files will be created then.
+ */
+ if (!q->debugfs_dir)
+ return;
+
if (!e->queue_debugfs_attrs)
return;
struct skcipherd_instance_ctx *ctx = skcipher_instance_ctx(inst);
crypto_drop_skcipher(&ctx->spawn);
+ kfree(inst);
}
static int cryptd_create_skcipher(struct crypto_template *tmpl,
list_for_each_entry(q, &crypto_alg_list, cra_list) {
int match = 0;
+ if (crypto_is_larval(q))
+ continue;
+
if ((q->cra_flags ^ p->cru_type) & p->cru_mask)
continue;
acpi_irq_model = model;
acpi_gsi_domain_id = fwnode;
}
+
+/**
+ * acpi_irq_create_hierarchy - Create a hierarchical IRQ domain with the default
+ * GSI domain as its parent.
+ * @flags: Irq domain flags associated with the domain
+ * @size: Size of the domain.
+ * @fwnode: Optional fwnode of the interrupt controller
+ * @ops: Pointer to the interrupt domain callbacks
+ * @host_data: Controller private data pointer
+ */
+struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags,
+ unsigned int size,
+ struct fwnode_handle *fwnode,
+ const struct irq_domain_ops *ops,
+ void *host_data)
+{
+ struct irq_domain *d = irq_find_matching_fwnode(acpi_gsi_domain_id,
+ DOMAIN_BUS_ANY);
+
+ if (!d)
+ return NULL;
+
+ return irq_domain_create_hierarchy(d, flags, size, fwnode, ops,
+ host_data);
+}
+EXPORT_SYMBOL_GPL(acpi_irq_create_hierarchy);
}
}
+static bool flag_identical(struct acpi_table_header *table_hdr,
+ struct acpi_pptt_processor *cpu)
+{
+ struct acpi_pptt_processor *next;
+
+ /* heterogeneous machines must use PPTT revision > 1 */
+ if (table_hdr->revision < 2)
+ return false;
+
+ /* Locate the last node in the tree with IDENTICAL set */
+ if (cpu->flags & ACPI_PPTT_ACPI_IDENTICAL) {
+ next = fetch_pptt_node(table_hdr, cpu->parent);
+ if (!(next && next->flags & ACPI_PPTT_ACPI_IDENTICAL))
+ return true;
+ }
+
+ return false;
+}
+
/* Passing level values greater than this will result in search termination */
#define PPTT_ABORT_PACKAGE 0xFF
-static struct acpi_pptt_processor *acpi_find_processor_package_id(struct acpi_table_header *table_hdr,
- struct acpi_pptt_processor *cpu,
- int level, int flag)
+static struct acpi_pptt_processor *acpi_find_processor_tag(struct acpi_table_header *table_hdr,
+ struct acpi_pptt_processor *cpu,
+ int level, int flag)
{
struct acpi_pptt_processor *prev_node;
while (cpu && level) {
- if (cpu->flags & flag)
+ /* special case the identical flag to find last identical */
+ if (flag == ACPI_PPTT_ACPI_IDENTICAL) {
+ if (flag_identical(table_hdr, cpu))
+ break;
+ } else if (cpu->flags & flag)
break;
pr_debug("level %d\n", level);
prev_node = fetch_pptt_node(table_hdr, cpu->parent);
cpu_node = acpi_find_processor_node(table, acpi_cpu_id);
if (cpu_node) {
- cpu_node = acpi_find_processor_package_id(table, cpu_node,
- level, flag);
+ cpu_node = acpi_find_processor_tag(table, cpu_node,
+ level, flag);
/*
* As per specification if the processor structure represents
* an actual processor, then ACPI processor ID must be valid.
return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE,
ACPI_PPTT_PHYSICAL_PACKAGE);
}
+
+/**
+ * find_acpi_cpu_topology_hetero_id() - Get a core architecture tag
+ * @cpu: Kernel logical CPU number
+ *
+ * Determine a unique heterogeneous tag for the given CPU. CPUs with the same
+ * implementation should have matching tags.
+ *
+ * The returned tag can be used to group peers with identical implementation.
+ *
+ * The search terminates when a level is found with the identical implementation
+ * flag set or we reach a root node.
+ *
+ * Due to limitations in the PPTT data structure, there may be rare situations
+ * where two cores in a heterogeneous machine may be identical, but won't have
+ * the same tag.
+ *
+ * Return: -ENOENT if the PPTT doesn't exist, or the CPU cannot be found.
+ * Otherwise returns a value which represents a group of identical cores
+ * similar to this CPU.
+ */
+int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
+{
+ return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE,
+ ACPI_PPTT_ACPI_IDENTICAL);
+}
static int cfag12864bfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
{
- return vm_insert_page(vma, vma->vm_start,
- virt_to_page(cfag12864b_buffer));
+ struct page *pages = virt_to_page(cfag12864b_buffer);
+
+ return vm_map_pages_zero(vma, &pages, 1);
}
static struct fb_ops cfag12864bfb_ops = {
static int ht16k33_mmap(struct fb_info *info, struct vm_area_struct *vma)
{
struct ht16k33_priv *priv = info->par;
+ struct page *pages = virt_to_page(priv->fbdev.buffer);
- return vm_insert_page(vma, vma->vm_start,
- virt_to_page(priv->fbdev.buffer));
+ return vm_map_pages_zero(vma, &pages, 1);
}
static struct fb_ops ht16k33_fb_ops = {
return -ENOTSUPP;
}
+unsigned int coherency_max_size;
+
static int cache_shared_cpu_map_setup(unsigned int cpu)
{
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
}
}
+ /* record the maximum cache line size */
+ if (this_leaf->coherency_line_size > coherency_max_size)
+ coherency_max_size = this_leaf->coherency_line_size;
}
return 0;
void global_cache_flush(void)
{
- if (on_each_cpu(ipi_handler, NULL, 1) != 0)
- panic(PFX "timed out waiting for the other CPUs!\n");
+ on_each_cpu(ipi_handler, NULL, 1);
}
EXPORT_SYMBOL(global_cache_flush);
const char *dev_id = dev ? dev_name(dev) : NULL;
struct device_node *np = core->of_node;
- if (np && index >= 0)
+ if (np && (name || index >= 0))
hw = of_clk_get_hw(np, index, name);
/*
[CLKID_MALI_1_DIV] = &g12a_mali_1_div.hw,
[CLKID_MALI_1] = &g12a_mali_1.hw,
[CLKID_MALI] = &g12a_mali.hw,
- [CLKID_MPLL_5OM_DIV] = &g12a_mpll_50m_div.hw,
- [CLKID_MPLL_5OM] = &g12a_mpll_50m.hw,
+ [CLKID_MPLL_50M_DIV] = &g12a_mpll_50m_div.hw,
+ [CLKID_MPLL_50M] = &g12a_mpll_50m.hw,
[CLKID_SYS_PLL_DIV16_EN] = &g12a_sys_pll_div16_en.hw,
[CLKID_SYS_PLL_DIV16] = &g12a_sys_pll_div16.hw,
[CLKID_CPU_CLK_DYN0_SEL] = &g12a_cpu_clk_premux0.hw,
#define CLKID_HDMI_DIV 167
#define CLKID_MALI_0_DIV 170
#define CLKID_MALI_1_DIV 173
-#define CLKID_MPLL_5OM_DIV 176
+#define CLKID_MPLL_50M_DIV 176
#define CLKID_SYS_PLL_DIV16_EN 178
#define CLKID_SYS_PLL_DIV16 179
#define CLKID_CPU_CLK_DYN0_SEL 180
},
};
-static const char * const mmeson8b_vpu_0_1_parent_names[] = {
+static const char * const meson8b_vpu_0_1_parent_names[] = {
"fclk_div4", "fclk_div3", "fclk_div5", "fclk_div7"
};
.hw.init = &(struct clk_init_data){
.name = "vpu_0_sel",
.ops = &clk_regmap_mux_ops,
- .parent_names = mmeson8b_vpu_0_1_parent_names,
- .num_parents = ARRAY_SIZE(mmeson8b_vpu_0_1_parent_names),
+ .parent_names = meson8b_vpu_0_1_parent_names,
+ .num_parents = ARRAY_SIZE(meson8b_vpu_0_1_parent_names),
.flags = CLK_SET_RATE_PARENT,
},
};
.hw.init = &(struct clk_init_data){
.name = "vpu_1_sel",
.ops = &clk_regmap_mux_ops,
- .parent_names = mmeson8b_vpu_0_1_parent_names,
- .num_parents = ARRAY_SIZE(mmeson8b_vpu_0_1_parent_names),
+ .parent_names = meson8b_vpu_0_1_parent_names,
+ .num_parents = ARRAY_SIZE(meson8b_vpu_0_1_parent_names),
.flags = CLK_SET_RATE_PARENT,
},
};
{ STRATIX10_NOC_CLK, "noc_clk", NULL, noc_mux, ARRAY_SIZE(noc_mux),
0, 0, 0, 0x3C, 1},
{ STRATIX10_EMAC_A_FREE_CLK, "emaca_free_clk", NULL, emaca_free_mux, ARRAY_SIZE(emaca_free_mux),
- 0, 0, 4, 0xB0, 0},
+ 0, 0, 2, 0xB0, 0},
{ STRATIX10_EMAC_B_FREE_CLK, "emacb_free_clk", NULL, emacb_free_mux, ARRAY_SIZE(emacb_free_mux),
- 0, 0, 4, 0xB0, 1},
+ 0, 0, 2, 0xB0, 1},
{ STRATIX10_EMAC_PTP_FREE_CLK, "emac_ptp_free_clk", NULL, emac_ptp_free_mux,
ARRAY_SIZE(emac_ptp_free_mux), 0, 0, 4, 0xB0, 2},
{ STRATIX10_GPIO_DB_FREE_CLK, "gpio_db_free_clk", NULL, gpio_db_free_mux,
{ TEGRA210_CLK_I2S3_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 },
{ TEGRA210_CLK_I2S4_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 },
{ TEGRA210_CLK_VIMCLK_SYNC, TEGRA210_CLK_CLK_MAX, 24576000, 0 },
+ { TEGRA210_CLK_HDA, TEGRA210_CLK_PLL_P, 51000000, 0 },
+ { TEGRA210_CLK_HDA2CODEC_2X, TEGRA210_CLK_PLL_P, 48000000, 0 },
/* This MUST be the last entry. */
{ TEGRA210_CLK_CLK_MAX, TEGRA210_CLK_CLK_MAX, 0, 0 },
};
{
struct omap_clkctrl_provider *provider = data;
struct omap_clkctrl_clk *entry;
+ bool found = false;
if (clkspec->args_count != 2)
return ERR_PTR(-EINVAL);
list_for_each_entry(entry, &provider->clocks, node) {
if (entry->reg_offset == clkspec->args[0] &&
- entry->bit_offset == clkspec->args[1])
+ entry->bit_offset == clkspec->args[1]) {
+ found = true;
break;
+ }
}
- if (!entry)
+ if (!found)
return ERR_PTR(-EINVAL);
return entry->clk;
help
Enables the support for the BCM Kona mobile timer driver.
+config DAVINCI_TIMER
+ bool "Texas Instruments DaVinci timer driver" if COMPILE_TEST
+ help
+ Enables the support for the TI DaVinci timer driver.
+
config DIGICOLOR_TIMER
bool "Digicolor timer driver" if COMPILE_TEST
select CLKSRC_MMIO
bool "Tegra timer driver" if COMPILE_TEST
select CLKSRC_MMIO
select TIMER_OF
- depends on ARM || ARM64
+ depends on ARCH_TEGRA || COMPILE_TEST
help
Enables support for the Tegra driver.
Enable this option to use IMX Timer/PWM Module (TPM) timer as
clocksource.
+config TIMER_IMX_SYS_CTR
+ bool "i.MX system counter timer" if COMPILE_TEST
+ select TIMER_OF
+ help
+ Enable this option to use i.MX system counter timer as a
+ clockevent.
+
config CLKSRC_ST_LPC
bool "Low power clocksource found in the LPC" if COMPILE_TEST
select TIMER_OF if OF
obj-$(CONFIG_EM_TIMER_STI) += em_sti.o
obj-$(CONFIG_CLKBLD_I8253) += i8253.o
obj-$(CONFIG_CLKSRC_MMIO) += mmio.o
+obj-$(CONFIG_DAVINCI_TIMER) += timer-davinci.o
obj-$(CONFIG_DIGICOLOR_TIMER) += timer-digicolor.o
obj-$(CONFIG_OMAP_DM_TIMER) += timer-ti-dm.o
obj-$(CONFIG_DW_APB_TIMER) += dw_apb_timer.o
obj-$(CONFIG_SUN4I_TIMER) += timer-sun4i.o
obj-$(CONFIG_SUN5I_HSTIMER) += timer-sun5i.o
obj-$(CONFIG_MESON6_TIMER) += timer-meson6.o
-obj-$(CONFIG_TEGRA_TIMER) += timer-tegra20.o
+obj-$(CONFIG_TEGRA_TIMER) += timer-tegra.o
obj-$(CONFIG_VT8500_TIMER) += timer-vt8500.o
obj-$(CONFIG_NSPIRE_TIMER) += timer-zevio.o
obj-$(CONFIG_BCM_KONA_TIMER) += bcm_kona_timer.o
obj-$(CONFIG_CLKSRC_TANGO_XTAL) += timer-tango-xtal.o
obj-$(CONFIG_CLKSRC_IMX_GPT) += timer-imx-gpt.o
obj-$(CONFIG_CLKSRC_IMX_TPM) += timer-imx-tpm.o
+obj-$(CONFIG_TIMER_IMX_SYS_CTR) += timer-imx-sysctr.o
obj-$(CONFIG_ASM9260_TIMER) += asm9260_timer.o
obj-$(CONFIG_H8300_TMR8) += h8300_timer8.o
obj-$(CONFIG_H8300_TMR16) += h8300_timer16.o
obj-$(CONFIG_RISCV_TIMER) += timer-riscv.o
obj-$(CONFIG_CSKY_MP_TIMER) += timer-mp-csky.o
obj-$(CONFIG_GX6605S_TIMER) += timer-gx6605s.o
+obj-$(CONFIG_HYPERV_TIMER) += hyperv_timer.o
*/
#include <linux/interrupt.h>
+#include <linux/bits.h>
#include <linux/clk.h>
#include <linux/clk-provider.h>
#include <linux/clocksource.h>
l = read_aux_reg(AUX_RTC_LOW);
h = read_aux_reg(AUX_RTC_HIGH);
status = read_aux_reg(AUX_RTC_CTRL);
- } while (!(status & _BITUL(31)));
+ } while (!(status & BIT(31)));
return (((u64)h) << 32) | l;
}
cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT)
| ARCH_TIMER_VIRT_EVT_EN;
arch_timer_set_cntkctl(cntkctl);
-#ifdef CONFIG_ARM64
- cpu_set_named_feature(EVTSTRM);
-#else
- elf_hwcap |= HWCAP_EVTSTRM;
-#endif
-#ifdef CONFIG_COMPAT
- compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM;
-#endif
+ arch_timer_set_evtstrm_feature();
cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
}
} else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) {
arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl));
-#ifdef CONFIG_ARM64
- if (cpu_have_named_feature(EVTSTRM))
-#else
- if (elf_hwcap & HWCAP_EVTSTRM)
-#endif
+ if (arch_timer_have_evtstrm_feature())
cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
}
return NOTIFY_OK;
static struct clocksource mct_frc = {
.name = "mct-frc",
- .rating = 400,
+ .rating = 450, /* use value higher than ARM arch timer */
.read = exynos4_frc_read,
.mask = CLOCKSOURCE_MASK(32),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
evt->set_state_oneshot_stopped = set_state_shutdown;
evt->tick_resume = set_state_shutdown;
evt->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
- evt->rating = 450;
+ evt->rating = 500; /* use value higher than ARM arch timer */
exynos4_mct_write(TICK_BASE_CNT, mevt->base + MCT_L_TCNTB_OFFSET);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Clocksource driver for the synthetic counter and timers
+ * provided by the Hyper-V hypervisor to guest VMs, as described
+ * in the Hyper-V Top Level Functional Spec (TLFS). This driver
+ * is instruction set architecture independent.
+ *
+ * Copyright (C) 2019, Microsoft, Inc.
+ *
+ * Author: Michael Kelley <mikelley@microsoft.com>
+ */
+
+#include <linux/percpu.h>
+#include <linux/cpumask.h>
+#include <linux/clockchips.h>
+#include <linux/clocksource.h>
+#include <linux/sched_clock.h>
+#include <linux/mm.h>
+#include <clocksource/hyperv_timer.h>
+#include <asm/hyperv-tlfs.h>
+#include <asm/mshyperv.h>
+
+static struct clock_event_device __percpu *hv_clock_event;
+
+/*
+ * If false, we're using the old mechanism for stimer0 interrupts
+ * where it sends a VMbus message when it expires. The old
+ * mechanism is used when running on older versions of Hyper-V
+ * that don't support Direct Mode. While Hyper-V provides
+ * four stimer's per CPU, Linux uses only stimer0.
+ */
+static bool direct_mode_enabled;
+
+static int stimer0_irq;
+static int stimer0_vector;
+static int stimer0_message_sint;
+
+/*
+ * ISR for when stimer0 is operating in Direct Mode. Direct Mode
+ * does not use VMbus or any VMbus messages, so process here and not
+ * in the VMbus driver code.
+ */
+void hv_stimer0_isr(void)
+{
+ struct clock_event_device *ce;
+
+ ce = this_cpu_ptr(hv_clock_event);
+ ce->event_handler(ce);
+}
+EXPORT_SYMBOL_GPL(hv_stimer0_isr);
+
+static int hv_ce_set_next_event(unsigned long delta,
+ struct clock_event_device *evt)
+{
+ u64 current_tick;
+
+ current_tick = hyperv_cs->read(NULL);
+ current_tick += delta;
+ hv_init_timer(0, current_tick);
+ return 0;
+}
+
+static int hv_ce_shutdown(struct clock_event_device *evt)
+{
+ hv_init_timer(0, 0);
+ hv_init_timer_config(0, 0);
+ if (direct_mode_enabled)
+ hv_disable_stimer0_percpu_irq(stimer0_irq);
+
+ return 0;
+}
+
+static int hv_ce_set_oneshot(struct clock_event_device *evt)
+{
+ union hv_stimer_config timer_cfg;
+
+ timer_cfg.as_uint64 = 0;
+ timer_cfg.enable = 1;
+ timer_cfg.auto_enable = 1;
+ if (direct_mode_enabled) {
+ /*
+ * When it expires, the timer will directly interrupt
+ * on the specified hardware vector/IRQ.
+ */
+ timer_cfg.direct_mode = 1;
+ timer_cfg.apic_vector = stimer0_vector;
+ hv_enable_stimer0_percpu_irq(stimer0_irq);
+ } else {
+ /*
+ * When it expires, the timer will generate a VMbus message,
+ * to be handled by the normal VMbus interrupt handler.
+ */
+ timer_cfg.direct_mode = 0;
+ timer_cfg.sintx = stimer0_message_sint;
+ }
+ hv_init_timer_config(0, timer_cfg.as_uint64);
+ return 0;
+}
+
+/*
+ * hv_stimer_init - Per-cpu initialization of the clockevent
+ */
+void hv_stimer_init(unsigned int cpu)
+{
+ struct clock_event_device *ce;
+
+ /*
+ * Synthetic timers are always available except on old versions of
+ * Hyper-V on x86. In that case, just return as Linux will use a
+ * clocksource based on emulated PIT or LAPIC timer hardware.
+ */
+ if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
+ return;
+
+ ce = per_cpu_ptr(hv_clock_event, cpu);
+ ce->name = "Hyper-V clockevent";
+ ce->features = CLOCK_EVT_FEAT_ONESHOT;
+ ce->cpumask = cpumask_of(cpu);
+ ce->rating = 1000;
+ ce->set_state_shutdown = hv_ce_shutdown;
+ ce->set_state_oneshot = hv_ce_set_oneshot;
+ ce->set_next_event = hv_ce_set_next_event;
+
+ clockevents_config_and_register(ce,
+ HV_CLOCK_HZ,
+ HV_MIN_DELTA_TICKS,
+ HV_MAX_MAX_DELTA_TICKS);
+}
+EXPORT_SYMBOL_GPL(hv_stimer_init);
+
+/*
+ * hv_stimer_cleanup - Per-cpu cleanup of the clockevent
+ */
+void hv_stimer_cleanup(unsigned int cpu)
+{
+ struct clock_event_device *ce;
+
+ /* Turn off clockevent device */
+ if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
+ ce = per_cpu_ptr(hv_clock_event, cpu);
+ hv_ce_shutdown(ce);
+ }
+}
+EXPORT_SYMBOL_GPL(hv_stimer_cleanup);
+
+/* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */
+int hv_stimer_alloc(int sint)
+{
+ int ret;
+
+ hv_clock_event = alloc_percpu(struct clock_event_device);
+ if (!hv_clock_event)
+ return -ENOMEM;
+
+ direct_mode_enabled = ms_hyperv.misc_features &
+ HV_STIMER_DIRECT_MODE_AVAILABLE;
+ if (direct_mode_enabled) {
+ ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
+ hv_stimer0_isr);
+ if (ret) {
+ free_percpu(hv_clock_event);
+ hv_clock_event = NULL;
+ return ret;
+ }
+ }
+
+ stimer0_message_sint = sint;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(hv_stimer_alloc);
+
+/* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */
+void hv_stimer_free(void)
+{
+ if (direct_mode_enabled && (stimer0_irq != 0)) {
+ hv_remove_stimer0_irq(stimer0_irq);
+ stimer0_irq = 0;
+ }
+ free_percpu(hv_clock_event);
+ hv_clock_event = NULL;
+}
+EXPORT_SYMBOL_GPL(hv_stimer_free);
+
+/*
+ * Do a global cleanup of clockevents for the cases of kexec and
+ * vmbus exit
+ */
+void hv_stimer_global_cleanup(void)
+{
+ int cpu;
+ struct clock_event_device *ce;
+
+ if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
+ for_each_present_cpu(cpu) {
+ ce = per_cpu_ptr(hv_clock_event, cpu);
+ clockevents_unbind_device(ce, cpu);
+ }
+ }
+ hv_stimer_free();
+}
+EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup);
+
+/*
+ * Code and definitions for the Hyper-V clocksources. Two
+ * clocksources are defined: one that reads the Hyper-V defined MSR, and
+ * the other that uses the TSC reference page feature as defined in the
+ * TLFS. The MSR version is for compatibility with old versions of
+ * Hyper-V and 32-bit x86. The TSC reference page version is preferred.
+ */
+
+struct clocksource *hyperv_cs;
+EXPORT_SYMBOL_GPL(hyperv_cs);
+
+#ifdef CONFIG_HYPERV_TSCPAGE
+
+static struct ms_hyperv_tsc_page *tsc_pg;
+
+struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
+{
+ return tsc_pg;
+}
+EXPORT_SYMBOL_GPL(hv_get_tsc_page);
+
+static u64 notrace read_hv_sched_clock_tsc(void)
+{
+ u64 current_tick = hv_read_tsc_page(tsc_pg);
+
+ if (current_tick == U64_MAX)
+ hv_get_time_ref_count(current_tick);
+
+ return current_tick;
+}
+
+static u64 read_hv_clock_tsc(struct clocksource *arg)
+{
+ return read_hv_sched_clock_tsc();
+}
+
+static struct clocksource hyperv_cs_tsc = {
+ .name = "hyperv_clocksource_tsc_page",
+ .rating = 400,
+ .read = read_hv_clock_tsc,
+ .mask = CLOCKSOURCE_MASK(64),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+#endif
+
+static u64 notrace read_hv_sched_clock_msr(void)
+{
+ u64 current_tick;
+ /*
+ * Read the partition counter to get the current tick count. This count
+ * is set to 0 when the partition is created and is incremented in
+ * 100 nanosecond units.
+ */
+ hv_get_time_ref_count(current_tick);
+ return current_tick;
+}
+
+static u64 read_hv_clock_msr(struct clocksource *arg)
+{
+ return read_hv_sched_clock_msr();
+}
+
+static struct clocksource hyperv_cs_msr = {
+ .name = "hyperv_clocksource_msr",
+ .rating = 400,
+ .read = read_hv_clock_msr,
+ .mask = CLOCKSOURCE_MASK(64),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+#ifdef CONFIG_HYPERV_TSCPAGE
+static bool __init hv_init_tsc_clocksource(void)
+{
+ u64 tsc_msr;
+ phys_addr_t phys_addr;
+
+ if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
+ return false;
+
+ tsc_pg = vmalloc(PAGE_SIZE);
+ if (!tsc_pg)
+ return false;
+
+ hyperv_cs = &hyperv_cs_tsc;
+ phys_addr = page_to_phys(vmalloc_to_page(tsc_pg));
+
+ /*
+ * The Hyper-V TLFS specifies to preserve the value of reserved
+ * bits in registers. So read the existing value, preserve the
+ * low order 12 bits, and add in the guest physical address
+ * (which already has at least the low 12 bits set to zero since
+ * it is page aligned). Also set the "enable" bit, which is bit 0.
+ */
+ hv_get_reference_tsc(tsc_msr);
+ tsc_msr &= GENMASK_ULL(11, 0);
+ tsc_msr = tsc_msr | 0x1 | (u64)phys_addr;
+ hv_set_reference_tsc(tsc_msr);
+
+ hv_set_clocksource_vdso(hyperv_cs_tsc);
+ clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
+
+ /* sched_clock_register is needed on ARM64 but is a no-op on x86 */
+ sched_clock_register(read_hv_sched_clock_tsc, 64, HV_CLOCK_HZ);
+ return true;
+}
+#else
+static bool __init hv_init_tsc_clocksource(void)
+{
+ return false;
+}
+#endif
+
+
+void __init hv_init_clocksource(void)
+{
+ /*
+ * Try to set up the TSC page clocksource. If it succeeds, we're
+ * done. Otherwise, set up the MSR clocksoruce. At least one of
+ * these will always be available except on very old versions of
+ * Hyper-V on x86. In that case we won't have a Hyper-V
+ * clocksource, but Linux will still run with a clocksource based
+ * on the emulated PIT or LAPIC timer.
+ */
+ if (hv_init_tsc_clocksource())
+ return;
+
+ if (!(ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE))
+ return;
+
+ hyperv_cs = &hyperv_cs_msr;
+ clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
+
+ /* sched_clock_register is needed on ARM64 but is a no-op on x86 */
+ sched_clock_register(read_hv_sched_clock_msr, 64, HV_CLOCK_HZ);
+}
+EXPORT_SYMBOL_GPL(hv_init_clocksource);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * TI DaVinci clocksource driver
+ *
+ * Copyright (C) 2019 Texas Instruments
+ * Author: Bartosz Golaszewski <bgolaszewski@baylibre.com>
+ * (with tiny parts adopted from code by Kevin Hilman <khilman@baylibre.com>)
+ */
+
+#include <linux/clk.h>
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/sched_clock.h>
+
+#include <clocksource/timer-davinci.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt) "%s: " fmt "\n", __func__
+
+#define DAVINCI_TIMER_REG_TIM12 0x10
+#define DAVINCI_TIMER_REG_TIM34 0x14
+#define DAVINCI_TIMER_REG_PRD12 0x18
+#define DAVINCI_TIMER_REG_PRD34 0x1c
+#define DAVINCI_TIMER_REG_TCR 0x20
+#define DAVINCI_TIMER_REG_TGCR 0x24
+
+#define DAVINCI_TIMER_TIMMODE_MASK GENMASK(3, 2)
+#define DAVINCI_TIMER_RESET_MASK GENMASK(1, 0)
+#define DAVINCI_TIMER_TIMMODE_32BIT_UNCHAINED BIT(2)
+#define DAVINCI_TIMER_UNRESET GENMASK(1, 0)
+
+#define DAVINCI_TIMER_ENAMODE_MASK GENMASK(1, 0)
+#define DAVINCI_TIMER_ENAMODE_DISABLED 0x00
+#define DAVINCI_TIMER_ENAMODE_ONESHOT BIT(0)
+#define DAVINCI_TIMER_ENAMODE_PERIODIC BIT(1)
+
+#define DAVINCI_TIMER_ENAMODE_SHIFT_TIM12 6
+#define DAVINCI_TIMER_ENAMODE_SHIFT_TIM34 22
+
+#define DAVINCI_TIMER_MIN_DELTA 0x01
+#define DAVINCI_TIMER_MAX_DELTA 0xfffffffe
+
+#define DAVINCI_TIMER_CLKSRC_BITS 32
+
+#define DAVINCI_TIMER_TGCR_DEFAULT \
+ (DAVINCI_TIMER_TIMMODE_32BIT_UNCHAINED | DAVINCI_TIMER_UNRESET)
+
+struct davinci_clockevent {
+ struct clock_event_device dev;
+ void __iomem *base;
+ unsigned int cmp_off;
+};
+
+/*
+ * This must be globally accessible by davinci_timer_read_sched_clock(), so
+ * let's keep it here.
+ */
+static struct {
+ struct clocksource dev;
+ void __iomem *base;
+ unsigned int tim_off;
+} davinci_clocksource;
+
+static struct davinci_clockevent *
+to_davinci_clockevent(struct clock_event_device *clockevent)
+{
+ return container_of(clockevent, struct davinci_clockevent, dev);
+}
+
+static unsigned int
+davinci_clockevent_read(struct davinci_clockevent *clockevent,
+ unsigned int reg)
+{
+ return readl_relaxed(clockevent->base + reg);
+}
+
+static void davinci_clockevent_write(struct davinci_clockevent *clockevent,
+ unsigned int reg, unsigned int val)
+{
+ writel_relaxed(val, clockevent->base + reg);
+}
+
+static void davinci_tim12_shutdown(void __iomem *base)
+{
+ unsigned int tcr;
+
+ tcr = DAVINCI_TIMER_ENAMODE_DISABLED <<
+ DAVINCI_TIMER_ENAMODE_SHIFT_TIM12;
+ /*
+ * This function is only ever called if we're using both timer
+ * halves. In this case TIM34 runs in periodic mode and we must
+ * not modify it.
+ */
+ tcr |= DAVINCI_TIMER_ENAMODE_PERIODIC <<
+ DAVINCI_TIMER_ENAMODE_SHIFT_TIM34;
+
+ writel_relaxed(tcr, base + DAVINCI_TIMER_REG_TCR);
+}
+
+static void davinci_tim12_set_oneshot(void __iomem *base)
+{
+ unsigned int tcr;
+
+ tcr = DAVINCI_TIMER_ENAMODE_ONESHOT <<
+ DAVINCI_TIMER_ENAMODE_SHIFT_TIM12;
+ /* Same as above. */
+ tcr |= DAVINCI_TIMER_ENAMODE_PERIODIC <<
+ DAVINCI_TIMER_ENAMODE_SHIFT_TIM34;
+
+ writel_relaxed(tcr, base + DAVINCI_TIMER_REG_TCR);
+}
+
+static int davinci_clockevent_shutdown(struct clock_event_device *dev)
+{
+ struct davinci_clockevent *clockevent;
+
+ clockevent = to_davinci_clockevent(dev);
+
+ davinci_tim12_shutdown(clockevent->base);
+
+ return 0;
+}
+
+static int davinci_clockevent_set_oneshot(struct clock_event_device *dev)
+{
+ struct davinci_clockevent *clockevent = to_davinci_clockevent(dev);
+
+ davinci_clockevent_write(clockevent, DAVINCI_TIMER_REG_TIM12, 0x0);
+
+ davinci_tim12_set_oneshot(clockevent->base);
+
+ return 0;
+}
+
+static int
+davinci_clockevent_set_next_event_std(unsigned long cycles,
+ struct clock_event_device *dev)
+{
+ struct davinci_clockevent *clockevent = to_davinci_clockevent(dev);
+
+ davinci_clockevent_shutdown(dev);
+
+ davinci_clockevent_write(clockevent, DAVINCI_TIMER_REG_TIM12, 0x0);
+ davinci_clockevent_write(clockevent, DAVINCI_TIMER_REG_PRD12, cycles);
+
+ davinci_clockevent_set_oneshot(dev);
+
+ return 0;
+}
+
+static int
+davinci_clockevent_set_next_event_cmp(unsigned long cycles,
+ struct clock_event_device *dev)
+{
+ struct davinci_clockevent *clockevent = to_davinci_clockevent(dev);
+ unsigned int curr_time;
+
+ curr_time = davinci_clockevent_read(clockevent,
+ DAVINCI_TIMER_REG_TIM12);
+ davinci_clockevent_write(clockevent,
+ clockevent->cmp_off, curr_time + cycles);
+
+ return 0;
+}
+
+static irqreturn_t davinci_timer_irq_timer(int irq, void *data)
+{
+ struct davinci_clockevent *clockevent = data;
+
+ if (!clockevent_state_oneshot(&clockevent->dev))
+ davinci_tim12_shutdown(clockevent->base);
+
+ clockevent->dev.event_handler(&clockevent->dev);
+
+ return IRQ_HANDLED;
+}
+
+static u64 notrace davinci_timer_read_sched_clock(void)
+{
+ return readl_relaxed(davinci_clocksource.base +
+ davinci_clocksource.tim_off);
+}
+
+static u64 davinci_clocksource_read(struct clocksource *dev)
+{
+ return davinci_timer_read_sched_clock();
+}
+
+/*
+ * Standard use-case: we're using tim12 for clockevent and tim34 for
+ * clocksource. The default is making the former run in oneshot mode
+ * and the latter in periodic mode.
+ */
+static void davinci_clocksource_init_tim34(void __iomem *base)
+{
+ int tcr;
+
+ tcr = DAVINCI_TIMER_ENAMODE_PERIODIC <<
+ DAVINCI_TIMER_ENAMODE_SHIFT_TIM34;
+ tcr |= DAVINCI_TIMER_ENAMODE_ONESHOT <<
+ DAVINCI_TIMER_ENAMODE_SHIFT_TIM12;
+
+ writel_relaxed(0x0, base + DAVINCI_TIMER_REG_TIM34);
+ writel_relaxed(UINT_MAX, base + DAVINCI_TIMER_REG_PRD34);
+ writel_relaxed(tcr, base + DAVINCI_TIMER_REG_TCR);
+}
+
+/*
+ * Special use-case on da830: the DSP may use tim34. We're using tim12 for
+ * both clocksource and clockevent. We set tim12 to periodic and don't touch
+ * tim34.
+ */
+static void davinci_clocksource_init_tim12(void __iomem *base)
+{
+ unsigned int tcr;
+
+ tcr = DAVINCI_TIMER_ENAMODE_PERIODIC <<
+ DAVINCI_TIMER_ENAMODE_SHIFT_TIM12;
+
+ writel_relaxed(0x0, base + DAVINCI_TIMER_REG_TIM12);
+ writel_relaxed(UINT_MAX, base + DAVINCI_TIMER_REG_PRD12);
+ writel_relaxed(tcr, base + DAVINCI_TIMER_REG_TCR);
+}
+
+static void davinci_timer_init(void __iomem *base)
+{
+ /* Set clock to internal mode and disable it. */
+ writel_relaxed(0x0, base + DAVINCI_TIMER_REG_TCR);
+ /*
+ * Reset both 32-bit timers, set no prescaler for timer 34, set the
+ * timer to dual 32-bit unchained mode, unreset both 32-bit timers.
+ */
+ writel_relaxed(DAVINCI_TIMER_TGCR_DEFAULT,
+ base + DAVINCI_TIMER_REG_TGCR);
+ /* Init both counters to zero. */
+ writel_relaxed(0x0, base + DAVINCI_TIMER_REG_TIM12);
+ writel_relaxed(0x0, base + DAVINCI_TIMER_REG_TIM34);
+}
+
+int __init davinci_timer_register(struct clk *clk,
+ const struct davinci_timer_cfg *timer_cfg)
+{
+ struct davinci_clockevent *clockevent;
+ unsigned int tick_rate;
+ void __iomem *base;
+ int rv;
+
+ rv = clk_prepare_enable(clk);
+ if (rv) {
+ pr_err("Unable to prepare and enable the timer clock");
+ return rv;
+ }
+
+ if (!request_mem_region(timer_cfg->reg.start,
+ resource_size(&timer_cfg->reg),
+ "davinci-timer")) {
+ pr_err("Unable to request memory region");
+ return -EBUSY;
+ }
+
+ base = ioremap(timer_cfg->reg.start, resource_size(&timer_cfg->reg));
+ if (!base) {
+ pr_err("Unable to map the register range");
+ return -ENOMEM;
+ }
+
+ davinci_timer_init(base);
+ tick_rate = clk_get_rate(clk);
+
+ clockevent = kzalloc(sizeof(*clockevent), GFP_KERNEL | __GFP_NOFAIL);
+ if (!clockevent) {
+ pr_err("Error allocating memory for clockevent data");
+ return -ENOMEM;
+ }
+
+ clockevent->dev.name = "tim12";
+ clockevent->dev.features = CLOCK_EVT_FEAT_ONESHOT;
+ clockevent->dev.cpumask = cpumask_of(0);
+ clockevent->base = base;
+
+ if (timer_cfg->cmp_off) {
+ clockevent->cmp_off = timer_cfg->cmp_off;
+ clockevent->dev.set_next_event =
+ davinci_clockevent_set_next_event_cmp;
+ } else {
+ clockevent->dev.set_next_event =
+ davinci_clockevent_set_next_event_std;
+ clockevent->dev.set_state_oneshot =
+ davinci_clockevent_set_oneshot;
+ clockevent->dev.set_state_shutdown =
+ davinci_clockevent_shutdown;
+ }
+
+ rv = request_irq(timer_cfg->irq[DAVINCI_TIMER_CLOCKEVENT_IRQ].start,
+ davinci_timer_irq_timer, IRQF_TIMER,
+ "clockevent/tim12", clockevent);
+ if (rv) {
+ pr_err("Unable to request the clockevent interrupt");
+ return rv;
+ }
+
+ clockevents_config_and_register(&clockevent->dev, tick_rate,
+ DAVINCI_TIMER_MIN_DELTA,
+ DAVINCI_TIMER_MAX_DELTA);
+
+ davinci_clocksource.dev.rating = 300;
+ davinci_clocksource.dev.read = davinci_clocksource_read;
+ davinci_clocksource.dev.mask =
+ CLOCKSOURCE_MASK(DAVINCI_TIMER_CLKSRC_BITS);
+ davinci_clocksource.dev.flags = CLOCK_SOURCE_IS_CONTINUOUS;
+ davinci_clocksource.base = base;
+
+ if (timer_cfg->cmp_off) {
+ davinci_clocksource.dev.name = "tim12";
+ davinci_clocksource.tim_off = DAVINCI_TIMER_REG_TIM12;
+ davinci_clocksource_init_tim12(base);
+ } else {
+ davinci_clocksource.dev.name = "tim34";
+ davinci_clocksource.tim_off = DAVINCI_TIMER_REG_TIM34;
+ davinci_clocksource_init_tim34(base);
+ }
+
+ rv = clocksource_register_hz(&davinci_clocksource.dev, tick_rate);
+ if (rv) {
+ pr_err("Unable to register clocksource");
+ return rv;
+ }
+
+ sched_clock_register(davinci_timer_read_sched_clock,
+ DAVINCI_TIMER_CLKSRC_BITS, tick_rate);
+
+ return 0;
+}
+
+static int __init of_davinci_timer_register(struct device_node *np)
+{
+ struct davinci_timer_cfg timer_cfg = { };
+ struct clk *clk;
+ int rv;
+
+ rv = of_address_to_resource(np, 0, &timer_cfg.reg);
+ if (rv) {
+ pr_err("Unable to get the register range for timer");
+ return rv;
+ }
+
+ rv = of_irq_to_resource_table(np, timer_cfg.irq,
+ DAVINCI_TIMER_NUM_IRQS);
+ if (rv != DAVINCI_TIMER_NUM_IRQS) {
+ pr_err("Unable to get the interrupts for timer");
+ return rv;
+ }
+
+ clk = of_clk_get(np, 0);
+ if (IS_ERR(clk)) {
+ pr_err("Unable to get the timer clock");
+ return PTR_ERR(clk);
+ }
+
+ rv = davinci_timer_register(clk, &timer_cfg);
+ if (rv)
+ clk_put(clk);
+
+ return rv;
+}
+TIMER_OF_DECLARE(davinci_timer, "ti,da830-timer", of_davinci_timer_register);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2017-2019 NXP
+
+#include <linux/interrupt.h>
+#include <linux/clockchips.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include "timer-of.h"
+
+#define CMP_OFFSET 0x10000
+
+#define CNTCV_LO 0x8
+#define CNTCV_HI 0xc
+#define CMPCV_LO (CMP_OFFSET + 0x20)
+#define CMPCV_HI (CMP_OFFSET + 0x24)
+#define CMPCR (CMP_OFFSET + 0x2c)
+
+#define SYS_CTR_EN 0x1
+#define SYS_CTR_IRQ_MASK 0x2
+
+static void __iomem *sys_ctr_base;
+static u32 cmpcr;
+
+static void sysctr_timer_enable(bool enable)
+{
+ writel(enable ? cmpcr | SYS_CTR_EN : cmpcr, sys_ctr_base + CMPCR);
+}
+
+static void sysctr_irq_acknowledge(void)
+{
+ /*
+ * clear the enable bit(EN =0) will clear
+ * the status bit(ISTAT = 0), then the interrupt
+ * signal will be negated(acknowledged).
+ */
+ sysctr_timer_enable(false);
+}
+
+static inline u64 sysctr_read_counter(void)
+{
+ u32 cnt_hi, tmp_hi, cnt_lo;
+
+ do {
+ cnt_hi = readl_relaxed(sys_ctr_base + CNTCV_HI);
+ cnt_lo = readl_relaxed(sys_ctr_base + CNTCV_LO);
+ tmp_hi = readl_relaxed(sys_ctr_base + CNTCV_HI);
+ } while (tmp_hi != cnt_hi);
+
+ return ((u64) cnt_hi << 32) | cnt_lo;
+}
+
+static int sysctr_set_next_event(unsigned long delta,
+ struct clock_event_device *evt)
+{
+ u32 cmp_hi, cmp_lo;
+ u64 next;
+
+ sysctr_timer_enable(false);
+
+ next = sysctr_read_counter();
+
+ next += delta;
+
+ cmp_hi = (next >> 32) & 0x00fffff;
+ cmp_lo = next & 0xffffffff;
+
+ writel_relaxed(cmp_hi, sys_ctr_base + CMPCV_HI);
+ writel_relaxed(cmp_lo, sys_ctr_base + CMPCV_LO);
+
+ sysctr_timer_enable(true);
+
+ return 0;
+}
+
+static int sysctr_set_state_oneshot(struct clock_event_device *evt)
+{
+ return 0;
+}
+
+static int sysctr_set_state_shutdown(struct clock_event_device *evt)
+{
+ sysctr_timer_enable(false);
+
+ return 0;
+}
+
+static irqreturn_t sysctr_timer_interrupt(int irq, void *dev_id)
+{
+ struct clock_event_device *evt = dev_id;
+
+ sysctr_irq_acknowledge();
+
+ evt->event_handler(evt);
+
+ return IRQ_HANDLED;
+}
+
+static struct timer_of to_sysctr = {
+ .flags = TIMER_OF_IRQ | TIMER_OF_CLOCK | TIMER_OF_BASE,
+ .clkevt = {
+ .name = "i.MX system counter timer",
+ .features = CLOCK_EVT_FEAT_ONESHOT |
+ CLOCK_EVT_FEAT_DYNIRQ,
+ .set_state_oneshot = sysctr_set_state_oneshot,
+ .set_next_event = sysctr_set_next_event,
+ .set_state_shutdown = sysctr_set_state_shutdown,
+ .rating = 200,
+ },
+ .of_irq = {
+ .handler = sysctr_timer_interrupt,
+ .flags = IRQF_TIMER | IRQF_IRQPOLL,
+ },
+ .of_clk = {
+ .name = "per",
+ },
+};
+
+static void __init sysctr_clockevent_init(void)
+{
+ to_sysctr.clkevt.cpumask = cpumask_of(0);
+
+ clockevents_config_and_register(&to_sysctr.clkevt,
+ timer_of_rate(&to_sysctr),
+ 0xff, 0x7fffffff);
+}
+
+static int __init sysctr_timer_init(struct device_node *np)
+{
+ int ret = 0;
+
+ ret = timer_of_init(np, &to_sysctr);
+ if (ret)
+ return ret;
+
+ sys_ctr_base = timer_of_base(&to_sysctr);
+ cmpcr = readl(sys_ctr_base + CMPCR);
+ cmpcr &= ~SYS_CTR_EN;
+
+ sysctr_clockevent_init();
+
+ return 0;
+}
+TIMER_OF_DECLARE(sysctr_timer, "nxp,sysctr-timer", sysctr_timer_init);
return container_of(evt, struct ixp4xx_timer, clkevt);
}
-static u64 notrace ixp4xx_read_sched_clock(void)
+static unsigned long ixp4xx_read_timer(void)
{
return __raw_readl(local_ixp4xx_timer->base + IXP4XX_OSTS_OFFSET);
}
+static u64 notrace ixp4xx_read_sched_clock(void)
+{
+ return ixp4xx_read_timer();
+}
+
static u64 ixp4xx_clocksource_read(struct clocksource *c)
{
- return __raw_readl(local_ixp4xx_timer->base + IXP4XX_OSTS_OFFSET);
+ return ixp4xx_read_timer();
}
static irqreturn_t ixp4xx_timer_interrupt(int irq, void *dev_id)
sched_clock_register(ixp4xx_read_sched_clock, 32, timer_freq);
+#ifdef CONFIG_ARM
+ /* Also use this timer for delays */
+ tmr->delay_timer.read_current_timer = ixp4xx_read_timer;
+ tmr->delay_timer.freq = timer_freq;
+ register_current_timer_delay(&tmr->delay_timer);
+#endif
+
return 0;
}
+// SPDX-License-Identifier: GPL-2.0
/*
* Amlogic Meson6 SoCs timer handling.
*
* Copyright (C) 2014 Carlo Caione <carlo@caione.org>
*
* Based on code from Amlogic, Inc
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
*/
#include <linux/bitfield.h>
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * Author:
+ * Colin Cross <ccross@google.com>
+ */
+
+#define pr_fmt(fmt) "tegra-timer: " fmt
+
+#include <linux/clk.h>
+#include <linux/clockchips.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/percpu.h>
+#include <linux/sched_clock.h>
+#include <linux/time.h>
+
+#include "timer-of.h"
+
+#define RTC_SECONDS 0x08
+#define RTC_SHADOW_SECONDS 0x0c
+#define RTC_MILLISECONDS 0x10
+
+#define TIMERUS_CNTR_1US 0x10
+#define TIMERUS_USEC_CFG 0x14
+#define TIMERUS_CNTR_FREEZE 0x4c
+
+#define TIMER_PTV 0x0
+#define TIMER_PTV_EN BIT(31)
+#define TIMER_PTV_PER BIT(30)
+#define TIMER_PCR 0x4
+#define TIMER_PCR_INTR_CLR BIT(30)
+
+#define TIMER1_BASE 0x00
+#define TIMER2_BASE 0x08
+#define TIMER3_BASE 0x50
+#define TIMER4_BASE 0x58
+#define TIMER10_BASE 0x90
+
+#define TIMER1_IRQ_IDX 0
+#define TIMER10_IRQ_IDX 10
+
+#define TIMER_1MHz 1000000
+
+static u32 usec_config;
+static void __iomem *timer_reg_base;
+
+static int tegra_timer_set_next_event(unsigned long cycles,
+ struct clock_event_device *evt)
+{
+ void __iomem *reg_base = timer_of_base(to_timer_of(evt));
+
+ /*
+ * Tegra's timer uses n+1 scheme for the counter, i.e. timer will
+ * fire after one tick if 0 is loaded.
+ *
+ * The minimum and maximum numbers of oneshot ticks are defined
+ * by clockevents_config_and_register(1, 0x1fffffff + 1) invocation
+ * below in the code. Hence the cycles (ticks) can't be outside of
+ * a range supportable by hardware.
+ */
+ writel_relaxed(TIMER_PTV_EN | (cycles - 1), reg_base + TIMER_PTV);
+
+ return 0;
+}
+
+static int tegra_timer_shutdown(struct clock_event_device *evt)
+{
+ void __iomem *reg_base = timer_of_base(to_timer_of(evt));
+
+ writel_relaxed(0, reg_base + TIMER_PTV);
+
+ return 0;
+}
+
+static int tegra_timer_set_periodic(struct clock_event_device *evt)
+{
+ void __iomem *reg_base = timer_of_base(to_timer_of(evt));
+ unsigned long period = timer_of_period(to_timer_of(evt));
+
+ writel_relaxed(TIMER_PTV_EN | TIMER_PTV_PER | (period - 1),
+ reg_base + TIMER_PTV);
+
+ return 0;
+}
+
+static irqreturn_t tegra_timer_isr(int irq, void *dev_id)
+{
+ struct clock_event_device *evt = dev_id;
+ void __iomem *reg_base = timer_of_base(to_timer_of(evt));
+
+ writel_relaxed(TIMER_PCR_INTR_CLR, reg_base + TIMER_PCR);
+ evt->event_handler(evt);
+
+ return IRQ_HANDLED;
+}
+
+static void tegra_timer_suspend(struct clock_event_device *evt)
+{
+ void __iomem *reg_base = timer_of_base(to_timer_of(evt));
+
+ writel_relaxed(TIMER_PCR_INTR_CLR, reg_base + TIMER_PCR);
+}
+
+static void tegra_timer_resume(struct clock_event_device *evt)
+{
+ writel_relaxed(usec_config, timer_reg_base + TIMERUS_USEC_CFG);
+}
+
+static DEFINE_PER_CPU(struct timer_of, tegra_to) = {
+ .flags = TIMER_OF_CLOCK | TIMER_OF_BASE,
+
+ .clkevt = {
+ .name = "tegra_timer",
+ .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
+ .set_next_event = tegra_timer_set_next_event,
+ .set_state_shutdown = tegra_timer_shutdown,
+ .set_state_periodic = tegra_timer_set_periodic,
+ .set_state_oneshot = tegra_timer_shutdown,
+ .tick_resume = tegra_timer_shutdown,
+ .suspend = tegra_timer_suspend,
+ .resume = tegra_timer_resume,
+ },
+};
+
+static int tegra_timer_setup(unsigned int cpu)
+{
+ struct timer_of *to = per_cpu_ptr(&tegra_to, cpu);
+
+ writel_relaxed(0, timer_of_base(to) + TIMER_PTV);
+ writel_relaxed(TIMER_PCR_INTR_CLR, timer_of_base(to) + TIMER_PCR);
+
+ irq_force_affinity(to->clkevt.irq, cpumask_of(cpu));
+ enable_irq(to->clkevt.irq);
+
+ /*
+ * Tegra's timer uses n+1 scheme for the counter, i.e. timer will
+ * fire after one tick if 0 is loaded and thus minimum number of
+ * ticks is 1. In result both of the clocksource's tick limits are
+ * higher than a minimum and maximum that hardware register can
+ * take by 1, this is then taken into account by set_next_event
+ * callback.
+ */
+ clockevents_config_and_register(&to->clkevt, timer_of_rate(to),
+ 1, /* min */
+ 0x1fffffff + 1); /* max 29 bits + 1 */
+
+ return 0;
+}
+
+static int tegra_timer_stop(unsigned int cpu)
+{
+ struct timer_of *to = per_cpu_ptr(&tegra_to, cpu);
+
+ to->clkevt.set_state_shutdown(&to->clkevt);
+ disable_irq_nosync(to->clkevt.irq);
+
+ return 0;
+}
+
+static u64 notrace tegra_read_sched_clock(void)
+{
+ return readl_relaxed(timer_reg_base + TIMERUS_CNTR_1US);
+}
+
+#ifdef CONFIG_ARM
+static unsigned long tegra_delay_timer_read_counter_long(void)
+{
+ return readl_relaxed(timer_reg_base + TIMERUS_CNTR_1US);
+}
+
+static struct delay_timer tegra_delay_timer = {
+ .read_current_timer = tegra_delay_timer_read_counter_long,
+ .freq = TIMER_1MHz,
+};
+#endif
+
+static struct timer_of suspend_rtc_to = {
+ .flags = TIMER_OF_BASE | TIMER_OF_CLOCK,
+};
+
+/*
+ * tegra_rtc_read - Reads the Tegra RTC registers
+ * Care must be taken that this function is not called while the
+ * tegra_rtc driver could be executing to avoid race conditions
+ * on the RTC shadow register
+ */
+static u64 tegra_rtc_read_ms(struct clocksource *cs)
+{
+ void __iomem *reg_base = timer_of_base(&suspend_rtc_to);
+
+ u32 ms = readl_relaxed(reg_base + RTC_MILLISECONDS);
+ u32 s = readl_relaxed(reg_base + RTC_SHADOW_SECONDS);
+
+ return (u64)s * MSEC_PER_SEC + ms;
+}
+
+static struct clocksource suspend_rtc_clocksource = {
+ .name = "tegra_suspend_timer",
+ .rating = 200,
+ .read = tegra_rtc_read_ms,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_SUSPEND_NONSTOP,
+};
+
+static inline unsigned int tegra_base_for_cpu(int cpu, bool tegra20)
+{
+ if (tegra20) {
+ switch (cpu) {
+ case 0:
+ return TIMER1_BASE;
+ case 1:
+ return TIMER2_BASE;
+ case 2:
+ return TIMER3_BASE;
+ default:
+ return TIMER4_BASE;
+ }
+ }
+
+ return TIMER10_BASE + cpu * 8;
+}
+
+static inline unsigned int tegra_irq_idx_for_cpu(int cpu, bool tegra20)
+{
+ if (tegra20)
+ return TIMER1_IRQ_IDX + cpu;
+
+ return TIMER10_IRQ_IDX + cpu;
+}
+
+static inline unsigned long tegra_rate_for_timer(struct timer_of *to,
+ bool tegra20)
+{
+ /*
+ * TIMER1-9 are fixed to 1MHz, TIMER10-13 are running off the
+ * parent clock.
+ */
+ if (tegra20)
+ return TIMER_1MHz;
+
+ return timer_of_rate(to);
+}
+
+static int __init tegra_init_timer(struct device_node *np, bool tegra20,
+ int rating)
+{
+ struct timer_of *to;
+ int cpu, ret;
+
+ to = this_cpu_ptr(&tegra_to);
+ ret = timer_of_init(np, to);
+ if (ret)
+ goto out;
+
+ timer_reg_base = timer_of_base(to);
+
+ /*
+ * Configure microsecond timers to have 1MHz clock
+ * Config register is 0xqqww, where qq is "dividend", ww is "divisor"
+ * Uses n+1 scheme
+ */
+ switch (timer_of_rate(to)) {
+ case 12000000:
+ usec_config = 0x000b; /* (11+1)/(0+1) */
+ break;
+ case 12800000:
+ usec_config = 0x043f; /* (63+1)/(4+1) */
+ break;
+ case 13000000:
+ usec_config = 0x000c; /* (12+1)/(0+1) */
+ break;
+ case 16800000:
+ usec_config = 0x0453; /* (83+1)/(4+1) */
+ break;
+ case 19200000:
+ usec_config = 0x045f; /* (95+1)/(4+1) */
+ break;
+ case 26000000:
+ usec_config = 0x0019; /* (25+1)/(0+1) */
+ break;
+ case 38400000:
+ usec_config = 0x04bf; /* (191+1)/(4+1) */
+ break;
+ case 48000000:
+ usec_config = 0x002f; /* (47+1)/(0+1) */
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+
+ writel_relaxed(usec_config, timer_reg_base + TIMERUS_USEC_CFG);
+
+ for_each_possible_cpu(cpu) {
+ struct timer_of *cpu_to = per_cpu_ptr(&tegra_to, cpu);
+ unsigned long flags = IRQF_TIMER | IRQF_NOBALANCING;
+ unsigned long rate = tegra_rate_for_timer(to, tegra20);
+ unsigned int base = tegra_base_for_cpu(cpu, tegra20);
+ unsigned int idx = tegra_irq_idx_for_cpu(cpu, tegra20);
+ unsigned int irq = irq_of_parse_and_map(np, idx);
+
+ if (!irq) {
+ pr_err("failed to map irq for cpu%d\n", cpu);
+ ret = -EINVAL;
+ goto out_irq;
+ }
+
+ cpu_to->clkevt.irq = irq;
+ cpu_to->clkevt.rating = rating;
+ cpu_to->clkevt.cpumask = cpumask_of(cpu);
+ cpu_to->of_base.base = timer_reg_base + base;
+ cpu_to->of_clk.period = rate / HZ;
+ cpu_to->of_clk.rate = rate;
+
+ irq_set_status_flags(cpu_to->clkevt.irq, IRQ_NOAUTOEN);
+
+ ret = request_irq(cpu_to->clkevt.irq, tegra_timer_isr, flags,
+ cpu_to->clkevt.name, &cpu_to->clkevt);
+ if (ret) {
+ pr_err("failed to set up irq for cpu%d: %d\n",
+ cpu, ret);
+ irq_dispose_mapping(cpu_to->clkevt.irq);
+ cpu_to->clkevt.irq = 0;
+ goto out_irq;
+ }
+ }
+
+ sched_clock_register(tegra_read_sched_clock, 32, TIMER_1MHz);
+
+ ret = clocksource_mmio_init(timer_reg_base + TIMERUS_CNTR_1US,
+ "timer_us", TIMER_1MHz, 300, 32,
+ clocksource_mmio_readl_up);
+ if (ret)
+ pr_err("failed to register clocksource: %d\n", ret);
+
+#ifdef CONFIG_ARM
+ register_current_timer_delay(&tegra_delay_timer);
+#endif
+
+ ret = cpuhp_setup_state(CPUHP_AP_TEGRA_TIMER_STARTING,
+ "AP_TEGRA_TIMER_STARTING", tegra_timer_setup,
+ tegra_timer_stop);
+ if (ret)
+ pr_err("failed to set up cpu hp state: %d\n", ret);
+
+ return ret;
+
+out_irq:
+ for_each_possible_cpu(cpu) {
+ struct timer_of *cpu_to;
+
+ cpu_to = per_cpu_ptr(&tegra_to, cpu);
+ if (cpu_to->clkevt.irq) {
+ free_irq(cpu_to->clkevt.irq, &cpu_to->clkevt);
+ irq_dispose_mapping(cpu_to->clkevt.irq);
+ }
+ }
+
+ to->of_base.base = timer_reg_base;
+out:
+ timer_of_cleanup(to);
+
+ return ret;
+}
+
+static int __init tegra210_init_timer(struct device_node *np)
+{
+ /*
+ * Arch-timer can't survive across power cycle of CPU core and
+ * after CPUPORESET signal due to a system design shortcoming,
+ * hence tegra-timer is more preferable on Tegra210.
+ */
+ return tegra_init_timer(np, false, 460);
+}
+TIMER_OF_DECLARE(tegra210_timer, "nvidia,tegra210-timer", tegra210_init_timer);
+
+static int __init tegra20_init_timer(struct device_node *np)
+{
+ int rating;
+
+ /*
+ * Tegra20 and Tegra30 have Cortex A9 CPU that has a TWD timer,
+ * that timer runs off the CPU clock and hence is subjected to
+ * a jitter caused by DVFS clock rate changes. Tegra-timer is
+ * more preferable for older Tegra's, while later SoC generations
+ * have arch-timer as a main per-CPU timer and it is not affected
+ * by DVFS changes.
+ */
+ if (of_machine_is_compatible("nvidia,tegra20") ||
+ of_machine_is_compatible("nvidia,tegra30"))
+ rating = 460;
+ else
+ rating = 330;
+
+ return tegra_init_timer(np, true, rating);
+}
+TIMER_OF_DECLARE(tegra20_timer, "nvidia,tegra20-timer", tegra20_init_timer);
+
+static int __init tegra20_init_rtc(struct device_node *np)
+{
+ int ret;
+
+ ret = timer_of_init(np, &suspend_rtc_to);
+ if (ret)
+ return ret;
+
+ return clocksource_register_hz(&suspend_rtc_clocksource, 1000);
+}
+TIMER_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc);
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2010 Google, Inc.
- *
- * Author:
- * Colin Cross <ccross@google.com>
- */
-
-#include <linux/clk.h>
-#include <linux/clockchips.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
-#include <linux/delay.h>
-#include <linux/err.h>
-#include <linux/interrupt.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/percpu.h>
-#include <linux/sched_clock.h>
-#include <linux/time.h>
-
-#include "timer-of.h"
-
-#ifdef CONFIG_ARM
-#include <asm/mach/time.h>
-#endif
-
-#define RTC_SECONDS 0x08
-#define RTC_SHADOW_SECONDS 0x0c
-#define RTC_MILLISECONDS 0x10
-
-#define TIMERUS_CNTR_1US 0x10
-#define TIMERUS_USEC_CFG 0x14
-#define TIMERUS_CNTR_FREEZE 0x4c
-
-#define TIMER_PTV 0x0
-#define TIMER_PTV_EN BIT(31)
-#define TIMER_PTV_PER BIT(30)
-#define TIMER_PCR 0x4
-#define TIMER_PCR_INTR_CLR BIT(30)
-
-#ifdef CONFIG_ARM
-#define TIMER_CPU0 0x50 /* TIMER3 */
-#else
-#define TIMER_CPU0 0x90 /* TIMER10 */
-#define TIMER10_IRQ_IDX 10
-#define IRQ_IDX_FOR_CPU(cpu) (TIMER10_IRQ_IDX + cpu)
-#endif
-#define TIMER_BASE_FOR_CPU(cpu) (TIMER_CPU0 + (cpu) * 8)
-
-static u32 usec_config;
-static void __iomem *timer_reg_base;
-#ifdef CONFIG_ARM
-static struct delay_timer tegra_delay_timer;
-#endif
-
-static int tegra_timer_set_next_event(unsigned long cycles,
- struct clock_event_device *evt)
-{
- void __iomem *reg_base = timer_of_base(to_timer_of(evt));
-
- writel(TIMER_PTV_EN |
- ((cycles > 1) ? (cycles - 1) : 0), /* n+1 scheme */
- reg_base + TIMER_PTV);
-
- return 0;
-}
-
-static int tegra_timer_shutdown(struct clock_event_device *evt)
-{
- void __iomem *reg_base = timer_of_base(to_timer_of(evt));
-
- writel(0, reg_base + TIMER_PTV);
-
- return 0;
-}
-
-static int tegra_timer_set_periodic(struct clock_event_device *evt)
-{
- void __iomem *reg_base = timer_of_base(to_timer_of(evt));
-
- writel(TIMER_PTV_EN | TIMER_PTV_PER |
- ((timer_of_rate(to_timer_of(evt)) / HZ) - 1),
- reg_base + TIMER_PTV);
-
- return 0;
-}
-
-static irqreturn_t tegra_timer_isr(int irq, void *dev_id)
-{
- struct clock_event_device *evt = (struct clock_event_device *)dev_id;
- void __iomem *reg_base = timer_of_base(to_timer_of(evt));
-
- writel(TIMER_PCR_INTR_CLR, reg_base + TIMER_PCR);
- evt->event_handler(evt);
-
- return IRQ_HANDLED;
-}
-
-static void tegra_timer_suspend(struct clock_event_device *evt)
-{
- void __iomem *reg_base = timer_of_base(to_timer_of(evt));
-
- writel(TIMER_PCR_INTR_CLR, reg_base + TIMER_PCR);
-}
-
-static void tegra_timer_resume(struct clock_event_device *evt)
-{
- writel(usec_config, timer_reg_base + TIMERUS_USEC_CFG);
-}
-
-#ifdef CONFIG_ARM64
-static DEFINE_PER_CPU(struct timer_of, tegra_to) = {
- .flags = TIMER_OF_CLOCK | TIMER_OF_BASE,
-
- .clkevt = {
- .name = "tegra_timer",
- .rating = 460,
- .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
- .set_next_event = tegra_timer_set_next_event,
- .set_state_shutdown = tegra_timer_shutdown,
- .set_state_periodic = tegra_timer_set_periodic,
- .set_state_oneshot = tegra_timer_shutdown,
- .tick_resume = tegra_timer_shutdown,
- .suspend = tegra_timer_suspend,
- .resume = tegra_timer_resume,
- },
-};
-
-static int tegra_timer_setup(unsigned int cpu)
-{
- struct timer_of *to = per_cpu_ptr(&tegra_to, cpu);
-
- irq_force_affinity(to->clkevt.irq, cpumask_of(cpu));
- enable_irq(to->clkevt.irq);
-
- clockevents_config_and_register(&to->clkevt, timer_of_rate(to),
- 1, /* min */
- 0x1fffffff); /* 29 bits */
-
- return 0;
-}
-
-static int tegra_timer_stop(unsigned int cpu)
-{
- struct timer_of *to = per_cpu_ptr(&tegra_to, cpu);
-
- to->clkevt.set_state_shutdown(&to->clkevt);
- disable_irq_nosync(to->clkevt.irq);
-
- return 0;
-}
-#else /* CONFIG_ARM */
-static struct timer_of tegra_to = {
- .flags = TIMER_OF_CLOCK | TIMER_OF_BASE | TIMER_OF_IRQ,
-
- .clkevt = {
- .name = "tegra_timer",
- .rating = 300,
- .features = CLOCK_EVT_FEAT_ONESHOT |
- CLOCK_EVT_FEAT_PERIODIC |
- CLOCK_EVT_FEAT_DYNIRQ,
- .set_next_event = tegra_timer_set_next_event,
- .set_state_shutdown = tegra_timer_shutdown,
- .set_state_periodic = tegra_timer_set_periodic,
- .set_state_oneshot = tegra_timer_shutdown,
- .tick_resume = tegra_timer_shutdown,
- .suspend = tegra_timer_suspend,
- .resume = tegra_timer_resume,
- .cpumask = cpu_possible_mask,
- },
-
- .of_irq = {
- .index = 2,
- .flags = IRQF_TIMER | IRQF_TRIGGER_HIGH,
- .handler = tegra_timer_isr,
- },
-};
-
-static u64 notrace tegra_read_sched_clock(void)
-{
- return readl(timer_reg_base + TIMERUS_CNTR_1US);
-}
-
-static unsigned long tegra_delay_timer_read_counter_long(void)
-{
- return readl(timer_reg_base + TIMERUS_CNTR_1US);
-}
-
-static struct timer_of suspend_rtc_to = {
- .flags = TIMER_OF_BASE | TIMER_OF_CLOCK,
-};
-
-/*
- * tegra_rtc_read - Reads the Tegra RTC registers
- * Care must be taken that this funciton is not called while the
- * tegra_rtc driver could be executing to avoid race conditions
- * on the RTC shadow register
- */
-static u64 tegra_rtc_read_ms(struct clocksource *cs)
-{
- u32 ms = readl(timer_of_base(&suspend_rtc_to) + RTC_MILLISECONDS);
- u32 s = readl(timer_of_base(&suspend_rtc_to) + RTC_SHADOW_SECONDS);
- return (u64)s * MSEC_PER_SEC + ms;
-}
-
-static struct clocksource suspend_rtc_clocksource = {
- .name = "tegra_suspend_timer",
- .rating = 200,
- .read = tegra_rtc_read_ms,
- .mask = CLOCKSOURCE_MASK(32),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_SUSPEND_NONSTOP,
-};
-#endif
-
-static int tegra_timer_common_init(struct device_node *np, struct timer_of *to)
-{
- int ret = 0;
-
- ret = timer_of_init(np, to);
- if (ret < 0)
- goto out;
-
- timer_reg_base = timer_of_base(to);
-
- /*
- * Configure microsecond timers to have 1MHz clock
- * Config register is 0xqqww, where qq is "dividend", ww is "divisor"
- * Uses n+1 scheme
- */
- switch (timer_of_rate(to)) {
- case 12000000:
- usec_config = 0x000b; /* (11+1)/(0+1) */
- break;
- case 12800000:
- usec_config = 0x043f; /* (63+1)/(4+1) */
- break;
- case 13000000:
- usec_config = 0x000c; /* (12+1)/(0+1) */
- break;
- case 16800000:
- usec_config = 0x0453; /* (83+1)/(4+1) */
- break;
- case 19200000:
- usec_config = 0x045f; /* (95+1)/(4+1) */
- break;
- case 26000000:
- usec_config = 0x0019; /* (25+1)/(0+1) */
- break;
- case 38400000:
- usec_config = 0x04bf; /* (191+1)/(4+1) */
- break;
- case 48000000:
- usec_config = 0x002f; /* (47+1)/(0+1) */
- break;
- default:
- ret = -EINVAL;
- goto out;
- }
-
- writel(usec_config, timer_of_base(to) + TIMERUS_USEC_CFG);
-
-out:
- return ret;
-}
-
-#ifdef CONFIG_ARM64
-static int __init tegra_init_timer(struct device_node *np)
-{
- int cpu, ret = 0;
- struct timer_of *to;
-
- to = this_cpu_ptr(&tegra_to);
- ret = tegra_timer_common_init(np, to);
- if (ret < 0)
- goto out;
-
- for_each_possible_cpu(cpu) {
- struct timer_of *cpu_to;
-
- cpu_to = per_cpu_ptr(&tegra_to, cpu);
- cpu_to->of_base.base = timer_reg_base + TIMER_BASE_FOR_CPU(cpu);
- cpu_to->of_clk.rate = timer_of_rate(to);
- cpu_to->clkevt.cpumask = cpumask_of(cpu);
- cpu_to->clkevt.irq =
- irq_of_parse_and_map(np, IRQ_IDX_FOR_CPU(cpu));
- if (!cpu_to->clkevt.irq) {
- pr_err("%s: can't map IRQ for CPU%d\n",
- __func__, cpu);
- ret = -EINVAL;
- goto out;
- }
-
- irq_set_status_flags(cpu_to->clkevt.irq, IRQ_NOAUTOEN);
- ret = request_irq(cpu_to->clkevt.irq, tegra_timer_isr,
- IRQF_TIMER | IRQF_NOBALANCING,
- cpu_to->clkevt.name, &cpu_to->clkevt);
- if (ret) {
- pr_err("%s: cannot setup irq %d for CPU%d\n",
- __func__, cpu_to->clkevt.irq, cpu);
- ret = -EINVAL;
- goto out_irq;
- }
- }
-
- cpuhp_setup_state(CPUHP_AP_TEGRA_TIMER_STARTING,
- "AP_TEGRA_TIMER_STARTING", tegra_timer_setup,
- tegra_timer_stop);
-
- return ret;
-out_irq:
- for_each_possible_cpu(cpu) {
- struct timer_of *cpu_to;
-
- cpu_to = per_cpu_ptr(&tegra_to, cpu);
- if (cpu_to->clkevt.irq) {
- free_irq(cpu_to->clkevt.irq, &cpu_to->clkevt);
- irq_dispose_mapping(cpu_to->clkevt.irq);
- }
- }
-out:
- timer_of_cleanup(to);
- return ret;
-}
-#else /* CONFIG_ARM */
-static int __init tegra_init_timer(struct device_node *np)
-{
- int ret = 0;
-
- ret = tegra_timer_common_init(np, &tegra_to);
- if (ret < 0)
- goto out;
-
- tegra_to.of_base.base = timer_reg_base + TIMER_BASE_FOR_CPU(0);
- tegra_to.of_clk.rate = 1000000; /* microsecond timer */
-
- sched_clock_register(tegra_read_sched_clock, 32,
- timer_of_rate(&tegra_to));
- ret = clocksource_mmio_init(timer_reg_base + TIMERUS_CNTR_1US,
- "timer_us", timer_of_rate(&tegra_to),
- 300, 32, clocksource_mmio_readl_up);
- if (ret) {
- pr_err("Failed to register clocksource\n");
- goto out;
- }
-
- tegra_delay_timer.read_current_timer =
- tegra_delay_timer_read_counter_long;
- tegra_delay_timer.freq = timer_of_rate(&tegra_to);
- register_current_timer_delay(&tegra_delay_timer);
-
- clockevents_config_and_register(&tegra_to.clkevt,
- timer_of_rate(&tegra_to),
- 0x1,
- 0x1fffffff);
-
- return ret;
-out:
- timer_of_cleanup(&tegra_to);
-
- return ret;
-}
-
-static int __init tegra20_init_rtc(struct device_node *np)
-{
- int ret;
-
- ret = timer_of_init(np, &suspend_rtc_to);
- if (ret)
- return ret;
-
- clocksource_register_hz(&suspend_rtc_clocksource, 1000);
-
- return 0;
-}
-TIMER_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc);
-#endif
-TIMER_OF_DECLARE(tegra210_timer, "nvidia,tegra210-timer", tegra_init_timer);
-TIMER_OF_DECLARE(tegra20_timer, "nvidia,tegra20-timer", tegra_init_timer);
{
struct jz4780_dma_dev *jzdma = data;
unsigned int nb_channels = jzdma->soc_data->nb_channels;
- uint32_t pending, dmac;
+ unsigned long pending;
+ uint32_t dmac;
int i;
pending = jz4780_dma_ctrl_readl(jzdma, JZ_DMA_REG_DIRQP);
- for_each_set_bit(i, (unsigned long *)&pending, nb_channels) {
+ for_each_set_bit(i, &pending, nb_channels) {
if (jz4780_dma_chan_irq(jzdma, &jzdma->chan[i]))
pending &= ~BIT(i);
}
spin_lock_irqsave(&sdma->channel_0_lock, flags);
bd0->mode.command = C0_SETPM;
- bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD;
+ bd0->mode.status = BD_DONE | BD_WRAP | BD_EXTD;
bd0->mode.count = size / 2;
bd0->buffer_addr = buf_phys;
bd0->ext_buffer_addr = address;
context->gReg[7] = sdmac->watermark_level;
bd0->mode.command = C0_SETDM;
- bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD;
+ bd0->mode.status = BD_DONE | BD_WRAP | BD_EXTD;
bd0->mode.count = sizeof(*context) / 4;
bd0->buffer_addr = sdma->context_phys;
bd0->ext_buffer_addr = 2048 + (sizeof(*context) / 4) * channel;
if (pdata && pdata->script_addrs)
sdma_add_scripts(sdma, pdata->script_addrs);
- if (pdata) {
- ret = sdma_get_firmware(sdma, pdata->fw_name);
- if (ret)
- dev_warn(&pdev->dev, "failed to get firmware from platform data\n");
- } else {
- /*
- * Because that device tree does not encode ROM script address,
- * the RAM script in firmware is mandatory for device tree
- * probe, otherwise it fails.
- */
- ret = of_property_read_string(np, "fsl,sdma-ram-script-name",
- &fw_name);
- if (ret)
- dev_warn(&pdev->dev, "failed to get firmware name\n");
- else {
- ret = sdma_get_firmware(sdma, fw_name);
- if (ret)
- dev_warn(&pdev->dev, "failed to get firmware from device tree\n");
- }
- }
-
sdma->dma_device.dev = &pdev->dev;
sdma->dma_device.device_alloc_chan_resources = sdma_alloc_chan_resources;
of_node_put(spba_bus);
}
+ /*
+ * Kick off firmware loading as the very last step:
+ * attempt to load firmware only if we're not on the error path, because
+ * the firmware callback requires a fully functional and allocated sdma
+ * instance.
+ */
+ if (pdata) {
+ ret = sdma_get_firmware(sdma, pdata->fw_name);
+ if (ret)
+ dev_warn(&pdev->dev, "failed to get firmware from platform data\n");
+ } else {
+ /*
+ * Because that device tree does not encode ROM script address,
+ * the RAM script in firmware is mandatory for device tree
+ * probe, otherwise it fails.
+ */
+ ret = of_property_read_string(np, "fsl,sdma-ram-script-name",
+ &fw_name);
+ if (ret) {
+ dev_warn(&pdev->dev, "failed to get firmware name\n");
+ } else {
+ ret = sdma_get_firmware(sdma, fw_name);
+ if (ret)
+ dev_warn(&pdev->dev, "failed to get firmware from device tree\n");
+ }
+ }
+
return 0;
err_register:
/* Number of bytes available to read */
avail = CIRC_CNT(offset, bchan->head, MAX_DESCRIPTORS + 1);
+ if (offset < bchan->head)
+ avail--;
+
list_for_each_entry_safe(async_desc, tmp,
&bchan->desc_list, desc_node) {
/* Not enough data to read */
bgrt->version);
goto out;
}
- if (bgrt->status & 0xfe) {
- pr_notice("Ignoring BGRT: reserved status bits are non-zero %u\n",
- bgrt->status);
- goto out;
- }
if (bgrt->image_type != 0) {
pr_notice("Ignoring BGRT: invalid image type %u (expected 0)\n",
bgrt->image_type);
/* first try to find a slot in an existing linked list entry */
for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) {
- rsv = __va(prsv);
+ rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB);
index = atomic_fetch_add_unless(&rsv->count, 1, rsv->size);
if (index < rsv->size) {
rsv->entry[index].base = addr;
rsv->entry[index].size = size;
+ memunmap(rsv);
return 0;
}
+ memunmap(rsv);
}
/* no slot found - allocate a new linked list entry */
if (!rsv)
return -ENOMEM;
- rsv->size = EFI_MEMRESERVE_COUNT(PAGE_SIZE);
+ /*
+ * The memremap() call above assumes that a linux_efi_memreserve entry
+ * never crosses a page boundary, so let's ensure that this remains true
+ * even when kexec'ing a 4k pages kernel from a >4k pages kernel, by
+ * using SZ_4K explicitly in the size calculation below.
+ */
+ rsv->size = EFI_MEMRESERVE_COUNT(SZ_4K);
atomic_set(&rsv->count, 1);
rsv->entry[0].base = addr;
rsv->entry[0].size = size;
efibc_str_to_str16(value, (efi_char16_t *)entry->var.Data);
memcpy(&entry->var.VendorGuid, &guid, sizeof(guid));
- ret = efivar_entry_set(entry,
- EFI_VARIABLE_NON_VOLATILE
- | EFI_VARIABLE_BOOTSERVICE_ACCESS
- | EFI_VARIABLE_RUNTIME_ACCESS,
- size, entry->var.Data, NULL);
+ ret = efivar_entry_set_safe(entry->var.VariableName,
+ entry->var.VendorGuid,
+ EFI_VARIABLE_NON_VOLATILE
+ | EFI_VARIABLE_BOOTSERVICE_ACCESS
+ | EFI_VARIABLE_RUNTIME_ACCESS,
+ false, size, entry->var.Data);
+
if (ret)
pr_err("failed to set %s EFI variable: 0x%x\n",
name, ret);
* Copyright (C) 2015 Linaro Ltd.
*/
+#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/init.h>
#include <linux/clk.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
+#include "gpiolib.h"
+
/*
* Only first 8bits of a register correspond to each pin,
* so there are 4 registers for 32 pins.
spin_unlock_irqrestore(&gchip->lock, flags);
}
+static int mb86s70_gpio_to_irq(struct gpio_chip *gc, unsigned int offset)
+{
+ int irq, index;
+
+ for (index = 0;; index++) {
+ irq = platform_get_irq(to_platform_device(gc->parent), index);
+ if (irq <= 0)
+ break;
+ if (irq_get_irq_data(irq)->hwirq == offset)
+ return irq;
+ }
+ return -EINVAL;
+}
+
static int mb86s70_gpio_probe(struct platform_device *pdev)
{
struct mb86s70_gpio_chip *gchip;
if (IS_ERR(gchip->base))
return PTR_ERR(gchip->base);
- gchip->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(gchip->clk))
- return PTR_ERR(gchip->clk);
+ if (!has_acpi_companion(&pdev->dev)) {
+ gchip->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(gchip->clk))
+ return PTR_ERR(gchip->clk);
- ret = clk_prepare_enable(gchip->clk);
- if (ret)
- return ret;
+ ret = clk_prepare_enable(gchip->clk);
+ if (ret)
+ return ret;
+ }
spin_lock_init(&gchip->lock);
gchip->gc.parent = &pdev->dev;
gchip->gc.base = -1;
+ if (has_acpi_companion(&pdev->dev))
+ gchip->gc.to_irq = mb86s70_gpio_to_irq;
+
ret = gpiochip_add_data(&gchip->gc, gchip);
if (ret) {
dev_err(&pdev->dev, "couldn't register gpio driver\n");
clk_disable_unprepare(gchip->clk);
+ return ret;
}
- return ret;
+ if (has_acpi_companion(&pdev->dev))
+ acpi_gpiochip_request_interrupts(&gchip->gc);
+
+ return 0;
}
static int mb86s70_gpio_remove(struct platform_device *pdev)
{
struct mb86s70_gpio_chip *gchip = platform_get_drvdata(pdev);
+ if (has_acpi_companion(&pdev->dev))
+ acpi_gpiochip_free_interrupts(&gchip->gc);
gpiochip_remove(&gchip->gc);
clk_disable_unprepare(gchip->clk);
};
MODULE_DEVICE_TABLE(of, mb86s70_gpio_dt_ids);
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id mb86s70_gpio_acpi_ids[] = {
+ { "SCX0007" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(acpi, mb86s70_gpio_acpi_ids);
+#endif
+
static struct platform_driver mb86s70_gpio_driver = {
.driver = {
.name = "mb86s70-gpio",
.of_match_table = mb86s70_gpio_dt_ids,
+ .acpi_match_table = ACPI_PTR(mb86s70_gpio_acpi_ids),
},
.probe = mb86s70_gpio_probe,
.remove = mb86s70_gpio_remove,
* Legacy handling of SPI active high chip select. If we have a
* property named "cs-gpios" we need to inspect the child node
* to determine if the flags should have inverted semantics.
+ *
+ * This does not apply to an SPI device named "spi-gpio", because
+ * these have traditionally obtained their own GPIOs by parsing
+ * the device tree directly and did not respect any "spi-cs-high"
+ * property on the SPI bus children.
*/
- if (IS_ENABLED(CONFIG_SPI_MASTER) && !strcmp(propname, "cs-gpios") &&
+ if (IS_ENABLED(CONFIG_SPI_MASTER) &&
+ !strcmp(propname, "cs-gpios") &&
+ !of_device_is_compatible(np, "spi-gpio") &&
of_property_read_bool(np, "cs-gpios")) {
struct device_node *child;
u32 cs;
mutex_unlock(&adev->srbm_mutex);
gfx_v9_0_init_compute_vmid(adev);
-
- mutex_lock(&adev->grbm_idx_mutex);
- /*
- * making sure that the following register writes will be broadcasted
- * to all the shaders
- */
- gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
-
- WREG32_SOC15(GC, 0, mmPA_SC_FIFO_SIZE,
- (adev->gfx.config.sc_prim_fifo_size_frontend <<
- PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
- (adev->gfx.config.sc_prim_fifo_size_backend <<
- PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
- (adev->gfx.config.sc_hiz_tile_fifo_size <<
- PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
- (adev->gfx.config.sc_earlyz_tile_fifo_size <<
- PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
- mutex_unlock(&adev->grbm_idx_mutex);
-
}
static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
/* No access to rdtsc. Using raw monotonic time */
args->cpu_clock_counter = ktime_get_raw_ns();
- args->system_clock_counter = ktime_get_boot_ns();
+ args->system_clock_counter = ktime_get_boottime_ns();
/* Since the counter is in nano-seconds we use 1GHz frequency */
args->system_clock_freq = 1000000000;
if (ret)
return ret;
- ret = psm_adjust_power_state_dynamic(hwmgr, true, NULL);
+ ret = psm_adjust_power_state_dynamic(hwmgr, false, NULL);
return ret;
}
PHM_PlatformCaps_ThermalController
);
- if (0 == powerplay_table->usFanTableOffset)
+ if (0 == powerplay_table->usFanTableOffset) {
+ hwmgr->thermal_controller.use_hw_fan_control = 1;
return 0;
+ }
fan_table = (const PPTable_Generic_SubTable_Header *)
(((unsigned long)powerplay_table) +
uint8_t ucType;
uint8_t ucI2cLine;
uint8_t ucI2cAddress;
+ uint8_t use_hw_fan_control;
struct pp_fan_info fanInfo;
struct pp_advance_fan_control_parameters advanceFanControlParameters;
};
return 0;
}
+ /* use hardware fan control */
+ if (hwmgr->thermal_controller.use_hw_fan_control)
+ return 0;
+
tmp64 = hwmgr->thermal_controller.advanceFanControlParameters.
usPWMMin * duty100;
do_div(tmp64, 10000);
if (IS_ERR(gpu->cmdbuf_suballoc)) {
dev_err(gpu->dev, "Failed to create cmdbuf suballocator\n");
ret = PTR_ERR(gpu->cmdbuf_suballoc);
- goto fail;
+ goto destroy_iommu;
}
/* Create buffer: */
PAGE_SIZE);
if (ret) {
dev_err(gpu->dev, "could not create command buffer\n");
- goto destroy_iommu;
+ goto destroy_suballoc;
}
if (gpu->mmu->version == ETNAVIV_IOMMU_V1 &&
free_buffer:
etnaviv_cmdbuf_free(&gpu->buffer);
gpu->buffer.suballoc = NULL;
+destroy_suballoc:
+ etnaviv_cmdbuf_suballoc_destroy(gpu->cmdbuf_suballoc);
+ gpu->cmdbuf_suballoc = NULL;
destroy_iommu:
etnaviv_iommu_destroy(gpu->mmu);
gpu->mmu = NULL;
*/
request->reserved_space += LEGACY_REQUEST_SIZE;
- ret = switch_context(request);
+ /* Unconditionally invalidate GPU caches and TLBs. */
+ ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
if (ret)
return ret;
- /* Unconditionally invalidate GPU caches and TLBs. */
- ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
+ ret = switch_context(request);
if (ret)
return ret;
ipu_dc_disable(ipu);
ipu_prg_disable(ipu);
+ drm_crtc_vblank_off(crtc);
+
spin_lock_irq(&crtc->dev->event_lock);
- if (crtc->state->event) {
+ if (crtc->state->event && !crtc->state->active) {
drm_crtc_send_vblank_event(crtc, crtc->state->event);
crtc->state->event = NULL;
}
spin_unlock_irq(&crtc->dev->event_lock);
-
- drm_crtc_vblank_off(crtc);
}
static void imx_drm_crtc_reset(struct drm_crtc *crtc)
return 0;
err_free:
- drm_gem_object_put_unlocked(&shmem->base);
+ drm_gem_handle_delete(file, args->handle);
return ret;
}
output = vgdev->outputs + scanout;
new_edid = drm_do_get_edid(&output->conn, virtio_get_edid_block, resp);
+ drm_connector_update_edid_property(&output->conn, new_edid);
spin_lock(&vgdev->display_info_lock);
old_edid = output->edid;
output->edid = new_edid;
- drm_connector_update_edid_property(&output->conn, output->edid);
spin_unlock(&vgdev->display_info_lock);
kfree(old_edid);
#define HID_DEVICE_ID_ALPS_U1_DUAL_3BTN_PTP 0x1220
#define HID_DEVICE_ID_ALPS_U1 0x1215
#define HID_DEVICE_ID_ALPS_T4_BTNLESS 0x120C
+#define HID_DEVICE_ID_ALPS_1222 0x1222
#define USB_VENDOR_ID_AMI 0x046b
#define USB_DEVICE_ID_CHICONY_MULTI_TOUCH 0xb19d
#define USB_DEVICE_ID_CHICONY_WIRELESS 0x0618
#define USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE 0x1053
+#define USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE2 0x0939
#define USB_DEVICE_ID_CHICONY_WIRELESS2 0x1123
#define USB_DEVICE_ID_ASUS_AK1D 0x1125
#define USB_DEVICE_ID_CHICONY_TOSHIBA_WT10A 0x1408
#define USB_VENDOR_ID_HUION 0x256c
#define USB_DEVICE_ID_HUION_TABLET 0x006e
+#define USB_DEVICE_ID_HUION_HS64 0x006d
#define USB_VENDOR_ID_IBM 0x04b3
#define USB_DEVICE_ID_IBM_SCROLLPOINT_III 0x3100
#define REPORT_ID_HIDPP_SHORT 0x10
#define REPORT_ID_HIDPP_LONG 0x11
+#define REPORT_ID_HIDPP_VERY_LONG 0x12
#define HIDPP_REPORT_SHORT_LENGTH 7
#define HIDPP_REPORT_LONG_LENGTH 20
int ret;
if ((buf[0] == REPORT_ID_HIDPP_SHORT) ||
- (buf[0] == REPORT_ID_HIDPP_LONG)) {
+ (buf[0] == REPORT_ID_HIDPP_LONG) ||
+ (buf[0] == REPORT_ID_HIDPP_VERY_LONG)) {
if (count < 2)
return -EINVAL;
HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
USB_VENDOR_ID_ALPS_JP,
HID_DEVICE_ID_ALPS_U1_DUAL_3BTN_PTP) },
+ { .driver_data = MT_CLS_WIN_8_DUAL,
+ HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
+ USB_VENDOR_ID_ALPS_JP,
+ HID_DEVICE_ID_ALPS_1222) },
/* Lenovo X1 TAB Gen 2 */
{ .driver_data = MT_CLS_WIN_8_DUAL,
{ HID_USB_DEVICE(USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_UC100KM), HID_QUIRK_NOGET },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_MULTI_TOUCH), HID_QUIRK_MULTI_INPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE2), HID_QUIRK_ALWAYS_POLL },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS), HID_QUIRK_MULTI_INPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_CHIC, USB_DEVICE_ID_CHIC_GAMEPAD), HID_QUIRK_BADPAD },
{ HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_3AXIS_5BUTTON_STICK), HID_QUIRK_NOGET },
USB_DEVICE_ID_UCLOGIC_TABLET_TWHA60) },
{ HID_USB_DEVICE(USB_VENDOR_ID_HUION,
USB_DEVICE_ID_HUION_TABLET) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_HUION,
+ USB_DEVICE_ID_HUION_HS64) },
{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC,
USB_DEVICE_ID_HUION_TABLET) },
{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC,
/* FALL THROUGH */
case VID_PID(USB_VENDOR_ID_HUION,
USB_DEVICE_ID_HUION_TABLET):
+ case VID_PID(USB_VENDOR_ID_HUION,
+ USB_DEVICE_ID_HUION_HS64):
case VID_PID(USB_VENDOR_ID_UCLOGIC,
USB_DEVICE_ID_HUION_TABLET):
case VID_PID(USB_VENDOR_ID_UCLOGIC,
goto end_err_fw_release;
release_firmware(fw);
- kfree(filename);
dev_info(cl_data_to_dev(client_data), "ISH firmware %s loaded\n",
filename);
+ kfree(filename);
return 0;
end_err_fw_release:
*/
static int hid_ishtp_cl_suspend(struct device *device)
{
- struct ishtp_cl_device *cl_device = dev_get_drvdata(device);
+ struct ishtp_cl_device *cl_device = ishtp_dev_to_cl_device(device);
struct ishtp_cl *hid_ishtp_cl = ishtp_get_drvdata(cl_device);
struct ishtp_cl_data *client_data = ishtp_get_client_data(hid_ishtp_cl);
*/
static int hid_ishtp_cl_resume(struct device *device)
{
- struct ishtp_cl_device *cl_device = dev_get_drvdata(device);
+ struct ishtp_cl_device *cl_device = ishtp_dev_to_cl_device(device);
struct ishtp_cl *hid_ishtp_cl = ishtp_get_drvdata(cl_device);
struct ishtp_cl_data *client_data = ishtp_get_client_data(hid_ishtp_cl);
}
ishtp_device_ready = true;
- dev_set_drvdata(&device->dev, device);
return device;
}
EXPORT_SYMBOL(ishtp_get_drvdata);
/**
+ * ishtp_dev_to_cl_device() - get ishtp_cl_device instance from device instance
+ * @device: device instance
+ *
+ * Get ish_cl_device instance which embeds device instance in it.
+ *
+ * Return: pointer to ishtp_cl_device instance
+ */
+struct ishtp_cl_device *ishtp_dev_to_cl_device(struct device *device)
+{
+ return to_ishtp_cl_device(device);
+}
+EXPORT_SYMBOL(ishtp_dev_to_cl_device);
+
+/**
* ishtp_bus_new_client() - Create a new client
* @dev: ISHTP device instance
*
Select this option to run Linux as a Hyper-V client operating
system.
+config HYPERV_TIMER
+ def_bool HYPERV
+
config HYPERV_TSCPAGE
def_bool HYPERV && X86_64
#include <linux/version.h>
#include <linux/random.h>
#include <linux/clockchips.h>
+#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"
struct hv_context hv_context;
/*
- * If false, we're using the old mechanism for stimer0 interrupts
- * where it sends a VMbus message when it expires. The old
- * mechanism is used when running on older versions of Hyper-V
- * that don't support Direct Mode. While Hyper-V provides
- * four stimer's per CPU, Linux uses only stimer0.
- */
-static bool direct_mode_enabled;
-static int stimer0_irq;
-static int stimer0_vector;
-
-#define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */
-#define HV_MAX_MAX_DELTA_TICKS 0xffffffff
-#define HV_MIN_DELTA_TICKS 1
-
-/*
* hv_init - Main initialization routine.
*
* This routine must be called before any other routines in here are called
hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
if (!hv_context.cpu_context)
return -ENOMEM;
-
- direct_mode_enabled = ms_hyperv.misc_features &
- HV_STIMER_DIRECT_MODE_AVAILABLE;
return 0;
}
return status & 0xFFFF;
}
-/*
- * ISR for when stimer0 is operating in Direct Mode. Direct Mode
- * does not use VMbus or any VMbus messages, so process here and not
- * in the VMbus driver code.
- */
-
-static void hv_stimer0_isr(void)
-{
- struct hv_per_cpu_context *hv_cpu;
-
- hv_cpu = this_cpu_ptr(hv_context.cpu_context);
- hv_cpu->clk_evt->event_handler(hv_cpu->clk_evt);
- add_interrupt_randomness(stimer0_vector, 0);
-}
-
-static int hv_ce_set_next_event(unsigned long delta,
- struct clock_event_device *evt)
-{
- u64 current_tick;
-
- WARN_ON(!clockevent_state_oneshot(evt));
-
- current_tick = hyperv_cs->read(NULL);
- current_tick += delta;
- hv_init_timer(0, current_tick);
- return 0;
-}
-
-static int hv_ce_shutdown(struct clock_event_device *evt)
-{
- hv_init_timer(0, 0);
- hv_init_timer_config(0, 0);
- if (direct_mode_enabled)
- hv_disable_stimer0_percpu_irq(stimer0_irq);
-
- return 0;
-}
-
-static int hv_ce_set_oneshot(struct clock_event_device *evt)
-{
- union hv_stimer_config timer_cfg;
-
- timer_cfg.as_uint64 = 0;
- timer_cfg.enable = 1;
- timer_cfg.auto_enable = 1;
- if (direct_mode_enabled) {
- /*
- * When it expires, the timer will directly interrupt
- * on the specified hardware vector/IRQ.
- */
- timer_cfg.direct_mode = 1;
- timer_cfg.apic_vector = stimer0_vector;
- hv_enable_stimer0_percpu_irq(stimer0_irq);
- } else {
- /*
- * When it expires, the timer will generate a VMbus message,
- * to be handled by the normal VMbus interrupt handler.
- */
- timer_cfg.direct_mode = 0;
- timer_cfg.sintx = VMBUS_MESSAGE_SINT;
- }
- hv_init_timer_config(0, timer_cfg.as_uint64);
- return 0;
-}
-
-static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu)
-{
- dev->name = "Hyper-V clockevent";
- dev->features = CLOCK_EVT_FEAT_ONESHOT;
- dev->cpumask = cpumask_of(cpu);
- dev->rating = 1000;
- /*
- * Avoid settint dev->owner = THIS_MODULE deliberately as doing so will
- * result in clockevents_config_and_register() taking additional
- * references to the hv_vmbus module making it impossible to unload.
- */
-
- dev->set_state_shutdown = hv_ce_shutdown;
- dev->set_state_oneshot = hv_ce_set_oneshot;
- dev->set_next_event = hv_ce_set_next_event;
-}
-
-
int hv_synic_alloc(void)
{
int cpu;
tasklet_init(&hv_cpu->msg_dpc,
vmbus_on_msg_dpc, (unsigned long) hv_cpu);
- hv_cpu->clk_evt = kzalloc(sizeof(struct clock_event_device),
- GFP_KERNEL);
- if (hv_cpu->clk_evt == NULL) {
- pr_err("Unable to allocate clock event device\n");
- goto err;
- }
- hv_init_clockevent_device(hv_cpu->clk_evt, cpu);
-
hv_cpu->synic_message_page =
(void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->synic_message_page == NULL) {
INIT_LIST_HEAD(&hv_cpu->chan_list);
}
- if (direct_mode_enabled &&
- hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
- hv_stimer0_isr))
- goto err;
-
return 0;
err:
/*
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
- kfree(hv_cpu->clk_evt);
free_page((unsigned long)hv_cpu->synic_event_page);
free_page((unsigned long)hv_cpu->synic_message_page);
free_page((unsigned long)hv_cpu->post_msg_page);
hv_set_synic_state(sctrl.as_uint64);
- /*
- * Register the per-cpu clockevent source.
- */
- if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE)
- clockevents_config_and_register(hv_cpu->clk_evt,
- HV_TIMER_FREQUENCY,
- HV_MIN_DELTA_TICKS,
- HV_MAX_MAX_DELTA_TICKS);
- return 0;
-}
-
-/*
- * hv_synic_clockevents_cleanup - Cleanup clockevent devices
- */
-void hv_synic_clockevents_cleanup(void)
-{
- int cpu;
+ hv_stimer_init(cpu);
- if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
- return;
-
- if (direct_mode_enabled)
- hv_remove_stimer0_irq(stimer0_irq);
-
- for_each_present_cpu(cpu) {
- struct hv_per_cpu_context *hv_cpu
- = per_cpu_ptr(hv_context.cpu_context, cpu);
-
- clockevents_unbind_device(hv_cpu->clk_evt, cpu);
- }
+ return 0;
}
/*
if (channel_found && vmbus_connection.conn_state == CONNECTED)
return -EBUSY;
- /* Turn off clockevent device */
- if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) {
- struct hv_per_cpu_context *hv_cpu
- = this_cpu_ptr(hv_context.cpu_context);
-
- clockevents_unbind_device(hv_cpu->clk_evt, cpu);
- hv_ce_shutdown(hv_cpu->clk_evt);
- }
+ hv_stimer_cleanup(cpu);
hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
#include <linux/hyperv.h>
#include <linux/clockchips.h>
#include <linux/ptp_clock_kernel.h>
+#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"
* per-cpu list of the channels based on their CPU affinity.
*/
struct list_head chan_list;
- struct clock_event_device *clk_evt;
};
struct hv_context {
extern int hv_synic_cleanup(unsigned int cpu);
-extern void hv_synic_clockevents_cleanup(void);
-
/* Interface */
void hv_ringbuffer_pre_init(struct vmbus_channel *channel);
#include <linux/kdebug.h>
#include <linux/efi.h>
#include <linux/random.h>
+#include <clocksource/hyperv_timer.h>
#include "hyperv_vmbus.h"
struct vmbus_dynid {
kfree(ctx);
}
-static void hv_process_timer_expiration(struct hv_message *msg,
- struct hv_per_cpu_context *hv_cpu)
-{
- struct clock_event_device *dev = hv_cpu->clk_evt;
-
- if (dev->event_handler)
- dev->event_handler(dev);
-
- vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
-}
-
void vmbus_on_msg_dpc(unsigned long data)
{
struct hv_per_cpu_context *hv_cpu = (void *)data;
/* Check if there are actual msgs to be processed */
if (msg->header.message_type != HVMSG_NONE) {
- if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
- hv_process_timer_expiration(msg, hv_cpu);
- else
+ if (msg->header.message_type == HVMSG_TIMER_EXPIRED) {
+ hv_stimer0_isr();
+ vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
+ } else
tasklet_schedule(&hv_cpu->msg_dpc);
}
ret = hv_synic_alloc();
if (ret)
goto err_alloc;
+
+ ret = hv_stimer_alloc(VMBUS_MESSAGE_SINT);
+ if (ret < 0)
+ goto err_alloc;
+
/*
- * Initialize the per-cpu interrupt state and
- * connect to the host.
+ * Initialize the per-cpu interrupt state and stimer state.
+ * Then connect to the host.
*/
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
hv_synic_init, hv_synic_cleanup);
if (ret < 0)
- goto err_alloc;
+ goto err_cpuhp;
hyperv_cpuhp_online = ret;
ret = vmbus_connect();
err_connect:
cpuhp_remove_state(hyperv_cpuhp_online);
+err_cpuhp:
+ hv_stimer_free();
err_alloc:
hv_synic_free();
hv_remove_vmbus_irq();
static void hv_kexec_handler(void)
{
- hv_synic_clockevents_cleanup();
+ hv_stimer_global_cleanup();
vmbus_initiate_unload(false);
vmbus_connection.conn_state = DISCONNECTED;
/* Make sure conn_state is set as hv_synic_cleanup checks for it */
static void hv_crash_handler(struct pt_regs *regs)
{
+ int cpu;
+
vmbus_initiate_unload(true);
/*
* In crash handler we can't schedule synic cleanup for all CPUs,
* for kdump.
*/
vmbus_connection.conn_state = DISCONNECTED;
- hv_synic_cleanup(smp_processor_id());
+ cpu = smp_processor_id();
+ hv_stimer_cleanup(cpu);
+ hv_synic_cleanup(cpu);
hyperv_cleanup();
};
hv_remove_kexec_handler();
hv_remove_crash_handler();
vmbus_connection.conn_state = DISCONNECTED;
- hv_synic_clockevents_cleanup();
+ hv_stimer_global_cleanup();
vmbus_disconnect();
hv_remove_vmbus_irq();
for_each_online_cpu(cpu) {
return -EIO;
}
- dht11->timestamp = ktime_get_boot_ns();
+ dht11->timestamp = ktime_get_boottime_ns();
if (hum_int < 4) { /* DHT22: 100000 = (3*256+232)*100 */
dht11->temperature = (((temp_int & 0x7f) << 8) + temp_dec) *
((temp_int & 0x80) ? -100 : 100);
/* TODO: Consider making the handler safe for IRQ sharing */
if (dht11->num_edges < DHT11_EDGES_PER_READ && dht11->num_edges >= 0) {
- dht11->edges[dht11->num_edges].ts = ktime_get_boot_ns();
+ dht11->edges[dht11->num_edges].ts = ktime_get_boottime_ns();
dht11->edges[dht11->num_edges++].value =
gpio_get_value(dht11->gpio);
int ret, timeres, offset;
mutex_lock(&dht11->lock);
- if (dht11->timestamp + DHT11_DATA_VALID_TIME < ktime_get_boot_ns()) {
+ if (dht11->timestamp + DHT11_DATA_VALID_TIME < ktime_get_boottime_ns()) {
timeres = ktime_get_resolution_ns();
dev_dbg(dht11->dev, "current timeresolution: %dns\n", timeres);
if (timeres > DHT11_MIN_TIMERES) {
return -EINVAL;
}
- dht11->timestamp = ktime_get_boot_ns() - DHT11_DATA_VALID_TIME - 1;
+ dht11->timestamp = ktime_get_boottime_ns() - DHT11_DATA_VALID_TIME - 1;
dht11->num_edges = -1;
platform_set_drvdata(pdev, iio);
ktime_get_coarse_ts64(&tp);
return timespec64_to_ns(&tp);
case CLOCK_BOOTTIME:
- return ktime_get_boot_ns();
+ return ktime_get_boottime_ns();
case CLOCK_TAI:
- return ktime_get_tai_ns();
+ return ktime_get_clocktai_ns();
default:
BUG();
}
if (status) {
pr_debug("(port: %d) failed: status = %d\n",
cb_ctx->port, status);
- rec->time_to_run = ktime_get_boot_ns() + 1 * NSEC_PER_SEC;
+ rec->time_to_run = ktime_get_boottime_ns() + 1 * NSEC_PER_SEC;
goto out;
}
be64_to_cpu((__force __be64)rec->guid_indexes),
be64_to_cpu((__force __be64)applied_guid_indexes),
be64_to_cpu((__force __be64)declined_guid_indexes));
- rec->time_to_run = ktime_get_boot_ns() +
+ rec->time_to_run = ktime_get_boottime_ns() +
resched_delay_sec * NSEC_PER_SEC;
} else {
rec->status = MLX4_GUID_INFO_STATUS_SET;
}
}
if (resched_delay_sec) {
- u64 curr_time = ktime_get_boot_ns();
+ u64 curr_time = ktime_get_boottime_ns();
*resched_delay_sec = (low_record_time < curr_time) ? 0 :
div_u64((low_record_time - curr_time), NSEC_PER_SEC);
bool
depends on PM
select ARM_GIC
- select PM_CLK
config ARM_GIC_MAX_NR
int
+ depends on ARM_GIC
default 2 if ARCH_REALVIEW
default 1
select PCI_MSI
select GENERIC_IRQ_CHIP
+config AL_FIC
+ bool "Amazon's Annapurna Labs Fabric Interrupt Controller"
+ depends on OF || COMPILE_TEST
+ select GENERIC_IRQ_CHIP
+ select IRQ_DOMAIN
+ help
+ Support Amazon's Annapurna Labs Fabric Interrupt Controller.
+
config ATMEL_AIC_IRQ
bool
select GENERIC_IRQ_CHIP
select IRQ_DOMAIN
config RENESAS_INTC_IRQPIN
- bool
+ bool "Renesas INTC External IRQ Pin Support" if COMPILE_TEST
select IRQ_DOMAIN
+ help
+ Enable support for the Renesas Interrupt Controller for external
+ interrupt pins, as found on SH/R-Mobile and R-Car Gen1 SoCs.
config RENESAS_IRQC
- bool
+ bool "Renesas R-Mobile APE6 and R-Car IRQC support" if COMPILE_TEST
select GENERIC_IRQ_CHIP
select IRQ_DOMAIN
+ help
+ Enable support for the Renesas Interrupt Controller for external
+ devices, as found on R-Mobile APE6, R-Car Gen2, and R-Car Gen3 SoCs.
+
+config RENESAS_RZA1_IRQC
+ bool "Renesas RZ/A1 IRQC support" if COMPILE_TEST
+ select IRQ_DOMAIN_HIERARCHY
+ help
+ Enable support for the Renesas RZ/A1 Interrupt Controller, to use up
+ to 8 external interrupts with configurable sense select.
config ST_IRQCHIP
bool
select IRQ_DOMAIN
config RENESAS_H8S_INTC
- bool
+ bool "Renesas H8S Interrupt Controller Support" if COMPILE_TEST
select IRQ_DOMAIN
+ help
+ Enable support for the Renesas H8/300 Interrupt Controller, as found
+ on Renesas H8S SoCs.
config IMX_GPCV2
bool
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_IRQCHIP) += irqchip.o
+obj-$(CONFIG_AL_FIC) += irq-al-fic.o
obj-$(CONFIG_ALPINE_MSI) += irq-alpine-msi.o
obj-$(CONFIG_ATH79) += irq-ath79-cpu.o
obj-$(CONFIG_ATH79) += irq-ath79-misc.o
obj-$(CONFIG_RDA_INTC) += irq-rda-intc.o
obj-$(CONFIG_RENESAS_INTC_IRQPIN) += irq-renesas-intc-irqpin.o
obj-$(CONFIG_RENESAS_IRQC) += irq-renesas-irqc.o
+obj-$(CONFIG_RENESAS_RZA1_IRQC) += irq-renesas-rza1.o
obj-$(CONFIG_VERSATILE_FPGA_IRQ) += irq-versatile-fpga.o
obj-$(CONFIG_ARCH_NSPIRE) += irq-zevio.o
obj-$(CONFIG_ARCH_VT8500) += irq-vt8500.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+/* FIC Registers */
+#define AL_FIC_CAUSE 0x00
+#define AL_FIC_MASK 0x10
+#define AL_FIC_CONTROL 0x28
+
+#define CONTROL_TRIGGER_RISING BIT(3)
+#define CONTROL_MASK_MSI_X BIT(5)
+
+#define NR_FIC_IRQS 32
+
+MODULE_AUTHOR("Talel Shenhar");
+MODULE_DESCRIPTION("Amazon's Annapurna Labs Interrupt Controller Driver");
+MODULE_LICENSE("GPL v2");
+
+enum al_fic_state {
+ AL_FIC_UNCONFIGURED = 0,
+ AL_FIC_CONFIGURED_LEVEL,
+ AL_FIC_CONFIGURED_RISING_EDGE,
+};
+
+struct al_fic {
+ void __iomem *base;
+ struct irq_domain *domain;
+ const char *name;
+ unsigned int parent_irq;
+ enum al_fic_state state;
+};
+
+static void al_fic_set_trigger(struct al_fic *fic,
+ struct irq_chip_generic *gc,
+ enum al_fic_state new_state)
+{
+ irq_flow_handler_t handler;
+ u32 control = readl_relaxed(fic->base + AL_FIC_CONTROL);
+
+ if (new_state == AL_FIC_CONFIGURED_LEVEL) {
+ handler = handle_level_irq;
+ control &= ~CONTROL_TRIGGER_RISING;
+ } else {
+ handler = handle_edge_irq;
+ control |= CONTROL_TRIGGER_RISING;
+ }
+ gc->chip_types->handler = handler;
+ fic->state = new_state;
+ writel_relaxed(control, fic->base + AL_FIC_CONTROL);
+}
+
+static int al_fic_irq_set_type(struct irq_data *data, unsigned int flow_type)
+{
+ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
+ struct al_fic *fic = gc->private;
+ enum al_fic_state new_state;
+ int ret = 0;
+
+ irq_gc_lock(gc);
+
+ if (((flow_type & IRQ_TYPE_SENSE_MASK) != IRQ_TYPE_LEVEL_HIGH) &&
+ ((flow_type & IRQ_TYPE_SENSE_MASK) != IRQ_TYPE_EDGE_RISING)) {
+ pr_debug("fic doesn't support flow type %d\n", flow_type);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ new_state = (flow_type & IRQ_TYPE_LEVEL_HIGH) ?
+ AL_FIC_CONFIGURED_LEVEL : AL_FIC_CONFIGURED_RISING_EDGE;
+
+ /*
+ * A given FIC instance can be either all level or all edge triggered.
+ * This is generally fixed depending on what pieces of HW it's wired up
+ * to.
+ *
+ * We configure it based on the sensitivity of the first source
+ * being setup, and reject any subsequent attempt at configuring it in a
+ * different way.
+ */
+ if (fic->state == AL_FIC_UNCONFIGURED) {
+ al_fic_set_trigger(fic, gc, new_state);
+ } else if (fic->state != new_state) {
+ pr_debug("fic %s state already configured to %d\n",
+ fic->name, fic->state);
+ ret = -EINVAL;
+ goto err;
+ }
+
+err:
+ irq_gc_unlock(gc);
+
+ return ret;
+}
+
+static void al_fic_irq_handler(struct irq_desc *desc)
+{
+ struct al_fic *fic = irq_desc_get_handler_data(desc);
+ struct irq_domain *domain = fic->domain;
+ struct irq_chip *irqchip = irq_desc_get_chip(desc);
+ struct irq_chip_generic *gc = irq_get_domain_generic_chip(domain, 0);
+ unsigned long pending;
+ unsigned int irq;
+ u32 hwirq;
+
+ chained_irq_enter(irqchip, desc);
+
+ pending = readl_relaxed(fic->base + AL_FIC_CAUSE);
+ pending &= ~gc->mask_cache;
+
+ for_each_set_bit(hwirq, &pending, NR_FIC_IRQS) {
+ irq = irq_find_mapping(domain, hwirq);
+ generic_handle_irq(irq);
+ }
+
+ chained_irq_exit(irqchip, desc);
+}
+
+static int al_fic_register(struct device_node *node,
+ struct al_fic *fic)
+{
+ struct irq_chip_generic *gc;
+ int ret;
+
+ fic->domain = irq_domain_add_linear(node,
+ NR_FIC_IRQS,
+ &irq_generic_chip_ops,
+ fic);
+ if (!fic->domain) {
+ pr_err("fail to add irq domain\n");
+ return -ENOMEM;
+ }
+
+ ret = irq_alloc_domain_generic_chips(fic->domain,
+ NR_FIC_IRQS,
+ 1, fic->name,
+ handle_level_irq,
+ 0, 0, IRQ_GC_INIT_MASK_CACHE);
+ if (ret) {
+ pr_err("fail to allocate generic chip (%d)\n", ret);
+ goto err_domain_remove;
+ }
+
+ gc = irq_get_domain_generic_chip(fic->domain, 0);
+ gc->reg_base = fic->base;
+ gc->chip_types->regs.mask = AL_FIC_MASK;
+ gc->chip_types->regs.ack = AL_FIC_CAUSE;
+ gc->chip_types->chip.irq_mask = irq_gc_mask_set_bit;
+ gc->chip_types->chip.irq_unmask = irq_gc_mask_clr_bit;
+ gc->chip_types->chip.irq_ack = irq_gc_ack_clr_bit;
+ gc->chip_types->chip.irq_set_type = al_fic_irq_set_type;
+ gc->chip_types->chip.flags = IRQCHIP_SKIP_SET_WAKE;
+ gc->private = fic;
+
+ irq_set_chained_handler_and_data(fic->parent_irq,
+ al_fic_irq_handler,
+ fic);
+ return 0;
+
+err_domain_remove:
+ irq_domain_remove(fic->domain);
+
+ return ret;
+}
+
+/*
+ * al_fic_wire_init() - initialize and configure fic in wire mode
+ * @of_node: optional pointer to interrupt controller's device tree node.
+ * @base: mmio to fic register
+ * @name: name of the fic
+ * @parent_irq: interrupt of parent
+ *
+ * This API will configure the fic hardware to to work in wire mode.
+ * In wire mode, fic hardware is generating a wire ("wired") interrupt.
+ * Interrupt can be generated based on positive edge or level - configuration is
+ * to be determined based on connected hardware to this fic.
+ */
+static struct al_fic *al_fic_wire_init(struct device_node *node,
+ void __iomem *base,
+ const char *name,
+ unsigned int parent_irq)
+{
+ struct al_fic *fic;
+ int ret;
+ u32 control = CONTROL_MASK_MSI_X;
+
+ fic = kzalloc(sizeof(*fic), GFP_KERNEL);
+ if (!fic)
+ return ERR_PTR(-ENOMEM);
+
+ fic->base = base;
+ fic->parent_irq = parent_irq;
+ fic->name = name;
+
+ /* mask out all interrupts */
+ writel_relaxed(0xFFFFFFFF, fic->base + AL_FIC_MASK);
+
+ /* clear any pending interrupt */
+ writel_relaxed(0, fic->base + AL_FIC_CAUSE);
+
+ writel_relaxed(control, fic->base + AL_FIC_CONTROL);
+
+ ret = al_fic_register(node, fic);
+ if (ret) {
+ pr_err("fail to register irqchip\n");
+ goto err_free;
+ }
+
+ pr_debug("%s initialized successfully in Legacy mode (parent-irq=%u)\n",
+ fic->name, parent_irq);
+
+ return fic;
+
+err_free:
+ kfree(fic);
+ return ERR_PTR(ret);
+}
+
+static int __init al_fic_init_dt(struct device_node *node,
+ struct device_node *parent)
+{
+ int ret;
+ void __iomem *base;
+ unsigned int parent_irq;
+ struct al_fic *fic;
+
+ if (!parent) {
+ pr_err("%s: unsupported - device require a parent\n",
+ node->name);
+ return -EINVAL;
+ }
+
+ base = of_iomap(node, 0);
+ if (!base) {
+ pr_err("%s: fail to map memory\n", node->name);
+ return -ENOMEM;
+ }
+
+ parent_irq = irq_of_parse_and_map(node, 0);
+ if (!parent_irq) {
+ pr_err("%s: fail to map irq\n", node->name);
+ ret = -EINVAL;
+ goto err_unmap;
+ }
+
+ fic = al_fic_wire_init(node,
+ base,
+ node->name,
+ parent_irq);
+ if (IS_ERR(fic)) {
+ pr_err("%s: fail to initialize irqchip (%lu)\n",
+ node->name,
+ PTR_ERR(fic));
+ ret = PTR_ERR(fic);
+ goto err_irq_dispose;
+ }
+
+ return 0;
+
+err_irq_dispose:
+ irq_dispose_mapping(parent_irq);
+err_unmap:
+ iounmap(base);
+
+ return ret;
+}
+
+IRQCHIP_DECLARE(al_fic, "amazon,al-fic", al_fic_init_dt);
#define INTCG_CIDSTR 0x1000
#define INTCL_PICTLR 0x0
+#define INTCL_CFGR 0x14
#define INTCL_SIGR 0x60
-#define INTCL_HPPIR 0x68
#define INTCL_RDYIR 0x6c
#define INTCL_SENR 0xa0
#define INTCL_CENR 0xa4
static DEFINE_PER_CPU(void __iomem *, intcl_reg);
+static unsigned long *__trigger;
+
+#define IRQ_OFFSET(irq) ((irq < COMM_IRQ_BASE) ? irq : (irq - COMM_IRQ_BASE))
+
+#define TRIG_BYTE_OFFSET(i) ((((i) * 2) / 32) * 4)
+#define TRIG_BIT_OFFSET(i) (((i) * 2) % 32)
+
+#define TRIG_VAL(trigger, irq) (trigger << TRIG_BIT_OFFSET(IRQ_OFFSET(irq)))
+#define TRIG_VAL_MSK(irq) (~(3 << TRIG_BIT_OFFSET(IRQ_OFFSET(irq))))
+
+#define TRIG_BASE(irq) \
+ (TRIG_BYTE_OFFSET(IRQ_OFFSET(irq)) + ((irq < COMM_IRQ_BASE) ? \
+ (this_cpu_read(intcl_reg) + INTCL_CFGR) : (INTCG_base + INTCG_CICFGR)))
+
+static DEFINE_SPINLOCK(setup_lock);
+static void setup_trigger(unsigned long irq, unsigned long trigger)
+{
+ unsigned int tmp;
+
+ spin_lock(&setup_lock);
+
+ /* setup trigger */
+ tmp = readl_relaxed(TRIG_BASE(irq)) & TRIG_VAL_MSK(irq);
+
+ writel_relaxed(tmp | TRIG_VAL(trigger, irq), TRIG_BASE(irq));
+
+ spin_unlock(&setup_lock);
+}
+
static void csky_mpintc_handler(struct pt_regs *regs)
{
void __iomem *reg_base = this_cpu_read(intcl_reg);
- do {
- handle_domain_irq(root_domain,
- readl_relaxed(reg_base + INTCL_RDYIR),
- regs);
- } while (readl_relaxed(reg_base + INTCL_HPPIR) & BIT(31));
+ handle_domain_irq(root_domain,
+ readl_relaxed(reg_base + INTCL_RDYIR), regs);
}
static void csky_mpintc_enable(struct irq_data *d)
{
void __iomem *reg_base = this_cpu_read(intcl_reg);
+ setup_trigger(d->hwirq, __trigger[d->hwirq]);
+
writel_relaxed(d->hwirq, reg_base + INTCL_SENR);
}
writel_relaxed(d->hwirq, reg_base + INTCL_CACR);
}
+static int csky_mpintc_set_type(struct irq_data *d, unsigned int type)
+{
+ switch (type & IRQ_TYPE_SENSE_MASK) {
+ case IRQ_TYPE_LEVEL_HIGH:
+ __trigger[d->hwirq] = 0;
+ break;
+ case IRQ_TYPE_LEVEL_LOW:
+ __trigger[d->hwirq] = 1;
+ break;
+ case IRQ_TYPE_EDGE_RISING:
+ __trigger[d->hwirq] = 2;
+ break;
+ case IRQ_TYPE_EDGE_FALLING:
+ __trigger[d->hwirq] = 3;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
#ifdef CONFIG_SMP
static int csky_irq_set_affinity(struct irq_data *d,
const struct cpumask *mask_val,
if (cpu >= nr_cpu_ids)
return -EINVAL;
- /* Enable interrupt destination */
- cpu |= BIT(31);
+ /*
+ * The csky,mpintc could support auto irq deliver, but it only
+ * could deliver external irq to one cpu or all cpus. So it
+ * doesn't support deliver external irq to a group of cpus
+ * with cpu_mask.
+ * SO we only use auto deliver mode when affinity mask_val is
+ * equal to cpu_present_mask.
+ *
+ */
+ if (cpumask_equal(mask_val, cpu_present_mask))
+ cpu = 0;
+ else
+ cpu |= BIT(31);
writel_relaxed(cpu, INTCG_base + INTCG_CIDSTR + offset);
.irq_eoi = csky_mpintc_eoi,
.irq_enable = csky_mpintc_enable,
.irq_disable = csky_mpintc_disable,
+ .irq_set_type = csky_mpintc_set_type,
#ifdef CONFIG_SMP
.irq_set_affinity = csky_irq_set_affinity,
#endif
return 0;
}
+static int csky_irq_domain_xlate_cells(struct irq_domain *d,
+ struct device_node *ctrlr, const u32 *intspec,
+ unsigned int intsize, unsigned long *out_hwirq,
+ unsigned int *out_type)
+{
+ if (WARN_ON(intsize < 1))
+ return -EINVAL;
+
+ *out_hwirq = intspec[0];
+ if (intsize > 1)
+ *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK;
+ else
+ *out_type = IRQ_TYPE_LEVEL_HIGH;
+
+ return 0;
+}
+
static const struct irq_domain_ops csky_irqdomain_ops = {
.map = csky_irqdomain_map,
- .xlate = irq_domain_xlate_onecell,
+ .xlate = csky_irq_domain_xlate_cells,
};
#ifdef CONFIG_SMP
if (ret < 0)
nr_irq = INTC_IRQS;
+ __trigger = kcalloc(nr_irq, sizeof(unsigned long), GFP_KERNEL);
+ if (__trigger == NULL)
+ return -ENXIO;
+
if (INTCG_base == NULL) {
INTCG_base = ioremap(mfcr("cr<31, 14>"),
INTCL_SIZE*nr_cpu_ids + INTCG_SIZE);
/* List of flags for specific v2m implementation */
#define GICV2M_NEEDS_SPI_OFFSET 0x00000001
+#define GICV2M_GRAVITON_ADDRESS_ONLY 0x00000002
static LIST_HEAD(v2m_nodes);
static DEFINE_SPINLOCK(v2m_lock);
.chip = &gicv2m_msi_irq_chip,
};
+static phys_addr_t gicv2m_get_msi_addr(struct v2m_data *v2m, int hwirq)
+{
+ if (v2m->flags & GICV2M_GRAVITON_ADDRESS_ONLY)
+ return v2m->res.start | ((hwirq - 32) << 3);
+ else
+ return v2m->res.start + V2M_MSI_SETSPI_NS;
+}
+
static void gicv2m_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
struct v2m_data *v2m = irq_data_get_irq_chip_data(data);
- phys_addr_t addr = v2m->res.start + V2M_MSI_SETSPI_NS;
+ phys_addr_t addr = gicv2m_get_msi_addr(v2m, data->hwirq);
msg->address_hi = upper_32_bits(addr);
msg->address_lo = lower_32_bits(addr);
- msg->data = data->hwirq;
+ if (v2m->flags & GICV2M_GRAVITON_ADDRESS_ONLY)
+ msg->data = 0;
+ else
+ msg->data = data->hwirq;
if (v2m->flags & GICV2M_NEEDS_SPI_OFFSET)
msg->data -= v2m->spi_offset;
hwirq = v2m->spi_start + offset;
err = iommu_dma_prepare_msi(info->desc,
- v2m->res.start + V2M_MSI_SETSPI_NS);
+ gicv2m_get_msi_addr(v2m, hwirq));
if (err)
return err;
static int __init gicv2m_init_one(struct fwnode_handle *fwnode,
u32 spi_start, u32 nr_spis,
- struct resource *res)
+ struct resource *res, u32 flags)
{
int ret;
struct v2m_data *v2m;
INIT_LIST_HEAD(&v2m->entry);
v2m->fwnode = fwnode;
+ v2m->flags = flags;
memcpy(&v2m->res, res, sizeof(struct resource));
v2m->spi_start = spi_start;
v2m->nr_spis = nr_spis;
} else {
- u32 typer = readl_relaxed(v2m->base + V2M_MSI_TYPER);
+ u32 typer;
+
+ /* Graviton should always have explicit spi_start/nr_spis */
+ if (v2m->flags & GICV2M_GRAVITON_ADDRESS_ONLY) {
+ ret = -EINVAL;
+ goto err_iounmap;
+ }
+ typer = readl_relaxed(v2m->base + V2M_MSI_TYPER);
v2m->spi_start = V2M_MSI_TYPER_BASE_SPI(typer);
v2m->nr_spis = V2M_MSI_TYPER_NUM_SPI(typer);
*
* Broadom NS2 GICv2m implementation has an erratum where the MSI data
* is 'spi_number - 32'
+ *
+ * Reading that register fails on the Graviton implementation
*/
- switch (readl_relaxed(v2m->base + V2M_MSI_IIDR)) {
- case XGENE_GICV2M_MSI_IIDR:
- v2m->flags |= GICV2M_NEEDS_SPI_OFFSET;
- v2m->spi_offset = v2m->spi_start;
- break;
- case BCM_NS2_GICV2M_MSI_IIDR:
- v2m->flags |= GICV2M_NEEDS_SPI_OFFSET;
- v2m->spi_offset = 32;
- break;
+ if (!(v2m->flags & GICV2M_GRAVITON_ADDRESS_ONLY)) {
+ switch (readl_relaxed(v2m->base + V2M_MSI_IIDR)) {
+ case XGENE_GICV2M_MSI_IIDR:
+ v2m->flags |= GICV2M_NEEDS_SPI_OFFSET;
+ v2m->spi_offset = v2m->spi_start;
+ break;
+ case BCM_NS2_GICV2M_MSI_IIDR:
+ v2m->flags |= GICV2M_NEEDS_SPI_OFFSET;
+ v2m->spi_offset = 32;
+ break;
+ }
}
-
v2m->bm = kcalloc(BITS_TO_LONGS(v2m->nr_spis), sizeof(long),
GFP_KERNEL);
if (!v2m->bm) {
pr_info("DT overriding V2M MSI_TYPER (base:%u, num:%u)\n",
spi_start, nr_spis);
- ret = gicv2m_init_one(&child->fwnode, spi_start, nr_spis, &res);
+ ret = gicv2m_init_one(&child->fwnode, spi_start, nr_spis,
+ &res, 0);
if (ret) {
of_node_put(child);
break;
return data->fwnode;
}
+static bool acpi_check_amazon_graviton_quirks(void)
+{
+ static struct acpi_table_madt *madt;
+ acpi_status status;
+ bool rc = false;
+
+#define ACPI_AMZN_OEM_ID "AMAZON"
+
+ status = acpi_get_table(ACPI_SIG_MADT, 0,
+ (struct acpi_table_header **)&madt);
+
+ if (ACPI_FAILURE(status) || !madt)
+ return rc;
+ rc = !memcmp(madt->header.oem_id, ACPI_AMZN_OEM_ID, ACPI_OEM_ID_SIZE);
+ acpi_put_table((struct acpi_table_header *)madt);
+
+ return rc;
+}
+
static int __init
acpi_parse_madt_msi(union acpi_subtable_headers *header,
const unsigned long end)
u32 spi_start = 0, nr_spis = 0;
struct acpi_madt_generic_msi_frame *m;
struct fwnode_handle *fwnode;
+ u32 flags = 0;
m = (struct acpi_madt_generic_msi_frame *)header;
if (BAD_MADT_ENTRY(m, end))
res.end = m->base_address + SZ_4K - 1;
res.flags = IORESOURCE_MEM;
+ if (acpi_check_amazon_graviton_quirks()) {
+ pr_info("applying Amazon Graviton quirk\n");
+ res.end = res.start + SZ_8K - 1;
+ flags |= GICV2M_GRAVITON_ADDRESS_ONLY;
+ gicv2m_msi_domain_info.flags &= ~MSI_FLAG_MULTI_PCI_MSI;
+ }
+
if (m->flags & ACPI_MADT_OVERRIDE_SPI_VALUES) {
spi_start = m->spi_base;
nr_spis = m->spi_count;
return -EINVAL;
}
- ret = gicv2m_init_one(fwnode, spi_start, nr_spis, &res);
+ ret = gicv2m_init_one(fwnode, spi_start, nr_spis, &res, flags);
if (ret)
irq_domain_free_fwnode(fwnode);
}
static int its_wait_for_range_completion(struct its_node *its,
- struct its_cmd_block *from,
+ u64 prev_idx,
struct its_cmd_block *to)
{
- u64 rd_idx, from_idx, to_idx;
+ u64 rd_idx, to_idx, linear_idx;
u32 count = 1000000; /* 1s! */
- from_idx = its_cmd_ptr_to_offset(its, from);
+ /* Linearize to_idx if the command set has wrapped around */
to_idx = its_cmd_ptr_to_offset(its, to);
+ if (to_idx < prev_idx)
+ to_idx += ITS_CMD_QUEUE_SZ;
+
+ linear_idx = prev_idx;
while (1) {
+ s64 delta;
+
rd_idx = readl_relaxed(its->base + GITS_CREADR);
- /* Direct case */
- if (from_idx < to_idx && rd_idx >= to_idx)
- break;
+ /*
+ * Compute the read pointer progress, taking the
+ * potential wrap-around into account.
+ */
+ delta = rd_idx - prev_idx;
+ if (rd_idx < prev_idx)
+ delta += ITS_CMD_QUEUE_SZ;
- /* Wrapped case */
- if (from_idx >= to_idx && rd_idx >= to_idx && rd_idx < from_idx)
+ linear_idx += delta;
+ if (linear_idx >= to_idx)
break;
count--;
if (!count) {
- pr_err_ratelimited("ITS queue timeout (%llu %llu %llu)\n",
- from_idx, to_idx, rd_idx);
+ pr_err_ratelimited("ITS queue timeout (%llu %llu)\n",
+ to_idx, linear_idx);
return -1;
}
+ prev_idx = rd_idx;
cpu_relax();
udelay(1);
}
struct its_cmd_block *cmd, *sync_cmd, *next_cmd; \
synctype *sync_obj; \
unsigned long flags; \
+ u64 rd_idx; \
\
raw_spin_lock_irqsave(&its->lock, flags); \
\
} \
\
post: \
+ rd_idx = readl_relaxed(its->base + GITS_CREADR); \
next_cmd = its_post_commands(its); \
raw_spin_unlock_irqrestore(&its->lock, flags); \
\
- if (its_wait_for_range_completion(its, cmd, next_cmd)) \
+ if (its_wait_for_range_completion(its, rd_idx, next_cmd)) \
pr_err_ratelimited("ITS cmd %ps failed\n", builder); \
}
static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
{
+ bool irqs_enabled = interrupts_enabled(regs);
int err;
+ if (irqs_enabled)
+ nmi_enter();
+
if (static_branch_likely(&supports_deactivate_key))
gic_write_eoir(irqnr);
/*
err = handle_domain_nmi(gic_data.domain, irqnr, regs);
if (err)
gic_deactivate_unhandled(irqnr);
+
+ if (irqs_enabled)
+ nmi_exit();
}
static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
if (gic_dist_supports_lpis()) {
its_init(handle, &gic_data.rdists, gic_data.domain);
its_cpu_init();
+ } else {
+ if (IS_ENABLED(CONFIG_ARM_GIC_V2M))
+ gicv2m_init(handle, gic_data.domain);
}
if (gic_prio_masking_enabled()) {
err = -EINVAL;
if (err) {
- dev_err(&pdev->dev, "Failed to create mbi-gen@%p irqdomain",
- mgn_chip->base);
+ dev_err(&pdev->dev, "Failed to create mbi-gen irqdomain\n");
return err;
}
{ .compatible = "amlogic,meson-gxbb-gpio-intc", .data = &gxbb_params },
{ .compatible = "amlogic,meson-gxl-gpio-intc", .data = &gxl_params },
{ .compatible = "amlogic,meson-axg-gpio-intc", .data = &axg_params },
+ { .compatible = "amlogic,meson-g12a-gpio-intc", .data = &axg_params },
{ }
};
intr = GIC_HWIRQ_TO_LOCAL(d->hwirq);
cd = irq_data_get_irq_chip_data(d);
- write_gic_vl_map(intr, cd->map);
+ write_gic_vl_map(mips_gic_vx_map_reg(intr), cd->map);
if (cd->mask)
write_gic_vl_smask(BIT(intr));
}
spin_lock_irqsave(&gic_lock, flags);
for_each_online_cpu(cpu) {
write_gic_vl_other(mips_cm_vp_id(cpu));
- write_gic_vo_map(intr, map);
+ write_gic_vo_map(mips_gic_vx_map_reg(intr), map);
}
spin_unlock_irqrestore(&gic_lock, flags);
}
irq_chip = &p->irq_chip;
- irq_chip->name = name;
+ irq_chip->name = "intc-irqpin";
+ irq_chip->parent_device = dev;
irq_chip->irq_mask = disable_fn;
irq_chip->irq_unmask = enable_fn;
irq_chip->irq_set_type = intc_irqpin_irq_set_type;
#include <linux/init.h>
#include <linux/platform_device.h>
-#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/ioport.h>
#include <linux/io.h>
void __iomem *cpu_int_base;
struct irqc_irq irq[IRQC_IRQ_MAX];
unsigned int number_of_irqs;
- struct platform_device *pdev;
+ struct device *dev;
struct irq_chip_generic *gc;
struct irq_domain *irq_domain;
atomic_t wakeup_path;
static void irqc_dbg(struct irqc_irq *i, char *str)
{
- dev_dbg(&i->p->pdev->dev, "%s (%d:%d)\n",
- str, i->requested_irq, i->hw_irq);
+ dev_dbg(i->p->dev, "%s (%d:%d)\n", str, i->requested_irq, i->hw_irq);
}
static unsigned char irqc_sense[IRQ_TYPE_SENSE_MASK + 1] = {
static int irqc_probe(struct platform_device *pdev)
{
+ struct device *dev = &pdev->dev;
+ const char *name = dev_name(dev);
struct irqc_priv *p;
- struct resource *io;
struct resource *irq;
- const char *name = dev_name(&pdev->dev);
int ret;
int k;
- p = kzalloc(sizeof(*p), GFP_KERNEL);
- if (!p) {
- dev_err(&pdev->dev, "failed to allocate driver data\n");
- ret = -ENOMEM;
- goto err0;
- }
+ p = devm_kzalloc(dev, sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
- p->pdev = pdev;
+ p->dev = dev;
platform_set_drvdata(pdev, p);
- pm_runtime_enable(&pdev->dev);
- pm_runtime_get_sync(&pdev->dev);
-
- /* get hold of manadatory IOMEM */
- io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!io) {
- dev_err(&pdev->dev, "not enough IOMEM resources\n");
- ret = -EINVAL;
- goto err1;
- }
+ pm_runtime_enable(dev);
+ pm_runtime_get_sync(dev);
/* allow any number of IRQs between 1 and IRQC_IRQ_MAX */
for (k = 0; k < IRQC_IRQ_MAX; k++) {
p->number_of_irqs = k;
if (p->number_of_irqs < 1) {
- dev_err(&pdev->dev, "not enough IRQ resources\n");
+ dev_err(dev, "not enough IRQ resources\n");
ret = -EINVAL;
- goto err1;
+ goto err_runtime_pm_disable;
}
/* ioremap IOMEM and setup read/write callbacks */
- p->iomem = ioremap_nocache(io->start, resource_size(io));
- if (!p->iomem) {
- dev_err(&pdev->dev, "failed to remap IOMEM\n");
- ret = -ENXIO;
- goto err2;
+ p->iomem = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(p->iomem)) {
+ ret = PTR_ERR(p->iomem);
+ goto err_runtime_pm_disable;
}
p->cpu_int_base = p->iomem + IRQC_INT_CPU_BASE(0); /* SYS-SPI */
- p->irq_domain = irq_domain_add_linear(pdev->dev.of_node,
- p->number_of_irqs,
+ p->irq_domain = irq_domain_add_linear(dev->of_node, p->number_of_irqs,
&irq_generic_chip_ops, p);
if (!p->irq_domain) {
ret = -ENXIO;
- dev_err(&pdev->dev, "cannot initialize irq domain\n");
- goto err2;
+ dev_err(dev, "cannot initialize irq domain\n");
+ goto err_runtime_pm_disable;
}
ret = irq_alloc_domain_generic_chips(p->irq_domain, p->number_of_irqs,
- 1, name, handle_level_irq,
+ 1, "irqc", handle_level_irq,
0, 0, IRQ_GC_INIT_NESTED_LOCK);
if (ret) {
- dev_err(&pdev->dev, "cannot allocate generic chip\n");
- goto err3;
+ dev_err(dev, "cannot allocate generic chip\n");
+ goto err_remove_domain;
}
p->gc = irq_get_domain_generic_chip(p->irq_domain, 0);
p->gc->reg_base = p->cpu_int_base;
p->gc->chip_types[0].regs.enable = IRQC_EN_SET;
p->gc->chip_types[0].regs.disable = IRQC_EN_STS;
+ p->gc->chip_types[0].chip.parent_device = dev;
p->gc->chip_types[0].chip.irq_mask = irq_gc_mask_disable_reg;
p->gc->chip_types[0].chip.irq_unmask = irq_gc_unmask_enable_reg;
p->gc->chip_types[0].chip.irq_set_type = irqc_irq_set_type;
/* request interrupts one by one */
for (k = 0; k < p->number_of_irqs; k++) {
- if (request_irq(p->irq[k].requested_irq, irqc_irq_handler,
- 0, name, &p->irq[k])) {
- dev_err(&pdev->dev, "failed to request IRQ\n");
+ if (devm_request_irq(dev, p->irq[k].requested_irq,
+ irqc_irq_handler, 0, name, &p->irq[k])) {
+ dev_err(dev, "failed to request IRQ\n");
ret = -ENOENT;
- goto err4;
+ goto err_remove_domain;
}
}
- dev_info(&pdev->dev, "driving %d irqs\n", p->number_of_irqs);
+ dev_info(dev, "driving %d irqs\n", p->number_of_irqs);
return 0;
-err4:
- while (--k >= 0)
- free_irq(p->irq[k].requested_irq, &p->irq[k]);
-err3:
+err_remove_domain:
irq_domain_remove(p->irq_domain);
-err2:
- iounmap(p->iomem);
-err1:
- pm_runtime_put(&pdev->dev);
- pm_runtime_disable(&pdev->dev);
- kfree(p);
-err0:
+err_runtime_pm_disable:
+ pm_runtime_put(dev);
+ pm_runtime_disable(dev);
return ret;
}
static int irqc_remove(struct platform_device *pdev)
{
struct irqc_priv *p = platform_get_drvdata(pdev);
- int k;
-
- for (k = 0; k < p->number_of_irqs; k++)
- free_irq(p->irq[k].requested_irq, &p->irq[k]);
irq_domain_remove(p->irq_domain);
- iounmap(p->iomem);
pm_runtime_put(&pdev->dev);
pm_runtime_disable(&pdev->dev);
- kfree(p);
return 0;
}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Renesas RZ/A1 IRQC Driver
+ *
+ * Copyright (C) 2019 Glider bvba
+ */
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irqdomain.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+#define IRQC_NUM_IRQ 8
+
+#define ICR0 0 /* Interrupt Control Register 0 */
+
+#define ICR0_NMIL BIT(15) /* NMI Input Level (0=low, 1=high) */
+#define ICR0_NMIE BIT(8) /* Edge Select (0=falling, 1=rising) */
+#define ICR0_NMIF BIT(1) /* NMI Interrupt Request */
+
+#define ICR1 2 /* Interrupt Control Register 1 */
+
+#define ICR1_IRQS(n, sense) ((sense) << ((n) * 2)) /* IRQ Sense Select */
+#define ICR1_IRQS_LEVEL_LOW 0
+#define ICR1_IRQS_EDGE_FALLING 1
+#define ICR1_IRQS_EDGE_RISING 2
+#define ICR1_IRQS_EDGE_BOTH 3
+#define ICR1_IRQS_MASK(n) ICR1_IRQS((n), 3)
+
+#define IRQRR 4 /* IRQ Interrupt Request Register */
+
+
+struct rza1_irqc_priv {
+ struct device *dev;
+ void __iomem *base;
+ struct irq_chip chip;
+ struct irq_domain *irq_domain;
+ struct of_phandle_args map[IRQC_NUM_IRQ];
+};
+
+static struct rza1_irqc_priv *irq_data_to_priv(struct irq_data *data)
+{
+ return data->domain->host_data;
+}
+
+static void rza1_irqc_eoi(struct irq_data *d)
+{
+ struct rza1_irqc_priv *priv = irq_data_to_priv(d);
+ u16 bit = BIT(irqd_to_hwirq(d));
+ u16 tmp;
+
+ tmp = readw_relaxed(priv->base + IRQRR);
+ if (tmp & bit)
+ writew_relaxed(GENMASK(IRQC_NUM_IRQ - 1, 0) & ~bit,
+ priv->base + IRQRR);
+
+ irq_chip_eoi_parent(d);
+}
+
+static int rza1_irqc_set_type(struct irq_data *d, unsigned int type)
+{
+ struct rza1_irqc_priv *priv = irq_data_to_priv(d);
+ unsigned int hw_irq = irqd_to_hwirq(d);
+ u16 sense, tmp;
+
+ switch (type & IRQ_TYPE_SENSE_MASK) {
+ case IRQ_TYPE_LEVEL_LOW:
+ sense = ICR1_IRQS_LEVEL_LOW;
+ break;
+
+ case IRQ_TYPE_EDGE_FALLING:
+ sense = ICR1_IRQS_EDGE_FALLING;
+ break;
+
+ case IRQ_TYPE_EDGE_RISING:
+ sense = ICR1_IRQS_EDGE_RISING;
+ break;
+
+ case IRQ_TYPE_EDGE_BOTH:
+ sense = ICR1_IRQS_EDGE_BOTH;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ tmp = readw_relaxed(priv->base + ICR1);
+ tmp &= ~ICR1_IRQS_MASK(hw_irq);
+ tmp |= ICR1_IRQS(hw_irq, sense);
+ writew_relaxed(tmp, priv->base + ICR1);
+ return 0;
+}
+
+static int rza1_irqc_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ struct rza1_irqc_priv *priv = domain->host_data;
+ struct irq_fwspec *fwspec = arg;
+ unsigned int hwirq = fwspec->param[0];
+ struct irq_fwspec spec;
+ unsigned int i;
+ int ret;
+
+ ret = irq_domain_set_hwirq_and_chip(domain, virq, hwirq, &priv->chip,
+ priv);
+ if (ret)
+ return ret;
+
+ spec.fwnode = &priv->dev->of_node->fwnode;
+ spec.param_count = priv->map[hwirq].args_count;
+ for (i = 0; i < spec.param_count; i++)
+ spec.param[i] = priv->map[hwirq].args[i];
+
+ return irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, &spec);
+}
+
+static int rza1_irqc_translate(struct irq_domain *domain,
+ struct irq_fwspec *fwspec, unsigned long *hwirq,
+ unsigned int *type)
+{
+ if (fwspec->param_count != 2 || fwspec->param[0] >= IRQC_NUM_IRQ)
+ return -EINVAL;
+
+ *hwirq = fwspec->param[0];
+ *type = fwspec->param[1];
+ return 0;
+}
+
+static const struct irq_domain_ops rza1_irqc_domain_ops = {
+ .alloc = rza1_irqc_alloc,
+ .translate = rza1_irqc_translate,
+};
+
+static int rza1_irqc_parse_map(struct rza1_irqc_priv *priv,
+ struct device_node *gic_node)
+{
+ unsigned int imaplen, i, j, ret;
+ struct device *dev = priv->dev;
+ struct device_node *ipar;
+ const __be32 *imap;
+ u32 intsize;
+
+ imap = of_get_property(dev->of_node, "interrupt-map", &imaplen);
+ if (!imap)
+ return -EINVAL;
+
+ for (i = 0; i < IRQC_NUM_IRQ; i++) {
+ if (imaplen < 3)
+ return -EINVAL;
+
+ /* Check interrupt number, ignore sense */
+ if (be32_to_cpup(imap) != i)
+ return -EINVAL;
+
+ ipar = of_find_node_by_phandle(be32_to_cpup(imap + 2));
+ if (ipar != gic_node) {
+ of_node_put(ipar);
+ return -EINVAL;
+ }
+
+ imap += 3;
+ imaplen -= 3;
+
+ ret = of_property_read_u32(ipar, "#interrupt-cells", &intsize);
+ of_node_put(ipar);
+ if (ret)
+ return ret;
+
+ if (imaplen < intsize)
+ return -EINVAL;
+
+ priv->map[i].args_count = intsize;
+ for (j = 0; j < intsize; j++)
+ priv->map[i].args[j] = be32_to_cpup(imap++);
+
+ imaplen -= intsize;
+ }
+
+ return 0;
+}
+
+static int rza1_irqc_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
+ struct irq_domain *parent = NULL;
+ struct device_node *gic_node;
+ struct rza1_irqc_priv *priv;
+ int ret;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, priv);
+ priv->dev = dev;
+
+ priv->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(priv->base))
+ return PTR_ERR(priv->base);
+
+ gic_node = of_irq_find_parent(np);
+ if (gic_node) {
+ parent = irq_find_host(gic_node);
+ of_node_put(gic_node);
+ }
+
+ if (!parent) {
+ dev_err(dev, "cannot find parent domain\n");
+ return -ENODEV;
+ }
+
+ ret = rza1_irqc_parse_map(priv, gic_node);
+ if (ret) {
+ dev_err(dev, "cannot parse %s: %d\n", "interrupt-map", ret);
+ return ret;
+ }
+
+ priv->chip.name = "rza1-irqc",
+ priv->chip.irq_mask = irq_chip_mask_parent,
+ priv->chip.irq_unmask = irq_chip_unmask_parent,
+ priv->chip.irq_eoi = rza1_irqc_eoi,
+ priv->chip.irq_retrigger = irq_chip_retrigger_hierarchy,
+ priv->chip.irq_set_type = rza1_irqc_set_type,
+ priv->chip.flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE;
+
+ priv->irq_domain = irq_domain_add_hierarchy(parent, 0, IRQC_NUM_IRQ,
+ np, &rza1_irqc_domain_ops,
+ priv);
+ if (!priv->irq_domain) {
+ dev_err(dev, "cannot initialize irq domain\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int rza1_irqc_remove(struct platform_device *pdev)
+{
+ struct rza1_irqc_priv *priv = platform_get_drvdata(pdev);
+
+ irq_domain_remove(priv->irq_domain);
+ return 0;
+}
+
+static const struct of_device_id rza1_irqc_dt_ids[] = {
+ { .compatible = "renesas,rza1-irqc" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, rza1_irqc_dt_ids);
+
+static struct platform_driver rza1_irqc_device_driver = {
+ .probe = rza1_irqc_probe,
+ .remove = rza1_irqc_remove,
+ .driver = {
+ .name = "renesas_rza1_irqc",
+ .of_match_table = rza1_irqc_dt_ids,
+ }
+};
+
+static int __init rza1_irqc_init(void)
+{
+ return platform_driver_register(&rza1_irqc_device_driver);
+}
+postcore_initcall(rza1_irqc_init);
+
+static void __exit rza1_irqc_exit(void)
+{
+ platform_driver_unregister(&rza1_irqc_device_driver);
+}
+module_exit(rza1_irqc_exit);
+
+MODULE_AUTHOR("Geert Uytterhoeven <geert+renesas@glider.be>");
+MODULE_DESCRIPTION("Renesas RZ/A1 IRQC Driver");
+MODULE_LICENSE("GPL v2");
/*
* Driver for Socionext External Interrupt Unit (EXIU)
*
- * Copyright (c) 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (c) 2017-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
*
* Based on irq-tegra.c:
* Copyright (C) 2011 Google, Inc.
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
+#include <linux/platform_device.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
*hwirq = fwspec->param[1] - info->spi_base;
*type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
- return 0;
+ } else {
+ if (fwspec->param_count != 2)
+ return -EINVAL;
+ *hwirq = fwspec->param[0];
+ *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
}
- return -EINVAL;
+ return 0;
}
static int exiu_domain_alloc(struct irq_domain *dom, unsigned int virq,
struct exiu_irq_data *info = dom->host_data;
irq_hw_number_t hwirq;
- if (fwspec->param_count != 3)
- return -EINVAL; /* Not GIC compliant */
- if (fwspec->param[0] != GIC_SPI)
- return -EINVAL; /* No PPI should point to this domain */
+ parent_fwspec = *fwspec;
+ if (is_of_node(dom->parent->fwnode)) {
+ if (fwspec->param_count != 3)
+ return -EINVAL; /* Not GIC compliant */
+ if (fwspec->param[0] != GIC_SPI)
+ return -EINVAL; /* No PPI should point to this domain */
+ hwirq = fwspec->param[1] - info->spi_base;
+ } else {
+ hwirq = fwspec->param[0];
+ parent_fwspec.param[0] = hwirq + info->spi_base + 32;
+ }
WARN_ON(nr_irqs != 1);
- hwirq = fwspec->param[1] - info->spi_base;
irq_domain_set_hwirq_and_chip(dom, virq, hwirq, &exiu_irq_chip, info);
- parent_fwspec = *fwspec;
parent_fwspec.fwnode = dom->parent->fwnode;
return irq_domain_alloc_irqs_parent(dom, virq, nr_irqs, &parent_fwspec);
}
.free = irq_domain_free_irqs_common,
};
-static int __init exiu_init(struct device_node *node,
- struct device_node *parent)
+static struct exiu_irq_data *exiu_init(const struct fwnode_handle *fwnode,
+ struct resource *res)
{
- struct irq_domain *parent_domain, *domain;
struct exiu_irq_data *data;
int err;
- if (!parent) {
- pr_err("%pOF: no parent, giving up\n", node);
- return -ENODEV;
- }
-
- parent_domain = irq_find_host(parent);
- if (!parent_domain) {
- pr_err("%pOF: unable to obtain parent domain\n", node);
- return -ENXIO;
- }
-
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
- if (of_property_read_u32(node, "socionext,spi-base", &data->spi_base)) {
- pr_err("%pOF: failed to parse 'spi-base' property\n", node);
+ if (fwnode_property_read_u32_array(fwnode, "socionext,spi-base",
+ &data->spi_base, 1)) {
err = -ENODEV;
goto out_free;
}
- data->base = of_iomap(node, 0);
+ data->base = ioremap(res->start, resource_size(res));
if (!data->base) {
err = -ENODEV;
goto out_free;
writel_relaxed(0xFFFFFFFF, data->base + EIREQCLR);
writel_relaxed(0xFFFFFFFF, data->base + EIMASK);
+ return data;
+
+out_free:
+ kfree(data);
+ return ERR_PTR(err);
+}
+
+static int __init exiu_dt_init(struct device_node *node,
+ struct device_node *parent)
+{
+ struct irq_domain *parent_domain, *domain;
+ struct exiu_irq_data *data;
+ struct resource res;
+
+ if (!parent) {
+ pr_err("%pOF: no parent, giving up\n", node);
+ return -ENODEV;
+ }
+
+ parent_domain = irq_find_host(parent);
+ if (!parent_domain) {
+ pr_err("%pOF: unable to obtain parent domain\n", node);
+ return -ENXIO;
+ }
+
+ if (of_address_to_resource(node, 0, &res)) {
+ pr_err("%pOF: failed to parse memory resource\n", node);
+ return -ENXIO;
+ }
+
+ data = exiu_init(of_node_to_fwnode(node), &res);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
domain = irq_domain_add_hierarchy(parent_domain, 0, NUM_IRQS, node,
&exiu_domain_ops, data);
if (!domain) {
pr_err("%pOF: failed to allocate domain\n", node);
- err = -ENOMEM;
goto out_unmap;
}
out_unmap:
iounmap(data->base);
-out_free:
kfree(data);
- return err;
+ return -ENOMEM;
}
-IRQCHIP_DECLARE(exiu, "socionext,synquacer-exiu", exiu_init);
+IRQCHIP_DECLARE(exiu, "socionext,synquacer-exiu", exiu_dt_init);
+
+#ifdef CONFIG_ACPI
+static int exiu_acpi_probe(struct platform_device *pdev)
+{
+ struct irq_domain *domain;
+ struct exiu_irq_data *data;
+ struct resource *res;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ dev_err(&pdev->dev, "failed to parse memory resource\n");
+ return -ENXIO;
+ }
+
+ data = exiu_init(dev_fwnode(&pdev->dev), res);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ domain = acpi_irq_create_hierarchy(0, NUM_IRQS, dev_fwnode(&pdev->dev),
+ &exiu_domain_ops, data);
+ if (!domain) {
+ dev_err(&pdev->dev, "failed to create IRQ domain\n");
+ goto out_unmap;
+ }
+
+ dev_info(&pdev->dev, "%d interrupts forwarded\n", NUM_IRQS);
+
+ return 0;
+
+out_unmap:
+ iounmap(data->base);
+ kfree(data);
+ return -ENOMEM;
+}
+
+static const struct acpi_device_id exiu_acpi_ids[] = {
+ { "SCX0008" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(acpi, exiu_acpi_ids);
+
+static struct platform_driver exiu_driver = {
+ .driver = {
+ .name = "exiu",
+ .acpi_match_table = exiu_acpi_ids,
+ },
+ .probe = exiu_acpi_probe,
+};
+builtin_platform_driver(exiu_driver);
+#endif
parent_fwspec.param[1] = vint_desc->vint_id;
parent_virq = irq_create_fwspec_mapping(&parent_fwspec);
- if (parent_virq <= 0) {
+ if (parent_virq == 0) {
kfree(vint_desc);
- return ERR_PTR(parent_virq);
+ return ERR_PTR(-EINVAL);
}
vint_desc->parent_virq = parent_virq;
static int __init combiner_probe(struct platform_device *pdev)
{
struct combiner *combiner;
- size_t alloc_sz;
int nregs;
int err;
return -EINVAL;
}
- alloc_sz = sizeof(*combiner) + sizeof(struct combiner_reg) * nregs;
- combiner = devm_kzalloc(&pdev->dev, alloc_sz, GFP_KERNEL);
+ combiner = devm_kzalloc(&pdev->dev, struct_size(combiner, regs, nregs),
+ GFP_KERNEL);
if (!combiner)
return -ENOMEM;
* down to 16us, ensuring we won't overflow 32-bit computations below
* even up to 3k CPUs, while keeping divides cheap on smaller systems.
*/
- curr_boot = ktime_get_boot_ns() * cpus;
+ curr_boot = ktime_get_boottime_ns() * cpus;
diff_boot = (curr_boot - activity_data->last_boot) >> 16;
diff_used = (curr_used - activity_data->last_used) >> 16;
activity_data->last_boot = curr_boot;
return ERR_PTR(-EINVAL);
}
/* target_args */
- dev->target_args_array[n] = kstrndup(field[3], GFP_KERNEL,
- DM_MAX_STR_SIZE);
+ dev->target_args_array[n] = kstrndup(field[3], DM_MAX_STR_SIZE,
+ GFP_KERNEL);
if (!dev->target_args_array[n])
return ERR_PTR(-ENOMEM);
return 0;
if (strlen(create) >= DM_MAX_STR_SIZE) {
- DMERR("Argument is too big. Limit is %d\n", DM_MAX_STR_SIZE);
+ DMERR("Argument is too big. Limit is %d", DM_MAX_STR_SIZE);
return -EINVAL;
}
- str = kstrndup(create, GFP_KERNEL, DM_MAX_STR_SIZE);
+ str = kstrndup(create, DM_MAX_STR_SIZE, GFP_KERNEL);
if (!str)
return -ENOMEM;
if (r)
goto out;
- DMINFO("waiting for all devices to be available before creating mapped devices\n");
+ DMINFO("waiting for all devices to be available before creating mapped devices");
wait_for_device_probe();
list_for_each_entry(dev, &devices, list) {
#define WRITE_LOG_VERSION 1ULL
#define WRITE_LOG_MAGIC 0x6a736677736872ULL
+#define WRITE_LOG_SUPER_SECTOR 0
/*
* The disk format for this is braindead simple.
struct list_head logging_blocks;
wait_queue_head_t wait;
struct task_struct *log_kthread;
+ struct completion super_done;
};
struct pending_block {
bio_put(bio);
}
+static void log_end_super(struct bio *bio)
+{
+ struct log_writes_c *lc = bio->bi_private;
+
+ complete(&lc->super_done);
+ log_end_io(bio);
+}
+
/*
* Meant to be called if there is an error, it will free all the pages
* associated with the block.
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, lc->logdev->bdev);
- bio->bi_end_io = log_end_io;
+ bio->bi_end_io = (sector == WRITE_LOG_SUPER_SECTOR) ?
+ log_end_super : log_end_io;
bio->bi_private = lc;
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
super.nr_entries = cpu_to_le64(lc->logged_entries);
super.sectorsize = cpu_to_le32(lc->sectorsize);
- if (write_metadata(lc, &super, sizeof(super), NULL, 0, 0)) {
+ if (write_metadata(lc, &super, sizeof(super), NULL, 0,
+ WRITE_LOG_SUPER_SECTOR)) {
DMERR("Couldn't write super");
return -1;
}
+ /*
+ * Super sector should be writen in-order, otherwise the
+ * nr_entries could be rewritten incorrectly by an old bio.
+ */
+ wait_for_completion_io(&lc->super_done);
+
return 0;
}
INIT_LIST_HEAD(&lc->unflushed_blocks);
INIT_LIST_HEAD(&lc->logging_blocks);
init_waitqueue_head(&lc->wait);
+ init_completion(&lc->super_done);
atomic_set(&lc->io_blocks, 0);
atomic_set(&lc->pending_blocks, 0);
gfp = GFP_NOIO;
}
argv = kmalloc_array(new_size, sizeof(*argv), gfp);
- if (argv) {
+ if (argv && old_argv) {
memcpy(argv, old_argv, *size * sizeof(*argv));
*size = new_size;
}
BUG();
}
- DMERR("%s: %s block %llu is corrupted", v->data_dev->name, type_str,
- block);
+ DMERR_LIMIT("%s: %s block %llu is corrupted", v->data_dev->name,
+ type_str, block);
if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS)
DMERR("%s: reached maximum errors", v->data_dev->name);
static irqreturn_t stmfx_irq_handler(int irq, void *data)
{
struct stmfx *stmfx = data;
- unsigned long n, pending;
- u32 ack;
- int ret;
+ unsigned long bits;
+ u32 pending, ack;
+ int n, ret;
- ret = regmap_read(stmfx->map, STMFX_REG_IRQ_PENDING,
- (u32 *)&pending);
+ ret = regmap_read(stmfx->map, STMFX_REG_IRQ_PENDING, &pending);
if (ret)
return IRQ_NONE;
return IRQ_NONE;
}
- for_each_set_bit(n, &pending, STMFX_REG_IRQ_SRC_MAX)
+ bits = pending;
+ for_each_set_bit(n, &bits, STMFX_REG_IRQ_SRC_MAX)
handle_nested_irq(irq_find_mapping(stmfx->irq_domain, n));
return IRQ_HANDLED;
if MTD_NAND_JZ4780
config MTD_NAND_INGENIC_ECC
- tristate
+ bool
config MTD_NAND_JZ4740_ECC
tristate "Hardware BCH support for JZ4740 SoC"
obj-$(CONFIG_MTD_NAND_JZ4740) += jz4740_nand.o
obj-$(CONFIG_MTD_NAND_JZ4780) += ingenic_nand.o
-obj-$(CONFIG_MTD_NAND_INGENIC_ECC) += ingenic_ecc.o
+ingenic_nand-y += ingenic_nand_drv.o
+ingenic_nand-$(CONFIG_MTD_NAND_INGENIC_ECC) += ingenic_ecc.o
+
obj-$(CONFIG_MTD_NAND_JZ4740_ECC) += jz4740_ecc.o
obj-$(CONFIG_MTD_NAND_JZ4725B_BCH) += jz4725b_bch.o
obj-$(CONFIG_MTD_NAND_JZ4780_BCH) += jz4780_bch.o
{
return ecc->ops->calculate(ecc, params, buf, ecc_code);
}
-EXPORT_SYMBOL(ingenic_ecc_calculate);
/**
* ingenic_ecc_correct() - detect and correct bit errors
{
return ecc->ops->correct(ecc, params, buf, ecc_code);
}
-EXPORT_SYMBOL(ingenic_ecc_correct);
/**
* ingenic_ecc_get() - get the ECC controller device
}
return ecc;
}
-EXPORT_SYMBOL(of_ingenic_ecc_get);
/**
* ingenic_ecc_release() - release the ECC controller device
clk_disable_unprepare(ecc->clk);
put_device(ecc->dev);
}
-EXPORT_SYMBOL(ingenic_ecc_release);
int ingenic_ecc_probe(struct platform_device *pdev)
{
return 0;
}
EXPORT_SYMBOL(ingenic_ecc_probe);
-
-MODULE_AUTHOR("Alex Smith <alex@alex-smith.me.uk>");
-MODULE_AUTHOR("Harvey Hunt <harveyhuntnexus@gmail.com>");
-MODULE_DESCRIPTION("Ingenic ECC common driver");
-MODULE_LICENSE("GPL v2");
memorg = nanddev_get_memorg(&chip->base);
memorg->planes_per_lun = 1;
memorg->luns_per_target = 1;
- memorg->ntargets = 1;
/*
* Reset the chip, required by some chips (e.g. Micron MT29FxGxxxxx)
if (ret)
return ret;
+ memorg->ntargets = maxchips;
+
/* Read the flash type */
ret = nand_detect(chip, table);
if (ret) {
#define NFC_REG_USER_DATA(x) (0x0050 + ((x) * 4))
#define NFC_REG_SPARE_AREA 0x00A0
#define NFC_REG_PAT_ID 0x00A4
+#define NFC_REG_MDMA_CNT 0x00C4
#define NFC_RAM0_BASE 0x0400
#define NFC_RAM1_BASE 0x0800
#define NFC_PAGE_SHIFT(x) (((x) < 10 ? 0 : (x) - 10) << 8)
#define NFC_SAM BIT(12)
#define NFC_RAM_METHOD BIT(14)
+#define NFC_DMA_TYPE_NORMAL BIT(15)
#define NFC_DEBUG_CTL BIT(31)
/* define bit use in NFC_ST */
* NAND Controller capabilities structure: stores NAND controller capabilities
* for distinction between compatible strings.
*
- * @sram_through_ahb: On A23, we choose to access the internal RAM through AHB
- * instead of MBUS (less configuration). A10, A10s, A13 and
- * A20 use the MBUS but no extra configuration is needed.
+ * @extra_mbus_conf: Contrary to A10, A10s and A13, accessing internal RAM
+ * through MBUS on A23/A33 needs extra configuration.
* @reg_io_data: I/O data register
* @dma_maxburst: DMA maxburst
*/
struct sunxi_nfc_caps {
- bool sram_through_ahb;
+ bool extra_mbus_conf;
unsigned int reg_io_data;
unsigned int dma_maxburst;
};
goto err_unmap_buf;
}
- /*
- * On A23, we suppose the "internal RAM" (p.12 of the NFC user manual)
- * refers to the NAND controller's internal SRAM. This memory is mapped
- * and so is accessible from the AHB. It seems that it can also be
- * accessed by the MBUS. MBUS accesses are mandatory when using the
- * internal DMA instead of the external DMA engine.
- *
- * During DMA I/O operation, either we access this memory from the AHB
- * by clearing the NFC_RAM_METHOD bit, or we set the bit and use the
- * MBUS. In this case, we should also configure the MBUS DMA length
- * NFC_REG_MDMA_CNT(0xC4) to be chunksize * nchunks. NAND I/O over MBUS
- * are also limited to 32kiB pages.
- */
- if (nfc->caps->sram_through_ahb)
- writel(readl(nfc->regs + NFC_REG_CTL) & ~NFC_RAM_METHOD,
- nfc->regs + NFC_REG_CTL);
- else
- writel(readl(nfc->regs + NFC_REG_CTL) | NFC_RAM_METHOD,
- nfc->regs + NFC_REG_CTL);
-
+ writel(readl(nfc->regs + NFC_REG_CTL) | NFC_RAM_METHOD,
+ nfc->regs + NFC_REG_CTL);
writel(nchunks, nfc->regs + NFC_REG_SECTOR_NUM);
writel(chunksize, nfc->regs + NFC_REG_CNT);
+ if (nfc->caps->extra_mbus_conf)
+ writel(chunksize * nchunks, nfc->regs + NFC_REG_MDMA_CNT);
dmat = dmaengine_submit(dmad);
dmac_cfg.src_maxburst = nfc->caps->dma_maxburst;
dmac_cfg.dst_maxburst = nfc->caps->dma_maxburst;
dmaengine_slave_config(nfc->dmac, &dmac_cfg);
+
+ if (nfc->caps->extra_mbus_conf)
+ writel(readl(nfc->regs + NFC_REG_CTL) |
+ NFC_DMA_TYPE_NORMAL, nfc->regs + NFC_REG_CTL);
+
} else {
dev_warn(dev, "failed to request rxtx DMA channel\n");
}
};
static const struct sunxi_nfc_caps sunxi_nfc_a23_caps = {
- .sram_through_ahb = true,
+ .extra_mbus_conf = true,
.reg_io_data = NFC_REG_A23_IO_DATA,
.dma_maxburst = 8,
};
SPINAND_ECCINFO(&gd5fxgq4xa_ooblayout,
gd5fxgq4xa_ecc_get_status)),
SPINAND_INFO("GD5F4GQ4xA", 0xF4,
- NAND_MEMORG(1, 2048, 64, 64, 4096, 40, 1, 1, 1),
+ NAND_MEMORG(1, 2048, 64, 64, 4096, 80, 1, 1, 1),
NAND_ECCREQ(8, 512),
SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
&write_cache_variants,
static const struct spinand_info macronix_spinand_table[] = {
SPINAND_INFO("MX35LF1GE4AB", 0x12,
- NAND_MEMORG(1, 2048, 64, 64, 1024, 40, 1, 1, 1),
+ NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1),
NAND_ECCREQ(4, 512),
SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
&write_cache_variants,
SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout,
mx35lf1ge4ab_ecc_get_status)),
SPINAND_INFO("MX35LF2GE4AB", 0x22,
- NAND_MEMORG(1, 2048, 64, 64, 2048, 20, 2, 1, 1),
+ NAND_MEMORG(1, 2048, 64, 64, 2048, 40, 2, 1, 1),
NAND_ECCREQ(4, 512),
SPINAND_INFO_OP_VARIANTS(&read_cache_variants,
&write_cache_variants,
return 0;
}
+/**
+ * spi_nor_clear_sr_bp() - clear the Status Register Block Protection bits.
+ * @nor: pointer to a 'struct spi_nor'
+ *
+ * Read-modify-write function that clears the Block Protection bits from the
+ * Status Register without affecting other bits.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_clear_sr_bp(struct spi_nor *nor)
+{
+ int ret;
+ u8 mask = SR_BP2 | SR_BP1 | SR_BP0;
+
+ ret = read_sr(nor);
+ if (ret < 0) {
+ dev_err(nor->dev, "error while reading status register\n");
+ return ret;
+ }
+
+ write_enable(nor);
+
+ ret = write_sr(nor, ret & ~mask);
+ if (ret) {
+ dev_err(nor->dev, "write to status register failed\n");
+ return ret;
+ }
+
+ ret = spi_nor_wait_till_ready(nor);
+ if (ret)
+ dev_err(nor->dev, "timeout while writing status register\n");
+ return ret;
+}
+
+/**
+ * spi_nor_spansion_clear_sr_bp() - clear the Status Register Block Protection
+ * bits on spansion flashes.
+ * @nor: pointer to a 'struct spi_nor'
+ *
+ * Read-modify-write function that clears the Block Protection bits from the
+ * Status Register without affecting other bits. The function is tightly
+ * coupled with the spansion_quad_enable() function. Both assume that the Write
+ * Register with 16 bits, together with the Read Configuration Register (35h)
+ * instructions are supported.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_spansion_clear_sr_bp(struct spi_nor *nor)
+{
+ int ret;
+ u8 mask = SR_BP2 | SR_BP1 | SR_BP0;
+ u8 sr_cr[2] = {0};
+
+ /* Check current Quad Enable bit value. */
+ ret = read_cr(nor);
+ if (ret < 0) {
+ dev_err(nor->dev,
+ "error while reading configuration register\n");
+ return ret;
+ }
+
+ /*
+ * When the configuration register Quad Enable bit is one, only the
+ * Write Status (01h) command with two data bytes may be used.
+ */
+ if (ret & CR_QUAD_EN_SPAN) {
+ sr_cr[1] = ret;
+
+ ret = read_sr(nor);
+ if (ret < 0) {
+ dev_err(nor->dev,
+ "error while reading status register\n");
+ return ret;
+ }
+ sr_cr[0] = ret & ~mask;
+
+ ret = write_sr_cr(nor, sr_cr);
+ if (ret)
+ dev_err(nor->dev, "16-bit write register failed\n");
+ return ret;
+ }
+
+ /*
+ * If the Quad Enable bit is zero, use the Write Status (01h) command
+ * with one data byte.
+ */
+ return spi_nor_clear_sr_bp(nor);
+}
+
/* Used when the "_ext_id" is two bytes at most */
#define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags) \
.id = { \
default:
/* Kept only for backward compatibility purpose. */
params->quad_enable = spansion_quad_enable;
+ if (nor->clear_sr_bp)
+ nor->clear_sr_bp = spi_nor_spansion_clear_sr_bp;
break;
}
{
int err;
- /*
- * Atmel, SST, Intel/Numonyx, and others serial NOR tend to power up
- * with the software protection bits set
- */
- if (JEDEC_MFR(nor->info) == SNOR_MFR_ATMEL ||
- JEDEC_MFR(nor->info) == SNOR_MFR_INTEL ||
- JEDEC_MFR(nor->info) == SNOR_MFR_SST ||
- nor->info->flags & SPI_NOR_HAS_LOCK) {
- write_enable(nor);
- write_sr(nor, 0);
- spi_nor_wait_till_ready(nor);
+ if (nor->clear_sr_bp) {
+ err = nor->clear_sr_bp(nor);
+ if (err) {
+ dev_err(nor->dev,
+ "fail to clear block protection bits\n");
+ return err;
+ }
}
if (nor->quad_enable) {
if (info->flags & SPI_S3AN)
nor->flags |= SNOR_F_READY_XSR_RDY;
+ /*
+ * Atmel, SST, Intel/Numonyx, and others serial NOR tend to power up
+ * with the software protection bits set.
+ */
+ if (JEDEC_MFR(nor->info) == SNOR_MFR_ATMEL ||
+ JEDEC_MFR(nor->info) == SNOR_MFR_INTEL ||
+ JEDEC_MFR(nor->info) == SNOR_MFR_SST ||
+ nor->info->flags & SPI_NOR_HAS_LOCK)
+ nor->clear_sr_bp = spi_nor_clear_sr_bp;
+
/* Parse the Serial Flash Discoverable Parameters table. */
ret = spi_nor_init_params(nor, ¶ms);
if (ret)
bond_dev->features |= NETIF_F_NETNS_LOCAL;
bond_dev->hw_features = BOND_VLAN_FEATURES |
- NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_FILTER;
bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4;
bond_dev->features |= bond_dev->hw_features;
+ bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
}
/* Destroy a bonding device.
return PTR_ERR(dev->reset_gpio);
if (dev->reset_gpio) {
- gpiod_set_value(dev->reset_gpio, 1);
+ gpiod_set_value_cansleep(dev->reset_gpio, 1);
mdelay(10);
- gpiod_set_value(dev->reset_gpio, 0);
+ gpiod_set_value_cansleep(dev->reset_gpio, 0);
}
mutex_init(&dev->dev_mutex);
dsa_unregister_switch(dev->ds);
if (dev->reset_gpio)
- gpiod_set_value(dev->reset_gpio, 1);
+ gpiod_set_value_cansleep(dev->reset_gpio, 1);
}
EXPORT_SYMBOL(ksz_switch_remove);
return err;
if (aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) {
- if (hweight < AQ_VLAN_MAX_FILTERS)
- err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, true);
+ if (hweight < AQ_VLAN_MAX_FILTERS && hweight > 0) {
+ err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw,
+ !(aq_nic->packet_filter & IFF_PROMISC));
+ aq_nic->aq_nic_cfg.is_vlan_force_promisc = false;
+ } else {
/* otherwise left in promiscue mode */
+ aq_nic->aq_nic_cfg.is_vlan_force_promisc = true;
+ }
}
return err;
if (unlikely(!aq_hw_ops->hw_filter_vlan_ctrl))
return -EOPNOTSUPP;
+ aq_nic->aq_nic_cfg.is_vlan_force_promisc = true;
err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, false);
if (err)
return err;
cfg->link_speed_msk &= cfg->aq_hw_caps->link_speed_msk;
cfg->features = cfg->aq_hw_caps->hw_features;
+ cfg->is_vlan_force_promisc = true;
}
static int aq_nic_update_link_status(struct aq_nic_s *self)
u32 flow_control;
u32 link_speed_msk;
u32 wol;
+ bool is_vlan_force_promisc;
u16 is_mc_list_enabled;
u16 mc_list_count;
bool is_autoneg;
unsigned int packet_filter)
{
unsigned int i = 0U;
+ struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
+
+ hw_atl_rpfl2promiscuous_mode_en_set(self,
+ IS_FILTER_ENABLED(IFF_PROMISC));
+
+ hw_atl_rpf_vlan_prom_mode_en_set(self,
+ IS_FILTER_ENABLED(IFF_PROMISC) ||
+ cfg->is_vlan_force_promisc);
- hw_atl_rpfl2promiscuous_mode_en_set(self, IS_FILTER_ENABLED(IFF_PROMISC));
hw_atl_rpfl2multicast_flr_en_set(self,
IS_FILTER_ENABLED(IFF_ALLMULTI), 0);
hw_atl_rpfl2broadcast_en_set(self, IS_FILTER_ENABLED(IFF_BROADCAST));
- self->aq_nic_cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST);
+ cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST);
for (i = HW_ATL_B0_MAC_MIN; i < HW_ATL_B0_MAC_MAX; ++i)
hw_atl_rpfl2_uc_flr_en_set(self,
- (self->aq_nic_cfg->is_mc_list_enabled &&
- (i <= self->aq_nic_cfg->mc_list_count)) ?
- 1U : 0U, i);
+ (cfg->is_mc_list_enabled &&
+ (i <= cfg->mc_list_count)) ?
+ 1U : 0U, i);
return aq_hw_err_from_flags(self);
}
static int hw_atl_b0_hw_vlan_ctrl(struct aq_hw_s *self, bool enable)
{
/* set promisc in case of disabing the vland filter */
- hw_atl_rpf_vlan_prom_mode_en_set(self, !!!enable);
+ hw_atl_rpf_vlan_prom_mode_en_set(self, !enable);
return aq_hw_err_from_flags(self);
}
if (PTR_ERR(mac) == -EPROBE_DEFER) {
err = -EPROBE_DEFER;
goto err_out_free_netdev;
- } else if (!IS_ERR(mac)) {
+ } else if (!IS_ERR_OR_NULL(mac)) {
ether_addr_copy(bp->dev->dev_addr, mac);
} else {
macb_get_hwaddr(bp);
u64 *data)
{
struct be_adapter *adapter = netdev_priv(netdev);
- int status;
+ int status, cnt;
u8 link_status = 0;
if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
memset(data, 0, sizeof(u64) * ETHTOOL_TESTS_NUM);
+ /* check link status before offline tests */
+ link_status = netif_carrier_ok(netdev);
+
if (test->flags & ETH_TEST_FL_OFFLINE) {
if (be_loopback_test(adapter, BE_MAC_LOOPBACK, &data[0]) != 0)
test->flags |= ETH_TEST_FL_FAILED;
test->flags |= ETH_TEST_FL_FAILED;
}
- status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
- if (status) {
- test->flags |= ETH_TEST_FL_FAILED;
- data[4] = -1;
- } else if (!link_status) {
+ /* link status was down prior to test */
+ if (!link_status) {
test->flags |= ETH_TEST_FL_FAILED;
data[4] = 1;
+ return;
+ }
+
+ for (cnt = 10; cnt; cnt--) {
+ status = be_cmd_link_status_query(adapter, NULL, &link_status,
+ 0);
+ if (status) {
+ test->flags |= ETH_TEST_FL_FAILED;
+ data[4] = -1;
+ break;
+ }
+
+ if (link_status)
+ break;
+
+ msleep_interruptible(500);
}
}
sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED);
/* Enable all known interrupts by setting the interrupt mask. */
- sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE);
+ sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC);
sw32(cr, RxENA | sr32(cr));
sw32(ier, IE);
sw32(txdp, sis_priv->tx_ring_dma);
/* Enable all known interrupts by setting the interrupt mask. */
- sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE);
+ sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC);
}
/**
spin_unlock_irqrestore(&sis_priv->lock, flags);
return NETDEV_TX_OK;
}
- sis_priv->tx_ring[entry].cmdsts = (OWN | skb->len);
+ sis_priv->tx_ring[entry].cmdsts = (OWN | INTR | skb->len);
sw32(cr, TxENA | sr32(cr));
sis_priv->cur_tx ++;
do {
status = sr32(isr);
- if ((status & (HIBERR|TxURN|TxERR|TxIDLE|RxORN|RxERR|RxOK)) == 0)
+ if ((status & (HIBERR|TxURN|TxERR|TxIDLE|TxDESC|RxORN|RxERR|RxOK)) == 0)
/* nothing intresting happened */
break;
handled = 1;
/* Rx interrupt */
sis900_rx(net_dev);
- if (status & (TxURN | TxERR | TxIDLE))
+ if (status & (TxURN | TxERR | TxIDLE | TxDESC))
/* Tx interrupt */
sis900_finish_xmit(net_dev);
if (tx_status & OWN) {
/* The packet is not transmitted yet (owned by hardware) !
- * Note: the interrupt is generated only when Tx Machine
- * is idle, so this is an almost impossible case */
+ * Note: this is an almost impossible condition
+ * in case of TxDESC ('descriptor interrupt') */
break;
}
sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED);
/* Enable all known interrupts by setting the interrupt mask. */
- sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE);
+ sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC);
sw32(cr, RxENA | sr32(cr));
sw32(ier, IE);
* programmed with (2^32 – <new_sec_value>)
*/
if (gmac4)
- sec = (100000000ULL - sec);
+ sec = -sec;
value = readl(ioaddr + PTP_TCR);
if (value & PTP_TCR_TSCTRLSSR)
/* Manage tx mitigation */
tx_q->tx_count_frames += nfrags + 1;
- if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
+ if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) &&
+ !(priv->synopsys_id >= DWMAC_CORE_4_00 &&
+ (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+ priv->hwts_tx_en)) {
+ stmmac_tx_timer_arm(priv, queue);
+ } else {
+ tx_q->tx_count_frames = 0;
stmmac_set_tx_ic(priv, desc);
priv->xstats.tx_set_ic_bit++;
- tx_q->tx_count_frames = 0;
- } else {
- stmmac_tx_timer_arm(priv, queue);
}
skb_tx_timestamp(skb);
* element in case of no SG.
*/
tx_q->tx_count_frames += nfrags + 1;
- if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
+ if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) &&
+ !(priv->synopsys_id >= DWMAC_CORE_4_00 &&
+ (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+ priv->hwts_tx_en)) {
+ stmmac_tx_timer_arm(priv, queue);
+ } else {
+ tx_q->tx_count_frames = 0;
stmmac_set_tx_ic(priv, desc);
priv->xstats.tx_set_ic_bit++;
- tx_q->tx_count_frames = 0;
- } else {
- stmmac_tx_timer_arm(priv, queue);
}
skb_tx_timestamp(skb);
MODULE_DESCRIPTION("Point-to-Point Protocol Microsoft Point-to-Point Encryption support");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("ppp-compress-" __stringify(CI_MPPE));
+MODULE_SOFTDEP("pre: arc4");
MODULE_VERSION("1.0.2");
static unsigned int
dev->features |= NETIF_F_NETNS_LOCAL;
dev->hw_features = TEAM_VLAN_FEATURES |
- NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_FILTER;
dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4;
dev->features |= dev->hw_features;
+ dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
}
static int team_newlink(struct net *src_net, struct net_device *dev,
* different. Ignore the current interface if the number of endpoints
* equals the number for the diag interface (two).
*/
- info = (void *)&id->driver_info;
+ info = (void *)id->driver_info;
if (info->data & QMI_WWAN_QUIRK_QUECTEL_DYNCFG) {
if (desc->bNumEndpoints == 2)
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
+ const struct in6_addr *nexthop;
struct neighbour *neigh;
- struct in6_addr *nexthop;
int ret;
nf_reset(skb);
struct cfg80211_pmsr_result result = {
.status = NL80211_PMSR_STATUS_FAILURE,
.final = 1,
- .host_time = ktime_get_boot_ns(),
+ .host_time = ktime_get_boottime_ns(),
.type = NL80211_PMSR_TYPE_FTM,
};
int i;
if (unlikely(ieee80211_is_beacon(hdr->frame_control) ||
ieee80211_is_probe_resp(hdr->frame_control)))
- rx_status->boottime_ns = ktime_get_boot_ns();
+ rx_status->boottime_ns = ktime_get_boottime_ns();
/* Take a reference briefly to kick off a d0i3 entry delay so
* we can handle bursts of RX packets without toggling the
if (unlikely(ieee80211_is_beacon(hdr->frame_control) ||
ieee80211_is_probe_resp(hdr->frame_control)))
- rx_status->boottime_ns = ktime_get_boot_ns();
+ rx_status->boottime_ns = ktime_get_boottime_ns();
}
if (iwl_mvm_create_skb(mvm, skb, hdr, len, crypt_len, rxb)) {
}
*gp2 = iwl_mvm_get_systime(mvm);
- *boottime = ktime_get_boot_ns();
+ *boottime = ktime_get_boottime_ns();
if (!ps_disabled) {
mvm->ps_disabled = ps_disabled;
*/
if (ieee80211_is_beacon(hdr->frame_control) ||
ieee80211_is_probe_resp(hdr->frame_control)) {
- rx_status.boottime_ns = ktime_get_boot_ns();
+ rx_status.boottime_ns = ktime_get_boottime_ns();
now = data->abs_bcn_ts;
} else {
now = mac80211_hwsim_get_tsf_raw();
}
/* update the host-chipset time offset */
- wl->time_offset = (ktime_get_boot_ns() >> 10) -
+ wl->time_offset = (ktime_get_boottime_ns() >> 10) -
(s64)(status->fw_localtime);
wl->fw_fast_lnk_map = status->link_fast_bitmap;
}
if (beacon || probe_rsp)
- status->boottime_ns = ktime_get_boot_ns();
+ status->boottime_ns = ktime_get_boottime_ns();
if (beacon)
wlcore_set_pending_regdomain_ch(wl, (u16)desc->channel,
}
/* configure packet life time */
- hosttime = (ktime_get_boot_ns() >> 10);
+ hosttime = (ktime_get_boottime_ns() >> 10);
desc->start_time = cpu_to_le32(hosttime - wl->time_offset);
is_dummy = wl12xx_is_dummy_packet(wl, skb);
informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz,
CFG80211_BSS_FTYPE_PRESP,
fake_router_bssid,
- ktime_get_boot_ns(),
+ ktime_get_boottime_ns(),
WLAN_CAPABILITY_ESS, 0,
(void *)&ssid, sizeof(ssid),
DBM_TO_MBM(-50), GFP_KERNEL);
pci_dev->bus->self->skip_bus_pm = true;
}
- if (pci_dev->skip_bus_pm && !pm_suspend_via_firmware()) {
+ if (pci_dev->skip_bus_pm && pm_suspend_no_platform()) {
dev_dbg(dev, "PCI PM: Skipped\n");
goto Fixup;
}
/*
* In the suspend-to-idle case, devices left in D0 during suspend will
* stay in D0, so it is not necessary to restore or update their
- * configuration here and attempting to put them into D0 again may
- * confuse some firmware, so avoid doing that.
+ * configuration here and attempting to put them into D0 again is
+ * pointless, so avoid doing that.
*/
- if (!pci_dev->skip_bus_pm || pm_suspend_via_firmware())
+ if (!(pci_dev->skip_bus_pm && pm_suspend_no_platform()))
pci_pm_default_resume_early(pci_dev);
pci_fixup_device(pci_fixup_resume_early, pci_dev);
system, control logic. The PMU allows counting various events related
to DSU.
+config FSL_IMX8_DDR_PMU
+ tristate "Freescale i.MX8 DDR perf monitor"
+ depends on ARCH_MXC
+ help
+ Provides support for the DDR performance monitor in i.MX8, which
+ can give information about memory throughput and other related
+ events.
+
config HISI_PMU
bool "HiSilicon SoC PMU"
depends on ARM64 && ACPI
obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o
+obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o
obj-$(CONFIG_HISI_PMU) += hisilicon/
obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o
obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
acpi_unregister_gsi(gsi);
}
+#if IS_ENABLED(CONFIG_ARM_SPE_PMU)
+static struct resource spe_resources[] = {
+ {
+ /* irq */
+ .flags = IORESOURCE_IRQ,
+ }
+};
+
+static struct platform_device spe_dev = {
+ .name = ARMV8_SPE_PDEV_NAME,
+ .id = -1,
+ .resource = spe_resources,
+ .num_resources = ARRAY_SIZE(spe_resources)
+};
+
+/*
+ * For lack of a better place, hook the normal PMU MADT walk
+ * and create a SPE device if we detect a recent MADT with
+ * a homogeneous PPI mapping.
+ */
+static void arm_spe_acpi_register_device(void)
+{
+ int cpu, hetid, irq, ret;
+ bool first = true;
+ u16 gsi = 0;
+
+ /*
+ * Sanity check all the GICC tables for the same interrupt number.
+ * For now, we only support homogeneous ACPI/SPE machines.
+ */
+ for_each_possible_cpu(cpu) {
+ struct acpi_madt_generic_interrupt *gicc;
+
+ gicc = acpi_cpu_get_madt_gicc(cpu);
+ if (gicc->header.length < ACPI_MADT_GICC_SPE)
+ return;
+
+ if (first) {
+ gsi = gicc->spe_interrupt;
+ if (!gsi)
+ return;
+ hetid = find_acpi_cpu_topology_hetero_id(cpu);
+ first = false;
+ } else if ((gsi != gicc->spe_interrupt) ||
+ (hetid != find_acpi_cpu_topology_hetero_id(cpu))) {
+ pr_warn("ACPI: SPE must be homogeneous\n");
+ return;
+ }
+ }
+
+ irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE,
+ ACPI_ACTIVE_HIGH);
+ if (irq < 0) {
+ pr_warn("ACPI: SPE Unable to register interrupt: %d\n", gsi);
+ return;
+ }
+
+ spe_resources[0].start = irq;
+ ret = platform_device_register(&spe_dev);
+ if (ret < 0) {
+ pr_warn("ACPI: SPE: Unable to register device\n");
+ acpi_unregister_gsi(gsi);
+ }
+}
+#else
+static inline void arm_spe_acpi_register_device(void)
+{
+}
+#endif /* CONFIG_ARM_SPE_PMU */
+
static int arm_pmu_acpi_parse_irqs(void)
{
int irq, cpu, irq_cpu, err;
if (acpi_disabled)
return 0;
+ arm_spe_acpi_register_device();
+
ret = arm_pmu_acpi_parse_irqs();
if (ret)
return ret;
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/perf_event.h>
+#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
#include <linux/printk.h>
#include <linux/slab.h>
};
MODULE_DEVICE_TABLE(of, arm_spe_pmu_of_match);
-static int arm_spe_pmu_device_dt_probe(struct platform_device *pdev)
+static const struct platform_device_id arm_spe_match[] = {
+ { ARMV8_SPE_PDEV_NAME, 0},
+ { }
+};
+MODULE_DEVICE_TABLE(platform, arm_spe_match);
+
+static int arm_spe_pmu_device_probe(struct platform_device *pdev)
{
int ret;
struct arm_spe_pmu *spe_pmu;
}
static struct platform_driver arm_spe_pmu_driver = {
+ .id_table = arm_spe_match,
.driver = {
.name = DRVNAME,
.of_match_table = of_match_ptr(arm_spe_pmu_of_match),
},
- .probe = arm_spe_pmu_device_dt_probe,
+ .probe = arm_spe_pmu_device_probe,
.remove = arm_spe_pmu_device_remove,
};
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2017 NXP
+ * Copyright 2016 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+
+#define COUNTER_CNTL 0x0
+#define COUNTER_READ 0x20
+
+#define COUNTER_DPCR1 0x30
+
+#define CNTL_OVER 0x1
+#define CNTL_CLEAR 0x2
+#define CNTL_EN 0x4
+#define CNTL_EN_MASK 0xFFFFFFFB
+#define CNTL_CLEAR_MASK 0xFFFFFFFD
+#define CNTL_OVER_MASK 0xFFFFFFFE
+
+#define CNTL_CSV_SHIFT 24
+#define CNTL_CSV_MASK (0xFF << CNTL_CSV_SHIFT)
+
+#define EVENT_CYCLES_ID 0
+#define EVENT_CYCLES_COUNTER 0
+#define NUM_COUNTERS 4
+
+#define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu)
+
+#define DDR_PERF_DEV_NAME "imx8_ddr"
+#define DDR_CPUHP_CB_NAME DDR_PERF_DEV_NAME "_perf_pmu"
+
+static DEFINE_IDA(ddr_ida);
+
+static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
+ { .compatible = "fsl,imx8-ddr-pmu",},
+ { .compatible = "fsl,imx8m-ddr-pmu",},
+ { /* sentinel */ }
+};
+
+struct ddr_pmu {
+ struct pmu pmu;
+ void __iomem *base;
+ unsigned int cpu;
+ struct hlist_node node;
+ struct device *dev;
+ struct perf_event *events[NUM_COUNTERS];
+ int active_events;
+ enum cpuhp_state cpuhp_state;
+ int irq;
+ int id;
+};
+
+static ssize_t ddr_perf_cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
+}
+
+static struct device_attribute ddr_perf_cpumask_attr =
+ __ATTR(cpumask, 0444, ddr_perf_cpumask_show, NULL);
+
+static struct attribute *ddr_perf_cpumask_attrs[] = {
+ &ddr_perf_cpumask_attr.attr,
+ NULL,
+};
+
+static struct attribute_group ddr_perf_cpumask_attr_group = {
+ .attrs = ddr_perf_cpumask_attrs,
+};
+
+static ssize_t
+ddr_pmu_event_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+ return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+#define IMX8_DDR_PMU_EVENT_ATTR(_name, _id) \
+ (&((struct perf_pmu_events_attr[]) { \
+ { .attr = __ATTR(_name, 0444, ddr_pmu_event_show, NULL),\
+ .id = _id, } \
+ })[0].attr.attr)
+
+static struct attribute *ddr_perf_events_attrs[] = {
+ IMX8_DDR_PMU_EVENT_ATTR(cycles, EVENT_CYCLES_ID),
+ IMX8_DDR_PMU_EVENT_ATTR(selfresh, 0x01),
+ IMX8_DDR_PMU_EVENT_ATTR(read-accesses, 0x04),
+ IMX8_DDR_PMU_EVENT_ATTR(write-accesses, 0x05),
+ IMX8_DDR_PMU_EVENT_ATTR(read-queue-depth, 0x08),
+ IMX8_DDR_PMU_EVENT_ATTR(write-queue-depth, 0x09),
+ IMX8_DDR_PMU_EVENT_ATTR(lp-read-credit-cnt, 0x10),
+ IMX8_DDR_PMU_EVENT_ATTR(hp-read-credit-cnt, 0x11),
+ IMX8_DDR_PMU_EVENT_ATTR(write-credit-cnt, 0x12),
+ IMX8_DDR_PMU_EVENT_ATTR(read-command, 0x20),
+ IMX8_DDR_PMU_EVENT_ATTR(write-command, 0x21),
+ IMX8_DDR_PMU_EVENT_ATTR(read-modify-write-command, 0x22),
+ IMX8_DDR_PMU_EVENT_ATTR(hp-read, 0x23),
+ IMX8_DDR_PMU_EVENT_ATTR(hp-req-nocredit, 0x24),
+ IMX8_DDR_PMU_EVENT_ATTR(hp-xact-credit, 0x25),
+ IMX8_DDR_PMU_EVENT_ATTR(lp-req-nocredit, 0x26),
+ IMX8_DDR_PMU_EVENT_ATTR(lp-xact-credit, 0x27),
+ IMX8_DDR_PMU_EVENT_ATTR(wr-xact-credit, 0x29),
+ IMX8_DDR_PMU_EVENT_ATTR(read-cycles, 0x2a),
+ IMX8_DDR_PMU_EVENT_ATTR(write-cycles, 0x2b),
+ IMX8_DDR_PMU_EVENT_ATTR(read-write-transition, 0x30),
+ IMX8_DDR_PMU_EVENT_ATTR(precharge, 0x31),
+ IMX8_DDR_PMU_EVENT_ATTR(activate, 0x32),
+ IMX8_DDR_PMU_EVENT_ATTR(load-mode, 0x33),
+ IMX8_DDR_PMU_EVENT_ATTR(perf-mwr, 0x34),
+ IMX8_DDR_PMU_EVENT_ATTR(read, 0x35),
+ IMX8_DDR_PMU_EVENT_ATTR(read-activate, 0x36),
+ IMX8_DDR_PMU_EVENT_ATTR(refresh, 0x37),
+ IMX8_DDR_PMU_EVENT_ATTR(write, 0x38),
+ IMX8_DDR_PMU_EVENT_ATTR(raw-hazard, 0x39),
+ NULL,
+};
+
+static struct attribute_group ddr_perf_events_attr_group = {
+ .name = "events",
+ .attrs = ddr_perf_events_attrs,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+static struct attribute *ddr_perf_format_attrs[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group ddr_perf_format_attr_group = {
+ .name = "format",
+ .attrs = ddr_perf_format_attrs,
+};
+
+static const struct attribute_group *attr_groups[] = {
+ &ddr_perf_events_attr_group,
+ &ddr_perf_format_attr_group,
+ &ddr_perf_cpumask_attr_group,
+ NULL,
+};
+
+static u32 ddr_perf_alloc_counter(struct ddr_pmu *pmu, int event)
+{
+ int i;
+
+ /*
+ * Always map cycle event to counter 0
+ * Cycles counter is dedicated for cycle event
+ * can't used for the other events
+ */
+ if (event == EVENT_CYCLES_ID) {
+ if (pmu->events[EVENT_CYCLES_COUNTER] == NULL)
+ return EVENT_CYCLES_COUNTER;
+ else
+ return -ENOENT;
+ }
+
+ for (i = 1; i < NUM_COUNTERS; i++) {
+ if (pmu->events[i] == NULL)
+ return i;
+ }
+
+ return -ENOENT;
+}
+
+static void ddr_perf_free_counter(struct ddr_pmu *pmu, int counter)
+{
+ pmu->events[counter] = NULL;
+}
+
+static u32 ddr_perf_read_counter(struct ddr_pmu *pmu, int counter)
+{
+ return readl_relaxed(pmu->base + COUNTER_READ + counter * 4);
+}
+
+static int ddr_perf_event_init(struct perf_event *event)
+{
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_event *sibling;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EOPNOTSUPP;
+
+ if (event->cpu < 0) {
+ dev_warn(pmu->dev, "Can't provide per-task data!\n");
+ return -EOPNOTSUPP;
+ }
+
+ /*
+ * We must NOT create groups containing mixed PMUs, although software
+ * events are acceptable (for example to create a CCN group
+ * periodically read when a hrtimer aka cpu-clock leader triggers).
+ */
+ if (event->group_leader->pmu != event->pmu &&
+ !is_software_event(event->group_leader))
+ return -EINVAL;
+
+ for_each_sibling_event(sibling, event->group_leader) {
+ if (sibling->pmu != event->pmu &&
+ !is_software_event(sibling))
+ return -EINVAL;
+ }
+
+ event->cpu = pmu->cpu;
+ hwc->idx = -1;
+
+ return 0;
+}
+
+
+static void ddr_perf_event_update(struct perf_event *event)
+{
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 delta, prev_raw_count, new_raw_count;
+ int counter = hwc->idx;
+
+ do {
+ prev_raw_count = local64_read(&hwc->prev_count);
+ new_raw_count = ddr_perf_read_counter(pmu, counter);
+ } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count);
+
+ delta = (new_raw_count - prev_raw_count) & 0xFFFFFFFF;
+
+ local64_add(delta, &event->count);
+}
+
+static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
+ int counter, bool enable)
+{
+ u8 reg = counter * 4 + COUNTER_CNTL;
+ int val;
+
+ if (enable) {
+ /*
+ * must disable first, then enable again
+ * otherwise, cycle counter will not work
+ * if previous state is enabled.
+ */
+ writel(0, pmu->base + reg);
+ val = CNTL_EN | CNTL_CLEAR;
+ val |= FIELD_PREP(CNTL_CSV_MASK, config);
+ writel(val, pmu->base + reg);
+ } else {
+ /* Disable counter */
+ writel(0, pmu->base + reg);
+ }
+}
+
+static void ddr_perf_event_start(struct perf_event *event, int flags)
+{
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ local64_set(&hwc->prev_count, 0);
+
+ ddr_perf_counter_enable(pmu, event->attr.config, counter, true);
+
+ hwc->state = 0;
+}
+
+static int ddr_perf_event_add(struct perf_event *event, int flags)
+{
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int counter;
+ int cfg = event->attr.config;
+
+ counter = ddr_perf_alloc_counter(pmu, cfg);
+ if (counter < 0) {
+ dev_dbg(pmu->dev, "There are not enough counters\n");
+ return -EOPNOTSUPP;
+ }
+
+ pmu->events[counter] = event;
+ pmu->active_events++;
+ hwc->idx = counter;
+
+ hwc->state |= PERF_HES_STOPPED;
+
+ if (flags & PERF_EF_START)
+ ddr_perf_event_start(event, flags);
+
+ return 0;
+}
+
+static void ddr_perf_event_stop(struct perf_event *event, int flags)
+{
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ ddr_perf_counter_enable(pmu, event->attr.config, counter, false);
+ ddr_perf_event_update(event);
+
+ hwc->state |= PERF_HES_STOPPED;
+}
+
+static void ddr_perf_event_del(struct perf_event *event, int flags)
+{
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int counter = hwc->idx;
+
+ ddr_perf_event_stop(event, PERF_EF_UPDATE);
+
+ ddr_perf_free_counter(pmu, counter);
+ pmu->active_events--;
+ hwc->idx = -1;
+}
+
+static void ddr_perf_pmu_enable(struct pmu *pmu)
+{
+ struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+
+ /* enable cycle counter if cycle is not active event list */
+ if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL)
+ ddr_perf_counter_enable(ddr_pmu,
+ EVENT_CYCLES_ID,
+ EVENT_CYCLES_COUNTER,
+ true);
+}
+
+static void ddr_perf_pmu_disable(struct pmu *pmu)
+{
+ struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
+
+ if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL)
+ ddr_perf_counter_enable(ddr_pmu,
+ EVENT_CYCLES_ID,
+ EVENT_CYCLES_COUNTER,
+ false);
+}
+
+static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
+ struct device *dev)
+{
+ *pmu = (struct ddr_pmu) {
+ .pmu = (struct pmu) {
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+ .task_ctx_nr = perf_invalid_context,
+ .attr_groups = attr_groups,
+ .event_init = ddr_perf_event_init,
+ .add = ddr_perf_event_add,
+ .del = ddr_perf_event_del,
+ .start = ddr_perf_event_start,
+ .stop = ddr_perf_event_stop,
+ .read = ddr_perf_event_update,
+ .pmu_enable = ddr_perf_pmu_enable,
+ .pmu_disable = ddr_perf_pmu_disable,
+ },
+ .base = base,
+ .dev = dev,
+ };
+
+ pmu->id = ida_simple_get(&ddr_ida, 0, 0, GFP_KERNEL);
+ return pmu->id;
+}
+
+static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
+{
+ int i;
+ struct ddr_pmu *pmu = (struct ddr_pmu *) p;
+ struct perf_event *event, *cycle_event = NULL;
+
+ /* all counter will stop if cycle counter disabled */
+ ddr_perf_counter_enable(pmu,
+ EVENT_CYCLES_ID,
+ EVENT_CYCLES_COUNTER,
+ false);
+ /*
+ * When the cycle counter overflows, all counters are stopped,
+ * and an IRQ is raised. If any other counter overflows, it
+ * continues counting, and no IRQ is raised.
+ *
+ * Cycles occur at least 4 times as often as other events, so we
+ * can update all events on a cycle counter overflow and not
+ * lose events.
+ *
+ */
+ for (i = 0; i < NUM_COUNTERS; i++) {
+
+ if (!pmu->events[i])
+ continue;
+
+ event = pmu->events[i];
+
+ ddr_perf_event_update(event);
+
+ if (event->hw.idx == EVENT_CYCLES_COUNTER)
+ cycle_event = event;
+ }
+
+ ddr_perf_counter_enable(pmu,
+ EVENT_CYCLES_ID,
+ EVENT_CYCLES_COUNTER,
+ true);
+ if (cycle_event)
+ ddr_perf_event_update(cycle_event);
+
+ return IRQ_HANDLED;
+}
+
+static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+ struct ddr_pmu *pmu = hlist_entry_safe(node, struct ddr_pmu, node);
+ int target;
+
+ if (cpu != pmu->cpu)
+ return 0;
+
+ target = cpumask_any_but(cpu_online_mask, cpu);
+ if (target >= nr_cpu_ids)
+ return 0;
+
+ perf_pmu_migrate_context(&pmu->pmu, cpu, target);
+ pmu->cpu = target;
+
+ WARN_ON(irq_set_affinity_hint(pmu->irq, cpumask_of(pmu->cpu)));
+
+ return 0;
+}
+
+static int ddr_perf_probe(struct platform_device *pdev)
+{
+ struct ddr_pmu *pmu;
+ struct device_node *np;
+ void __iomem *base;
+ char *name;
+ int num;
+ int ret;
+ int irq;
+
+ base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ np = pdev->dev.of_node;
+
+ pmu = devm_kzalloc(&pdev->dev, sizeof(*pmu), GFP_KERNEL);
+ if (!pmu)
+ return -ENOMEM;
+
+ num = ddr_perf_init(pmu, base, &pdev->dev);
+
+ platform_set_drvdata(pdev, pmu);
+
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d",
+ num);
+ if (!name)
+ return -ENOMEM;
+
+ pmu->cpu = raw_smp_processor_id();
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ DDR_CPUHP_CB_NAME,
+ NULL,
+ ddr_perf_offline_cpu);
+
+ if (ret < 0) {
+ dev_err(&pdev->dev, "cpuhp_setup_state_multi failed\n");
+ goto ddr_perf_err;
+ }
+
+ pmu->cpuhp_state = ret;
+
+ /* Register the pmu instance for cpu hotplug */
+ cpuhp_state_add_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+
+ /* Request irq */
+ irq = of_irq_get(np, 0);
+ if (irq < 0) {
+ dev_err(&pdev->dev, "Failed to get irq: %d", irq);
+ ret = irq;
+ goto ddr_perf_err;
+ }
+
+ ret = devm_request_irq(&pdev->dev, irq,
+ ddr_perf_irq_handler,
+ IRQF_NOBALANCING | IRQF_NO_THREAD,
+ DDR_CPUHP_CB_NAME,
+ pmu);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Request irq failed: %d", ret);
+ goto ddr_perf_err;
+ }
+
+ pmu->irq = irq;
+ ret = irq_set_affinity_hint(pmu->irq, cpumask_of(pmu->cpu));
+ if (ret) {
+ dev_err(pmu->dev, "Failed to set interrupt affinity!\n");
+ goto ddr_perf_err;
+ }
+
+ ret = perf_pmu_register(&pmu->pmu, name, -1);
+ if (ret)
+ goto ddr_perf_err;
+
+ return 0;
+
+ddr_perf_err:
+ if (pmu->cpuhp_state)
+ cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+
+ ida_simple_remove(&ddr_ida, pmu->id);
+ dev_warn(&pdev->dev, "i.MX8 DDR Perf PMU failed (%d), disabled\n", ret);
+ return ret;
+}
+
+static int ddr_perf_remove(struct platform_device *pdev)
+{
+ struct ddr_pmu *pmu = platform_get_drvdata(pdev);
+
+ cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
+ irq_set_affinity_hint(pmu->irq, NULL);
+
+ perf_pmu_unregister(&pmu->pmu);
+
+ ida_simple_remove(&ddr_ida, pmu->id);
+ return 0;
+}
+
+static struct platform_driver imx_ddr_pmu_driver = {
+ .driver = {
+ .name = "imx-ddr-pmu",
+ .of_match_table = imx_ddr_pmu_dt_ids,
+ },
+ .probe = ddr_perf_probe,
+ .remove = ddr_perf_remove,
+};
+
+module_platform_driver(imx_ddr_pmu_driver);
+MODULE_LICENSE("GPL v2");
void __iomem *reg = mtk_eint_get_offset(eint, d->hwirq,
eint->regs->mask_set);
+ eint->cur_mask[d->hwirq >> 5] &= ~mask;
+
writel(mask, reg);
}
void __iomem *reg = mtk_eint_get_offset(eint, d->hwirq,
eint->regs->mask_clr);
+ eint->cur_mask[d->hwirq >> 5] |= mask;
+
writel(mask, reg);
if (eint->dual_edge[d->hwirq])
}
}
-static void mtk_eint_chip_read_mask(const struct mtk_eint *eint,
- void __iomem *base, u32 *buf)
-{
- int port;
- void __iomem *reg;
-
- for (port = 0; port < eint->hw->ports; port++) {
- reg = base + eint->regs->mask + (port << 2);
- buf[port] = ~readl_relaxed(reg);
- /* Mask is 0 when irq is enabled, and 1 when disabled. */
- }
-}
-
static int mtk_eint_irq_request_resources(struct irq_data *d)
{
struct mtk_eint *eint = irq_data_get_irq_chip_data(d);
struct irq_chip *chip = irq_desc_get_chip(desc);
struct mtk_eint *eint = irq_desc_get_handler_data(desc);
unsigned int status, eint_num;
- int offset, index, virq;
+ int offset, mask_offset, index, virq;
void __iomem *reg = mtk_eint_get_offset(eint, 0, eint->regs->stat);
int dual_edge, start_level, curr_level;
status = readl(reg);
while (status) {
offset = __ffs(status);
+ mask_offset = eint_num >> 5;
index = eint_num + offset;
virq = irq_find_mapping(eint->domain, index);
status &= ~BIT(offset);
+ /*
+ * If we get an interrupt on pin that was only required
+ * for wake (but no real interrupt requested), mask the
+ * interrupt (as would mtk_eint_resume do anyway later
+ * in the resume sequence).
+ */
+ if (eint->wake_mask[mask_offset] & BIT(offset) &&
+ !(eint->cur_mask[mask_offset] & BIT(offset))) {
+ writel_relaxed(BIT(offset), reg -
+ eint->regs->stat +
+ eint->regs->mask_set);
+ }
+
dual_edge = eint->dual_edge[index];
if (dual_edge) {
/*
int mtk_eint_do_suspend(struct mtk_eint *eint)
{
- mtk_eint_chip_read_mask(eint, eint->base, eint->cur_mask);
mtk_eint_chip_write_mask(eint, eint->base, eint->wake_mask);
return 0;
if (ret < 0)
goto fail;
+ ret = devm_gpiochip_add_data(dev, &mcp->chip, mcp);
+ if (ret < 0)
+ goto fail;
+
mcp->irq_controller =
device_property_read_bool(dev, "interrupt-controller");
if (mcp->irq && mcp->irq_controller) {
goto fail;
}
- ret = devm_gpiochip_add_data(dev, &mcp->chip, mcp);
- if (ret < 0)
- goto fail;
-
if (one_regmap_config) {
mcp->pinctrl_desc.name = devm_kasprintf(dev, GFP_KERNEL,
"mcp23xxx-pinctrl.%d", raw_chip_address);
return -1;
}
-#define REG(r, info, p) ((r) * (info)->stride + (4 * ((p) / 32)))
+#define REG_ALT(msb, info, p) (OCELOT_GPIO_ALT0 * (info)->stride + 4 * ((msb) + ((info)->stride * ((p) / 32))))
static int ocelot_pinmux_set_mux(struct pinctrl_dev *pctldev,
unsigned int selector, unsigned int group)
/*
* f is encoded on two bits.
- * bit 0 of f goes in BIT(pin) of ALT0, bit 1 of f goes in BIT(pin) of
- * ALT1
+ * bit 0 of f goes in BIT(pin) of ALT[0], bit 1 of f goes in BIT(pin) of
+ * ALT[1]
* This is racy because both registers can't be updated at the same time
* but it doesn't matter much for now.
*/
- regmap_update_bits(info->map, REG(OCELOT_GPIO_ALT0, info, pin->pin),
+ regmap_update_bits(info->map, REG_ALT(0, info, pin->pin),
BIT(p), f << p);
- regmap_update_bits(info->map, REG(OCELOT_GPIO_ALT1, info, pin->pin),
+ regmap_update_bits(info->map, REG_ALT(1, info, pin->pin),
BIT(p), f << (p - 1));
return 0;
}
+#define REG(r, info, p) ((r) * (info)->stride + (4 * ((p) / 32)))
+
static int ocelot_gpio_set_direction(struct pinctrl_dev *pctldev,
struct pinctrl_gpio_range *range,
unsigned int pin, bool input)
struct ocelot_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
unsigned int p = pin % 32;
- regmap_update_bits(info->map, REG(OCELOT_GPIO_OE, info, p), BIT(p),
+ regmap_update_bits(info->map, REG(OCELOT_GPIO_OE, info, pin), BIT(p),
input ? 0 : BIT(p));
return 0;
struct ocelot_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
unsigned int p = offset % 32;
- regmap_update_bits(info->map, REG(OCELOT_GPIO_ALT0, info, offset),
+ regmap_update_bits(info->map, REG_ALT(0, info, offset),
BIT(p), 0);
- regmap_update_bits(info->map, REG(OCELOT_GPIO_ALT1, info, offset),
+ regmap_update_bits(info->map, REG_ALT(1, info, offset),
BIT(p), 0);
return 0;
depends on DASD
help
Enable this option if you want to see profiling information
- in /proc/dasd/statistics.
+ in /proc/dasd/statistics.
config DASD_ECKD
def_tristate y
else if (len == 8 && !strncmp(str, "failfast", 8))
features |= DASD_FEATURE_FAILFAST;
else {
- pr_warn("%*s is not a supported device option\n",
+ pr_warn("%.*s is not a supported device option\n",
len, str);
rc = -EINVAL;
}
Include support for using an IBM SCLP VT220-compatible terminal as a
Linux system console.
-config SCLP_ASYNC
- def_tristate m
- prompt "Support for Call Home via Asynchronous SCLP Records"
- depends on S390
- help
- This option enables the call home function, which is able to inform
- the service element and connected organisations about a kernel panic.
- You should only select this option if you know what you are doing,
- want for inform other people about your kernel panics,
- need this feature and intend to run your kernel in LPAR.
-
-config SCLP_ASYNC_ID
- string "Component ID for Call Home"
- depends on SCLP_ASYNC
- default "000000000"
- help
- The Component ID for Call Home is used to identify the correct
- problem reporting queue the call home records should be sent to.
-
- If your are unsure, please use the default value "000000000".
-
config HMC_DRV
def_tristate m
prompt "Support for file transfers from HMC drive CD/DVD-ROM"
depends on S390
help
Character device driver for z/VM reader, puncher and printer.
-
obj-$(CONFIG_SCLP_TTY) += sclp_tty.o
obj-$(CONFIG_SCLP_CONSOLE) += sclp_con.o
obj-$(CONFIG_SCLP_VT220_TTY) += sclp_vt220.o
-obj-$(CONFIG_SCLP_ASYNC) += sclp_async.o
obj-$(CONFIG_PCI) += sclp_pci.o
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Enable Asynchronous Notification via SCLP.
- *
- * Copyright IBM Corp. 2009
- * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
- *
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/stat.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/ctype.h>
-#include <linux/kmod.h>
-#include <linux/err.h>
-#include <linux/errno.h>
-#include <linux/proc_fs.h>
-#include <linux/sysctl.h>
-#include <linux/utsname.h>
-#include "sclp.h"
-
-static int callhome_enabled;
-static struct sclp_req *request;
-static struct sclp_async_sccb *sccb;
-static int sclp_async_send_wait(char *message);
-static struct ctl_table_header *callhome_sysctl_header;
-static DEFINE_SPINLOCK(sclp_async_lock);
-#define SCLP_NORMAL_WRITE 0x00
-
-struct async_evbuf {
- struct evbuf_header header;
- u64 reserved;
- u8 rflags;
- u8 empty;
- u8 rtype;
- u8 otype;
- char comp_id[12];
- char data[3000]; /* there is still some space left */
-} __attribute__((packed));
-
-struct sclp_async_sccb {
- struct sccb_header header;
- struct async_evbuf evbuf;
-} __attribute__((packed));
-
-static struct sclp_register sclp_async_register = {
- .send_mask = EVTYP_ASYNC_MASK,
-};
-
-static int call_home_on_panic(struct notifier_block *self,
- unsigned long event, void *data)
-{
- strncat(data, init_utsname()->nodename,
- sizeof(init_utsname()->nodename));
- sclp_async_send_wait(data);
- return NOTIFY_DONE;
-}
-
-static struct notifier_block call_home_panic_nb = {
- .notifier_call = call_home_on_panic,
- .priority = INT_MAX,
-};
-
-static int zero;
-static int one = 1;
-
-static struct ctl_table callhome_table[] = {
- {
- .procname = "callhome",
- .data = &callhome_enabled,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
- },
- {}
-};
-
-static struct ctl_table kern_dir_table[] = {
- {
- .procname = "kernel",
- .maxlen = 0,
- .mode = 0555,
- .child = callhome_table,
- },
- {}
-};
-
-/*
- * Function used to transfer asynchronous notification
- * records which waits for send completion
- */
-static int sclp_async_send_wait(char *message)
-{
- struct async_evbuf *evb;
- int rc;
- unsigned long flags;
-
- if (!callhome_enabled)
- return 0;
- sccb->evbuf.header.type = EVTYP_ASYNC;
- sccb->evbuf.rtype = 0xA5;
- sccb->evbuf.otype = 0x00;
- evb = &sccb->evbuf;
- request->command = SCLP_CMDW_WRITE_EVENT_DATA;
- request->sccb = sccb;
- request->status = SCLP_REQ_FILLED;
- strncpy(sccb->evbuf.data, message, sizeof(sccb->evbuf.data));
- /*
- * Retain Queue
- * e.g. 5639CC140 500 Red Hat RHEL5 Linux for zSeries (RHEL AS)
- */
- strncpy(sccb->evbuf.comp_id, CONFIG_SCLP_ASYNC_ID,
- sizeof(sccb->evbuf.comp_id));
- sccb->evbuf.header.length = sizeof(sccb->evbuf);
- sccb->header.length = sizeof(sccb->evbuf) + sizeof(sccb->header);
- sccb->header.function_code = SCLP_NORMAL_WRITE;
- rc = sclp_add_request(request);
- if (rc)
- return rc;
- spin_lock_irqsave(&sclp_async_lock, flags);
- while (request->status != SCLP_REQ_DONE &&
- request->status != SCLP_REQ_FAILED) {
- sclp_sync_wait();
- }
- spin_unlock_irqrestore(&sclp_async_lock, flags);
- if (request->status != SCLP_REQ_DONE)
- return -EIO;
- rc = ((struct sclp_async_sccb *)
- request->sccb)->header.response_code;
- if (rc != 0x0020)
- return -EIO;
- if (evb->header.flags != 0x80)
- return -EIO;
- return rc;
-}
-
-static int __init sclp_async_init(void)
-{
- int rc;
-
- rc = sclp_register(&sclp_async_register);
- if (rc)
- return rc;
- rc = -EOPNOTSUPP;
- if (!(sclp_async_register.sclp_receive_mask & EVTYP_ASYNC_MASK))
- goto out_sclp;
- rc = -ENOMEM;
- callhome_sysctl_header = register_sysctl_table(kern_dir_table);
- if (!callhome_sysctl_header)
- goto out_sclp;
- request = kzalloc(sizeof(struct sclp_req), GFP_KERNEL);
- sccb = (struct sclp_async_sccb *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
- if (!request || !sccb)
- goto out_mem;
- rc = atomic_notifier_chain_register(&panic_notifier_list,
- &call_home_panic_nb);
- if (!rc)
- goto out;
-out_mem:
- kfree(request);
- free_page((unsigned long) sccb);
- unregister_sysctl_table(callhome_sysctl_header);
-out_sclp:
- sclp_unregister(&sclp_async_register);
-out:
- return rc;
-}
-module_init(sclp_async_init);
-
-static void __exit sclp_async_exit(void)
-{
- atomic_notifier_chain_unregister(&panic_notifier_list,
- &call_home_panic_nb);
- unregister_sysctl_table(callhome_sysctl_header);
- sclp_unregister(&sclp_async_register);
- free_page((unsigned long) sccb);
- kfree(request);
-}
-module_exit(sclp_async_exit);
-
-MODULE_AUTHOR("Copyright IBM Corp. 2009");
-MODULE_AUTHOR("Hans-Joachim Picht <hans@linux.vnet.ibm.com>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SCLP Asynchronous Notification Records");
* dumps on SCSI disks (zfcpdump). The "zcore/mem" debugfs file shows the same
* dump format as s390 standalone dumps.
*
- * For more information please refer to Documentation/s390/zfcpdump.txt
+ * For more information please refer to Documentation/s390/zfcpdump.rst
*
* Copyright IBM Corp. 2003, 2008
* Author(s): Michael Holzheu
#include <linux/mutex.h>
#include <linux/rculist.h>
#include <linux/slab.h>
+#include <linux/dmapool.h>
#include <asm/airq.h>
#include <asm/isc.h>
+#include <asm/cio.h>
#include "cio.h"
#include "cio_debug.h"
static DEFINE_SPINLOCK(airq_lists_lock);
static struct hlist_head airq_lists[MAX_ISC+1];
-static struct kmem_cache *airq_iv_cache;
+static struct dma_pool *airq_iv_cache;
/**
* register_adapter_interrupt() - register adapter interrupt handler
setup_irq(THIN_INTERRUPT, &airq_interrupt);
}
+static inline unsigned long iv_size(unsigned long bits)
+{
+ return BITS_TO_LONGS(bits) * sizeof(unsigned long);
+}
+
/**
* airq_iv_create - create an interrupt vector
* @bits: number of bits in the interrupt vector
goto out;
iv->bits = bits;
iv->flags = flags;
- size = BITS_TO_LONGS(bits) * sizeof(unsigned long);
+ size = iv_size(bits);
if (flags & AIRQ_IV_CACHELINE) {
- if ((cache_line_size() * BITS_PER_BYTE) < bits)
+ if ((cache_line_size() * BITS_PER_BYTE) < bits
+ || !airq_iv_cache)
goto out_free;
- iv->vector = kmem_cache_zalloc(airq_iv_cache, GFP_KERNEL);
+ iv->vector = dma_pool_zalloc(airq_iv_cache, GFP_KERNEL,
+ &iv->vector_dma);
if (!iv->vector)
goto out_free;
} else {
- iv->vector = kzalloc(size, GFP_KERNEL);
+ iv->vector = cio_dma_zalloc(size);
if (!iv->vector)
goto out_free;
}
kfree(iv->ptr);
kfree(iv->bitlock);
kfree(iv->avail);
- if (iv->flags & AIRQ_IV_CACHELINE)
- kmem_cache_free(airq_iv_cache, iv->vector);
+ if (iv->flags & AIRQ_IV_CACHELINE && iv->vector)
+ dma_pool_free(airq_iv_cache, iv->vector, iv->vector_dma);
else
- kfree(iv->vector);
+ cio_dma_free(iv->vector, size);
kfree(iv);
out:
return NULL;
kfree(iv->ptr);
kfree(iv->bitlock);
if (iv->flags & AIRQ_IV_CACHELINE)
- kmem_cache_free(airq_iv_cache, iv->vector);
+ dma_pool_free(airq_iv_cache, iv->vector, iv->vector_dma);
else
- kfree(iv->vector);
+ cio_dma_free(iv->vector, iv_size(iv->bits));
kfree(iv->avail);
kfree(iv);
}
}
EXPORT_SYMBOL(airq_iv_scan);
-static int __init airq_init(void)
+int __init airq_init(void)
{
- airq_iv_cache = kmem_cache_create("airq_iv_cache", cache_line_size(),
- cache_line_size(), 0, NULL);
+ airq_iv_cache = dma_pool_create("airq_iv_cache", cio_get_dma_css_dev(),
+ cache_line_size(),
+ cache_line_size(), PAGE_SIZE);
if (!airq_iv_cache)
return -ENOMEM;
return 0;
}
-subsys_initcall(airq_init);
return;
req->done = 1;
ccw_device_set_timeout(cdev, 0);
- memset(&cdev->private->irb, 0, sizeof(struct irb));
+ memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
if (rc && rc != -ENODEV && req->drc)
rc = req->drc;
req->callback(cdev, req->data, rc);
continue;
}
/* Perform start function. */
- memset(&cdev->private->irb, 0, sizeof(struct irb));
+ memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
rc = cio_start(sch, cp, (u8) req->mask);
if (rc == 0) {
/* I/O started successfully. */
*/
static enum io_status ccwreq_status(struct ccw_device *cdev, struct irb *lcirb)
{
- struct irb *irb = &cdev->private->irb;
+ struct irb *irb = &cdev->private->dma_area->irb;
struct cmd_scsw *scsw = &irb->scsw.cmd;
enum uc_todo todo;
CIO_TRACE_EVENT(2, "sensedata");
CIO_HEX_EVENT(2, &cdev->private->dev_id,
sizeof(struct ccw_dev_id));
- CIO_HEX_EVENT(2, &cdev->private->irb.ecw, SENSE_MAX_COUNT);
+ CIO_HEX_EVENT(2, &cdev->private->dma_area->irb.ecw,
+ SENSE_MAX_COUNT);
/* Check for command reject. */
if (irb->ecw[0] & SNS0_CMD_REJECT)
return IO_REJECTED;
} __packed __aligned(PAGE_SIZE);
/*
- * Node Descriptor as defined in SA22-7204, "Common I/O-Device Commands"
- */
-
-#define ND_VALIDITY_VALID 0
-#define ND_VALIDITY_OUTDATED 1
-#define ND_VALIDITY_INVALID 2
-
-struct node_descriptor {
- /* Flags. */
- union {
- struct {
- u32 validity:3;
- u32 reserved:5;
- } __packed;
- u8 byte0;
- } __packed;
-
- /* Node parameters. */
- u32 params:24;
-
- /* Node ID. */
- char type[6];
- char model[3];
- char manufacturer[3];
- char plant[2];
- char seq[12];
- u16 tag;
-} __packed;
-
-/*
* Link Incident Record as defined in SA22-7202, "ESCON I/O Interface"
*/
enum sch_todo todo;
struct work_struct todo_work;
struct schib_config config;
+ char *driver_override; /* Driver name to force a match */
} __attribute__ ((aligned(8)));
DECLARE_PER_CPU_ALIGNED(struct irb, cio_irb);
int cio_tm_start_key(struct subchannel *sch, struct tcw *tcw, u8 lpm, u8 key);
int cio_tm_intrg(struct subchannel *sch);
+extern int __init airq_init(void);
+
/* Use with care. */
#ifdef CONFIG_CCW_CONSOLE
extern struct subchannel *cio_probe_console(void);
#include <linux/reboot.h>
#include <linux/suspend.h>
#include <linux/proc_fs.h>
+#include <linux/genalloc.h>
+#include <linux/dma-mapping.h>
#include <asm/isc.h>
#include <asm/crw.h>
sch->config.intparm = 0;
cio_commit_config(sch);
+ kfree(sch->driver_override);
kfree(sch->lock);
kfree(sch);
}
INIT_WORK(&sch->todo_work, css_sch_todo);
sch->dev.release = &css_subchannel_release;
device_initialize(&sch->dev);
+ /*
+ * The physical addresses of some the dma structures that can
+ * belong to a subchannel need to fit 31 bit width (e.g. ccw).
+ */
+ sch->dev.coherent_dma_mask = DMA_BIT_MASK(31);
+ sch->dev.dma_mask = &sch->dev.coherent_dma_mask;
return sch;
err:
static DEVICE_ATTR_RO(modalias);
+static ssize_t driver_override_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct subchannel *sch = to_subchannel(dev);
+ char *driver_override, *old, *cp;
+
+ /* We need to keep extra room for a newline */
+ if (count >= (PAGE_SIZE - 1))
+ return -EINVAL;
+
+ driver_override = kstrndup(buf, count, GFP_KERNEL);
+ if (!driver_override)
+ return -ENOMEM;
+
+ cp = strchr(driver_override, '\n');
+ if (cp)
+ *cp = '\0';
+
+ device_lock(dev);
+ old = sch->driver_override;
+ if (strlen(driver_override)) {
+ sch->driver_override = driver_override;
+ } else {
+ kfree(driver_override);
+ sch->driver_override = NULL;
+ }
+ device_unlock(dev);
+
+ kfree(old);
+
+ return count;
+}
+
+static ssize_t driver_override_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct subchannel *sch = to_subchannel(dev);
+ ssize_t len;
+
+ device_lock(dev);
+ len = snprintf(buf, PAGE_SIZE, "%s\n", sch->driver_override);
+ device_unlock(dev);
+ return len;
+}
+static DEVICE_ATTR_RW(driver_override);
+
static struct attribute *subch_attrs[] = {
&dev_attr_type.attr,
&dev_attr_modalias.attr,
+ &dev_attr_driver_override.attr,
NULL,
};
dev_set_name(&css->device, "css%x", nr);
css->device.groups = cssdev_attr_groups;
css->device.release = channel_subsystem_release;
+ /*
+ * We currently allocate notifier bits with this (using
+ * css->device as the device argument with the DMA API)
+ * and are fine with 64 bit addresses.
+ */
+ css->device.coherent_dma_mask = DMA_BIT_MASK(64);
+ css->device.dma_mask = &css->device.coherent_dma_mask;
mutex_init(&css->mutex);
css->cssid = chsc_get_cssid(nr);
.notifier_call = css_power_event,
};
+#define CIO_DMA_GFP (GFP_KERNEL | __GFP_ZERO)
+static struct gen_pool *cio_dma_pool;
+
+/* Currently cio supports only a single css */
+struct device *cio_get_dma_css_dev(void)
+{
+ return &channel_subsystems[0]->device;
+}
+
+struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages)
+{
+ struct gen_pool *gp_dma;
+ void *cpu_addr;
+ dma_addr_t dma_addr;
+ int i;
+
+ gp_dma = gen_pool_create(3, -1);
+ if (!gp_dma)
+ return NULL;
+ for (i = 0; i < nr_pages; ++i) {
+ cpu_addr = dma_alloc_coherent(dma_dev, PAGE_SIZE, &dma_addr,
+ CIO_DMA_GFP);
+ if (!cpu_addr)
+ return gp_dma;
+ gen_pool_add_virt(gp_dma, (unsigned long) cpu_addr,
+ dma_addr, PAGE_SIZE, -1);
+ }
+ return gp_dma;
+}
+
+static void __gp_dma_free_dma(struct gen_pool *pool,
+ struct gen_pool_chunk *chunk, void *data)
+{
+ size_t chunk_size = chunk->end_addr - chunk->start_addr + 1;
+
+ dma_free_coherent((struct device *) data, chunk_size,
+ (void *) chunk->start_addr,
+ (dma_addr_t) chunk->phys_addr);
+}
+
+void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev)
+{
+ if (!gp_dma)
+ return;
+ /* this is quite ugly but no better idea */
+ gen_pool_for_each_chunk(gp_dma, __gp_dma_free_dma, dma_dev);
+ gen_pool_destroy(gp_dma);
+}
+
+static int cio_dma_pool_init(void)
+{
+ /* No need to free up the resources: compiled in */
+ cio_dma_pool = cio_gp_dma_create(cio_get_dma_css_dev(), 1);
+ if (!cio_dma_pool)
+ return -ENOMEM;
+ return 0;
+}
+
+void *cio_gp_dma_zalloc(struct gen_pool *gp_dma, struct device *dma_dev,
+ size_t size)
+{
+ dma_addr_t dma_addr;
+ unsigned long addr;
+ size_t chunk_size;
+
+ if (!gp_dma)
+ return NULL;
+ addr = gen_pool_alloc(gp_dma, size);
+ while (!addr) {
+ chunk_size = round_up(size, PAGE_SIZE);
+ addr = (unsigned long) dma_alloc_coherent(dma_dev,
+ chunk_size, &dma_addr, CIO_DMA_GFP);
+ if (!addr)
+ return NULL;
+ gen_pool_add_virt(gp_dma, addr, dma_addr, chunk_size, -1);
+ addr = gen_pool_alloc(gp_dma, size);
+ }
+ return (void *) addr;
+}
+
+void cio_gp_dma_free(struct gen_pool *gp_dma, void *cpu_addr, size_t size)
+{
+ if (!cpu_addr)
+ return;
+ memset(cpu_addr, 0, size);
+ gen_pool_free(gp_dma, (unsigned long) cpu_addr, size);
+}
+
+/*
+ * Allocate dma memory from the css global pool. Intended for memory not
+ * specific to any single device within the css. The allocated memory
+ * is not guaranteed to be 31-bit addressable.
+ *
+ * Caution: Not suitable for early stuff like console.
+ */
+void *cio_dma_zalloc(size_t size)
+{
+ return cio_gp_dma_zalloc(cio_dma_pool, cio_get_dma_css_dev(), size);
+}
+
+void cio_dma_free(void *cpu_addr, size_t size)
+{
+ cio_gp_dma_free(cio_dma_pool, cpu_addr, size);
+}
+
/*
* Now that the driver core is running, we can setup our channel subsystem.
* The struct subchannel's are created during probing.
if (ret)
goto out_unregister;
ret = register_pm_notifier(&css_power_notifier);
- if (ret) {
- unregister_reboot_notifier(&css_reboot_notifier);
- goto out_unregister;
- }
+ if (ret)
+ goto out_unregister_rn;
+ ret = cio_dma_pool_init();
+ if (ret)
+ goto out_unregister_pmn;
+ airq_init();
css_init_done = 1;
/* Enable default isc for I/O subchannels. */
isc_register(IO_SCH_ISC);
return 0;
+out_unregister_pmn:
+ unregister_pm_notifier(&css_power_notifier);
+out_unregister_rn:
+ unregister_reboot_notifier(&css_reboot_notifier);
out_unregister:
while (i-- > 0) {
struct channel_subsystem *css = channel_subsystems[i];
struct css_driver *driver = to_cssdriver(drv);
struct css_device_id *id;
+ /* When driver_override is set, only bind to the matching driver */
+ if (sch->driver_override && strcmp(sch->driver_override, drv->name))
+ return 0;
+
for (id = driver->subchannel_type; id->match_flags; id++) {
if (sch->st == id->type)
return 1;
#include <linux/timer.h>
#include <linux/kernel_stat.h>
#include <linux/sched/signal.h>
+#include <linux/dma-mapping.h>
#include <asm/ccwdev.h>
#include <asm/cio.h>
struct ccw_device *cdev;
cdev = to_ccwdev(dev);
+ cio_gp_dma_free(cdev->private->dma_pool, cdev->private->dma_area,
+ sizeof(*cdev->private->dma_area));
+ cio_gp_dma_destroy(cdev->private->dma_pool, &cdev->dev);
/* Release reference of parent subchannel. */
put_device(cdev->dev.parent);
kfree(cdev->private);
static struct ccw_device * io_subchannel_allocate_dev(struct subchannel *sch)
{
struct ccw_device *cdev;
+ struct gen_pool *dma_pool;
cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
- if (cdev) {
- cdev->private = kzalloc(sizeof(struct ccw_device_private),
- GFP_KERNEL | GFP_DMA);
- if (cdev->private)
- return cdev;
- }
+ if (!cdev)
+ goto err_cdev;
+ cdev->private = kzalloc(sizeof(struct ccw_device_private),
+ GFP_KERNEL | GFP_DMA);
+ if (!cdev->private)
+ goto err_priv;
+ cdev->dev.coherent_dma_mask = sch->dev.coherent_dma_mask;
+ cdev->dev.dma_mask = &cdev->dev.coherent_dma_mask;
+ dma_pool = cio_gp_dma_create(&cdev->dev, 1);
+ if (!dma_pool)
+ goto err_dma_pool;
+ cdev->private->dma_pool = dma_pool;
+ cdev->private->dma_area = cio_gp_dma_zalloc(dma_pool, &cdev->dev,
+ sizeof(*cdev->private->dma_area));
+ if (!cdev->private->dma_area)
+ goto err_dma_area;
+ return cdev;
+err_dma_area:
+ cio_gp_dma_destroy(dma_pool, &cdev->dev);
+err_dma_pool:
+ kfree(cdev->private);
+err_priv:
kfree(cdev);
+err_cdev:
return ERR_PTR(-ENOMEM);
}
wake_up(&ccw_device_init_wq);
break;
case DEV_STATE_OFFLINE:
- /*
+ /*
* We can't register the device in interrupt context so
* we schedule a work item.
*/
if (!io_priv)
goto out_schedule;
+ io_priv->dma_area = dma_alloc_coherent(&sch->dev,
+ sizeof(*io_priv->dma_area),
+ &io_priv->dma_area_dma, GFP_KERNEL);
+ if (!io_priv->dma_area) {
+ kfree(io_priv);
+ goto out_schedule;
+ }
+
set_io_private(sch, io_priv);
css_schedule_eval(sch->schid);
return 0;
set_io_private(sch, NULL);
spin_unlock_irq(sch->lock);
out_free:
+ dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area),
+ io_priv->dma_area, io_priv->dma_area_dma);
kfree(io_priv);
sysfs_remove_group(&sch->dev.kobj, &io_subchannel_attr_group);
return 0;
return ERR_CAST(sch);
io_priv = kzalloc(sizeof(*io_priv), GFP_KERNEL | GFP_DMA);
- if (!io_priv) {
- put_device(&sch->dev);
- return ERR_PTR(-ENOMEM);
- }
+ if (!io_priv)
+ goto err_priv;
+ io_priv->dma_area = dma_alloc_coherent(&sch->dev,
+ sizeof(*io_priv->dma_area),
+ &io_priv->dma_area_dma, GFP_KERNEL);
+ if (!io_priv->dma_area)
+ goto err_dma_area;
set_io_private(sch, io_priv);
cdev = io_subchannel_create_ccwdev(sch);
if (IS_ERR(cdev)) {
+ dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area),
+ io_priv->dma_area, io_priv->dma_area_dma);
+ set_io_private(sch, NULL);
put_device(&sch->dev);
kfree(io_priv);
return cdev;
cdev->drv = drv;
ccw_device_set_int_class(cdev);
return cdev;
+
+err_dma_area:
+ kfree(io_priv);
+err_priv:
+ put_device(&sch->dev);
+ return ERR_PTR(-ENOMEM);
}
void __init ccw_device_destroy_console(struct ccw_device *cdev)
set_io_private(sch, NULL);
put_device(&sch->dev);
put_device(&cdev->dev);
+ dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area),
+ io_priv->dma_area, io_priv->dma_area_dma);
kfree(io_priv);
}
sizeof(struct tcw), 0);
} else {
printk(KERN_WARNING "cio: orb indicates command mode\n");
- if ((void *)(addr_t)orb->cmd.cpa == &private->sense_ccw ||
- (void *)(addr_t)orb->cmd.cpa == cdev->private->iccws)
+ if ((void *)(addr_t)orb->cmd.cpa ==
+ &private->dma_area->sense_ccw ||
+ (void *)(addr_t)orb->cmd.cpa ==
+ cdev->private->dma_area->iccws)
printk(KERN_WARNING "cio: last channel program "
"(intern):\n");
else
void ccw_device_update_sense_data(struct ccw_device *cdev)
{
memset(&cdev->id, 0, sizeof(cdev->id));
- cdev->id.cu_type = cdev->private->senseid.cu_type;
- cdev->id.cu_model = cdev->private->senseid.cu_model;
- cdev->id.dev_type = cdev->private->senseid.dev_type;
- cdev->id.dev_model = cdev->private->senseid.dev_model;
+ cdev->id.cu_type = cdev->private->dma_area->senseid.cu_type;
+ cdev->id.cu_model = cdev->private->dma_area->senseid.cu_model;
+ cdev->id.dev_type = cdev->private->dma_area->senseid.dev_type;
+ cdev->id.dev_model = cdev->private->dma_area->senseid.dev_model;
}
int ccw_device_test_sense_data(struct ccw_device *cdev)
{
- return cdev->id.cu_type == cdev->private->senseid.cu_type &&
- cdev->id.cu_model == cdev->private->senseid.cu_model &&
- cdev->id.dev_type == cdev->private->senseid.dev_type &&
- cdev->id.dev_model == cdev->private->senseid.dev_model;
+ return cdev->id.cu_type ==
+ cdev->private->dma_area->senseid.cu_type &&
+ cdev->id.cu_model ==
+ cdev->private->dma_area->senseid.cu_model &&
+ cdev->id.dev_type ==
+ cdev->private->dma_area->senseid.dev_type &&
+ cdev->id.dev_model ==
+ cdev->private->dma_area->senseid.dev_model;
}
/*
cio_disable_subchannel(sch);
/* Reset device status. */
- memset(&cdev->private->irb, 0, sizeof(struct irb));
+ memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
cdev->private->state = state;
ccw_device_done(cdev, DEV_STATE_ONLINE);
/* Deliver fake irb to device driver, if needed. */
if (cdev->private->flags.fake_irb) {
- create_fake_irb(&cdev->private->irb,
+ create_fake_irb(&cdev->private->dma_area->irb,
cdev->private->flags.fake_irb);
cdev->private->flags.fake_irb = 0;
if (cdev->handler)
cdev->handler(cdev, cdev->private->intparm,
- &cdev->private->irb);
- memset(&cdev->private->irb, 0, sizeof(struct irb));
+ &cdev->private->dma_area->irb);
+ memset(&cdev->private->dma_area->irb, 0,
+ sizeof(struct irb));
}
ccw_device_report_path_events(cdev);
ccw_device_handle_broken_paths(cdev);
if (scsw_actl(&sch->schib.scsw) != 0 ||
(scsw_stctl(&sch->schib.scsw) & SCSW_STCTL_STATUS_PEND) ||
- (scsw_stctl(&cdev->private->irb.scsw) & SCSW_STCTL_STATUS_PEND)) {
+ (scsw_stctl(&cdev->private->dma_area->irb.scsw) &
+ SCSW_STCTL_STATUS_PEND)) {
/*
* No final status yet or final status not yet delivered
* to the device driver. Can't do path verification now,
* - fast notification was requested (primary status)
* - unsolicited interrupts
*/
- stctl = scsw_stctl(&cdev->private->irb.scsw);
+ stctl = scsw_stctl(&cdev->private->dma_area->irb.scsw);
ending_status = (stctl & SCSW_STCTL_SEC_STATUS) ||
(stctl == (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND)) ||
(stctl == SCSW_STCTL_STATUS_PEND);
if (cdev->handler)
cdev->handler(cdev, cdev->private->intparm,
- &cdev->private->irb);
+ &cdev->private->dma_area->irb);
- memset(&cdev->private->irb, 0, sizeof(struct irb));
+ memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
return 1;
}
/* Unit check but no sense data. Need basic sense. */
if (ccw_device_do_sense(cdev, irb) != 0)
goto call_handler_unsol;
- memcpy(&cdev->private->irb, irb, sizeof(struct irb));
+ memcpy(&cdev->private->dma_area->irb, irb,
+ sizeof(struct irb));
cdev->private->state = DEV_STATE_W4SENSE;
cdev->private->intparm = 0;
return;
if (scsw_fctl(&irb->scsw) &
(SCSW_FCTL_CLEAR_FUNC | SCSW_FCTL_HALT_FUNC)) {
cdev->private->flags.dosense = 0;
- memset(&cdev->private->irb, 0, sizeof(struct irb));
+ memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
ccw_device_accumulate_irb(cdev, irb);
goto call_handler;
}
static int diag210_get_dev_info(struct ccw_device *cdev)
{
struct ccw_dev_id *dev_id = &cdev->private->dev_id;
- struct senseid *senseid = &cdev->private->senseid;
+ struct senseid *senseid = &cdev->private->dma_area->senseid;
struct diag210 diag_data;
int rc;
static void snsid_init(struct ccw_device *cdev)
{
cdev->private->flags.esid = 0;
- memset(&cdev->private->senseid, 0, sizeof(cdev->private->senseid));
- cdev->private->senseid.cu_type = 0xffff;
+
+ memset(&cdev->private->dma_area->senseid, 0,
+ sizeof(cdev->private->dma_area->senseid));
+ cdev->private->dma_area->senseid.cu_type = 0xffff;
}
/*
*/
static int snsid_check(struct ccw_device *cdev, void *data)
{
- struct cmd_scsw *scsw = &cdev->private->irb.scsw.cmd;
+ struct cmd_scsw *scsw = &cdev->private->dma_area->irb.scsw.cmd;
int len = sizeof(struct senseid) - scsw->count;
/* Check for incomplete SENSE ID data. */
if (len < SENSE_ID_MIN_LEN)
goto out_restart;
- if (cdev->private->senseid.cu_type == 0xffff)
+ if (cdev->private->dma_area->senseid.cu_type == 0xffff)
goto out_restart;
/* Check for incompatible SENSE ID data. */
- if (cdev->private->senseid.reserved != 0xff)
+ if (cdev->private->dma_area->senseid.reserved != 0xff)
return -EOPNOTSUPP;
/* Check for extended-identification information. */
if (len > SENSE_ID_BASIC_LEN)
static void snsid_callback(struct ccw_device *cdev, void *data, int rc)
{
struct ccw_dev_id *id = &cdev->private->dev_id;
- struct senseid *senseid = &cdev->private->senseid;
+ struct senseid *senseid = &cdev->private->dma_area->senseid;
int vm = 0;
if (rc && MACHINE_IS_VM) {
{
struct subchannel *sch = to_subchannel(cdev->dev.parent);
struct ccw_request *req = &cdev->private->req;
- struct ccw1 *cp = cdev->private->iccws;
+ struct ccw1 *cp = cdev->private->dma_area->iccws;
CIO_TRACE_EVENT(4, "snsid");
CIO_HEX_EVENT(4, &cdev->private->dev_id, sizeof(cdev->private->dev_id));
snsid_init(cdev);
/* Channel program setup. */
cp->cmd_code = CCW_CMD_SENSE_ID;
- cp->cda = (u32) (addr_t) &cdev->private->senseid;
+ cp->cda = (u32) (addr_t) &cdev->private->dma_area->senseid;
cp->count = sizeof(struct senseid);
cp->flags = CCW_FLAG_SLI;
/* Request setup. */
if (cdev->private->flags.esid == 0)
return NULL;
for (ciw_cnt = 0; ciw_cnt < MAX_CIWS; ciw_cnt++)
- if (cdev->private->senseid.ciw[ciw_cnt].ct == ct)
- return cdev->private->senseid.ciw + ciw_cnt;
+ if (cdev->private->dma_area->senseid.ciw[ciw_cnt].ct == ct)
+ return cdev->private->dma_area->senseid.ciw + ciw_cnt;
return NULL;
}
}
EXPORT_SYMBOL_GPL(ccw_device_get_schid);
+/*
+ * Allocate zeroed dma coherent 31 bit addressable memory using
+ * the subchannels dma pool. Maximal size of allocation supported
+ * is PAGE_SIZE.
+ */
+void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size)
+{
+ return cio_gp_dma_zalloc(cdev->private->dma_pool, &cdev->dev, size);
+}
+EXPORT_SYMBOL(ccw_device_dma_zalloc);
+
+void ccw_device_dma_free(struct ccw_device *cdev, void *cpu_addr, size_t size)
+{
+ cio_gp_dma_free(cdev->private->dma_pool, cpu_addr, size);
+}
+EXPORT_SYMBOL(ccw_device_dma_free);
+
EXPORT_SYMBOL(ccw_device_set_options_mask);
EXPORT_SYMBOL(ccw_device_set_options);
EXPORT_SYMBOL(ccw_device_clear_options);
static void nop_build_cp(struct ccw_device *cdev)
{
struct ccw_request *req = &cdev->private->req;
- struct ccw1 *cp = cdev->private->iccws;
+ struct ccw1 *cp = cdev->private->dma_area->iccws;
cp->cmd_code = CCW_CMD_NOOP;
cp->cda = 0;
static void spid_build_cp(struct ccw_device *cdev, u8 fn)
{
struct ccw_request *req = &cdev->private->req;
- struct ccw1 *cp = cdev->private->iccws;
+ struct ccw1 *cp = cdev->private->dma_area->iccws;
int i = pathmask_to_pos(req->lpm);
- struct pgid *pgid = &cdev->private->pgid[i];
+ struct pgid *pgid = &cdev->private->dma_area->pgid[i];
pgid->inf.fc = fn;
cp->cmd_code = CCW_CMD_SET_PGID;
static void pgid_analyze(struct ccw_device *cdev, struct pgid **p,
int *mismatch, u8 *reserved, u8 *reset)
{
- struct pgid *pgid = &cdev->private->pgid[0];
+ struct pgid *pgid = &cdev->private->dma_area->pgid[0];
struct pgid *first = NULL;
int lpm;
int i;
lpm = 0x80 >> i;
if ((cdev->private->pgid_valid_mask & lpm) == 0)
continue;
- pgid = &cdev->private->pgid[i];
+ pgid = &cdev->private->dma_area->pgid[i];
if (sch->opm & lpm) {
if (pgid->inf.ps.state1 != SNID_STATE1_GROUPED)
continue;
int i;
for (i = 0; i < 8; i++)
- memcpy(&cdev->private->pgid[i], pgid, sizeof(struct pgid));
+ memcpy(&cdev->private->dma_area->pgid[i], pgid,
+ sizeof(struct pgid));
}
/*
static void snid_build_cp(struct ccw_device *cdev)
{
struct ccw_request *req = &cdev->private->req;
- struct ccw1 *cp = cdev->private->iccws;
+ struct ccw1 *cp = cdev->private->dma_area->iccws;
int i = pathmask_to_pos(req->lpm);
/* Channel program setup. */
cp->cmd_code = CCW_CMD_SENSE_PGID;
- cp->cda = (u32) (addr_t) &cdev->private->pgid[i];
+ cp->cda = (u32) (addr_t) &cdev->private->dma_area->pgid[i];
cp->count = sizeof(struct pgid);
cp->flags = CCW_FLAG_SLI;
req->cp = cp;
sch->lpm = sch->schib.pmcw.pam;
/* Initialize PGID data. */
- memset(cdev->private->pgid, 0, sizeof(cdev->private->pgid));
+ memset(cdev->private->dma_area->pgid, 0,
+ sizeof(cdev->private->dma_area->pgid));
cdev->private->pgid_valid_mask = 0;
cdev->private->pgid_todo_mask = sch->schib.pmcw.pam;
cdev->private->path_notoper_mask = 0;
static void stlck_build_cp(struct ccw_device *cdev, void *buf1, void *buf2)
{
struct ccw_request *req = &cdev->private->req;
- struct ccw1 *cp = cdev->private->iccws;
+ struct ccw1 *cp = cdev->private->dma_area->iccws;
cp[0].cmd_code = CCW_CMD_STLCK;
cp[0].cda = (u32) (addr_t) buf1;
* are condition that have to be met for the extended control
* bit to have meaning. Sick.
*/
- cdev->private->irb.scsw.cmd.ectl = 0;
+ cdev->private->dma_area->irb.scsw.cmd.ectl = 0;
if ((irb->scsw.cmd.stctl & SCSW_STCTL_ALERT_STATUS) &&
!(irb->scsw.cmd.stctl & SCSW_STCTL_INTER_STATUS))
- cdev->private->irb.scsw.cmd.ectl = irb->scsw.cmd.ectl;
+ cdev->private->dma_area->irb.scsw.cmd.ectl = irb->scsw.cmd.ectl;
/* Check if extended control word is valid. */
- if (!cdev->private->irb.scsw.cmd.ectl)
+ if (!cdev->private->dma_area->irb.scsw.cmd.ectl)
return;
/* Copy concurrent sense / model dependent information. */
- memcpy (&cdev->private->irb.ecw, irb->ecw, sizeof (irb->ecw));
+ memcpy(&cdev->private->dma_area->irb.ecw, irb->ecw, sizeof(irb->ecw));
}
/*
if (!ccw_device_accumulate_esw_valid(irb))
return;
- cdev_irb = &cdev->private->irb;
+ cdev_irb = &cdev->private->dma_area->irb;
/* Copy last path used mask. */
cdev_irb->esw.esw1.lpum = irb->esw.esw1.lpum;
ccw_device_path_notoper(cdev);
/* No irb accumulation for transport mode irbs. */
if (scsw_is_tm(&irb->scsw)) {
- memcpy(&cdev->private->irb, irb, sizeof(struct irb));
+ memcpy(&cdev->private->dma_area->irb, irb, sizeof(struct irb));
return;
}
/*
if (!scsw_is_solicited(&irb->scsw))
return;
- cdev_irb = &cdev->private->irb;
+ cdev_irb = &cdev->private->dma_area->irb;
/*
* If the clear function had been performed, all formerly pending
* intermediate accumulated status to the device driver.
*/
if (irb->scsw.cmd.fctl & SCSW_FCTL_CLEAR_FUNC)
- memset(&cdev->private->irb, 0, sizeof(struct irb));
+ memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
/* Copy bits which are valid only for the start function. */
if (irb->scsw.cmd.fctl & SCSW_FCTL_START_FUNC) {
/*
* We have ending status but no sense information. Do a basic sense.
*/
- sense_ccw = &to_io_private(sch)->sense_ccw;
+ sense_ccw = &to_io_private(sch)->dma_area->sense_ccw;
sense_ccw->cmd_code = CCW_CMD_BASIC_SENSE;
- sense_ccw->cda = (__u32) __pa(cdev->private->irb.ecw);
+ sense_ccw->cda = (__u32) __pa(cdev->private->dma_area->irb.ecw);
sense_ccw->count = SENSE_MAX_COUNT;
sense_ccw->flags = CCW_FLAG_SLI;
if (!(irb->scsw.cmd.dstat & DEV_STAT_UNIT_CHECK) &&
(irb->scsw.cmd.dstat & DEV_STAT_CHN_END)) {
- cdev->private->irb.esw.esw0.erw.cons = 1;
+ cdev->private->dma_area->irb.esw.esw0.erw.cons = 1;
cdev->private->flags.dosense = 0;
}
/* Check if path verification is required. */
/* Check for basic sense. */
if (cdev->private->flags.dosense &&
!(irb->scsw.cmd.dstat & DEV_STAT_UNIT_CHECK)) {
- cdev->private->irb.esw.esw0.erw.cons = 1;
+ cdev->private->dma_area->irb.esw.esw0.erw.cons = 1;
cdev->private->flags.dosense = 0;
return 0;
}
#include "css.h"
#include "orb.h"
+struct io_subchannel_dma_area {
+ struct ccw1 sense_ccw; /* static ccw for sense command */
+};
+
struct io_subchannel_private {
union orb orb; /* operation request block */
- struct ccw1 sense_ccw; /* static ccw for sense command */
struct ccw_device *cdev;/* pointer to the child ccw device */
struct {
unsigned int suspend:1; /* allow suspend */
unsigned int prefetch:1;/* deny prefetch */
unsigned int inter:1; /* suppress intermediate interrupts */
} __packed options;
+ struct io_subchannel_dma_area *dma_area;
+ dma_addr_t dma_area_dma;
} __aligned(8);
#define to_io_private(n) ((struct io_subchannel_private *) \
#define FAKE_CMD_IRB 1
#define FAKE_TM_IRB 2
+struct ccw_device_dma_area {
+ struct senseid senseid; /* SenseID info */
+ struct ccw1 iccws[2]; /* ccws for SNID/SID/SPGID commands */
+ struct irb irb; /* device status */
+ struct pgid pgid[8]; /* path group IDs per chpid*/
+};
+
struct ccw_device_private {
struct ccw_device *cdev;
struct subchannel *sch;
} __attribute__((packed)) flags;
unsigned long intparm; /* user interruption parameter */
struct qdio_irq *qdio_data;
- struct irb irb; /* device status */
int async_kill_io_rc;
- struct senseid senseid; /* SenseID info */
- struct pgid pgid[8]; /* path group IDs per chpid*/
- struct ccw1 iccws[2]; /* ccws for SNID/SID/SPGID commands */
struct work_struct todo_work;
enum cdev_todo todo;
wait_queue_head_t wait_q;
struct list_head cmb_list; /* list of measured devices */
u64 cmb_start_time; /* clock value of cmb reset */
void *cmb_wait; /* deferred cmb enable/disable */
+ struct gen_pool *dma_pool;
+ struct ccw_device_dma_area *dma_area;
enum interruption_class int_class;
};
switch (state) {
case SLSB_P_OUTPUT_EMPTY:
+ case SLSB_P_OUTPUT_PENDING:
/* the adapter got it */
DBF_DEV_EVENT(DBF_INFO, q->irq_ptr,
"out empty:%1d %02x", q->nr, count);
return -ENOMEM;
}
irq_ptr_qs[i] = q;
+ INIT_LIST_HEAD(&q->entry);
}
return 0;
}
q->mask = 1 << (31 - i);
q->nr = i;
q->handler = handler;
+ INIT_LIST_HEAD(&q->entry);
}
static void setup_storage_lists(struct qdio_q *q, struct qdio_irq *irq_ptr,
mutex_lock(&tiq_list_lock);
list_add_rcu(&irq_ptr->input_qs[0]->entry, &tiq_list);
mutex_unlock(&tiq_list_lock);
- xchg(irq_ptr->dsci, 1 << 7);
}
void tiqdio_remove_input_queues(struct qdio_irq *irq_ptr)
struct qdio_q *q;
q = irq_ptr->input_qs[0];
- /* if establish triggered an error */
- if (!q || !q->entry.prev || !q->entry.next)
+ if (!q)
return;
mutex_lock(&tiq_list_lock);
list_del_rcu(&q->entry);
mutex_unlock(&tiq_list_lock);
synchronize_rcu();
+ INIT_LIST_HEAD(&q->entry);
}
static inline int has_multiple_inq_on_dsci(struct qdio_irq *irq_ptr)
/**
* tiqdio_thinint_handler - thin interrupt handler for qdio
* @airq: pointer to adapter interrupt descriptor
+ * @floating: flag to recognize floating vs. directed interrupts (unused)
*/
static void tiqdio_thinint_handler(struct airq_struct *airq, bool floating)
{
#include "vfio_ccw_cp.h"
-/*
- * Max length for ccw chain.
- * XXX: Limit to 256, need to check more?
- */
-#define CCWCHAIN_LEN_MAX 256
-
struct pfn_array {
/* Starting guest physical I/O address. */
unsigned long pa_iova;
int pa_nr;
};
-struct pfn_array_table {
- struct pfn_array *pat_pa;
- int pat_nr;
-};
-
struct ccwchain {
struct list_head next;
struct ccw1 *ch_ccw;
/* Count of the valid ccws in chain. */
int ch_len;
/* Pinned PAGEs for the original data. */
- struct pfn_array_table *ch_pat;
+ struct pfn_array *ch_pa;
};
/*
- * pfn_array_alloc_pin() - alloc memory for PFNs, then pin user pages in memory
+ * pfn_array_alloc() - alloc memory for PFNs
* @pa: pfn_array on which to perform the operation
- * @mdev: the mediated device to perform pin/unpin operations
* @iova: target guest physical address
* @len: number of bytes that should be pinned from @iova
*
- * Attempt to allocate memory for PFNs, and pin user pages in memory.
+ * Attempt to allocate memory for PFNs.
*
* Usage of pfn_array:
* We expect (pa_nr == 0) and (pa_iova_pfn == NULL), any field in
* this structure will be filled in by this function.
*
* Returns:
- * Number of pages pinned on success.
- * If @pa->pa_nr is not 0, or @pa->pa_iova_pfn is not NULL initially,
- * returns -EINVAL.
- * If no pages were pinned, returns -errno.
+ * 0 if PFNs are allocated
+ * -EINVAL if pa->pa_nr is not initially zero, or pa->pa_iova_pfn is not NULL
+ * -ENOMEM if alloc failed
*/
-static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev,
- u64 iova, unsigned int len)
+static int pfn_array_alloc(struct pfn_array *pa, u64 iova, unsigned int len)
{
- int i, ret = 0;
-
- if (!len)
- return 0;
+ int i;
if (pa->pa_nr || pa->pa_iova_pfn)
return -EINVAL;
pa->pa_pfn = pa->pa_iova_pfn + pa->pa_nr;
pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT;
- for (i = 1; i < pa->pa_nr; i++)
+ pa->pa_pfn[0] = -1ULL;
+ for (i = 1; i < pa->pa_nr; i++) {
pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1;
+ pa->pa_pfn[i] = -1ULL;
+ }
+
+ return 0;
+}
+
+/*
+ * pfn_array_pin() - Pin user pages in memory
+ * @pa: pfn_array on which to perform the operation
+ * @mdev: the mediated device to perform pin operations
+ *
+ * Returns number of pages pinned upon success.
+ * If the pin request partially succeeds, or fails completely,
+ * all pages are left unpinned and a negative error value is returned.
+ */
+static int pfn_array_pin(struct pfn_array *pa, struct device *mdev)
+{
+ int ret = 0;
ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr,
IOMMU_READ | IOMMU_WRITE, pa->pa_pfn);
err_out:
pa->pa_nr = 0;
- kfree(pa->pa_iova_pfn);
- pa->pa_iova_pfn = NULL;
return ret;
}
/* Unpin the pages before releasing the memory. */
static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev)
{
- vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr);
+ /* Only unpin if any pages were pinned to begin with */
+ if (pa->pa_nr)
+ vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr);
pa->pa_nr = 0;
kfree(pa->pa_iova_pfn);
}
-static int pfn_array_table_init(struct pfn_array_table *pat, int nr)
-{
- pat->pat_pa = kcalloc(nr, sizeof(*pat->pat_pa), GFP_KERNEL);
- if (unlikely(ZERO_OR_NULL_PTR(pat->pat_pa))) {
- pat->pat_nr = 0;
- return -ENOMEM;
- }
-
- pat->pat_nr = nr;
-
- return 0;
-}
-
-static void pfn_array_table_unpin_free(struct pfn_array_table *pat,
- struct device *mdev)
+static bool pfn_array_iova_pinned(struct pfn_array *pa, unsigned long iova)
{
- int i;
-
- for (i = 0; i < pat->pat_nr; i++)
- pfn_array_unpin_free(pat->pat_pa + i, mdev);
-
- if (pat->pat_nr) {
- kfree(pat->pat_pa);
- pat->pat_pa = NULL;
- pat->pat_nr = 0;
- }
-}
-
-static bool pfn_array_table_iova_pinned(struct pfn_array_table *pat,
- unsigned long iova)
-{
- struct pfn_array *pa = pat->pat_pa;
unsigned long iova_pfn = iova >> PAGE_SHIFT;
- int i, j;
+ int i;
- for (i = 0; i < pat->pat_nr; i++, pa++)
- for (j = 0; j < pa->pa_nr; j++)
- if (pa->pa_iova_pfn[j] == iova_pfn)
- return true;
+ for (i = 0; i < pa->pa_nr; i++)
+ if (pa->pa_iova_pfn[i] == iova_pfn)
+ return true;
return false;
}
-/* Create the list idal words for a pfn_array_table. */
-static inline void pfn_array_table_idal_create_words(
- struct pfn_array_table *pat,
+/* Create the list of IDAL words for a pfn_array. */
+static inline void pfn_array_idal_create_words(
+ struct pfn_array *pa,
unsigned long *idaws)
{
- struct pfn_array *pa;
- int i, j, k;
+ int i;
/*
* Idal words (execept the first one) rely on the memory being 4k
* there will be no problem here to simply use the phys to create an
* idaw.
*/
- k = 0;
- for (i = 0; i < pat->pat_nr; i++) {
- pa = pat->pat_pa + i;
- for (j = 0; j < pa->pa_nr; j++) {
- idaws[k] = pa->pa_pfn[j] << PAGE_SHIFT;
- if (k == 0)
- idaws[k] += pa->pa_iova & (PAGE_SIZE - 1);
- k++;
+
+ for (i = 0; i < pa->pa_nr; i++)
+ idaws[i] = pa->pa_pfn[i] << PAGE_SHIFT;
+
+ /* Adjust the first IDAW, since it may not start on a page boundary */
+ idaws[0] += pa->pa_iova & (PAGE_SIZE - 1);
+}
+
+static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len)
+{
+ struct ccw0 ccw0;
+ struct ccw1 *pccw1 = source;
+ int i;
+
+ for (i = 0; i < len; i++) {
+ ccw0 = *(struct ccw0 *)pccw1;
+ if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
+ pccw1->cmd_code = CCW_CMD_TIC;
+ pccw1->flags = 0;
+ pccw1->count = 0;
+ } else {
+ pccw1->cmd_code = ccw0.cmd_code;
+ pccw1->flags = ccw0.flags;
+ pccw1->count = ccw0.count;
}
+ pccw1->cda = ccw0.cda;
+ pccw1++;
}
}
-
/*
* Within the domain (@mdev), copy @n bytes from a guest physical
* address (@iova) to a host physical address (@to).
int i, ret;
unsigned long l, m;
- ret = pfn_array_alloc_pin(&pa, mdev, iova, n);
- if (ret <= 0)
+ ret = pfn_array_alloc(&pa, iova, n);
+ if (ret < 0)
+ return ret;
+
+ ret = pfn_array_pin(&pa, mdev);
+ if (ret < 0) {
+ pfn_array_unpin_free(&pa, mdev);
return ret;
+ }
l = n;
for (i = 0; i < pa.pa_nr; i++) {
return l;
}
-static long copy_ccw_from_iova(struct channel_program *cp,
- struct ccw1 *to, u64 iova,
- unsigned long len)
-{
- struct ccw0 ccw0;
- struct ccw1 *pccw1;
- int ret;
- int i;
-
- ret = copy_from_iova(cp->mdev, to, iova, len * sizeof(struct ccw1));
- if (ret)
- return ret;
-
- if (!cp->orb.cmd.fmt) {
- pccw1 = to;
- for (i = 0; i < len; i++) {
- ccw0 = *(struct ccw0 *)pccw1;
- if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
- pccw1->cmd_code = CCW_CMD_TIC;
- pccw1->flags = 0;
- pccw1->count = 0;
- } else {
- pccw1->cmd_code = ccw0.cmd_code;
- pccw1->flags = ccw0.flags;
- pccw1->count = ccw0.count;
- }
- pccw1->cda = ccw0.cda;
- pccw1++;
- }
- }
-
- return ret;
-}
-
/*
* Helpers to operate ccwchain.
*/
-#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)
+#define ccw_is_read(_ccw) (((_ccw)->cmd_code & 0x03) == 0x02)
+#define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C)
+#define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE)
#define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
#define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
#define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA)
-
+#define ccw_is_skip(_ccw) ((_ccw)->flags & CCW_FLAG_SKIP)
#define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC))
/*
+ * ccw_does_data_transfer()
+ *
+ * Determine whether a CCW will move any data, such that the guest pages
+ * would need to be pinned before performing the I/O.
+ *
+ * Returns 1 if yes, 0 if no.
+ */
+static inline int ccw_does_data_transfer(struct ccw1 *ccw)
+{
+ /* If the count field is zero, then no data will be transferred */
+ if (ccw->count == 0)
+ return 0;
+
+ /* If the command is a NOP, then no data will be transferred */
+ if (ccw_is_noop(ccw))
+ return 0;
+
+ /* If the skip flag is off, then data will be transferred */
+ if (!ccw_is_skip(ccw))
+ return 1;
+
+ /*
+ * If the skip flag is on, it is only meaningful if the command
+ * code is a read, read backward, sense, or sense ID. In those
+ * cases, no data will be transferred.
+ */
+ if (ccw_is_read(ccw) || ccw_is_read_backward(ccw))
+ return 0;
+
+ if (ccw_is_sense(ccw))
+ return 0;
+
+ /* The skip flag is on, but it is ignored for this command code. */
+ return 1;
+}
+
+/*
* is_cpa_within_range()
*
* @cpa: channel program address being questioned
/* Make ccw address aligned to 8. */
size = ((sizeof(*chain) + 7L) & -8L) +
sizeof(*chain->ch_ccw) * len +
- sizeof(*chain->ch_pat) * len;
+ sizeof(*chain->ch_pa) * len;
chain = kzalloc(size, GFP_DMA | GFP_KERNEL);
if (!chain)
return NULL;
chain->ch_ccw = (struct ccw1 *)data;
data = (u8 *)(chain->ch_ccw) + sizeof(*chain->ch_ccw) * len;
- chain->ch_pat = (struct pfn_array_table *)data;
+ chain->ch_pa = (struct pfn_array *)data;
chain->ch_len = len;
{
struct ccw1 *ccw = chain->ch_ccw + idx;
- if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw))
- return;
- if (!ccw->count)
+ if (ccw_is_tic(ccw))
return;
kfree((void *)(u64)ccw->cda);
}
-/* Unpin the pages then free the memory resources. */
-static void cp_unpin_free(struct channel_program *cp)
-{
- struct ccwchain *chain, *temp;
- int i;
-
- cp->initialized = false;
- list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
- for (i = 0; i < chain->ch_len; i++) {
- pfn_array_table_unpin_free(chain->ch_pat + i,
- cp->mdev);
- ccwchain_cda_free(chain, i);
- }
- ccwchain_free(chain);
- }
-}
-
/**
* ccwchain_calc_length - calculate the length of the ccw chain.
* @iova: guest physical address of the target ccw chain
*/
static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
{
- struct ccw1 *ccw, *p;
- int cnt;
-
- /*
- * Copy current chain from guest to host kernel.
- * Currently the chain length is limited to CCWCHAIN_LEN_MAX (256).
- * So copying 2K is enough (safe).
- */
- p = ccw = kcalloc(CCWCHAIN_LEN_MAX, sizeof(*ccw), GFP_KERNEL);
- if (!ccw)
- return -ENOMEM;
-
- cnt = copy_ccw_from_iova(cp, ccw, iova, CCWCHAIN_LEN_MAX);
- if (cnt) {
- kfree(ccw);
- return cnt;
- }
+ struct ccw1 *ccw = cp->guest_cp;
+ int cnt = 0;
- cnt = 0;
do {
cnt++;
* orb specified one of the unsupported formats, we defer
* checking for IDAWs in unsupported formats to here.
*/
- if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw)) {
- kfree(p);
+ if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw))
return -EOPNOTSUPP;
- }
/*
* We want to keep counting if the current CCW has the
if (cnt == CCWCHAIN_LEN_MAX + 1)
cnt = -EINVAL;
- kfree(p);
return cnt;
}
static int ccwchain_loop_tic(struct ccwchain *chain,
struct channel_program *cp);
-static int ccwchain_handle_tic(struct ccw1 *tic, struct channel_program *cp)
+static int ccwchain_handle_ccw(u32 cda, struct channel_program *cp)
{
struct ccwchain *chain;
- int len, ret;
+ int len;
- /* May transfer to an existing chain. */
- if (tic_target_chain_exists(tic, cp))
- return 0;
+ /* Copy 2K (the most we support today) of possible CCWs */
+ len = copy_from_iova(cp->mdev, cp->guest_cp, cda,
+ CCWCHAIN_LEN_MAX * sizeof(struct ccw1));
+ if (len)
+ return len;
- /* Get chain length. */
- len = ccwchain_calc_length(tic->cda, cp);
+ /* Convert any Format-0 CCWs to Format-1 */
+ if (!cp->orb.cmd.fmt)
+ convert_ccw0_to_ccw1(cp->guest_cp, CCWCHAIN_LEN_MAX);
+
+ /* Count the CCWs in the current chain */
+ len = ccwchain_calc_length(cda, cp);
if (len < 0)
return len;
chain = ccwchain_alloc(cp, len);
if (!chain)
return -ENOMEM;
- chain->ch_iova = tic->cda;
+ chain->ch_iova = cda;
- /* Copy the new chain from user. */
- ret = copy_ccw_from_iova(cp, chain->ch_ccw, tic->cda, len);
- if (ret) {
- ccwchain_free(chain);
- return ret;
- }
+ /* Copy the actual CCWs into the new chain */
+ memcpy(chain->ch_ccw, cp->guest_cp, len * sizeof(struct ccw1));
/* Loop for tics on this new chain. */
return ccwchain_loop_tic(chain, cp);
if (!ccw_is_tic(tic))
continue;
- ret = ccwchain_handle_tic(tic, cp);
+ /* May transfer to an existing chain. */
+ if (tic_target_chain_exists(tic, cp))
+ continue;
+
+ /* Build a ccwchain for the next segment */
+ ret = ccwchain_handle_ccw(tic->cda, cp);
if (ret)
return ret;
}
struct channel_program *cp)
{
struct ccw1 *ccw;
- struct pfn_array_table *pat;
+ struct pfn_array *pa;
+ u64 iova;
unsigned long *idaws;
int ret;
+ int bytes = 1;
+ int idaw_nr, idal_len;
+ int i;
ccw = chain->ch_ccw + idx;
- if (!ccw->count) {
- /*
- * We just want the translation result of any direct ccw
- * to be an IDA ccw, so let's add the IDA flag for it.
- * Although the flag will be ignored by firmware.
- */
- ccw->flags |= CCW_FLAG_IDA;
- return 0;
- }
-
- /*
- * Pin data page(s) in memory.
- * The number of pages actually is the count of the idaws which will be
- * needed when translating a direct ccw to a idal ccw.
- */
- pat = chain->ch_pat + idx;
- ret = pfn_array_table_init(pat, 1);
- if (ret)
- goto out_init;
-
- ret = pfn_array_alloc_pin(pat->pat_pa, cp->mdev, ccw->cda, ccw->count);
- if (ret < 0)
- goto out_unpin;
+ if (ccw->count)
+ bytes = ccw->count;
- /* Translate this direct ccw to a idal ccw. */
- idaws = kcalloc(ret, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
- if (!idaws) {
- ret = -ENOMEM;
- goto out_unpin;
+ /* Calculate size of IDAL */
+ if (ccw_is_idal(ccw)) {
+ /* Read first IDAW to see if it's 4K-aligned or not. */
+ /* All subsequent IDAws will be 4K-aligned. */
+ ret = copy_from_iova(cp->mdev, &iova, ccw->cda, sizeof(iova));
+ if (ret)
+ return ret;
+ } else {
+ iova = ccw->cda;
}
- ccw->cda = (__u32) virt_to_phys(idaws);
- ccw->flags |= CCW_FLAG_IDA;
-
- pfn_array_table_idal_create_words(pat, idaws);
-
- return 0;
-
-out_unpin:
- pfn_array_table_unpin_free(pat, cp->mdev);
-out_init:
- ccw->cda = 0;
- return ret;
-}
-
-static int ccwchain_fetch_idal(struct ccwchain *chain,
- int idx,
- struct channel_program *cp)
-{
- struct ccw1 *ccw;
- struct pfn_array_table *pat;
- unsigned long *idaws;
- u64 idaw_iova;
- unsigned int idaw_nr, idaw_len;
- int i, ret;
-
- ccw = chain->ch_ccw + idx;
-
- if (!ccw->count)
- return 0;
-
- /* Calculate size of idaws. */
- ret = copy_from_iova(cp->mdev, &idaw_iova, ccw->cda, sizeof(idaw_iova));
- if (ret)
- return ret;
- idaw_nr = idal_nr_words((void *)(idaw_iova), ccw->count);
- idaw_len = idaw_nr * sizeof(*idaws);
-
- /* Pin data page(s) in memory. */
- pat = chain->ch_pat + idx;
- ret = pfn_array_table_init(pat, idaw_nr);
- if (ret)
- goto out_init;
+ idaw_nr = idal_nr_words((void *)iova, bytes);
+ idal_len = idaw_nr * sizeof(*idaws);
- /* Translate idal ccw to use new allocated idaws. */
- idaws = kzalloc(idaw_len, GFP_DMA | GFP_KERNEL);
+ /* Allocate an IDAL from host storage */
+ idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
if (!idaws) {
ret = -ENOMEM;
- goto out_unpin;
+ goto out_init;
}
- ret = copy_from_iova(cp->mdev, idaws, ccw->cda, idaw_len);
- if (ret)
+ /*
+ * Allocate an array of pfn's for pages to pin/translate.
+ * The number of pages is actually the count of the idaws
+ * required for the data transfer, since we only only support
+ * 4K IDAWs today.
+ */
+ pa = chain->ch_pa + idx;
+ ret = pfn_array_alloc(pa, iova, bytes);
+ if (ret < 0)
goto out_free_idaws;
- ccw->cda = virt_to_phys(idaws);
+ if (ccw_is_idal(ccw)) {
+ /* Copy guest IDAL into host IDAL */
+ ret = copy_from_iova(cp->mdev, idaws, ccw->cda, idal_len);
+ if (ret)
+ goto out_unpin;
- for (i = 0; i < idaw_nr; i++) {
- idaw_iova = *(idaws + i);
+ /*
+ * Copy guest IDAWs into pfn_array, in case the memory they
+ * occupy is not contiguous.
+ */
+ for (i = 0; i < idaw_nr; i++)
+ pa->pa_iova_pfn[i] = idaws[i] >> PAGE_SHIFT;
+ } else {
+ /*
+ * No action is required here; the iova addresses in pfn_array
+ * were initialized sequentially in pfn_array_alloc() beginning
+ * with the contents of ccw->cda.
+ */
+ }
- ret = pfn_array_alloc_pin(pat->pat_pa + i, cp->mdev,
- idaw_iova, 1);
+ if (ccw_does_data_transfer(ccw)) {
+ ret = pfn_array_pin(pa, cp->mdev);
if (ret < 0)
- goto out_free_idaws;
+ goto out_unpin;
+ } else {
+ pa->pa_nr = 0;
}
- pfn_array_table_idal_create_words(pat, idaws);
+ ccw->cda = (__u32) virt_to_phys(idaws);
+ ccw->flags |= CCW_FLAG_IDA;
+
+ /* Populate the IDAL with pinned/translated addresses from pfn */
+ pfn_array_idal_create_words(pa, idaws);
return 0;
+out_unpin:
+ pfn_array_unpin_free(pa, cp->mdev);
out_free_idaws:
kfree(idaws);
-out_unpin:
- pfn_array_table_unpin_free(pat, cp->mdev);
out_init:
ccw->cda = 0;
return ret;
{
struct ccw1 *ccw = chain->ch_ccw + idx;
- if (ccw_is_test(ccw) || ccw_is_noop(ccw))
- return 0;
-
if (ccw_is_tic(ccw))
return ccwchain_fetch_tic(chain, idx, cp);
- if (ccw_is_idal(ccw))
- return ccwchain_fetch_idal(chain, idx, cp);
-
return ccwchain_fetch_direct(chain, idx, cp);
}
*/
int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
{
- u64 iova = orb->cmd.cpa;
- struct ccwchain *chain;
- int len, ret;
+ int ret;
/*
* XXX:
memcpy(&cp->orb, orb, sizeof(*orb));
cp->mdev = mdev;
- /* Get chain length. */
- len = ccwchain_calc_length(iova, cp);
- if (len < 0)
- return len;
-
- /* Alloc mem for the head chain. */
- chain = ccwchain_alloc(cp, len);
- if (!chain)
- return -ENOMEM;
- chain->ch_iova = iova;
-
- /* Copy the head chain from guest. */
- ret = copy_ccw_from_iova(cp, chain->ch_ccw, iova, len);
- if (ret) {
- ccwchain_free(chain);
- return ret;
- }
-
- /* Now loop for its TICs. */
- ret = ccwchain_loop_tic(chain, cp);
+ /* Build a ccwchain for the first CCW segment */
+ ret = ccwchain_handle_ccw(orb->cmd.cpa, cp);
if (ret)
- cp_unpin_free(cp);
+ cp_free(cp);
+
/* It is safe to force: if not set but idals used
* ccwchain_calc_length returns an error.
*/
*/
void cp_free(struct channel_program *cp)
{
- if (cp->initialized)
- cp_unpin_free(cp);
+ struct ccwchain *chain, *temp;
+ int i;
+
+ if (!cp->initialized)
+ return;
+
+ cp->initialized = false;
+ list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
+ for (i = 0; i < chain->ch_len; i++) {
+ pfn_array_unpin_free(chain->ch_pa + i, cp->mdev);
+ ccwchain_cda_free(chain, i);
+ }
+ ccwchain_free(chain);
+ }
}
/**
*/
list_for_each_entry(chain, &cp->ccwchain_list, next) {
ccw_head = (u32)(u64)chain->ch_ccw;
- if (is_cpa_within_range(cpa, ccw_head, chain->ch_len)) {
+ /*
+ * On successful execution, cpa points just beyond the end
+ * of the chain.
+ */
+ if (is_cpa_within_range(cpa, ccw_head, chain->ch_len + 1)) {
/*
* (cpa - ccw_head) is the offset value of the host
* physical ccw to its chain head.
list_for_each_entry(chain, &cp->ccwchain_list, next) {
for (i = 0; i < chain->ch_len; i++)
- if (pfn_array_table_iova_pinned(chain->ch_pat + i,
- iova))
+ if (pfn_array_iova_pinned(chain->ch_pa + i, iova))
return true;
}
#include "orb.h"
+/*
+ * Max length for ccw chain.
+ * XXX: Limit to 256, need to check more?
+ */
+#define CCWCHAIN_LEN_MAX 256
+
/**
* struct channel_program - manage information for channel program
* @ccwchain_list: list head of ccwchains
union orb orb;
struct device *mdev;
bool initialized;
+ struct ccw1 *guest_cp;
};
extern int cp_init(struct channel_program *cp, struct device *mdev,
memcpy(private->io_region->irb_area, irb, sizeof(*irb));
mutex_unlock(&private->io_mutex);
- if (private->io_trigger)
- eventfd_signal(private->io_trigger, 1);
-
if (private->mdev && is_final)
private->state = VFIO_CCW_STATE_IDLE;
+
+ if (private->io_trigger)
+ eventfd_signal(private->io_trigger, 1);
}
/*
if (!private)
return -ENOMEM;
+ private->cp.guest_cp = kcalloc(CCWCHAIN_LEN_MAX, sizeof(struct ccw1),
+ GFP_KERNEL);
+ if (!private->cp.guest_cp)
+ goto out_free;
+
private->io_region = kmem_cache_zalloc(vfio_ccw_io_region,
GFP_KERNEL | GFP_DMA);
if (!private->io_region)
kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
if (private->io_region)
kmem_cache_free(vfio_ccw_io_region, private->io_region);
+ kfree(private->cp.guest_cp);
kfree(private);
return ret;
}
kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
kmem_cache_free(vfio_ccw_io_region, private->io_region);
+ kfree(private->cp.guest_cp);
kfree(private);
return 0;
*/
if (!cpacf_test_func(&pckmo_functions, fc)) {
DEBUG_ERR("%s pckmo functions not available\n", __func__);
- return -EOPNOTSUPP;
+ return -ENODEV;
}
/* prepare param block */
* are able to work with protected keys.
*/
if (!cpacf_query(CPACF_PCKMO, &pckmo_functions))
- return -EOPNOTSUPP;
+ return -ENODEV;
/* check for kmc instructions available */
if (!cpacf_query(CPACF_KMC, &kmc_functions))
- return -EOPNOTSUPP;
+ return -ENODEV;
if (!cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) ||
!cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) ||
!cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256))
- return -EOPNOTSUPP;
+ return -ENODEV;
pkey_debug_init();
* Copyright IBM Corp. 2018
*
* Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
+ * Pierre Morel <pmorel@linux.ibm.com>
*/
#include <linux/module.h>
MODULE_DEVICE_TABLE(vfio_ap, ap_queue_ids);
+/**
+ * vfio_ap_queue_dev_probe:
+ *
+ * Allocate a vfio_ap_queue structure and associate it
+ * with the device as driver_data.
+ */
static int vfio_ap_queue_dev_probe(struct ap_device *apdev)
{
+ struct vfio_ap_queue *q;
+
+ q = kzalloc(sizeof(*q), GFP_KERNEL);
+ if (!q)
+ return -ENOMEM;
+ dev_set_drvdata(&apdev->device, q);
+ q->apqn = to_ap_queue(&apdev->device)->qid;
+ q->saved_isc = VFIO_AP_ISC_INVALID;
return 0;
}
+/**
+ * vfio_ap_queue_dev_remove:
+ *
+ * Takes the matrix lock to avoid actions on this device while removing
+ * Free the associated vfio_ap_queue structure
+ */
static void vfio_ap_queue_dev_remove(struct ap_device *apdev)
{
- /* Nothing to do yet */
+ struct vfio_ap_queue *q;
+ int apid, apqi;
+
+ mutex_lock(&matrix_dev->lock);
+ q = dev_get_drvdata(&apdev->device);
+ dev_set_drvdata(&apdev->device, NULL);
+ apid = AP_QID_CARD(q->apqn);
+ apqi = AP_QID_QUEUE(q->apqn);
+ vfio_ap_mdev_reset_queue(apid, apqi, 1);
+ vfio_ap_irq_disable(q);
+ kfree(q);
+ mutex_unlock(&matrix_dev->lock);
}
static void vfio_ap_matrix_dev_release(struct device *dev)
#define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"
#define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"
+static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev);
+
+static int match_apqn(struct device *dev, void *data)
+{
+ struct vfio_ap_queue *q = dev_get_drvdata(dev);
+
+ return (q->apqn == *(int *)(data)) ? 1 : 0;
+}
+
+/**
+ * vfio_ap_get_queue: Retrieve a queue with a specific APQN from a list
+ * @matrix_mdev: the associated mediated matrix
+ * @apqn: The queue APQN
+ *
+ * Retrieve a queue with a specific APQN from the list of the
+ * devices of the vfio_ap_drv.
+ * Verify that the APID and the APQI are set in the matrix.
+ *
+ * Returns the pointer to the associated vfio_ap_queue
+ */
+static struct vfio_ap_queue *vfio_ap_get_queue(
+ struct ap_matrix_mdev *matrix_mdev,
+ int apqn)
+{
+ struct vfio_ap_queue *q;
+ struct device *dev;
+
+ if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm))
+ return NULL;
+ if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm))
+ return NULL;
+
+ dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
+ &apqn, match_apqn);
+ if (!dev)
+ return NULL;
+ q = dev_get_drvdata(dev);
+ q->matrix_mdev = matrix_mdev;
+ put_device(dev);
+
+ return q;
+}
+
+/**
+ * vfio_ap_wait_for_irqclear
+ * @apqn: The AP Queue number
+ *
+ * Checks the IRQ bit for the status of this APQN using ap_tapq.
+ * Returns if the ap_tapq function succeeded and the bit is clear.
+ * Returns if ap_tapq function failed with invalid, deconfigured or
+ * checkstopped AP.
+ * Otherwise retries up to 5 times after waiting 20ms.
+ *
+ */
+static void vfio_ap_wait_for_irqclear(int apqn)
+{
+ struct ap_queue_status status;
+ int retry = 5;
+
+ do {
+ status = ap_tapq(apqn, NULL);
+ switch (status.response_code) {
+ case AP_RESPONSE_NORMAL:
+ case AP_RESPONSE_RESET_IN_PROGRESS:
+ if (!status.irq_enabled)
+ return;
+ /* Fall through */
+ case AP_RESPONSE_BUSY:
+ msleep(20);
+ break;
+ case AP_RESPONSE_Q_NOT_AVAIL:
+ case AP_RESPONSE_DECONFIGURED:
+ case AP_RESPONSE_CHECKSTOPPED:
+ default:
+ WARN_ONCE(1, "%s: tapq rc %02x: %04x\n", __func__,
+ status.response_code, apqn);
+ return;
+ }
+ } while (--retry);
+
+ WARN_ONCE(1, "%s: tapq rc %02x: %04x could not clear IR bit\n",
+ __func__, status.response_code, apqn);
+}
+
+/**
+ * vfio_ap_free_aqic_resources
+ * @q: The vfio_ap_queue
+ *
+ * Unregisters the ISC in the GIB when the saved ISC not invalid.
+ * Unpin the guest's page holding the NIB when it exist.
+ * Reset the saved_pfn and saved_isc to invalid values.
+ * Clear the pointer to the matrix mediated device.
+ *
+ */
+static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
+{
+ if (q->saved_isc != VFIO_AP_ISC_INVALID && q->matrix_mdev)
+ kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc);
+ if (q->saved_pfn && q->matrix_mdev)
+ vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev),
+ &q->saved_pfn, 1);
+ q->saved_pfn = 0;
+ q->saved_isc = VFIO_AP_ISC_INVALID;
+ q->matrix_mdev = NULL;
+}
+
+/**
+ * vfio_ap_irq_disable
+ * @q: The vfio_ap_queue
+ *
+ * Uses ap_aqic to disable the interruption and in case of success, reset
+ * in progress or IRQ disable command already proceeded: calls
+ * vfio_ap_wait_for_irqclear() to check for the IRQ bit to be clear
+ * and calls vfio_ap_free_aqic_resources() to free the resources associated
+ * with the AP interrupt handling.
+ *
+ * In the case the AP is busy, or a reset is in progress,
+ * retries after 20ms, up to 5 times.
+ *
+ * Returns if ap_aqic function failed with invalid, deconfigured or
+ * checkstopped AP.
+ */
+struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
+{
+ struct ap_qirq_ctrl aqic_gisa = {};
+ struct ap_queue_status status;
+ int retries = 5;
+
+ do {
+ status = ap_aqic(q->apqn, aqic_gisa, NULL);
+ switch (status.response_code) {
+ case AP_RESPONSE_OTHERWISE_CHANGED:
+ case AP_RESPONSE_NORMAL:
+ vfio_ap_wait_for_irqclear(q->apqn);
+ goto end_free;
+ case AP_RESPONSE_RESET_IN_PROGRESS:
+ case AP_RESPONSE_BUSY:
+ msleep(20);
+ break;
+ case AP_RESPONSE_Q_NOT_AVAIL:
+ case AP_RESPONSE_DECONFIGURED:
+ case AP_RESPONSE_CHECKSTOPPED:
+ case AP_RESPONSE_INVALID_ADDRESS:
+ default:
+ /* All cases in default means AP not operational */
+ WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
+ status.response_code);
+ goto end_free;
+ }
+ } while (retries--);
+
+ WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
+ status.response_code);
+end_free:
+ vfio_ap_free_aqic_resources(q);
+ return status;
+}
+
+/**
+ * vfio_ap_setirq: Enable Interruption for a APQN
+ *
+ * @dev: the device associated with the ap_queue
+ * @q: the vfio_ap_queue holding AQIC parameters
+ *
+ * Pin the NIB saved in *q
+ * Register the guest ISC to GIB interface and retrieve the
+ * host ISC to issue the host side PQAP/AQIC
+ *
+ * Response.status may be set to AP_RESPONSE_INVALID_ADDRESS in case the
+ * vfio_pin_pages failed.
+ *
+ * Otherwise return the ap_queue_status returned by the ap_aqic(),
+ * all retry handling will be done by the guest.
+ */
+static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
+ int isc,
+ unsigned long nib)
+{
+ struct ap_qirq_ctrl aqic_gisa = {};
+ struct ap_queue_status status = {};
+ struct kvm_s390_gisa *gisa;
+ struct kvm *kvm;
+ unsigned long h_nib, g_pfn, h_pfn;
+ int ret;
+
+ g_pfn = nib >> PAGE_SHIFT;
+ ret = vfio_pin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1,
+ IOMMU_READ | IOMMU_WRITE, &h_pfn);
+ switch (ret) {
+ case 1:
+ break;
+ default:
+ status.response_code = AP_RESPONSE_INVALID_ADDRESS;
+ return status;
+ }
+
+ kvm = q->matrix_mdev->kvm;
+ gisa = kvm->arch.gisa_int.origin;
+
+ h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK);
+ aqic_gisa.gisc = isc;
+ aqic_gisa.isc = kvm_s390_gisc_register(kvm, isc);
+ aqic_gisa.ir = 1;
+ aqic_gisa.gisa = (uint64_t)gisa >> 4;
+
+ status = ap_aqic(q->apqn, aqic_gisa, (void *)h_nib);
+ switch (status.response_code) {
+ case AP_RESPONSE_NORMAL:
+ /* See if we did clear older IRQ configuration */
+ vfio_ap_free_aqic_resources(q);
+ q->saved_pfn = g_pfn;
+ q->saved_isc = isc;
+ break;
+ case AP_RESPONSE_OTHERWISE_CHANGED:
+ /* We could not modify IRQ setings: clear new configuration */
+ vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1);
+ kvm_s390_gisc_unregister(kvm, isc);
+ break;
+ default:
+ pr_warn("%s: apqn %04x: response: %02x\n", __func__, q->apqn,
+ status.response_code);
+ vfio_ap_irq_disable(q);
+ break;
+ }
+
+ return status;
+}
+
+/**
+ * handle_pqap: PQAP instruction callback
+ *
+ * @vcpu: The vcpu on which we received the PQAP instruction
+ *
+ * Get the general register contents to initialize internal variables.
+ * REG[0]: APQN
+ * REG[1]: IR and ISC
+ * REG[2]: NIB
+ *
+ * Response.status may be set to following Response Code:
+ * - AP_RESPONSE_Q_NOT_AVAIL: if the queue is not available
+ * - AP_RESPONSE_DECONFIGURED: if the queue is not configured
+ * - AP_RESPONSE_NORMAL (0) : in case of successs
+ * Check vfio_ap_setirq() and vfio_ap_clrirq() for other possible RC.
+ * We take the matrix_dev lock to ensure serialization on queues and
+ * mediated device access.
+ *
+ * Return 0 if we could handle the request inside KVM.
+ * otherwise, returns -EOPNOTSUPP to let QEMU handle the fault.
+ */
+static int handle_pqap(struct kvm_vcpu *vcpu)
+{
+ uint64_t status;
+ uint16_t apqn;
+ struct vfio_ap_queue *q;
+ struct ap_queue_status qstatus = {
+ .response_code = AP_RESPONSE_Q_NOT_AVAIL, };
+ struct ap_matrix_mdev *matrix_mdev;
+
+ /* If we do not use the AIV facility just go to userland */
+ if (!(vcpu->arch.sie_block->eca & ECA_AIV))
+ return -EOPNOTSUPP;
+
+ apqn = vcpu->run->s.regs.gprs[0] & 0xffff;
+ mutex_lock(&matrix_dev->lock);
+
+ if (!vcpu->kvm->arch.crypto.pqap_hook)
+ goto out_unlock;
+ matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
+ struct ap_matrix_mdev, pqap_hook);
+
+ q = vfio_ap_get_queue(matrix_mdev, apqn);
+ if (!q)
+ goto out_unlock;
+
+ status = vcpu->run->s.regs.gprs[1];
+
+ /* If IR bit(16) is set we enable the interrupt */
+ if ((status >> (63 - 16)) & 0x01)
+ qstatus = vfio_ap_irq_enable(q, status & 0x07,
+ vcpu->run->s.regs.gprs[2]);
+ else
+ qstatus = vfio_ap_irq_disable(q);
+
+out_unlock:
+ memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus));
+ vcpu->run->s.regs.gprs[1] >>= 32;
+ mutex_unlock(&matrix_dev->lock);
+ return 0;
+}
+
static void vfio_ap_matrix_init(struct ap_config_info *info,
struct ap_matrix *matrix)
{
return -ENOMEM;
}
+ matrix_mdev->mdev = mdev;
vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
mdev_set_drvdata(mdev, matrix_mdev);
+ matrix_mdev->pqap_hook.hook = handle_pqap;
+ matrix_mdev->pqap_hook.owner = THIS_MODULE;
mutex_lock(&matrix_dev->lock);
list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
mutex_unlock(&matrix_dev->lock);
return -EBUSY;
mutex_lock(&matrix_dev->lock);
+ vfio_ap_mdev_reset_queues(mdev);
list_del(&matrix_mdev->node);
mutex_unlock(&matrix_dev->lock);
}
matrix_mdev->kvm = kvm;
+ kvm_get_kvm(kvm);
+ kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
mutex_unlock(&matrix_dev->lock);
return 0;
}
+/*
+ * vfio_ap_mdev_iommu_notifier: IOMMU notifier callback
+ *
+ * @nb: The notifier block
+ * @action: Action to be taken
+ * @data: data associated with the request
+ *
+ * For an UNMAP request, unpin the guest IOVA (the NIB guest address we
+ * pinned before). Other requests are ignored.
+ *
+ */
+static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct ap_matrix_mdev *matrix_mdev;
+
+ matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);
+
+ if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
+ struct vfio_iommu_type1_dma_unmap *unmap = data;
+ unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
+
+ vfio_unpin_pages(mdev_dev(matrix_mdev->mdev), &g_pfn, 1);
+ return NOTIFY_OK;
+ }
+
+ return NOTIFY_DONE;
+}
+
static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
return NOTIFY_OK;
}
-static int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi,
- unsigned int retry)
+static void vfio_ap_irq_disable_apqn(int apqn)
+{
+ struct device *dev;
+ struct vfio_ap_queue *q;
+
+ dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
+ &apqn, match_apqn);
+ if (dev) {
+ q = dev_get_drvdata(dev);
+ vfio_ap_irq_disable(q);
+ put_device(dev);
+ }
+}
+
+int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi,
+ unsigned int retry)
{
struct ap_queue_status status;
+ int retry2 = 2;
+ int apqn = AP_MKQID(apid, apqi);
do {
- status = ap_zapq(AP_MKQID(apid, apqi));
+ status = ap_zapq(apqn);
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
+ while (!status.queue_empty && retry2--) {
+ msleep(20);
+ status = ap_tapq(apqn, NULL);
+ }
+ WARN_ON_ONCE(retry <= 0);
return 0;
case AP_RESPONSE_RESET_IN_PROGRESS:
case AP_RESPONSE_BUSY:
*/
if (ret)
rc = ret;
+ vfio_ap_irq_disable_apqn(AP_MKQID(apid, apqi));
}
}
return ret;
}
- return 0;
+ matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
+ events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
+ ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
+ &events, &matrix_mdev->iommu_notifier);
+ if (!ret)
+ return ret;
+
+ vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
+ &matrix_mdev->group_notifier);
+ module_put(THIS_MODULE);
+ return ret;
}
static void vfio_ap_mdev_release(struct mdev_device *mdev)
{
struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
- if (matrix_mdev->kvm)
+ mutex_lock(&matrix_dev->lock);
+ if (matrix_mdev->kvm) {
kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
+ matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
+ vfio_ap_mdev_reset_queues(mdev);
+ kvm_put_kvm(matrix_mdev->kvm);
+ matrix_mdev->kvm = NULL;
+ }
+ mutex_unlock(&matrix_dev->lock);
- vfio_ap_mdev_reset_queues(mdev);
+ vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
+ &matrix_mdev->iommu_notifier);
vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
&matrix_mdev->group_notifier);
- matrix_mdev->kvm = NULL;
module_put(THIS_MODULE);
}
{
int ret;
+ mutex_lock(&matrix_dev->lock);
switch (cmd) {
case VFIO_DEVICE_GET_INFO:
ret = vfio_ap_mdev_get_device_info(arg);
ret = -EOPNOTSUPP;
break;
}
+ mutex_unlock(&matrix_dev->lock);
return ret;
}
*
* Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
* Halil Pasic <pasic@linux.ibm.com>
+ * Pierre Morel <pmorel@linux.ibm.com>
*
* Copyright IBM Corp. 2018
*/
#include <linux/mdev.h>
#include <linux/delay.h>
#include <linux/mutex.h>
+#include <linux/kvm_host.h>
#include "ap_bus.h"
struct list_head node;
struct ap_matrix matrix;
struct notifier_block group_notifier;
+ struct notifier_block iommu_notifier;
struct kvm *kvm;
+ struct kvm_s390_module_hook pqap_hook;
+ struct mdev_device *mdev;
};
extern int vfio_ap_mdev_register(void);
extern void vfio_ap_mdev_unregister(void);
+int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi,
+ unsigned int retry);
+struct vfio_ap_queue {
+ struct ap_matrix_mdev *matrix_mdev;
+ unsigned long saved_pfn;
+ int apqn;
+#define VFIO_AP_ISC_INVALID 0xff
+ unsigned char saved_isc;
+};
+struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q);
#endif /* _VFIO_AP_PRIVATE_H_ */
payload_hdr = (struct pld_hdr *)((&(msg->pld_lenfmt))+lfmt);
*fcode = payload_hdr->func_val & 0xFFFF;
+ /* enable special processing based on the cprbs flags special bit */
+ if (msg->cprbx.flags & 0x20)
+ ap_msg->special = 1;
+
return 0;
}
prompt "Lan Channel Station Interface"
depends on CCW && NETDEVICES && (ETHERNET || FDDI)
help
- Select this option if you want to use LCS networking on IBM System z.
- This device driver supports FDDI (IEEE 802.7) and Ethernet.
- To compile as a module, choose M. The module name is lcs.
- If you do not know what it is, it's safe to choose Y.
+ Select this option if you want to use LCS networking on IBM System z.
+ This device driver supports FDDI (IEEE 802.7) and Ethernet.
+ To compile as a module, choose M. The module name is lcs.
+ If you do not know what it is, it's safe to choose Y.
config CTCM
def_tristate m
#define VIRTIO_CCW_CONFIG_SIZE 0x100
/* same as PCI config space size, should be enough for all drivers */
+struct vcdev_dma_area {
+ unsigned long indicators;
+ unsigned long indicators2;
+ struct vq_config_block config_block;
+ __u8 status;
+};
+
struct virtio_ccw_device {
struct virtio_device vdev;
- __u8 *status;
__u8 config[VIRTIO_CCW_CONFIG_SIZE];
struct ccw_device *cdev;
__u32 curr_io;
spinlock_t lock;
struct mutex io_lock; /* Serializes I/O requests */
struct list_head virtqueues;
- unsigned long indicators;
- unsigned long indicators2;
- struct vq_config_block *config_block;
bool is_thinint;
bool going_away;
bool device_lost;
unsigned int config_ready;
void *airq_info;
- u64 dma_mask;
+ struct vcdev_dma_area *dma_area;
};
+static inline unsigned long *indicators(struct virtio_ccw_device *vcdev)
+{
+ return &vcdev->dma_area->indicators;
+}
+
+static inline unsigned long *indicators2(struct virtio_ccw_device *vcdev)
+{
+ return &vcdev->dma_area->indicators2;
+}
+
struct vq_info_block_legacy {
__u64 queue;
__u32 align;
struct airq_info {
rwlock_t lock;
- u8 summary_indicator;
+ u8 summary_indicator_idx;
struct airq_struct airq;
struct airq_iv *aiv;
};
static struct airq_info *airq_areas[MAX_AIRQ_AREAS];
+static u8 *summary_indicators;
+
+static inline u8 *get_summary_indicator(struct airq_info *info)
+{
+ return summary_indicators + info->summary_indicator_idx;
+}
#define CCW_CMD_SET_VQ 0x13
#define CCW_CMD_VDEV_RESET 0x33
break;
vring_interrupt(0, (void *)airq_iv_get_ptr(info->aiv, ai));
}
- info->summary_indicator = 0;
+ *(get_summary_indicator(info)) = 0;
smp_wmb();
/* Walk through indicators field, summary indicator not active. */
for (ai = 0;;) {
read_unlock(&info->lock);
}
-static struct airq_info *new_airq_info(void)
+static struct airq_info *new_airq_info(int index)
{
struct airq_info *info;
int rc;
if (!info)
return NULL;
rwlock_init(&info->lock);
- info->aiv = airq_iv_create(VIRTIO_IV_BITS, AIRQ_IV_ALLOC | AIRQ_IV_PTR);
+ info->aiv = airq_iv_create(VIRTIO_IV_BITS, AIRQ_IV_ALLOC | AIRQ_IV_PTR
+ | AIRQ_IV_CACHELINE);
if (!info->aiv) {
kfree(info);
return NULL;
}
info->airq.handler = virtio_airq_handler;
- info->airq.lsi_ptr = &info->summary_indicator;
+ info->summary_indicator_idx = index;
+ info->airq.lsi_ptr = get_summary_indicator(info);
info->airq.lsi_mask = 0xff;
info->airq.isc = VIRTIO_AIRQ_ISC;
rc = register_adapter_interrupt(&info->airq);
for (i = 0; i < MAX_AIRQ_AREAS && !indicator_addr; i++) {
if (!airq_areas[i])
- airq_areas[i] = new_airq_info();
+ airq_areas[i] = new_airq_info(i);
info = airq_areas[i];
if (!info)
return 0;
struct airq_info *airq_info = vcdev->airq_info;
if (vcdev->is_thinint) {
- thinint_area = kzalloc(sizeof(*thinint_area),
- GFP_DMA | GFP_KERNEL);
+ thinint_area = ccw_device_dma_zalloc(vcdev->cdev,
+ sizeof(*thinint_area));
if (!thinint_area)
return;
thinint_area->summary_indicator =
- (unsigned long) &airq_info->summary_indicator;
+ (unsigned long) get_summary_indicator(airq_info);
thinint_area->isc = VIRTIO_AIRQ_ISC;
ccw->cmd_code = CCW_CMD_SET_IND_ADAPTER;
ccw->count = sizeof(*thinint_area);
ccw->cda = (__u32)(unsigned long) thinint_area;
} else {
/* payload is the address of the indicators */
- indicatorp = kmalloc(sizeof(&vcdev->indicators),
- GFP_DMA | GFP_KERNEL);
+ indicatorp = ccw_device_dma_zalloc(vcdev->cdev,
+ sizeof(indicators(vcdev)));
if (!indicatorp)
return;
*indicatorp = 0;
ccw->cmd_code = CCW_CMD_SET_IND;
- ccw->count = sizeof(&vcdev->indicators);
+ ccw->count = sizeof(indicators(vcdev));
ccw->cda = (__u32)(unsigned long) indicatorp;
}
/* Deregister indicators from host. */
- vcdev->indicators = 0;
+ *indicators(vcdev) = 0;
ccw->flags = 0;
ret = ccw_io_helper(vcdev, ccw,
vcdev->is_thinint ?
"Failed to deregister indicators (%d)\n", ret);
else if (vcdev->is_thinint)
virtio_ccw_drop_indicators(vcdev);
- kfree(indicatorp);
- kfree(thinint_area);
+ ccw_device_dma_free(vcdev->cdev, indicatorp, sizeof(indicators(vcdev)));
+ ccw_device_dma_free(vcdev->cdev, thinint_area, sizeof(*thinint_area));
}
static inline long __do_kvm_notify(struct subchannel_id schid,
{
int ret;
- vcdev->config_block->index = index;
+ vcdev->dma_area->config_block.index = index;
ccw->cmd_code = CCW_CMD_READ_VQ_CONF;
ccw->flags = 0;
ccw->count = sizeof(struct vq_config_block);
- ccw->cda = (__u32)(unsigned long)(vcdev->config_block);
+ ccw->cda = (__u32)(unsigned long)(&vcdev->dma_area->config_block);
ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_VQ_CONF);
if (ret)
return ret;
- return vcdev->config_block->num ?: -ENOENT;
+ return vcdev->dma_area->config_block.num ?: -ENOENT;
}
static void virtio_ccw_del_vq(struct virtqueue *vq, struct ccw1 *ccw)
ret, index);
vring_del_virtqueue(vq);
- kfree(info->info_block);
+ ccw_device_dma_free(vcdev->cdev, info->info_block,
+ sizeof(*info->info_block));
kfree(info);
}
struct ccw1 *ccw;
struct virtio_ccw_device *vcdev = to_vc_device(vdev);
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return;
list_for_each_entry_safe(vq, n, &vdev->vqs, list)
virtio_ccw_del_vq(vq, ccw);
- kfree(ccw);
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
}
static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev,
err = -ENOMEM;
goto out_err;
}
- info->info_block = kzalloc(sizeof(*info->info_block),
- GFP_DMA | GFP_KERNEL);
+ info->info_block = ccw_device_dma_zalloc(vcdev->cdev,
+ sizeof(*info->info_block));
if (!info->info_block) {
dev_warn(&vcdev->cdev->dev, "no info block\n");
err = -ENOMEM;
if (vq)
vring_del_virtqueue(vq);
if (info) {
- kfree(info->info_block);
+ ccw_device_dma_free(vcdev->cdev, info->info_block,
+ sizeof(*info->info_block));
}
kfree(info);
return ERR_PTR(err);
struct virtio_thinint_area *thinint_area = NULL;
struct airq_info *info;
- thinint_area = kzalloc(sizeof(*thinint_area), GFP_DMA | GFP_KERNEL);
+ thinint_area = ccw_device_dma_zalloc(vcdev->cdev,
+ sizeof(*thinint_area));
if (!thinint_area) {
ret = -ENOMEM;
goto out;
}
info = vcdev->airq_info;
thinint_area->summary_indicator =
- (unsigned long) &info->summary_indicator;
+ (unsigned long) get_summary_indicator(info);
thinint_area->isc = VIRTIO_AIRQ_ISC;
ccw->cmd_code = CCW_CMD_SET_IND_ADAPTER;
ccw->flags = CCW_FLAG_SLI;
virtio_ccw_drop_indicators(vcdev);
}
out:
- kfree(thinint_area);
+ ccw_device_dma_free(vcdev->cdev, thinint_area, sizeof(*thinint_area));
return ret;
}
int ret, i, queue_idx = 0;
struct ccw1 *ccw;
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return -ENOMEM;
* We need a data area under 2G to communicate. Our payload is
* the address of the indicators.
*/
- indicatorp = kmalloc(sizeof(&vcdev->indicators), GFP_DMA | GFP_KERNEL);
+ indicatorp = ccw_device_dma_zalloc(vcdev->cdev,
+ sizeof(indicators(vcdev)));
if (!indicatorp)
goto out;
- *indicatorp = (unsigned long) &vcdev->indicators;
+ *indicatorp = (unsigned long) indicators(vcdev);
if (vcdev->is_thinint) {
ret = virtio_ccw_register_adapter_ind(vcdev, vqs, nvqs, ccw);
if (ret)
}
if (!vcdev->is_thinint) {
/* Register queue indicators with host. */
- vcdev->indicators = 0;
+ *indicators(vcdev) = 0;
ccw->cmd_code = CCW_CMD_SET_IND;
ccw->flags = 0;
- ccw->count = sizeof(&vcdev->indicators);
+ ccw->count = sizeof(indicators(vcdev));
ccw->cda = (__u32)(unsigned long) indicatorp;
ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_IND);
if (ret)
goto out;
}
/* Register indicators2 with host for config changes */
- *indicatorp = (unsigned long) &vcdev->indicators2;
- vcdev->indicators2 = 0;
+ *indicatorp = (unsigned long) indicators2(vcdev);
+ *indicators2(vcdev) = 0;
ccw->cmd_code = CCW_CMD_SET_CONF_IND;
ccw->flags = 0;
- ccw->count = sizeof(&vcdev->indicators2);
+ ccw->count = sizeof(indicators2(vcdev));
ccw->cda = (__u32)(unsigned long) indicatorp;
ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_CONF_IND);
if (ret)
goto out;
- kfree(indicatorp);
- kfree(ccw);
+ if (indicatorp)
+ ccw_device_dma_free(vcdev->cdev, indicatorp,
+ sizeof(indicators(vcdev)));
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
return 0;
out:
- kfree(indicatorp);
- kfree(ccw);
+ if (indicatorp)
+ ccw_device_dma_free(vcdev->cdev, indicatorp,
+ sizeof(indicators(vcdev)));
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
virtio_ccw_del_vqs(vdev);
return ret;
}
struct virtio_ccw_device *vcdev = to_vc_device(vdev);
struct ccw1 *ccw;
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return;
/* Zero status bits. */
- *vcdev->status = 0;
+ vcdev->dma_area->status = 0;
/* Send a reset ccw on device. */
ccw->cmd_code = CCW_CMD_VDEV_RESET;
ccw->count = 0;
ccw->cda = 0;
ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_RESET);
- kfree(ccw);
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
}
static u64 virtio_ccw_get_features(struct virtio_device *vdev)
u64 rc;
struct ccw1 *ccw;
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return 0;
- features = kzalloc(sizeof(*features), GFP_DMA | GFP_KERNEL);
+ features = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*features));
if (!features) {
rc = 0;
goto out_free;
rc |= (u64)le32_to_cpu(features->features) << 32;
out_free:
- kfree(features);
- kfree(ccw);
+ ccw_device_dma_free(vcdev->cdev, features, sizeof(*features));
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
return rc;
}
return -EINVAL;
}
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return -ENOMEM;
- features = kzalloc(sizeof(*features), GFP_DMA | GFP_KERNEL);
+ features = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*features));
if (!features) {
ret = -ENOMEM;
goto out_free;
ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_FEAT);
out_free:
- kfree(features);
- kfree(ccw);
+ ccw_device_dma_free(vcdev->cdev, features, sizeof(*features));
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
return ret;
}
void *config_area;
unsigned long flags;
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return;
- config_area = kzalloc(VIRTIO_CCW_CONFIG_SIZE, GFP_DMA | GFP_KERNEL);
+ config_area = ccw_device_dma_zalloc(vcdev->cdev,
+ VIRTIO_CCW_CONFIG_SIZE);
if (!config_area)
goto out_free;
memcpy(buf, config_area + offset, len);
out_free:
- kfree(config_area);
- kfree(ccw);
+ ccw_device_dma_free(vcdev->cdev, config_area, VIRTIO_CCW_CONFIG_SIZE);
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
}
static void virtio_ccw_set_config(struct virtio_device *vdev,
void *config_area;
unsigned long flags;
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return;
- config_area = kzalloc(VIRTIO_CCW_CONFIG_SIZE, GFP_DMA | GFP_KERNEL);
+ config_area = ccw_device_dma_zalloc(vcdev->cdev,
+ VIRTIO_CCW_CONFIG_SIZE);
if (!config_area)
goto out_free;
ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_CONFIG);
out_free:
- kfree(config_area);
- kfree(ccw);
+ ccw_device_dma_free(vcdev->cdev, config_area, VIRTIO_CCW_CONFIG_SIZE);
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
}
static u8 virtio_ccw_get_status(struct virtio_device *vdev)
{
struct virtio_ccw_device *vcdev = to_vc_device(vdev);
- u8 old_status = *vcdev->status;
+ u8 old_status = vcdev->dma_area->status;
struct ccw1 *ccw;
if (vcdev->revision < 1)
- return *vcdev->status;
+ return vcdev->dma_area->status;
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return old_status;
ccw->cmd_code = CCW_CMD_READ_STATUS;
ccw->flags = 0;
- ccw->count = sizeof(*vcdev->status);
- ccw->cda = (__u32)(unsigned long)vcdev->status;
+ ccw->count = sizeof(vcdev->dma_area->status);
+ ccw->cda = (__u32)(unsigned long)&vcdev->dma_area->status;
ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_STATUS);
/*
* If the channel program failed (should only happen if the device
* was hotunplugged, and then we clean up via the machine check
- * handler anyway), vcdev->status was not overwritten and we just
+ * handler anyway), vcdev->dma_area->status was not overwritten and we just
* return the old status, which is fine.
*/
- kfree(ccw);
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
- return *vcdev->status;
+ return vcdev->dma_area->status;
}
static void virtio_ccw_set_status(struct virtio_device *vdev, u8 status)
{
struct virtio_ccw_device *vcdev = to_vc_device(vdev);
- u8 old_status = *vcdev->status;
+ u8 old_status = vcdev->dma_area->status;
struct ccw1 *ccw;
int ret;
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return;
/* Write the status to the host. */
- *vcdev->status = status;
+ vcdev->dma_area->status = status;
ccw->cmd_code = CCW_CMD_WRITE_STATUS;
ccw->flags = 0;
ccw->count = sizeof(status);
- ccw->cda = (__u32)(unsigned long)vcdev->status;
+ ccw->cda = (__u32)(unsigned long)&vcdev->dma_area->status;
ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_STATUS);
/* Write failed? We assume status is unchanged. */
if (ret)
- *vcdev->status = old_status;
- kfree(ccw);
+ vcdev->dma_area->status = old_status;
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
}
static const char *virtio_ccw_bus_name(struct virtio_device *vdev)
struct virtio_device *dev = dev_to_virtio(_d);
struct virtio_ccw_device *vcdev = to_vc_device(dev);
- kfree(vcdev->status);
- kfree(vcdev->config_block);
+ ccw_device_dma_free(vcdev->cdev, vcdev->dma_area,
+ sizeof(*vcdev->dma_area));
kfree(vcdev);
}
vcdev->err = -EIO;
}
virtio_ccw_check_activity(vcdev, activity);
- for_each_set_bit(i, &vcdev->indicators,
- sizeof(vcdev->indicators) * BITS_PER_BYTE) {
+ for_each_set_bit(i, indicators(vcdev),
+ sizeof(*indicators(vcdev)) * BITS_PER_BYTE) {
/* The bit clear must happen before the vring kick. */
- clear_bit(i, &vcdev->indicators);
+ clear_bit(i, indicators(vcdev));
barrier();
vq = virtio_ccw_vq_by_ind(vcdev, i);
vring_interrupt(0, vq);
}
- if (test_bit(0, &vcdev->indicators2)) {
+ if (test_bit(0, indicators2(vcdev))) {
virtio_config_changed(&vcdev->vdev);
- clear_bit(0, &vcdev->indicators2);
+ clear_bit(0, indicators2(vcdev));
}
}
struct ccw1 *ccw;
int ret;
- ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
+ ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
if (!ccw)
return -ENOMEM;
- rev = kzalloc(sizeof(*rev), GFP_DMA | GFP_KERNEL);
+ rev = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*rev));
if (!rev) {
- kfree(ccw);
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
return -ENOMEM;
}
}
} while (ret == -EOPNOTSUPP);
- kfree(ccw);
- kfree(rev);
+ ccw_device_dma_free(vcdev->cdev, ccw, sizeof(*ccw));
+ ccw_device_dma_free(vcdev->cdev, rev, sizeof(*rev));
return ret;
}
ret = -ENOMEM;
goto out_free;
}
-
vcdev->vdev.dev.parent = &cdev->dev;
- cdev->dev.dma_mask = &vcdev->dma_mask;
- /* we are fine with common virtio infrastructure using 64 bit DMA */
- ret = dma_set_mask_and_coherent(&cdev->dev, DMA_BIT_MASK(64));
- if (ret) {
- dev_warn(&cdev->dev, "Failed to enable 64-bit DMA.\n");
- goto out_free;
- }
-
- vcdev->config_block = kzalloc(sizeof(*vcdev->config_block),
- GFP_DMA | GFP_KERNEL);
- if (!vcdev->config_block) {
- ret = -ENOMEM;
- goto out_free;
- }
- vcdev->status = kzalloc(sizeof(*vcdev->status), GFP_DMA | GFP_KERNEL);
- if (!vcdev->status) {
+ vcdev->cdev = cdev;
+ vcdev->dma_area = ccw_device_dma_zalloc(vcdev->cdev,
+ sizeof(*vcdev->dma_area));
+ if (!vcdev->dma_area) {
ret = -ENOMEM;
goto out_free;
}
vcdev->vdev.dev.release = virtio_ccw_release_dev;
vcdev->vdev.config = &virtio_ccw_config_ops;
- vcdev->cdev = cdev;
init_waitqueue_head(&vcdev->wait_q);
INIT_LIST_HEAD(&vcdev->virtqueues);
spin_lock_init(&vcdev->lock);
return ret;
out_free:
if (vcdev) {
- kfree(vcdev->status);
- kfree(vcdev->config_block);
+ ccw_device_dma_free(vcdev->cdev, vcdev->dma_area,
+ sizeof(*vcdev->dma_area));
}
kfree(vcdev);
return ret;
static int __init virtio_ccw_init(void)
{
+ int rc;
+
/* parse no_auto string before we do anything further */
no_auto_parse();
- return ccw_driver_register(&virtio_ccw_driver);
+
+ summary_indicators = cio_dma_zalloc(MAX_AIRQ_AREAS);
+ if (!summary_indicators)
+ return -ENOMEM;
+ rc = ccw_driver_register(&virtio_ccw_driver);
+ if (rc)
+ cio_dma_free(summary_indicators, MAX_AIRQ_AREAS);
+ return rc;
}
device_initcall(virtio_ccw_init);
struct pvscsi_adapter *adapter = shost_priv(host);
struct pvscsi_ctx *ctx;
unsigned long flags;
+ unsigned char op;
spin_lock_irqsave(&adapter->hw_lock, flags);
}
cmd->scsi_done = done;
+ op = cmd->cmnd[0];
dev_dbg(&cmd->device->sdev_gendev,
- "queued cmd %p, ctx %p, op=%x\n", cmd, ctx, cmd->cmnd[0]);
+ "queued cmd %p, ctx %p, op=%x\n", cmd, ctx, op);
spin_unlock_irqrestore(&adapter->hw_lock, flags);
- pvscsi_kick_io(adapter, cmd->cmnd[0]);
+ pvscsi_kick_io(adapter, op);
return 0;
}
obj-$(CONFIG_SOC_SAMSUNG) += samsung/
obj-y += sunxi/
obj-$(CONFIG_ARCH_TEGRA) += tegra/
-obj-$(CONFIG_SOC_TI) += ti/
+obj-y += ti/
obj-$(CONFIG_ARCH_U8500) += ux500/
obj-$(CONFIG_PLAT_VERSATILE) += versatile/
obj-y += xilinx/
called ti_sci_pm_domains. Note this is needed early in boot before
rootfs may be available.
+endif # SOC_TI
+
config TI_SCI_INTA_MSI_DOMAIN
bool
select GENERIC_MSI_IRQ_DOMAIN
help
Driver to enable Interrupt Aggregator specific MSI Domain.
-
-endif # SOC_TI
return CHAP_DIGEST_UNKNOWN;
}
+static void chap_close(struct iscsi_conn *conn)
+{
+ kfree(conn->auth_protocol);
+ conn->auth_protocol = NULL;
+}
+
static struct iscsi_chap *chap_server_open(
struct iscsi_conn *conn,
struct iscsi_node_auth *auth,
case CHAP_DIGEST_UNKNOWN:
default:
pr_err("Unsupported CHAP_A value\n");
- kfree(conn->auth_protocol);
+ chap_close(conn);
return NULL;
}
* Generate Challenge.
*/
if (chap_gen_challenge(conn, 1, aic_str, aic_len) < 0) {
- kfree(conn->auth_protocol);
+ chap_close(conn);
return NULL;
}
return chap;
}
-static void chap_close(struct iscsi_conn *conn)
-{
- kfree(conn->auth_protocol);
- conn->auth_protocol = NULL;
-}
-
static int chap_server_compute_md5(
struct iscsi_conn *conn,
struct iscsi_node_auth *auth,
/* Always in 512 byte units for Linux/Block */
block_lba += sg->length >> SECTOR_SHIFT;
- sectors -= 1;
+ sectors -= sg->length >> SECTOR_SHIFT;
}
iblock_submit_bios(&list);
config VALIDATE_FS_PARSER
bool "Validate filesystem parameter description"
- default y
help
Enable this to perform validation of the parameter description for a
filesystem when it is registered.
struct afs_super_info *as = AFS_FS_S(cbi->sb);
struct afs_volume *volume = as->volume;
- write_lock(&volume->cb_break_lock);
+ write_lock(&volume->cb_v_break_lock);
volume->cb_v_break++;
- write_unlock(&volume->cb_break_lock);
+ write_unlock(&volume->cb_v_break_lock);
} else {
data.volume = NULL;
data.fid = *fid;
}
/*
+ * Set the file size and block count. Estimate the number of 512 bytes blocks
+ * used, rounded up to nearest 1K for consistency with other AFS clients.
+ */
+static void afs_set_i_size(struct afs_vnode *vnode, u64 size)
+{
+ i_size_write(&vnode->vfs_inode, size);
+ vnode->vfs_inode.i_blocks = ((size + 1023) >> 10) << 1;
+}
+
+/*
* Initialise an inode from the vnode status.
*/
static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key,
return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type);
}
- /*
- * Estimate 512 bytes blocks used, rounded up to nearest 1K
- * for consistency with other AFS clients.
- */
- inode->i_blocks = ((i_size_read(inode) + 1023) >> 10) << 1;
- i_size_write(&vnode->vfs_inode, status->size);
+ afs_set_i_size(vnode, status->size);
vnode->invalid_before = status->data_version;
inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
if (expected_version &&
*expected_version != status->data_version) {
- kdebug("vnode modified %llx on {%llx:%llu} [exp %llx] %s",
- (unsigned long long) status->data_version,
- vnode->fid.vid, vnode->fid.vnode,
- (unsigned long long) *expected_version,
- fc->type ? fc->type->name : "???");
+ if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
+ pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s\n",
+ vnode->fid.vid, vnode->fid.vnode,
+ (unsigned long long)*expected_version,
+ (unsigned long long)status->data_version,
+ fc->type ? fc->type->name : "???");
+
vnode->invalid_before = status->data_version;
if (vnode->status.type == AFS_FTYPE_DIR) {
if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
if (data_changed) {
inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
- i_size_write(&vnode->vfs_inode, status->size);
+ afs_set_i_size(vnode, status->size);
}
}
struct rxrpc_call *rxcall; /* RxRPC call handle */
struct key *key; /* security for this call */
struct afs_net *net; /* The network namespace */
- union {
- struct afs_server *server;
- struct afs_vlserver *vlserver;
- };
+ struct afs_server *server; /* The fileserver record if fs op (pins ref) */
+ struct afs_vlserver *vlserver; /* The vlserver record if vl op */
struct afs_cb_interest *cbi; /* Callback interest for server used */
struct afs_vnode *lvnode; /* vnode being locked */
void *request; /* request data (first part) */
unsigned int servers_seq; /* Incremented each time ->servers changes */
unsigned cb_v_break; /* Break-everything counter. */
- rwlock_t cb_break_lock;
+ rwlock_t cb_v_break_lock;
afs_voltype_t type; /* type of volume */
short error;
atomic_set(&volume->usage, 1);
INIT_LIST_HEAD(&volume->proc_link);
rwlock_init(&volume->servers_lock);
+ rwlock_init(&volume->cb_v_break_lock);
memcpy(volume->name, vldb->name, vldb->name_len + 1);
slist = afs_alloc_server_list(params->cell, params->key, vldb, type_mask);
struct __aio_sigset ksig = { NULL, };
sigset_t ksigmask, sigsaved;
struct timespec64 ts;
+ bool interrupted;
int ret;
if (timeout && unlikely(get_timespec64(&ts, timeout)))
return ret;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
- restore_user_sigmask(ksig.sigmask, &sigsaved);
- if (signal_pending(current) && !ret)
+
+ interrupted = signal_pending(current);
+ restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
+ if (interrupted && !ret)
ret = -ERESTARTNOHAND;
return ret;
struct __aio_sigset ksig = { NULL, };
sigset_t ksigmask, sigsaved;
struct timespec64 ts;
+ bool interrupted;
int ret;
if (timeout && unlikely(get_old_timespec32(&ts, timeout)))
return ret;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
- restore_user_sigmask(ksig.sigmask, &sigsaved);
- if (signal_pending(current) && !ret)
+
+ interrupted = signal_pending(current);
+ restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
+ if (interrupted && !ret)
ret = -ERESTARTNOHAND;
return ret;
struct __compat_aio_sigset ksig = { NULL, };
sigset_t ksigmask, sigsaved;
struct timespec64 t;
+ bool interrupted;
int ret;
if (timeout && get_old_timespec32(&t, timeout))
return ret;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
- restore_user_sigmask(ksig.sigmask, &sigsaved);
- if (signal_pending(current) && !ret)
+
+ interrupted = signal_pending(current);
+ restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
+ if (interrupted && !ret)
ret = -ERESTARTNOHAND;
return ret;
struct __compat_aio_sigset ksig = { NULL, };
sigset_t ksigmask, sigsaved;
struct timespec64 t;
+ bool interrupted;
int ret;
if (timeout && get_timespec64(&t, timeout))
return ret;
ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
- restore_user_sigmask(ksig.sigmask, &sigsaved);
- if (signal_pending(current) && !ret)
+
+ interrupted = signal_pending(current);
+ restore_user_sigmask(ksig.sigmask, &sigsaved, interrupted);
+ if (interrupted && !ret)
ret = -ERESTARTNOHAND;
return ret;
static int load_flat_shared_library(int id, struct lib_info *libs)
{
+ /*
+ * This is a fake bprm struct; only the members "buf", "file" and
+ * "filename" are actually used.
+ */
struct linux_binprm bprm;
int res;
char buf[16];
+ loff_t pos = 0;
memset(&bprm, 0, sizeof(bprm));
if (IS_ERR(bprm.file))
return res;
- bprm.cred = prepare_exec_creds();
- res = -ENOMEM;
- if (!bprm.cred)
- goto out;
-
- /* We don't really care about recalculating credentials at this point
- * as we're past the point of no return and are dealing with shared
- * libraries.
- */
- bprm.called_set_creds = 1;
+ res = kernel_read(bprm.file, bprm.buf, BINPRM_BUF_SIZE, &pos);
- res = prepare_binprm(&bprm);
-
- if (!res)
+ if (res >= 0)
res = load_flat_file(&bprm, libs, id, NULL);
- abort_creds(bprm.cred);
-
-out:
allow_write_access(bprm.file);
fput(bprm.file);
if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
dout("build_path path+%d: %p SNAPDIR\n",
pos, temp);
- } else if (stop_on_nosnap && inode &&
+ } else if (stop_on_nosnap && inode && dentry != temp &&
ceph_snap(inode) == CEPH_NOSNAP) {
spin_unlock(&temp->d_lock);
+ pos++; /* get rid of any prepended '/' */
break;
} else {
pos -= temp->d_name.len;
kfree(dfs_rsp);
return rc;
}
+
+static int
+parse_reparse_symlink(struct reparse_symlink_data_buffer *symlink_buf,
+ u32 plen, char **target_path,
+ struct cifs_sb_info *cifs_sb)
+{
+ unsigned int sub_len;
+ unsigned int sub_offset;
+
+ /* We only handle Symbolic Link : MS-FSCC 2.1.2.4 */
+ if (le32_to_cpu(symlink_buf->ReparseTag) != IO_REPARSE_TAG_SYMLINK) {
+ cifs_dbg(VFS, "srv returned invalid symlink buffer\n");
+ return -EIO;
+ }
+
+ sub_offset = le16_to_cpu(symlink_buf->SubstituteNameOffset);
+ sub_len = le16_to_cpu(symlink_buf->SubstituteNameLength);
+ if (sub_offset + 20 > plen ||
+ sub_offset + sub_len + 20 > plen) {
+ cifs_dbg(VFS, "srv returned malformed symlink buffer\n");
+ return -EIO;
+ }
+
+ *target_path = cifs_strndup_from_utf16(
+ symlink_buf->PathBuffer + sub_offset,
+ sub_len, true, cifs_sb->local_nls);
+ if (!(*target_path))
+ return -ENOMEM;
+
+ convert_delimiter(*target_path, '/');
+ cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path);
+
+ return 0;
+}
+
#define SMB2_SYMLINK_STRUCT_SIZE \
(sizeof(struct smb2_err_rsp) - 1 + sizeof(struct smb2_symlink_err_rsp))
struct kvec close_iov[1];
struct smb2_create_rsp *create_rsp;
struct smb2_ioctl_rsp *ioctl_rsp;
- char *ioctl_buf;
+ struct reparse_data_buffer *reparse_buf;
u32 plen;
cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
+ *target_path = NULL;
+
if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
if ((rc == 0) && (is_reparse_point)) {
/* See MS-FSCC 2.3.23 */
- ioctl_buf = (char *)ioctl_rsp + le32_to_cpu(ioctl_rsp->OutputOffset);
+ reparse_buf = (struct reparse_data_buffer *)
+ ((char *)ioctl_rsp +
+ le32_to_cpu(ioctl_rsp->OutputOffset));
plen = le32_to_cpu(ioctl_rsp->OutputCount);
if (plen + le32_to_cpu(ioctl_rsp->OutputOffset) >
rsp_iov[1].iov_len) {
- cifs_dbg(VFS, "srv returned invalid ioctl length: %d\n", plen);
+ cifs_dbg(VFS, "srv returned invalid ioctl len: %d\n",
+ plen);
+ rc = -EIO;
+ goto querty_exit;
+ }
+
+ if (plen < 8) {
+ cifs_dbg(VFS, "reparse buffer is too small. Must be "
+ "at least 8 bytes but was %d\n", plen);
+ rc = -EIO;
+ goto querty_exit;
+ }
+
+ if (plen < le16_to_cpu(reparse_buf->ReparseDataLength) + 8) {
+ cifs_dbg(VFS, "srv returned invalid reparse buf "
+ "length: %d\n", plen);
rc = -EIO;
goto querty_exit;
}
- /* Do stuff with ioctl_buf/plen */
+ rc = parse_reparse_symlink(
+ (struct reparse_symlink_data_buffer *)reparse_buf,
+ plen, target_path, cifs_sb);
goto querty_exit;
}
__u8 PathBuffer[0]; /* Variable Length */
} __packed;
-/* See MS-FSCC 2.1.2.4 and cifspdu.h for struct reparse_symlink_data */
+#define SYMLINK_FLAG_RELATIVE 0x00000001
+
+struct reparse_symlink_data_buffer {
+ __le32 ReparseTag;
+ __le16 ReparseDataLength;
+ __u16 Reserved;
+ __le16 SubstituteNameOffset;
+ __le16 SubstituteNameLength;
+ __le16 PrintNameOffset;
+ __le16 PrintNameLength;
+ __le32 Flags;
+ __u8 PathBuffer[0]; /* Variable Length */
+} __packed;
/* See MS-FSCC 2.1.2.6 and cifspdu.h for struct reparse_posix_data */
xas_reset(xas);
xas_lock_irq(xas);
- if (dax_entry_size(entry) != dax_entry_size(new_entry)) {
+ if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
+ void *old;
+
dax_disassociate_entry(entry, mapping, false);
dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address);
- }
-
- if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
/*
* Only swap our new entry into the page cache if the current
* entry is a zero page or an empty entry. If a normal PTE or
* existing entry is a PMD, we will just leave the PMD in the
* tree and dirty it if necessary.
*/
- void *old = dax_lock_entry(xas, new_entry);
+ old = dax_lock_entry(xas, new_entry);
WARN_ON_ONCE(old != xa_mk_value(xa_to_value(entry) |
DAX_LOCKED));
entry = new_entry;
error = do_epoll_wait(epfd, events, maxevents, timeout);
- restore_user_sigmask(sigmask, &sigsaved);
+ restore_user_sigmask(sigmask, &sigsaved, error == -EINTR);
return error;
}
err = do_epoll_wait(epfd, events, maxevents, timeout);
- restore_user_sigmask(sigmask, &sigsaved);
+ restore_user_sigmask(sigmask, &sigsaved, err == -EINTR);
return err;
}
static void __address_space_init_once(struct address_space *mapping)
{
- xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ);
+ xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
init_rwsem(&mapping->i_mmap_rwsem);
INIT_LIST_HEAD(&mapping->private_list);
spin_lock_init(&mapping->private_lock);
state->cur_req++;
}
+ req->file = NULL;
req->ctx = ctx;
req->flags = 0;
/* one is dropped after submission, the other at completion */
req->sequence = ctx->cached_sq_head - 1;
}
- if (!io_op_needs_file(s->sqe)) {
- req->file = NULL;
+ if (!io_op_needs_file(s->sqe))
return 0;
- }
if (flags & IOSQE_FIXED_FILE) {
if (unlikely(!ctx->user_files ||
}
ret = wait_event_interruptible(ctx->wait, io_cqring_events(ring) >= min_events);
- if (ret == -ERESTARTSYS)
- ret = -EINTR;
if (sig)
- restore_user_sigmask(sig, &sigsaved);
+ restore_user_sigmask(sig, &sigsaved, ret == -ERESTARTSYS);
+
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
return READ_ONCE(ring->r.head) == READ_ONCE(ring->r.tail) ? ret : 0;
}
if (!check_mnt(p))
goto out;
- /* The thing moved should be either ours or completely unattached. */
- if (attached && !check_mnt(old))
+ /* The thing moved must be mounted... */
+ if (!is_mounted(&old->mnt))
goto out;
- if (!attached && !(ns && is_anon_ns(ns)))
+ /* ... and either ours or the root of anon namespace */
+ if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
goto out;
if (old->mnt.mnt_flags & MNT_LOCKED)
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
-static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS;
+static unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO;
static unsigned int dataserver_retrans;
static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg);
* Never use more than a third of the remaining memory,
* unless it's the only way to give this client a slot:
*/
- avail = clamp_t(int, avail, slotsize, total_avail/3);
+ avail = clamp_t(unsigned long, avail, slotsize, total_avail/3);
num = min_t(int, num, avail / slotsize);
nfsd_drc_mem_used += num * slotsize;
spin_unlock(&nfsd_drc_lock);
* a program is not able to use ptrace(2) in that case. It is
* safe because the task has stopped executing permanently.
*/
- if (permitted && (task->flags & PF_DUMPCORE)) {
+ if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE))) {
if (try_get_task_stack(task)) {
eip = KSTK_EIP(task);
esp = KSTK_ESP(task);
struct pid *tgid_pidfd_to_pid(const struct file *file)
{
- if (!d_is_dir(file->f_path.dentry) ||
- (file->f_op != &proc_tgid_base_operations))
+ if (file->f_op != &proc_tgid_base_operations)
return ERR_PTR(-EBADF);
return proc_pid(file_inode(file));
return ret;
ret = core_sys_select(n, inp, outp, exp, to);
+ restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND);
ret = poll_select_copy_remaining(&end_time, tsp, type, ret);
- restore_user_sigmask(sigmask, &sigsaved);
-
return ret;
}
ret = do_sys_poll(ufds, nfds, to);
- restore_user_sigmask(sigmask, &sigsaved);
-
+ restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
/* We can restart this syscall, usually */
if (ret == -EINTR)
ret = -ERESTARTNOHAND;
ret = do_sys_poll(ufds, nfds, to);
- restore_user_sigmask(sigmask, &sigsaved);
-
+ restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
/* We can restart this syscall, usually */
if (ret == -EINTR)
ret = -ERESTARTNOHAND;
return ret;
ret = compat_core_sys_select(n, inp, outp, exp, to);
+ restore_user_sigmask(sigmask, &sigsaved, ret == -ERESTARTNOHAND);
ret = poll_select_copy_remaining(&end_time, tsp, type, ret);
- restore_user_sigmask(sigmask, &sigsaved);
-
return ret;
}
ret = do_sys_poll(ufds, nfds, to);
- restore_user_sigmask(sigmask, &sigsaved);
-
+ restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
/* We can restart this syscall, usually */
if (ret == -EINTR)
ret = -ERESTARTNOHAND;
ret = do_sys_poll(ufds, nfds, to);
- restore_user_sigmask(sigmask, &sigsaved);
-
+ restore_user_sigmask(sigmask, &sigsaved, ret == -EINTR);
/* We can restart this syscall, usually */
if (ret == -EINTR)
ret = -ERESTARTNOHAND;
/*
* Start with fault_pending_wqh and fault_wqh so they're more likely
* to be in the same cacheline.
+ *
+ * Locking order:
+ * fd_wqh.lock
+ * fault_pending_wqh.lock
+ * fault_wqh.lock
+ * event_wqh.lock
+ *
+ * To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
+ * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
+ * also taken in IRQ context.
*/
struct userfaultfd_ctx {
/* waitqueue head for the pending (i.e. not read) userfaults */
blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
TASK_KILLABLE;
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
/*
* After the __add_wait_queue the uwq is visible to userland
* through poll/read().
* __add_wait_queue.
*/
set_current_state(blocking_state);
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
if (!is_vm_hugetlb_page(vmf->vma))
must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
* kernel stack can be released after the list_del_init.
*/
if (!list_empty_careful(&uwq.wq.entry)) {
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
/*
* No need of list_del_init(), the uwq on the stack
* will be freed shortly anyway.
*/
list_del(&uwq.wq.entry);
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
}
/*
init_waitqueue_entry(&ewq->wq, current);
release_new_ctx = NULL;
- spin_lock(&ctx->event_wqh.lock);
+ spin_lock_irq(&ctx->event_wqh.lock);
/*
* After the __add_wait_queue the uwq is visible to userland
* through poll/read().
break;
}
- spin_unlock(&ctx->event_wqh.lock);
+ spin_unlock_irq(&ctx->event_wqh.lock);
wake_up_poll(&ctx->fd_wqh, EPOLLIN);
schedule();
- spin_lock(&ctx->event_wqh.lock);
+ spin_lock_irq(&ctx->event_wqh.lock);
}
__set_current_state(TASK_RUNNING);
- spin_unlock(&ctx->event_wqh.lock);
+ spin_unlock_irq(&ctx->event_wqh.lock);
if (release_new_ctx) {
struct vm_area_struct *vma;
* the last page faults that may have been already waiting on
* the fault_*wqh.
*/
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range);
__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range);
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
/* Flush pending events that may still wait on event_wqh */
wake_up_all(&ctx->event_wqh);
if (!ret && msg->event == UFFD_EVENT_FORK) {
ret = resolve_userfault_fork(ctx, fork_nctx, msg);
- spin_lock(&ctx->event_wqh.lock);
+ spin_lock_irq(&ctx->event_wqh.lock);
if (!list_empty(&fork_event)) {
/*
* The fork thread didn't abort, so we can
if (ret)
userfaultfd_ctx_put(fork_nctx);
}
- spin_unlock(&ctx->event_wqh.lock);
+ spin_unlock_irq(&ctx->event_wqh.lock);
}
return ret;
static void __wake_userfault(struct userfaultfd_ctx *ctx,
struct userfaultfd_wake_range *range)
{
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
/* wake all in the range and autoremove */
if (waitqueue_active(&ctx->fault_pending_wqh))
__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL,
range);
if (waitqueue_active(&ctx->fault_wqh))
__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range);
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
}
static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
wait_queue_entry_t *wq;
unsigned long pending = 0, total = 0;
- spin_lock(&ctx->fault_pending_wqh.lock);
+ spin_lock_irq(&ctx->fault_pending_wqh.lock);
list_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) {
pending++;
total++;
list_for_each_entry(wq, &ctx->fault_wqh.head, entry) {
total++;
}
- spin_unlock(&ctx->fault_pending_wqh.lock);
+ spin_unlock_irq(&ctx->fault_pending_wqh.lock);
/*
* If more protocols will be added, there will be all shown
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_GENERIC_VSYSCALL_H
+#define __ASM_GENERIC_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#ifndef __arch_get_k_vdso_data
+static __always_inline struct vdso_data *__arch_get_k_vdso_data(void)
+{
+ return NULL;
+}
+#endif /* __arch_get_k_vdso_data */
+
+#ifndef __arch_update_vdso_data
+static __always_inline int __arch_update_vdso_data(void)
+{
+ return 0;
+}
+#endif /* __arch_update_vdso_data */
+
+#ifndef __arch_get_clock_mode
+static __always_inline int __arch_get_clock_mode(struct timekeeper *tk)
+{
+ return 0;
+}
+#endif /* __arch_get_clock_mode */
+
+#ifndef __arch_use_vsyscall
+static __always_inline int __arch_use_vsyscall(struct vdso_data *vdata)
+{
+ return 1;
+}
+#endif /* __arch_use_vsyscall */
+
+#ifndef __arch_update_vsyscall
+static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata,
+ struct timekeeper *tk)
+{
+}
+#endif /* __arch_update_vsyscall */
+
+#ifndef __arch_sync_vdso_data
+static __always_inline void __arch_sync_vdso_data(struct vdso_data *vdata)
+{
+}
+#endif /* __arch_sync_vdso_data */
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_GENERIC_VSYSCALL_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Definitions for the clocksource provided by the Hyper-V
+ * hypervisor to guest VMs, as described in the Hyper-V Top
+ * Level Functional Spec (TLFS).
+ *
+ * Copyright (C) 2019, Microsoft, Inc.
+ *
+ * Author: Michael Kelley <mikelley@microsoft.com>
+ */
+
+#ifndef __CLKSOURCE_HYPERV_TIMER_H
+#define __CLKSOURCE_HYPERV_TIMER_H
+
+#include <linux/clocksource.h>
+#include <linux/math64.h>
+#include <asm/mshyperv.h>
+
+#define HV_MAX_MAX_DELTA_TICKS 0xffffffff
+#define HV_MIN_DELTA_TICKS 1
+
+/* Routines called by the VMbus driver */
+extern int hv_stimer_alloc(int sint);
+extern void hv_stimer_free(void);
+extern void hv_stimer_init(unsigned int cpu);
+extern void hv_stimer_cleanup(unsigned int cpu);
+extern void hv_stimer_global_cleanup(void);
+extern void hv_stimer0_isr(void);
+
+#if IS_ENABLED(CONFIG_HYPERV)
+extern struct clocksource *hyperv_cs;
+extern void hv_init_clocksource(void);
+#endif /* CONFIG_HYPERV */
+
+#ifdef CONFIG_HYPERV_TSCPAGE
+extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
+
+static inline notrace u64
+hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, u64 *cur_tsc)
+{
+ u64 scale, offset;
+ u32 sequence;
+
+ /*
+ * The protocol for reading Hyper-V TSC page is specified in Hypervisor
+ * Top-Level Functional Specification ver. 3.0 and above. To get the
+ * reference time we must do the following:
+ * - READ ReferenceTscSequence
+ * A special '0' value indicates the time source is unreliable and we
+ * need to use something else. The currently published specification
+ * versions (up to 4.0b) contain a mistake and wrongly claim '-1'
+ * instead of '0' as the special value, see commit c35b82ef0294.
+ * - ReferenceTime =
+ * ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset
+ * - READ ReferenceTscSequence again. In case its value has changed
+ * since our first reading we need to discard ReferenceTime and repeat
+ * the whole sequence as the hypervisor was updating the page in
+ * between.
+ */
+ do {
+ sequence = READ_ONCE(tsc_pg->tsc_sequence);
+ if (!sequence)
+ return U64_MAX;
+ /*
+ * Make sure we read sequence before we read other values from
+ * TSC page.
+ */
+ smp_rmb();
+
+ scale = READ_ONCE(tsc_pg->tsc_scale);
+ offset = READ_ONCE(tsc_pg->tsc_offset);
+ *cur_tsc = hv_get_raw_timer();
+
+ /*
+ * Make sure we read sequence after we read all other values
+ * from TSC page.
+ */
+ smp_rmb();
+
+ } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
+
+ return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset;
+}
+
+static inline notrace u64
+hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
+{
+ u64 cur_tsc;
+
+ return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc);
+}
+
+#else /* CONFIG_HYPERV_TSC_PAGE */
+static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
+{
+ return NULL;
+}
+
+static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
+ u64 *cur_tsc)
+{
+ return U64_MAX;
+}
+#endif /* CONFIG_HYPERV_TSCPAGE */
+
+#endif
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * TI DaVinci clocksource driver
+ *
+ * Copyright (C) 2019 Texas Instruments
+ * Author: Bartosz Golaszewski <bgolaszewski@baylibre.com>
+ */
+
+#ifndef __TIMER_DAVINCI_H__
+#define __TIMER_DAVINCI_H__
+
+#include <linux/clk.h>
+#include <linux/ioport.h>
+
+enum {
+ DAVINCI_TIMER_CLOCKEVENT_IRQ,
+ DAVINCI_TIMER_CLOCKSOURCE_IRQ,
+ DAVINCI_TIMER_NUM_IRQS,
+};
+
+/**
+ * struct davinci_timer_cfg - davinci clocksource driver configuration struct
+ * @reg: register range resource
+ * @irq: clockevent and clocksource interrupt resources
+ * @cmp_off: if set - it specifies the compare register used for clockevent
+ *
+ * Note: if the compare register is specified, the driver will use the bottom
+ * clock half for both clocksource and clockevent and the compare register
+ * to generate event irqs. The user must supply the correct compare register
+ * interrupt number.
+ *
+ * This is only used by da830 the DSP of which uses the top half. The timer
+ * driver still configures the top half to run in free-run mode.
+ */
+struct davinci_timer_cfg {
+ struct resource reg;
+ struct resource irq[DAVINCI_TIMER_NUM_IRQS];
+ unsigned int cmp_off;
+};
+
+int __init davinci_timer_register(struct clk *clk,
+ const struct davinci_timer_cfg *data);
+
+#endif /* __TIMER_DAVINCI_H__ */
#define CLKID_MALI_1_SEL 172
#define CLKID_MALI_1 174
#define CLKID_MALI 175
-#define CLKID_MPLL_5OM 177
+#define CLKID_MPLL_50M 177
#define CLKID_CPU_CLK 187
#define CLKID_PCIE_PLL 201
#define CLKID_VDEC_1 204
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
/*
* Copyright (C) 2018-2019 SiFive, Inc.
* Wesley Terpstra
#include <linux/errno.h>
#include <linux/ioport.h> /* for struct resource */
+#include <linux/irqdomain.h>
#include <linux/resource_ext.h>
#include <linux/device.h>
#include <linux/property.h>
void acpi_set_irq_model(enum acpi_irq_model_id model,
struct fwnode_handle *fwnode);
+struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags,
+ unsigned int size,
+ struct fwnode_handle *fwnode,
+ const struct irq_domain_ops *ops,
+ void *host_data);
+
#ifdef CONFIG_X86_IO_APIC
extern int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity);
#else
#ifdef CONFIG_ACPI_PPTT
int find_acpi_cpu_topology(unsigned int cpu, int level);
int find_acpi_cpu_topology_package(unsigned int cpu);
+int find_acpi_cpu_topology_hetero_id(unsigned int cpu);
int find_acpi_cpu_cache_topology(unsigned int cpu, int level);
#else
static inline int find_acpi_cpu_topology(unsigned int cpu, int level)
{
return -EINVAL;
}
+static inline int find_acpi_cpu_topology_hetero_id(unsigned int cpu)
+{
+ return -EINVAL;
+}
static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
{
return -EINVAL;
CACHE_TYPE_UNIFIED = BIT(2),
};
+extern unsigned int coherency_max_size;
+
/**
* struct cacheinfo - represent a cache leaf node
* @id: This cache's id. It is unique among caches with the same (type, level).
CPUHP_AP_PERF_ARM_ACPI_STARTING,
CPUHP_AP_PERF_ARM_STARTING,
CPUHP_AP_ARM_L2X0_STARTING,
+ CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
CPUHP_AP_ARM_ARCH_TIMER_STARTING,
CPUHP_AP_ARM_GLOBAL_TIMER_STARTING,
CPUHP_AP_JCORE_TIMER_STARTING,
- CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
CPUHP_AP_ARM_TWD_STARTING,
CPUHP_AP_QCOM_TIMER_STARTING,
CPUHP_AP_TEGRA_TIMER_STARTING,
gfp_t gfp_mask, unsigned int order);
extern void devm_free_pages(struct device *dev, unsigned long addr);
-void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res);
+void __iomem *devm_ioremap_resource(struct device *dev,
+ const struct resource *res);
void __iomem *devm_of_iomap(struct device *dev,
struct device_node *node, int index,
#ifndef _LINUX_HRTIMER_H
#define _LINUX_HRTIMER_H
+#include <linux/hrtimer_defs.h>
#include <linux/rbtree.h>
-#include <linux/ktime.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/percpu.h>
extern void hrtimer_interrupt(struct clock_event_device *dev);
-/*
- * The resolution of the clocks. The resolution value is returned in
- * the clock_getres() system call to give application programmers an
- * idea of the (in)accuracy of timers. Timer values are rounded up to
- * this resolution values.
- */
-# define HIGH_RES_NSEC 1
-# define KTIME_HIGH_RES (HIGH_RES_NSEC)
-# define MONOTONIC_RES_NSEC HIGH_RES_NSEC
-# define KTIME_MONOTONIC_RES KTIME_HIGH_RES
-
extern void clock_was_set_delayed(void);
extern unsigned int hrtimer_resolution;
#else
-# define MONOTONIC_RES_NSEC LOW_RES_NSEC
-# define KTIME_MONOTONIC_RES KTIME_LOW_RES
-
#define hrtimer_resolution (unsigned int)LOW_RES_NSEC
static inline void clock_was_set_delayed(void) { }
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_HRTIMER_DEFS_H
+#define _LINUX_HRTIMER_DEFS_H
+
+#include <linux/ktime.h>
+
+#ifdef CONFIG_HIGH_RES_TIMERS
+
+/*
+ * The resolution of the clocks. The resolution value is returned in
+ * the clock_getres() system call to give application programmers an
+ * idea of the (in)accuracy of timers. Timer values are rounded up to
+ * this resolution values.
+ */
+# define HIGH_RES_NSEC 1
+# define KTIME_HIGH_RES (HIGH_RES_NSEC)
+# define MONOTONIC_RES_NSEC HIGH_RES_NSEC
+# define KTIME_MONOTONIC_RES KTIME_HIGH_RES
+
+#else
+
+# define MONOTONIC_RES_NSEC LOW_RES_NSEC
+# define KTIME_MONOTONIC_RES KTIME_LOW_RES
+
+#endif
+
+#endif
void ishtp_get_device(struct ishtp_cl_device *cl_dev);
void ishtp_set_drvdata(struct ishtp_cl_device *cl_device, void *data);
void *ishtp_get_drvdata(struct ishtp_cl_device *cl_device);
+struct ishtp_cl_device *ishtp_dev_to_cl_device(struct device *dev);
int ishtp_register_event_cb(struct ishtp_cl_device *device,
void (*read_cb)(struct ishtp_cl_device *));
struct ishtp_fw_client *ishtp_fw_cl_get_client(struct ishtp_device *dev,
const struct gic_kvm_info *gic_get_kvm_info(void);
+struct irq_domain;
+struct fwnode_handle;
+int gicv2m_init(struct fwnode_handle *parent_handle,
+ struct irq_domain *parent);
+
#endif /* __LINUX_IRQCHIP_ARM_GIC_COMMON_H */
*/
void gic_init(void __iomem *dist , void __iomem *cpu);
-int gicv2m_init(struct fwnode_handle *parent_handle,
- struct irq_domain *parent);
-
void gic_send_sgi(unsigned int cpu_id, unsigned int irq);
int gic_get_cpu_id(unsigned int cpu);
void gic_migrate_target(unsigned int new_cpu_id);
#define DIV_ROUND_DOWN_ULL(ll, d) \
({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; })
-#define DIV_ROUND_UP_ULL(ll, d) DIV_ROUND_DOWN_ULL((ll) + (d) - 1, (d))
+#define DIV_ROUND_UP_ULL(ll, d) \
+ DIV_ROUND_DOWN_ULL((unsigned long long)(ll) + (d) - 1, (d))
#if BITS_PER_LONG == 32
# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP_ULL(ll, d)
* @flash_unlock: [FLASH-SPECIFIC] unlock a region of the SPI NOR
* @flash_is_locked: [FLASH-SPECIFIC] check if a region of the SPI NOR is
* @quad_enable: [FLASH-SPECIFIC] enables SPI NOR quad mode
+ * @clear_sr_bp: [FLASH-SPECIFIC] clears the Block Protection Bits from
+ * the SPI NOR Status Register.
* completely locked
* @priv: the private data
*/
int (*flash_unlock)(struct spi_nor *nor, loff_t ofs, uint64_t len);
int (*flash_is_locked)(struct spi_nor *nor, loff_t ofs, uint64_t len);
int (*quad_enable)(struct spi_nor *nor);
+ int (*clear_sr_bp)(struct spi_nor *nor);
void *priv;
};
mapping_gfp_mask(mapping));
}
-static inline struct page *find_subpage(struct page *page, pgoff_t offset)
-{
- unsigned long mask;
-
- if (PageHuge(page))
- return page;
-
- VM_BUG_ON_PAGE(PageTail(page), page);
-
- mask = (1UL << compound_order(page)) - 1;
- return page + (offset & mask);
-}
-
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
#endif /* CONFIG_ARM_PMU */
+#define ARMV8_SPE_PDEV_NAME "arm,spe-v1"
+
#endif /* __ARM_PMU_H__ */
#define PERF_PMU_CAP_NO_INTERRUPT 0x01
#define PERF_PMU_CAP_NO_NMI 0x02
#define PERF_PMU_CAP_AUX_NO_SG 0x04
+#define PERF_PMU_CAP_EXTENDED_REGS 0x08
#define PERF_PMU_CAP_EXCLUSIVE 0x10
#define PERF_PMU_CAP_ITRACE 0x20
#define PERF_PMU_CAP_HETEROGENEOUS_CPUS 0x40
#ifdef CONFIG_HAVE_PERF_REGS
#include <asm/perf_regs.h>
+
+#ifndef PERF_REG_EXTENDED_MASK
+#define PERF_REG_EXTENDED_MASK 0
+#endif
+
u64 perf_reg_value(struct pt_regs *regs, int idx);
int perf_reg_validate(u64 mask);
u64 perf_reg_abi(struct task_struct *task);
struct pt_regs *regs,
struct pt_regs *regs_user_copy);
#else
+
+#define PERF_REG_EXTENDED_MASK 0
+
static inline u64 perf_reg_value(struct pt_regs *regs, int idx)
{
return 0;
static inline void *pfn_t_to_virt(pfn_t pfn)
{
- if (pfn_t_has_page(pfn))
+ if (pfn_t_has_page(pfn) && !is_device_private_page(pfn_t_to_page(pfn)))
return __va(pfn_t_to_phys(pfn));
return NULL;
}
#define spin_cpu_relax() cpu_relax()
#endif
-/*
- * spin_cpu_yield may be called to yield (undirected) to the hypervisor if
- * necessary. This should be used if the wait is expected to take longer
- * than context switch overhead, but we can't sleep or do a directed yield.
- */
-#ifndef spin_cpu_yield
-#define spin_cpu_yield() cpu_relax_yield()
-#endif
-
#ifndef spin_end
#define spin_end()
#endif
}
#endif
-#ifndef cpu_relax_yield
-#define cpu_relax_yield() cpu_relax()
-#endif
-
extern int yield_to(struct task_struct *p, bool preempt);
extern void set_user_nice(struct task_struct *p, long nice);
extern int task_prio(const struct task_struct *p);
extern int set_user_sigmask(const sigset_t __user *usigmask, sigset_t *set,
sigset_t *oldset, size_t sigsetsize);
extern void restore_user_sigmask(const void __user *usigmask,
- sigset_t *sigsaved);
+ sigset_t *sigsaved, bool interrupted);
extern void set_current_blocked(sigset_t *);
extern void __set_current_blocked(const sigset_t *);
extern int show_unhandled_signals;
/*
* Call a function on all processors
*/
-int on_each_cpu(smp_call_func_t func, void *info, int wait);
+void on_each_cpu(smp_call_func_t func, void *info, int wait);
/*
* Call a function on processors specified by mask, which might include
/*
* Call a function on all other processors
*/
-int smp_call_function(smp_call_func_t func, void *info, int wait);
+void smp_call_function(smp_call_func_t func, void *info, int wait);
void smp_call_function_many(const struct cpumask *mask,
smp_call_func_t func, void *info, bool wait);
* These macros fold the SMP functionality into a single CPU system
*/
#define raw_smp_processor_id() 0
-static inline int up_smp_call_function(smp_call_func_t func, void *info)
+static inline void up_smp_call_function(smp_call_func_t func, void *info)
{
- return 0;
}
#define smp_call_function(func, info, wait) \
(up_smp_call_function(func, info))
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
void stop_machine_park(int cpu);
void stop_machine_unpark(int cpu);
+void stop_machine_yield(const struct cpumask *cpumask);
#else /* CONFIG_SMP */
extern unsigned int pm_suspend_global_flags;
-#define PM_SUSPEND_FLAG_FW_SUSPEND (1 << 0)
-#define PM_SUSPEND_FLAG_FW_RESUME (1 << 1)
+#define PM_SUSPEND_FLAG_FW_SUSPEND BIT(0)
+#define PM_SUSPEND_FLAG_FW_RESUME BIT(1)
+#define PM_SUSPEND_FLAG_NO_PLATFORM BIT(2)
static inline void pm_suspend_clear_flags(void)
{
pm_suspend_global_flags |= PM_SUSPEND_FLAG_FW_RESUME;
}
+static inline void pm_set_suspend_no_platform(void)
+{
+ pm_suspend_global_flags |= PM_SUSPEND_FLAG_NO_PLATFORM;
+}
+
/**
* pm_suspend_via_firmware - Check if platform firmware will suspend the system.
*
return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_FW_RESUME);
}
+/**
+ * pm_suspend_no_platform - Check if platform may change device power states.
+ *
+ * To be called during system-wide power management transitions to sleep states
+ * or during the subsequent system-wide transitions back to the working state.
+ *
+ * Return 'true' if the power states of devices remain under full control of the
+ * kernel throughout the system-wide suspend and resume cycle in progress (that
+ * is, if a device is put into a certain power state during suspend, it can be
+ * expected to remain in that state during resume).
+ */
+static inline bool pm_suspend_no_platform(void)
+{
+ return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_NO_PLATFORM);
+}
+
/* Suspend-to-idle state machnine. */
enum s2idle_states {
S2IDLE_STATE_NONE, /* Not suspended/suspending. */
return ktime_get_coarse_with_offset(TK_OFFS_TAI);
}
+static inline ktime_t ktime_get_coarse(void)
+{
+ struct timespec64 ts;
+
+ ktime_get_coarse_ts64(&ts);
+ return timespec64_to_ktime(ts);
+}
+
+static inline u64 ktime_get_coarse_ns(void)
+{
+ return ktime_to_ns(ktime_get_coarse());
+}
+
+static inline u64 ktime_get_coarse_real_ns(void)
+{
+ return ktime_to_ns(ktime_get_coarse_real());
+}
+
+static inline u64 ktime_get_coarse_boottime_ns(void)
+{
+ return ktime_to_ns(ktime_get_coarse_boottime());
+}
+
+static inline u64 ktime_get_coarse_clocktai_ns(void)
+{
+ return ktime_to_ns(ktime_get_coarse_clocktai());
+}
+
/**
* ktime_mono_to_real - Convert monotonic time to clock realtime
*/
return ktime_to_ns(ktime_get_real());
}
-static inline u64 ktime_get_boot_ns(void)
+static inline u64 ktime_get_boottime_ns(void)
{
return ktime_to_ns(ktime_get_boottime());
}
-static inline u64 ktime_get_tai_ns(void)
+static inline u64 ktime_get_clocktai_ns(void)
{
return ktime_to_ns(ktime_get_clocktai());
}
#define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn)
#endif
-/*
- * A deferrable timer will work normally when the system is busy, but
- * will not cause a CPU to come out of idle just to service it; instead,
- * the timer will be serviced when the CPU eventually wakes up with a
- * subsequent non-deferrable timer.
+/**
+ * @TIMER_DEFERRABLE: A deferrable timer will work normally when the
+ * system is busy, but will not cause a CPU to come out of idle just
+ * to service it; instead, the timer will be serviced when the CPU
+ * eventually wakes up with a subsequent non-deferrable timer.
*
- * An irqsafe timer is executed with IRQ disabled and it's safe to wait for
- * the completion of the running instance from IRQ handlers, for example,
- * by calling del_timer_sync().
+ * @TIMER_IRQSAFE: An irqsafe timer is executed with IRQ disabled and
+ * it's safe to wait for the completion of the running instance from
+ * IRQ handlers, for example, by calling del_timer_sync().
*
* Note: The irq disabled callback execution is a special case for
* workqueue locking issues. It's not meant for executing random crap
* with interrupts disabled. Abuse is monitored!
+ *
+ * @TIMER_PINNED: A pinned timer will not be affected by any timer
+ * placement heuristics (like, NOHZ) and will always expire on the CPU
+ * on which the timer was enqueued.
+ *
+ * Note: Because enqueuing of timers can migrate the timer from one
+ * CPU to another, pinned timers are not guaranteed to stay on the
+ * initialy selected CPU. They move to the CPU on which the enqueue
+ * function is invoked via mod_timer() or add_timer(). If the timer
+ * should be placed on a particular CPU, then add_timer_on() has to be
+ * used.
*/
#define TIMER_CPUMASK 0x0003FFFF
#define TIMER_MIGRATING 0x00040000
#define XA_FLAGS_TRACK_FREE ((__force gfp_t)4U)
#define XA_FLAGS_ZERO_BUSY ((__force gfp_t)8U)
#define XA_FLAGS_ALLOC_WRAPPED ((__force gfp_t)16U)
+#define XA_FLAGS_ACCOUNT ((__force gfp_t)32U)
#define XA_FLAGS_MARK(mark) ((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \
(__force unsigned)(mark)))
* received by the device (not just by the host, in case it was
* buffered on the device) and be accurate to about 10ms.
* If the frame isn't buffered, just passing the return value of
- * ktime_get_boot_ns() is likely appropriate.
+ * ktime_get_boottime_ns() is likely appropriate.
* @parent_tsf: the time at the start of reception of the first octet of the
* timestamp field of the frame. The time is the TSF of the BSS specified
* by %parent_bssid.
inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT;
}
-static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
- struct in6_addr *daddr)
+static inline const struct in6_addr *rt6_nexthop(const struct rt6_info *rt,
+ const struct in6_addr *daddr)
{
if (rt->rt6i_flags & RTF_GATEWAY)
return &rt->rt6i_gateway;
struct rtable *rt_dst_alloc(struct net_device *dev,
unsigned int flags, u16 type,
bool nopolicy, bool noxfrm, bool will_cache);
+struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt);
struct in_ifaddr;
void fib_add_ifaddr(struct in_ifaddr *);
return !!ctx->partially_sent_record;
}
-static inline int tls_complete_pending_work(struct sock *sk,
- struct tls_context *ctx,
- int flags, long *timeo)
-{
- int rc = 0;
-
- if (unlikely(sk->sk_write_pending))
- rc = wait_on_pending_writer(sk, timeo);
-
- if (!rc && tls_is_partially_sent_record(ctx))
- rc = tls_push_partial_record(sk, ctx, flags);
-
- return rc;
-}
-
static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
{
return tls_ctx->pending_open_record_frags;
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __VDSO_DATAPAGE_H
+#define __VDSO_DATAPAGE_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/bits.h>
+#include <linux/time.h>
+#include <linux/types.h>
+
+#define VDSO_BASES (CLOCK_TAI + 1)
+#define VDSO_HRES (BIT(CLOCK_REALTIME) | \
+ BIT(CLOCK_MONOTONIC) | \
+ BIT(CLOCK_BOOTTIME) | \
+ BIT(CLOCK_TAI))
+#define VDSO_COARSE (BIT(CLOCK_REALTIME_COARSE) | \
+ BIT(CLOCK_MONOTONIC_COARSE))
+#define VDSO_RAW (BIT(CLOCK_MONOTONIC_RAW))
+
+#define CS_HRES_COARSE 0
+#define CS_RAW 1
+#define CS_BASES (CS_RAW + 1)
+
+/**
+ * struct vdso_timestamp - basetime per clock_id
+ * @sec: seconds
+ * @nsec: nanoseconds
+ *
+ * There is one vdso_timestamp object in vvar for each vDSO-accelerated
+ * clock_id. For high-resolution clocks, this encodes the time
+ * corresponding to vdso_data.cycle_last. For coarse clocks this encodes
+ * the actual time.
+ *
+ * To be noticed that for highres clocks nsec is left-shifted by
+ * vdso_data.cs[x].shift.
+ */
+struct vdso_timestamp {
+ u64 sec;
+ u64 nsec;
+};
+
+/**
+ * struct vdso_data - vdso datapage representation
+ * @seq: timebase sequence counter
+ * @clock_mode: clock mode
+ * @cycle_last: timebase at clocksource init
+ * @mask: clocksource mask
+ * @mult: clocksource multiplier
+ * @shift: clocksource shift
+ * @basetime[clock_id]: basetime per clock_id
+ * @tz_minuteswest: minutes west of Greenwich
+ * @tz_dsttime: type of DST correction
+ * @hrtimer_res: hrtimer resolution
+ * @__unused: unused
+ *
+ * vdso_data will be accessed by 64 bit and compat code at the same time
+ * so we should be careful before modifying this structure.
+ */
+struct vdso_data {
+ u32 seq;
+
+ s32 clock_mode;
+ u64 cycle_last;
+ u64 mask;
+ u32 mult;
+ u32 shift;
+
+ struct vdso_timestamp basetime[VDSO_BASES];
+
+ s32 tz_minuteswest;
+ s32 tz_dsttime;
+ u32 hrtimer_res;
+ u32 __unused;
+};
+
+/*
+ * We use the hidden visibility to prevent the compiler from generating a GOT
+ * relocation. Not only is going through a GOT useless (the entry couldn't and
+ * must not be overridden by another library), it does not even work: the linker
+ * cannot generate an absolute address to the data page.
+ *
+ * With the hidden visibility, the compiler simply generates a PC-relative
+ * relocation, and this is what we need.
+ */
+extern struct vdso_data _vdso_data[CS_BASES] __attribute__((visibility("hidden")));
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __VDSO_DATAPAGE_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __VDSO_HELPERS_H
+#define __VDSO_HELPERS_H
+
+#ifndef __ASSEMBLY__
+
+#include <vdso/datapage.h>
+
+static __always_inline u32 vdso_read_begin(const struct vdso_data *vd)
+{
+ u32 seq;
+
+ while ((seq = READ_ONCE(vd->seq)) & 1)
+ cpu_relax();
+
+ smp_rmb();
+ return seq;
+}
+
+static __always_inline u32 vdso_read_retry(const struct vdso_data *vd,
+ u32 start)
+{
+ u32 seq;
+
+ smp_rmb();
+ seq = READ_ONCE(vd->seq);
+ return seq != start;
+}
+
+static __always_inline void vdso_write_begin(struct vdso_data *vd)
+{
+ /*
+ * WRITE_ONCE it is required otherwise the compiler can validly tear
+ * updates to vd[x].seq and it is possible that the value seen by the
+ * reader it is inconsistent.
+ */
+ WRITE_ONCE(vd[CS_HRES_COARSE].seq, vd[CS_HRES_COARSE].seq + 1);
+ WRITE_ONCE(vd[CS_RAW].seq, vd[CS_RAW].seq + 1);
+ smp_wmb();
+}
+
+static __always_inline void vdso_write_end(struct vdso_data *vd)
+{
+ smp_wmb();
+ /*
+ * WRITE_ONCE it is required otherwise the compiler can validly tear
+ * updates to vd[x].seq and it is possible that the value seen by the
+ * reader it is inconsistent.
+ */
+ WRITE_ONCE(vd[CS_HRES_COARSE].seq, vd[CS_HRES_COARSE].seq + 1);
+ WRITE_ONCE(vd[CS_RAW].seq, vd[CS_RAW].seq + 1);
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __VDSO_HELPERS_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __VDSO_VSYSCALL_H
+#define __VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __VDSO_VSYSCALL_H */
#endif /* CONFIG_BLK_DEV_RAM */
#ifdef CONFIG_BLK_DEV_RAM
-static void populate_initrd_image(char *err)
+static void __init populate_initrd_image(char *err)
{
ssize_t written;
int fd;
ksys_close(fd);
}
#else
-static void populate_initrd_image(char *err)
+static void __init populate_initrd_image(char *err)
{
printk(KERN_EMERG "Initramfs unpacking failed: %s\n", err);
}
if (err < 0)
goto free_prog;
- prog->aux->load_time = ktime_get_boot_ns();
+ prog->aux->load_time = ktime_get_boottime_ns();
err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name);
if (err)
goto free_prog;
/*
* SMT soft disabling on X86 requires to bring the CPU out of the
* BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
- * CPU marked itself as booted_once in cpu_notify_starting() so the
+ * CPU marked itself as booted_once in notify_cpu_starting() so the
* cpu_smt_allowed() check will now return false if this is not the
* primary sibling.
*/
for_each_online_cpu(cpu) {
if (cpu == primary)
continue;
+
+ if (pm_wakeup_pending()) {
+ pr_info("Wakeup pending. Abort CPU freeze\n");
+ error = -EBUSY;
+ break;
+ }
+
trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
if (ret)
return ret;
+ if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
+ return -EINVAL;
+
/*
* Cannot fail STARTING/DYING callbacks.
*/
cpu_mitigations = CPU_MITIGATIONS_AUTO;
else if (!strcmp(arg, "auto,nosmt"))
cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
+ else
+ pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
+ arg);
return 0;
}
if (perf_event_check_period(event, value))
return -EINVAL;
+ if (!event->attr.freq && (value & (1ULL << 63)))
+ return -EINVAL;
+
event_function_call(event, __perf_event_period, &value);
return 0;
if (user_mode(regs)) {
regs_user->abi = perf_reg_abi(current);
regs_user->regs = regs;
- } else if (current->mm) {
+ } else if (!(current->flags & PF_KTHREAD)) {
perf_get_regs_user(regs_user, regs, regs_user_copy);
} else {
regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
}
EXPORT_SYMBOL_GPL(perf_pmu_unregister);
+static inline bool has_extended_regs(struct perf_event *event)
+{
+ return (event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK) ||
+ (event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK);
+}
+
static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
{
struct perf_event_context *ctx = NULL;
perf_event_ctx_unlock(event->group_leader, ctx);
if (!ret) {
+ if (!(pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS) &&
+ has_extended_regs(event))
+ ret = -EOPNOTSUPP;
+
if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE &&
- event_has_any_exclude_flag(event)) {
- if (event->destroy)
- event->destroy(event);
+ event_has_any_exclude_flag(event))
ret = -EINVAL;
- }
+
+ if (ret && event->destroy)
+ event->destroy(event);
}
if (ret)
break;
case CLOCK_BOOTTIME:
- event->clock = &ktime_get_boot_ns;
+ event->clock = &ktime_get_boottime_ns;
break;
case CLOCK_TAI:
- event->clock = &ktime_get_tai_ns;
+ event->clock = &ktime_get_clocktai_ns;
break;
default:
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
THREAD_SIZE_ORDER);
- return page ? page_address(page) : NULL;
+ if (likely(page)) {
+ tsk->stack = page_address(page);
+ return tsk->stack;
+ }
+ return NULL;
#endif
}
#endif
};
-/**
- * pidfd_create() - Create a new pid file descriptor.
- *
- * @pid: struct pid that the pidfd will reference
- *
- * This creates a new pid file descriptor with the O_CLOEXEC flag set.
- *
- * Note, that this function can only be called after the fd table has
- * been unshared to avoid leaking the pidfd to the new process.
- *
- * Return: On success, a cloexec pidfd is returned.
- * On error, a negative errno number will be returned.
- */
-static int pidfd_create(struct pid *pid)
-{
- int fd;
-
- fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
- O_RDWR | O_CLOEXEC);
- if (fd < 0)
- put_pid(pid);
-
- return fd;
-}
-
static void __delayed_free_task(struct rcu_head *rhp)
{
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
int pidfd = -1, retval;
struct task_struct *p;
struct multiprocess_signals delayed;
+ struct file *pidfile = NULL;
/*
* Don't allow sharing the root directory with processes in a different
}
if (clone_flags & CLONE_PIDFD) {
- int reserved;
-
/*
* - CLONE_PARENT_SETTID is useless for pidfds and also
* parent_tidptr is used to return pidfds.
if (clone_flags &
(CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD))
return ERR_PTR(-EINVAL);
-
- /*
- * Verify that parent_tidptr is sane so we can potentially
- * reuse it later.
- */
- if (get_user(reserved, parent_tidptr))
- return ERR_PTR(-EFAULT);
-
- if (reserved != 0)
- return ERR_PTR(-EINVAL);
}
/*
* if the fd table isn't shared).
*/
if (clone_flags & CLONE_PIDFD) {
- retval = pidfd_create(pid);
+ retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
if (retval < 0)
goto bad_fork_free_pid;
pidfd = retval;
+
+ pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
+ O_RDWR | O_CLOEXEC);
+ if (IS_ERR(pidfile)) {
+ put_unused_fd(pidfd);
+ retval = PTR_ERR(pidfile);
+ goto bad_fork_free_pid;
+ }
+ get_pid(pid); /* held by pidfile now */
+
retval = put_user(pidfd, parent_tidptr);
if (retval)
goto bad_fork_put_pidfd;
*/
p->start_time = ktime_get_ns();
- p->real_start_time = ktime_get_boot_ns();
+ p->real_start_time = ktime_get_boottime_ns();
/*
* Make it visible to the rest of the system, but dont wake it up yet.
goto bad_fork_cancel_cgroup;
}
+ /* past the last point of failure */
+ if (pidfile)
+ fd_install(pidfd, pidfile);
init_task_pid_links(p);
if (likely(p->pid)) {
bad_fork_cgroup_threadgroup_change_end:
cgroup_threadgroup_change_end(current);
bad_fork_put_pidfd:
- if (clone_flags & CLONE_PIDFD)
- ksys_close(pidfd);
+ if (clone_flags & CLONE_PIDFD) {
+ fput(pidfile);
+ put_unused_fd(pidfd);
+ }
bad_fork_free_pid:
if (pid != &init_struct_pid)
free_pid(pid);
obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o
obj-$(CONFIG_IRQ_TIMINGS) += timings.o
+ifeq ($(CONFIG_TEST_IRQ_TIMINGS),y)
+ CFLAGS_timings.o += -DDEBUG
+endif
obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o
obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o
return nodes;
}
-static int __irq_build_affinity_masks(const struct irq_affinity *affd,
- unsigned int startvec,
+static int __irq_build_affinity_masks(unsigned int startvec,
unsigned int numvecs,
unsigned int firstvec,
cpumask_var_t *node_to_cpumask,
* 1) spread present CPU on these vectors
* 2) spread other possible CPUs on these vectors
*/
-static int irq_build_affinity_masks(const struct irq_affinity *affd,
- unsigned int startvec, unsigned int numvecs,
+static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
unsigned int firstvec,
struct irq_affinity_desc *masks)
{
build_node_to_cpumask(node_to_cpumask);
/* Spread on present CPUs starting from affd->pre_vectors */
- nr_present = __irq_build_affinity_masks(affd, curvec, numvecs,
+ nr_present = __irq_build_affinity_masks(curvec, numvecs,
firstvec, node_to_cpumask,
cpu_present_mask, nmsk, masks);
else
curvec = firstvec + nr_present;
cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
- nr_others = __irq_build_affinity_masks(affd, curvec, numvecs,
+ nr_others = __irq_build_affinity_masks(curvec, numvecs,
firstvec, node_to_cpumask,
npresmsk, nmsk, masks);
put_online_cpus();
unsigned int this_vecs = affd->set_size[i];
int ret;
- ret = irq_build_affinity_masks(affd, curvec, this_vecs,
+ ret = irq_build_affinity_masks(curvec, this_vecs,
curvec, masks);
if (ret) {
kfree(masks);
/* It triggered already - consider it spurious. */
if (!(desc->istate & IRQS_WAITING)) {
desc->istate &= ~IRQS_AUTODETECT;
- irq_shutdown(desc);
+ irq_shutdown_and_deactivate(desc);
} else
if (i < 32)
mask |= 1 << i;
mask |= 1 << i;
desc->istate &= ~IRQS_AUTODETECT;
- irq_shutdown(desc);
+ irq_shutdown_and_deactivate(desc);
}
raw_spin_unlock_irq(&desc->lock);
}
nr_of_irqs++;
}
desc->istate &= ~IRQS_AUTODETECT;
- irq_shutdown(desc);
+ irq_shutdown_and_deactivate(desc);
}
raw_spin_unlock_irq(&desc->lock);
}
}
irq_state_clr_started(desc);
}
+}
+
+
+void irq_shutdown_and_deactivate(struct irq_desc *desc)
+{
+ irq_shutdown(desc);
/*
* This must be called even if the interrupt was never started up,
* because the activation can happen before the interrupt is
unsigned int irq = irq_desc_get_irq(desc);
irqreturn_t res;
+ __kstat_incr_irqs_this_cpu(desc);
+
trace_irq_handler_entry(irq, action);
/*
* NMIs cannot be shared, there is only one action.
unsigned int irq = irq_desc_get_irq(desc);
irqreturn_t res;
+ __kstat_incr_irqs_this_cpu(desc);
+
trace_irq_handler_entry(irq, action);
res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id));
trace_irq_handler_exit(irq, action, res);
*/
if (irqd_affinity_is_managed(d)) {
irqd_set_managed_shutdown(d);
- irq_shutdown(desc);
+ irq_shutdown_and_deactivate(desc);
return false;
}
affinity = cpu_online_mask;
extern int irq_startup(struct irq_desc *desc, bool resend, bool force);
extern void irq_shutdown(struct irq_desc *desc);
+extern void irq_shutdown_and_deactivate(struct irq_desc *desc);
extern void irq_enable(struct irq_desc *desc);
extern void irq_disable(struct irq_desc *desc);
extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu);
extern void irq_mark_irq(unsigned int irq);
#endif
+extern int __irq_get_irqchip_state(struct irq_data *data,
+ enum irqchip_irq_state which,
+ bool *state);
+
extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags);
return value & U16_MAX;
}
+static __always_inline void irq_timings_push(u64 ts, int irq)
+{
+ struct irq_timings *timings = this_cpu_ptr(&irq_timings);
+
+ timings->values[timings->count & IRQ_TIMINGS_MASK] =
+ irq_timing_encode(ts, irq);
+
+ timings->count++;
+}
+
/*
* The function record_irq_time is only called in one place in the
* interrupts handler. We want this function always inline so the code
if (!static_branch_likely(&irq_timing_enabled))
return;
- if (desc->istate & IRQS_TIMINGS) {
- struct irq_timings *timings = this_cpu_ptr(&irq_timings);
-
- timings->values[timings->count & IRQ_TIMINGS_MASK] =
- irq_timing_encode(local_clock(),
- irq_desc_get_irq(desc));
-
- timings->count++;
- }
+ if (desc->istate & IRQS_TIMINGS)
+ irq_timings_push(local_clock(), irq_desc_get_irq(desc));
}
#else
static inline void irq_remove_timings(struct irq_desc *desc) {}
* @hwirq: The HW irq number to convert to a logical one
* @regs: Register file coming from the low-level handling code
*
+ * This function must be called from an NMI context.
+ *
* Returns: 0 on success, or -EINVAL if conversion has failed
*/
int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
unsigned int irq;
int ret = 0;
- nmi_enter();
+ /*
+ * NMI context needs to be setup earlier in order to deal with tracing.
+ */
+ WARN_ON(!in_nmi());
irq = irq_find_mapping(domain, hwirq);
else
ret = -EINVAL;
- nmi_exit();
set_irq_regs(old_regs);
return ret;
}
*per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
}
+static bool irq_is_nmi(struct irq_desc *desc)
+{
+ return desc->istate & IRQS_NMI;
+}
+
/**
* kstat_irqs - Get the statistics for an interrupt
* @irq: The interrupt number
if (!desc || !desc->kstat_irqs)
return 0;
if (!irq_settings_is_per_cpu_devid(desc) &&
- !irq_settings_is_per_cpu(desc))
+ !irq_settings_is_per_cpu(desc) &&
+ !irq_is_nmi(desc))
return desc->tot_count;
for_each_possible_cpu(cpu)
* @ops: domain callbacks
* @host_data: Controller private data pointer
*
- * Allocates and initialize and irq_domain structure.
+ * Allocates and initializes an irq_domain structure.
* Returns pointer to IRQ domain, or NULL on failure.
*/
struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
GFP_KERNEL, of_node_to_nid(of_node));
- if (WARN_ON(!domain))
+ if (!domain)
return NULL;
if (fwnode && is_fwnode_irqchip(fwnode)) {
#include <linux/module.h>
#include <linux/random.h>
#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
early_param("threadirqs", setup_forced_irqthreads);
#endif
-static void __synchronize_hardirq(struct irq_desc *desc)
+static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip)
{
+ struct irq_data *irqd = irq_desc_get_irq_data(desc);
bool inprogress;
do {
/* Ok, that indicated we're done: double-check carefully. */
raw_spin_lock_irqsave(&desc->lock, flags);
inprogress = irqd_irq_inprogress(&desc->irq_data);
+
+ /*
+ * If requested and supported, check at the chip whether it
+ * is in flight at the hardware level, i.e. already pending
+ * in a CPU and waiting for service and acknowledge.
+ */
+ if (!inprogress && sync_chip) {
+ /*
+ * Ignore the return code. inprogress is only updated
+ * when the chip supports it.
+ */
+ __irq_get_irqchip_state(irqd, IRQCHIP_STATE_ACTIVE,
+ &inprogress);
+ }
raw_spin_unlock_irqrestore(&desc->lock, flags);
/* Oops, that failed? */
* Returns: false if a threaded handler is active.
*
* This function may be called - with care - from IRQ context.
+ *
+ * It does not check whether there is an interrupt in flight at the
+ * hardware level, but not serviced yet, as this might deadlock when
+ * called with interrupts disabled and the target CPU of the interrupt
+ * is the current CPU.
*/
bool synchronize_hardirq(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
if (desc) {
- __synchronize_hardirq(desc);
+ __synchronize_hardirq(desc, false);
return !atomic_read(&desc->threads_active);
}
* to complete before returning. If you use this function while
* holding a resource the IRQ handler may need you will deadlock.
*
- * This function may be called - with care - from IRQ context.
+ * Can only be called from preemptible code as it might sleep when
+ * an interrupt thread is associated to @irq.
+ *
+ * It optionally makes sure (when the irq chip supports that method)
+ * that the interrupt is not pending in any CPU and waiting for
+ * service.
*/
void synchronize_irq(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
if (desc) {
- __synchronize_hardirq(desc);
+ __synchronize_hardirq(desc, true);
/*
* We made sure that no hardirq handler is
* running. Now verify that no threaded handlers are
/* If this was the last handler, shut down the IRQ line: */
if (!desc->action) {
irq_settings_clr_disable_unlazy(desc);
+ /* Only shutdown. Deactivate after synchronize_hardirq() */
irq_shutdown(desc);
}
unregister_handler_proc(irq, action);
- /* Make sure it's not being used on another CPU: */
- synchronize_hardirq(irq);
+ /*
+ * Make sure it's not being used on another CPU and if the chip
+ * supports it also make sure that there is no (not yet serviced)
+ * interrupt in flight at the hardware level.
+ */
+ __synchronize_hardirq(desc, true);
#ifdef CONFIG_DEBUG_SHIRQ
/*
* require it to deallocate resources over the slow bus.
*/
chip_bus_lock(desc);
+ /*
+ * There is no interrupt on the fly anymore. Deactivate it
+ * completely.
+ */
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ irq_domain_deactivate_irq(&desc->irq_data);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+
irq_release_resources(desc);
chip_bus_sync_unlock(desc);
irq_remove_timings(desc);
}
irq_settings_clr_disable_unlazy(desc);
- irq_shutdown(desc);
+ irq_shutdown_and_deactivate(desc);
irq_release_resources(desc);
irq_put_desc_unlock(desc, flags);
}
+int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which,
+ bool *state)
+{
+ struct irq_chip *chip;
+ int err = -EINVAL;
+
+ do {
+ chip = irq_data_get_irq_chip(data);
+ if (chip->irq_get_irqchip_state)
+ break;
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+ data = data->parent_data;
+#else
+ data = NULL;
+#endif
+ } while (data);
+
+ if (data)
+ err = chip->irq_get_irqchip_state(data, which, state);
+ return err;
+}
+
/**
* irq_get_irqchip_state - returns the irqchip state of a interrupt.
* @irq: Interrupt line that is forwarded to a VM
{
struct irq_desc *desc;
struct irq_data *data;
- struct irq_chip *chip;
unsigned long flags;
int err = -EINVAL;
data = irq_desc_get_irq_data(desc);
- do {
- chip = irq_data_get_irq_chip(data);
- if (chip->irq_get_irqchip_state)
- break;
-#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
- data = data->parent_data;
-#else
- data = NULL;
-#endif
- } while (data);
-
- if (data)
- err = chip->irq_get_irqchip_state(data, which, state);
+ err = __irq_get_irqchip_state(data, which, state);
irq_put_desc_busunlock(desc, flags);
return err;
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#define pr_fmt(fmt) "irq_timings: " fmt
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/static_key.h>
+#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/idr.h>
#include <linux/irq.h>
#define EMA_ALPHA_VAL 64
#define EMA_ALPHA_SHIFT 7
-#define PREDICTION_PERIOD_MIN 2
+#define PREDICTION_PERIOD_MIN 3
#define PREDICTION_PERIOD_MAX 5
#define PREDICTION_FACTOR 4
#define PREDICTION_MAX 10 /* 2 ^ PREDICTION_MAX useconds */
#define PREDICTION_BUFFER_SIZE 16 /* slots for EMAs, hardly more than 16 */
+/*
+ * Number of elements in the circular buffer: If it happens it was
+ * flushed before, then the number of elements could be smaller than
+ * IRQ_TIMINGS_SIZE, so the count is used, otherwise the array size is
+ * used as we wrapped. The index begins from zero when we did not
+ * wrap. That could be done in a nicer way with the proper circular
+ * array structure type but with the cost of extra computation in the
+ * interrupt handler hot path. We choose efficiency.
+ */
+#define for_each_irqts(i, irqts) \
+ for (i = irqts->count < IRQ_TIMINGS_SIZE ? \
+ 0 : irqts->count & IRQ_TIMINGS_MASK, \
+ irqts->count = min(IRQ_TIMINGS_SIZE, \
+ irqts->count); \
+ irqts->count > 0; irqts->count--, \
+ i = (i + 1) & IRQ_TIMINGS_MASK)
+
struct irqt_stat {
u64 last_ts;
u64 ema_time[PREDICTION_BUFFER_SIZE];
static int irq_timings_next_event_index(int *buffer, size_t len, int period_max)
{
- int i;
+ int period;
+
+ /*
+ * Move the beginning pointer to the end minus the max period x 3.
+ * We are at the point we can begin searching the pattern
+ */
+ buffer = &buffer[len - (period_max * 3)];
+
+ /* Adjust the length to the maximum allowed period x 3 */
+ len = period_max * 3;
/*
* The buffer contains the suite of intervals, in a ilog2
* period beginning at the end of the buffer. We do that for
* each suffix.
*/
- for (i = period_max; i >= PREDICTION_PERIOD_MIN ; i--) {
+ for (period = period_max; period >= PREDICTION_PERIOD_MIN; period--) {
- int *begin = &buffer[len - (i * 3)];
- int *ptr = begin;
+ /*
+ * The first comparison always succeed because the
+ * suffix is deduced from the first n-period bytes of
+ * the buffer and we compare the initial suffix with
+ * itself, so we can skip the first iteration.
+ */
+ int idx = period;
+ size_t size = period;
/*
* We look if the suite with period 'i' repeat
* itself. If it is truncated at the end, as it
* repeats we can use the period to find out the next
- * element.
+ * element with the modulo.
*/
- while (!memcmp(ptr, begin, i * sizeof(*ptr))) {
- ptr += i;
- if (ptr >= &buffer[len])
- return begin[((i * 3) % i)];
+ while (!memcmp(buffer, &buffer[idx], size * sizeof(int))) {
+
+ /*
+ * Move the index in a period basis
+ */
+ idx += size;
+
+ /*
+ * If this condition is reached, all previous
+ * memcmp were successful, so the period is
+ * found.
+ */
+ if (idx == len)
+ return buffer[len % period];
+
+ /*
+ * If the remaining elements to compare are
+ * smaller than the period, readjust the size
+ * of the comparison for the last iteration.
+ */
+ if (len - idx < period)
+ size = len - idx;
}
}
return irqs->last_ts + irqs->ema_time[index];
}
+static __always_inline int irq_timings_interval_index(u64 interval)
+{
+ /*
+ * The PREDICTION_FACTOR increase the interval size for the
+ * array of exponential average.
+ */
+ u64 interval_us = (interval >> 10) / PREDICTION_FACTOR;
+
+ return likely(interval_us) ? ilog2(interval_us) : 0;
+}
+
+static __always_inline void __irq_timings_store(int irq, struct irqt_stat *irqs,
+ u64 interval)
+{
+ int index;
+
+ /*
+ * Get the index in the ema table for this interrupt.
+ */
+ index = irq_timings_interval_index(interval);
+
+ /*
+ * Store the index as an element of the pattern in another
+ * circular array.
+ */
+ irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index;
+
+ irqs->ema_time[index] = irq_timings_ema_new(interval,
+ irqs->ema_time[index]);
+
+ irqs->count++;
+}
+
static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)
{
u64 old_ts = irqs->last_ts;
u64 interval;
- int index;
/*
* The timestamps are absolute time values, we need to compute
return;
}
- /*
- * Get the index in the ema table for this interrupt. The
- * PREDICTION_FACTOR increase the interval size for the array
- * of exponential average.
- */
- index = likely(interval) ?
- ilog2((interval >> 10) / PREDICTION_FACTOR) : 0;
-
- /*
- * Store the index as an element of the pattern in another
- * circular array.
- */
- irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index;
-
- irqs->ema_time[index] = irq_timings_ema_new(interval,
- irqs->ema_time[index]);
-
- irqs->count++;
+ __irq_timings_store(irq, irqs, interval);
}
/**
* model while decrementing the counter because we consume the
* data from our circular buffer.
*/
-
- i = (irqts->count & IRQ_TIMINGS_MASK) - 1;
- irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
-
- for (; irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {
+ for_each_irqts(i, irqts) {
irq = irq_timing_decode(irqts->values[i], &ts);
s = idr_find(&irqt_stats, irq);
if (s)
return 0;
}
+
+#ifdef CONFIG_TEST_IRQ_TIMINGS
+struct timings_intervals {
+ u64 *intervals;
+ size_t count;
+};
+
+/*
+ * Intervals are given in nanosecond base
+ */
+static u64 intervals0[] __initdata = {
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000,
+};
+
+static u64 intervals1[] __initdata = {
+ 223947000, 1240000, 1384000, 1386000, 1386000,
+ 217416000, 1236000, 1384000, 1386000, 1387000,
+ 214719000, 1241000, 1386000, 1387000, 1384000,
+ 213696000, 1234000, 1384000, 1386000, 1388000,
+ 219904000, 1240000, 1385000, 1389000, 1385000,
+ 212240000, 1240000, 1386000, 1386000, 1386000,
+ 214415000, 1236000, 1384000, 1386000, 1387000,
+ 214276000, 1234000,
+};
+
+static u64 intervals2[] __initdata = {
+ 4000, 3000, 5000, 100000,
+ 3000, 3000, 5000, 117000,
+ 4000, 4000, 5000, 112000,
+ 4000, 3000, 4000, 110000,
+ 3000, 5000, 3000, 117000,
+ 4000, 4000, 5000, 112000,
+ 4000, 3000, 4000, 110000,
+ 3000, 4000, 5000, 112000,
+ 4000,
+};
+
+static u64 intervals3[] __initdata = {
+ 1385000, 212240000, 1240000,
+ 1386000, 214415000, 1236000,
+ 1384000, 214276000, 1234000,
+ 1386000, 214415000, 1236000,
+ 1385000, 212240000, 1240000,
+ 1386000, 214415000, 1236000,
+ 1384000, 214276000, 1234000,
+ 1386000, 214415000, 1236000,
+ 1385000, 212240000, 1240000,
+};
+
+static u64 intervals4[] __initdata = {
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000,
+};
+
+static struct timings_intervals tis[] __initdata = {
+ { intervals0, ARRAY_SIZE(intervals0) },
+ { intervals1, ARRAY_SIZE(intervals1) },
+ { intervals2, ARRAY_SIZE(intervals2) },
+ { intervals3, ARRAY_SIZE(intervals3) },
+ { intervals4, ARRAY_SIZE(intervals4) },
+};
+
+static int __init irq_timings_test_next_index(struct timings_intervals *ti)
+{
+ int _buffer[IRQ_TIMINGS_SIZE];
+ int buffer[IRQ_TIMINGS_SIZE];
+ int index, start, i, count, period_max;
+
+ count = ti->count - 1;
+
+ period_max = count > (3 * PREDICTION_PERIOD_MAX) ?
+ PREDICTION_PERIOD_MAX : count / 3;
+
+ /*
+ * Inject all values except the last one which will be used
+ * to compare with the next index result.
+ */
+ pr_debug("index suite: ");
+
+ for (i = 0; i < count; i++) {
+ index = irq_timings_interval_index(ti->intervals[i]);
+ _buffer[i & IRQ_TIMINGS_MASK] = index;
+ pr_cont("%d ", index);
+ }
+
+ start = count < IRQ_TIMINGS_SIZE ? 0 :
+ count & IRQ_TIMINGS_MASK;
+
+ count = min_t(int, count, IRQ_TIMINGS_SIZE);
+
+ for (i = 0; i < count; i++) {
+ int index = (start + i) & IRQ_TIMINGS_MASK;
+ buffer[i] = _buffer[index];
+ }
+
+ index = irq_timings_next_event_index(buffer, count, period_max);
+ i = irq_timings_interval_index(ti->intervals[ti->count - 1]);
+
+ if (index != i) {
+ pr_err("Expected (%d) and computed (%d) next indexes differ\n",
+ i, index);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int __init irq_timings_next_index_selftest(void)
+{
+ int i, ret;
+
+ for (i = 0; i < ARRAY_SIZE(tis); i++) {
+
+ pr_info("---> Injecting intervals number #%d (count=%zd)\n",
+ i, tis[i].count);
+
+ ret = irq_timings_test_next_index(&tis[i]);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+static int __init irq_timings_test_irqs(struct timings_intervals *ti)
+{
+ struct irqt_stat __percpu *s;
+ struct irqt_stat *irqs;
+ int i, index, ret, irq = 0xACE5;
+
+ ret = irq_timings_alloc(irq);
+ if (ret) {
+ pr_err("Failed to allocate irq timings\n");
+ return ret;
+ }
+
+ s = idr_find(&irqt_stats, irq);
+ if (!s) {
+ ret = -EIDRM;
+ goto out;
+ }
+
+ irqs = this_cpu_ptr(s);
+
+ for (i = 0; i < ti->count; i++) {
+
+ index = irq_timings_interval_index(ti->intervals[i]);
+ pr_debug("%d: interval=%llu ema_index=%d\n",
+ i, ti->intervals[i], index);
+
+ __irq_timings_store(irq, irqs, ti->intervals[i]);
+ if (irqs->circ_timings[i & IRQ_TIMINGS_MASK] != index) {
+ pr_err("Failed to store in the circular buffer\n");
+ goto out;
+ }
+ }
+
+ if (irqs->count != ti->count) {
+ pr_err("Count differs\n");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ irq_timings_free(irq);
+
+ return ret;
+}
+
+static int __init irq_timings_irqs_selftest(void)
+{
+ int i, ret;
+
+ for (i = 0; i < ARRAY_SIZE(tis); i++) {
+ pr_info("---> Injecting intervals number #%d (count=%zd)\n",
+ i, tis[i].count);
+ ret = irq_timings_test_irqs(&tis[i]);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+static int __init irq_timings_test_irqts(struct irq_timings *irqts,
+ unsigned count)
+{
+ int start = count >= IRQ_TIMINGS_SIZE ? count - IRQ_TIMINGS_SIZE : 0;
+ int i, irq, oirq = 0xBEEF;
+ u64 ots = 0xDEAD, ts;
+
+ /*
+ * Fill the circular buffer by using the dedicated function.
+ */
+ for (i = 0; i < count; i++) {
+ pr_debug("%d: index=%d, ts=%llX irq=%X\n",
+ i, i & IRQ_TIMINGS_MASK, ots + i, oirq + i);
+
+ irq_timings_push(ots + i, oirq + i);
+ }
+
+ /*
+ * Compute the first elements values after the index wrapped
+ * up or not.
+ */
+ ots += start;
+ oirq += start;
+
+ /*
+ * Test the circular buffer count is correct.
+ */
+ pr_debug("---> Checking timings array count (%d) is right\n", count);
+ if (WARN_ON(irqts->count != count))
+ return -EINVAL;
+
+ /*
+ * Test the macro allowing to browse all the irqts.
+ */
+ pr_debug("---> Checking the for_each_irqts() macro\n");
+ for_each_irqts(i, irqts) {
+
+ irq = irq_timing_decode(irqts->values[i], &ts);
+
+ pr_debug("index=%d, ts=%llX / %llX, irq=%X / %X\n",
+ i, ts, ots, irq, oirq);
+
+ if (WARN_ON(ts != ots || irq != oirq))
+ return -EINVAL;
+
+ ots++; oirq++;
+ }
+
+ /*
+ * The circular buffer should have be flushed when browsed
+ * with for_each_irqts
+ */
+ pr_debug("---> Checking timings array is empty after browsing it\n");
+ if (WARN_ON(irqts->count))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int __init irq_timings_irqts_selftest(void)
+{
+ struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
+ int i, ret;
+
+ /*
+ * Test the circular buffer with different number of
+ * elements. The purpose is to test at the limits (empty, half
+ * full, full, wrapped with the cursor at the boundaries,
+ * wrapped several times, etc ...
+ */
+ int count[] = { 0,
+ IRQ_TIMINGS_SIZE >> 1,
+ IRQ_TIMINGS_SIZE,
+ IRQ_TIMINGS_SIZE + (IRQ_TIMINGS_SIZE >> 1),
+ 2 * IRQ_TIMINGS_SIZE,
+ (2 * IRQ_TIMINGS_SIZE) + 3,
+ };
+
+ for (i = 0; i < ARRAY_SIZE(count); i++) {
+
+ pr_info("---> Checking the timings with %d/%d values\n",
+ count[i], IRQ_TIMINGS_SIZE);
+
+ ret = irq_timings_test_irqts(irqts, count[i]);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+static int __init irq_timings_selftest(void)
+{
+ int ret;
+
+ pr_info("------------------- selftest start -----------------\n");
+
+ /*
+ * At this point, we don't except any subsystem to use the irq
+ * timings but us, so it should not be enabled.
+ */
+ if (static_branch_unlikely(&irq_timing_enabled)) {
+ pr_warn("irq timings already initialized, skipping selftest\n");
+ return 0;
+ }
+
+ ret = irq_timings_irqts_selftest();
+ if (ret)
+ goto out;
+
+ ret = irq_timings_irqs_selftest();
+ if (ret)
+ goto out;
+
+ ret = irq_timings_next_index_selftest();
+out:
+ pr_info("---------- selftest end with %s -----------\n",
+ ret ? "failure" : "success");
+
+ return ret;
+}
+early_initcall(irq_timings_selftest);
+#endif
pm_suspend_target_state = state;
+ if (state == PM_SUSPEND_TO_IDLE)
+ pm_set_suspend_no_platform();
+
error = platform_suspend_begin(state);
if (error)
goto Close;
*/
static void ptrace_link(struct task_struct *child, struct task_struct *new_parent)
{
- rcu_read_lock();
- __ptrace_link(child, new_parent, __task_cred(new_parent));
- rcu_read_unlock();
+ __ptrace_link(child, new_parent, current_cred());
}
/**
BUG_ON(!child->ptrace);
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+#ifdef TIF_SYSCALL_EMU
+ clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+#endif
child->parent = child->real_parent;
list_del_init(&child->ptrace_entry);
* This is useful for syscalls such as ppoll, pselect, io_pgetevents and
* epoll_pwait where a new sigmask is passed in from userland for the syscalls.
*/
-void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved)
+void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved,
+ bool interrupted)
{
if (!usigmask)
* Restoring sigmask here can lead to delivering signals that the above
* syscalls are intended to block because of the sigmask passed in.
*/
- if (signal_pending(current)) {
+ if (interrupted) {
current->saved_sigmask = *sigsaved;
set_restore_sigmask();
return;
cpumask_var_t cpumask_ipi;
};
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
+static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data);
static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler.
*/
-int smp_call_function(smp_call_func_t func, void *info, int wait)
+void smp_call_function(smp_call_func_t func, void *info, int wait)
{
preempt_disable();
smp_call_function_many(cpu_online_mask, func, info, wait);
preempt_enable();
-
- return 0;
}
EXPORT_SYMBOL(smp_call_function);
* early_boot_irqs_disabled is set. Use local_irq_save/restore() instead
* of local_irq_disable/enable().
*/
-int on_each_cpu(void (*func) (void *info), void *info, int wait)
+void on_each_cpu(void (*func) (void *info), void *info, int wait)
{
unsigned long flags;
- int ret = 0;
preempt_disable();
- ret = smp_call_function(func, info, wait);
+ smp_call_function(func, info, wait);
local_irq_save(flags);
func(info);
local_irq_restore(flags);
preempt_enable();
- return ret;
}
EXPORT_SYMBOL(on_each_cpu);
/* Find end, append list for that CPU. */
if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
- this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
+ __this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
per_cpu(tasklet_vec, cpu).head = NULL;
per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
}
set_state(msdata, msdata->state + 1);
}
+void __weak stop_machine_yield(const struct cpumask *cpumask)
+{
+ cpu_relax();
+}
+
/* This is the cpu_stop function which stops the CPU. */
static int multi_cpu_stop(void *data)
{
struct multi_stop_data *msdata = data;
enum multi_stop_state curstate = MULTI_STOP_NONE;
int cpu = smp_processor_id(), err = 0;
+ const struct cpumask *cpumask;
unsigned long flags;
bool is_active;
*/
local_save_flags(flags);
- if (!msdata->active_cpus)
- is_active = cpu == cpumask_first(cpu_online_mask);
- else
- is_active = cpumask_test_cpu(cpu, msdata->active_cpus);
+ if (!msdata->active_cpus) {
+ cpumask = cpu_online_mask;
+ is_active = cpu == cpumask_first(cpumask);
+ } else {
+ cpumask = msdata->active_cpus;
+ is_active = cpumask_test_cpu(cpu, cpumask);
+ }
/* Simple state machine */
do {
/* Chill out and ensure we re-read multi_stop_state. */
- cpu_relax_yield();
+ stop_machine_yield(cpumask);
if (msdata->state != curstate) {
curstate = msdata->state;
switch (curstate) {
endif
obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o
obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o
+obj-$(CONFIG_HAVE_GENERIC_VDSO) += vsyscall.o
obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
obj-$(CONFIG_TEST_UDELAY) += test_udelay.o
/**
* alarmtimer_suspend - Suspend time callback
* @dev: unused
- * @state: unused
*
* When we are going into suspend, we look through the bases
* to see which is the soonest timer to expire. We then
static int watchdog_running;
static atomic_t watchdog_reset_pending;
-static void inline clocksource_watchdog_lock(unsigned long *flags)
+static inline void clocksource_watchdog_lock(unsigned long *flags)
{
spin_lock_irqsave(&watchdog_lock, *flags);
}
-static void inline clocksource_watchdog_unlock(unsigned long *flags)
+static inline void clocksource_watchdog_unlock(unsigned long *flags)
{
spin_unlock_irqrestore(&watchdog_lock, *flags);
}
#include <linux/syscalls.h>
#include <linux/interrupt.h>
#include <linux/tick.h>
-#include <linux/seq_file.h>
#include <linux/err.h>
#include <linux/debugobjects.h>
#include <linux/sched/signal.h>
* @timer: hrtimer to stop
*
* Returns:
- * 0 when the timer was not active
- * 1 when the timer was active
- * -1 when the timer is currently executing the callback function and
+ *
+ * * 0 when the timer was not active
+ * * 1 when the timer was active
+ * * -1 when the timer is currently executing the callback function and
* cannot be stopped
*/
int hrtimer_try_to_cancel(struct hrtimer *timer)
#define MAX_TICKADJ 500LL /* usecs */
#define MAX_TICKADJ_SCALED \
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
+#define MAX_TAI_OFFSET 100000
/*
* phase-lock loop variables
time_constant = max(time_constant, 0l);
}
- if (txc->modes & ADJ_TAI && txc->constant >= 0)
+ if (txc->modes & ADJ_TAI &&
+ txc->constant >= 0 && txc->constant <= MAX_TAI_OFFSET)
*time_tai = txc->constant;
if (txc->modes & ADJ_OFFSET)
*/
static void itimer_delete(struct k_itimer *timer)
{
- unsigned long flags;
-
retry_delete:
- spin_lock_irqsave(&timer->it_lock, flags);
+ spin_lock_irq(&timer->it_lock);
if (timer_delete_hook(timer) == TIMER_RETRY) {
- unlock_timer(timer, flags);
+ spin_unlock_irq(&timer->it_lock);
goto retry_delete;
}
list_del(&timer->list);
- /*
- * This keeps any tasks waiting on the spin lock from thinking
- * they got something (see the lock code above).
- */
- timer->it_signal = NULL;
- unlock_timer(timer, flags);
+ spin_unlock_irq(&timer->it_lock);
release_posix_timer(timer, IT_ID_SET);
}
if (tv) {
if (compat_get_timeval(&user_tv, tv))
return -EFAULT;
+
+ if (!timeval_valid(&user_tv))
+ return -EINVAL;
+
new_ts.tv_sec = user_tv.tv_sec;
new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC;
}
} while (read_seqcount_retry(&tk_core.seq, seq));
- return base + nsecs;
+ return ktime_add_ns(base, nsecs);
}
EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);
SEQ_printf(m, "\n");
}
-static int timer_list_show(struct seq_file *m, void *v)
-{
- struct timer_list_iter *iter = v;
-
- if (iter->cpu == -1 && !iter->second_pass)
- timer_list_header(m, iter->now);
- else if (!iter->second_pass)
- print_cpu(m, iter->cpu, iter->now);
-#ifdef CONFIG_GENERIC_CLOCKEVENTS
- else if (iter->cpu == -1 && iter->second_pass)
- timer_list_show_tickdevices_header(m);
- else
- print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu);
-#endif
- return 0;
-}
-
void sysrq_timer_list_show(void)
{
u64 now = ktime_to_ns(ktime_get());
return;
}
+#ifdef CONFIG_PROC_FS
+static int timer_list_show(struct seq_file *m, void *v)
+{
+ struct timer_list_iter *iter = v;
+
+ if (iter->cpu == -1 && !iter->second_pass)
+ timer_list_header(m, iter->now);
+ else if (!iter->second_pass)
+ print_cpu(m, iter->cpu, iter->now);
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+ else if (iter->cpu == -1 && iter->second_pass)
+ timer_list_show_tickdevices_header(m);
+ else
+ print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu);
+#endif
+ return 0;
+}
+
static void *move_iter(struct timer_list_iter *iter, loff_t offset)
{
for (; offset; offset--) {
return 0;
}
__initcall(init_timer_list_procfs);
+#endif
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 ARM Ltd.
+ *
+ * Generic implementation of update_vsyscall and update_vsyscall_tz.
+ *
+ * Based on the x86 specific implementation.
+ */
+
+#include <linux/hrtimer.h>
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+#include <vdso/helpers.h>
+#include <vdso/vsyscall.h>
+
+static inline void update_vdso_data(struct vdso_data *vdata,
+ struct timekeeper *tk)
+{
+ struct vdso_timestamp *vdso_ts;
+ u64 nsec;
+
+ vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last;
+ vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask;
+ vdata[CS_HRES_COARSE].mult = tk->tkr_mono.mult;
+ vdata[CS_HRES_COARSE].shift = tk->tkr_mono.shift;
+ vdata[CS_RAW].cycle_last = tk->tkr_raw.cycle_last;
+ vdata[CS_RAW].mask = tk->tkr_raw.mask;
+ vdata[CS_RAW].mult = tk->tkr_raw.mult;
+ vdata[CS_RAW].shift = tk->tkr_raw.shift;
+
+ /* CLOCK_REALTIME */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME];
+ vdso_ts->sec = tk->xtime_sec;
+ vdso_ts->nsec = tk->tkr_mono.xtime_nsec;
+
+ /* CLOCK_MONOTONIC */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC];
+ vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+
+ nsec = tk->tkr_mono.xtime_nsec;
+ nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
+ while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
+ nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
+ vdso_ts->sec++;
+ }
+ vdso_ts->nsec = nsec;
+
+ /* CLOCK_MONOTONIC_RAW */
+ vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
+ vdso_ts->sec = tk->raw_sec;
+ vdso_ts->nsec = tk->tkr_raw.xtime_nsec;
+
+ /* CLOCK_BOOTTIME */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME];
+ vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+ nsec = tk->tkr_mono.xtime_nsec;
+ nsec += ((u64)(tk->wall_to_monotonic.tv_nsec +
+ ktime_to_ns(tk->offs_boot)) << tk->tkr_mono.shift);
+ while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
+ nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
+ vdso_ts->sec++;
+ }
+ vdso_ts->nsec = nsec;
+
+ /* CLOCK_TAI */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI];
+ vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset;
+ vdso_ts->nsec = tk->tkr_mono.xtime_nsec;
+
+ /*
+ * Read without the seqlock held by clock_getres().
+ * Note: No need to have a second copy.
+ */
+ WRITE_ONCE(vdata[CS_HRES_COARSE].hrtimer_res, hrtimer_resolution);
+}
+
+void update_vsyscall(struct timekeeper *tk)
+{
+ struct vdso_data *vdata = __arch_get_k_vdso_data();
+ struct vdso_timestamp *vdso_ts;
+ u64 nsec;
+
+ if (__arch_update_vdso_data()) {
+ /*
+ * Some architectures might want to skip the update of the
+ * data page.
+ */
+ return;
+ }
+
+ /* copy vsyscall data */
+ vdso_write_begin(vdata);
+
+ vdata[CS_HRES_COARSE].clock_mode = __arch_get_clock_mode(tk);
+ vdata[CS_RAW].clock_mode = __arch_get_clock_mode(tk);
+
+ /* CLOCK_REALTIME_COARSE */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE];
+ vdso_ts->sec = tk->xtime_sec;
+ vdso_ts->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+
+ /* CLOCK_MONOTONIC_COARSE */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC_COARSE];
+ vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+ nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+ nsec = nsec + tk->wall_to_monotonic.tv_nsec;
+ while (nsec >= NSEC_PER_SEC) {
+ nsec = nsec - NSEC_PER_SEC;
+ vdso_ts->sec++;
+ }
+ vdso_ts->nsec = nsec;
+
+ if (__arch_use_vsyscall(vdata))
+ update_vdso_data(vdata, tk);
+
+ __arch_update_vsyscall(vdata, tk);
+
+ vdso_write_end(vdata);
+
+ __arch_sync_vdso_data(vdata);
+}
+
+void update_vsyscall_tz(void)
+{
+ struct vdso_data *vdata = __arch_get_k_vdso_data();
+
+ if (__arch_use_vsyscall(vdata)) {
+ vdata[CS_HRES_COARSE].tz_minuteswest = sys_tz.tz_minuteswest;
+ vdata[CS_HRES_COARSE].tz_dsttime = sys_tz.tz_dsttime;
+ }
+
+ __arch_sync_vdso_data(vdata);
+}
#include <linux/hash.h>
#include <linux/rcupdate.h>
#include <linux/kprobes.h>
-#include <linux/memory.h>
#include <trace/events/sched.h>
{
int ret;
- mutex_lock(&text_mutex);
-
ret = ftrace_arch_code_modify_prepare();
FTRACE_WARN_ON(ret);
if (ret)
- goto out_unlock;
+ return;
/*
* By default we use stop_machine() to modify the code.
ret = ftrace_arch_code_modify_post_process();
FTRACE_WARN_ON(ret);
-
-out_unlock:
- mutex_unlock(&text_mutex);
}
static void ftrace_run_modify_code(struct ftrace_ops *ops, int command,
struct ftrace_page *pg;
mutex_lock(&ftrace_lock);
- mutex_lock(&text_mutex);
if (ftrace_disabled)
goto out_unlock;
ftrace_arch_code_modify_post_process();
out_unlock:
- mutex_unlock(&text_mutex);
mutex_unlock(&ftrace_lock);
process_cached_mods(mod->name);
break;
}
#endif
- if (!tr->allocated_snapshot) {
+ if (tr->allocated_snapshot)
+ ret = resize_buffer_duplicate_size(&tr->max_buffer,
+ &tr->trace_buffer, iter->cpu_file);
+ else
ret = tracing_alloc_snapshot_instance(tr);
- if (ret < 0)
- break;
- }
+ if (ret < 0)
+ break;
local_irq_disable();
/* Now, we're going to swap */
if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
return count;
}
+static int tracing_err_log_release(struct inode *inode, struct file *file)
+{
+ struct trace_array *tr = inode->i_private;
+
+ trace_array_put(tr);
+
+ if (file->f_mode & FMODE_READ)
+ seq_release(inode, file);
+
+ return 0;
+}
+
static const struct file_operations tracing_err_log_fops = {
.open = tracing_err_log_open,
.write = tracing_err_log_write,
.read = seq_read,
.llseek = seq_lseek,
- .release = tracing_release_generic_tr,
+ .release = tracing_err_log_release,
};
static int tracing_buffers_open(struct inode *inode, struct file *filp)
}
EXPORT_SYMBOL(smp_call_function_single_async);
-int on_each_cpu(smp_call_func_t func, void *info, int wait)
+void on_each_cpu(smp_call_func_t func, void *info, int wait)
{
unsigned long flags;
local_irq_save(flags);
func(info);
local_irq_restore(flags);
- return 0;
}
EXPORT_SYMBOL(on_each_cpu);
config UCS2_STRING
tristate
+#
+# generic vdso
+#
+source "lib/vdso/Kconfig"
+
source "lib/fonts/Kconfig"
config SG_SPLIT
A benchmark measuring the performance of the rbtree library.
Also includes rbtree invariant checks.
+config REED_SOLOMON_TEST
+ tristate "Reed-Solomon library test"
+ depends on DEBUG_KERNEL || m
+ select REED_SOLOMON
+ select REED_SOLOMON_ENC16
+ select REED_SOLOMON_DEC16
+ help
+ This option enables the self-test function of rslib at boot,
+ or at module load time.
+
+ If unsure, say N.
+
config INTERVAL_TREE_TEST
tristate "Interval tree test"
depends on DEBUG_KERNEL
If unsure, say N.
+config TEST_IRQ_TIMINGS
+ bool "IRQ timings selftest"
+ depends on IRQ_TIMINGS
+ help
+ Enable this option to test the irq timings code on boot.
+
+ If unsure, say N.
+
config TEST_LKM
tristate "Test module loading with 'hello world' module"
depends on m
#define ODEBUG_POOL_SIZE 1024
#define ODEBUG_POOL_MIN_LEVEL 256
+#define ODEBUG_POOL_PERCPU_SIZE 64
+#define ODEBUG_BATCH_SIZE 16
#define ODEBUG_CHUNK_SHIFT PAGE_SHIFT
#define ODEBUG_CHUNK_SIZE (1 << ODEBUG_CHUNK_SHIFT)
#define ODEBUG_CHUNK_MASK (~(ODEBUG_CHUNK_SIZE - 1))
+/*
+ * We limit the freeing of debug objects via workqueue at a maximum
+ * frequency of 10Hz and about 1024 objects for each freeing operation.
+ * So it is freeing at most 10k debug objects per second.
+ */
+#define ODEBUG_FREE_WORK_MAX 1024
+#define ODEBUG_FREE_WORK_DELAY DIV_ROUND_UP(HZ, 10)
+
struct debug_bucket {
struct hlist_head list;
raw_spinlock_t lock;
};
+/*
+ * Debug object percpu free list
+ * Access is protected by disabling irq
+ */
+struct debug_percpu_free {
+ struct hlist_head free_objs;
+ int obj_free;
+};
+
+static DEFINE_PER_CPU(struct debug_percpu_free, percpu_obj_pool);
+
static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE];
static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata;
static HLIST_HEAD(obj_pool);
static HLIST_HEAD(obj_to_free);
+/*
+ * Because of the presence of percpu free pools, obj_pool_free will
+ * under-count those in the percpu free pools. Similarly, obj_pool_used
+ * will over-count those in the percpu free pools. Adjustments will be
+ * made at debug_stats_show(). Both obj_pool_min_free and obj_pool_max_used
+ * can be off.
+ */
static int obj_pool_min_free = ODEBUG_POOL_SIZE;
static int obj_pool_free = ODEBUG_POOL_SIZE;
static int obj_pool_used;
static int obj_pool_max_used;
+static bool obj_freeing;
/* The number of objs on the global free list */
static int obj_nr_tofree;
-static struct kmem_cache *obj_cache;
static int debug_objects_maxchain __read_mostly;
static int __maybe_unused debug_objects_maxchecked __read_mostly;
static int debug_objects_pool_min_level __read_mostly
= ODEBUG_POOL_MIN_LEVEL;
static struct debug_obj_descr *descr_test __read_mostly;
+static struct kmem_cache *obj_cache __read_mostly;
/*
* Track numbers of kmem_cache_alloc()/free() calls done.
static int debug_objects_freed;
static void free_obj_work(struct work_struct *work);
-static DECLARE_WORK(debug_obj_work, free_obj_work);
+static DECLARE_DELAYED_WORK(debug_obj_work, free_obj_work);
static int __init enable_object_debug(char *str)
{
static void fill_pool(void)
{
gfp_t gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
- struct debug_obj *new, *obj;
+ struct debug_obj *obj;
unsigned long flags;
if (likely(obj_pool_free >= debug_objects_pool_min_level))
* Recheck with the lock held as the worker thread might have
* won the race and freed the global free list already.
*/
- if (obj_nr_tofree) {
+ while (obj_nr_tofree && (obj_pool_free < obj_pool_min_free)) {
obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
hlist_del(&obj->node);
obj_nr_tofree--;
return;
while (obj_pool_free < debug_objects_pool_min_level) {
+ struct debug_obj *new[ODEBUG_BATCH_SIZE];
+ int cnt;
- new = kmem_cache_zalloc(obj_cache, gfp);
- if (!new)
+ for (cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) {
+ new[cnt] = kmem_cache_zalloc(obj_cache, gfp);
+ if (!new[cnt])
+ break;
+ }
+ if (!cnt)
return;
raw_spin_lock_irqsave(&pool_lock, flags);
- hlist_add_head(&new->node, &obj_pool);
- debug_objects_allocated++;
- obj_pool_free++;
+ while (cnt) {
+ hlist_add_head(&new[--cnt]->node, &obj_pool);
+ debug_objects_allocated++;
+ obj_pool_free++;
+ }
raw_spin_unlock_irqrestore(&pool_lock, flags);
}
}
}
/*
+ * Allocate a new object from the hlist
+ */
+static struct debug_obj *__alloc_object(struct hlist_head *list)
+{
+ struct debug_obj *obj = NULL;
+
+ if (list->first) {
+ obj = hlist_entry(list->first, typeof(*obj), node);
+ hlist_del(&obj->node);
+ }
+
+ return obj;
+}
+
+/*
* Allocate a new object. If the pool is empty, switch off the debugger.
* Must be called with interrupts disabled.
*/
static struct debug_obj *
alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
{
- struct debug_obj *obj = NULL;
+ struct debug_percpu_free *percpu_pool = this_cpu_ptr(&percpu_obj_pool);
+ struct debug_obj *obj;
- raw_spin_lock(&pool_lock);
- if (obj_pool.first) {
- obj = hlist_entry(obj_pool.first, typeof(*obj), node);
+ if (likely(obj_cache)) {
+ obj = __alloc_object(&percpu_pool->free_objs);
+ if (obj) {
+ percpu_pool->obj_free--;
+ goto init_obj;
+ }
+ }
- obj->object = addr;
- obj->descr = descr;
- obj->state = ODEBUG_STATE_NONE;
- obj->astate = 0;
- hlist_del(&obj->node);
+ raw_spin_lock(&pool_lock);
+ obj = __alloc_object(&obj_pool);
+ if (obj) {
+ obj_pool_used++;
+ obj_pool_free--;
- hlist_add_head(&obj->node, &b->list);
+ /*
+ * Looking ahead, allocate one batch of debug objects and
+ * put them into the percpu free pool.
+ */
+ if (likely(obj_cache)) {
+ int i;
+
+ for (i = 0; i < ODEBUG_BATCH_SIZE; i++) {
+ struct debug_obj *obj2;
+
+ obj2 = __alloc_object(&obj_pool);
+ if (!obj2)
+ break;
+ hlist_add_head(&obj2->node,
+ &percpu_pool->free_objs);
+ percpu_pool->obj_free++;
+ obj_pool_used++;
+ obj_pool_free--;
+ }
+ }
- obj_pool_used++;
if (obj_pool_used > obj_pool_max_used)
obj_pool_max_used = obj_pool_used;
- obj_pool_free--;
if (obj_pool_free < obj_pool_min_free)
obj_pool_min_free = obj_pool_free;
}
raw_spin_unlock(&pool_lock);
+init_obj:
+ if (obj) {
+ obj->object = addr;
+ obj->descr = descr;
+ obj->state = ODEBUG_STATE_NONE;
+ obj->astate = 0;
+ hlist_add_head(&obj->node, &b->list);
+ }
return obj;
}
unsigned long flags;
HLIST_HEAD(tofree);
+ WRITE_ONCE(obj_freeing, false);
if (!raw_spin_trylock_irqsave(&pool_lock, flags))
return;
+ if (obj_pool_free >= debug_objects_pool_size)
+ goto free_objs;
+
/*
* The objs on the pool list might be allocated before the work is
* run, so recheck if pool list it full or not, if not fill pool
- * list from the global free list
+ * list from the global free list. As it is likely that a workload
+ * may be gearing up to use more and more objects, don't free any
+ * of them until the next round.
*/
while (obj_nr_tofree && obj_pool_free < debug_objects_pool_size) {
obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
obj_pool_free++;
obj_nr_tofree--;
}
+ raw_spin_unlock_irqrestore(&pool_lock, flags);
+ return;
+free_objs:
/*
* Pool list is already full and there are still objs on the free
* list. Move remaining free objs to a temporary list to free the
}
}
-static bool __free_object(struct debug_obj *obj)
+static void __free_object(struct debug_obj *obj)
{
+ struct debug_obj *objs[ODEBUG_BATCH_SIZE];
+ struct debug_percpu_free *percpu_pool;
+ int lookahead_count = 0;
unsigned long flags;
bool work;
- raw_spin_lock_irqsave(&pool_lock, flags);
- work = (obj_pool_free > debug_objects_pool_size) && obj_cache;
+ local_irq_save(flags);
+ if (!obj_cache)
+ goto free_to_obj_pool;
+
+ /*
+ * Try to free it into the percpu pool first.
+ */
+ percpu_pool = this_cpu_ptr(&percpu_obj_pool);
+ if (percpu_pool->obj_free < ODEBUG_POOL_PERCPU_SIZE) {
+ hlist_add_head(&obj->node, &percpu_pool->free_objs);
+ percpu_pool->obj_free++;
+ local_irq_restore(flags);
+ return;
+ }
+
+ /*
+ * As the percpu pool is full, look ahead and pull out a batch
+ * of objects from the percpu pool and free them as well.
+ */
+ for (; lookahead_count < ODEBUG_BATCH_SIZE; lookahead_count++) {
+ objs[lookahead_count] = __alloc_object(&percpu_pool->free_objs);
+ if (!objs[lookahead_count])
+ break;
+ percpu_pool->obj_free--;
+ }
+
+free_to_obj_pool:
+ raw_spin_lock(&pool_lock);
+ work = (obj_pool_free > debug_objects_pool_size) && obj_cache &&
+ (obj_nr_tofree < ODEBUG_FREE_WORK_MAX);
obj_pool_used--;
if (work) {
obj_nr_tofree++;
hlist_add_head(&obj->node, &obj_to_free);
+ if (lookahead_count) {
+ obj_nr_tofree += lookahead_count;
+ obj_pool_used -= lookahead_count;
+ while (lookahead_count) {
+ hlist_add_head(&objs[--lookahead_count]->node,
+ &obj_to_free);
+ }
+ }
+
+ if ((obj_pool_free > debug_objects_pool_size) &&
+ (obj_nr_tofree < ODEBUG_FREE_WORK_MAX)) {
+ int i;
+
+ /*
+ * Free one more batch of objects from obj_pool.
+ */
+ for (i = 0; i < ODEBUG_BATCH_SIZE; i++) {
+ obj = __alloc_object(&obj_pool);
+ hlist_add_head(&obj->node, &obj_to_free);
+ obj_pool_free--;
+ obj_nr_tofree++;
+ }
+ }
} else {
obj_pool_free++;
hlist_add_head(&obj->node, &obj_pool);
+ if (lookahead_count) {
+ obj_pool_free += lookahead_count;
+ obj_pool_used -= lookahead_count;
+ while (lookahead_count) {
+ hlist_add_head(&objs[--lookahead_count]->node,
+ &obj_pool);
+ }
+ }
}
- raw_spin_unlock_irqrestore(&pool_lock, flags);
- return work;
+ raw_spin_unlock(&pool_lock);
+ local_irq_restore(flags);
}
/*
*/
static void free_object(struct debug_obj *obj)
{
- if (__free_object(obj))
- schedule_work(&debug_obj_work);
+ __free_object(obj);
+ if (!obj_freeing && obj_nr_tofree) {
+ WRITE_ONCE(obj_freeing, true);
+ schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY);
+ }
}
/*
__debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
{
enum debug_obj_state state;
+ bool check_stack = false;
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
debug_objects_oom();
return;
}
- debug_object_is_on_stack(addr, onstack);
+ check_stack = true;
}
switch (obj->state) {
break;
case ODEBUG_STATE_ACTIVE:
- debug_print_object(obj, "init");
state = obj->state;
raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_print_object(obj, "init");
debug_object_fixup(descr->fixup_init, addr, state);
return;
case ODEBUG_STATE_DESTROYED:
+ raw_spin_unlock_irqrestore(&db->lock, flags);
debug_print_object(obj, "init");
- break;
+ return;
default:
break;
}
raw_spin_unlock_irqrestore(&db->lock, flags);
+ if (check_stack)
+ debug_object_is_on_stack(addr, onstack);
}
/**
obj = lookup_object(addr, db);
if (obj) {
+ bool print_object = false;
+
switch (obj->state) {
case ODEBUG_STATE_INIT:
case ODEBUG_STATE_INACTIVE:
break;
case ODEBUG_STATE_ACTIVE:
- debug_print_object(obj, "activate");
state = obj->state;
raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_print_object(obj, "activate");
ret = debug_object_fixup(descr->fixup_activate, addr, state);
return ret ? 0 : -EINVAL;
case ODEBUG_STATE_DESTROYED:
- debug_print_object(obj, "activate");
+ print_object = true;
ret = -EINVAL;
break;
default:
break;
}
raw_spin_unlock_irqrestore(&db->lock, flags);
+ if (print_object)
+ debug_print_object(obj, "activate");
return ret;
}
raw_spin_unlock_irqrestore(&db->lock, flags);
+
/*
* We are here when a static object is activated. We
* let the type specific code confirm whether this is
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
+ bool print_object = false;
if (!debug_objects_enabled)
return;
if (!obj->astate)
obj->state = ODEBUG_STATE_INACTIVE;
else
- debug_print_object(obj, "deactivate");
+ print_object = true;
break;
case ODEBUG_STATE_DESTROYED:
- debug_print_object(obj, "deactivate");
+ print_object = true;
break;
default:
break;
}
- } else {
+ }
+
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ if (!obj) {
struct debug_obj o = { .object = addr,
.state = ODEBUG_STATE_NOTAVAILABLE,
.descr = descr };
debug_print_object(&o, "deactivate");
+ } else if (print_object) {
+ debug_print_object(obj, "deactivate");
}
-
- raw_spin_unlock_irqrestore(&db->lock, flags);
}
EXPORT_SYMBOL_GPL(debug_object_deactivate);
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
+ bool print_object = false;
if (!debug_objects_enabled)
return;
obj->state = ODEBUG_STATE_DESTROYED;
break;
case ODEBUG_STATE_ACTIVE:
- debug_print_object(obj, "destroy");
state = obj->state;
raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_print_object(obj, "destroy");
debug_object_fixup(descr->fixup_destroy, addr, state);
return;
case ODEBUG_STATE_DESTROYED:
- debug_print_object(obj, "destroy");
+ print_object = true;
break;
default:
break;
}
out_unlock:
raw_spin_unlock_irqrestore(&db->lock, flags);
+ if (print_object)
+ debug_print_object(obj, "destroy");
}
EXPORT_SYMBOL_GPL(debug_object_destroy);
switch (obj->state) {
case ODEBUG_STATE_ACTIVE:
- debug_print_object(obj, "free");
state = obj->state;
raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_print_object(obj, "free");
debug_object_fixup(descr->fixup_free, addr, state);
return;
default:
struct debug_bucket *db;
struct debug_obj *obj;
unsigned long flags;
+ bool print_object = false;
if (!debug_objects_enabled)
return;
if (obj->astate == expect)
obj->astate = next;
else
- debug_print_object(obj, "active_state");
+ print_object = true;
break;
default:
- debug_print_object(obj, "active_state");
+ print_object = true;
break;
}
- } else {
+ }
+
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ if (!obj) {
struct debug_obj o = { .object = addr,
.state = ODEBUG_STATE_NOTAVAILABLE,
.descr = descr };
debug_print_object(&o, "active_state");
+ } else if (print_object) {
+ debug_print_object(obj, "active_state");
}
-
- raw_spin_unlock_irqrestore(&db->lock, flags);
}
EXPORT_SYMBOL_GPL(debug_object_active_state);
struct hlist_node *tmp;
struct debug_obj *obj;
int cnt, objs_checked = 0;
- bool work = false;
saddr = (unsigned long) address;
eaddr = saddr + size;
switch (obj->state) {
case ODEBUG_STATE_ACTIVE:
- debug_print_object(obj, "free");
descr = obj->descr;
state = obj->state;
raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_print_object(obj, "free");
debug_object_fixup(descr->fixup_free,
(void *) oaddr, state);
goto repeat;
default:
hlist_del(&obj->node);
- work |= __free_object(obj);
+ __free_object(obj);
break;
}
}
debug_objects_maxchecked = objs_checked;
/* Schedule work to actually kmem_cache_free() objects */
- if (work)
- schedule_work(&debug_obj_work);
+ if (!obj_freeing && obj_nr_tofree) {
+ WRITE_ONCE(obj_freeing, true);
+ schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY);
+ }
}
void debug_check_no_obj_freed(const void *address, unsigned long size)
static int debug_stats_show(struct seq_file *m, void *v)
{
+ int cpu, obj_percpu_free = 0;
+
+ for_each_possible_cpu(cpu)
+ obj_percpu_free += per_cpu(percpu_obj_pool.obj_free, cpu);
+
seq_printf(m, "max_chain :%d\n", debug_objects_maxchain);
seq_printf(m, "max_checked :%d\n", debug_objects_maxchecked);
seq_printf(m, "warnings :%d\n", debug_objects_warnings);
seq_printf(m, "fixups :%d\n", debug_objects_fixups);
- seq_printf(m, "pool_free :%d\n", obj_pool_free);
+ seq_printf(m, "pool_free :%d\n", obj_pool_free + obj_percpu_free);
+ seq_printf(m, "pool_pcp_free :%d\n", obj_percpu_free);
seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free);
- seq_printf(m, "pool_used :%d\n", obj_pool_used);
+ seq_printf(m, "pool_used :%d\n", obj_pool_used - obj_percpu_free);
seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used);
seq_printf(m, "on_free_list :%d\n", obj_nr_tofree);
seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated);
static int __init debug_objects_init_debugfs(void)
{
- struct dentry *dbgdir, *dbgstats;
+ struct dentry *dbgdir;
if (!debug_objects_enabled)
return 0;
dbgdir = debugfs_create_dir("debug_objects", NULL);
- if (!dbgdir)
- return -ENOMEM;
- dbgstats = debugfs_create_file("stats", 0444, dbgdir, NULL,
- &debug_stats_fops);
- if (!dbgstats)
- goto err;
+ debugfs_create_file("stats", 0444, dbgdir, NULL, &debug_stats_fops);
return 0;
-
-err:
- debugfs_remove(dbgdir);
-
- return -ENOMEM;
}
__initcall(debug_objects_init_debugfs);
*/
void __init debug_objects_mem_init(void)
{
+ int cpu, extras;
+
if (!debug_objects_enabled)
return;
+ /*
+ * Initialize the percpu object pools
+ *
+ * Initialization is not strictly necessary, but was done for
+ * completeness.
+ */
+ for_each_possible_cpu(cpu)
+ INIT_HLIST_HEAD(&per_cpu(percpu_obj_pool.free_objs, cpu));
+
obj_cache = kmem_cache_create("debug_objects_cache",
sizeof (struct debug_obj), 0,
SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE,
* Increase the thresholds for allocating and freeing objects
* according to the number of possible CPUs available in the system.
*/
- debug_objects_pool_size += num_possible_cpus() * 32;
- debug_objects_pool_min_level += num_possible_cpus() * 4;
+ extras = num_possible_cpus() * ODEBUG_BATCH_SIZE;
+ debug_objects_pool_size += extras;
+ debug_objects_pool_min_level += extras;
}
* if (IS_ERR(base))
* return PTR_ERR(base);
*/
-void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res)
+void __iomem *devm_ioremap_resource(struct device *dev,
+ const struct resource *res)
{
resource_size_t size;
void __iomem *dest_ptr;
{
struct radix_tree_iter iter;
void __rcu **slot;
+ void *entry = NULL;
unsigned long base = idr->idr_base;
unsigned long id = *nextid;
id = (id < base) ? 0 : id - base;
- slot = radix_tree_iter_find(&idr->idr_rt, &iter, id);
+ radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, id) {
+ entry = rcu_dereference_raw(*slot);
+ if (!entry)
+ continue;
+ if (!xa_is_internal(entry))
+ break;
+ if (slot != &idr->idr_rt.xa_head && !xa_is_retry(entry))
+ break;
+ slot = radix_tree_iter_retry(&iter);
+ }
if (!slot)
return NULL;
id = iter.index + base;
return NULL;
*nextid = id;
- return rcu_dereference_raw(*slot);
+ return entry;
}
EXPORT_SYMBOL(idr_get_next);
int mpi_powm(MPI res, MPI base, MPI exp, MPI mod)
{
mpi_ptr_t mp_marker = NULL, bp_marker = NULL, ep_marker = NULL;
+ struct karatsuba_ctx karactx = {};
mpi_ptr_t xp_marker = NULL;
mpi_ptr_t tspace = NULL;
mpi_ptr_t rp, ep, mp, bp;
int c;
mpi_limb_t e;
mpi_limb_t carry_limb;
- struct karatsuba_ctx karactx;
xp = xp_marker = mpi_alloc_limb_space(2 * (msize + 1));
if (!xp)
goto enomem;
- memset(&karactx, 0, sizeof karactx);
negative_result = (ep[0] & 1) && base->sign;
i = esize - 1;
if (mod_shift_cnt)
mpihelp_rshift(rp, rp, rsize, mod_shift_cnt);
MPN_NORMALIZE(rp, rsize);
-
- mpihelp_release_karatsuba_ctx(&karactx);
}
if (negative_result && rsize) {
leave:
rc = 0;
enomem:
+ mpihelp_release_karatsuba_ctx(&karactx);
if (assign_rp)
mpi_assign_limb_space(res, rp, size);
if (mp_marker)
typedef struct { u8 _[16 * $#]; } addrtype;
register addrtype *__ptr asm("1") = (addrtype *) ptr;
- asm volatile ("VLM %2,%3,0,%r1"
+ asm volatile ("VLM %2,%3,0,%1"
: : "m" (*__ptr), "a" (__ptr), "i" (x),
"i" (x + $# - 1));
}
#
obj-$(CONFIG_REED_SOLOMON) += reed_solomon.o
-
+obj-$(CONFIG_REED_SOLOMON_TEST) += test_rslib.o
uint16_t *index_of = rs->index_of;
uint16_t u, q, tmp, num1, num2, den, discr_r, syn_error;
int count = 0;
+ int num_corrected;
uint16_t msk = (uint16_t) rs->nn;
/*
/* Check length parameter for validity */
pad = nn - nroots - len;
- BUG_ON(pad < 0 || pad >= nn);
+ BUG_ON(pad < 0 || pad >= nn - nroots);
/* Does the caller provide the syndrome ? */
- if (s != NULL)
- goto decode;
+ if (s != NULL) {
+ for (i = 0; i < nroots; i++) {
+ /* The syndrome is in index form,
+ * so nn represents zero
+ */
+ if (s[i] != nn)
+ goto decode;
+ }
+
+ /* syndrome is zero, no errors to correct */
+ return 0;
+ }
/* form the syndromes; i.e., evaluate data(x) at roots of
* g(x) */
/* if syndrome is zero, data[] is a codeword and there are no
* errors to correct. So return data[] unmodified
*/
- count = 0;
- goto finish;
+ return 0;
}
decode:
if (no_eras > 0) {
/* Init lambda to be the erasure locator polynomial */
lambda[1] = alpha_to[rs_modnn(rs,
- prim * (nn - 1 - eras_pos[0]))];
+ prim * (nn - 1 - (eras_pos[0] + pad)))];
for (i = 1; i < no_eras; i++) {
- u = rs_modnn(rs, prim * (nn - 1 - eras_pos[i]));
+ u = rs_modnn(rs, prim * (nn - 1 - (eras_pos[i] + pad)));
for (j = i + 1; j > 0; j--) {
tmp = index_of[lambda[j - 1]];
if (tmp != nn) {
if (lambda[i] != nn)
deg_lambda = i;
}
+
+ if (deg_lambda == 0) {
+ /*
+ * deg(lambda) is zero even though the syndrome is non-zero
+ * => uncorrectable error detected
+ */
+ return -EBADMSG;
+ }
+
/* Find roots of error+erasure locator polynomial by Chien search */
memcpy(®[1], &lambda[1], nroots * sizeof(reg[0]));
count = 0; /* Number of roots of lambda(x) */
}
if (q != 0)
continue; /* Not a root */
+
+ if (k < pad) {
+ /* Impossible error location. Uncorrectable error. */
+ return -EBADMSG;
+ }
+
/* store root (index-form) and error location number */
root[count] = i;
loc[count] = k;
* deg(lambda) unequal to number of roots => uncorrectable
* error detected
*/
- count = -EBADMSG;
- goto finish;
+ return -EBADMSG;
}
/*
* Compute err+eras evaluator poly omega(x) = s(x)*lambda(x) (modulo
/*
* Compute error values in poly-form. num1 = omega(inv(X(l))), num2 =
* inv(X(l))**(fcr-1) and den = lambda_pr(inv(X(l))) all in poly-form
+ * Note: we reuse the buffer for b to store the correction pattern
*/
+ num_corrected = 0;
for (j = count - 1; j >= 0; j--) {
num1 = 0;
for (i = deg_omega; i >= 0; i--) {
num1 ^= alpha_to[rs_modnn(rs, omega[i] +
i * root[j])];
}
+
+ if (num1 == 0) {
+ /* Nothing to correct at this position */
+ b[j] = 0;
+ continue;
+ }
+
num2 = alpha_to[rs_modnn(rs, root[j] * (fcr - 1) + nn)];
den = 0;
i * root[j])];
}
}
- /* Apply error to data */
- if (num1 != 0 && loc[j] >= pad) {
- uint16_t cor = alpha_to[rs_modnn(rs,index_of[num1] +
- index_of[num2] +
- nn - index_of[den])];
- /* Store the error correction pattern, if a
- * correction buffer is available */
- if (corr) {
- corr[j] = cor;
- } else {
- /* If a data buffer is given and the
- * error is inside the message,
- * correct it */
- if (data && (loc[j] < (nn - nroots)))
- data[loc[j] - pad] ^= cor;
- }
+
+ b[j] = alpha_to[rs_modnn(rs, index_of[num1] +
+ index_of[num2] +
+ nn - index_of[den])];
+ num_corrected++;
+ }
+
+ /*
+ * We compute the syndrome of the 'error' and check that it matches
+ * the syndrome of the received word
+ */
+ for (i = 0; i < nroots; i++) {
+ tmp = 0;
+ for (j = 0; j < count; j++) {
+ if (b[j] == 0)
+ continue;
+
+ k = (fcr + i) * prim * (nn-loc[j]-1);
+ tmp ^= alpha_to[rs_modnn(rs, index_of[b[j]] + k)];
}
+
+ if (tmp != alpha_to[s[i]])
+ return -EBADMSG;
}
-finish:
- if (eras_pos != NULL) {
- for (i = 0; i < count; i++)
- eras_pos[i] = loc[i] - pad;
+ /*
+ * Store the error correction pattern, if a
+ * correction buffer is available
+ */
+ if (corr && eras_pos) {
+ j = 0;
+ for (i = 0; i < count; i++) {
+ if (b[i]) {
+ corr[j] = b[i];
+ eras_pos[j++] = loc[i] - pad;
+ }
+ }
+ } else if (data && par) {
+ /* Apply error to data and parity */
+ for (i = 0; i < count; i++) {
+ if (loc[i] < (nn - nroots))
+ data[loc[i] - pad] ^= b[i];
+ else
+ par[loc[i] - pad - len] ^= b[i];
+ }
}
- return count;
+ return num_corrected;
}
* @data: data field of a given type
* @par: received parity data field
* @len: data length
- * @s: syndrome data field (if NULL, syndrome is calculated)
+ * @s: syndrome data field, must be in index form
+ * (if NULL, syndrome is calculated)
* @no_eras: number of erasures
* @eras_pos: position of erasures, can be NULL
* @invmsk: invert data mask (will be xored on data, not on parity!)
* decoding, so the caller has to ensure that decoder invocations are
* serialized.
*
- * Returns the number of corrected bits or -EBADMSG for uncorrectable errors.
+ * Returns the number of corrected symbols or -EBADMSG for uncorrectable
+ * errors. The count includes errors in the parity.
*/
int decode_rs8(struct rs_control *rsc, uint8_t *data, uint16_t *par, int len,
uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk,
* @data: data field of a given type
* @par: received parity data field
* @len: data length
- * @s: syndrome data field (if NULL, syndrome is calculated)
+ * @s: syndrome data field, must be in index form
+ * (if NULL, syndrome is calculated)
* @no_eras: number of erasures
* @eras_pos: position of erasures, can be NULL
* @invmsk: invert data mask (will be xored on data, not on parity!)
* decoding, so the caller has to ensure that decoder invocations are
* serialized.
*
- * Returns the number of corrected bits or -EBADMSG for uncorrectable errors.
+ * Returns the number of corrected symbols or -EBADMSG for uncorrectable
+ * errors. The count includes errors in the parity.
*/
int decode_rs16(struct rs_control *rsc, uint16_t *data, uint16_t *par, int len,
uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk,
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests for Generic Reed Solomon encoder / decoder library
+ *
+ * Written by Ferdinand Blomqvist
+ * Based on previous work by Phil Karn, KA9Q
+ */
+#include <linux/rslib.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+
+enum verbosity {
+ V_SILENT,
+ V_PROGRESS,
+ V_CSUMMARY
+};
+
+enum method {
+ CORR_BUFFER,
+ CALLER_SYNDROME,
+ IN_PLACE
+};
+
+#define __param(type, name, init, msg) \
+ static type name = init; \
+ module_param(name, type, 0444); \
+ MODULE_PARM_DESC(name, msg)
+
+__param(int, v, V_PROGRESS, "Verbosity level");
+__param(int, ewsc, 1, "Erasures without symbol corruption");
+__param(int, bc, 1, "Test for correct behaviour beyond error correction capacity");
+
+struct etab {
+ int symsize;
+ int genpoly;
+ int fcs;
+ int prim;
+ int nroots;
+ int ntrials;
+};
+
+/* List of codes to test */
+static struct etab Tab[] = {
+ {2, 0x7, 1, 1, 1, 100000 },
+ {3, 0xb, 1, 1, 2, 100000 },
+ {3, 0xb, 1, 1, 3, 100000 },
+ {3, 0xb, 2, 1, 4, 100000 },
+ {4, 0x13, 1, 1, 4, 10000 },
+ {5, 0x25, 1, 1, 6, 1000 },
+ {6, 0x43, 3, 1, 8, 1000 },
+ {7, 0x89, 1, 1, 14, 500 },
+ {8, 0x11d, 1, 1, 30, 100 },
+ {8, 0x187, 112, 11, 32, 100 },
+ {9, 0x211, 1, 1, 33, 80 },
+ {0, 0, 0, 0, 0, 0},
+};
+
+
+struct estat {
+ int dwrong;
+ int irv;
+ int wepos;
+ int nwords;
+};
+
+struct bcstat {
+ int rfail;
+ int rsuccess;
+ int noncw;
+ int nwords;
+};
+
+struct wspace {
+ uint16_t *c; /* sent codeword */
+ uint16_t *r; /* received word */
+ uint16_t *s; /* syndrome */
+ uint16_t *corr; /* correction buffer */
+ int *errlocs;
+ int *derrlocs;
+};
+
+struct pad {
+ int mult;
+ int shift;
+};
+
+static struct pad pad_coef[] = {
+ { 0, 0 },
+ { 1, 2 },
+ { 1, 1 },
+ { 3, 2 },
+ { 1, 0 },
+};
+
+static void free_ws(struct wspace *ws)
+{
+ if (!ws)
+ return;
+
+ kfree(ws->errlocs);
+ kfree(ws->c);
+ kfree(ws);
+}
+
+static struct wspace *alloc_ws(struct rs_codec *rs)
+{
+ int nroots = rs->nroots;
+ struct wspace *ws;
+ int nn = rs->nn;
+
+ ws = kzalloc(sizeof(*ws), GFP_KERNEL);
+ if (!ws)
+ return NULL;
+
+ ws->c = kmalloc_array(2 * (nn + nroots),
+ sizeof(uint16_t), GFP_KERNEL);
+ if (!ws->c)
+ goto err;
+
+ ws->r = ws->c + nn;
+ ws->s = ws->r + nn;
+ ws->corr = ws->s + nroots;
+
+ ws->errlocs = kmalloc_array(nn + nroots, sizeof(int), GFP_KERNEL);
+ if (!ws->errlocs)
+ goto err;
+
+ ws->derrlocs = ws->errlocs + nn;
+ return ws;
+
+err:
+ free_ws(ws);
+ return NULL;
+}
+
+
+/*
+ * Generates a random codeword and stores it in c. Generates random errors and
+ * erasures, and stores the random word with errors in r. Erasure positions are
+ * stored in derrlocs, while errlocs has one of three values in every position:
+ *
+ * 0 if there is no error in this position;
+ * 1 if there is a symbol error in this position;
+ * 2 if there is an erasure without symbol corruption.
+ *
+ * Returns the number of corrupted symbols.
+ */
+static int get_rcw_we(struct rs_control *rs, struct wspace *ws,
+ int len, int errs, int eras)
+{
+ int nroots = rs->codec->nroots;
+ int *derrlocs = ws->derrlocs;
+ int *errlocs = ws->errlocs;
+ int dlen = len - nroots;
+ int nn = rs->codec->nn;
+ uint16_t *c = ws->c;
+ uint16_t *r = ws->r;
+ int errval;
+ int errloc;
+ int i;
+
+ /* Load c with random data and encode */
+ for (i = 0; i < dlen; i++)
+ c[i] = prandom_u32() & nn;
+
+ memset(c + dlen, 0, nroots * sizeof(*c));
+ encode_rs16(rs, c, dlen, c + dlen, 0);
+
+ /* Make copyand add errors and erasures */
+ memcpy(r, c, len * sizeof(*r));
+ memset(errlocs, 0, len * sizeof(*errlocs));
+ memset(derrlocs, 0, nroots * sizeof(*derrlocs));
+
+ /* Generating random errors */
+ for (i = 0; i < errs; i++) {
+ do {
+ /* Error value must be nonzero */
+ errval = prandom_u32() & nn;
+ } while (errval == 0);
+
+ do {
+ /* Must not choose the same location twice */
+ errloc = prandom_u32() % len;
+ } while (errlocs[errloc] != 0);
+
+ errlocs[errloc] = 1;
+ r[errloc] ^= errval;
+ }
+
+ /* Generating random erasures */
+ for (i = 0; i < eras; i++) {
+ do {
+ /* Must not choose the same location twice */
+ errloc = prandom_u32() % len;
+ } while (errlocs[errloc] != 0);
+
+ derrlocs[i] = errloc;
+
+ if (ewsc && (prandom_u32() & 1)) {
+ /* Erasure with the symbol intact */
+ errlocs[errloc] = 2;
+ } else {
+ /* Erasure with corrupted symbol */
+ do {
+ /* Error value must be nonzero */
+ errval = prandom_u32() & nn;
+ } while (errval == 0);
+
+ errlocs[errloc] = 1;
+ r[errloc] ^= errval;
+ errs++;
+ }
+ }
+
+ return errs;
+}
+
+static void fix_err(uint16_t *data, int nerrs, uint16_t *corr, int *errlocs)
+{
+ int i;
+
+ for (i = 0; i < nerrs; i++)
+ data[errlocs[i]] ^= corr[i];
+}
+
+static void compute_syndrome(struct rs_control *rsc, uint16_t *data,
+ int len, uint16_t *syn)
+{
+ struct rs_codec *rs = rsc->codec;
+ uint16_t *alpha_to = rs->alpha_to;
+ uint16_t *index_of = rs->index_of;
+ int nroots = rs->nroots;
+ int prim = rs->prim;
+ int fcr = rs->fcr;
+ int i, j;
+
+ /* Calculating syndrome */
+ for (i = 0; i < nroots; i++) {
+ syn[i] = data[0];
+ for (j = 1; j < len; j++) {
+ if (syn[i] == 0) {
+ syn[i] = data[j];
+ } else {
+ syn[i] = data[j] ^
+ alpha_to[rs_modnn(rs, index_of[syn[i]]
+ + (fcr + i) * prim)];
+ }
+ }
+ }
+
+ /* Convert to index form */
+ for (i = 0; i < nroots; i++)
+ syn[i] = rs->index_of[syn[i]];
+}
+
+/* Test up to error correction capacity */
+static void test_uc(struct rs_control *rs, int len, int errs,
+ int eras, int trials, struct estat *stat,
+ struct wspace *ws, int method)
+{
+ int dlen = len - rs->codec->nroots;
+ int *derrlocs = ws->derrlocs;
+ int *errlocs = ws->errlocs;
+ uint16_t *corr = ws->corr;
+ uint16_t *c = ws->c;
+ uint16_t *r = ws->r;
+ uint16_t *s = ws->s;
+ int derrs, nerrs;
+ int i, j;
+
+ for (j = 0; j < trials; j++) {
+ nerrs = get_rcw_we(rs, ws, len, errs, eras);
+
+ switch (method) {
+ case CORR_BUFFER:
+ derrs = decode_rs16(rs, r, r + dlen, dlen,
+ NULL, eras, derrlocs, 0, corr);
+ fix_err(r, derrs, corr, derrlocs);
+ break;
+ case CALLER_SYNDROME:
+ compute_syndrome(rs, r, len, s);
+ derrs = decode_rs16(rs, NULL, NULL, dlen,
+ s, eras, derrlocs, 0, corr);
+ fix_err(r, derrs, corr, derrlocs);
+ break;
+ case IN_PLACE:
+ derrs = decode_rs16(rs, r, r + dlen, dlen,
+ NULL, eras, derrlocs, 0, NULL);
+ break;
+ default:
+ continue;
+ }
+
+ if (derrs != nerrs)
+ stat->irv++;
+
+ if (method != IN_PLACE) {
+ for (i = 0; i < derrs; i++) {
+ if (errlocs[derrlocs[i]] != 1)
+ stat->wepos++;
+ }
+ }
+
+ if (memcmp(r, c, len * sizeof(*r)))
+ stat->dwrong++;
+ }
+ stat->nwords += trials;
+}
+
+static int ex_rs_helper(struct rs_control *rs, struct wspace *ws,
+ int len, int trials, int method)
+{
+ static const char * const desc[] = {
+ "Testing correction buffer interface...",
+ "Testing with caller provided syndrome...",
+ "Testing in-place interface..."
+ };
+
+ struct estat stat = {0, 0, 0, 0};
+ int nroots = rs->codec->nroots;
+ int errs, eras, retval;
+
+ if (v >= V_PROGRESS)
+ pr_info(" %s\n", desc[method]);
+
+ for (errs = 0; errs <= nroots / 2; errs++)
+ for (eras = 0; eras <= nroots - 2 * errs; eras++)
+ test_uc(rs, len, errs, eras, trials, &stat, ws, method);
+
+ if (v >= V_CSUMMARY) {
+ pr_info(" Decodes wrong: %d / %d\n",
+ stat.dwrong, stat.nwords);
+ pr_info(" Wrong return value: %d / %d\n",
+ stat.irv, stat.nwords);
+ if (method != IN_PLACE)
+ pr_info(" Wrong error position: %d\n", stat.wepos);
+ }
+
+ retval = stat.dwrong + stat.wepos + stat.irv;
+ if (retval && v >= V_PROGRESS)
+ pr_warn(" FAIL: %d decoding failures!\n", retval);
+
+ return retval;
+}
+
+static int exercise_rs(struct rs_control *rs, struct wspace *ws,
+ int len, int trials)
+{
+
+ int retval = 0;
+ int i;
+
+ if (v >= V_PROGRESS)
+ pr_info("Testing up to error correction capacity...\n");
+
+ for (i = 0; i <= IN_PLACE; i++)
+ retval |= ex_rs_helper(rs, ws, len, trials, i);
+
+ return retval;
+}
+
+/* Tests for correct behaviour beyond error correction capacity */
+static void test_bc(struct rs_control *rs, int len, int errs,
+ int eras, int trials, struct bcstat *stat,
+ struct wspace *ws)
+{
+ int nroots = rs->codec->nroots;
+ int dlen = len - nroots;
+ int *derrlocs = ws->derrlocs;
+ uint16_t *corr = ws->corr;
+ uint16_t *r = ws->r;
+ int derrs, j;
+
+ for (j = 0; j < trials; j++) {
+ get_rcw_we(rs, ws, len, errs, eras);
+ derrs = decode_rs16(rs, r, r + dlen, dlen,
+ NULL, eras, derrlocs, 0, corr);
+ fix_err(r, derrs, corr, derrlocs);
+
+ if (derrs >= 0) {
+ stat->rsuccess++;
+
+ /*
+ * We check that the returned word is actually a
+ * codeword. The obious way to do this would be to
+ * compute the syndrome, but we don't want to replicate
+ * that code here. However, all the codes are in
+ * systematic form, and therefore we can encode the
+ * returned word, and see whether the parity changes or
+ * not.
+ */
+ memset(corr, 0, nroots * sizeof(*corr));
+ encode_rs16(rs, r, dlen, corr, 0);
+
+ if (memcmp(r + dlen, corr, nroots * sizeof(*corr)))
+ stat->noncw++;
+ } else {
+ stat->rfail++;
+ }
+ }
+ stat->nwords += trials;
+}
+
+static int exercise_rs_bc(struct rs_control *rs, struct wspace *ws,
+ int len, int trials)
+{
+ struct bcstat stat = {0, 0, 0, 0};
+ int nroots = rs->codec->nroots;
+ int errs, eras, cutoff;
+
+ if (v >= V_PROGRESS)
+ pr_info("Testing beyond error correction capacity...\n");
+
+ for (errs = 1; errs <= nroots; errs++) {
+ eras = nroots - 2 * errs + 1;
+ if (eras < 0)
+ eras = 0;
+
+ cutoff = nroots <= len - errs ? nroots : len - errs;
+ for (; eras <= cutoff; eras++)
+ test_bc(rs, len, errs, eras, trials, &stat, ws);
+ }
+
+ if (v >= V_CSUMMARY) {
+ pr_info(" decoder gives up: %d / %d\n",
+ stat.rfail, stat.nwords);
+ pr_info(" decoder returns success: %d / %d\n",
+ stat.rsuccess, stat.nwords);
+ pr_info(" not a codeword: %d / %d\n",
+ stat.noncw, stat.rsuccess);
+ }
+
+ if (stat.noncw && v >= V_PROGRESS)
+ pr_warn(" FAIL: %d silent failures!\n", stat.noncw);
+
+ return stat.noncw;
+}
+
+static int run_exercise(struct etab *e)
+{
+ int nn = (1 << e->symsize) - 1;
+ int kk = nn - e->nroots;
+ struct rs_control *rsc;
+ int retval = -ENOMEM;
+ int max_pad = kk - 1;
+ int prev_pad = -1;
+ struct wspace *ws;
+ int i;
+
+ rsc = init_rs(e->symsize, e->genpoly, e->fcs, e->prim, e->nroots);
+ if (!rsc)
+ return retval;
+
+ ws = alloc_ws(rsc->codec);
+ if (!ws)
+ goto err;
+
+ retval = 0;
+ for (i = 0; i < ARRAY_SIZE(pad_coef); i++) {
+ int pad = (pad_coef[i].mult * max_pad) >> pad_coef[i].shift;
+ int len = nn - pad;
+
+ if (pad == prev_pad)
+ continue;
+
+ prev_pad = pad;
+ if (v >= V_PROGRESS) {
+ pr_info("Testing (%d,%d)_%d code...\n",
+ len, kk - pad, nn + 1);
+ }
+
+ retval |= exercise_rs(rsc, ws, len, e->ntrials);
+ if (bc)
+ retval |= exercise_rs_bc(rsc, ws, len, e->ntrials);
+ }
+
+ free_ws(ws);
+
+err:
+ free_rs(rsc);
+ return retval;
+}
+
+static int __init test_rslib_init(void)
+{
+ int i, fail = 0;
+
+ for (i = 0; Tab[i].symsize != 0 ; i++) {
+ int retval;
+
+ retval = run_exercise(Tab + i);
+ if (retval < 0)
+ return -ENOMEM;
+
+ fail |= retval;
+ }
+
+ if (fail)
+ pr_warn("rslib: test failed\n");
+ else
+ pr_info("rslib: test ok\n");
+
+ return -EAGAIN; /* Fail will directly unload the module */
+}
+
+static void __exit test_rslib_exit(void)
+{
+}
+
+module_init(test_rslib_init)
+module_exit(test_rslib_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Ferdinand Blomqvist");
+MODULE_DESCRIPTION("Reed-Solomon library test");
return xa_store(xa, index, xa_mk_index(index), gfp);
}
+static void xa_insert_index(struct xarray *xa, unsigned long index)
+{
+ XA_BUG_ON(xa, xa_insert(xa, index, xa_mk_index(index),
+ GFP_KERNEL) != 0);
+}
+
static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp)
{
u32 id;
}
}
+static noinline void check_insert(struct xarray *xa)
+{
+ unsigned long i;
+
+ for (i = 0; i < 1024; i++) {
+ xa_insert_index(xa, i);
+ XA_BUG_ON(xa, xa_load(xa, i - 1) != NULL);
+ XA_BUG_ON(xa, xa_load(xa, i + 1) != NULL);
+ xa_erase_index(xa, i);
+ }
+
+ for (i = 10; i < BITS_PER_LONG; i++) {
+ xa_insert_index(xa, 1UL << i);
+ XA_BUG_ON(xa, xa_load(xa, (1UL << i) - 1) != NULL);
+ XA_BUG_ON(xa, xa_load(xa, (1UL << i) + 1) != NULL);
+ xa_erase_index(xa, 1UL << i);
+
+ xa_insert_index(xa, (1UL << i) - 1);
+ XA_BUG_ON(xa, xa_load(xa, (1UL << i) - 2) != NULL);
+ XA_BUG_ON(xa, xa_load(xa, 1UL << i) != NULL);
+ xa_erase_index(xa, (1UL << i) - 1);
+ }
+
+ xa_insert_index(xa, ~0UL);
+ XA_BUG_ON(xa, xa_load(xa, 0UL) != NULL);
+ XA_BUG_ON(xa, xa_load(xa, ~1UL) != NULL);
+ xa_erase_index(xa, ~0UL);
+
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
static noinline void check_cmpxchg(struct xarray *xa)
{
void *FIVE = xa_mk_value(5);
check_xa_mark(&array);
check_xa_shrink(&array);
check_xas_erase(&array);
+ check_insert(&array);
check_cmpxchg(&array);
check_reserve(&array);
check_reserve(&xa0);
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+
+config HAVE_GENERIC_VDSO
+ bool
+
+if HAVE_GENERIC_VDSO
+
+config GENERIC_GETTIMEOFDAY
+ bool
+ help
+ This is a generic implementation of gettimeofday vdso.
+ Each architecture that enables this feature has to
+ provide the fallback implementation.
+
+config GENERIC_VDSO_32
+ bool
+ depends on GENERIC_GETTIMEOFDAY && !64BIT
+ help
+ This config option helps to avoid possible performance issues
+ in 32 bit only architectures.
+
+config GENERIC_COMPAT_VDSO
+ bool
+ help
+ This config option enables the compat VDSO layer.
+
+config CROSS_COMPILE_COMPAT_VDSO
+ string "32 bit Toolchain prefix for compat vDSO"
+ default ""
+ depends on GENERIC_COMPAT_VDSO
+ help
+ Defines the cross-compiler prefix for compiling compat vDSO.
+ If a 64 bit compiler (i.e. x86_64) can compile the VDSO for
+ 32 bit, it does not need to define this parameter.
+
+endif
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+
+GENERIC_VDSO_MK_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
+GENERIC_VDSO_DIR := $(dir $(GENERIC_VDSO_MK_PATH))
+
+c-gettimeofday-$(CONFIG_GENERIC_GETTIMEOFDAY) := $(addprefix $(GENERIC_VDSO_DIR), gettimeofday.c)
+
+# This cmd checks that the vdso library does not contain absolute relocation
+# It has to be called after the linking of the vdso library and requires it
+# as a parameter.
+#
+# $(ARCH_REL_TYPE_ABS) is defined in the arch specific makefile and corresponds
+# to the absolute relocation types printed by "objdump -R" and accepted by the
+# dynamic linker.
+ifndef ARCH_REL_TYPE_ABS
+$(error ARCH_REL_TYPE_ABS is not set)
+endif
+
+quiet_cmd_vdso_check = VDSOCHK $@
+ cmd_vdso_check = if $(OBJDUMP) -R $@ | egrep -h "$(ARCH_REL_TYPE_ABS)"; \
+ then (echo >&2 "$@: dynamic relocations are not supported"; \
+ rm -f $@; /bin/false); fi
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic userspace implementations of gettimeofday() and similar.
+ */
+#include <linux/compiler.h>
+#include <linux/math64.h>
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/hrtimer_defs.h>
+#include <vdso/datapage.h>
+#include <vdso/helpers.h>
+
+/*
+ * The generic vDSO implementation requires that gettimeofday.h
+ * provides:
+ * - __arch_get_vdso_data(): to get the vdso datapage.
+ * - __arch_get_hw_counter(): to get the hw counter based on the
+ * clock_mode.
+ * - gettimeofday_fallback(): fallback for gettimeofday.
+ * - clock_gettime_fallback(): fallback for clock_gettime.
+ * - clock_getres_fallback(): fallback for clock_getres.
+ */
+#ifdef ENABLE_COMPAT_VDSO
+#include <asm/vdso/compat_gettimeofday.h>
+#else
+#include <asm/vdso/gettimeofday.h>
+#endif /* ENABLE_COMPAT_VDSO */
+
+#ifndef vdso_calc_delta
+/*
+ * Default implementation which works for all sane clocksources. That
+ * obviously excludes x86/TSC.
+ */
+static __always_inline
+u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+{
+ return ((cycles - last) & mask) * mult;
+}
+#endif
+
+static int do_hres(const struct vdso_data *vd, clockid_t clk,
+ struct __kernel_timespec *ts)
+{
+ const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
+ u64 cycles, last, sec, ns;
+ u32 seq;
+
+ do {
+ seq = vdso_read_begin(vd);
+ cycles = __arch_get_hw_counter(vd->clock_mode);
+ ns = vdso_ts->nsec;
+ last = vd->cycle_last;
+ if (unlikely((s64)cycles < 0))
+ return clock_gettime_fallback(clk, ts);
+
+ ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
+ ns >>= vd->shift;
+ sec = vdso_ts->sec;
+ } while (unlikely(vdso_read_retry(vd, seq)));
+
+ /*
+ * Do this outside the loop: a race inside the loop could result
+ * in __iter_div_u64_rem() being extremely slow.
+ */
+ ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+ ts->tv_nsec = ns;
+
+ return 0;
+}
+
+static void do_coarse(const struct vdso_data *vd, clockid_t clk,
+ struct __kernel_timespec *ts)
+{
+ const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
+ u32 seq;
+
+ do {
+ seq = vdso_read_begin(vd);
+ ts->tv_sec = vdso_ts->sec;
+ ts->tv_nsec = vdso_ts->nsec;
+ } while (unlikely(vdso_read_retry(vd, seq)));
+}
+
+static __maybe_unused int
+__cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
+{
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ u32 msk;
+
+ /* Check for negative values or invalid clocks */
+ if (unlikely((u32) clock >= MAX_CLOCKS))
+ goto fallback;
+
+ /*
+ * Convert the clockid to a bitmask and use it to check which
+ * clocks are handled in the VDSO directly.
+ */
+ msk = 1U << clock;
+ if (likely(msk & VDSO_HRES)) {
+ return do_hres(&vd[CS_HRES_COARSE], clock, ts);
+ } else if (msk & VDSO_COARSE) {
+ do_coarse(&vd[CS_HRES_COARSE], clock, ts);
+ return 0;
+ } else if (msk & VDSO_RAW) {
+ return do_hres(&vd[CS_RAW], clock, ts);
+ }
+
+fallback:
+ return clock_gettime_fallback(clock, ts);
+}
+
+static __maybe_unused int
+__cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
+{
+ struct __kernel_timespec ts;
+ int ret;
+
+ if (res == NULL)
+ goto fallback;
+
+ ret = __cvdso_clock_gettime(clock, &ts);
+
+ if (ret == 0) {
+ res->tv_sec = ts.tv_sec;
+ res->tv_nsec = ts.tv_nsec;
+ }
+
+ return ret;
+
+fallback:
+ return clock_gettime_fallback(clock, (struct __kernel_timespec *)res);
+}
+
+static __maybe_unused int
+__cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
+{
+ const struct vdso_data *vd = __arch_get_vdso_data();
+
+ if (likely(tv != NULL)) {
+ struct __kernel_timespec ts;
+
+ if (do_hres(&vd[CS_HRES_COARSE], CLOCK_REALTIME, &ts))
+ return gettimeofday_fallback(tv, tz);
+
+ tv->tv_sec = ts.tv_sec;
+ tv->tv_usec = (u32)ts.tv_nsec / NSEC_PER_USEC;
+ }
+
+ if (unlikely(tz != NULL)) {
+ tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest;
+ tz->tz_dsttime = vd[CS_HRES_COARSE].tz_dsttime;
+ }
+
+ return 0;
+}
+
+#ifdef VDSO_HAS_TIME
+static __maybe_unused time_t __cvdso_time(time_t *time)
+{
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ time_t t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec);
+
+ if (time)
+ *time = t;
+
+ return t;
+}
+#endif /* VDSO_HAS_TIME */
+
+#ifdef VDSO_HAS_CLOCK_GETRES
+static __maybe_unused
+int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
+{
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ u64 ns;
+ u32 msk;
+ u64 hrtimer_res = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res);
+
+ /* Check for negative values or invalid clocks */
+ if (unlikely((u32) clock >= MAX_CLOCKS))
+ goto fallback;
+
+ /*
+ * Convert the clockid to a bitmask and use it to check which
+ * clocks are handled in the VDSO directly.
+ */
+ msk = 1U << clock;
+ if (msk & VDSO_HRES) {
+ /*
+ * Preserves the behaviour of posix_get_hrtimer_res().
+ */
+ ns = hrtimer_res;
+ } else if (msk & VDSO_COARSE) {
+ /*
+ * Preserves the behaviour of posix_get_coarse_res().
+ */
+ ns = LOW_RES_NSEC;
+ } else if (msk & VDSO_RAW) {
+ /*
+ * Preserves the behaviour of posix_get_hrtimer_res().
+ */
+ ns = hrtimer_res;
+ } else {
+ goto fallback;
+ }
+
+ if (res) {
+ res->tv_sec = 0;
+ res->tv_nsec = ns;
+ }
+
+ return 0;
+
+fallback:
+ return clock_getres_fallback(clock, res);
+}
+
+static __maybe_unused int
+__cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
+{
+ struct __kernel_timespec ts;
+ int ret;
+
+ if (res == NULL)
+ goto fallback;
+
+ ret = __cvdso_clock_getres(clock, &ts);
+
+ if (ret == 0) {
+ res->tv_sec = ts.tv_sec;
+ res->tv_nsec = ts.tv_nsec;
+ }
+
+ return ret;
+
+fallback:
+ return clock_getres_fallback(clock, (struct __kernel_timespec *)res);
+}
+#endif /* VDSO_HAS_CLOCK_GETRES */
xas_destroy(xas);
return false;
}
+ if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
+ gfp |= __GFP_ACCOUNT;
xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
if (!xas->xa_alloc)
return false;
xas_destroy(xas);
return false;
}
+ if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
+ gfp |= __GFP_ACCOUNT;
if (gfpflags_allow_blocking(gfp)) {
xas_unlock_type(xas, lock_type);
xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
if (node) {
xas->xa_alloc = NULL;
} else {
- node = kmem_cache_alloc(radix_tree_node_cachep,
- GFP_NOWAIT | __GFP_NOWARN);
+ gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN;
+
+ if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT)
+ gfp |= __GFP_ACCOUNT;
+
+ node = kmem_cache_alloc(radix_tree_node_cachep, gfp);
if (!node) {
xas_set_err(xas, -ENOMEM);
return NULL;
* @pvec: pagevec with pages to delete
*
* The function walks over mapping->i_pages and removes pages passed in @pvec
- * from the mapping. The function expects @pvec to be sorted by page index
- * and is optimised for it to be dense.
+ * from the mapping. The function expects @pvec to be sorted by page index.
* It tolerates holes in @pvec (mapping entries at those indices are not
* modified). The function expects only THP head pages to be present in the
- * @pvec.
+ * @pvec and takes care to delete all corresponding tail pages from the
+ * mapping as well.
*
* The function expects the i_pages lock to be held.
*/
{
XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
int total_pages = 0;
- int i = 0;
+ int i = 0, tail_pages = 0;
struct page *page;
mapping_set_update(&xas, mapping);
xas_for_each(&xas, page, ULONG_MAX) {
- if (i >= pagevec_count(pvec))
+ if (i >= pagevec_count(pvec) && !tail_pages)
break;
-
- /* A swap/dax/shadow entry got inserted? Skip it. */
if (xa_is_value(page))
continue;
- /*
- * A page got inserted in our range? Skip it. We have our
- * pages locked so they are protected from being removed.
- * If we see a page whose index is higher than ours, it
- * means our page has been removed, which shouldn't be
- * possible because we're holding the PageLock.
- */
- if (page != pvec->pages[i]) {
- VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index,
- page);
- continue;
- }
-
- WARN_ON_ONCE(!PageLocked(page));
-
- if (page->index == xas.xa_index)
+ if (!tail_pages) {
+ /*
+ * Some page got inserted in our range? Skip it. We
+ * have our pages locked so they are protected from
+ * being removed.
+ */
+ if (page != pvec->pages[i]) {
+ VM_BUG_ON_PAGE(page->index >
+ pvec->pages[i]->index, page);
+ continue;
+ }
+ WARN_ON_ONCE(!PageLocked(page));
+ if (PageTransHuge(page) && !PageHuge(page))
+ tail_pages = HPAGE_PMD_NR - 1;
page->mapping = NULL;
- /* Leave page->index set: truncation lookup relies on it */
-
- /*
- * Move to the next page in the vector if this is a regular
- * page or the index is of the last sub-page of this compound
- * page.
- */
- if (page->index + (1UL << compound_order(page)) - 1 ==
- xas.xa_index)
+ /*
+ * Leave page->index set: truncation lookup relies
+ * upon it
+ */
i++;
+ } else {
+ VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
+ != pvec->pages[i]->index, page);
+ tail_pages--;
+ }
xas_store(&xas, NULL);
total_pages++;
}
struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
{
XA_STATE(xas, &mapping->i_pages, offset);
- struct page *page;
+ struct page *head, *page;
rcu_read_lock();
repeat:
if (!page || xa_is_value(page))
goto out;
- if (!page_cache_get_speculative(page))
+ head = compound_head(page);
+ if (!page_cache_get_speculative(head))
+ goto repeat;
+
+ /* The page was split under us? */
+ if (compound_head(page) != head) {
+ put_page(head);
goto repeat;
+ }
/*
- * Has the page moved or been split?
+ * Has the page moved?
* This is part of the lockless pagecache protocol. See
* include/linux/pagemap.h for details.
*/
if (unlikely(page != xas_reload(&xas))) {
- put_page(page);
+ put_page(head);
goto repeat;
}
- page = find_subpage(page, offset);
out:
rcu_read_unlock();
rcu_read_lock();
xas_for_each(&xas, page, ULONG_MAX) {
+ struct page *head;
if (xas_retry(&xas, page))
continue;
/*
if (xa_is_value(page))
goto export;
- if (!page_cache_get_speculative(page))
+ head = compound_head(page);
+ if (!page_cache_get_speculative(head))
goto retry;
- /* Has the page moved or been split? */
+ /* The page was split under us? */
+ if (compound_head(page) != head)
+ goto put_page;
+
+ /* Has the page moved? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
- page = find_subpage(page, xas.xa_index);
export:
indices[ret] = xas.xa_index;
break;
continue;
put_page:
- put_page(page);
+ put_page(head);
retry:
xas_reset(&xas);
}
rcu_read_lock();
xas_for_each(&xas, page, end) {
+ struct page *head;
if (xas_retry(&xas, page))
continue;
/* Skip over shadow, swap and DAX entries */
if (xa_is_value(page))
continue;
- if (!page_cache_get_speculative(page))
+ head = compound_head(page);
+ if (!page_cache_get_speculative(head))
goto retry;
- /* Has the page moved or been split? */
+ /* The page was split under us? */
+ if (compound_head(page) != head)
+ goto put_page;
+
+ /* Has the page moved? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
- pages[ret] = find_subpage(page, xas.xa_index);
+ pages[ret] = page;
if (++ret == nr_pages) {
*start = xas.xa_index + 1;
goto out;
}
continue;
put_page:
- put_page(page);
+ put_page(head);
retry:
xas_reset(&xas);
}
rcu_read_lock();
for (page = xas_load(&xas); page; page = xas_next(&xas)) {
+ struct page *head;
if (xas_retry(&xas, page))
continue;
/*
if (xa_is_value(page))
break;
- if (!page_cache_get_speculative(page))
+ head = compound_head(page);
+ if (!page_cache_get_speculative(head))
goto retry;
- /* Has the page moved or been split? */
+ /* The page was split under us? */
+ if (compound_head(page) != head)
+ goto put_page;
+
+ /* Has the page moved? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
- pages[ret] = find_subpage(page, xas.xa_index);
+ pages[ret] = page;
if (++ret == nr_pages)
break;
continue;
put_page:
- put_page(page);
+ put_page(head);
retry:
xas_reset(&xas);
}
rcu_read_lock();
xas_for_each_marked(&xas, page, end, tag) {
+ struct page *head;
if (xas_retry(&xas, page))
continue;
/*
if (xa_is_value(page))
continue;
- if (!page_cache_get_speculative(page))
+ head = compound_head(page);
+ if (!page_cache_get_speculative(head))
goto retry;
- /* Has the page moved or been split? */
+ /* The page was split under us? */
+ if (compound_head(page) != head)
+ goto put_page;
+
+ /* Has the page moved? */
if (unlikely(page != xas_reload(&xas)))
goto put_page;
- pages[ret] = find_subpage(page, xas.xa_index);
+ pages[ret] = page;
if (++ret == nr_pages) {
*index = xas.xa_index + 1;
goto out;
}
continue;
put_page:
- put_page(page);
+ put_page(head);
retry:
xas_reset(&xas);
}
pgoff_t last_pgoff = start_pgoff;
unsigned long max_idx;
XA_STATE(xas, &mapping->i_pages, start_pgoff);
- struct page *page;
+ struct page *head, *page;
rcu_read_lock();
xas_for_each(&xas, page, end_pgoff) {
if (xa_is_value(page))
goto next;
+ head = compound_head(page);
+
/*
* Check for a locked page first, as a speculative
* reference may adversely influence page migration.
*/
- if (PageLocked(page))
+ if (PageLocked(head))
goto next;
- if (!page_cache_get_speculative(page))
+ if (!page_cache_get_speculative(head))
goto next;
- /* Has the page moved or been split? */
+ /* The page was split under us? */
+ if (compound_head(page) != head)
+ goto skip;
+
+ /* Has the page moved? */
if (unlikely(page != xas_reload(&xas)))
goto skip;
- page = find_subpage(page, xas.xa_index);
if (!PageUptodate(page) ||
PageReadahead(page) ||
if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head))
shmem_uncharge(head->mapping->host, 1);
put_page(head + i);
- } else if (!PageAnon(page)) {
- __xa_store(&head->mapping->i_pages, head[i].index,
- head + i, 0);
}
}
/*
* Dissolve a given free hugepage into free buddy pages. This function does
- * nothing for in-use (including surplus) hugepages. Returns -EBUSY if the
- * dissolution fails because a give page is not a free hugepage, or because
- * free hugepages are fully reserved.
+ * nothing for in-use hugepages and non-hugepages.
+ * This function returns values like below:
+ *
+ * -EBUSY: failed to dissolved free hugepages or the hugepage is in-use
+ * (allocated or reserved.)
+ * 0: successfully dissolved free hugepages or the page is not a
+ * hugepage (considered as already dissolved)
*/
int dissolve_free_huge_page(struct page *page)
{
int rc = -EBUSY;
+ /* Not to disrupt normal path by vainly holding hugetlb_lock */
+ if (!PageHuge(page))
+ return 0;
+
spin_lock(&hugetlb_lock);
- if (PageHuge(page) && !page_count(page)) {
+ if (!PageHuge(page)) {
+ rc = 0;
+ goto out;
+ }
+
+ if (!page_count(page)) {
struct page *head = compound_head(page);
struct hstate *h = page_hstate(head);
int nid = page_to_nid(head);
for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order) {
page = pfn_to_page(pfn);
- if (PageHuge(page) && !page_count(page)) {
- rc = dissolve_free_huge_page(page);
- if (rc)
- break;
- }
+ rc = dissolve_free_huge_page(page);
+ if (rc)
+ break;
}
return rc;
result = SCAN_FAIL;
goto xa_locked;
}
- xas_store(&xas, new_page);
+ xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
nr_none++;
continue;
}
list_add_tail(&page->lru, &pagelist);
/* Finally, replace with the new page. */
- xas_store(&xas, new_page);
+ xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
continue;
out_unlock:
unlock_page(page);
xas_for_each(xas, page, ULONG_MAX) {
if (xa_is_value(page))
continue;
- page = find_subpage(page, xas->xa_index);
if (page_count(page) - page_mapcount(page) > 1)
xas_set_mark(xas, MEMFD_TAG_PINNED);
bool clear = true;
if (xa_is_value(page))
continue;
- page = find_subpage(page, xas.xa_index);
if (page_count(page) - page_mapcount(page) != 1) {
/*
* On the last scan, we clean up all those tags
if (!ret) {
if (set_hwpoison_free_buddy_page(page))
num_poisoned_pages_inc();
+ else
+ ret = -EBUSY;
}
}
return ret;
static int soft_offline_free_page(struct page *page)
{
- int rc = 0;
- struct page *head = compound_head(page);
+ int rc = dissolve_free_huge_page(page);
- if (PageHuge(head))
- rc = dissolve_free_huge_page(page);
if (!rc) {
if (set_hwpoison_free_buddy_page(page))
num_poisoned_pages_inc();
else {
nodes_remap(tmp, pol->v.nodes,pol->w.cpuset_mems_allowed,
*nodes);
- pol->w.cpuset_mems_allowed = tmp;
+ pol->w.cpuset_mems_allowed = *nodes;
}
if (nodes_empty(tmp))
for (i = 1; i < HPAGE_PMD_NR; i++) {
xas_next(&xas);
- xas_store(&xas, newpage);
+ xas_store(&xas, newpage + i);
}
}
/*
* Determines whether the kernel must panic because of the panic_on_oom sysctl.
*/
-static void check_panic_on_oom(struct oom_control *oc,
- enum oom_constraint constraint)
+static void check_panic_on_oom(struct oom_control *oc)
{
if (likely(!sysctl_panic_on_oom))
return;
* does not panic for cpuset, mempolicy, or memcg allocation
* failures.
*/
- if (constraint != CONSTRAINT_NONE)
+ if (oc->constraint != CONSTRAINT_NONE)
return;
}
/* Do not panic for oom kills triggered by sysrq */
bool out_of_memory(struct oom_control *oc)
{
unsigned long freed = 0;
- enum oom_constraint constraint = CONSTRAINT_NONE;
if (oom_killer_disabled)
return false;
* Check if there were limitations on the allocation (only relevant for
* NUMA and memcg) that may require different handling.
*/
- constraint = constrained_alloc(oc);
- if (constraint != CONSTRAINT_MEMORY_POLICY)
+ oc->constraint = constrained_alloc(oc);
+ if (oc->constraint != CONSTRAINT_MEMORY_POLICY)
oc->nodemask = NULL;
- check_panic_on_oom(oc, constraint);
+ check_panic_on_oom(oc);
if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) &&
first_deferred_pfn)) {
pgdat->first_deferred_pfn = ULONG_MAX;
pgdat_resize_unlock(pgdat, &flags);
- return true;
+ /* Retry only once. */
+ return first_deferred_pfn != ULONG_MAX;
}
/*
end_pfn = pfn + count * BITS_PER_BYTE;
if (end_pfn > max_pfn)
- end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS);
+ end_pfn = max_pfn;
for (; pfn < end_pfn; pfn++) {
bit = pfn % BITMAP_CHUNK_BITS;
end_pfn = pfn + count * BITS_PER_BYTE;
if (end_pfn > max_pfn)
- end_pfn = ALIGN(max_pfn, BITMAP_CHUNK_BITS);
+ end_pfn = max_pfn;
for (; pfn < end_pfn; pfn++) {
bit = pfn % BITMAP_CHUNK_BITS;
static struct bio *get_swap_bio(gfp_t gfp_flags,
struct page *page, bio_end_io_t end_io)
{
- int i, nr = hpage_nr_pages(page);
struct bio *bio;
- bio = bio_alloc(gfp_flags, nr);
+ bio = bio_alloc(gfp_flags, 1);
if (bio) {
struct block_device *bdev;
bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9;
bio->bi_end_io = end_io;
- for (i = 0; i < nr; i++)
- bio_add_page(bio, page + i, PAGE_SIZE, 0);
- VM_BUG_ON(bio->bi_iter.bi_size != PAGE_SIZE * nr);
+ bio_add_page(bio, page, PAGE_SIZE * hpage_nr_pages(page), 0);
}
return bio;
}
unlock_page(page);
WRITE_ONCE(bio->bi_private, NULL);
bio_put(bio);
- blk_wake_io_task(waiter);
- put_task_struct(waiter);
+ if (waiter) {
+ blk_wake_io_task(waiter);
+ put_task_struct(waiter);
+ }
}
int generic_swapfile_activate(struct swap_info_struct *sis,
* Keep this task valid during swap readpage because the oom killer may
* attempt to access it in the page fault retry time check.
*/
- get_task_struct(current);
- bio->bi_private = current;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
- if (synchronous)
+ if (synchronous) {
bio->bi_opf |= REQ_HIPRI;
+ get_task_struct(current);
+ bio->bi_private = current;
+ }
count_vm_event(PSWPIN);
bio_get(bio);
qc = submit_bio(bio);
if (xas_error(&xas))
goto unlock;
next:
- xas_store(&xas, page);
+ xas_store(&xas, page + i);
if (++i < nr) {
xas_next(&xas);
goto next;
for (i = 0; i < nr; i++) {
VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
set_page_private(page + i, entry.val + i);
- xas_store(&xas, page);
+ xas_store(&xas, page + i);
xas_next(&xas);
}
address_space->nrpages += nr;
for (i = 0; i < nr; i++) {
void *entry = xas_store(&xas, NULL);
- VM_BUG_ON_PAGE(entry != page, entry);
+ VM_BUG_ON_PAGE(entry != page + i, entry);
set_page_private(page + i, 0);
xas_next(&xas);
}
unsigned long nva_start_addr, unsigned long size,
enum fit_type type)
{
- struct vmap_area *lva;
+ struct vmap_area *lva = NULL;
if (type == FL_FIT_TYPE) {
/*
if (type != FL_FIT_TYPE) {
augment_tree_propagate_from(va);
- if (type == NE_FIT_TYPE)
+ if (lva) /* type == NE_FIT_TYPE */
insert_vmap_area_augment(lva, &va->rb_node,
&free_vmap_area_root, &free_vmap_area_list);
}
int flush_dmap = 0;
int i;
- /*
- * The below block can be removed when all architectures that have
- * direct map permissions also have set_direct_map_() implementations.
- * This is concerned with resetting the direct map any an vm alias with
- * execute permissions, without leaving a RW+X window.
- */
- if (flush_reset && !IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) {
- set_memory_nx((unsigned long)area->addr, area->nr_pages);
- set_memory_rw((unsigned long)area->addr, area->nr_pages);
- }
-
remove_vm_area(area->addr);
/* If this is not VM_FLUSH_RESET_PERMS memory, no need for the below. */
}
/*
- * pgdat->kswapd_classzone_idx is the highest zone index that a recent
- * allocation request woke kswapd for. When kswapd has not woken recently,
- * the value is MAX_NR_ZONES which is not a valid index. This compares a
- * given classzone and returns it or the highest classzone index kswapd
- * was recently woke for.
+ * The pgdat->kswapd_classzone_idx is used to pass the highest zone index to be
+ * reclaimed by kswapd from the waker. If the value is MAX_NR_ZONES which is not
+ * a valid index then either kswapd runs for first time or kswapd couldn't sleep
+ * after previous reclaim attempt (node is still unbalanced). In that case
+ * return the zone index of the previous kswapd reclaim cycle.
*/
static enum zone_type kswapd_classzone_idx(pg_data_t *pgdat,
- enum zone_type classzone_idx)
+ enum zone_type prev_classzone_idx)
{
if (pgdat->kswapd_classzone_idx == MAX_NR_ZONES)
- return classzone_idx;
-
- return max(pgdat->kswapd_classzone_idx, classzone_idx);
+ return prev_classzone_idx;
+ return pgdat->kswapd_classzone_idx;
}
static void kswapd_try_to_sleep(pg_data_t *pgdat, int alloc_order, int reclaim_order,
/* Read the new order and classzone_idx */
alloc_order = reclaim_order = pgdat->kswapd_order;
- classzone_idx = kswapd_classzone_idx(pgdat, 0);
+ classzone_idx = kswapd_classzone_idx(pgdat, classzone_idx);
pgdat->kswapd_order = 0;
pgdat->kswapd_classzone_idx = MAX_NR_ZONES;
if (!cpuset_zone_allowed(zone, gfp_flags))
return;
pgdat = zone->zone_pgdat;
- pgdat->kswapd_classzone_idx = kswapd_classzone_idx(pgdat,
- classzone_idx);
+
+ if (pgdat->kswapd_classzone_idx == MAX_NR_ZONES)
+ pgdat->kswapd_classzone_idx = classzone_idx;
+ else
+ pgdat->kswapd_classzone_idx = max(pgdat->kswapd_classzone_idx,
+ classzone_idx);
pgdat->kswapd_order = max(pgdat->kswapd_order, order);
if (!waitqueue_active(&pgdat->kswapd_wait))
return;
struct in6_addr *daddr,
struct sk_buff *skb)
{
- struct lowpan_peer *peer;
- struct in6_addr *nexthop;
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
int count = atomic_read(&dev->peer_count);
+ const struct in6_addr *nexthop;
+ struct lowpan_peer *peer;
BT_DBG("peers %d addr %pI6c rt %p", count, daddr, rt);
* actually encrypted before enforcing a key size.
*/
return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) ||
- hcon->enc_key_size > HCI_MIN_ENC_KEY_SIZE);
+ hcon->enc_key_size >= HCI_MIN_ENC_KEY_SIZE);
}
static void l2cap_do_start(struct l2cap_chan *chan)
static int ip_mc_finish_output(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
+ struct rtable *new_rt;
int ret;
ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
return ret;
}
+ /* Reset rt_iif so that inet_iif() will return skb->skb_iif. Setting
+ * this to non-zero causes ipi_ifindex in in_pktinfo to be overwritten,
+ * see ipv4_pktinfo_prepare().
+ */
+ new_rt = rt_dst_clone(net->loopback_dev, skb_rtable(skb));
+ if (new_rt) {
+ new_rt->rt_iif = 0;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &new_rt->dst);
+ }
+
return dev_loopback_xmit(net, sk, skb);
}
}
sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
iph->saddr, iph->daddr,
- skb->dev->ifindex, sdif);
+ dif, sdif);
}
out:
read_unlock(&raw_v4_hashinfo.lock);
}
EXPORT_SYMBOL(rt_dst_alloc);
+struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
+{
+ struct rtable *new_rt;
+
+ new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
+ rt->dst.flags);
+
+ if (new_rt) {
+ new_rt->rt_genid = rt_genid_ipv4(dev_net(dev));
+ new_rt->rt_flags = rt->rt_flags;
+ new_rt->rt_type = rt->rt_type;
+ new_rt->rt_is_input = rt->rt_is_input;
+ new_rt->rt_iif = rt->rt_iif;
+ new_rt->rt_pmtu = rt->rt_pmtu;
+ new_rt->rt_mtu_locked = rt->rt_mtu_locked;
+ new_rt->rt_gw_family = rt->rt_gw_family;
+ if (rt->rt_gw_family == AF_INET)
+ new_rt->rt_gw4 = rt->rt_gw4;
+ else if (rt->rt_gw_family == AF_INET6)
+ new_rt->rt_gw6 = rt->rt_gw6;
+ INIT_LIST_HEAD(&new_rt->rt_uncached);
+
+ new_rt->dst.flags |= DST_HOST;
+ new_rt->dst.input = rt->dst.input;
+ new_rt->dst.output = rt->dst.output;
+ new_rt->dst.error = rt->dst.error;
+ new_rt->dst.lastuse = jiffies;
+ new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate);
+ }
+ return new_rt;
+}
+EXPORT_SYMBOL(rt_dst_clone);
+
/* called in rcu_read_lock() section */
int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev,
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
+ const struct in6_addr *nexthop;
struct neighbour *neigh;
- struct in6_addr *nexthop;
int ret;
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
{
const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
- return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
+ return ip6_neigh_lookup(rt6_nexthop(rt, &in6addr_any),
+ dst->dev, skb, daddr);
}
static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
.data = &init_net.ipv6.sysctl.skip_notify_on_dev_down,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
struct nf_flowtable *flow_table = priv;
struct flow_offload_tuple tuple = {};
enum flow_offload_tuple_dir dir;
+ const struct in6_addr *nexthop;
struct flow_offload *flow;
struct net_device *outdev;
- struct in6_addr *nexthop;
struct ipv6hdr *ip6h;
struct rt6_info *rt;
ts = __packet_set_timestamp(po, ph, skb);
__packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
+
+ if (!packet_read_pending(&po->tx_ring))
+ complete(&po->skb_completion);
}
sock_wfree(skb);
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
struct net_device *dev;
struct virtio_net_hdr *vnet_hdr = NULL;
struct sockcm_cookie sockc;
int len_sum = 0;
int status = TP_STATUS_AVAILABLE;
int hlen, tlen, copylen = 0;
+ long timeo = 0;
mutex_lock(&po->pg_vec_lock);
if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
size_max = dev->mtu + reserve + VLAN_HLEN;
+ reinit_completion(&po->skb_completion);
+
do {
ph = packet_current_frame(po, &po->tx_ring,
TP_STATUS_SEND_REQUEST);
if (unlikely(ph == NULL)) {
- if (need_wait && need_resched())
- schedule();
+ if (need_wait && skb) {
+ timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
+ timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo);
+ if (timeo <= 0) {
+ err = !timeo ? -ETIMEDOUT : -ERESTARTSYS;
+ goto out_put;
+ }
+ }
+ /* check for additional frames */
continue;
}
sock_init_data(sock, sk);
po = pkt_sk(sk);
+ init_completion(&po->skb_completion);
sk->sk_family = PF_PACKET;
po->num = proto;
po->xmit = dev_queue_xmit;
req3->tp_sizeof_priv ||
req3->tp_feature_req_word) {
err = -EINVAL;
- goto out;
+ goto out_free_pg_vec;
}
}
break;
prb_shutdown_retire_blk_timer(po, rb_queue);
}
+out_free_pg_vec:
if (pg_vec)
free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
unsigned int tp_hdrlen;
unsigned int tp_reserve;
unsigned int tp_tstamp;
+ struct completion skb_completion;
struct net_device __rcu *cached_dev;
int (*xmit)(struct sk_buff *skb);
struct packet_type prot_hook ____cacheline_aligned_in_smp;
static int __init cbs_module_init(void)
{
- int err = register_netdevice_notifier(&cbs_device_notifier);
+ int err;
+ err = register_netdevice_notifier(&cbs_device_notifier);
if (err)
return err;
- return register_qdisc(&cbs_qdisc_ops);
+ err = register_qdisc(&cbs_qdisc_ops);
+ if (err)
+ unregister_netdevice_notifier(&cbs_device_notifier);
+
+ return err;
}
static void __exit cbs_module_exit(void)
/* Initialize the bind addr area */
sctp_bind_addr_init(&ep->base.bind_addr, 0);
- /* Remember who we are attached to. */
- ep->base.sk = sk;
- sock_hold(ep->base.sk);
-
/* Create the lists of associations. */
INIT_LIST_HEAD(&ep->asocs);
ep->prsctp_enable = net->sctp.prsctp_enable;
ep->reconf_enable = net->sctp.reconf_enable;
+ /* Remember who we are attached to. */
+ ep->base.sk = sk;
+ sock_hold(ep->base.sk);
+
return ep;
nomem_shkey:
rc = smc_pnet_init();
if (rc)
- return rc;
+ goto out_pernet_subsys;
rc = smc_llc_init();
if (rc) {
proto_unregister(&smc_proto);
out_pnet:
smc_pnet_exit();
+out_pernet_subsys:
+ unregister_pernet_subsys(&smc_net_ops);
+
return rc;
}
rc = smc_lgr_create(smc, ini);
if (rc)
goto out;
+ lgr = conn->lgr;
+ write_lock_bh(&lgr->conns_lock);
smc_lgr_register_conn(conn); /* add smc conn to lgr */
+ write_unlock_bh(&lgr->conns_lock);
}
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
/* Save client advertised inbound read limit for use later in accept. */
newxprt->sc_ord = param->initiator_depth;
- /* Set the local and remote addresses in the transport */
sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
+ /* The remote port is arbitrary and not under the control of the
+ * client ULP. Set it to a fixed value so that the DRC continues
+ * to be effective after a reconnect.
+ */
+ rpc_set_port((struct sockaddr *)&newxprt->sc_xprt.xpt_remote, 0);
+
sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
struct sock_xprt *transport =
container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
+ rpc_fraghdr rm = xs_stream_record_marker(xdr);
+ unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
int status;
int sent = 0;
req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, NULL, 0, xdr,
- transport->xmit.offset,
- xs_stream_record_marker(xdr),
- &sent);
+ transport->xmit.offset, rm, &sent);
dprintk("RPC: %s(%u) = %d\n",
__func__, xdr->len - transport->xmit.offset, status);
if (likely(sent > 0) || status == 0) {
transport->xmit.offset += sent;
req->rq_bytes_sent = transport->xmit.offset;
- if (likely(req->rq_bytes_sent >= req->rq_slen)) {
+ if (likely(req->rq_bytes_sent >= msglen)) {
req->rq_xmit_bytes_sent += transport->xmit.offset;
transport->xmit.offset = 0;
return 0;
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
+ rpc_fraghdr rm = xs_stream_record_marker(xdr);
+ unsigned int msglen = rm ? req->rq_slen + sizeof(rm) : req->rq_slen;
bool vm_wait = false;
int status;
int sent;
while (1) {
sent = 0;
status = xs_sendpages(transport->sock, NULL, 0, xdr,
- transport->xmit.offset,
- xs_stream_record_marker(xdr),
- &sent);
+ transport->xmit.offset, rm, &sent);
dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
xdr->len - transport->xmit.offset, status);
* reset the count of bytes sent. */
transport->xmit.offset += sent;
req->rq_bytes_sent = transport->xmit.offset;
- if (likely(req->rq_bytes_sent >= req->rq_slen)) {
+ if (likely(req->rq_bytes_sent >= msglen)) {
req->rq_xmit_bytes_sent += transport->xmit.offset;
transport->xmit.offset = 0;
return 0;
if (err)
goto out_sysctl;
- err = register_pernet_subsys(&tipc_net_ops);
+ err = register_pernet_device(&tipc_net_ops);
if (err)
goto out_pernet;
if (err)
goto out_socket;
- err = register_pernet_subsys(&tipc_topsrv_net_ops);
+ err = register_pernet_device(&tipc_topsrv_net_ops);
if (err)
goto out_pernet_topsrv;
pr_info("Started in single node mode\n");
return 0;
out_bearer:
- unregister_pernet_subsys(&tipc_topsrv_net_ops);
+ unregister_pernet_device(&tipc_topsrv_net_ops);
out_pernet_topsrv:
tipc_socket_stop();
out_socket:
- unregister_pernet_subsys(&tipc_net_ops);
+ unregister_pernet_device(&tipc_net_ops);
out_pernet:
tipc_unregister_sysctl();
out_sysctl:
static void __exit tipc_exit(void)
{
tipc_bearer_cleanup();
- unregister_pernet_subsys(&tipc_topsrv_net_ops);
+ unregister_pernet_device(&tipc_topsrv_net_ops);
tipc_socket_stop();
- unregister_pernet_subsys(&tipc_net_ops);
+ unregister_pernet_device(&tipc_net_ops);
tipc_netlink_stop();
tipc_netlink_compat_stop();
tipc_unregister_sysctl();
if (!bearer)
return -EMSGSIZE;
- len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME);
+ len = TLV_GET_DATA_LEN(msg->req);
+ if (len <= 0)
+ return -EINVAL;
+
+ len = min_t(int, len, TIPC_MAX_BEARER_NAME);
if (!string_is_valid(name, len))
return -EINVAL;
name = (char *)TLV_DATA(msg->req);
- len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME);
+ len = TLV_GET_DATA_LEN(msg->req);
+ if (len <= 0)
+ return -EINVAL;
+
+ len = min_t(int, len, TIPC_MAX_BEARER_NAME);
if (!string_is_valid(name, len))
return -EINVAL;
if (!link)
return -EMSGSIZE;
- len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME);
+ len = TLV_GET_DATA_LEN(msg->req);
+ if (len <= 0)
+ return -EINVAL;
+
+ len = min_t(int, len, TIPC_MAX_BEARER_NAME);
if (!string_is_valid(name, len))
return -EINVAL;
goto skip_tx_cleanup;
}
- if (!tls_complete_pending_work(sk, ctx, 0, &timeo))
+ if (unlikely(sk->sk_write_pending) &&
+ !wait_on_pending_writer(sk, &timeo))
tls_handle_open_record(sk, 0);
/* We need these for tls_sw_fallback handling of other packets */
int main(int argc, char *argv[])
{
- int pidfd = 0, ret = EXIT_FAILURE;
+ int pidfd = -1, ret = EXIT_FAILURE;
char buf[4096] = { 0 };
pid_t pid;
int procfd, statusfd;
pid = pidfd_clone(CLONE_PIDFD, &pidfd);
if (pid < 0)
- exit(ret);
+ err(ret, "CLONE_PIDFD");
+ if (pidfd == -1) {
+ warnx("CLONE_PIDFD is not supported by the kernel");
+ goto out;
+ }
procfd = pidfd_metadata_fd(pid, pidfd);
close(pidfd);
if (copy_from_user(ev, arg, 8))
return -EFAULT;
memset(&tmpev, 0, sizeof(tmpev));
- snd_seq_oss_fill_addr(dp, &tmpev, dp->addr.port, dp->addr.client);
+ snd_seq_oss_fill_addr(dp, &tmpev, dp->addr.client, dp->addr.port);
tmpev.time.tick = 0;
if (! snd_seq_oss_process_event(dp, (union evrec *)ev, &tmpev)) {
snd_seq_oss_dispatch(dp, &tmpev, 0, 0);
memset(&event, 0, sizeof(event));
/* set dummy -- to be sure */
event.type = SNDRV_SEQ_EVENT_NOTEOFF;
- snd_seq_oss_fill_addr(dp, &event, dp->addr.port, dp->addr.client);
+ snd_seq_oss_fill_addr(dp, &event, dp->addr.client, dp->addr.port);
if (snd_seq_oss_process_event(dp, rec, &event))
return 0; /* invalid event - no need to insert queue */
u8 *b;
for (f = 0; f < frames; f++) {
- port = (s->data_block_counter + f) % 8;
+ port = (8 - s->tx_first_dbc + s->data_block_counter + f) % 8;
b = (u8 *)&buffer[p->midi_position];
len = b[0] - 0x80;
int snd_hdac_refresh_widgets(struct hdac_device *codec, bool sysfs)
{
hda_nid_t start_nid;
- int nums, err;
+ int nums, err = 0;
+ /*
+ * Serialize against multiple threads trying to update the sysfs
+ * widgets array.
+ */
+ mutex_lock(&codec->widget_lock);
nums = snd_hdac_get_sub_nodes(codec, codec->afg, &start_nid);
if (!start_nid || nums <= 0 || nums >= 0xff) {
dev_err(&codec->dev, "cannot read sub nodes for FG 0x%02x\n",
codec->afg);
- return -EINVAL;
+ err = -EINVAL;
+ goto unlock;
}
if (sysfs) {
- mutex_lock(&codec->widget_lock);
err = hda_widget_sysfs_reinit(codec, start_nid, nums);
- mutex_unlock(&codec->widget_lock);
if (err < 0)
- return err;
+ goto unlock;
}
codec->num_nodes = nums;
codec->start_nid = start_nid;
codec->end_nid = start_nid + nums;
- return 0;
+unlock:
+ mutex_unlock(&codec->widget_lock);
+ return err;
}
EXPORT_SYMBOL_GPL(snd_hdac_refresh_widgets);
SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950),
SND_PCI_QUIRK(0x1558, 0x95e1, "Clevo P95xER", ALC1220_FIXUP_CLEVO_P950),
SND_PCI_QUIRK(0x1558, 0x95e2, "Clevo P950ER", ALC1220_FIXUP_CLEVO_P950),
- SND_PCI_QUIRK(0x1558, 0x96e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
- SND_PCI_QUIRK(0x1558, 0x97e1, "System76 Oryx Pro (oryp5)", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
- SND_PCI_QUIRK(0x1558, 0x65d1, "Tuxedo Book XC1509", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x96e1, "Clevo P960[ER][CDFN]-K", ALC1220_FIXUP_CLEVO_P950),
+ SND_PCI_QUIRK(0x1558, 0x97e1, "Clevo P970[ER][CDFN]", ALC1220_FIXUP_CLEVO_P950),
+ SND_PCI_QUIRK(0x1558, 0x65d1, "Clevo PB51[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD),
SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD),
SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Y530", ALC882_FIXUP_LENOVO_Y530),
SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
+ SND_PCI_QUIRK(0x17aa, 0x3111, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
SND_PCI_QUIRK(0x17aa, 0x312a, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
SND_PCI_QUIRK(0x17aa, 0x312f, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
line6pcm->max_packet_size_out =
usb_maxpacket(line6->usbdev,
usb_sndisocpipe(line6->usbdev, ep_write), 1);
+ if (!line6pcm->max_packet_size_in || !line6pcm->max_packet_size_out) {
+ dev_err(line6pcm->line6->ifcdev,
+ "cannot get proper max packet size\n");
+ return -EINVAL;
+ }
spin_lock_init(&line6pcm->out.lock);
spin_lock_init(&line6pcm->in.lock);
return err;
}
- kctl->private_value |= (value << 24);
+ kctl->private_value |= ((unsigned int)value << 24);
return 0;
}
if (err < 0)
return err;
- kctl->private_value |= value[0] << 24;
+ kctl->private_value |= (unsigned int)value[0] << 24;
return 0;
}
/* These include both GPRs and XMMX registers */
PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2,
};
+
+#define PERF_REG_EXTENDED_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1))
+
#endif /* _ASM_X86_PERF_REGS_H */
void perf_regs_load(u64 *regs);
#define PERF_REGS_MAX PERF_REG_X86_XMM_MAX
-#define PERF_XMM_REGS_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1))
#ifndef HAVE_ARCH_X86_64_SUPPORT
#define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1)
#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
.sample_type = PERF_SAMPLE_REGS_INTR,
- .sample_regs_intr = PERF_XMM_REGS_MASK,
+ .sample_regs_intr = PERF_REG_EXTENDED_MASK,
.precise_ip = 1,
.disabled = 1,
.exclude_kernel = 1,
fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
if (fd != -1) {
close(fd);
- return (PERF_XMM_REGS_MASK | PERF_REGS_MASK);
+ return (PERF_REG_EXTENDED_MASK | PERF_REGS_MASK);
}
return PERF_REGS_MASK;
}
}
+DEFINE_IDR(find_idr);
+
+static void *idr_throbber(void *arg)
+{
+ time_t start = time(NULL);
+ int id = *(int *)arg;
+
+ rcu_register_thread();
+ do {
+ idr_alloc(&find_idr, xa_mk_value(id), id, id + 1, GFP_KERNEL);
+ idr_remove(&find_idr, id);
+ } while (time(NULL) < start + 10);
+ rcu_unregister_thread();
+
+ return NULL;
+}
+
+void idr_find_test_1(int anchor_id, int throbber_id)
+{
+ pthread_t throbber;
+ time_t start = time(NULL);
+
+ pthread_create(&throbber, NULL, idr_throbber, &throbber_id);
+
+ BUG_ON(idr_alloc(&find_idr, xa_mk_value(anchor_id), anchor_id,
+ anchor_id + 1, GFP_KERNEL) != anchor_id);
+
+ do {
+ int id = 0;
+ void *entry = idr_get_next(&find_idr, &id);
+ BUG_ON(entry != xa_mk_value(id));
+ } while (time(NULL) < start + 11);
+
+ pthread_join(throbber, NULL);
+
+ idr_remove(&find_idr, anchor_id);
+ BUG_ON(!idr_is_empty(&find_idr));
+}
+
+void idr_find_test(void)
+{
+ idr_find_test_1(100000, 0);
+ idr_find_test_1(0, 100000);
+}
+
void idr_checks(void)
{
unsigned long i;
idr_u32_test(1);
idr_u32_test(0);
idr_align_test(&idr);
+ idr_find_test();
}
#define module_init(x)
kvm_vm_restart(vm, O_RDWR);
vm_vcpu_add(vm, VCPU_ID, 0, 0);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap);
vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID);
free(state);
tempfile
prot_sao
segv_errors
-wild_bctr
\ No newline at end of file
+wild_bctr
+large_vm_fork_separation
\ No newline at end of file
noarg:
$(MAKE) -C ../
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
+ large_vm_fork_separation
TEST_GEN_FILES := tempfile
top_srcdir = ../../../../..
$(OUTPUT)/prot_sao: ../utils.c
$(OUTPUT)/wild_bctr: CFLAGS += -m64
+$(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
$(OUTPUT)/tempfile:
dd if=/dev/zero of=$@ bs=64k count=1
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2019, Michael Ellerman, IBM Corp.
+//
+// Test that allocating memory beyond the memory limit and then forking is
+// handled correctly, ie. the child is able to access the mappings beyond the
+// memory limit and the child's writes are not visible to the parent.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+
+#ifndef MAP_FIXED_NOREPLACE
+#define MAP_FIXED_NOREPLACE MAP_FIXED // "Should be safe" above 512TB
+#endif
+
+
+static int test(void)
+{
+ int p2c[2], c2p[2], rc, status, c, *p;
+ unsigned long page_size;
+ pid_t pid;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ SKIP_IF(page_size != 65536);
+
+ // Create a mapping at 512TB to allocate an extended_id
+ p = mmap((void *)(512ul << 40), page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
+ if (p == MAP_FAILED) {
+ perror("mmap");
+ printf("Error: couldn't mmap(), confirm kernel has 4TB support?\n");
+ return 1;
+ }
+
+ printf("parent writing %p = 1\n", p);
+ *p = 1;
+
+ FAIL_IF(pipe(p2c) == -1 || pipe(c2p) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ FAIL_IF(read(p2c[0], &c, 1) != 1);
+
+ pid = getpid();
+ printf("child writing %p = %d\n", p, pid);
+ *p = pid;
+
+ FAIL_IF(write(c2p[1], &c, 1) != 1);
+ FAIL_IF(read(p2c[0], &c, 1) != 1);
+ exit(0);
+ }
+
+ c = 0;
+ FAIL_IF(write(p2c[1], &c, 1) != 1);
+ FAIL_IF(read(c2p[0], &c, 1) != 1);
+
+ // Prevent compiler optimisation
+ barrier();
+
+ rc = 0;
+ printf("parent reading %p = %d\n", p, *p);
+ if (*p != 1) {
+ printf("Error: BUG! parent saw child's write! *p = %d\n", *p);
+ rc = 1;
+ }
+
+ FAIL_IF(write(p2c[1], &c, 1) != 1);
+ FAIL_IF(waitpid(pid, &status, 0) == -1);
+ FAIL_IF(!WIFEXITED(status) || WEXITSTATUS(status));
+
+ if (rc == 0)
+ printf("success: test completed OK\n");
+
+ return rc;
+}
+
+int main(void)
+{
+ return test_harness(test, "large_vm_fork_separation");
+}
#define SAMPLE_READINGS 10
#define MEAN_SAMPLE_INTERVAL 0.1
#define STEP_INTERVAL 1.0
-#define MAX_PRECISION 100e-9
-#define MAX_FREQ_ERROR 10e-6
-#define MAX_STDDEV 1000e-9
+#define MAX_PRECISION 500e-9
+#define MAX_FREQ_ERROR 0.02e-6
+#define MAX_STDDEV 50e-9
#ifndef ADJ_SETOFFSET
#define ADJ_SETOFFSET 0x0100